Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Line| Count|Source
   1|      |//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
   2|      |//
   3|      |//                     The LLVM Compiler Infrastructure
   4|      |//
   5|      |// This file is distributed under the University of Illinois Open Source
   6|      |// License. See LICENSE.TXT for details.
   7|      |//
   8|      |//===----------------------------------------------------------------------===//
   9|      |//
  10|      |// This file implements the PPCISelLowering class.
  11|      |//
  12|      |//===----------------------------------------------------------------------===//
  13|      |
  14|      |#include "PPCISelLowering.h"
  15|      |#include "MCTargetDesc/PPCPredicates.h"
  16|      |#include "PPC.h"
  17|      |#include "PPCCCState.h"
  18|      |#include "PPCCallingConv.h"
  19|      |#include "PPCFrameLowering.h"
  20|      |#include "PPCInstrInfo.h"
  21|      |#include "PPCMachineFunctionInfo.h"
  22|      |#include "PPCPerfectShuffle.h"
  23|      |#include "PPCRegisterInfo.h"
  24|      |#include "PPCSubtarget.h"
  25|      |#include "PPCTargetMachine.h"
  26|      |#include "llvm/ADT/APFloat.h"
  27|      |#include "llvm/ADT/APInt.h"
  28|      |#include "llvm/ADT/ArrayRef.h"
  29|      |#include "llvm/ADT/DenseMap.h"
  30|      |#include "llvm/ADT/None.h"
  31|      |#include "llvm/ADT/STLExtras.h"
  32|      |#include "llvm/ADT/SmallPtrSet.h"
  33|      |#include "llvm/ADT/SmallSet.h"
  34|      |#include "llvm/ADT/SmallVector.h"
  35|      |#include "llvm/ADT/Statistic.h"
  36|      |#include "llvm/ADT/StringRef.h"
  37|      |#include "llvm/ADT/StringSwitch.h"
  38|      |#include "llvm/CodeGen/CallingConvLower.h"
  39|      |#include "llvm/CodeGen/ISDOpcodes.h"
  40|      |#include "llvm/CodeGen/MachineBasicBlock.h"
  41|      |#include "llvm/CodeGen/MachineFrameInfo.h"
  42|      |#include "llvm/CodeGen/MachineFunction.h"
  43|      |#include "llvm/CodeGen/MachineInstr.h"
  44|      |#include "llvm/CodeGen/MachineInstrBuilder.h"
  45|      |#include "llvm/CodeGen/MachineJumpTableInfo.h"
  46|      |#include "llvm/CodeGen/MachineLoopInfo.h"
  47|      |#include "llvm/CodeGen/MachineMemOperand.h"
  48|      |#include "llvm/CodeGen/MachineOperand.h"
  49|      |#include "llvm/CodeGen/MachineRegisterInfo.h"
  50|      |#include "llvm/CodeGen/MachineValueType.h"
  51|      |#include "llvm/CodeGen/RuntimeLibcalls.h"
  52|      |#include "llvm/CodeGen/SelectionDAG.h"
  53|      |#include "llvm/CodeGen/SelectionDAGNodes.h"
  54|      |#include "llvm/CodeGen/ValueTypes.h"
  55|      |#include "llvm/IR/CallSite.h"
  56|      |#include "llvm/IR/CallingConv.h"
  57|      |#include "llvm/IR/Constant.h"
  58|      |#include "llvm/IR/Constants.h"
  59|      |#include "llvm/IR/DataLayout.h"
  60|      |#include "llvm/IR/DebugLoc.h"
  61|      |#include "llvm/IR/DerivedTypes.h"
  62|      |#include "llvm/IR/Function.h"
  63|      |#include "llvm/IR/GlobalValue.h"
  64|      |#include "llvm/IR/IRBuilder.h"
  65|      |#include "llvm/IR/Instructions.h"
  66|      |#include "llvm/IR/Intrinsics.h"
  67|      |#include "llvm/IR/Module.h"
  68|      |#include "llvm/IR/Type.h"
  69|      |#include "llvm/IR/Use.h"
  70|      |#include "llvm/IR/Value.h"
  71|      |#include "llvm/MC/MCExpr.h"
  72|      |#include "llvm/MC/MCRegisterInfo.h"
  73|      |#include "llvm/Support/AtomicOrdering.h"
  74|      |#include "llvm/Support/BranchProbability.h"
  75|      |#include "llvm/Support/Casting.h"
  76|      |#include "llvm/Support/CodeGen.h"
  77|      |#include "llvm/Support/CommandLine.h"
  78|      |#include "llvm/Support/Compiler.h"
  79|      |#include "llvm/Support/Debug.h"
  80|      |#include "llvm/Support/ErrorHandling.h"
  81|      |#include "llvm/Support/Format.h"
  82|      |#include "llvm/Support/KnownBits.h"
  83|      |#include "llvm/Support/MathExtras.h"
  84|      |#include "llvm/Support/raw_ostream.h"
  85|      |#include "llvm/Target/TargetInstrInfo.h"
  86|      |#include "llvm/Target/TargetLowering.h"
  87|      |#include "llvm/Target/TargetMachine.h"
  88|      |#include "llvm/Target/TargetOptions.h"
  89|      |#include "llvm/Target/TargetRegisterInfo.h"
  90|      |#include <algorithm>
  91|      |#include <cassert>
  92|      |#include <cstdint>
  93|      |#include <iterator>
  94|      |#include <list>
  95|      |#include <utility>
  96|      |#include <vector>
  97|      |
  98|      |using namespace llvm;
  99|      |
 100|      |#define DEBUG_TYPE "ppc-lowering"
 101|      |
 102|      |static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
 103|      |cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
 104|      |
 105|      |static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
 106|      |cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
 107|      |
 108|      |static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
 109|      |cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
 110|      |
 111|      |static cl::opt<bool> DisableSCO("disable-ppc-sco",
 112|      |cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
 113|      |
 114|      |STATISTIC(NumTailCalls, "Number of tail calls");
 115|      |STATISTIC(NumSiblingCalls, "Number of sibling calls");
 116|      |
 117|      |// FIXME: Remove this once the bug has been fixed!
 118|      |extern cl::opt<bool> ANDIGlueBug;
 119|      |
 120|      |PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
 121|      |                                     const PPCSubtarget &STI)
 122| 1.40k|    : TargetLowering(TM), Subtarget(STI) {
 123| 1.40k|  // Use _setjmp/_longjmp instead of setjmp/longjmp.
 124| 1.40k|  setUseUnderscoreSetJmp(true);
 125| 1.40k|  setUseUnderscoreLongJmp(true);
 126| 1.40k|
 127| 1.40k|  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
 128| 1.40k|  // arguments are at least 4/8 bytes aligned.
 129| 1.40k|  bool isPPC64 = Subtarget.isPPC64();
 130| 1.40k|  setMinStackArgumentAlignment(isPPC64 ? 8 : 4);
 131| 1.40k|
 132| 1.40k|  // Set up the register classes.
 133| 1.40k|  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
 134| 1.40k|  if (!useSoftFloat()) {
 135| 1.39k|    addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
 136| 1.39k|    addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
 137| 1.39k|  }
 138| 1.40k|
 139| 1.40k|  // Match BITREVERSE to customized fast code sequence in the td file.
 140| 1.40k|  setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
 141| 1.40k|  setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
 142| 1.40k|
 143| 1.40k|  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
 144| 8.44k|  for (MVT VT : MVT::integer_valuetypes()) {
 145| 8.44k|    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
 146| 8.44k|    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
 147| 8.44k|  }
 148| 1.40k|
 149| 1.40k|  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
 150| 1.40k|
 151| 1.40k|  // PowerPC has pre-inc loads and stores.
 152| 1.40k|  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
 153| 1.40k|  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
 154| 1.40k|  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
 155| 1.40k|  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
 156| 1.40k|  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
 157| 1.40k|  setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
 158| 1.40k|  setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
 159| 1.40k|  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
 160| 1.40k|  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
 161| 1.40k|  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
 162| 1.40k|  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
 163| 1.40k|  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
 164| 1.40k|  setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
 165| 1.40k|  setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
 166| 1.40k|
 167| 1.40k|  if (Subtarget.useCRBits()) {
 168| 1.20k|    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 169| 1.20k|
 170| 1.20k|    if (isPPC64 || Subtarget.hasFPCVT()) {
 171|   859|      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
 172|   859|      AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
 173|   859|                         isPPC64 ? MVT::i64 : MVT::i32);
 174|   859|      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
 175|   859|      AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
 176|   859|                        isPPC64 ? MVT::i64 : MVT::i32);
 177| 1.20k|    } else {
 178|   347|      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
 179|   347|      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
 180|   347|    }
 181| 1.20k|
 182| 1.20k|    // PowerPC does not support direct load/store of condition registers.
 183| 1.20k|    setOperationAction(ISD::LOAD, MVT::i1, Custom);
 184| 1.20k|    setOperationAction(ISD::STORE, MVT::i1, Custom);
 185| 1.20k|
 186| 1.20k|    // FIXME: Remove this once the ANDI glue bug is fixed:
 187| 1.20k|    if (ANDIGlueBug)
 188|     0|      setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
 189| 1.20k|
 190| 7.23k|    for (MVT VT : MVT::integer_valuetypes()) {
 191| 7.23k|      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
 192| 7.23k|      setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
 193| 7.23k|      setTruncStoreAction(VT, MVT::i1, Expand);
 194| 7.23k|    }
 195| 1.20k|
 196| 1.20k|    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
 197| 1.20k|  }
 198| 1.40k|
 199| 1.40k|  // This is used in the ppcf128->int sequence.  Note it has different semantics
 200| 1.40k|  // from FP_ROUND:  that rounds to nearest, this rounds to zero.
 201| 1.40k|  setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
 202| 1.40k|
 203| 1.40k|  // We do not currently implement these libm ops for PowerPC.
 204| 1.40k|  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
 205| 1.40k|  setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
 206| 1.40k|  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
 207| 1.40k|  setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
 208| 1.40k|  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
 209| 1.40k|  setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
 210| 1.40k|
 211| 1.40k|  // PowerPC has no SREM/UREM instructions unless we are on P9
 212| 1.40k|  // On P9 we may use a hardware instruction to compute the remainder.
 213| 1.40k|  // The instructions are not legalized directly because in the cases where the
 214| 1.40k|  // result of both the remainder and the division is required it is more
 215| 1.40k|  // efficient to compute the remainder from the result of the division rather
 216| 1.40k|  // than use the remainder instruction.
 217| 1.40k|  if (Subtarget.isISA3_0()) {
 218|    68|    setOperationAction(ISD::SREM, MVT::i32, Custom);
 219|    68|    setOperationAction(ISD::UREM, MVT::i32, Custom);
 220|    68|    setOperationAction(ISD::SREM, MVT::i64, Custom);
 221|    68|    setOperationAction(ISD::UREM, MVT::i64, Custom);
 222| 1.40k|  } else {
 223| 1.33k|    setOperationAction(ISD::SREM, MVT::i32, Expand);
 224| 1.33k|    setOperationAction(ISD::UREM, MVT::i32, Expand);
 225| 1.33k|    setOperationAction(ISD::SREM, MVT::i64, Expand);
 226| 1.33k|    setOperationAction(ISD::UREM, MVT::i64, Expand);
 227| 1.33k|  }
 228| 1.40k|
 229| 1.40k|  if (Subtarget.hasP9Vector()) {
 230|    60|    setOperationAction(ISD::ABS, MVT::v4i32, Legal);
 231|    60|    setOperationAction(ISD::ABS, MVT::v8i16, Legal);
 232|    60|    setOperationAction(ISD::ABS, MVT::v16i8, Legal);
 233|    60|  }
 234| 1.40k|
 235| 1.40k|  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
 236| 1.40k|  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
 237| 1.40k|  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
 238| 1.40k|  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
 239| 1.40k|  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
 240| 1.40k|  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
 241| 1.40k|  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
 242| 1.40k|  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
 243| 1.40k|  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
 244| 1.40k|
 245| 1.40k|  // We don't support sin/cos/sqrt/fmod/pow
 246| 1.40k|  setOperationAction(ISD::FSIN , MVT::f64, Expand);
 247| 1.40k|  setOperationAction(ISD::FCOS , MVT::f64, Expand);
 248| 1.40k|  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
 249| 1.40k|  setOperationAction(ISD::FREM , MVT::f64, Expand);
 250| 1.40k|  setOperationAction(ISD::FPOW , MVT::f64, Expand);
 251| 1.40k|  setOperationAction(ISD::FMA  , MVT::f64, Legal);
 252| 1.40k|  setOperationAction(ISD::FSIN , MVT::f32, Expand);
 253| 1.40k|  setOperationAction(ISD::FCOS , MVT::f32, Expand);
 254| 1.40k|  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
 255| 1.40k|  setOperationAction(ISD::FREM , MVT::f32, Expand);
 256| 1.40k|  setOperationAction(ISD::FPOW , MVT::f32, Expand);
 257| 1.40k|  setOperationAction(ISD::FMA  , MVT::f32, Legal);
 258| 1.40k|
 259| 1.40k|  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
 260| 1.40k|
 261| 1.40k|  // If we're enabling GP optimizations, use hardware square root
 262| 1.40k|  if (!Subtarget.hasFSQRT() &&
 263|   531|      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
 264|     0|        Subtarget.hasFRE()))
 265|   531|    setOperationAction(ISD::FSQRT, MVT::f64, Expand);
 266| 1.40k|
 267| 1.40k|  if (!Subtarget.hasFSQRT() &&
 268|   531|      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
 269|     0|        Subtarget.hasFRES()))
 270|   531|    setOperationAction(ISD::FSQRT, MVT::f32, Expand);
 271| 1.40k|
 272| 1.40k|  if (Subtarget.hasFCPSGN()) {
 273|   777|    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
 274|   777|    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
 275| 1.40k|  } else {
 276|   630|    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
 277|   630|    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
 278|   630|  }
 279| 1.40k|
 280| 1.40k|  if (Subtarget.hasFPRND()) {
 281|   779|    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
 282|   779|    setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
 283|   779|    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
 284|   779|    setOperationAction(ISD::FROUND, MVT::f64, Legal);
 285|   779|
 286|   779|    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
 287|   779|    setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
 288|   779|    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
 289|   779|    setOperationAction(ISD::FROUND, MVT::f32, Legal);
 290|   779|  }
 291| 1.40k|
 292| 1.40k|  // PowerPC does not have BSWAP
 293| 1.40k|  // CTPOP or CTTZ were introduced in P8/P9 respectively
 294| 1.40k|  setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
 295| 1.40k|  setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
 296| 1.40k|  if (Subtarget.isISA3_0()) {
 297|    68|    setOperationAction(ISD::CTTZ , MVT::i32  , Legal);
 298|    68|    setOperationAction(ISD::CTTZ , MVT::i64  , Legal);
 299| 1.40k|  } else {
 300| 1.33k|    setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
 301| 1.33k|    setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
 302| 1.33k|  }
 303| 1.40k|
 304| 1.40k|  if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
 305|   690|    setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
 306|   690|    setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
 307| 1.40k|  } else {
 308|   717|    setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
 309|   717|    setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
 310|   717|  }
 311| 1.40k|
 312| 1.40k|  // PowerPC does not have ROTR
 313| 1.40k|  setOperationAction(ISD::ROTR, MVT::i32   , Expand);
 314| 1.40k|  setOperationAction(ISD::ROTR, MVT::i64   , Expand);
 315| 1.40k|
 316| 1.40k|  if (!Subtarget.useCRBits()) {
 317|   201|    // PowerPC does not have Select
 318|   201|    setOperationAction(ISD::SELECT, MVT::i32, Expand);
 319|   201|    setOperationAction(ISD::SELECT, MVT::i64, Expand);
 320|   201|    setOperationAction(ISD::SELECT, MVT::f32, Expand);
 321|   201|    setOperationAction(ISD::SELECT, MVT::f64, Expand);
 322|   201|  }
 323| 1.40k|
 324| 1.40k|  // PowerPC wants to turn select_cc of FP into fsel when possible.
 325| 1.40k|  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
 326| 1.40k|  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
 327| 1.40k|
 328| 1.40k|  // PowerPC wants to optimize integer setcc a bit
 329| 1.40k|  if (!Subtarget.useCRBits())
 330|   201|    setOperationAction(ISD::SETCC, MVT::i32, Custom);
 331| 1.40k|
 332| 1.40k|  // PowerPC does not have BRCOND which requires SetCC
 333| 1.40k|  if (!Subtarget.useCRBits())
 334|   201|    setOperationAction(ISD::BRCOND, MVT::Other, Expand);
 335| 1.40k|
 336| 1.40k|  setOperationAction(ISD::BR_JT,  MVT::Other, Expand);
 337| 1.40k|
 338| 1.40k|  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
 339| 1.40k|  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
 340| 1.40k|
 341| 1.40k|  // PowerPC does not have [U|S]INT_TO_FP
 342| 1.40k|  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
 343| 1.40k|  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
 344| 1.40k|
 345| 1.40k|  if (Subtarget.hasDirectMove() && isPPC64) {
 346|   399|    setOperationAction(ISD::BITCAST, MVT::f32, Legal);
 347|   399|    setOperationAction(ISD::BITCAST, MVT::i32, Legal);
 348|   399|    setOperationAction(ISD::BITCAST, MVT::i64, Legal);
 349|   399|    setOperationAction(ISD::BITCAST, MVT::f64, Legal);
 350| 1.40k|  } else {
 351| 1.00k|    setOperationAction(ISD::BITCAST, MVT::f32, Expand);
 352| 1.00k|    setOperationAction(ISD::BITCAST, MVT::i32, Expand);
 353| 1.00k|    setOperationAction(ISD::BITCAST, MVT::i64, Expand);
 354| 1.00k|    setOperationAction(ISD::BITCAST, MVT::f64, Expand);
 355| 1.00k|  }
 356| 1.40k|
 357| 1.40k|  // We cannot sextinreg(i1).  Expand to shifts.
 358| 1.40k|  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 359| 1.40k|
 360| 1.40k|  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
 361| 1.40k|  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
 362| 1.40k|  // support continuation, user-level threading, and etc.. As a result, no
 363| 1.40k|  // other SjLj exception interfaces are implemented and please don't build
 364| 1.40k|  // your own exception handling based on them.
 365| 1.40k|  // LLVM/Clang supports zero-cost DWARF exception handling.
 366| 1.40k|  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
 367| 1.40k|  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
 368| 1.40k|
 369| 1.40k|  // We want to legalize GlobalAddress and ConstantPool nodes into the
 370| 1.40k|  // appropriate instructions to materialize the address.
 371| 1.40k|  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
 372| 1.40k|  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
 373| 1.40k|  setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
 374| 1.40k|  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
 375| 1.40k|  setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
 376| 1.40k|  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
 377| 1.40k|  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
 378| 1.40k|  setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
 379| 1.40k|  setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
 380| 1.40k|  setOperationAction(ISD::JumpTable,     MVT::i64, Custom);
 381| 1.40k|
 382| 1.40k|  // TRAP is legal.
 383| 1.40k|  setOperationAction(ISD::TRAP, MVT::Other, Legal);
 384| 1.40k|
 385| 1.40k|  // TRAMPOLINE is custom lowered.
 386| 1.40k|  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
 387| 1.40k|  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
 388| 1.40k|
 389| 1.40k|  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
 390| 1.40k|  setOperationAction(ISD::VASTART           , MVT::Other, Custom);
 391| 1.40k|
 392| 1.40k|  if (Subtarget.isSVR4ABI()) {
 393| 1.25k|    if (isPPC64) {
 394|   992|      // VAARG always uses double-word chunks, so promote anything smaller.
 395|   992|      setOperationAction(ISD::VAARG, MVT::i1, Promote);
 396|   992|      AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
 397|   992|      setOperationAction(ISD::VAARG, MVT::i8, Promote);
 398|   992|      AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
 399|   992|      setOperationAction(ISD::VAARG, MVT::i16, Promote);
 400|   992|      AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
 401|   992|      setOperationAction(ISD::VAARG, MVT::i32, Promote);
 402|   992|      AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
 403|   992|      setOperationAction(ISD::VAARG, MVT::Other, Expand);
 404| 1.25k|    } else {
 405|   258|      // VAARG is custom lowered with the 32-bit SVR4 ABI.
 406|   258|      setOperationAction(ISD::VAARG, MVT::Other, Custom);
 407|   258|      setOperationAction(ISD::VAARG, MVT::i64, Custom);
 408|   258|    }
 409| 1.25k|  } else
 410|   157|    setOperationAction(ISD::VAARG, MVT::Other, Expand);
 411| 1.40k|
 412| 1.40k|  if (Subtarget.isSVR4ABI() && !isPPC64)
 413| 1.40k|    // VACOPY is custom lowered with the 32-bit SVR4 ABI.
 414|   258|    setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
 415| 1.40k|  else
 416| 1.14k|    setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
 417| 1.40k|
 418| 1.40k|  // Use the default implementation.
 419| 1.40k|  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
 420| 1.40k|  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
 421| 1.40k|  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
 422| 1.40k|  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
 423| 1.40k|  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);
 424| 1.40k|  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
 425| 1.40k|  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
 426| 1.40k|  setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
 427| 1.40k|  setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
 428| 1.40k|
 429| 1.40k|  // We want to custom lower some of our intrinsics.
 430| 1.40k|  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
 431| 1.40k|
 432| 1.40k|  // To handle counter-based loop conditions.
 433| 1.40k|  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
 434| 1.40k|
 435| 1.40k|  setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
 436| 1.40k|  setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
 437| 1.40k|  setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
 438| 1.40k|  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
 439| 1.40k|
 440| 1.40k|  // Comparisons that require checking two conditions.
 441| 1.40k|  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
 442| 1.40k|  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
 443| 1.40k|  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
 444| 1.40k|  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
 445| 1.40k|  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
 446| 1.40k|  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
 447| 1.40k|  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
 448| 1.40k|  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
 449| 1.40k|  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
 450| 1.40k|  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
 451| 1.40k|  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
 452| 1.40k|  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
 453| 1.40k|
 454| 1.40k|  if (Subtarget.has64BitSupport()) {
 455| 1.08k|    // They also have instructions for converting between i64 and fp.
 456| 1.08k|    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 457| 1.08k|    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
 458| 1.08k|    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 459| 1.08k|    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
 460| 1.08k|    // This is just the low 32 bits of a (signed) fp->i64 conversion.
 461| 1.08k|    // We cannot do this with Promote because i64 is not a legal type.
 462| 1.08k|    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
 463| 1.08k|
 464| 1.08k|    if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
 465| 1.03k|      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
 466| 1.40k|  } else {
 467|   321|    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
 468|   321|    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
 469|   321|  }
 470| 1.40k|
 471| 1.40k|  // With the instructions enabled under FPCVT, we can do everything.
 472| 1.40k|  if (Subtarget.hasFPCVT()) {
 473|   764|    if (Subtarget.has64BitSupport()) {
 474|   764|      setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 475|   764|      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
 476|   764|      setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 477|   764|      setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
 478|   764|    }
 479|   764|
 480|   764|    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
 481|   764|    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
 482|   764|    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
 483|   764|    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
 484|   764|  }
 485| 1.40k|
 486| 1.40k|  if (Subtarget.use64BitRegs()) {
 487| 1.02k|    // 64-bit PowerPC implementations can support i64 types directly
 488| 1.02k|    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
 489| 1.02k|    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
 490| 1.02k|    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
 491| 1.02k|    // 64-bit PowerPC wants to expand i128 shifts itself.
 492| 1.02k|    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
 493| 1.02k|    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
 494| 1.02k|    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
 495| 1.40k|  } else {
 496|   379|    // 32-bit PowerPC wants to expand i64 shifts itself.
 497|   379|    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
 498|   379|    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
 499|   379|    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
 500|   379|  }
 501| 1.40k|
 502| 1.40k|  if (Subtarget.hasAltivec()) {
 503|   822|    // First set operation action for all vector types to expand. Then we
 504|   822|    // will selectively turn on ones that can be effectively codegen'd.
 505| 77.2k|    for (MVT VT : MVT::vector_valuetypes()) {
 506| 77.2k|      // add/sub are legal for all supported vector VT's.
 507| 77.2k|      setOperationAction(ISD::ADD, VT, Legal);
 508| 77.2k|      setOperationAction(ISD::SUB, VT, Legal);
 509| 77.2k|
 510| 77.2k|      // Vector instructions introduced in P8
 511| 77.2k|      if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
 512| 40.1k|        setOperationAction(ISD::CTPOP, VT, Legal);
 513| 40.1k|        setOperationAction(ISD::CTLZ, VT, Legal);
 514| 40.1k|      }
 515| 37.0k|      else {
 516| 37.0k|        setOperationAction(ISD::CTPOP, VT, Expand);
 517| 37.0k|        setOperationAction(ISD::CTLZ, VT, Expand);
 518| 37.0k|      }
 519| 77.2k|
 520| 77.2k|      // Vector instructions introduced in P9
 521| 77.2k|      if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
 522| 6.32k|        setOperationAction(ISD::CTTZ, VT, Legal);
 523| 77.2k|      else
 524| 70.9k|        setOperationAction(ISD::CTTZ, VT, Expand);
 525| 77.2k|
 526| 77.2k|      // We promote all shuffles to v16i8.
 527| 77.2k|      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
 528| 77.2k|      AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
 529| 77.2k|
 530| 77.2k|      // We promote all non-typed operations to v4i32.
 531| 77.2k|      setOperationAction(ISD::AND   , VT, Promote);
 532| 77.2k|      AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
 533| 77.2k|      setOperationAction(ISD::OR    , VT, Promote);
 534| 77.2k|      AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
 535| 77.2k|      setOperationAction(ISD::XOR   , VT, Promote);
 536| 77.2k|      AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
 537| 77.2k|      setOperationAction(ISD::LOAD  , VT, Promote);
 538| 77.2k|      AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
 539| 77.2k|      setOperationAction(ISD::SELECT, VT, Promote);
 540| 77.2k|      AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
 541| 77.2k|      setOperationAction(ISD::SELECT_CC, VT, Promote);
 542| 77.2k|      AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
 543| 77.2k|      setOperationAction(ISD::STORE, VT, Promote);
 544| 77.2k|      AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
 545| 77.2k|
 546| 77.2k|      // No other operations are legal.
 547| 77.2k|      setOperationAction(ISD::MUL , VT, Expand);
 548| 77.2k|      setOperationAction(ISD::SDIV, VT, Expand);
 549| 77.2k|      setOperationAction(ISD::SREM, VT, Expand);
 550| 77.2k|      setOperationAction(ISD::UDIV, VT, Expand);
 551| 77.2k|      setOperationAction(ISD::UREM, VT, Expand);
 552| 77.2k|      setOperationAction(ISD::FDIV, VT, Expand);
 553| 77.2k|      setOperationAction(ISD::FREM, VT, Expand);
 554| 77.2k|      setOperationAction(ISD::FNEG, VT, Expand);
 555| 77.2k|      setOperationAction(ISD::FSQRT, VT, Expand);
 556| 77.2k|      setOperationAction(ISD::FLOG, VT, Expand);
 557| 77.2k|      setOperationAction(ISD::FLOG10, VT, Expand);
 558| 77.2k|      setOperationAction(ISD::FLOG2, VT, Expand);
 559| 77.2k|      setOperationAction(ISD::FEXP, VT, Expand);
 560| 77.2k|      setOperationAction(ISD::FEXP2, VT, Expand);
 561| 77.2k|      setOperationAction(ISD::FSIN, VT, Expand);
 562| 77.2k|      setOperationAction(ISD::FCOS, VT, Expand);
 563| 77.2k|      setOperationAction(ISD::FABS, VT, Expand);
 564| 77.2k|      setOperationAction(ISD::FFLOOR, VT, Expand);
 565| 77.2k|      setOperationAction(ISD::FCEIL,  VT, Expand);
 566| 77.2k|      setOperationAction(ISD::FTRUNC, VT, Expand);
 567| 77.2k|      setOperationAction(ISD::FRINT,  VT, Expand);
 568| 77.2k|      setOperationAction(ISD::FNEARBYINT, VT, Expand);
 569| 77.2k|      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
 570| 77.2k|      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
 571| 77.2k|      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
 572| 77.2k|      setOperationAction(ISD::MULHU, VT, Expand);
 573| 77.2k|      setOperationAction(ISD::MULHS, VT, Expand);
 574| 77.2k|      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
 575| 77.2k|      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
 576| 77.2k|      setOperationAction(ISD::UDIVREM, VT, Expand);
 577| 77.2k|      setOperationAction(ISD::SDIVREM, VT, Expand);
 578| 77.2k|      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
 579| 77.2k|      setOperationAction(ISD::FPOW, VT, Expand);
 580| 77.2k|      setOperationAction(ISD::BSWAP, VT, Expand);
 581| 77.2k|      setOperationAction(ISD::VSELECT, VT, Expand);
 582| 77.2k|      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
 583| 77.2k|      setOperationAction(ISD::ROTL, VT, Expand);
 584| 77.2k|      setOperationAction(ISD::ROTR, VT, Expand);
 585| 77.2k|
 586| 7.26M|      for (MVT InnerVT : MVT::vector_valuetypes()) {
 587| 7.26M|        setTruncStoreAction(VT, InnerVT, Expand);
 588| 7.26M|        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
 589| 7.26M|        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
 590| 7.26M|        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
 591| 7.26M|      }
 592| 77.2k|    }
 593|   822|
 594|   822|    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
 595|   822|    // with merges, splats, etc.
 596|   822|    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
 597|   822|
 598|   822|    setOperationAction(ISD::AND   , MVT::v4i32, Legal);
 599|   822|    setOperationAction(ISD::OR    , MVT::v4i32, Legal);
 600|   822|    setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
 601|   822|    setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
 602|   822|    setOperationAction(ISD::SELECT, MVT::v4i32,
 603|   822|                       Subtarget.useCRBits() ? Legal : Expand);
 604|   822|    setOperationAction(ISD::STORE , MVT::v4i32, Legal);
 605|   822|    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
 606|   822|    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
 607|   822|    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
 608|   822|    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
 609|   822|    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
 610|   822|    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
 611|   822|    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
 612|   822|    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
 613|   822|
 614|   822|    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
 615|   822|    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
 616|   822|    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
 617|   822|    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
 618|   822|
 619|   822|    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
 620|   822|    setOperationAction(ISD::FMA, MVT::v4f32, Legal);
 621|   822|
 622|   822|    if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
 623|   636|      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
 624|   636|      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
 625|   636|    }
 626|   822|
 627|   822|    if (Subtarget.hasP8Altivec())
 628|   432|      setOperationAction(ISD::MUL, MVT::v4i32, Legal);
 629|   822|    else
 630|   390|      setOperationAction(ISD::MUL, MVT::v4i32, Custom);
 631|   822|
 632|   822|    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
 633|   822|    setOperationAction(ISD::MUL, MVT::v16i8, Custom);
 634|   822|
 635|   822|    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
 636|   822|    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
 637|   822|
 638|   822|    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
 639|   822|    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
 640|   822|    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
 641|   822|    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
 642|   822|
 643|   822|    // Altivec does not contain unordered floating-point compare instructions
 644|   822|    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
 645|   822|    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
 646|   822|    setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
 647|   822|    setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
 648|   822|
 649|   822|    if (Subtarget.hasVSX()) {
 650|   632|      setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
 651|   632|      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
 652|   632|      if (Subtarget.hasP8Vector()) {
 653|   407|        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
 654|   407|        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
 655|   407|      }
 656|   632|      if (Subtarget.hasDirectMove() && isPPC64) {
 657|   399|        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
 658|   399|        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
 659|   399|        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
 660|   399|        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
 661|   399|        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
 662|   399|        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
 663|   399|        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
 664|   399|        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
 665|   399|      }
 666|   632|      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
 667|   632|
 668|   632|      setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
 669|   632|      setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
 670|   632|      setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
 671|   632|      setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
 672|   632|      setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
 673|   632|
 674|   632|      setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
 675|   632|
 676|   632|      setOperationAction(ISD::MUL, MVT::v2f64, Legal);
 677|   632|      setOperationAction(ISD::FMA, MVT::v2f64, Legal);
 678|   632|
 679|   632|      setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
 680|   632|      setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
 681|   632|
 682|   632|      setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
 683|   632|      setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
 684|   632|      setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
 685|   632|      setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
 686|   632|      setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);
 687|   632|
 688|   632|      // Share the Altivec comparison restrictions.
 689|   632|      setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
 690|   632|      setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
 691|   632|      setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
 692|   632|      setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
 693|   632|
 694|   632|      setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
 695|   632|      setOperationAction(ISD::STORE, MVT::v2f64, Legal);
 696|   632|
 697|   632|      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
 698|   632|
 699|   632|      if (Subtarget.hasP8Vector())
 700|   407|        addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
 701|   632|
 702|   632|      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
 703|   632|
 704|   632|      addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
 705|   632|      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
 706|   632|      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
 707|   632|
 708|   632|      if (Subtarget.hasP8Altivec()) {
 709|   409|        setOperationAction(ISD::SHL, MVT::v2i64, Legal);
 710|   409|        setOperationAction(ISD::SRA, MVT::v2i64, Legal);
 711|   409|        setOperationAction(ISD::SRL, MVT::v2i64, Legal);
 712|   409|
 713|   409|        // 128 bit shifts can be accomplished via 3 instructions for SHL and
 714|   409|        // SRL, but not for SRA because of the instructions available:
 715|   409|        // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
 716|   409|        // doing
 717|   409|        setOperationAction(ISD::SHL, MVT::v1i128, Expand);
 718|   409|        setOperationAction(ISD::SRL, MVT::v1i128, Expand);
 719|   409|        setOperationAction(ISD::SRA, MVT::v1i128, Expand);
 720|   409|
 721|   409|        setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
 722|   409|      }
 723|   223|      else {
 724|   223|        setOperationAction(ISD::SHL, MVT::v2i64, Expand);
 725|   223|        setOperationAction(ISD::SRA, MVT::v2i64, Expand);
 726|   223|        setOperationAction(ISD::SRL, MVT::v2i64, Expand);
 727|   223|
 728|   223|        setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
 729|   223|
 730|   223|        // VSX v2i64 only supports non-arithmetic operations.
 731|   223|        setOperationAction(ISD::ADD, MVT::v2i64, Expand);
 732|   223|        setOperationAction(ISD::SUB, MVT::v2i64, Expand);
 733|   223|      }
 734|   632|
 735|   632|      setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
 736|   632|      AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
 737|   632|      setOperationAction(ISD::STORE, MVT::v2i64, Promote);
 738|   632|      AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
 739|   632|
 740|   632|      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
 741|   632|
 742|   632|      setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
 743|   632|      setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
 744|   632|      setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
 745|   632|      setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
 746|   632|
 747|   632|      // Vector operation legalization checks the result type of
 748|   632|      // SIGN_EXTEND_INREG, overall legalization checks the inner type.
 749|   632|      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
 750|   632|      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
 751|   632|      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
 752|   632|      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
 753|   632|
 754|   632|      setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
 755|   632|      setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
 756|   632|      setOperationAction(ISD::FABS, MVT::v4f32, Legal);
 757|   632|      setOperationAction(ISD::FABS, MVT::v2f64, Legal);
 758|   632|
 759|   632|      if (Subtarget.hasDirectMove())
 760|   401|        setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
 761|   632|      setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
 762|   632|
 763|   632|      addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
 764|   632|    }
 765|   822|
 766|   822|    if (Subtarget.hasP8Altivec()) {
 767|   432|      addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
 768|   432|      addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
 769|   432|    }
 770|   822|
 771|   822|    if (Subtarget.hasP9Vector()) {
 772|    60|      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
 773|    60|      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
 774|    60|
 775|    60|      // 128 bit shifts can be accomplished via 3 instructions for SHL and
 776|    60|      // SRL, but not for SRA because of the instructions available:
 777|    60|      // VS{RL} and VS{RL}O.
 778|    60|      setOperationAction(ISD::SHL, MVT::v1i128, Legal);
 779|    60|      setOperationAction(ISD::SRL, MVT::v1i128, Legal);
 780|    60|      setOperationAction(ISD::SRA, MVT::v1i128, Expand);
 781|    60|    }
 782|   822|  }
 783| 1.40k|
 784| 1.40k|  if (Subtarget.hasQPX()) {
 785|    40|    setOperationAction(ISD::FADD, MVT::v4f64, Legal);
 786|    40|    setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
 787|    40|    setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
 788|    40|    setOperationAction(ISD::FREM, MVT::v4f64, Expand);
 789|    40|
 790|    40|    setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
 791|    40|    setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);
 792|    40|
 793|    40|    setOperationAction(ISD::LOAD  , MVT::v4f64, Custom);
 794|    40|    setOperationAction(ISD::STORE , MVT::v4f64, Custom);
 795|    40|
 796|    40|    setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
 797|    40|    setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);
 798|    40|
 799|    40|    if (!Subtarget.useCRBits())
 800|     5|      setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
 801|    40|    setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);
 802|    40|
 803|    40|    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal);
 804|    40|    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand);
 805|    40|    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand);
 806|    40|    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand);
 807|    40|    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom);
 808|    40|    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
 809|    40|    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);
 810|    40|
 811|    40|    setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal);
 812|    40|    setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);
 813|    40|
 814|    40|    setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
 815|    40|    setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand);
 816|    40|    setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);
 817|    40|
 818|    40|    setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
 819|    40|    setOperationAction(ISD::FABS , MVT::v4f64, Legal);
 820|    40|    setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
 821|    40|    setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
 822|    40|    setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
 823|    40|    setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
 824|    40|    setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
 825|    40|    setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand);
 826|    40|    setOperationAction(ISD::FEXP , MVT::v4f64, Expand);
 827|    40|    setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);
 828|    40|
 829|    40|    setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
 830|    40|    setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);
 831|    40|
 832|    40|    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
 833|    40|    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);
 834|    40|
 835|    40|    addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);
 836|    40|
 837|    40|    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
 838|    40|    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
 839|    40|    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
 840|    40|    setOperationAction(ISD::FREM, MVT::v4f32, Expand);
 841|    40|
 842|    40|    setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
 843|    40|    setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);
 844|    40|
 845|    40|    setOperationAction(ISD::LOAD  , MVT::v4f32, Custom);
 846|    40|    setOperationAction(ISD::STORE , MVT::v4f32, Custom);
 847|    40|
 848|    40|    if (!Subtarget.useCRBits())
 849|     5|      setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
 850|    40|    setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
 851|    40|
 852|    40|    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal);
 853|    40|    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand);
 854|    40|    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand);
 855|    40|    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand);
 856|    40|    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom);
 857|    40|    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
 858|    40|    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
 859|    40|
 860|    40|    setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal);
 861|    40|    setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand);
 862|    40|
 863|    40|    setOperationAction(ISD::FNEG , MVT::v4f32, Legal);
 864|    40|    setOperationAction(ISD::FABS , MVT::v4f32, Legal);
 865|    40|    setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
 866|    40|    setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
 867|    40|    setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
 868|    40|    setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
 869|    40|    setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
 870|    40|    setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand);
 871|    40|    setOperationAction(ISD::FEXP , MVT::v4f32, Expand);
 872|    40|    setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);
 873|    40|
 874|    40|    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
 875|    40|    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
 876|    40|
 877|    40|    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
 878|    40|    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);
 879|    40|
 880|    40|    addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);
 881|    40|
 882|    40|    setOperationAction(ISD::AND , MVT::v4i1, Legal);
 883|    40|    setOperationAction(ISD::OR , MVT::v4i1, Legal);
 884|    40|    setOperationAction(ISD::XOR , MVT::v4i1, Legal);
 885|    40|
 886|    40|    if (!Subtarget.useCRBits())
 887|     5|      setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
 888|    40|    setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);
 889|    40|
 890|    40|    setOperationAction(ISD::LOAD  , MVT::v4i1, Custom);
 891|    40|    setOperationAction(ISD::STORE , MVT::v4i1, Custom);
 892|    40|
 893|    40|    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom);
 894|    40|    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand);
 895|    40|    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand);
 896|    40|    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand);
 897|    40|    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom);
 898|    40|    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
 899|    40|    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);
 900|    40|
 901|    40|    setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
 902|    40|    setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);
 903|    40|
 904|    40|    addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);
 905|    40|
 906|    40|    setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
 907|    40|    setOperationAction(ISD::FCEIL,  MVT::v4f64, Legal);
 908|    40|    setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
 909|    40|    setOperationAction(ISD::FROUND, MVT::v4f64, Legal);
 910|    40|
 911|    40|    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
 912|    40|    setOperationAction(ISD::FCEIL,  MVT::v4f32, Legal);
 913|    40|    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
 914|    40|    setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
 915|    40|
 916|    40|    setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
 917|    40|    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
 918|    40|
 919|    40|    // These need to set FE_INEXACT, and so cannot be vectorized here.
 920|    40|    setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
 921|    40|    setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
 922|    40|
 923|    40|    if (TM.Options.UnsafeFPMath) {
 924|     3|      setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
 925|     3|      setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
 926|     3|
 927|     3|      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
 928|     3|      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
 929|    40|    } else {
 930|    37|      setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
 931|    37|      setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);
 932|    37|
 933|    37|      setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
 934|    37|      setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
 935|    37|    }
 936|    40|  }
 937| 1.40k|
 938| 1.40k|  if (Subtarget.has64BitSupport())
 939| 1.08k|    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
 940| 1.40k|
 941| 1.40k|  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
 942| 1.40k|
 943| 1.40k|  if (!isPPC64) {
 944|   379|    setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
 945|   379|    setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
 946|   379|  }
 947| 1.40k|
 948| 1.40k|  setBooleanContents(ZeroOrOneBooleanContent);
 949| 1.40k|
 950| 1.40k|  if (Subtarget.hasAltivec()) {
 951|   822|    // Altivec instructions set fields to all zeros or all ones.
 952|   822|    setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
 953|   822|  }
 954| 1.40k|
 955| 1.40k|  if (!isPPC64) {
 956|   379|    // These libcalls are not available in 32-bit.
 957|   379|    setLibcallName(RTLIB::SHL_I128, nullptr);
 958|   379|    setLibcallName(RTLIB::SRL_I128, nullptr);
 959|   379|    setLibcallName(RTLIB::SRA_I128, nullptr);
 960|   379|  }
 961| 1.40k|
 962| 1.40k|  setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
 963| 1.40k|
 964| 1.40k|  // We have target-specific dag combine patterns for the following nodes:
 965| 1.40k|  setTargetDAGCombine(ISD::SHL);
 966| 1.40k|  setTargetDAGCombine(ISD::SRA);
 967| 1.40k|  setTargetDAGCombine(ISD::SRL);
 968| 1.40k|  setTargetDAGCombine(ISD::SINT_TO_FP);
 969| 1.40k|  setTargetDAGCombine(ISD::BUILD_VECTOR);
 970| 1.40k|  if (Subtarget.hasFPCVT())
 971|   764|    setTargetDAGCombine(ISD::UINT_TO_FP);
 972| 1.40k|  setTargetDAGCombine(ISD::LOAD);
 973| 1.40k|  setTargetDAGCombine(ISD::STORE);
 974| 1.40k|  setTargetDAGCombine(ISD::BR_CC);
 975| 1.40k|  if (Subtarget.useCRBits())
 976| 1.20k|    setTargetDAGCombine(ISD::BRCOND);
 977| 1.40k|  setTargetDAGCombine(ISD::BSWAP);
 978| 1.40k|  setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
 979| 1.40k|  setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
 980| 1.40k|  setTargetDAGCombine(ISD::INTRINSIC_VOID);
 981| 1.40k|
 982| 1.40k|  setTargetDAGCombine(ISD::SIGN_EXTEND);
 983| 1.40k|  setTargetDAGCombine(ISD::ZERO_EXTEND);
 984| 1.40k|  setTargetDAGCombine(ISD::ANY_EXTEND);
 985| 1.40k|
 986| 1.40k|  if (Subtarget.useCRBits()) {
 987| 1.20k|    setTargetDAGCombine(ISD::TRUNCATE);
 988| 1.20k|    setTargetDAGCombine(ISD::SETCC);
 989| 1.20k|    setTargetDAGCombine(ISD::SELECT_CC);
 990| 1.20k|  }
 991| 1.40k|
 992| 1.40k|  // Use reciprocal estimates.
 993| 1.40k|  if (TM.Options.UnsafeFPMath) {
 994|    17|    setTargetDAGCombine(ISD::FDIV);
 995|    17|    setTargetDAGCombine(ISD::FSQRT);
 996|    17|  }
 997| 1.40k|
 998| 1.40k|  // Darwin long double math library functions have $LDBL128 appended.
 999| 1.40k|  if (Subtarget.isDarwin()) {
1000|   157|    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
1001|   157|    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
1002|   157|    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
1003|   157|    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
1004|   157|    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
1005|   157|    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
1006|   157|    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
1007|   157|    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
1008|   157|    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
1009|   157|    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
1010|   157|  }
1011| 1.40k|
1012| 1.40k|  // With 32 condition bits, we don't need to sink (and duplicate) compares
1013| 1.40k|  // aggressively in CodeGenPrep.
1014| 1.40k|  if (Subtarget.useCRBits()) {
1015| 1.20k|    setHasMultipleConditionRegisters();
1016| 1.20k|    setJumpIsExpensive();
1017| 1.20k|  }
1018| 1.40k|
1019| 1.40k|  setMinFunctionAlignment(2);
1020| 1.40k|  if (Subtarget.isDarwin())
1021|   157|    setPrefFunctionAlignment(4);
1022| 1.40k|
1023| 1.40k|  switch (Subtarget.getDarwinDirective()) {
1024|   558|  default: break;
1025|   849|  case PPC::DIR_970:
1026|   849|  case PPC::DIR_A2:
1027|   849|  case PPC::DIR_E500mc:
1028|   849|  case PPC::DIR_E5500:
1029|   849|  case PPC::DIR_PWR4:
1030|   849|  case PPC::DIR_PWR5:
1031|   849|  case PPC::DIR_PWR5X:
1032|   849|  case PPC::DIR_PWR6:
1033|   849|  case PPC::DIR_PWR6X:
1034|   849|  case PPC::DIR_PWR7:
1035|   849|  case PPC::DIR_PWR8:
1036|   849|  case PPC::DIR_PWR9:
1037|   849|    setPrefFunctionAlignment(4);
1038|   849|    setPrefLoopAlignment(4);
1039|   849|    break;
1040| 1.40k|  }
1041| 1.40k|
1042| 1.40k|  if (Subtarget.enableMachineScheduler())
1043|   776|    setSchedulingPreference(Sched::Source);
1044| 1.40k|  else
1045|   631|    setSchedulingPreference(Sched::Hybrid);
1046| 1.40k|
1047| 1.40k|  computeRegisterProperties(STI.getRegisterInfo());
1048| 1.40k|
1049| 1.40k|  // The Freescale cores do better with aggressive inlining of memcpy and
1050| 1.40k|  // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1051| 1.40k|  if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
1052| 1.40k|      Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
1053|     5|    MaxStoresPerMemset = 32;
1054|     5|    MaxStoresPerMemsetOptSize = 16;
1055|     5|    MaxStoresPerMemcpy = 32;
1056|     5|    MaxStoresPerMemcpyOptSize = 8;
1057|     5|    MaxStoresPerMemmove = 32;
1058|     5|    MaxStoresPerMemmoveOptSize = 8;
1059| 1.40k|  } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
1060|    80|    // The A2 also benefits from (very) aggressive inlining of memcpy and
1061|    80|    // friends. The overhead of the function call, even when warm, can be
1062|    80|    // over one hundred cycles.
1063|    80|    MaxStoresPerMemset = 128;
1064|    80|    MaxStoresPerMemcpy = 128;
1065|    80|    MaxStoresPerMemmove = 128;
1066|    80|    MaxLoadsPerMemcmp = 128;
1067| 1.40k|  } else {
1068| 1.32k|    MaxLoadsPerMemcmp = 8;
1069| 1.32k|    MaxLoadsPerMemcmpOptSize = 4;
1070| 1.32k|  }
1071| 1.40k|}
1072|      |
1073|      |/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1074|      |/// the desired ByVal argument alignment.
1075|      |static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
1076|   276|                             unsigned MaxMaxAlign) {
1077|   276|  if (MaxAlign == MaxMaxAlign)
1078|     0|    return;
1079|   276|  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1080|     4|    if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
1081|     0|      MaxAlign = 32;
1082|     4|    else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
1083|     4|      MaxAlign = 16;
1084|   276|  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1085|    20|    unsigned EltAlign = 0;
1086|    20|    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1087|    20|    if (EltAlign > MaxAlign)
1088|     0|      MaxAlign = EltAlign;
1089|   272|  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1090|   156|    for (auto *EltTy : STy->elements()) {
1091|   156|      unsigned EltAlign = 0;
1092|   156|      getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1093|   156|      if (EltAlign > MaxAlign)
1094|     4|        MaxAlign = EltAlign;
1095|   156|      if (MaxAlign == MaxMaxAlign)
1096|     4|        break;
1097|   276|    }
1098|   272|  }
1099|   276|}
1100|      |
1101|      |/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1102|      |/// function arguments in the caller parameter area.
1103|      |unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1104|   133|                                                  const DataLayout &DL) const {
1105|   133|  // Darwin passes everything on 4 byte boundary.
1106|   133|  if (Subtarget.isDarwin())
1107|    11|    return 4;
1108|   122|
1109|   122|  // 16byte and wider vectors are passed on 16byte boundary.
1110|   122|  // The rest is 8 on PPC64 and 4 on PPC32 boundary.
1111|   122|  unsigned Align = Subtarget.isPPC64() ? 8 : 4;
1112|   122|  if (Subtarget.hasAltivec() || Subtarget.hasQPX())
1113|   100|    getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
1114|   133|  return Align;
1115|   133|}
1116|      |
1117| 10.7k|bool PPCTargetLowering::useSoftFloat() const {
1118| 10.7k|  return Subtarget.useSoftFloat();
1119| 10.7k|}
1120|      |
1121|     0|const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1122|     0|  switch ((PPCISD::NodeType)Opcode) {
1123|     0|  case PPCISD::FIRST_NUMBER:    break;
1124|     0|  case PPCISD::FSEL:            return "PPCISD::FSEL";
1125|     0|  case PPCISD::FCFID:           return "PPCISD::FCFID";
1126|     0|  case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
1127|     0|  case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
1128|     0|  case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
1129|     0|  case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
1130|     0|  case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
1131|     0|  case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
1132|     0|  case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
1133|     0|  case PPCISD::FRE:             return "PPCISD::FRE";
1134|     0|  case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
1135|     0|  case PPCISD::STFIWX:          return "PPCISD::STFIWX";
1136|     0|  case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
1137|     0|  case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
1138|     0|  case PPCISD::VPERM:           return "PPCISD::VPERM";
1139|     0|  case PPCISD::XXSPLT:          return "PPCISD::XXSPLT";
1140|     0|  case PPCISD::VECINSERT:       return "PPCISD::VECINSERT";
1141|     0|  case PPCISD::XXREVERSE:       return "PPCISD::XXREVERSE";
1142|     0|  case PPCISD::XXPERMDI:        return "PPCISD::XXPERMDI";
1143|     0|  case PPCISD::VECSHL:          return "PPCISD::VECSHL";
1144|     0|  case PPCISD::CMPB:            return "PPCISD::CMPB";
1145|     0|  case PPCISD::Hi:              return "PPCISD::Hi";
1146|     0|  case PPCISD::Lo:              return "PPCISD::Lo";
1147|     0|  case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
1148|     0|  case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
1149|     0|  case PPCISD::DYNAREAOFFSET:   return "PPCISD::DYNAREAOFFSET";
1150|     0|  case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
1151|     0|  case PPCISD::SRL:             return "PPCISD::SRL";
1152|     0|  case PPCISD::SRA:             return "PPCISD::SRA";
1153|     0|  case PPCISD::SHL:             return "PPCISD::SHL";
1154|     0|  case PPCISD::SRA_ADDZE:       return "PPCISD::SRA_ADDZE";
1155|     0|  case PPCISD::CALL:            return "PPCISD::CALL";
1156|     0|  case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
1157|     0|  case PPCISD::MTCTR:           return "PPCISD::MTCTR";
1158|     0|  case PPCISD::BCTRL:           return "PPCISD::BCTRL";
1159|     0|  case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
1160|     0|  case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
1161|     0|  case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
1162|     0|  case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
1163|     0|  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1164|     0|  case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
1165|     0|  case PPCISD::MFVSR:           return "PPCISD::MFVSR";
1166|     0|  case PPCISD::MTVSRA:          return "PPCISD::MTVSRA";
1167|     0|  case PPCISD::MTVSRZ:          return "PPCISD::MTVSRZ";
1168|     0|  case PPCISD::SINT_VEC_TO_FP:  return "PPCISD::SINT_VEC_TO_FP";
1169|     0|  case PPCISD::UINT_VEC_TO_FP:  return "PPCISD::UINT_VEC_TO_FP";
1170|     0|  case PPCISD::ANDIo_1_EQ_BIT:  return "PPCISD::ANDIo_1_EQ_BIT";
1171|     0|  case PPCISD::ANDIo_1_GT_BIT:  return "PPCISD::ANDIo_1_GT_BIT";
1172|     0|  case PPCISD::VCMP:            return "PPCISD::VCMP";
1173|     0|  case PPCISD::VCMPo:           return "PPCISD::VCMPo";
1174|     0|  case PPCISD::LBRX:            return "PPCISD::LBRX";
1175|     0|  case PPCISD::STBRX:           return "PPCISD::STBRX";
1176|     0|  case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
1177|     0|  case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
1178|     0|  case PPCISD::LXSIZX:          return "PPCISD::LXSIZX";
1179|     0|  case PPCISD::STXSIX:          return "PPCISD::STXSIX";
1180|     0|  case PPCISD::VEXTS:           return "PPCISD::VEXTS";
1181|     0|  case PPCISD::SExtVElems:      return "PPCISD::SExtVElems";
1182|     0|  case PPCISD::LXVD2X:          return "PPCISD::LXVD2X";
1183|     0|  case PPCISD::STXVD2X:         return "PPCISD::STXVD2X";
1184|     0|  case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
1185|     0|  case PPCISD::BDNZ:            return "PPCISD::BDNZ";
1186|     0|  case PPCISD::BDZ:             return "PPCISD::BDZ";
1187|     0|  case PPCISD::MFFS:            return "PPCISD::MFFS";
1188|     0|  case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
1189|     0|  case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
1190|     0|  case PPCISD::CR6SET:          return "PPCISD::CR6SET";
1191|     0|  case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
1192
0
  case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
1193
0
  case PPCISD::PPC32_PICGOT:    return "PPCISD::PPC32_PICGOT";
1194
0
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1195
0
  case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
1196
0
  case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
1197
0
  case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
1198
0
  case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
1199
0
  case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
1200
0
  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1201
0
  case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
1202
0
  case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
1203
0
  case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
1204
0
  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1205
0
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1206
0
  case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
1207
0
  case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
1208
0
  case PPCISD::SC:              return "PPCISD::SC";
1209
0
  case PPCISD::CLRBHRB:         return "PPCISD::CLRBHRB";
1210
0
  case PPCISD::MFBHRBE:         return "PPCISD::MFBHRBE";
1211
0
  case PPCISD::RFEBB:           return "PPCISD::RFEBB";
1212
0
  case PPCISD::XXSWAPD:         return "PPCISD::XXSWAPD";
1213
0
  case PPCISD::SWAP_NO_CHAIN:   return "PPCISD::SWAP_NO_CHAIN";
1214
0
  case PPCISD::QVFPERM:         return "PPCISD::QVFPERM";
1215
0
  case PPCISD::QVGPCI:          return "PPCISD::QVGPCI";
1216
0
  case PPCISD::QVALIGNI:        return "PPCISD::QVALIGNI";
1217
0
  case PPCISD::QVESPLATI:       return "PPCISD::QVESPLATI";
1218
0
  case PPCISD::QBFLT:           return "PPCISD::QBFLT";
1219
0
  case PPCISD::QVLFSb:          return "PPCISD::QVLFSb";
1220
0
  }
1221
0
  return nullptr;
1222
0
}
1223
1224
EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1225
6.13k
                                          EVT VT) const {
1226
6.13k
  if (!VT.isVector())
1227
5.89k
    return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1228
243
1229
243
  if (Subtarget.hasQPX())
1230
5
    return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());
1231
238
1232
238
  return VT.changeVectorElementTypeToInteger();
1233
238
}
1234
1235
557
bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
1236
557
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1237
557
  return true;
1238
557
}
1239
1240
//===----------------------------------------------------------------------===//
1241
// Node matching predicates, for use by the tblgen matching code.
1242
//===----------------------------------------------------------------------===//
1243
1244
/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1245
19
static bool isFloatingPointZero(SDValue Op) {
1246
19
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1247
9
    return CFP->getValueAPF().isZero();
1248
10
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1249
0
    // Maybe this has already been legalized into the constant pool?
1250
0
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1251
0
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1252
0
        return CFP->getValueAPF().isZero();
1253
10
  }
1254
10
  return false;
1255
10
}
1256
1257
/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
1258
/// true if Op is undef or if it matches the specified value.
1259
22.9k
static bool isConstantOrUndef(int Op, int Val) {
1260
17.0k
  return Op < 0 || Op == Val;
1261
22.9k
}
1262
1263
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1264
/// VPKUHUM instruction.
1265
/// The ShuffleKind distinguishes between big-endian operations with
1266
/// two different inputs (0), either-endian operations with two identical
1267
/// inputs (1), and little-endian operations with two different inputs (2).
1268
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1269
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1270
821
                               SelectionDAG &DAG) {
1271
821
  bool IsLE = DAG.getDataLayout().isLittleEndian();
1272
821
  if (ShuffleKind == 0) {
1273
366
    if (IsLE)
1274
60
      return false;
1275
339
    for (unsigned i = 0; i != 16; ++i)
1276
339
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1277
306
        return false;
1278
821
  } else if (ShuffleKind == 2) {
1279
222
    if (!IsLE)
1280
3
      return false;
1281
455
    for (unsigned i = 0; i != 16; ++i)
1282
452
      if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1283
216
        return false;
1284
455
  } else if (ShuffleKind == 1) {
1285
233
    unsigned j = IsLE ? 0 : 1;
1286
386
    for (unsigned i = 0; i != 8; ++i)
1287
377
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1288
190
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j))
1289
224
        return false;
1290
455
  }
1291
12
  return true;
1292
821
}
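// Editor's sketch (hypothetical, not part of the instrumented source): in the
// big-endian two-input case (ShuffleKind == 0) the loop above accepts exactly
// the mask {1,3,5,...,31}, i.e. the low (odd-numbered) byte of each halfword,
// with negative entries standing for undef. The same test in isolation:
static bool sketchIsVPKUHUMMaskBE(llvm::ArrayRef<int> Mask) {
  for (unsigned i = 0; i != 16; ++i)
    if (Mask[i] >= 0 && Mask[i] != (int)(i * 2 + 1))
      return false;
  return true;
}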
1293
1294
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1295
/// VPKUWUM instruction.
1296
/// The ShuffleKind distinguishes between big-endian operations with
1297
/// two different inputs (0), either-endian operations with two identical
1298
/// inputs (1), and little-endian operations with two different inputs (2).
1299
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1300
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1301
837
                               SelectionDAG &DAG) {
1302
837
  bool IsLE = DAG.getDataLayout().isLittleEndian();
1303
837
  if (ShuffleKind == 0) {
1304
366
    if (IsLE)
1305
60
      return false;
1306
320
    for (unsigned i = 0; i != 16; i += 2)
1307
320
      if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1308
16
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3))
1309
306
        return false;
1310
837
  } else if (ShuffleKind == 2) {
1311
225
    if (!IsLE)
1312
3
      return false;
1313
381
    for (unsigned i = 0; i != 16; i += 2)
1314
378
      if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1315
215
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1))
1316
219
        return false;
1317
471
  } else if (ShuffleKind == 1) {
1318
246
    unsigned j = IsLE ? 0 : 2;
1319
339
    for (unsigned i = 0; i != 8; i += 2)
1320
326
      if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1321
131
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
1322
123
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
1323
93
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1))
1324
233
        return false;
1325
471
  }
1326
16
  return true;
1327
837
}
1328
1329
/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1330
/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1331
/// current subtarget.
1332
///
1333
/// The ShuffleKind distinguishes between big-endian operations with
1334
/// two different inputs (0), either-endian operations with two identical
1335
/// inputs (1), and little-endian operations with two different inputs (2).
1336
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1337
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1338
105
                               SelectionDAG &DAG) {
1339
105
  const PPCSubtarget& Subtarget =
1340
105
    static_cast<const PPCSubtarget&>(DAG.getSubtarget());
1341
105
  if (!Subtarget.hasP8Vector())
1342
0
    return false;
1343
105
1344
105
  bool IsLE = DAG.getDataLayout().isLittleEndian();
1345
105
  if (ShuffleKind == 0) {
1346
30
    if (IsLE)
1347
1
      return false;
1348
45
    for (unsigned i = 0; i != 16; i += 4)
1349
42
      if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1350
20
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+5) ||
1351
20
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+6) ||
1352
18
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+7))
1353
26
        return false;
1354
105
  } else if (ShuffleKind == 2) {
1355
24
    if (!IsLE)
1356
0
      return false;
1357
41
    for (unsigned i = 0; i != 16; i += 4)
1358
38
      if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1359
17
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1) ||
1360
17
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+2) ||
1361
17
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+3))
1362
21
        return false;
1363
75
  } else if (ShuffleKind == 1) {
1364
51
    unsigned j = IsLE ? 0 : 4;
1365
67
    for (unsigned i = 0; i != 8; i += 4)
1366
61
      if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1367
26
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
1368
26
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
1369
24
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
1370
22
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
1371
16
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
1372
16
          !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1373
16
          !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1374
45
        return false;
1375
75
  }
1376
12
  return true;
1377
105
}
1378
1379
/// isVMerge - Common function, used to match vmrg* shuffles.
1380
///
1381
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1382
2.97k
                     unsigned LHSStart, unsigned RHSStart) {
1383
2.97k
  if (N->getValueType(0) != MVT::v16i8)
1384
0
    return false;
1385
2.97k
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1386
2.97k
         "Unsupported merge size!");
1387
2.97k
1388
5.50k
  for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
1389
10.0k
    for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
1390
7.56k
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1391
7.56k
                             LHSStart+j+i*UnitSize) ||
1392
6.08k
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1393
6.08k
                             RHSStart+j+i*UnitSize))
1394
2.37k
        return false;
1395
4.90k
    }
1396
603
  return true;
1397
2.97k
}
1398
1399
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1400
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1401
/// The ShuffleKind distinguishes between big-endian merges with two
1402
/// different inputs (0), either-endian merges with two identical inputs (1),
1403
/// and little-endian merges with two different inputs (2).  For the latter,
1404
/// the input operands are swapped (see PPCInstrAltivec.td).
1405
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1406
1.98k
                             unsigned ShuffleKind, SelectionDAG &DAG) {
1407
1.98k
  if (DAG.getDataLayout().isLittleEndian()) {
1408
845
    if (ShuffleKind == 1) // unary
1409
200
      return isVMerge(N, UnitSize, 0, 0);
1410
645
    else if (ShuffleKind == 2) // swapped
1411
465
      return isVMerge(N, UnitSize, 0, 16);
1412
645
    else
1413
180
      return false;
1414
1.13k
  } else {
1415
1.13k
    if (ShuffleKind == 1) // unary
1416
288
      return isVMerge(N, UnitSize, 8, 8);
1417
849
    else if (ShuffleKind == 0) // normal
1418
840
      return isVMerge(N, UnitSize, 8, 24);
1419
849
    else
1420
9
      return false;
1421
0
  }
1422
1.98k
}
1423
1424
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1425
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1426
/// The ShuffleKind distinguishes between big-endian merges with two
1427
/// different inputs (0), either-endian merges with two identical inputs (1),
1428
/// and little-endian merges with two different inputs (2).  For the latter,
1429
/// the input operands are swapped (see PPCInstrAltivec.td).
1430
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1431
1.37k
                             unsigned ShuffleKind, SelectionDAG &DAG) {
1432
1.37k
  if (DAG.getDataLayout().isLittleEndian()) {
1433
407
    if (ShuffleKind == 1) // unary
1434
125
      return isVMerge(N, UnitSize, 8, 8);
1435
282
    else if (ShuffleKind == 2) // swapped
1436
102
      return isVMerge(N, UnitSize, 8, 24);
1437
282
    else
1438
180
      return false;
1439
967
  } else {
1440
967
    if (ShuffleKind == 1) // unary
1441
180
      return isVMerge(N, UnitSize, 0, 0);
1442
787
    else if (ShuffleKind == 0) // normal
1443
778
      return isVMerge(N, UnitSize, 0, 16);
1444
787
    else
1445
9
      return false;
1446
0
  }
1447
1.37k
}
1448
1449
/**
1450
 * \brief Common function used to match vmrgew and vmrgow shuffles
1451
 *
1452
 * The indexOffset determines whether to look for even or odd words in
1453
 * the shuffle mask. This is based on the endianness of the target
1454
 * machine.
1455
 *   - Little Endian:
1456
 *     - Use offset of 0 to check for odd elements
1457
 *     - Use offset of 4 to check for even elements
1458
 *   - Big Endian:
1459
 *     - Use offset of 0 to check for even elements
1460
 *     - Use offset of 4 to check for odd elements
1461
 * A detailed description of the vector element ordering for little endian and
1462
 * big endian can be found at
1463
 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1464
 * Targeting your applications - what little endian and big endian IBM XL C/C++
1465
 * compiler differences mean to you
1466
 *
1467
 * The mask to the shuffle vector instruction specifies the indices of the
1468
 * elements from the two input vectors to place in the result. The elements are
1469
 * numbered in array-access order, starting with the first vector. These vectors
1470
 * are always of type v16i8, thus each vector will contain 16 elements of size
1471
 * 8 bits. More info on the shuffle vector can be found in the
1472
 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1473
 * Language Reference.
1474
 *
1475
 * The RHSStartValue indicates whether the same input vectors are used (unary)
1476
 * or two different input vectors are used, based on the following:
1477
 *   - If the instruction uses the same vector for both inputs, the range of the
1478
 *     indices will be 0 to 15. In this case, the RHSStart value passed should
1479
 *     be 0.
1480
 *   - If the instruction has two different vectors then the range of the
1481
 *     indices will be 0 to 31. In this case, the RHSStart value passed should
1482
 *     be 16 (indices 0-15 specify elements in the first vector while indices 16
1483
 *     to 31 specify elements in the second vector).
1484
 *
1485
 * \param[in] N The shuffle vector SD Node to analyze
1486
 * \param[in] IndexOffset Specifies whether to look for even or odd elements
1487
 * \param[in] RHSStartValue Specifies the starting index for the righthand input
1488
 * vector to the shuffle_vector instruction
1489
 * \return true iff this shuffle vector represents an even or odd word merge
1490
 */
1491
static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1492
195
                     unsigned RHSStartValue) {
1493
195
  if (N->getValueType(0) != MVT::v16i8)
1494
0
    return false;
1495
195
1496
264
  for (unsigned i = 0; i < 2; ++i)
1497
553
    for (unsigned j = 0; j < 4; ++j)
1498
484
      if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1499
484
                             i*RHSStartValue+j+IndexOffset) ||
1500
327
          !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1501
327
                             i*RHSStartValue+j+IndexOffset+8))
1502
168
        return false;
1503
27
  return true;
1504
195
}
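// Editor's sketch (hypothetical, not part of the instrumented source): with
// IndexOffset == 0 and RHSStartValue == 16, the loop above accepts the
// two-input mask {0,1,2,3, 16,17,18,19, 8,9,10,11, 24,25,26,27}. Written
// directly against a mask array (undef entries treated as wildcards):
static bool sketchIsWordMergeMask(llvm::ArrayRef<int> Mask,
                                  unsigned IndexOffset,
                                  unsigned RHSStartValue) {
  for (unsigned i = 0; i < 2; ++i)
    for (unsigned j = 0; j < 4; ++j) {
      int Lo = Mask[i * 4 + j], Hi = Mask[i * 4 + j + 8];
      if ((Lo >= 0 && Lo != (int)(i * RHSStartValue + j + IndexOffset)) ||
          (Hi >= 0 && Hi != (int)(i * RHSStartValue + j + IndexOffset + 8)))
        return false;
    }
  return true;
}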
1505
1506
/**
1507
 * \brief Determine if the specified shuffle mask is suitable for the vmrgew or
1508
 * vmrgow instructions.
1509
 *
1510
 * \param[in] N The shuffle vector SD Node to analyze
1511
 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1512
 * \param[in] ShuffleKind Identify the type of merge:
1513
 *   - 0 = big-endian merge with two different inputs;
1514
 *   - 1 = either-endian merge with two identical inputs;
1515
 *   - 2 = little-endian merge with two different inputs (inputs are swapped for
1516
 *     little-endian merges).
1517
 * \param[in] DAG The current SelectionDAG
1518
 * \return true iff this shuffle mask is suitable for a vmrgew or vmrgow instruction
1519
 */
1520
bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
1521
203
                              unsigned ShuffleKind, SelectionDAG &DAG) {
1522
203
  if (DAG.getDataLayout().isLittleEndian()) {
1523
94
    unsigned indexOffset = CheckEven ? 4 : 0;
1524
94
    if (ShuffleKind == 1) // Unary
1525
43
      return isVMerge(N, indexOffset, 0);
1526
51
    else if (ShuffleKind == 2) // swapped
1527
45
      return isVMerge(N, indexOffset, 16);
1528
51
    else
1529
6
      return false;
1530
203
  }
1531
109
  else {
1532
109
    unsigned indexOffset = CheckEven ? 0 : 4;
1533
109
    if (ShuffleKind == 1) // Unary
1534
52
      return isVMerge(N, indexOffset, 0);
1535
57
    else if (ShuffleKind == 0) // Normal
1536
55
      return isVMerge(N, indexOffset, 16);
1537
57
    else
1538
2
      return false;
1539
0
  }
1540
0
  return false;
1541
0
}
1542
1543
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
1544
/// amount, otherwise return -1.
1545
/// The ShuffleKind distinguishes between big-endian operations with two
1546
/// different inputs (0), either-endian operations with two identical inputs
1547
/// (1), and little-endian operations with two different inputs (2).  For the
1548
/// latter, the input operands are swapped (see PPCInstrAltivec.td).
1549
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
1550
1.13k
                             SelectionDAG &DAG) {
1551
1.13k
  if (N->getValueType(0) != MVT::v16i8)
1552
0
    return -1;
1553
1.13k
1554
1.13k
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1555
1.13k
1556
1.13k
  // Find the first non-undef value in the shuffle mask.
1557
1.13k
  unsigned i;
1558
1.34k
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
1559
210
    /*search*/;
1560
1.13k
1561
1.13k
  if (i == 16) return -1; // all undef.
1562
1.13k
1563
1.13k
  // Otherwise, check to see if the rest of the elements are consecutively
1564
1.13k
  // numbered from this value.
1565
1.13k
  unsigned ShiftAmt = SVOp->getMaskElt(i);
1566
1.13k
  if (ShiftAmt < i) return -1;
1567
1.09k
1568
1.09k
  ShiftAmt -= i;
1569
1.09k
  bool isLE = DAG.getDataLayout().isLittleEndian();
1570
1.09k
1571
1.09k
  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
1572
618
    // Check the rest of the elements to see if they are consecutive.
1573
2.63k
    for (++i; i != 16; ++i)
1574
2.57k
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
1575
558
        return -1;
1576
1.09k
  } else if (ShuffleKind == 1) {
1577
410
    // Check the rest of the elements to see if they are consecutive.
1578
2.60k
    for (++i; i != 16; ++i)
1579
2.50k
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
1580
309
        return -1;
1581
410
  } else
1582
63
    return -1;
1583
161
1584
161
  if (isLE)
1585
50
    ShiftAmt = 16 - ShiftAmt;
1586
1.13k
1587
1.13k
  return ShiftAmt;
1588
1.13k
}
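// Editor's sketch (hypothetical, not part of the instrumented source): a
// vsldoi mask is a window of 16 consecutive byte indices, e.g. {3,4,...,18}
// yields shift amount 3 on big-endian and 16-3 = 13 after the little-endian
// correction. The two-different-inputs path above, in isolation:
static int sketchVSLDOIShift(llvm::ArrayRef<int> Mask, bool IsLE) {
  unsigned i = 0;
  while (i != 16 && Mask[i] < 0)
    ++i; // skip leading undefs
  if (i == 16 || Mask[i] < (int)i)
    return -1;
  unsigned ShiftAmt = Mask[i] - i;
  for (++i; i != 16; ++i)
    if (Mask[i] >= 0 && Mask[i] != (int)(ShiftAmt + i))
      return -1;
  return IsLE ? (int)(16 - ShiftAmt) : (int)ShiftAmt;
}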
1589
1590
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
1591
/// specifies a splat of a single element that is suitable for input to
1592
/// VSPLTB/VSPLTH/VSPLTW.
1593
1.27k
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
1594
1.27k
  assert(N->getValueType(0) == MVT::v16i8 &&
1595
1.27k
         (EltSize == 1 || EltSize == 2 || EltSize == 4));
1596
1.27k
1597
1.27k
  // The consecutive indices need to specify an element, not part of two
1598
1.27k
  // different elements.  So abandon ship early if this isn't the case.
1599
1.27k
  if (N->getMaskElt(0) % EltSize != 0)
1600
171
    return false;
1601
1.10k
1602
1.10k
  // This is a splat operation if each element of the permute is the same, and
1603
1.10k
  // if the value doesn't reference the second vector.
1604
1.10k
  unsigned ElementBase = N->getMaskElt(0);
1605
1.10k
1606
1.10k
  // FIXME: Handle UNDEF elements too!
1607
1.10k
  if (ElementBase >= 16)
1608
27
    return false;
1609
1.07k
1610
1.07k
  // Check that the indices are consecutive, in the case of a multi-byte element
1611
1.07k
  // splatted with a v16i8 mask.
1612
2.25k
  for (unsigned i = 1; i != EltSize; ++i)
1613
1.36k
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
1614
187
      return false;
1615
1.07k
1616
2.58k
  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
1617
2.36k
    if (N->getMaskElt(i) < 0) continue;
1618
5.32k
    for (unsigned j = 0; j != EltSize; ++j)
1619
3.72k
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
1620
677
        return false;
1621
2.36k
  }
1622
212
  return true;
1623
1.27k
}
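// Editor's sketch (hypothetical, not part of the instrumented source): for
// EltSize == 4 a valid VSPLTW mask repeats one aligned word four times, e.g.
// {8,9,10,11, 8,9,10,11, 8,9,10,11, 8,9,10,11} splats word 2. A slightly
// simplified checker (per-byte undefs treated as wildcards, which is a bit
// more permissive than the exact logic above):
static bool sketchIsSplatMask(llvm::ArrayRef<int> Mask, unsigned EltSize) {
  if (Mask[0] < 0 || Mask[0] % (int)EltSize != 0 || Mask[0] >= 16)
    return false; // must start a whole element within the first vector
  for (unsigned i = 0; i != 16; ++i)
    if (Mask[i] >= 0 && Mask[i] != (int)(Mask[0] + i % EltSize))
      return false;
  return true;
}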
1624
1625
/// Check that the mask is shuffling N byte elements. Within each N byte
1626
/// element of the mask, the indices could be either in increasing or
1627
/// decreasing order as long as they are consecutive.
1628
/// \param[in] N the shuffle vector SD Node to analyze
1629
/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
1630
/// Word/DoubleWord/QuadWord).
1631
/// \param[in] StepLen the delta indices number among the N byte element, if
1632
/// the mask is in increasing/decreasing order then it is 1/-1.
1633
/// \return true iff the mask is shuffling N byte elements.
1634
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
1635
2.80k
                                   int StepLen) {
1636
2.80k
  assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
1637
2.80k
         "Unexpected element width.");
1638
2.80k
  assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
1639
2.80k
1640
2.80k
  unsigned NumOfElem = 16 / Width;
1641
2.80k
  unsigned MaskVal[16]; //  Width is never greater than 16
1642
5.01k
  for (unsigned i = 0; i < NumOfElem; ++i) {
1643
4.53k
    MaskVal[0] = N->getMaskElt(i * Width);
1644
4.53k
    if ((StepLen == 1) && (MaskVal[0] % Width)) {
1645
348
      return false;
1646
4.18k
    } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
1647
1.06k
      return false;
1648
1.06k
    }
1649
3.12k
1650
10.8k
    for (unsigned int j = 1; j < Width; ++j) {
1651
8.62k
      MaskVal[j] = N->getMaskElt(i * Width + j);
1652
8.62k
      if (MaskVal[j] != MaskVal[j-1] + StepLen) {
1653
925
        return false;
1654
925
      }
1655
8.62k
    }
1656
4.53k
  }
1657
2.80k
1658
474
  return true;
1659
2.80k
}
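// Editor's note (worked example, not part of the instrumented source): with
// Width == 4 and StepLen == -1, each word must start at an index that is
// 3 mod 4 and then descend, so {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}
// passes; with StepLen == 1 the same words must ascend from a multiple of 4.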
1660
1661
bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
1662
439
                          unsigned &InsertAtByte, bool &Swap, bool IsLE) {
1663
439
  if (!isNByteElemShuffleMask(N, 4, 1))
1664
232
    return false;
1665
207
1666
207
  // Now we look at mask elements 0,4,8,12
1667
207
  unsigned M0 = N->getMaskElt(0) / 4;
1668
207
  unsigned M1 = N->getMaskElt(4) / 4;
1669
207
  unsigned M2 = N->getMaskElt(8) / 4;
1670
207
  unsigned M3 = N->getMaskElt(12) / 4;
1671
207
  unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
1672
207
  unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
1673
207
1674
207
  // Below, let H and L be arbitrary elements of the shuffle mask
1675
207
  // where H is in the range [4,7] and L is in the range [0,3].
1676
207
  // H, 1, 2, 3 or L, 5, 6, 7
1677
207
  if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
1678
207
      (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
1679
32
    ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
1680
32
    InsertAtByte = IsLE ? 12 : 0;
1681
32
    Swap = M0 < 4;
1682
32
    return true;
1683
32
  }
1684
175
  // 0, H, 2, 3 or 4, L, 6, 7
1685
175
  if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
1686
175
      (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
1687
32
    ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
1688
32
    InsertAtByte = IsLE ? 8 : 4;
1689
32
    Swap = M1 < 4;
1690
32
    return true;
1691
32
  }
1692
143
  // 0, 1, H, 3 or 4, 5, L, 7
1693
143
  if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
1694
143
      (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
1695
32
    ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
1696
32
    InsertAtByte = IsLE ? 4 : 8;
1697
32
    Swap = M2 < 4;
1698
32
    return true;
1699
32
  }
1700
111
  // 0, 1, 2, H or 4, 5, 6, L
1701
111
  if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
1702
111
      (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
1703
32
    ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
1704
32
    InsertAtByte = IsLE ? 0 : 12;
1705
32
    Swap = M3 < 4;
1706
32
    return true;
1707
32
  }
1708
79
1709
79
  // If both vector operands for the shuffle are the same vector, the mask will
1710
79
  // contain only elements from the first one and the second one will be undef.
1711
79
  if (N->getOperand(1).isUndef()) {
1712
67
    ShiftElts = 0;
1713
67
    Swap = true;
1714
67
    unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
1715
67
    if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
1716
2
      InsertAtByte = IsLE ? 12 : 0;
1717
2
      return true;
1718
2
    }
1719
65
    if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
1720
1
      InsertAtByte = IsLE ? 8 : 4;
1721
1
      return true;
1722
1
    }
1723
64
    if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
1724
1
      InsertAtByte = IsLE ? 4 : 8;
1725
1
      return true;
1726
1
    }
1727
63
    if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
1728
2
      InsertAtByte = IsLE ? 0 : 12;
1729
2
      return true;
1730
2
    }
1731
73
  }
1732
73
1733
73
  return false;
1734
73
}
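// Editor's note (worked example, not part of the instrumented source): the
// mask {20,21,22,23, 4,5,6,7, 8,9,10,11, 12,13,14,15} has M0 = 5, M1 = 1,
// M2 = 2, M3 = 3, so it matches the "H, 1, 2, 3" pattern above: word 1 of
// the second input is inserted, Swap = false, and on big-endian ShiftElts =
// BigEndianShifts[1] = 0 with InsertAtByte = 0.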
1735
1736
bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
1737
633
                               bool &Swap, bool IsLE) {
1738
633
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
1739
633
  // Ensure each byte index of the word is consecutive.
1740
633
  if (!isNByteElemShuffleMask(N, 4, 1))
1741
399
    return false;
1742
234
1743
234
  // Now we look at mask elements 0,4,8,12, which are the beginning of words.
1744
234
  unsigned M0 = N->getMaskElt(0) / 4;
1745
234
  unsigned M1 = N->getMaskElt(4) / 4;
1746
234
  unsigned M2 = N->getMaskElt(8) / 4;
1747
234
  unsigned M3 = N->getMaskElt(12) / 4;
1748
234
1749
234
  // If both vector operands for the shuffle are the same vector, the mask will
1750
234
  // contain only elements from the first one and the second one will be undef.
1751
234
  if (N->getOperand(1).isUndef()) {
1752
155
    assert(M0 < 4 && "Indexing into an undef vector?");
1753
155
    if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
1754
129
      return false;
1755
26
1756
26
    ShiftElts = IsLE ? (4 - M0) % 4 : M0;
1757
155
    Swap = false;
1758
155
    return true;
1759
155
  }
1760
79
1761
79
  // Ensure each word index of the ShuffleVector Mask is consecutive.
1762
79
  if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
1763
45
    return false;
1764
34
1765
34
  if (IsLE) {
1766
14
    if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
1767
8
      // Input vectors don't need to be swapped if the leading element
1768
8
      // of the result is one of the 3 left elements of the second vector
1769
8
      // (or if there is no shift to be done at all).
1770
8
      Swap = false;
1771
8
      ShiftElts = (8 - M0) % 8;
1772
14
    } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
1773
6
      // Input vectors need to be swapped if the leading element
1774
6
      // of the result is one of the 3 left elements of the first vector
1775
6
      // (or if we're shifting by 4 - thereby simply swapping the vectors).
1776
6
      Swap = true;
1777
6
      ShiftElts = (4 - M0) % 4;
1778
6
    }
1779
14
1780
14
    return true;
1781
0
  } else {                                          // BE
1782
20
    if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
1783
12
      // Input vectors don't need to be swapped if the leading element
1784
12
      // of the result is one of the 4 elements of the first vector.
1785
12
      Swap = false;
1786
12
      ShiftElts = M0;
1787
20
    } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
1788
8
      // Input vectors need to be swapped if the leading element
1789
8
      // of the result is one of the 4 elements of the right vector.
1790
8
      Swap = true;
1791
8
      ShiftElts = M0 - 4;
1792
8
    }
1793
20
1794
20
    return true;
1795
20
  }
1796
0
}
1797
1798
1.16k
bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
1799
1.16k
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
1800
1.16k
1801
1.16k
  if (!isNByteElemShuffleMask(N, Width, -1))
1802
1.14k
    return false;
1803
20
1804
50
  for (int i = 0; i < 16; i += Width)
1805
42
    if (N->getMaskElt(i) != i + Width - 1)
1806
12
      return false;
1807
20
1808
8
  return true;
1809
1.16k
}
1810
1811
294
bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
1812
294
  return isXXBRShuffleMaskHelper(N, 2);
1813
294
}
1814
1815
292
bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
1816
292
  return isXXBRShuffleMaskHelper(N, 4);
1817
292
}
1818
1819
290
bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
1820
290
  return isXXBRShuffleMaskHelper(N, 8);
1821
290
}
1822
1823
288
bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
1824
288
  return isXXBRShuffleMaskHelper(N, 16);
1825
288
}
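// Editor's sketch (hypothetical, not part of the instrumented source): all
// four XXBR predicates above reduce to "every Width-byte unit is fully
// byte-reversed"; the two-step check collapses into one loop:
static bool sketchIsByteReverseMask(llvm::ArrayRef<int> Mask, int Width) {
  // e.g. Width == 4 accepts {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}.
  for (int i = 0; i < 16; i += Width)
    for (int j = 0; j < Width; ++j)
      if (Mask[i + j] != i + Width - 1 - j)
        return false;
  return true;
}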
1826
1827
/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
1828
/// if the inputs to the instruction should be swapped and set \p DM to the
1829
/// value for the immediate.
1830
/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
1831
/// AND element 0 of the result comes from the first input (LE) or second input
1832
/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
1833
/// \return true iff the given mask of shuffle node \p N is an XXPERMDI shuffle
1834
/// mask.
1835
bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
1836
573
                               bool &Swap, bool IsLE) {
1837
573
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
1838
573
1839
573
  // Ensure each byte index of the double word is consecutive.
1840
573
  if (!isNByteElemShuffleMask(N, 8, 1))
1841
560
    return false;
1842
13
1843
13
  unsigned M0 = N->getMaskElt(0) / 8;
1844
13
  unsigned M1 = N->getMaskElt(8) / 8;
1845
13
  assert(((M0 | M1) < 4) && "A mask element out of bounds?");
1846
13
1847
13
  // If both vector operands for the shuffle are the same vector, the mask will
1848
13
  // contain only elements from the first one and the second one will be undef.
1849
13
  if (N->getOperand(1).isUndef()) {
1850
0
    if ((M0 | M1) < 2) {
1851
0
      DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
1852
0
      Swap = false;
1853
0
      return true;
1854
0
    } else
1855
0
      return false;
1856
13
  }
1857
13
1858
13
  if (IsLE) {
1859
7
    if (M0 > 1 && M1 < 2) {
1860
0
      Swap = false;
1861
7
    } else if (M0 < 2 && M1 > 1) {
1862
7
      M0 = (M0 + 2) % 4;
1863
7
      M1 = (M1 + 2) % 4;
1864
7
      Swap = true;
1865
7
    } else
1866
0
      return false;
1867
7
1868
7
    // Note: if control flow comes here that means Swap is already set above
1869
7
    DM = (((~M1) & 1) << 1) + ((~M0) & 1);
1870
7
    return true;
1871
0
  } else { // BE
1872
6
    if (M0 < 2 && M1 > 1) {
1873
6
      Swap = false;
1874
6
    } else if (M0 > 1 && M1 < 2) {
1875
0
      M0 = (M0 + 2) % 4;
1876
0
      M1 = (M1 + 2) % 4;
1877
0
      Swap = true;
1878
0
    } else
1879
0
      return false;
1880
6
1881
6
    // Note: if control flow comes here that means Swap is already set above
1882
6
    DM = (M0 << 1) + (M1 & 1);
1883
6
    return true;
1884
6
  }
1885
573
}
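// Editor's note (worked example, not part of the instrumented source): the
// v16i8 mask {0..7, 24..31} gives M0 = 0 and M1 = 3 (doubleword 0 of the
// first input, doubleword 1 of the second). On big-endian that takes the
// "M0 < 2 && M1 > 1" branch, so Swap = false and DM = (0 << 1) + (3 & 1) = 1.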
1886
1887
1888
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
1889
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
1890
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
1891
126
                                SelectionDAG &DAG) {
1892
126
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1893
126
  assert(isSplatShuffleMask(SVOp, EltSize));
1894
126
  if (DAG.getDataLayout().isLittleEndian())
1895
57
    return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
1896
126
  else
1897
69
    return SVOp->getMaskElt(0) / EltSize;
1898
0
}
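// Editor's sketch (hypothetical, not part of the instrumented source): the
// endian correction above in isolation. For EltSize == 4 and a mask whose
// first entry is 8 (word 2), this returns 2 on big-endian and 4-1-2 = 1 on
// little-endian:
static unsigned sketchVSPLTImmediate(int FirstMaskElt, unsigned EltSize,
                                     bool IsLE) {
  unsigned Elem = FirstMaskElt / EltSize; // assumes a valid splat mask
  return IsLE ? (16 / EltSize) - 1 - Elem : Elem;
}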
1899
1900
/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
1901
/// by using a vspltis[bhw] instruction of the specified element size, return
1902
/// the constant being splatted.  The ByteSize field indicates the number of
1903
/// bytes of each element [124] -> [bhw].
1904
136
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
1905
136
  SDValue OpVal(nullptr, 0);
1906
136
1907
136
  // If ByteSize of the splat is bigger than the element size of the
1908
136
  // build_vector, then we have a case where we are checking for a splat where
1909
136
  // multiple elements of the buildvector are folded together into a single
1910
136
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
1911
136
  unsigned EltSize = 16/N->getNumOperands();
1912
136
  if (EltSize < ByteSize) {
1913
0
    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
1914
0
    SDValue UniquedVals[4];
1915
0
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
1916
0
1917
0
    // See if all of the elements in the buildvector agree across.
1918
0
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1919
0
      if (N->getOperand(i).isUndef()) continue;
1920
0
      // If the element isn't a constant, bail fully out.
1921
0
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
1922
0
1923
0
      if (!UniquedVals[i&(Multiple-1)].getNode())
1924
0
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
1925
0
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
1926
0
        return SDValue();  // no match.
1927
0
    }
1928
0
1929
0
    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
1930
0
    // either constant or undef values that are identical for each chunk.  See
1931
0
    // if these chunks can form into a larger vspltis*.
1932
0
1933
0
    // Check to see if all of the leading entries are either 0 or -1.  If
1934
0
    // neither, then this won't fit into the immediate field.
1935
0
    bool LeadingZero = true;
1936
0
    bool LeadingOnes = true;
1937
0
    for (unsigned i = 0; i != Multiple-1; ++i) {
1938
0
      if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
1939
0
1940
0
      LeadingZero &= isNullConstant(UniquedVals[i]);
1941
0
      LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
1942
0
    }
1943
0
    // Finally, check the least significant entry.
1944
0
    if (LeadingZero) {
1945
0
      if (!UniquedVals[Multiple-1].getNode())
1946
0
        return DAG.getTargetConstant(0, SDLoc(N), MVT::i32);  // 0,0,0,undef
1947
0
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
1948
0
      if (Val < 16)                                   // 0,0,0,4 -> vspltisw(4)
1949
0
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
1950
0
    }
1951
0
    if (LeadingOnes) {
1952
0
      if (!UniquedVals[Multiple-1].getNode())
1953
0
        return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
1954
0
      int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
1955
0
      if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
1956
0
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
1957
0
    }
1958
0
1959
0
    return SDValue();
1960
0
  }
1961
136
1962
136
  // Check to see if this buildvec has a single non-undef value in its elements.
1963
1.40k
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1964
1.26k
    if (N->getOperand(i).isUndef()) continue;
1965
1.26k
    if (!OpVal.getNode())
1966
136
      OpVal = N->getOperand(i);
1967
1.12k
    else if (OpVal != N->getOperand(i))
1968
0
      return SDValue();
1969
1.26k
  }
1970
136
1971
136
  if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
1972
136
1973
136
  unsigned ValSizeInBytes = EltSize;
1974
136
  uint64_t Value = 0;
1975
136
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1976
136
    Value = CN->getZExtValue();
1977
136
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1978
0
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
1979
0
    Value = FloatToBits(CN->getValueAPF().convertToFloat());
1980
0
  }
1981
136
1982
136
  // If the splat value is larger than the element value, then we can never do
1983
136
  // this splat.  The only case where we could fit the replicated bits into our
1984
136
  // immediate field would be zero, and we prefer to use vxor for it.
1985
136
  if (ValSizeInBytes < ByteSize) return SDValue();
1986
136
1987
136
  // If the element value is larger than the splat value, check if it consists
1988
136
  // of a repeated bit pattern of size ByteSize.
1989
136
  if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
1990
0
    return SDValue();
1991
136
1992
136
  // Properly sign extend the value.
1993
136
  int MaskVal = SignExtend32(Value, ByteSize * 8);
1994
136
1995
136
  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
1996
136
  if (MaskVal == 0) return SDValue();
1997
94
1998
94
  // Finally, if this value fits in a 5 bit sext field, return it
1999
94
  if (SignExtend32<5>(MaskVal) == MaskVal)
2000
94
    return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2001
0
  return SDValue();
2002
0
}
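// Editor's note (worked example, not part of the instrumented source): for a
// v8i16 build_vector of eight 0x0505 constants queried with ByteSize == 1,
// EltSize is 2, Value is 0x0505, APInt(16, 0x0505).isSplat(8) holds, and
// MaskVal = SignExtend32(0x0505, 8) = 5, which fits the 5-bit field, so a
// vspltisb 5 can materialize the vector.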
2003
2004
/// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
2005
/// amount, otherwise return -1.
2006
71
int PPC::isQVALIGNIShuffleMask(SDNode *N) {
2007
71
  EVT VT = N->getValueType(0);
2008
71
  if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
2009
0
    return -1;
2010
71
2011
71
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2012
71
2013
71
  // Find the first non-undef value in the shuffle mask.
2014
71
  unsigned i;
2015
71
  for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
2016
0
    /*search*/;
2017
71
2018
71
  if (i == 4) return -1; // all undef.
2019
71
2020
71
  // Otherwise, check to see if the rest of the elements are consecutively
2021
71
  // numbered from this value.
2022
71
  unsigned ShiftAmt = SVOp->getMaskElt(i);
2023
71
  if (ShiftAmt < i) return -1;
2024
71
  ShiftAmt -= i;
2025
71
2026
71
  // Check the rest of the elements to see if they are consecutive.
2027
87
  for (++i; i != 4; ++i)
2028
87
    if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2029
71
      return -1;
2030
71
2031
0
  return ShiftAmt;
2032
71
}
2033
2034
//===----------------------------------------------------------------------===//
2035
//  Addressing Mode Selection
2036
//===----------------------------------------------------------------------===//
2037
2038
/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2039
/// or 64-bit immediate, and if the value can be accurately represented as a
2040
/// sign extension from a 16-bit value.  If so, this returns true and the
2041
/// immediate.
2042
11.3k
bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2043
11.3k
  if (!isa<ConstantSDNode>(N))
2044
1.21k
    return false;
2045
10.1k
2046
10.1k
  Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
2047
10.1k
  if (N->getValueType(0) == MVT::i32)
2048
2.75k
    return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2049
10.1k
  else
2050
7.39k
    return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2051
0
}
2052
11.2k
bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2053
11.2k
  return isIntS16Immediate(Op.getNode(), Imm);
2054
11.2k
}
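// Editor's sketch (hypothetical, not part of the instrumented source): both
// overloads above reduce to a round-trip test through int16_t:
static bool sketchFitsSInt16(int64_t V) {
  return V == (int64_t)(int16_t)V; // -32768..32767 pass; 0x8000 does not
}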
2055
2056
/// SelectAddressRegReg - Given the specified address, check to see if it
2057
/// can be represented as an indexed [r+r] operation.  Returns false if it
2058
/// can be more efficiently represented with [r+imm].
2059
bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
2060
                                            SDValue &Index,
2061
13.0k
                                            SelectionDAG &DAG) const {
2062
13.0k
  int16_t imm = 0;
2063
13.0k
  if (N.getOpcode() == ISD::ADD) {
2064
5.13k
    if (isIntS16Immediate(N.getOperand(1), imm))
2065
4.31k
      return false;    // r+i
2066
818
    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2067
319
      return false;    // r+i
2068
499
2069
499
    Base = N.getOperand(0);
2070
499
    Index = N.getOperand(1);
2071
499
    return true;
2072
7.93k
  } else if (N.getOpcode() == ISD::OR) {
2073
775
    if (isIntS16Immediate(N.getOperand(1), imm))
2074
755
      return false;    // r+i can fold it if we can.
2075
20
2076
20
    // If this is an or of disjoint bitfields, we can codegen this as an add
2077
20
    // (for better address arithmetic) if the LHS and RHS of the OR are provably
2078
20
    // disjoint.
2079
20
    KnownBits LHSKnown, RHSKnown;
2080
20
    DAG.computeKnownBits(N.getOperand(0), LHSKnown);
2081
20
2082
20
    if (LHSKnown.Zero.getBoolValue()) {
2083
20
      DAG.computeKnownBits(N.getOperand(1), RHSKnown);
2084
20
      // If all of the bits are known zero on the LHS or RHS, the add won't
2085
20
      // carry.
2086
20
      if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2087
20
        Base = N.getOperand(0);
2088
20
        Index = N.getOperand(1);
2089
20
        return true;
2090
20
      }
2091
7.16k
    }
2092
7.93k
  }
2093
7.16k
2094
7.16k
  return false;
2095
7.16k
}
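// Editor's sketch (hypothetical, not part of the instrumented source): the
// OR-as-ADD rewrite above is sound because an addition cannot carry when no
// bit position can be set in both operands, which is what the KnownBits
// query establishes:
static bool sketchOrActsLikeAdd(uint64_t KnownZeroLHS, uint64_t KnownZeroRHS) {
  // Every bit is known zero on at least one side => no carries => a|b == a+b.
  return (KnownZeroLHS | KnownZeroRHS) == ~0ULL;
}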
2096
2097
// If we happen to be doing an i64 load or store into a stack slot that has
2098
// less than a 4-byte alignment, then the frame-index elimination may need to
2099
// use an indexed load or store instruction (because the offset may not be a
2100
// multiple of 4). The extra register needed to hold the offset comes from the
2101
// register scavenger, and it is possible that the scavenger will need to use
2102
// an emergency spill slot. As a result, we need to make sure that a spill slot
2103
// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2104
// stack slot.
2105
3.15k
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2106
3.15k
  // FIXME: This does not handle the LWA case.
2107
3.15k
  if (VT != MVT::i64)
2108
1.32k
    return;
2109
1.83k
2110
1.83k
  // NOTE: We'll exclude negative FIs here, which come from argument
2111
1.83k
  // lowering, because there are no known test cases triggering this problem
2112
1.83k
  // using packed structures (or similar). We can remove this exclusion if
2113
1.83k
  // we find such a test case. The reason why this is so test-case driven is
2114
1.83k
  // because this entire 'fixup' is only to prevent crashes (from the
2115
1.83k
  // register scavenger) on not-really-valid inputs. For example, if we have:
2116
1.83k
  //   %a = alloca i1
2117
1.83k
  //   %b = bitcast i1* %a to i64*
2118
1.83k
  //   store i64* a, i64 b
2119
1.83k
  // then the store should really be marked as 'align 1', but is not. If it
2120
1.83k
  // were marked as 'align 1' then the indexed form would have been
2121
1.83k
  // instruction-selected initially, and the problem this 'fixup' is preventing
2122
1.83k
  // won't happen regardless.
2123
1.83k
  if (FrameIdx < 0)
2124
419
    return;
2125
1.41k
2126
1.41k
  MachineFunction &MF = DAG.getMachineFunction();
2127
1.41k
  MachineFrameInfo &MFI = MF.getFrameInfo();
2128
1.41k
2129
1.41k
  unsigned Align = MFI.getObjectAlignment(FrameIdx);
2130
1.41k
  if (Align >= 4)
2131
1.38k
    return;
2132
36
2133
36
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2134
36
  FuncInfo->setHasNonRISpills();
2135
36
}
2136
2137
/// Returns true if the address N can be represented by a base register plus
2138
/// a signed 16-bit displacement [r+imm], and if it is not better
2139
/// represented as reg+reg.  If \p Alignment is non-zero, only accept
2140
/// displacements that are multiples of that value.
2141
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
2142
                                            SDValue &Base,
2143
                                            SelectionDAG &DAG,
2144
7.90k
                                            unsigned Alignment) const {
2145
7.90k
  // FIXME dl should come from parent load or store, not from address
2146
7.90k
  SDLoc dl(N);
2147
7.90k
  // If this can be more profitably realized as r+r, fail.
2148
7.90k
  if (SelectAddressRegReg(N, Disp, Base, DAG))
2149
166
    return false;
2150
7.73k
2151
7.73k
  if (N.getOpcode() == ISD::ADD) {
2152
3.15k
    int16_t imm = 0;
2153
3.15k
    if (isIntS16Immediate(N.getOperand(1), imm) &&
2154
3.15k
        (!Alignment || (imm % Alignment) == 0)) {
2155
2.89k
      Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2156
2.89k
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2157
205
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2158
205
        fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2159
2.89k
      } else {
2160
2.69k
        Base = N.getOperand(0);
2161
2.69k
      }
2162
2.89k
      return true; // [r+i]
2163
263
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2164
263
      // Match LOAD (ADD (X, Lo(G))).
2165
263
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
2166
263
             && "Cannot handle constant offsets yet!");
2167
263
      Disp = N.getOperand(1).getOperand(0);  // The global address.
2168
263
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2169
263
             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2170
263
             Disp.getOpcode() == ISD::TargetConstantPool ||
2171
263
             Disp.getOpcode() == ISD::TargetJumpTable);
2172
263
      Base = N.getOperand(0);
2173
263
      return true;  // [&g+r]
2174
263
    }
2175
4.57k
  } else if (N.getOpcode() == ISD::OR) {
2176
748
    int16_t imm = 0;
2177
748
    if (isIntS16Immediate(N.getOperand(1), imm) &&
2178
748
        (!Alignment || (imm % Alignment) == 0)) {
2179
748
      // If this is an or of disjoint bitfields, we can codegen this as an add
2180
748
      // (for better address arithmetic) if the LHS and RHS of the OR are
2181
748
      // provably disjoint.
2182
748
      KnownBits LHSKnown;
2183
748
      DAG.computeKnownBits(N.getOperand(0), LHSKnown);
2184
748
2185
748
      if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2186
748
        // If all of the bits are known zero on the LHS or RHS, the add won't
2187
748
        // carry.
2188
748
        if (FrameIndexSDNode *FI =
2189
745
              dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2190
745
          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2191
745
          fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2192
748
        } else {
2193
3
          Base = N.getOperand(0);
2194
3
        }
2195
748
        Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2196
748
        return true;
2197
748
      }
2198
4.57k
    }
2199
3.82k
  } else 
if (ConstantSDNode *3.82k
CN3.82k
= dyn_cast<ConstantSDNode>(N)) {
2200
113
    // Loading from a constant address.
2201
113
2202
113
    // If this address fits entirely in a 16-bit sext immediate field, codegen
2203
113
    // this as "d, 0"
2204
113
    int16_t Imm;
2205
113
    if (isIntS16Immediate(CN, Imm) && (!Alignment || (Imm % Alignment) == 0)) {
2206
101
      Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2207
101
      Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2208
101
                             CN->getValueType(0));
2209
101
      return true;
2210
101
    }
2211
12
2212
12
    // Handle 32-bit sext immediates with LIS + addr mode.
2213
12
    if ((CN->getValueType(0) == MVT::i32 ||
2214
7
         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2215
12
        (!Alignment || (CN->getZExtValue() % Alignment) == 0)) {
2216
9
      int Addr = (int)CN->getZExtValue();
2217
9
2218
9
      // Otherwise, break this down into an LIS + disp.
2219
9
      Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2220
9
2221
9
      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2222
9
                                   MVT::i32);
2223
9
      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2224
9
      Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2225
9
      return true;
2226
9
    }
2227
3.71k
  }
2228
3.71k
2229
3.71k
  Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2230
3.71k
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2231
2.20k
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2232
2.20k
    fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2233
2.20k
  } else
2234
1.50k
    Base = N;
2235
7.90k
  return true;      // [r+0]
2236
7.90k
}
2237
2238
/// SelectAddressRegRegOnly - Given the specified address, force it to be
2239
/// represented as an indexed [r+r] operation.
2240
bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2241
                                                SDValue &Index,
2242
3.99k
                                                SelectionDAG &DAG) const {
2243
3.99k
  // Check to see if we can easily represent this as an [r+r] address.  This
2244
3.99k
  // will fail if it thinks that the address is more profitably represented as
2245
3.99k
  // reg+imm, e.g. where imm = 0.
2246
3.99k
  if (SelectAddressRegReg(N, Base, Index, DAG))
2247
75
    return true;
2248
3.92k
2249
3.92k
  // If the address is the result of an add, we will utilize the fact that the
2250
3.92k
  // address calculation includes an implicit add.  However, we can reduce
2251
3.92k
  // register pressure if we do not materialize a constant just for use as the
2252
3.92k
  // index register.  We only get rid of the add if it is not an add of a
2253
3.92k
  // value and a 16-bit signed constant and both have a single use.
2254
3.92k
  int16_t imm = 0;
2255
3.92k
  if (N.getOpcode() == ISD::ADD &&
2256
718
      (!isIntS16Immediate(N.getOperand(1), imm) ||
2257
3.92k
       !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2258
661
    Base = N.getOperand(0);
2259
661
    Index = N.getOperand(1);
2260
661
    return true;
2261
661
  }
2262
3.26k
2263
3.26k
  // Otherwise, do it the hard way, using R0 as the base register.
2264
3.26k
  Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2265
3.99k
                         N.getValueType());
2266
3.99k
  Index = N;
2267
3.99k
  return true;
2268
3.99k
}
2269
2270
/// getPreIndexedAddressParts - returns true by value, base pointer and
2271
/// offset pointer and addressing mode by reference if the node's address
2272
/// can be legally represented as pre-indexed load / store address.
2273
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
2274
                                                  SDValue &Offset,
2275
                                                  ISD::MemIndexedMode &AM,
2276
830
                                                  SelectionDAG &DAG) const {
2277
830
  if (DisablePPCPreinc) return false;
2278
830
2279
830
  bool isLoad = true;
2280
830
  SDValue Ptr;
2281
830
  EVT VT;
2282
830
  unsigned Alignment;
2283
830
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2284
453
    Ptr = LD->getBasePtr();
2285
453
    VT = LD->getMemoryVT();
2286
453
    Alignment = LD->getAlignment();
2287
830
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2288
377
    Ptr = ST->getBasePtr();
2289
377
    VT  = ST->getMemoryVT();
2290
377
    Alignment = ST->getAlignment();
2291
377
    isLoad = false;
2292
377
  } else
2293
0
    return false;
2294
830
2295
830
  // PowerPC doesn't have preinc load/store instructions for vectors (except
2296
830
  // for QPX, which does have preinc r+r forms).
2297
830
  if (VT.isVector()) {
2298
1
    if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
2299
0
      return false;
2300
1
    } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
2301
1
      AM = ISD::PRE_INC;
2302
1
      return true;
2303
1
    }
2304
829
  }
2305
829
2306
829
  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2307
107
    // Common code will reject creating a pre-inc form if the base pointer
2308
107
    // is a frame index, or if N is a store and the base pointer is either
2309
107
    // the same as or a predecessor of the value being stored.  Check for
2310
107
    // those situations here, and try with swapped Base/Offset instead.
2311
107
    bool Swap = false;
2312
107
2313
107
    if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
2314
0
      Swap = true;
2315
107
    else if (!isLoad) {
2316
33
      SDValue Val = cast<StoreSDNode>(N)->getValue();
2317
33
      if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2318
22
        Swap = true;
2319
107
    }
2320
107
2321
107
    if (Swap)
2322
22
      std::swap(Base, Offset);
2323
107
2324
107
    AM = ISD::PRE_INC;
2325
107
    return true;
2326
107
  }
2327
722
2328
722
  // LDU/STU can only handle immediates that are a multiple of 4.
2329
722
  if (VT != MVT::i64) {
2330
581
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 0))
2331
0
      return false;
2332
141
  } else {
2333
141
    // LDU/STU need an address with at least 4-byte alignment.
2334
141
    if (Alignment < 4)
2335
4
      return false;
2336
137
2337
137
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 4))
2338
0
      return false;
2339
718
  }
2340
718
2341
718
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2342
376
    // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
2343
376
    // sext i32 to i64 when addr mode is r+i.
2344
376
    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2345
1
        LD->getExtensionType() == ISD::SEXTLOAD &&
2346
0
        isa<ConstantSDNode>(Offset))
2347
0
      return false;
2348
718
  }
2349
718
2350
718
  AM = ISD::PRE_INC;
2351
718
  return true;
2352
718
}
2353
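// For reference, ISD::PRE_INC maps onto the PPC update-form instructions,
// which compute the effective address and write it back to the base
// register in the same instruction, for example:
//   lwzu rD, 4(rBase)     ; rD = *(rBase + 4); rBase += 4
//   stwu rS, -16(r1)      ; *(r1 - 16) = rS;   r1 -= 16
// The i64 case above demands 4-byte alignment and a displacement that is
// a multiple of 4 because the 64-bit update forms use the DS instruction
// format, whose displacement field is scaled by 4.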
2354
//===----------------------------------------------------------------------===//
2355
//  LowerOperation implementation
2356
//===----------------------------------------------------------------------===//
2357
2358
/// Return true if we should reference labels using a PICBase, set the HiOpFlags
2359
/// and LoOpFlags to the target MO flags.
2360
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2361
                               unsigned &HiOpFlags, unsigned &LoOpFlags,
2362
352
                               const GlobalValue *GV = nullptr) {
2363
352
  HiOpFlags = PPCII::MO_HA;
2364
352
  LoOpFlags = PPCII::MO_LO;
2365
352
2366
352
  // Don't use the pic base if not in PIC relocation model.
2367
352
  if (IsPIC) {
2368
35
    HiOpFlags |= PPCII::MO_PIC_FLAG;
2369
35
    LoOpFlags |= PPCII::MO_PIC_FLAG;
2370
35
  }
2371
352
2372
352
  // If this is a reference to a global value that requires a non-lazy-ptr, make
2373
352
  // sure that instruction lowering adds it.
2374
352
  if (GV && Subtarget.hasLazyResolverStub(GV)) {
2375
127
    HiOpFlags |= PPCII::MO_NLP_FLAG;
2376
127
    LoOpFlags |= PPCII::MO_NLP_FLAG;
2377
127
2378
127
    if (GV->hasHiddenVisibility()) {
2379
3
      HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
2380
3
      LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
2381
3
    }
2382
127
  }
2383
352
}
2384
2385
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2386
334
                             SelectionDAG &DAG) {
2387
334
  SDLoc DL(HiPart);
2388
334
  EVT PtrVT = HiPart.getValueType();
2389
334
  SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2390
334
2391
334
  SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
2392
334
  SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
2393
334
2394
334
  // With PIC, the first instruction is actually "GR+hi(&G)".
2395
334
  if (isPIC)
2396
17
    Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
2397
17
                     DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
2398
334
2399
334
  // Generate non-pic code that has direct accesses to the constant pool.
2400
334
  // The address of the global is just (hi(&g)+lo(&g)).
2401
334
  return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
2402
334
}
2403
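// A minimal sketch of the Hi/Lo decomposition built above (helper names are
// hypothetical): the low part is the sign-extended low 16 bits of the
// address, and the high part is pre-adjusted so that the addis/addi pair
// adds back to the full 32-bit value.
//
//   static int32_t ha(int32_t x) { return (x + 0x8000) >> 16; } // @ha
//   static int16_t lo(int32_t x) { return (int16_t)x; }         // @l
//
// Invariant: (ha(x) << 16) + lo(x) == x for any 32-bit x.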
2404
3.10k
static void setUsesTOCBasePtr(MachineFunction &MF) {
2405
3.10k
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2406
3.10k
  FuncInfo->setUsesTOCBasePtr();
2407
3.10k
}
2408
2409
3.05k
static void setUsesTOCBasePtr(SelectionDAG &DAG) {
2410
3.05k
  setUsesTOCBasePtr(DAG.getMachineFunction());
2411
3.05k
}
2412
2413
static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit,
2414
1.83k
                           SDValue GA) {
2415
1.83k
  EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2416
1.81k
  SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) :
2417
18
                DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
2418
1.83k
2419
1.83k
  SDValue Ops[] = { GA, Reg };
2420
1.83k
  return DAG.getMemIntrinsicNode(
2421
1.83k
      PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
2422
1.83k
      MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0, false, true,
2423
1.83k
      false, 0);
2424
1.83k
}
2425
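// TOC_ENTRY is modeled as a memory intrinsic because it ultimately lowers
// to a load of the entry's address out of the TOC anchored at X2, e.g.
// (small code model; the medium/large models prepend an addis of
// sym@toc@ha):
//   ld rD, sym@toc(r2)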
2426
SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
2427
652
                                             SelectionDAG &DAG) const {
2428
652
  EVT PtrVT = Op.getValueType();
2429
652
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2430
652
  const Constant *C = CP->getConstVal();
2431
652
2432
652
  // 64-bit SVR4 ABI code is always position-independent.
2433
652
  // The actual address of the GlobalValue is stored in the TOC.
2434
652
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2435
565
    setUsesTOCBasePtr(DAG);
2436
565
    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
2437
565
    return getTOCEntry(DAG, SDLoc(CP), true, GA);
2438
565
  }
2439
87
2440
87
  unsigned MOHiFlag, MOLoFlag;
2441
87
  bool IsPIC = isPositionIndependent();
2442
87
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2443
87
2444
87
  if (IsPIC && Subtarget.isSVR4ABI()) {
2445
11
    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
2446
11
                                           PPCII::MO_PIC_FLAG);
2447
11
    return getTOCEntry(DAG, SDLoc(CP), false, GA);
2448
11
  }
2449
76
2450
76
  SDValue CPIHi =
2451
76
    DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
2452
76
  SDValue CPILo =
2453
76
    DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
2454
76
  return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
2455
76
}
2456
2457
// For 64-bit PowerPC, prefer the more compact relative encodings.
2458
// This trades 32 bits per jump table entry for one or two instructions
2459
// at the jump site.
2460
16
unsigned PPCTargetLowering::getJumpTableEncoding() const {
2461
16
  if (isJumpTableRelative())
2462
13
    return MachineJumpTableInfo::EK_LabelDifference32;
2463
3
2464
3
  return TargetLowering::getJumpTableEncoding();
2465
3
}
2466
2467
27
bool PPCTargetLowering::isJumpTableRelative() const {
2468
27
  if (Subtarget.isPPC64())
2469
21
    return true;
2470
6
  return TargetLowering::isJumpTableRelative();
2471
6
}
2472
2473
SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
2474
8
                                                    SelectionDAG &DAG) const {
2475
8
  if (!Subtarget.isPPC64())
2476
0
    return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2477
8
2478
8
  switch (getTargetMachine().getCodeModel()) {
2479
5
  case CodeModel::Small:
2480
5
  case CodeModel::Medium:
2481
5
    return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2482
3
  default:
2483
3
    return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
2484
3
                       getPointerTy(DAG.getDataLayout()));
2485
0
  }
2486
0
}
2487
2488
const MCExpr *
2489
PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
2490
                                                unsigned JTI,
2491
44
                                                MCContext &Ctx) const {
2492
44
  if (!Subtarget.isPPC64())
2493
0
    return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2494
44
2495
44
  switch (getTargetMachine().getCodeModel()) {
2496
32
  case CodeModel::Small:
2497
32
  case CodeModel::Medium:
2498
32
    return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2499
12
  default:
2500
12
    return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2501
0
  }
2502
0
}
2503
2504
11
SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
2505
11
  EVT PtrVT = Op.getValueType();
2506
11
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2507
11
2508
11
  // 64-bit SVR4 ABI code is always position-independent.
2509
11
  // The actual address of the GlobalValue is stored in the TOC.
2510
11
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2511
8
    setUsesTOCBasePtr(DAG);
2512
8
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
2513
8
    return getTOCEntry(DAG, SDLoc(JT), true, GA);
2514
8
  }
2515
3
2516
3
  unsigned MOHiFlag, MOLoFlag;
2517
3
  bool IsPIC = isPositionIndependent();
2518
3
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2519
3
2520
3
  if (IsPIC && Subtarget.isSVR4ABI()) {
2521
0
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
2522
0
                                        PPCII::MO_PIC_FLAG);
2523
0
    return getTOCEntry(DAG, SDLoc(GA), false, GA);
2524
0
  }
2525
3
2526
3
  SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
2527
3
  SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
2528
3
  return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
2529
3
}
2530
2531
SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
2532
9
                                             SelectionDAG &DAG) const {
2533
9
  EVT PtrVT = Op.getValueType();
2534
9
  BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
2535
9
  const BlockAddress *BA = BASDN->getBlockAddress();
2536
9
2537
9
  // 64-bit SVR4 ABI code is always position-independent.
2538
9
  // The actual BlockAddress is stored in the TOC.
2539
9
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2540
6
    setUsesTOCBasePtr(DAG);
2541
6
    SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
2542
6
    return getTOCEntry(DAG, SDLoc(BASDN), true, GA);
2543
6
  }
2544
3
2545
3
  unsigned MOHiFlag, MOLoFlag;
2546
3
  bool IsPIC = isPositionIndependent();
2547
3
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2548
3
  SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
2549
3
  SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
2550
3
  return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
2551
3
}
2552
2553
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
2554
39
                                              SelectionDAG &DAG) const {
2555
39
  // FIXME: TLS addresses currently use medium model code sequences,
2556
39
  // which is the most useful form.  Eventually support for small and
2557
39
  // large models could be added if users need it, at the cost of
2558
39
  // additional complexity.
2559
39
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
2560
39
  if (DAG.getTarget().Options.EmulatedTLS)
2561
6
    return LowerToTLSEmulatedModel(GA, DAG);
2562
33
2563
33
  SDLoc dl(GA);
2564
33
  const GlobalValue *GV = GA->getGlobal();
2565
33
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2566
33
  bool is64bit = Subtarget.isPPC64();
2567
33
  const Module *M = DAG.getMachineFunction().getFunction()->getParent();
2568
33
  PICLevel::Level picLevel = M->getPICLevel();
2569
33
2570
33
  TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
2571
33
2572
33
  if (Model == TLSModel::LocalExec) {
2573
10
    SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2574
10
                                               PPCII::MO_TPREL_HA);
2575
10
    SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2576
10
                                               PPCII::MO_TPREL_LO);
2577
9
    SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
2578
1
                             : DAG.getRegister(PPC::R2, MVT::i32);
2579
10
2580
10
    SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
2581
10
    return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
2582
10
  }
2583
23
2584
23
  if (Model == TLSModel::InitialExec) {
2585
3
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2586
3
    SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2587
3
                                                PPCII::MO_TLS);
2588
3
    SDValue GOTPtr;
2589
3
    if (is64bit) {
2590
2
      setUsesTOCBasePtr(DAG);
2591
2
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2592
2
      GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
2593
2
                           PtrVT, GOTReg, TGA);
2594
2
    } else
2595
1
      GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
2596
3
    SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
2597
3
                                   PtrVT, TGA, GOTPtr);
2598
3
    return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
2599
3
  }
2600
20
2601
20
  if (Model == TLSModel::GeneralDynamic) {
2602
12
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2603
12
    SDValue GOTPtr;
2604
12
    if (is64bit) {
2605
10
      setUsesTOCBasePtr(DAG);
2606
10
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2607
10
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
2608
10
                                   GOTReg, TGA);
2609
12
    } else {
2610
2
      if (picLevel == PICLevel::SmallPIC)
2611
0
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2612
2
      else
2613
2
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2614
2
    }
2615
12
    return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
2616
12
                       GOTPtr, TGA, TGA);
2617
12
  }
2618
8
2619
8
  if (Model == TLSModel::LocalDynamic) {
2620
8
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2621
8
    SDValue GOTPtr;
2622
8
    if (is64bit) {
2623
6
      setUsesTOCBasePtr(DAG);
2624
6
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2625
6
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
2626
6
                           GOTReg, TGA);
2627
8
    } else {
2628
2
      if (picLevel == PICLevel::SmallPIC)
2629
0
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2630
2
      else
2631
2
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2632
2
    }
2633
8
    SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
2634
8
                                  PtrVT, GOTPtr, TGA, TGA);
2635
8
    SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
2636
8
                                      PtrVT, TLSAddr, TGA);
2637
8
    return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
2638
8
  }
2639
0
2640
0
  llvm_unreachable("Unknown TLS model!");
2641
0
}
2642
2643
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
2644
1.49k
                                              SelectionDAG &DAG) const {
2645
1.49k
  EVT PtrVT = Op.getValueType();
2646
1.49k
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
2647
1.49k
  SDLoc DL(GSDN);
2648
1.49k
  const GlobalValue *GV = GSDN->getGlobal();
2649
1.49k
2650
1.49k
  // 64-bit SVR4 ABI code is always position-independent.
2651
1.49k
  // The actual address of the GlobalValue is stored in the TOC.
2652
1.49k
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2653
1.23k
    setUsesTOCBasePtr(DAG);
2654
1.23k
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
2655
1.23k
    return getTOCEntry(DAG, DL, true, GA);
2656
1.23k
  }
2657
259
2658
259
  unsigned MOHiFlag, MOLoFlag;
2659
259
  bool IsPIC = isPositionIndependent();
2660
259
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
2661
259
2662
259
  if (IsPIC && Subtarget.isSVR4ABI()) {
2663
7
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
2664
7
                                            GSDN->getOffset(),
2665
7
                                            PPCII::MO_PIC_FLAG);
2666
7
    return getTOCEntry(DAG, DL, false, GA);
2667
7
  }
2668
252
2669
252
  SDValue GAHi =
2670
252
    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
2671
252
  SDValue GALo =
2672
252
    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
2673
252
2674
252
  SDValue Ptr = LowerLabelRef(GAHi, GALo, IsPIC, DAG);
2675
252
2676
252
  // If the global reference is actually to a non-lazy-pointer, we have to do an
2677
252
  // extra load to get the address of the global.
2678
252
  if (MOHiFlag & PPCII::MO_NLP_FLAG)
2679
127
    Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
2680
1.49k
  return Ptr;
2681
1.49k
}
2682
2683
38
SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
2684
38
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2685
38
  SDLoc dl(Op);
2686
38
2687
38
  if (Op.getValueType() == MVT::v2i64) {
2688
16
    // When the operands themselves are v2i64 values, we need to do something
2689
16
    // special because VSX has no underlying comparison operations for these.
2690
16
    if (Op.getOperand(0).getValueType() == MVT::v2i64) {
2691
12
      // Equality can be handled by casting to the legal type for Altivec
2692
12
      // comparisons; everything else needs to be expanded.
2693
12
      if (CC == ISD::SETEQ || CC == ISD::SETNE) {
2694
8
        return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
2695
8
                 DAG.getSetCC(dl, MVT::v4i32,
2696
8
                   DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
2697
8
                   DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
2698
8
                   CC));
2699
8
      }
2700
4
2701
4
      return SDValue();
2702
4
    }
2703
4
2704
4
    // We handle most of these in the usual way.
2705
4
    return Op;
2706
4
  }
2707
22
2708
22
  // If we're comparing for equality to zero, expose the fact that this is
2709
22
  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
2710
22
  // fold the new nodes.
2711
22
  if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
2712
6
    return V;
2713
16
2714
16
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
2715
14
    // Leave comparisons against 0 and -1 alone for now, since they're usually
2716
14
    // optimized.  FIXME: revisit this when we can custom lower all setcc
2717
14
    // optimizations.
2718
14
    if (C->isAllOnesValue() || C->isNullValue())
2719
9
      return SDValue();
2720
7
  }
2721
7
2722
7
  // If we have an integer seteq/setne, turn it into a compare against zero
2723
7
  // by xor'ing the rhs with the lhs, which is faster than setting a
2724
7
  // condition register, reading it back out, and masking the correct bit.  The
2725
7
  // normal approach here uses sub to do this instead of xor.  Using xor exposes
2726
7
  // the result to other bit-twiddling opportunities.
2727
7
  EVT LHSVT = Op.getOperand(0).getValueType();
2728
7
  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
2729
2
    EVT VT = Op.getValueType();
2730
2
    SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
2731
2
                                Op.getOperand(1));
2732
2
    return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
2733
2
  }
2734
5
  return SDValue();
2735
5
}
2736
2737
1
SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
2738
1
  SDNode *Node = Op.getNode();
2739
1
  EVT VT = Node->getValueType(0);
2740
1
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2741
1
  SDValue InChain = Node->getOperand(0);
2742
1
  SDValue VAListPtr = Node->getOperand(1);
2743
1
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
2744
1
  SDLoc dl(Node);
2745
1
2746
1
  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
2747
1
2748
1
  // gpr_index
2749
1
  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
2750
1
                                    VAListPtr, MachinePointerInfo(SV), MVT::i8);
2751
1
  InChain = GprIndex.getValue(1);
2752
1
2753
1
  if (VT == MVT::i64) {
2754
0
    // Check if GprIndex is even
2755
0
    SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
2756
0
                                 DAG.getConstant(1, dl, MVT::i32));
2757
0
    SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
2758
0
                                DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
2759
0
    SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
2760
0
                                          DAG.getConstant(1, dl, MVT::i32));
2761
0
    // Align GprIndex to be even if it isn't
2762
0
    GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
2763
0
                           GprIndex);
2764
0
  }
2765
1
2766
1
  // fpr index is 1 byte after gpr
2767
1
  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2768
1
                               DAG.getConstant(1, dl, MVT::i32));
2769
1
2770
1
  // fpr
2771
1
  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
2772
1
                                    FprPtr, MachinePointerInfo(SV), MVT::i8);
2773
1
  InChain = FprIndex.getValue(1);
2774
1
2775
1
  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2776
1
                                       DAG.getConstant(8, dl, MVT::i32));
2777
1
2778
1
  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2779
1
                                        DAG.getConstant(4, dl, MVT::i32));
2780
1
2781
1
  // areas
2782
1
  SDValue OverflowArea =
2783
1
      DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
2784
1
  InChain = OverflowArea.getValue(1);
2785
1
2786
1
  SDValue RegSaveArea =
2787
1
      DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
2788
1
  InChain = RegSaveArea.getValue(1);
2789
1
2790
1
  // select overflow_area if index >= 8
2791
1
  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
2792
1
                            DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
2793
1
2794
1
  // adjustment constant gpr_index * 4/8
2795
1
  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
2796
1
                                    VT.isInteger() ? GprIndex : FprIndex,
2797
1
                                    DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
2798
1
                                                    MVT::i32));
2799
1
2800
1
  // OurReg = RegSaveArea + RegConstant
2801
1
  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
2802
1
                               RegConstant);
2803
1
2804
1
  // Floating types are 32 bytes into RegSaveArea
2805
1
  if (VT.isFloatingPoint())
2806
0
    OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
2807
0
                         DAG.getConstant(32, dl, MVT::i32));
2808
1
2809
1
  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
2810
1
  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
2811
1
                                   VT.isInteger() ? GprIndex : FprIndex,
2812
1
                                   DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
2813
1
                                                   MVT::i32));
2814
1
2815
1
  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
2816
1
                              VT.isInteger() ? VAListPtr : FprPtr,
2817
1
                              MachinePointerInfo(SV), MVT::i8);
2818
1
2819
1
  // determine if we should load from reg_save_area or overflow_area
2820
1
  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
2821
1
2822
1
  // increase overflow_area by 4/8 if gpr/fpr index >= 8
2823
1
  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
2824
1
                                          DAG.getConstant(VT.isInteger() ? 4 : 8,
2825
1
                                          dl, MVT::i32));
2826
1
2827
1
  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
2828
1
                             OverflowAreaPlusN);
2829
1
2830
1
  InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
2831
1
                              MachinePointerInfo(), MVT::i32);
2832
1
2833
1
  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
2834
1
}
2835
2836
1
SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
2837
1
  assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
2838
1
2839
1
  // We have to copy the entire va_list struct:
2840
1
  // 2*sizeof(char) + 2 bytes of alignment padding + 2*sizeof(char*) = 12 bytes
2841
1
  return DAG.getMemcpy(Op.getOperand(0), Op,
2842
1
                       Op.getOperand(1), Op.getOperand(2),
2843
1
                       DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true,
2844
1
                       false, MachinePointerInfo(), MachinePointerInfo());
2845
1
}
2846
2847
SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
2848
1
                                                  SelectionDAG &DAG) const {
2849
1
  return Op.getOperand(0);
2850
1
}
2851
2852
SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
2853
1
                                                SelectionDAG &DAG) const {
2854
1
  SDValue Chain = Op.getOperand(0);
2855
1
  SDValue Trmp = Op.getOperand(1); // trampoline
2856
1
  SDValue FPtr = Op.getOperand(2); // nested function
2857
1
  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
2858
1
  SDLoc dl(Op);
2859
1
2860
1
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2861
1
  bool isPPC64 = (PtrVT == MVT::i64);
2862
1
  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
2863
1
2864
1
  TargetLowering::ArgListTy Args;
2865
1
  TargetLowering::ArgListEntry Entry;
2866
1
2867
1
  Entry.Ty = IntPtrTy;
2868
1
  Entry.Node = Trmp; Args.push_back(Entry);
2869
1
2870
1
  // TrampSize == (isPPC64 ? 48 : 40);
2871
1
  Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
2872
1
                               isPPC64 ? MVT::i64 : MVT::i32);
2873
1
  Args.push_back(Entry);
2874
1
2875
1
  Entry.Node = FPtr; Args.push_back(Entry);
2876
1
  Entry.Node = Nest; Args.push_back(Entry);
2877
1
2878
1
  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
2879
1
  TargetLowering::CallLoweringInfo CLI(DAG);
2880
1
  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
2881
1
      CallingConv::C, Type::getVoidTy(*DAG.getContext()),
2882
1
      DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
2883
1
2884
1
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2885
1
  return CallResult.second;
2886
1
}
2887
2888
6
SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
2889
6
  MachineFunction &MF = DAG.getMachineFunction();
2890
6
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2891
6
  EVT PtrVT = getPointerTy(MF.getDataLayout());
2892
6
2893
6
  SDLoc dl(Op);
2894
6
2895
6
  if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
2896
6
    // vastart just stores the address of the VarArgsFrameIndex slot into the
2897
6
    // memory location argument.
2898
6
    SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2899
6
    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2900
6
    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
2901
6
                        MachinePointerInfo(SV));
2902
6
  }
2903
0
2904
0
  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
2905
0
  // We assume the given va_list has already been allocated.
2906
0
  //
2907
0
  // typedef struct {
2908
0
  //  char gpr;     /* index into the array of 8 GPRs
2909
0
  //                 * stored in the register save area
2910
0
  //                 * gpr=0 corresponds to r3,
2911
0
  //                 * gpr=1 to r4, etc.
2912
0
  //                 */
2913
0
  //  char fpr;     /* index into the array of 8 FPRs
2914
0
  //                 * stored in the register save area
2915
0
  //                 * fpr=0 corresponds to f1,
2916
0
  //                 * fpr=1 to f2, etc.
2917
0
  //                 */
2918
0
  //  char *overflow_arg_area;
2919
0
  //                /* location on stack that holds
2920
0
  //                 * the next overflow argument
2921
0
  //                 */
2922
0
  //  char *reg_save_area;
2923
0
  //               /* where r3:r10 and f1:f8 (if saved)
2924
0
  //                * are stored
2925
0
  //                */
2926
0
  // } va_list[1];
2927
0
2928
0
  SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
2929
0
  SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
2930
0
  SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
2931
0
                                            PtrVT);
2932
0
  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
2933
0
                                 PtrVT);
2934
0
2935
0
  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
2936
0
  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
2937
0
2938
0
  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
2939
0
  SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
2940
0
2941
0
  uint64_t FPROffset = 1;
2942
0
  SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
2943
0
2944
0
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2945
0
2946
0
  // Store first byte : number of int regs
2947
0
  SDValue firstStore =
2948
0
      DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
2949
0
                        MachinePointerInfo(SV), MVT::i8);
2950
0
  uint64_t nextOffset = FPROffset;
2951
0
  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
2952
0
                                  ConstFPROffset);
2953
0
2954
0
  // Store second byte : number of float regs
2955
0
  SDValue secondStore =
2956
0
      DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
2957
0
                        MachinePointerInfo(SV, nextOffset), MVT::i8);
2958
0
  nextOffset += StackOffset;
2959
0
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
2960
0
2961
0
  // Store second word : arguments given on stack
2962
0
  SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
2963
0
                                    MachinePointerInfo(SV, nextOffset));
2964
0
  nextOffset += FrameOffset;
2965
0
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
2966
0
2967
0
  // Store third word : arguments given in registers
2968
0
  return DAG.getStore(thirdStore, dl, FR, nextPtr,
2969
0
                      MachinePointerInfo(SV, nextOffset));
2970
0
}
2971
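// The layout stored above, written out as a plain C sketch (the typedef
// name is hypothetical); with natural alignment this is exactly the 12
// bytes that LowerVACOPY copies:
//
//   typedef struct {
//     char gpr;                // index of the next GPR (r3..r10)
//     char fpr;                // index of the next FPR (f1..f8)
//                              // 2 bytes of padding
//     char *overflow_arg_area; // next argument passed on the stack
//     char *reg_save_area;     // where r3:r10 and f1:f8 were spilled
//   } ppc32_va_list[1];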
2972
#include "PPCGenCallingConv.inc"
2973
2974
// Function whose sole purpose is to kill compiler warnings
2975
// stemming from unused functions included from PPCGenCallingConv.inc.
2976
0
CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
2977
0
  return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
2978
0
}
2979
2980
bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
2981
                                      CCValAssign::LocInfo &LocInfo,
2982
                                      ISD::ArgFlagsTy &ArgFlags,
2983
1.64k
                                      CCState &State) {
2984
1.64k
  return true;
2985
1.64k
}
2986
2987
bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
2988
                                             MVT &LocVT,
2989
                                             CCValAssign::LocInfo &LocInfo,
2990
                                             ISD::ArgFlagsTy &ArgFlags,
2991
99
                                             CCState &State) {
2992
99
  static const MCPhysReg ArgRegs[] = {
2993
99
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
2994
99
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
2995
99
  };
2996
99
  const unsigned NumArgRegs = array_lengthof(ArgRegs);
2997
99
2998
99
  unsigned RegNum = State.getFirstUnallocated(ArgRegs);
2999
99
3000
99
  // Skip one register if the first unallocated register has an even register
3001
99
  // number and there are still argument registers available which have not been
3002
99
  // allocated yet. RegNum is actually an index into ArgRegs, which means we
3003
99
  // need to skip a register if RegNum is odd.
3004
99
  if (RegNum != NumArgRegs && RegNum % 2 == 1) {
3005
11
    State.AllocateReg(ArgRegs[RegNum]);
3006
11
  }
3007
99
3008
99
  // Always return false here, as this function only makes sure that the first
3009
99
  // unallocated register has an odd register number and does not actually
3010
99
  // allocate a register for the current argument.
3011
99
  return false;
3012
99
}
3013
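// Worked example of the skip above: ArgRegs[0] is r3, so an i64 pair must
// start at an odd-numbered register (r3, r5, r7, r9). If the next free
// register is r4 (RegNum == 1), r4 is allocated and wasted and the value
// is passed in r5/r6 instead; hence the "RegNum % 2 == 1" test.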
3014
bool
3015
llvm::CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT,
3016
                                                  MVT &LocVT,
3017
                                                  CCValAssign::LocInfo &LocInfo,
3018
                                                  ISD::ArgFlagsTy &ArgFlags,
3019
6
                                                  CCState &State) {
3020
6
  static const MCPhysReg ArgRegs[] = {
3021
6
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3022
6
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3023
6
  };
3024
6
  const unsigned NumArgRegs = array_lengthof(ArgRegs);
3025
6
3026
6
  unsigned RegNum = State.getFirstUnallocated(ArgRegs);
3027
6
  int RegsLeft = NumArgRegs - RegNum;
3028
6
3029
6
  // Skip if there are not enough registers left for the long double type (4 gpr regs
3030
6
  // in soft float mode) and put the long double argument on the stack.
3031
6
  if (RegNum != NumArgRegs && RegsLeft < 4) {
3032
4
    for (int i = 0; i < RegsLeft; i++) {
3033
3
      State.AllocateReg(ArgRegs[RegNum + i]);
3034
3
    }
3035
1
  }
3036
6
3037
6
  return false;
3038
6
}
3039
3040
bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
3041
                                               MVT &LocVT,
3042
                                               CCValAssign::LocInfo &LocInfo,
3043
                                               ISD::ArgFlagsTy &ArgFlags,
3044
51
                                               CCState &State) {
3045
51
  static const MCPhysReg ArgRegs[] = {
3046
51
    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3047
51
    PPC::F8
3048
51
  };
3049
51
3050
51
  const unsigned NumArgRegs = array_lengthof(ArgRegs);
3051
51
3052
51
  unsigned RegNum = State.getFirstUnallocated(ArgRegs);
3053
51
3054
51
  // If there is only one Floating-point register left we need to put both f64
3055
51
  // values of a split ppc_fp128 value on the stack.
3056
51
  if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
3057
0
    State.AllocateReg(ArgRegs[RegNum]);
3058
0
  }
3059
51
3060
51
  // Always return false here, as this function only makes sure that the two f64
3061
51
  // values a ppc_fp128 value is split into are both passed in registers or both
3062
51
  // passed on the stack and does not actually allocate a register for the
3063
51
  // current argument.
3064
51
  return false;
3065
51
}
3066
3067
/// FPR - The set of FP registers that should be allocated for arguments,
3068
/// on Darwin.
3069
static const MCPhysReg FPR[] = {PPC::F1,  PPC::F2,  PPC::F3, PPC::F4, PPC::F5,
3070
                                PPC::F6,  PPC::F7,  PPC::F8, PPC::F9, PPC::F10,
3071
                                PPC::F11, PPC::F12, PPC::F13};
3072
3073
/// QFPR - The set of QPX registers that should be allocated for arguments.
3074
static const MCPhysReg QFPR[] = {
3075
    PPC::QF1, PPC::QF2, PPC::QF3,  PPC::QF4,  PPC::QF5,  PPC::QF6, PPC::QF7,
3076
    PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
3077
3078
/// CalculateStackSlotSize - Calculates the size reserved for this argument on
3079
/// the stack.
3080
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3081
18.7k
                                       unsigned PtrByteSize) {
3082
18.7k
  unsigned ArgSize = ArgVT.getStoreSize();
3083
18.7k
  if (Flags.isByVal())
3084
144
    ArgSize = Flags.getByValSize();
3085
18.7k
3086
18.7k
  // Round up to multiples of the pointer size, except for array members,
3087
18.7k
  // which are always packed.
3088
18.7k
  if (!Flags.isInConsecutiveRegs())
3089
16.5k
    ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3090
18.7k
3091
18.7k
  return ArgSize;
3092
18.7k
}
3093
3094
/// CalculateStackSlotAlignment - Calculates the alignment of this argument
3095
/// on the stack.
3096
static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
3097
                                            ISD::ArgFlagsTy Flags,
3098
33.3k
                                            unsigned PtrByteSize) {
3099
33.3k
  unsigned Align = PtrByteSize;
3100
33.3k
3101
33.3k
  // Altivec parameters are padded to a 16 byte boundary.
3102
33.3k
  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3103
33.3k
      ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3104
33.3k
      ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3105
26.8k
      ArgVT == MVT::v1i128)
3106
6.68k
    Align = 16;
3107
33.3k
  // QPX vector types stored in double-precision are padded to a 32 byte
3108
33.3k
  // boundary.
3109
26.6k
  else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
3110
266
    Align = 32;
3111
33.3k
3112
33.3k
  // ByVal parameters are aligned as requested.
3113
33.3k
  if (Flags.isByVal()) {
3114
262
    unsigned BVAlign = Flags.getByValAlign();
3115
262
    if (BVAlign > PtrByteSize) {
3116
40
      if (BVAlign % PtrByteSize != 0)
3117
0
          llvm_unreachable(
3118
40
            "ByVal alignment is not a multiple of the pointer size");
3119
40
3120
40
      Align = BVAlign;
3121
40
    }
3122
262
  }
3123
33.3k
3124
33.3k
  // Array members are always packed to their original alignment.
3125
33.3k
  if (Flags.isInConsecutiveRegs()) {
3126
3.82k
    // If the array member was split into multiple registers, the first
3127
3.82k
    // needs to be aligned to the size of the full type.  (Except for
3128
3.82k
    // ppcf128, which is only aligned as its f64 components.)
3129
3.82k
    if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3130
84
      Align = OrigVT.getStoreSize();
3131
3.82k
    else
3132
3.74k
      Align = ArgVT.getStoreSize();
3133
3.82k
  }
3134
33.3k
3135
33.3k
  return Align;
3136
33.3k
}
3137
3138
/// CalculateStackSlotUsed - Return whether this argument will use its
3139
/// stack slot (instead of being passed in registers).  ArgOffset,
3140
/// AvailableFPRs, and AvailableVRs must hold the current argument
3141
/// position, and will be updated to account for this argument.
3142
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
3143
                                   ISD::ArgFlagsTy Flags,
3144
                                   unsigned PtrByteSize,
3145
                                   unsigned LinkageSize,
3146
                                   unsigned ParamAreaSize,
3147
                                   unsigned &ArgOffset,
3148
                                   unsigned &AvailableFPRs,
3149
15.4k
                                   unsigned &AvailableVRs, bool HasQPX) {
3150
15.4k
  bool UseMemory = false;
3151
15.4k
3152
15.4k
  // Respect alignment of argument on the stack.
3153
15.4k
  unsigned Align =
3154
15.4k
    CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3155
15.4k
  ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
3156
15.4k
  // If there's no space left in the argument save area, we must
3157
15.4k
  // use memory (this check also catches zero-sized arguments).
3158
15.4k
  if (ArgOffset >= LinkageSize + ParamAreaSize)
3159
2.54k
    UseMemory = true;
3160
15.4k
3161
15.4k
  // Allocate argument on the stack.
3162
15.4k
  ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
3163
15.4k
  if (Flags.isInConsecutiveRegsLast())
3164
227
    ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3165
15.4k
  // If we overran the argument save area, we must use memory
3166
15.4k
  // (this check catches arguments passed partially in memory)
3167
15.4k
  if (ArgOffset > LinkageSize + ParamAreaSize)
3168
2.57k
    UseMemory = true;
3169
15.4k
3170
15.4k
  // However, if the argument is actually passed in an FPR or a VR,
3171
15.4k
  // we don't use memory after all.
3172
15.4k
  if (!Flags.isByVal()) {
3173
15.3k
    if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
3174
15.3k
        // QPX registers overlap with the scalar FP registers.
3175
11.7k
        (HasQPX && (ArgVT == MVT::v4f32 ||
3176
218
                    ArgVT == MVT::v4f64 ||
3177
308
                    ArgVT == MVT::v4i1)))
3178
3.84k
      if (AvailableFPRs > 0) {
3179
3.65k
        --AvailableFPRs;
3180
3.65k
        return false;
3181
3.65k
      }
3182
11.7k
    if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3183
11.7k
        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3184
11.7k
        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3185
8.47k
        ArgVT == MVT::v1i128)
3186
3.33k
      if (AvailableVRs > 0) {
3187
3.02k
        --AvailableVRs;
3188
3.02k
        return false;
3189
3.02k
      }
3190
8.76k
  }
3191
8.76k
3192
8.76k
  return UseMemory;
3193
8.76k
}
3194
3195
/// EnsureStackAlignment - Round stack frame size up from NumBytes to
3196
/// ensure minimum alignment required for target.
3197
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
3198
7.49k
                                     unsigned NumBytes) {
3199
7.49k
  unsigned TargetAlign = Lowering->getStackAlignment();
3200
7.49k
  unsigned AlignMask = TargetAlign - 1;
3201
7.49k
  NumBytes = (NumBytes + AlignMask) & ~AlignMask;
3202
7.49k
  return NumBytes;
3203
7.49k
}
3204
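// Worked example of the mask form above (it assumes TargetAlign is a power
// of two): with a 16-byte stack alignment and NumBytes == 37,
//   AlignMask = 15;  (37 + 15) & ~15 == 48
// i.e. NumBytes is rounded up to the next multiple of 16.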
3205
SDValue PPCTargetLowering::LowerFormalArguments(
3206
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3207
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3208
7.49k
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3209
7.49k
  if (Subtarget.isSVR4ABI()) {
3210
7.24k
    if (Subtarget.isPPC64())
3211
6.53k
      return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
3212
6.53k
                                         dl, DAG, InVals);
3213
7.24k
    else
3214
709
      return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
3215
709
                                         dl, DAG, InVals);
3216
252
  } else {
3217
252
    return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
3218
252
                                       dl, DAG, InVals);
3219
252
  }
3220
0
}
3221
3222
SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
3223
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3224
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3225
709
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3226
709
3227
709
  // 32-bit SVR4 ABI Stack Frame Layout:
3228
709
  //              +-----------------------------------+
3229
709
  //        +-->  |            Back chain             |
3230
709
  //        |     +-----------------------------------+
3231
709
  //        |     | Floating-point register save area |
3232
709
  //        |     +-----------------------------------+
3233
709
  //        |     |    General register save area     |
3234
709
  //        |     +-----------------------------------+
3235
709
  //        |     |          CR save word             |
3236
709
  //        |     +-----------------------------------+
3237
709
  //        |     |         VRSAVE save word          |
3238
709
  //        |     +-----------------------------------+
3239
709
  //        |     |         Alignment padding         |
3240
709
  //        |     +-----------------------------------+
3241
709
  //        |     |     Vector register save area     |
3242
709
  //        |     +-----------------------------------+
3243
709
  //        |     |       Local variable space        |
3244
709
  //        |     +-----------------------------------+
3245
709
  //        |     |        Parameter list area        |
3246
709
  //        |     +-----------------------------------+
3247
709
  //        |     |           LR save word            |
3248
709
  //        |     +-----------------------------------+
3249
709
  // SP-->  +---  |            Back chain             |
3250
709
  //              +-----------------------------------+
3251
709
  //
3252
709
  // Specifications:
3253
709
  //   System V Application Binary Interface PowerPC Processor Supplement
3254
709
  //   AltiVec Technology Programming Interface Manual
3255
709
3256
709
  MachineFunction &MF = DAG.getMachineFunction();
3257
709
  MachineFrameInfo &MFI = MF.getFrameInfo();
3258
709
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3259
709
3260
709
  EVT PtrVT = getPointerTy(MF.getDataLayout());
3261
709
  // Potential tail calls could cause overwriting of argument stack slots.
3262
709
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3263
2
                       (CallConv == CallingConv::Fast));
3264
709
  unsigned PtrByteSize = 4;
3265
709
3266
709
  // Assign locations to all of the incoming arguments.
3267
709
  SmallVector<CCValAssign, 16> ArgLocs;
3268
709
  PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3269
709
                 *DAG.getContext());
3270
709
3271
709
  // Reserve space for the linkage area on the stack.
3272
709
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3273
709
  CCInfo.AllocateStack(LinkageSize, PtrByteSize);
3274
709
  if (useSoftFloat())
3275
24
    CCInfo.PreAnalyzeFormalArguments(Ins);
3276
709
3277
709
  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
3278
709
  CCInfo.clearWasPPCF128();
3279
709
3280
1.85k
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3281
1.14k
    CCValAssign &VA = ArgLocs[i];
3282
1.14k
3283
1.14k
    // Arguments stored in registers.
3284
1.14k
    if (VA.isRegLoc()) {
3285
1.12k
      const TargetRegisterClass *RC;
3286
1.12k
      EVT ValVT = VA.getValVT();
3287
1.12k
3288
1.12k
      switch (ValVT.getSimpleVT().SimpleTy) {
3289
0
        default:
3290
0
          llvm_unreachable("ValVT not supported by formal arguments Lowering");
3291
778
        case MVT::i1:
3292
778
        case MVT::i32:
3293
778
          RC = &PPC::GPRCRegClass;
3294
778
          break;
3295
162
        case MVT::f32:
3296
162
          if (Subtarget.hasP8Vector())
3297
0
            RC = &PPC::VSSRCRegClass;
3298
162
          else
3299
162
            RC = &PPC::F4RCRegClass;
3300
162
          break;
3301
166
        case MVT::f64:
3302
166
          if (Subtarget.hasVSX())
3303
0
            RC = &PPC::VSFRCRegClass;
3304
166
          else
3305
166
            RC = &PPC::F8RCRegClass;
3306
166
          break;
3307
11
        case MVT::v16i8:
3308
11
        case MVT::v8i16:
3309
11
        case MVT::v4i32:
3310
11
          RC = &PPC::VRRCRegClass;
3311
11
          break;
3312
9
        case MVT::v4f32:
3313
9
          RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
3314
9
          break;
3315
0
        case MVT::v2f64:
3316
0
        case MVT::v2i64:
3317
0
          RC = &PPC::VRRCRegClass;
3318
0
          break;
3319
0
        case MVT::v4f64:
3320
0
          RC = &PPC::QFRCRegClass;
3321
0
          break;
3322
0
        case MVT::v4i1:
3323
0
          RC = &PPC::QBRCRegClass;
3324
0
          break;
3325
1.12k
      }
3326
1.12k
3327
1.12k
      // Transform the arguments stored in physical registers into virtual ones.
3328
1.12k
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3329
1.12k
      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
3330
1.12k
                                            ValVT == MVT::i1 ? MVT::i32 : ValVT);
3331
1.12k
3332
1.12k
      if (ValVT == MVT::i1)
3333
2
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
3334
1.12k
3335
1.12k
      InVals.push_back(ArgValue);
3336
1.14k
    } else {
3337
23
      // Argument stored in memory.
3338
23
      assert(VA.isMemLoc());
3339
23
3340
23
      unsigned ArgSize = VA.getLocVT().getStoreSize();
3341
23
      int FI = MFI.CreateFixedObject(ArgSize, VA.getLocMemOffset(),
3342
23
                                     isImmutable);
3343
23
3344
23
      // Create load nodes to retrieve arguments from the stack.
3345
23
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3346
23
      InVals.push_back(
3347
23
          DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
3348
23
    }
3349
1.14k
  }
3350
709
3351
709
  // Assign locations to all of the incoming aggregate by value arguments.
3352
709
  // Aggregates passed by value are stored in the local variable space of the
3353
709
  // caller's stack frame, right above the parameter list area.
3354
709
  SmallVector<CCValAssign, 16> ByValArgLocs;
3355
709
  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3356
709
                      ByValArgLocs, *DAG.getContext());
3357
709
3358
709
  // Reserve stack space for the allocations in CCInfo.
3359
709
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
3360
709
3361
709
  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
3362
709
3363
709
  // Area that is at least reserved in the caller of this function.
3364
709
  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
3365
709
  MinReservedArea = std::max(MinReservedArea, LinkageSize);
3366
709
3367
709
  // Set the size that is at least reserved in caller of this function.  Tail
3368
709
  // call optimized function's reserved stack space needs to be aligned so that
3369
709
  // taking the difference between two stack areas will result in an aligned
3370
709
  // stack.
3371
709
  MinReservedArea =
3372
709
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3373
709
  FuncInfo->setMinReservedArea(MinReservedArea);
3374
709
3375
709
  SmallVector<SDValue, 8> MemOps;
3376
709
3377
709
  // If the function takes variable number of arguments, make a frame index for
3378
709
  // the start of the first vararg value... for expansion of llvm.va_start.
3379
709
  if (isVarArg) {
3380
1
    static const MCPhysReg GPArgRegs[] = {
3381
1
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3382
1
      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3383
1
    };
3384
1
    const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
3385
1
3386
1
    static const MCPhysReg FPArgRegs[] = {
3387
1
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3388
1
      PPC::F8
3389
1
    };
3390
1
    unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
3391
1
3392
1
    if (useSoftFloat())
3393
0
       NumFPArgRegs = 0;
3394
1
3395
1
    FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
3396
1
    FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
3397
1
3398
1
    // Make room for NumGPArgRegs and NumFPArgRegs.
3399
1
    int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
3400
1
                NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
3401
1
3402
1
    FuncInfo->setVarArgsStackOffset(
3403
1
      MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
3404
1
                            CCInfo.getNextStackOffset(), true));
3405
1
3406
1
    FuncInfo->setVarArgsFrameIndex(MFI.CreateStackObject(Depth, 8, false));
3407
1
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3408
1
3409
1
    // The fixed integer arguments of a variadic function are stored to the
3410
1
    // VarArgsFrameIndex on the stack so that they may be loaded by
3411
1
    // dereferencing the result of va_next.
3412
9
    for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
3413
8
      // Get an existing live-in vreg, or add a new one.
3414
8
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
3415
8
      if (!VReg)
3416
7
        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
3417
8
3418
8
      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3419
8
      SDValue Store =
3420
8
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3421
8
      MemOps.push_back(Store);
3422
8
      // Increment the address by four for the next argument to store
3423
8
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3424
8
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3425
8
    }
3426
1
3427
1
    // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
3428
1
    // is set.
3429
1
    // The double arguments are stored to the VarArgsFrameIndex
3430
1
    // on the stack.
3431
9
    for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
3432
8
      // Get an existing live-in vreg, or add a new one.
3433
8
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
3434
8
      if (!VReg)
3435
7
        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
3436
8
3437
8
      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
3438
8
      SDValue Store =
3439
8
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3440
8
      MemOps.push_back(Store);
3441
8
      // Increment the address by eight for the next argument to store
3442
8
      SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
3443
8
                                         PtrVT);
3444
8
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3445
8
    }
3446
1
  }
3447
709
3448
709
  if (!MemOps.empty())
3449
1
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3450
709
3451
709
  return Chain;
3452
709
}
3453
3454
// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
// value to MVT::i64 and then truncate to the correct register size.
SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
                                             EVT ObjectVT, SelectionDAG &DAG,
                                             SDValue ArgVal,
                                             const SDLoc &dl) const {
  if (Flags.isSExt())
    ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
                         DAG.getValueType(ObjectVT));
  else if (Flags.isZExt())
    ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
                         DAG.getValueType(ObjectVT));

  return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
}

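// [Editorial aside: illustrative sketch, not part of PPCISelLowering.cpp]
// extendArgForPPC64 does not widen anything at run time; AssertSext and
// AssertZext merely record the extension the caller already performed, and
// TRUNCATE recovers the narrow value. The plain C++ below shows the
// equivalent integer semantics for an i32 passed in an i64 register.
#include <cstdint>
#include <cstdio>

int main() {
  int32_t Arg = -2;
  uint64_t SExt = (uint64_t)(int64_t)Arg;  // isSExt(): 0xfffffffffffffffe
  uint64_t ZExt = (uint64_t)(uint32_t)Arg; // isZExt(): 0x00000000fffffffe
  // Truncation keeps only the low 32 bits; both forms give back -2.
  std::printf("%d %d\n", (int32_t)SExt, (int32_t)ZExt);
  return 0;
}
// [End editorial aside]
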
SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  bool isLittleEndian = Subtarget.isLittleEndian();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
         "fastcc not supported on varargs functions");

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = 8;
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR);
  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
  const unsigned Num_VR_Regs  = array_lengthof(VR);
  const unsigned Num_QFPR_Regs = Num_FPR_Regs;

  // Do a first pass over the arguments to determine whether the ABI
  // guarantees that our caller has allocated the parameter save area
  // on its stack frame.  In the ELFv1 ABI, this is always the case;
  // in the ELFv2 ABI, it is true if this is a vararg function or if
  // any parameter is located in a stack slot.
  bool HasParameterArea = !isELFv2ABI || isVarArg;
  unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = Num_FPR_Regs;
  unsigned AvailableVRs = Num_VR_Regs;
  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
    if (Ins[i].Flags.isNest())
      continue;

    if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
                               PtrByteSize, LinkageSize, ParamAreaSize,
                               NumBytes, AvailableFPRs, AvailableVRs,
                               Subtarget.hasQPX()))
      HasParameterArea = true;
  }

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  unsigned ArgOffset = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  unsigned &QFPR_idx = FPR_idx;
  SmallVector<SDValue, 8> MemOps;
  Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    EVT OrigVT = Ins[ArgNo].ArgVT;
    unsigned ObjSize = ObjectVT.getStoreSize();
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    if (Ins[ArgNo].isOrigArg()) {
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    }
    // We re-align the argument offset for each argument, except when using the
    // fast calling convention, when we need to make sure we do that only when
    // we'll actually use a stack slot.
    unsigned CurArgOffset, Align;
    auto ComputeArgOffset = [&]() {
      /* Respect alignment of argument on the stack.  */
      Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
      ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
      CurArgOffset = ArgOffset;
    };

    if (CallConv != CallingConv::Fast) {
      ComputeArgOffset();

      /* Compute GPR index associated with argument offset.  */
      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
      GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
    }

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      if (CallConv == CallingConv::Fast)
        ComputeArgOffset();

      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Empty aggregate parameters do not take up registers.  Examples:
      //   struct { } a;
      //   union  { } b;
      //   int c[0];
      // etc.  However, we have to provide a place-holder in InVals, so
      // pretend we have an 8-byte item at the current address for that
      // purpose.
      if (!ObjSize) {
        int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
        SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
        InVals.push_back(FIN);
        continue;
      }

      // Create a stack object covering all stack doublewords occupied
      // by the argument.  If the argument is (fully or partially) on
      // the stack, or if the argument is fully in registers but the
      // caller has allocated the parameter save anyway, we can refer
      // directly to the caller's stack frame.  Otherwise, create a
      // local copy in our own frame.
      int FI;
      if (HasParameterArea ||
          ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
        FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
      else
        FI = MFI.CreateStackObject(ArgSize, Align, false);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

      // Handle aggregates smaller than 8 bytes.
      if (ObjSize < PtrByteSize) {
        // The value of the object is its address, which differs from the
        // address of the enclosing doubleword on big-endian systems.
        SDValue Arg = FIN;
        if (!isLittleEndian) {
          SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
          Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
        }
        InVals.push_back(Arg);

        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store;

          if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
            EVT ObjType = (ObjSize == 1 ? MVT::i8 :
                           (ObjSize == 2 ? MVT::i16 : MVT::i32));
            Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
                                      MachinePointerInfo(&*FuncArg), ObjType);
          } else {
            // For sizes that don't fit a truncating store (3, 5, 6, 7),
            // store the whole register as-is to the parameter save area
            // slot.
            Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                 MachinePointerInfo(&*FuncArg));
          }

          MemOps.push_back(Store);
        }
        // Whether we copied from a register or not, advance the offset
        // into the parameter save area by a full doubleword.
        ArgOffset += PtrByteSize;
        continue;
      }

      // The value of the object is its address, which is the address of
      // its first stack doubleword.
      InVals.push_back(FIN);

      // Store whatever pieces of the object are in registers to memory.
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        if (GPR_idx == Num_GPR_Regs)
          break;

        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
        SDValue Addr = FIN;
        if (j) {
          SDValue Off = DAG.getConstant(j, dl, PtrVT);
          Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
        }
        SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
                                     MachinePointerInfo(&*FuncArg, j));
        MemOps.push_back(Store);
        ++GPR_idx;
      }
      ArgOffset += ArgSize;
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (Flags.isNest()) {
        // The 'nest' parameter, if any, is passed in R11.
        unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        break;
      }

      // These can be scalar arguments or elements of an integer array type
      // passed directly.  Clang may use those instead of "byval" aggregate
      // types to avoid forcing arguments to memory unnecessarily.
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // These can be scalar arguments or elements of a float array type
      // passed directly.  The latter are used to implement ELFv2 homogenous
      // float aggregates.
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx],
                              Subtarget.hasP8Vector()
                                  ? &PPC::VSSRCRegClass
                                  : &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
                                                ? &PPC::VSFRCRegClass
                                                : &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
        // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
        // once we support fp <-> gpr moves.

        // This can only ever happen in the presence of f32 array types,
        // since otherwise we never run out of FPRs before running out
        // of GPRs.
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::f32) {
          if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
            ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
                                 DAG.getConstant(32, dl, MVT::i32));
          ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
        }

        ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
      }

      // When passing an array of floats, the array occupies consecutive
      // space in the argument area; only round up to the next doubleword
      // at the end of the array.  Otherwise, each float takes 8 bytes.
      if (CallConv != CallingConv::Fast || needsLoad) {
        ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
        ArgOffset += ArgSize;
        if (Flags.isInConsecutiveRegsLast())
          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
    case MVT::v2f64:
    case MVT::v2i64:
    case MVT::v1i128:
      if (!Subtarget.hasQPX()) {
      // These can be scalar arguments or elements of a vector array type
      // passed directly.  The latter are used to implement ELFv2 homogenous
      // vector aggregates.
      if (VR_idx != Num_VR_Regs) {
        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++VR_idx;
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
      }
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += 16;
      break;
      } // not QPX

      assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
             "Invalid QPX parameter type");
      /* fall through */

    case MVT::v4f64:
    case MVT::v4i1:
      // QPX vectors are treated like their scalar floating-point subregisters
      // (except that they're larger).
      unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
      if (QFPR_idx != Num_QFPR_Regs) {
        const TargetRegisterClass *RC;
        switch (ObjectVT.getSimpleVT().SimpleTy) {
        case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
        case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
        default:         RC = &PPC::QBRCRegClass; break;
        }

        unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++QFPR_idx;
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();
        needsLoad = true;
      }
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += Sz;
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      if (ObjSize < ArgSize && !isLittleEndian)
        CurArgOffset += ArgSize - ObjSize;
      int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
    }

    InVals.push_back(ArgVal);
  }

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea;
  if (HasParameterArea)
    MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
  else
    MinReservedArea = LinkageSize;

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized functions' reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
      MFI.CreateFixedObject(PtrByteSize, Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
         GPR_idx < Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by eight for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}

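// [Editorial aside: illustrative sketch, not part of PPCISelLowering.cpp]
// Two formulas in ComputeArgOffset above carry most of the 64-bit SVR4
// argument layout: round the running offset up to the slot alignment, then
// derive the GPR index from the offset past the linkage area. A worked
// example, assuming the 32-byte ELFv2 linkage area:
#include <algorithm>
#include <cstdio>

int main() {
  const unsigned LinkageSize = 32, PtrByteSize = 8, NumGPRs = 8;
  unsigned ArgOffset = 44;  // running offset before a 16-byte-aligned arg
  unsigned Align = 16;
  ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;           // -> 48
  unsigned GPRIdx =
      std::min((ArgOffset - LinkageSize) / PtrByteSize, NumGPRs);  // -> 2
  std::printf("offset %u, GPR index %u\n", ArgOffset, GPRIdx);
  return 0;
}
// [End editorial aside]
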
SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = isPPC64 ? 8 : 4;
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  unsigned ArgOffset = LinkageSize;
  // Area that is at least reserved in caller of this function.
  unsigned MinReservedArea = ArgOffset;

  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
  const unsigned Num_VR_Regs  = array_lengthof(VR);

  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;

  // In 32-bit non-varargs functions, the stack space for vectors is after the
  // stack space for non-vectors.  We do not use this space unless we have
  // too many vectors to fit in registers, something that only occurs in
  // constructed examples :), but we have to walk the arglist to figure
  // that out...for the pathological case, compute VecArgOffset as the
  // start of the vector parameter area.  Computing VecArgOffset is the
  // entire point of the following loop.
  unsigned VecArgOffset = ArgOffset;
  if (!isVarArg && !isPPC64) {
    for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
         ++ArgNo) {
      EVT ObjectVT = Ins[ArgNo].VT;
      ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;

      if (Flags.isByVal()) {
        // ObjSize is the true size, ArgSize rounded up to multiple of regs.
        unsigned ObjSize = Flags.getByValSize();
        unsigned ArgSize =
                ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
        VecArgOffset += ArgSize;
        continue;
      }

      switch(ObjectVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unhandled argument type!");
      case MVT::i1:
      case MVT::i32:
      case MVT::f32:
        VecArgOffset += 4;
        break;
      case MVT::i64:  // PPC64
      case MVT::f64:
        // FIXME: We are guaranteed to be !isPPC64 at this point.
        // Does MVT::i64 apply?
        VecArgOffset += 8;
        break;
      case MVT::v4f32:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        // Nothing to do, we're only looking at Nonvector args here.
        break;
      }
    }
  }
  // We've found where the vector parameter area in memory is.  Skip the
  // first 12 parameters; these don't use that memory.
  VecArgOffset = ((VecArgOffset+15)/16)*16;
  VecArgOffset += 12*16;

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  SmallVector<SDValue, 8> MemOps;
  unsigned nAltivecParamsAtEnd = 0;
  Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    if (Ins[ArgNo].isOrigArg()) {
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    }
    unsigned CurArgOffset = ArgOffset;

    // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
    if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
        ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
      if (isVarArg || isPPC64) {
        MinReservedArea = ((MinReservedArea+15)/16)*16;
        MinReservedArea += CalculateStackSlotSize(ObjectVT,
                                                  Flags,
                                                  PtrByteSize);
      } else nAltivecParamsAtEnd++;
    } else
      // Calculate min reserved area.
      MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
                                                Flags,
                                                PtrByteSize);

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Objects of size 1 and 2 are right justified, everything else is
      // left justified.  This means the memory address is adjusted forwards.
      if (ObjSize==1 || ObjSize==2) {
        CurArgOffset = CurArgOffset + (4 - ObjSize);
      }
      // The value of the object is its address.
      int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(FIN);
      if (ObjSize==1 || ObjSize==2) {
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
          SDValue Store =
              DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
                                MachinePointerInfo(&*FuncArg), ObjType);
          MemOps.push_back(Store);
          ++GPR_idx;
        }

        ArgOffset += PtrByteSize;

        continue;
      }
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        // Store whatever pieces of the object are in registers
        // to memory.  ArgOffset will be the address of the beginning
        // of the object.
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                       MachinePointerInfo(&*FuncArg, j));
          MemOps.push_back(Store);
          ++GPR_idx;
          ArgOffset += PtrByteSize;
        } else {
          ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
          break;
        }
      }
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
      if (!isPPC64) {
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);

          if (ObjectVT == MVT::i1)
            ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);

          ++GPR_idx;
        } else {
          needsLoad = true;
          ArgSize = PtrByteSize;
        }
        // All int arguments reserve stack space in the Darwin ABI.
        ArgOffset += PtrByteSize;
        break;
      }
      LLVM_FALLTHROUGH;
    case MVT::i64:  // PPC64
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        ++GPR_idx;
      } else {
        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      // All int arguments reserve stack space in the Darwin ABI.
      ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // Every 4 bytes of argument space consumes one of the GPRs available for
      // argument passing.
      if (GPR_idx != Num_GPR_Regs) {
        ++GPR_idx;
        if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
          ++GPR_idx;
      }
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else {
        needsLoad = true;
      }

      // All FP arguments reserve stack space in the Darwin ABI.
      ArgOffset += isPPC64 ? 8 : ObjSize;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      // Note that vector arguments in registers don't reserve stack space,
      // except in varargs functions.
      if (VR_idx != Num_VR_Regs) {
        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        if (isVarArg) {
          while ((ArgOffset % 16) != 0) {
            ArgOffset += PtrByteSize;
            if (GPR_idx != Num_GPR_Regs)
              GPR_idx++;
          }
          ArgOffset += 16;
          GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
        }
        ++VR_idx;
      } else {
        if (!isVarArg && !isPPC64) {
          // Vectors go after all the nonvectors.
          CurArgOffset = VecArgOffset;
          VecArgOffset += 16;
        } else {
          // Vectors are aligned.
          ArgOffset = ((ArgOffset+15)/16)*16;
          CurArgOffset = ArgOffset;
          ArgOffset += 16;
        }
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined above
    // that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      int FI = MFI.CreateFixedObject(ObjSize,
                                     CurArgOffset + (ArgSize - ObjSize),
                                     isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
    }

    InVals.push_back(ArgVal);
  }

  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    MinReservedArea = ((MinReservedArea+15)/16)*16;
    MinReservedArea += 16*nAltivecParamsAtEnd;
  }

  // Area that is at least reserved in the caller of this function.
  MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized functions' reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
      MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
                            Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg;

      if (isPPC64)
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      else
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by the pointer size for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}

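// [Editorial aside: illustrative sketch, not part of PPCISelLowering.cpp]
// In the 32-bit Darwin layout computed above, the vector parameter area
// starts only after all non-vector argument bytes, rounded up to 16, plus
// the 12*16 bytes that the first 12 parameters never use. A worked example,
// assuming a 24-byte 32-bit Darwin linkage area:
#include <cstdio>

int main() {
  const unsigned LinkageSize = 24;   // assumed 32-bit Darwin linkage area
  unsigned VecArgOffset = LinkageSize;
  VecArgOffset += 4 + 8 + 4;         // e.g. an i32, an f64, and an f32
  VecArgOffset = ((VecArgOffset + 15) / 16) * 16;    // 40 -> 48
  VecArgOffset += 12 * 16;                           // skip 192 bytes
  std::printf("vector area starts at +%u\n", VecArgOffset);  // 240
  return 0;
}
// [End editorial aside]
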
/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
/// adjusted to accommodate the arguments for the tailcall.
static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
                                   unsigned ParamSize) {

  if (!isTailCall) return 0;

  PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
  unsigned CallerMinReservedArea = FI->getMinReservedArea();
  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
  // Remember only if the new adjustment is bigger.
  if (SPDiff < FI->getTailCallSPDelta())
    FI->setTailCallSPDelta(SPDiff);

  return SPDiff;
}

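// [Editorial aside: illustrative sketch, not part of PPCISelLowering.cpp]
// SPDiff is just the caller's reserved argument area minus the bytes the
// tail call needs; a negative result means the stack must grow before the
// jump. Hypothetical numbers:
#include <cstdio>

int main() {
  unsigned CallerMinReservedArea = 64; // hypothetical caller reservation
  unsigned ParamSize = 96;             // hypothetical tail-call argument bytes
  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
  std::printf("SPDiff = %d\n", SPDiff); // -32: grow the stack by 32 bytes
  return 0;
}
// [End editorial aside]
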
static bool isFunctionGlobalAddress(SDValue Callee);

static bool
callsShareTOCBase(const Function *Caller, SDValue Callee,
                    const TargetMachine &TM) {
  // If !G, Callee can be an external symbol.
  GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
  if (!G)
    return false;

  // The medium and large code models are expected to provide a sufficiently
  // large TOC to provide all data addressing needs of a module with a
  // single TOC. Since each module will be addressed with a single TOC, we
  // only need to check that caller and callee don't cross dso boundaries.
  if (CodeModel::Medium == TM.getCodeModel() ||
      CodeModel::Large == TM.getCodeModel())
    return TM.shouldAssumeDSOLocal(*Caller->getParent(), G->getGlobal());

  // Otherwise we need to ensure callee and caller are in the same section,
  // since the linker may allocate multiple TOCs, and we don't know which
  // sections will belong to the same TOC base.

  const GlobalValue *GV = G->getGlobal();
  if (!GV->isStrongDefinitionForLinker())
    return false;

  // Any explicitly-specified sections and section prefixes must also match.
  // Also, if we're using -ffunction-sections, then each function is always in
  // a different section (the same is true for COMDAT functions).
  if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
      GV->getSection() != Caller->getSection())
    return false;
  if (const auto *F = dyn_cast<Function>(GV)) {
    if (F->getSectionPrefix() != Caller->getSectionPrefix())
      return false;
  }

  // If the callee might be interposed, then we can't assume the ultimate call
  // target will be in the same section. Even in cases where we can assume that
  // interposition won't happen, in any case where the linker might insert a
  // stub to allow for interposition, we must generate code as though
  // interposition might occur. To understand why this matters, consider a
  // situation where: a -> b -> c where the arrows indicate calls. b and c are
  // in the same section, but a is in a different module (i.e. has a different
  // TOC base pointer). If the linker allows for interposition between b and c,
  // then it will generate a stub for the call edge between b and c which will
  // save the TOC pointer into the designated stack slot allocated by b. If we
  // return true here, and therefore allow a tail call between b and c, that
  // stack slot won't exist and the b -> c stub will end up saving b's TOC base
  // pointer into the stack slot allocated by a (where the a -> b stub saved
  // a's TOC base pointer). If we're not considering a tail call, but rather,
  // whether a nop is needed after the call instruction in b, because the linker
  // will insert a stub, it might complain about a missing nop if we omit it
  // (although many don't complain in this case).
  if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
    return false;

  return true;
}

static bool
needStackSlotPassParameters(const PPCSubtarget &Subtarget,
                            const SmallVectorImpl<ISD::OutputArg> &Outs) {
  assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64());

  const unsigned PtrByteSize = 8;
  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned NumGPRs = array_lengthof(GPR);
  const unsigned NumFPRs = 13;
  const unsigned NumVRs = array_lengthof(VR);
  const unsigned ParamAreaSize = NumGPRs * PtrByteSize;

  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = NumFPRs;
  unsigned AvailableVRs = NumVRs;

  for (const ISD::OutputArg& Param : Outs) {
    if (Param.Flags.isNest()) continue;

    if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags,
                               PtrByteSize, LinkageSize, ParamAreaSize,
                               NumBytes, AvailableFPRs, AvailableVRs,
                               Subtarget.hasQPX()))
      return true;
  }
  return false;
}

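// [Editorial aside: illustrative sketch, not part of PPCISelLowering.cpp]
// For simple integer arguments, the helper above reduces to asking whether
// any argument falls outside the 8 GPR slots (8 * 8 = 64 bytes) of the
// parameter save area; the ninth doubleword is the first to need a real
// stack slot. (CalculateStackSlotUsed also models FPRs, VRs, and aggregates,
// which this simplification ignores.)
#include <cstdio>

int main() {
  const unsigned PtrByteSize = 8, NumGPRs = 8;
  const unsigned ParamAreaSize = NumGPRs * PtrByteSize; // 64 bytes
  for (unsigned Arg = 0; Arg != 10; ++Arg) {
    unsigned Offset = Arg * PtrByteSize; // offset within the save area
    std::printf("arg %u: %s\n", Arg,
                Offset < ParamAreaSize ? "register" : "stack slot");
  }
  return 0;
}
// [End editorial aside]
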
static bool
hasSameArgumentList(const Function *CallerFn, ImmutableCallSite CS) {
  if (CS.arg_size() != CallerFn->arg_size())
    return false;

  ImmutableCallSite::arg_iterator CalleeArgIter = CS.arg_begin();
  ImmutableCallSite::arg_iterator CalleeArgEnd = CS.arg_end();
  Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();

  for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
    const Value* CalleeArg = *CalleeArgIter;
    const Value* CallerArg = &(*CallerArgIter);
    if (CalleeArg == CallerArg)
      continue;

    // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
    //        tail call @callee([4 x i64] undef, [4 x i64] %b)
    //      }
    // The 1st argument of the callee is undef and has the same type as the
    // caller's corresponding argument.
    if (CalleeArg->getType() == CallerArg->getType() &&
        isa<UndefValue>(CalleeArg))
      continue;

    return false;
  }

  return true;
}

bool
PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
                                    SDValue Callee,
                                    CallingConv::ID CalleeCC,
                                    ImmutableCallSite CS,
                                    bool isVarArg,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                    SelectionDAG& DAG) const {
  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;

  if (DisableSCO && !TailCallOpt) return false;

  // Variadic argument functions are not supported.
  if (isVarArg) return false;

  MachineFunction &MF = DAG.getMachineFunction();
  CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();

  // Tail or Sibling call optimization (TCO/SCO) needs the callee and caller to
  // have the same calling convention.
  if (CallerCC != CalleeCC) return false;

  // SCO supports only the C and fast calling conventions.
  if (CalleeCC != CallingConv::Fast && CalleeCC != CallingConv::C)
    return false;

  // A caller that has any byval parameter is not supported.
  if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
    return false;

  // A callee that has any byval parameter is not supported, either.
  // Note: This is a quick work around, because in some cases, e.g.
  // caller's stack size > callee's stack size, we are still able to apply
  // sibling call optimization. See: https://reviews.llvm.org/D23441#513574
  if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
    return false;

  // No TCO/SCO on indirect calls because the caller has to restore its TOC.
  if (!isFunctionGlobalAddress(Callee) &&
      !isa<ExternalSymbolSDNode>(Callee))
    return false;

  // If the caller and callee potentially have different TOC bases then we
  // cannot tail call since we need to restore the TOC pointer after the call.
  // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
  if (!callsShareTOCBase(MF.getFunction(), Callee, getTargetMachine()))
    return false;

  // TCO allows altering callee ABI, so we don't have to check further.
  if (CalleeCC == CallingConv::Fast && TailCallOpt)
    return true;

  if (DisableSCO) return false;

  // If the callee uses the same argument list that the caller is using, then
  // we can apply SCO on this case. If it does not, then we need to check if
  // the callee needs stack for passing arguments.
  if (!hasSameArgumentList(MF.getFunction(), CS) &&
      needStackSlotPassParameters(Subtarget, Outs)) {
    return false;
  }

  return true;
}

/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
bool
PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
                                                     CallingConv::ID CalleeCC,
                                                     bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                                     SelectionDAG& DAG) const {
  if (!getTargetMachine().Options.GuaranteedTailCallOpt)
    return false;

  // Variable argument functions are not supported.
  if (isVarArg)
    return false;

  MachineFunction &MF = DAG.getMachineFunction();
  CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
  if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
    // Functions containing byval parameters are not supported.
    for (unsigned i = 0; i != Ins.size(); i++) {
       ISD::ArgFlagsTy Flags = Ins[i].Flags;
       if (Flags.isByVal()) return false;
    }

    // Non-PIC/GOT tail calls are supported.
    if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
      return true;

    // At the moment we can only do local tail calls (in same module, hidden
    // or protected) if we are generating PIC.
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
      return G->getGlobal()->hasHiddenVisibility()
          || G->getGlobal()->hasProtectedVisibility();
  }

  return false;
}

/// isBLACompatibleAddress - Return the immediate to use if the specified
/// 32-bit value is representable in the immediate field of a BxA instruction.
static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return nullptr;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      SignExtend32<26>(Addr) != Addr)
    return nullptr;  // Top 6 bits have to be sext of immediate.

  return DAG
      .getConstant(
          (int)C->getZExtValue() >> 2, SDLoc(Op),
          DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
      .getNode();
}

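// [Editorial aside: illustrative sketch, not part of PPCISelLowering.cpp]
// The test above accepts an absolute address only if its low 2 bits are zero
// and it survives a 26-bit sign-extension round trip: the BLA instruction
// encodes a 24-bit immediate that is shifted left by 2 and sign-extended.
// A standalone re-implementation of the check:
#include <cstdint>
#include <cstdio>

// Mirrors llvm::SignExtend32<26> for this use.
static int32_t signExtend26(int32_t X) {
  return (int32_t)((uint32_t)X << 6) >> 6;
}

int main() {
  for (int32_t Addr : {0x1000, 0x1001, 0x2000000}) {
    bool OK = (Addr & 3) == 0 && signExtend26(Addr) == Addr;
    std::printf("0x%x: %s\n", (unsigned)Addr,
                OK ? "BLA-encodable" : "not encodable");
  }
  return 0;
}
// [End editorial aside]
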
namespace {

struct TailCallArgumentInfo {
  SDValue Arg;
  SDValue FrameIdxOp;
  int FrameIdx = 0;

  TailCallArgumentInfo() = default;
};

} // end anonymous namespace

/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static void StoreTailCallArgumentsToStackSlot(
    SelectionDAG &DAG, SDValue Chain,
    const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
    SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
    SDValue Arg = TailCallArgs[i].Arg;
    SDValue FIN = TailCallArgs[i].FrameIdxOp;
    int FI = TailCallArgs[i].FrameIdx;
    // Store relative to framepointer.
    MemOpChains.push_back(DAG.getStore(
        Chain, dl, Arg, FIN,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
  }
}

/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
/// the appropriate stack slot for the tail call optimized function call.
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
                                             SDValue OldRetAddr, SDValue OldFP,
                                             int SPDiff, const SDLoc &dl) {
  if (SPDiff) {
    // Calculate the new stack slot for the return address.
    MachineFunction &MF = DAG.getMachineFunction();
    const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
    const PPCFrameLowering *FL = Subtarget.getFrameLowering();
    bool isPPC64 = Subtarget.isPPC64();
    int SlotSize = isPPC64 ? 8 : 4;
    int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
    int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
                                                         NewRetAddrLoc, true);
    EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
    SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
    Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
                         MachinePointerInfo::getFixedStack(MF, NewRetAddr));

    // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
    // slot as the FP is never overwritten.
    if (Subtarget.isDarwinABI()) {
      int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
      int NewFPIdx = MF.getFrameInfo().CreateFixedObject(SlotSize, NewFPLoc,
                                                         true);
      SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
      Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
                           MachinePointerInfo::getFixedStack(
                               DAG.getMachineFunction(), NewFPIdx));
    }
  }
  return Chain;
}

/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
/// the position of the argument.
static void
CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
                         SDValue Arg, int SPDiff, unsigned ArgOffset,
                     SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
  int Offset = ArgOffset + SPDiff;
  uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
  int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
  EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
  SDValue FIN = DAG.getFrameIndex(FI, VT);
  TailCallArgumentInfo Info;
  Info.Arg = Arg;
  Info.FrameIdxOp = FIN;
  Info.FrameIdx = FI;
  TailCallArguments.push_back(Info);
}

/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
/// stack slot. Returns the chain as result and the loaded frame pointers in
/// LROpOut/FPOpout. Used when tail calling.
SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
    SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
    SDValue &FPOpOut, const SDLoc &dl) const {
  if (SPDiff) {
    // Load the LR and FP stack slot for later adjusting.
    EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
    LROpOut = getReturnAddrFrameIndex(DAG);
    LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
    Chain = SDValue(LROpOut.getNode(), 1);

    // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
    // slot as the FP is never overwritten.
    if (Subtarget.isDarwinABI()) {
      FPOpOut = getFramePointerFrameIndex(DAG);
      FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo());
      Chain = SDValue(FPOpOut.getNode(), 1);
    }
  }
  return Chain;
}

/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" of size "Size".  Alignment information is
/// specified by the specific parameter attribute. The copy will be passed as
/// a byval function parameter.
/// Sometimes what we are copying is the end of a larger object, the part that
/// does not fit in registers.
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
                                         SDValue Chain, ISD::ArgFlagsTy Flags,
                                         SelectionDAG &DAG, const SDLoc &dl) {
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
                       false, false, false, MachinePointerInfo(),
                       MachinePointerInfo());
}

/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
4626
/// tail calls.
4627
static void LowerMemOpCallTo(
4628
    SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
4629
    SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
4630
    bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
4631
472
    SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
4632
472
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4633
472
  if (!isTailCall) {
4634
408
    if (isVector) {
4635
88
      SDValue StackPtr;
4636
88
      if (isPPC64)
4637
88
        StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4638
88
      else
4639
0
        StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4640
88
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
4641
88
                           DAG.getConstant(ArgOffset, dl, PtrVT));
4642
88
    }
4643
408
    MemOpChains.push_back(
4644
408
        DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
4645
408
    // Calculate and remember argument location.
4646
472
  } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
4647
64
                                  TailCallArguments);
4648
472
}
4649
4650
static void
4651
PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
4652
                const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
4653
                SDValue FPOp,
4654
3
                SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
4655
3
  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
4656
3
  // might overwrite each other in case of tail call optimization.
4657
3
  SmallVector<SDValue, 8> MemOpChains2;
4658
3
  // Do not flag preceding copytoreg stuff together with the following stuff.
4659
3
  InFlag = SDValue();
4660
3
  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
4661
3
                                    MemOpChains2, dl);
4662
3
  if (!MemOpChains2.empty())
4663
0
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4664
3
4665
3
  // Store the return address to the appropriate stack slot.
4666
3
  Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
4667
3
4668
3
  // Emit callseq_end just before tailcall node.
4669
3
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4670
3
                             DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4671
3
  InFlag = Chain.getValue(1);
4672
3
}
4673
4674
// Is this global address that of a function that can be called by name? (as
4675
// opposed to something that must hold a descriptor for an indirect call).
4676
2.93k
static bool isFunctionGlobalAddress(SDValue Callee) {
4677
2.93k
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
4678
2.43k
    if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
4679
2.42k
        Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
4680
3
      return false;
4681
2.42k
4682
2.42k
    return G->getGlobal()->getValueType()->isFunctionTy();
4683
2.42k
  }
4684
506
4685
506
  return false;
4686
506
}
4687
4688
static unsigned
4689
PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
4690
            SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall,
4691
            bool isPatchPoint, bool hasNest,
4692
            SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
4693
            SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
4694
1.55k
            ImmutableCallSite CS, const PPCSubtarget &Subtarget) {
4695
1.55k
  bool isPPC64 = Subtarget.isPPC64();
4696
1.55k
  bool isSVR4ABI = Subtarget.isSVR4ABI();
4697
1.55k
  bool isELFv2ABI = Subtarget.isELFv2ABI();
4698
1.55k
4699
1.55k
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4700
1.55k
  NodeTys.push_back(MVT::Other);   // Returns a chain
4701
1.55k
  NodeTys.push_back(MVT::Glue);    // Returns a flag for retval copy to use.
4702
1.55k
4703
1.55k
  unsigned CallOpc = PPCISD::CALL;
4704
1.55k
4705
1.55k
  bool needIndirectCall = true;
4706
1.55k
  if (!isSVR4ABI || !isPPC64)
4707
351
    if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
4708
9
      // If this is an absolute destination address, use the munged value.
4709
9
      Callee = SDValue(Dest, 0);
4710
9
      needIndirectCall = false;
4711
9
    }
4712
1.55k
4713
1.55k
  // PC-relative references to external symbols should go through $stub, unless
4714
1.55k
  // we're building with the leopard linker or later, which automatically
4715
1.55k
  // synthesizes these stubs.
4716
1.55k
  const TargetMachine &TM = DAG.getTarget();
4717
1.55k
  const Module *Mod = DAG.getMachineFunction().getFunction()->getParent();
4718
1.55k
  const GlobalValue *GV = nullptr;
4719
1.55k
  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
4720
1.22k
    GV = G->getGlobal();
4721
1.55k
  bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
4722
1.55k
  bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64;
4723
1.55k
4724
1.55k
  if (isFunctionGlobalAddress(Callee)) {
4725
1.22k
    GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
4726
1.22k
    // A call to a TLS address is actually an indirect call to a
4727
1.22k
    // thread-specific pointer.
4728
1.22k
    unsigned OpFlags = 0;
4729
1.22k
    if (UsePlt)
4730
97
      OpFlags = PPCII::MO_PLT;
4731
1.22k
4732
1.22k
    // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
4733
1.22k
    // every direct call is) turn it into a TargetGlobalAddress /
4734
1.22k
    // TargetExternalSymbol node so that legalize doesn't hack it.
4735
1.22k
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
4736
1.22k
                                        Callee.getValueType(), 0, OpFlags);
4737
1.22k
    needIndirectCall = false;
4738
1.22k
  }
4739
1.55k
4740
1.55k
  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
4741
256
    unsigned char OpFlags = 0;
4742
256
4743
256
    if (UsePlt)
4744
120
      OpFlags = PPCII::MO_PLT;
4745
256
4746
256
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
4747
256
                                         OpFlags);
4748
256
    needIndirectCall = false;
4749
256
  }
4750
1.55k
4751
1.55k
  if (isPatchPoint) {
4752
28
    // We'll form an invalid direct call when lowering a patchpoint; the full
4753
28
    // sequence for an indirect call is complicated, and many of the
4754
28
    // instructions introduced might have side effects (and, thus, can't be
4755
28
    // removed later). The call itself will be removed as soon as the
4756
28
    // argument/return lowering is complete, so the fact that it has the wrong
4757
28
    // kind of operands should not really matter.
4758
28
    needIndirectCall = false;
4759
28
  }
4760
1.55k
4761
1.55k
  if (needIndirectCall) {
4762
37
    // Otherwise, this is an indirect call.  We have to use a MTCTR/BCTRL pair
4763
37
    // to do the call, we can't use PPCISD::CALL.
4764
37
    SDValue MTCTROps[] = {Chain, Callee, InFlag};
4765
37
4766
37
    if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
4767
19
      // Function pointers in the 64-bit SVR4 ABI do not point to the function
4768
19
      // entry point, but to the function descriptor (the function entry point
4769
19
      // address is part of the function descriptor though).
4770
19
      // The function descriptor is a three doubleword structure with the
4771
19
      // following fields: function entry point, TOC base address and
4772
19
      // environment pointer.
4773
19
      // Thus for a call through a function pointer, the following actions need
4774
19
      // to be performed:
4775
19
      //   1. Save the TOC of the caller in the TOC save area of its stack
4776
19
      //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
4777
19
      //   2. Load the address of the function entry point from the function
4778
19
      //      descriptor.
4779
19
      //   3. Load the TOC of the callee from the function descriptor into r2.
4780
19
      //   4. Load the environment pointer from the function descriptor into
4781
19
      //      r11.
4782
19
      //   5. Branch to the function entry point address.
4783
19
      //   6. On return of the callee, the TOC of the caller needs to be
4784
19
      //      restored (this is done in FinishCall()).
4785
19
      //
4786
19
      // The loads are scheduled at the beginning of the call sequence, and the
4787
19
      // register copies are flagged together to ensure that no other
4788
19
      // operations can be scheduled in between. E.g. without flagging the
4789
19
      // copies together, a TOC access in the caller could be scheduled between
4790
19
      // the assignment of the callee TOC and the branch to the callee, which
4791
19
      // results in the TOC access going through the TOC of the callee instead
4792
19
      // of going through the TOC of the caller, which leads to incorrect code.
4793
19
4794
19
      // Load the address of the function entry point from the function
4795
19
      // descriptor.
4796
19
      SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
4797
19
      if (LDChain.getValueType() == MVT::Glue)
4798
19
        LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);
4799
19
4800
19
      auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
4801
18
                          ? (MachineMemOperand::MODereferenceable |
4802
18
                             MachineMemOperand::MOInvariant)
4803
1
                          : MachineMemOperand::MONone;
4804
19
4805
19
      MachinePointerInfo MPI(CS ? CS.getCalledValue() : nullptr);
4806
19
      SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
4807
19
                                        /* Alignment = */ 8, MMOFlags);
4808
19
4809
19
      // Load environment pointer into r11.
4810
19
      SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
4811
19
      SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
4812
19
      SDValue LoadEnvPtr =
4813
19
          DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16),
4814
19
                      /* Alignment = */ 8, MMOFlags);
4815
19
4816
19
      SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
4817
19
      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
4818
19
      SDValue TOCPtr =
4819
19
          DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8),
4820
19
                      /* Alignment = */ 8, MMOFlags);
4821
19
4822
19
      setUsesTOCBasePtr(DAG);
4823
19
      SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
4824
19
                                        InFlag);
4825
19
      Chain = TOCVal.getValue(0);
4826
19
      InFlag = TOCVal.getValue(1);
4827
19
4828
19
      // If the function call has an explicit 'nest' parameter, it takes the
4829
19
      // place of the environment pointer.
4830
19
      if (!hasNest) {
4831
18
        SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
4832
18
                                          InFlag);
4833
18
4834
18
        Chain = EnvVal.getValue(0);
4835
18
        InFlag = EnvVal.getValue(1);
4836
18
      }
4837
19
4838
19
      MTCTROps[0] = Chain;
4839
19
      MTCTROps[1] = LoadFuncPtr;
4840
19
      MTCTROps[2] = InFlag;
4841
19
    }
4842
37
4843
37
    Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
4844
37
                        makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
4845
37
    InFlag = Chain.getValue(1);
4846
37
4847
37
    NodeTys.clear();
4848
37
    NodeTys.push_back(MVT::Other);
4849
37
    NodeTys.push_back(MVT::Glue);
4850
37
    Ops.push_back(Chain);
4851
37
    CallOpc = PPCISD::BCTRL;
4852
37
    Callee.setNode(nullptr);
4853
37
    // Add use of X11 (holding environment pointer)
4854
37
    if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest)
4855
18
      Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
4856
37
    // Add CTR register as callee so a bctr can be emitted later.
4857
37
    if (isTailCall)
4858
0
      Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
4859
37
  }
4860
1.55k
4861
1.55k
  // If this is a direct call, pass the chain and the callee.
4862
1.55k
  if (Callee.getNode()) {
4863
1.51k
    Ops.push_back(Chain);
4864
1.51k
    Ops.push_back(Callee);
4865
1.51k
  }
4866
1.55k
  // If this is a tail call add stack pointer delta.
4867
1.55k
  if (isTailCall)
4868
58
    Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
4869
1.55k
4870
1.55k
  // Add argument registers to the end of the list so that they are known live
4871
1.55k
  // into the call.
4872
5.36k
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
4873
3.81k
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
4874
3.81k
                                  RegsToPass[i].second.getValueType()));
4875
1.55k
4876
1.55k
  // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live
4877
1.55k
  // into the call.
4878
1.55k
  if (isSVR4ABI && isPPC64 && !isPatchPoint) {
4879
1.17k
    setUsesTOCBasePtr(DAG);
4880
1.17k
    Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
4881
1.17k
  }
4882
1.55k
4883
1.55k
  return CallOpc;
4884
1.55k
}
4885
4886
SDValue PPCTargetLowering::LowerCallResult(
4887
    SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
4888
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4889
1.49k
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4890
1.49k
  SmallVector<CCValAssign, 16> RVLocs;
4891
1.49k
  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
4892
1.49k
                    *DAG.getContext());
4893
1.49k
  CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
4894
1.49k
4895
1.49k
  // Copy all of the result registers out of their specified physreg.
4896
2.35k
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4897
861
    CCValAssign &VA = RVLocs[i];
4898
861
    assert(VA.isRegLoc() && "Can only return in registers!");
4899
861
4900
861
    SDValue Val = DAG.getCopyFromReg(Chain, dl,
4901
861
                                     VA.getLocReg(), VA.getLocVT(), InFlag);
4902
861
    Chain = Val.getValue(1);
4903
861
    InFlag = Val.getValue(2);
4904
861
4905
861
    switch (VA.getLocInfo()) {
4906
0
    default: llvm_unreachable("Unknown loc info!");
4907
742
    case CCValAssign::Full: break;
4908
55
    case CCValAssign::AExt:
4909
55
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4910
55
      break;
4911
9
    case CCValAssign::ZExt:
4912
9
      Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
4913
9
                        DAG.getValueType(VA.getValVT()));
4914
9
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4915
9
      break;
4916
55
    case CCValAssign::SExt:
4917
55
      Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
4918
55
                        DAG.getValueType(VA.getValVT()));
4919
55
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4920
55
      break;
4921
861
    }
4922
861
4923
861
    InVals.push_back(Val);
4924
861
  }
4925
1.49k
4926
1.49k
  return Chain;
4927
1.49k
}
4928
4929
SDValue PPCTargetLowering::FinishCall(
4930
    CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg,
4931
    bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
4932
    SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag,
4933
    SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
4934
    unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
4935
1.55k
    SmallVectorImpl<SDValue> &InVals, ImmutableCallSite CS) const {
4936
1.55k
  std::vector<EVT> NodeTys;
4937
1.55k
  SmallVector<SDValue, 8> Ops;
4938
1.55k
  unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
4939
1.55k
                                 SPDiff, isTailCall, isPatchPoint, hasNest,
4940
1.55k
                                 RegsToPass, Ops, NodeTys, CS, Subtarget);
4941
1.55k
4942
1.55k
  // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
4943
1.55k
  if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
4944
57
    Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
4945
1.55k
4946
1.55k
  // When performing tail call optimization the callee pops its arguments off
4947
1.55k
  // the stack. Account for this here so these bytes can be pushed back on in
4948
1.55k
  // PPCFrameLowering::eliminateCallFramePseudoInstr.
4949
1.55k
  int BytesCalleePops =
4950
1.55k
    (CallConv == CallingConv::Fast &&
4951
1.55k
     getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
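  // In other words, BytesCalleePops is NumBytes only for fastcc calls
  // compiled with GuaranteedTailCallOpt; every other call leaves the stack
  // adjustment entirely to the caller (BytesCalleePops == 0).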
4952
1.55k
4953
1.55k
  // Add a register mask operand representing the call-preserved registers.
4954
1.55k
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4955
1.55k
  const uint32_t *Mask =
4956
1.55k
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
4957
1.55k
  assert(Mask && "Missing call preserved mask for calling convention");
4958
1.55k
  Ops.push_back(DAG.getRegisterMask(Mask));
4959
1.55k
4960
1.55k
  if (InFlag.getNode())
4961
1.04k
    Ops.push_back(InFlag);
4962
1.55k
4963
1.55k
  // Emit tail call.
4964
1.55k
  if (isTailCall) {
4965
58
    assert(((Callee.getOpcode() == ISD::Register &&
4966
58
             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
4967
58
            Callee.getOpcode() == ISD::TargetExternalSymbol ||
4968
58
            Callee.getOpcode() == ISD::TargetGlobalAddress ||
4969
58
            isa<ConstantSDNode>(Callee)) &&
4970
58
    "Expecting an global address, external symbol, absolute value or register");
4971
58
4972
58
    DAG.getMachineFunction().getFrameInfo().setHasTailCall();
4973
58
    return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
4974
58
  }
4975
1.49k
4976
1.49k
  // Add a NOP immediately after the branch instruction when using the 64-bit
4977
1.49k
  // SVR4 ABI. At link time, if caller and callee are in a different module and
4978
1.49k
  // thus have a different TOC, the call will be replaced with a call to a stub
4979
1.49k
  // function which saves the current TOC, loads the TOC of the callee and
4980
1.49k
  // branches to the callee. The NOP will be replaced with a load instruction
4981
1.49k
  // which restores the TOC of the caller from the TOC save slot of the current
4982
1.49k
  // stack frame. If caller and callee belong to the same module (and have the
4983
1.49k
  // same TOC), the NOP will remain unchanged.
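  // Illustrative object-code view of that sequence (a sketch, not taken from
  // this file), for a potentially cross-module call:
  //
  //   bl callee    // may be redirected to a TOC-switching stub at link time
  //   nop          // may be rewritten to: ld r2, <TOCSaveOffset>(r1)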
4984
1.49k
4985
1.49k
  MachineFunction &MF = DAG.getMachineFunction();
4986
1.49k
  if (!isTailCall && Subtarget.isSVR4ABI() && Subtarget.isPPC64() &&
4987
1.49k
      !isPatchPoint) {
4988
1.11k
    if (CallOpc == PPCISD::BCTRL) {
4989
28
      // This is a call through a function pointer.
4990
28
      // Restore the caller TOC from the save area into R2.
4991
28
      // See PrepareCall() for more information about calls through function
4992
28
      // pointers in the 64-bit SVR4 ABI.
4993
28
      // We are using a target-specific load with r2 hard coded, because the
4994
28
      // result of a target-independent load would never go directly into r2,
4995
28
      // since r2 is a reserved register (which prevents the register allocator
4996
28
      // from allocating it), resulting in an additional register being
4997
28
      // allocated and an unnecessary move instruction being generated.
4998
28
      CallOpc = PPCISD::BCTRL_LOAD_TOC;
4999
28
5000
28
      EVT PtrVT = getPointerTy(DAG.getDataLayout());
5001
28
      SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
5002
28
      unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5003
28
      SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5004
28
      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
5005
28
5006
28
      // The address needs to go after the chain input but before the flag (or
5007
28
      // any other variadic arguments).
5008
28
      Ops.insert(std::next(Ops.begin()), AddTOC);
5009
1.11k
    } else if (CallOpc == PPCISD::CALL &&
5010
1.08k
      !callsShareTOCBase(MF.getFunction(), Callee, DAG.getTarget())) {
5011
1.03k
      // Otherwise insert NOP for non-local calls.
5012
1.03k
      CallOpc = PPCISD::CALL_NOP;
5013
1.03k
    }
5014
1.11k
  }
5015
1.49k
5016
1.49k
  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
5017
1.49k
  InFlag = Chain.getValue(1);
5018
1.49k
5019
1.49k
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5020
1.49k
                             DAG.getIntPtrConstant(BytesCalleePops, dl, true),
5021
1.49k
                             InFlag, dl);
5022
1.49k
  if (!Ins.empty())
5023
726
    InFlag = Chain.getValue(1);
5024
1.55k
5025
1.55k
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
5026
1.55k
                         Ins, dl, DAG, InVals);
5027
1.55k
}
5028
5029
SDValue
5030
PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5031
1.55k
                             SmallVectorImpl<SDValue> &InVals) const {
5032
1.55k
  SelectionDAG &DAG                     = CLI.DAG;
5033
1.55k
  SDLoc &dl                             = CLI.DL;
5034
1.55k
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5035
1.55k
  SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
5036
1.55k
  SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
5037
1.55k
  SDValue Chain                         = CLI.Chain;
5038
1.55k
  SDValue Callee                        = CLI.Callee;
5039
1.55k
  bool &isTailCall                      = CLI.IsTailCall;
5040
1.55k
  CallingConv::ID CallConv              = CLI.CallConv;
5041
1.55k
  bool isVarArg                         = CLI.IsVarArg;
5042
1.55k
  bool isPatchPoint                     = CLI.IsPatchPoint;
5043
1.55k
  ImmutableCallSite CS                  = CLI.CS;
5044
1.55k
5045
1.55k
  if (isTailCall) {
5046
204
    if (Subtarget.useLongCalls() && !(CS && CS.isMustTailCall()))
5047
1
      isTailCall = false;
5048
203
    else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5049
187
      isTailCall =
5050
187
        IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS,
5051
187
                                                 isVarArg, Outs, Ins, DAG);
5052
203
    else
5053
16
      isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
5054
16
                                                     Ins, DAG);
5055
204
    if (isTailCall) {
5056
58
      ++NumTailCalls;
5057
58
      if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5058
55
        ++NumSiblingCalls;
5059
58
5060
58
      assert(isa<GlobalAddressSDNode>(Callee) &&
5061
58
             "Callee should be an llvm::Function object.");
5062
58
      DEBUG(
5063
58
        const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5064
58
        const unsigned Width = 80 - strlen("TCO caller: ")
5065
58
                                  - strlen(", callee linkage: 0, 0");
5066
58
        dbgs() << "TCO caller: "
5067
58
               << left_justify(DAG.getMachineFunction().getName(), Width)
5068
58
               << ", callee linkage: "
5069
58
               << GV->getVisibility() << ", " << GV->getLinkage() << "\n"
5070
58
      );
5071
58
    }
5072
204
  }
5073
1.55k
5074
1.55k
  if (!isTailCall && CS && CS.isMustTailCall())
5075
0
    report_fatal_error("failed to perform tail call elimination on a call "
5076
0
                       "site marked musttail");
5077
1.55k
5078
1.55k
  // When long calls (i.e. indirect calls) are always used, calls are always
5079
1.55k
  // made via function pointer. If we have a function name, first translate it
5080
1.55k
  // into a pointer.
5081
1.55k
  if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5082
1
      !isTailCall)
5083
1
    Callee = LowerGlobalAddress(Callee, DAG);
5084
1.55k
5085
1.55k
  if (Subtarget.isSVR4ABI()) {
5086
1.43k
    if (Subtarget.isPPC64())
5087
1.20k
      return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
5088
1.20k
                              isTailCall, isPatchPoint, Outs, OutVals, Ins,
5089
1.20k
                              dl, DAG, InVals, CS);
5090
1.43k
    else
5091
232
      return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
5092
232
                              isTailCall, isPatchPoint, Outs, OutVals, Ins,
5093
232
                              dl, DAG, InVals, CS);
5094
119
  }
5095
119
5096
119
  return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
5097
119
                          isTailCall, isPatchPoint, Outs, OutVals, Ins,
5098
119
                          dl, DAG, InVals, CS);
5099
119
}
5100
5101
SDValue PPCTargetLowering::LowerCall_32SVR4(
5102
    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
5103
    bool isTailCall, bool isPatchPoint,
5104
    const SmallVectorImpl<ISD::OutputArg> &Outs,
5105
    const SmallVectorImpl<SDValue> &OutVals,
5106
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5107
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5108
232
    ImmutableCallSite CS) const {
5109
232
  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5110
232
  // of the 32-bit SVR4 ABI stack frame layout.
5111
232
5112
232
  assert((CallConv == CallingConv::C ||
5113
232
          CallConv == CallingConv::Fast) && "Unknown calling convention!");
5114
232
5115
232
  unsigned PtrByteSize = 4;
5116
232
5117
232
  MachineFunction &MF = DAG.getMachineFunction();
5118
232
5119
232
  // Mark this function as potentially containing a function that contains a
5120
232
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
5121
232
  // and restoring the caller's stack pointer in this function's epilog. This is
5122
232
  // done because by tail calling the called function might overwrite the value
5123
232
  // in this function's (MF) stack pointer stack slot 0(SP).
5124
232
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5125
1
      CallConv == CallingConv::Fast)
5126
1
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5127
232
5128
232
  // Count how many bytes are to be pushed on the stack, including the linkage
5129
232
  // area, parameter list area and the part of the local variable space which
5130
232
  // contains copies of aggregates which are passed by value.
5131
232
5132
232
  // Assign locations to all of the outgoing arguments.
5133
232
  SmallVector<CCValAssign, 16> ArgLocs;
5134
232
  PPCCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
5135
232
5136
232
  // Reserve space for the linkage area on the stack.
5137
232
  CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5138
232
                       PtrByteSize);
5139
232
  if (useSoftFloat())
5140
27
    CCInfo.PreAnalyzeCallOperands(Outs);
5141
232
5142
232
  if (isVarArg) {
5143
57
    // Handle fixed and variable vector arguments differently.
5144
57
    // Fixed vector arguments go into registers as long as registers are
5145
57
    // available. Variable vector arguments always go into memory.
5146
57
    unsigned NumArgs = Outs.size();
5147
57
5148
157
    for (unsigned i = 0; i != NumArgs; ++i) {
5149
100
      MVT ArgVT = Outs[i].VT;
5150
100
      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5151
100
      bool Result;
5152
100
5153
100
      if (Outs[i].IsFixed) {
5154
17
        Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5155
17
                               CCInfo);
5156
100
      } else {
5157
83
        Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5158
83
                                      ArgFlags, CCInfo);
5159
83
      }
5160
100
5161
100
      if (Result) {
5162
#ifndef NDEBUG
5163
        errs() << "Call operand #" << i << " has unhandled type "
5164
             << EVT(ArgVT).getEVTString() << "\n";
5165
#endif
5166
0
        llvm_unreachable(nullptr);
5167
0
      }
5168
100
    }
5169
232
  } else {
5170
175
    // All arguments are treated the same.
5171
175
    CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5172
175
  }
5173
232
  CCInfo.clearWasPPCF128();
5174
232
5175
232
  // Assign locations to all of the outgoing aggregate by value arguments.
5176
232
  SmallVector<CCValAssign, 16> ByValArgLocs;
5177
232
  CCState CCByValInfo(CallConv, isVarArg, MF, ByValArgLocs, *DAG.getContext());
5178
232
5179
232
  // Reserve stack space for the allocations in CCInfo.
5180
232
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
5181
232
5182
232
  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5183
232
5184
232
  // Size of the linkage area, parameter list area and the part of the local
5185
232
  // space variable where copies of aggregates which are passed by value are
5186
232
  // stored.
5187
232
  unsigned NumBytes = CCByValInfo.getNextStackOffset();
5188
232
5189
232
  // Calculate by how many bytes the stack has to be adjusted in case of tail
5190
232
  // call optimization.
5191
232
  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5192
232
5193
232
  // Adjust the stack pointer for the new arguments...
5194
232
  // These operations are automatically eliminated by the prolog/epilog pass
5195
232
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5196
232
  SDValue CallSeqStart = Chain;
5197
232
5198
232
  // Load the return address and frame pointer so it can be moved somewhere else
5199
232
  // later.
5200
232
  SDValue LROp, FPOp;
5201
232
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5202
232
5203
232
  // Set up a copy of the stack pointer for use loading and storing any
5204
232
  // arguments that may not fit in the registers available for argument
5205
232
  // passing.
5206
232
  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5207
232
5208
232
  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5209
232
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5210
232
  SmallVector<SDValue, 8> MemOpChains;
5211
232
5212
232
  bool seenFloatArg = false;
5213
232
  // Walk the register/memloc assignments, inserting copies/loads.
5214
232
  for (unsigned i = 0, j = 0, e = ArgLocs.size();
5215
739
       i != e;
5216
507
       ++i) {
5217
507
    CCValAssign &VA = ArgLocs[i];
5218
507
    SDValue Arg = OutVals[i];
5219
507
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
5220
507
5221
507
    if (Flags.isByVal()) {
5222
0
      // Argument is an aggregate which is passed by value, thus we need to
5223
0
      // create a copy of it in the local variable space of the current stack
5224
0
      // frame (which is the stack frame of the caller) and pass the address of
5225
0
      // this copy to the callee.
5226
0
      assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5227
0
      CCValAssign &ByValVA = ByValArgLocs[j++];
5228
0
      assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5229
0
5230
0
      // Memory reserved in the local variable space of the callers stack frame.
5231
0
      unsigned LocMemOffset = ByValVA.getLocMemOffset();
5232
0
5233
0
      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5234
0
      PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5235
0
                           StackPtr, PtrOff);
5236
0
5237
0
      // Create a copy of the argument in the local area of the current
5238
0
      // stack frame.
5239
0
      SDValue MemcpyCall =
5240
0
        CreateCopyOfByValArgument(Arg, PtrOff,
5241
0
                                  CallSeqStart.getNode()->getOperand(0),
5242
0
                                  Flags, DAG, dl);
5243
0
5244
0
      // This must go outside the CALLSEQ_START..END.
5245
0
      SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
5246
0
                                                     SDLoc(MemcpyCall));
5247
0
      DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5248
0
                             NewCallSeqStart.getNode());
5249
0
      Chain = CallSeqStart = NewCallSeqStart;
5250
0
5251
0
      // Pass the address of the aggregate copy on the stack either in a
5252
0
      // physical register or in the parameter list area of the current stack
5253
0
      // frame to the callee.
5254
0
      Arg = PtrOff;
5255
0
    }
5256
507
5257
507
    if (VA.isRegLoc()) {
5258
501
      if (Arg.getValueType() == MVT::i1)
5259
1
        Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);
5260
501
5261
501
      seenFloatArg |= VA.getLocVT().isFloatingPoint();
5262
501
      // Put argument in a physical register.
5263
501
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
5264
507
    } else {
5265
6
      // Put argument in the parameter list area of the current stack frame.
5266
6
      assert(VA.isMemLoc());
5267
6
      unsigned LocMemOffset = VA.getLocMemOffset();
5268
6
5269
6
      if (!isTailCall) {
5270
6
        SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5271
6
        PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5272
6
                             StackPtr, PtrOff);
5273
6
5274
6
        MemOpChains.push_back(
5275
6
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5276
6
      } else {
5277
0
        // Calculate and remember argument location.
5278
0
        CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
5279
0
                                 TailCallArguments);
5280
0
      }
5281
6
    }
5282
507
  }
5283
232
5284
232
  if (!MemOpChains.empty())
5285
3
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5286
232
5287
232
  // Build a sequence of copy-to-reg nodes chained together with token chain
5288
232
  // and flag operands which copy the outgoing args into the appropriate regs.
5289
232
  SDValue InFlag;
5290
733
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5291
501
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5292
501
                             RegsToPass[i].second, InFlag);
5293
501
    InFlag = Chain.getValue(1);
5294
501
  }
5295
232
5296
232
  // Set CR bit 6 to true if this is a vararg call with floating args passed in
5297
232
  // registers.
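  // Background (32-bit SVR4 ABI convention, stated here for context): CR bit
  // 6 tells a varargs callee whether any floating-point arguments arrived in
  // FPRs, so its prologue knows whether the FPR argument registers need to
  // be spilled for later va_arg processing.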
5298
232
  if (isVarArg) {
5299
57
    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
5300
57
    SDValue Ops[] = { Chain, InFlag };
5301
57
5302
57
    Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
5303
57
                        dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
5304
57
5305
57
    InFlag = Chain.getValue(1);
5306
57
  }
5307
232
5308
232
  if (isTailCall)
5309
1
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5310
1
                    TailCallArguments);
5311
232
5312
232
  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
5313
232
                    /* unused except on PPC64 ELFv1 */ false, DAG,
5314
232
                    RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
5315
232
                    NumBytes, Ins, InVals, CS);
5316
232
}
5317
5318
// Copy an argument into memory, being careful to do this outside the
5319
// call sequence for the call to which the argument belongs.
5320
SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5321
    SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
5322
48
    SelectionDAG &DAG, const SDLoc &dl) const {
5323
48
  SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
5324
48
                        CallSeqStart.getNode()->getOperand(0),
5325
48
                        Flags, DAG, dl);
5326
48
  // The MEMCPY must go outside the CALLSEQ_START..END.
5327
48
  int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
5328
48
  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
5329
48
                                                 SDLoc(MemcpyCall));
5330
48
  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5331
48
                         NewCallSeqStart.getNode());
5332
48
  return NewCallSeqStart;
5333
48
}
5334
5335
SDValue PPCTargetLowering::LowerCall_64SVR4(
5336
    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
5337
    bool isTailCall, bool isPatchPoint,
5338
    const SmallVectorImpl<ISD::OutputArg> &Outs,
5339
    const SmallVectorImpl<SDValue> &OutVals,
5340
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5341
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5342
1.20k
    ImmutableCallSite CS) const {
5343
1.20k
  bool isELFv2ABI = Subtarget.isELFv2ABI();
5344
1.20k
  bool isLittleEndian = Subtarget.isLittleEndian();
5345
1.20k
  unsigned NumOps = Outs.size();
5346
1.20k
  bool hasNest = false;
5347
1.20k
  bool IsSibCall = false;
5348
1.20k
5349
1.20k
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
5350
1.20k
  unsigned PtrByteSize = 8;
5351
1.20k
5352
1.20k
  MachineFunction &MF = DAG.getMachineFunction();
5353
1.20k
5354
1.20k
  if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
5355
55
    IsSibCall = true;
5356
1.20k
5357
1.20k
  // Mark this function as potentially containing a function that contains a
5358
1.20k
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
5359
1.20k
  // and restoring the caller's stack pointer in this function's epilog. This is
5360
1.20k
  // done because by tail calling the called function might overwrite the value
5361
1.20k
  // in this function's (MF) stack pointer stack slot 0(SP).
5362
1.20k
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5363
1
      CallConv == CallingConv::Fast)
5364
1
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5365
1.20k
5366
1.20k
  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
5367
1.20k
         "fastcc not supported on varargs functions");
5368
1.20k
5369
1.20k
  // Count how many bytes are to be pushed on the stack, including the linkage
5370
1.20k
  // area, and parameter passing area.  On ELFv1, the linkage area is 48 bytes
5371
1.20k
  // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
5372
1.20k
  // area is 32 bytes reserved space for [SP][CR][LR][TOC].
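  // Sketch of the two layouts (byte offsets from the stack pointer, derived
  // from the comment above):
  //
  //   ELFv1, 48 bytes: 0 SP | 8 CR | 16 LR | 24,32 unused | 40 TOC
  //   ELFv2, 32 bytes: 0 SP | 8 CR | 16 LR | 24 TOC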
5373
1.20k
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5374
1.20k
  unsigned NumBytes = LinkageSize;
5375
1.20k
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5376
1.20k
  unsigned &QFPR_idx = FPR_idx;
5377
1.20k
5378
1.20k
  static const MCPhysReg GPR[] = {
5379
1.20k
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5380
1.20k
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5381
1.20k
  };
5382
1.20k
  static const MCPhysReg VR[] = {
5383
1.20k
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5384
1.20k
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5385
1.20k
  };
5386
1.20k
5387
1.20k
  const unsigned NumGPRs = array_lengthof(GPR);
5388
1.20k
  const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
5389
1.20k
  const unsigned NumVRs  = array_lengthof(VR);
5390
1.20k
  const unsigned NumQFPRs = NumFPRs;
5391
1.20k
5392
1.20k
  // On ELFv2, we can avoid allocating the parameter area if all the arguments
5393
1.20k
  // can be passed to the callee in registers.
5394
1.20k
  // For the fast calling convention, there is another check below.
5395
1.20k
  // Note: We should keep consistent with LowerFormalArguments_64SVR4()
5396
1.20k
  bool HasParameterArea = !isELFv2ABI || isVarArg || CallConv == CallingConv::Fast;
5397
1.20k
  if (!HasParameterArea) {
5398
564
    unsigned ParamAreaSize = NumGPRs * PtrByteSize;
5399
564
    unsigned AvailableFPRs = NumFPRs;
5400
564
    unsigned AvailableVRs = NumVRs;
5401
564
    unsigned NumBytesTmp = NumBytes;
5402
1.63k
    for (unsigned i = 0; i != NumOps; ++i) {
5403
1.07k
      if (Outs[i].Flags.isNest()) continue;
5404
1.07k
      if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
5405
1.07k
                                PtrByteSize, LinkageSize, ParamAreaSize,
5406
1.07k
                                NumBytesTmp, AvailableFPRs, AvailableVRs,
5407
1.07k
                                Subtarget.hasQPX()))
5408
83
        HasParameterArea = true;
5409
1.07k
    }
5410
564
  }
5411
1.20k
5412
1.20k
  // When using the fast calling convention, we don't provide backing for
5413
1.20k
  // arguments that will be in registers.
5414
1.20k
  unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
5415
1.20k
5416
1.20k
  // Add up all the space actually used.
5417
4.47k
  for (unsigned i = 0; i != NumOps; ++i) {
5418
3.27k
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
5419
3.27k
    EVT ArgVT = Outs[i].VT;
5420
3.27k
    EVT OrigVT = Outs[i].ArgVT;
5421
3.27k
5422
3.27k
    if (Flags.isNest())
5423
2
      continue;
5424
3.26k
5425
3.26k
    if (CallConv == CallingConv::Fast) {
5426
1.06k
      if (Flags.isByVal())
5427
1
        NumGPRsUsed += (Flags.getByValSize()+7)/8;
5428
1.06k
      else
5429
1.06k
        switch (ArgVT.getSimpleVT().SimpleTy) {
5430
0
        default: llvm_unreachable("Unexpected ValueType for argument!");
5431
364
        case MVT::i1:
5432
364
        case MVT::i32:
5433
364
        case MVT::i64:
5434
364
          if (++NumGPRsUsed <= NumGPRs)
5435
188
            continue;
5436
176
          break;
5437
352
        case MVT::v4i32:
5438
352
        case MVT::v8i16:
5439
352
        case MVT::v16i8:
5440
352
        case MVT::v2f64:
5441
352
        case MVT::v2i64:
5442
352
        case MVT::v1i128:
5443
352
          if (++NumVRsUsed <= NumVRs)
5444
264
            continue;
5445
88
          break;
5446
0
        case MVT::v4f32:
5447
0
          // When using QPX, this is handled like a FP register, otherwise, it
5448
0
          // is an Altivec register.
5449
0
          if (Subtarget.hasQPX()) {
5450
0
            if (++NumFPRsUsed <= NumFPRs)
5451
0
              continue;
5452
0
          } else {
5453
0
            if (++NumVRsUsed <= NumVRs)
5454
0
              continue;
5455
0
          }
5456
0
          break;
5457
352
        case MVT::f32:
5458
352
        case MVT::f64:
5459
352
        case MVT::v4f64: // QPX
5460
352
        case MVT::v4i1:  // QPX
5461
352
          if (++NumFPRsUsed <= NumFPRs)
5462
286
            continue;
5463
66
          break;
5464
1.06k
        }
5465
1.06k
    }
5466
2.53k
5467
2.53k
    /* Respect alignment of argument on the stack.  */
5468
2.53k
    unsigned Align =
5469
2.53k
      CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5470
2.53k
    NumBytes = ((NumBytes + Align - 1) / Align) * Align;
5471
2.53k
5472
2.53k
    NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
5473
2.53k
    if (Flags.isInConsecutiveRegsLast())
5474
90
      NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
5475
3.27k
  }
5476
1.20k
5477
1.20k
  unsigned NumBytesActuallyUsed = NumBytes;
5478
1.20k
5479
1.20k
  // In the old ELFv1 ABI,
5480
1.20k
  // the prolog code of the callee may store up to 8 GPR argument registers to
5481
1.20k
  // the stack, allowing va_start to index over them in memory if it's varargs.
5482
1.20k
  // Because we cannot tell if this is needed on the caller side, we have to
5483
1.20k
  // conservatively assume that it is needed.  As such, make sure we have at
5484
1.20k
  // least enough stack space for the caller to store the 8 GPRs.
5485
1.20k
  // In the ELFv2 ABI, we allocate the parameter area iff a callee
5486
1.20k
  // really requires memory operands, e.g. a vararg function.
5487
1.20k
  if (HasParameterArea)
5488
667
    NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
5489
1.20k
  else
5490
533
    NumBytes = LinkageSize;
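  // Worked example (assumed ELFv1 values): with LinkageSize = 48 and
  // PtrByteSize = 8, a caller that needs the parameter area reserves at
  // least 48 + 8 * 8 = 112 bytes here.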
5491
1.20k
5492
1.20k
  // Tail call needs the stack to be aligned.
5493
1.20k
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5494
1
      CallConv == CallingConv::Fast)
5495
1
    NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
5496
1.20k
5497
1.20k
  int SPDiff = 0;
5498
1.20k
5499
1.20k
  // Calculate by how many bytes the stack has to be adjusted in case of tail
5500
1.20k
  // call optimization.
5501
1.20k
  if (!IsSibCall)
5502
1.14k
    SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5503
1.20k
5504
1.20k
  // To protect arguments on the stack from being clobbered in a tail call,
5505
1.20k
  // force all the loads to happen before doing any other lowering.
5506
1.20k
  if (isTailCall)
5507
56
    Chain = DAG.getStackArgumentTokenFactor(Chain);
5508
1.20k
5509
1.20k
  // Adjust the stack pointer for the new arguments...
5510
1.20k
  // These operations are automatically eliminated by the prolog/epilog pass
5511
1.20k
  if (!IsSibCall)
5512
1.14k
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5513
1.20k
  SDValue CallSeqStart = Chain;
5514
1.20k
5515
1.20k
  // Load the return address and frame pointer so they can be moved somewhere else
5516
1.20k
  // later.
5517
1.20k
  SDValue LROp, FPOp;
5518
1.20k
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5519
1.20k
5520
1.20k
  // Set up a copy of the stack pointer for use loading and storing any
5521
1.20k
  // arguments that may not fit in the registers available for argument
5522
1.20k
  // passing.
5523
1.20k
  SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5524
1.20k
5525
1.20k
  // Figure out which arguments are going to go in registers, and which in
5526
1.20k
  // memory.  Also, if this is a vararg function, floating point operations
5527
1.20k
  // must be stored to our stack, and loaded into integer regs as well, if
5528
1.20k
  // any integer regs are available for argument passing.
5529
1.20k
  unsigned ArgOffset = LinkageSize;
5530
1.20k
5531
1.20k
  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5532
1.20k
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5533
1.20k
5534
1.20k
  SmallVector<SDValue, 8> MemOpChains;
5535
4.47k
  for (unsigned i = 0; i != NumOps; ++i) {
5536
3.27k
    SDValue Arg = OutVals[i];
5537
3.27k
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
5538
3.27k
    EVT ArgVT = Outs[i].VT;
5539
3.27k
    EVT OrigVT = Outs[i].ArgVT;
5540
3.27k
5541
3.27k
    // PtrOff will be used to store the current argument to the stack if a
5542
3.27k
    // register cannot be found for it.
5543
3.27k
    SDValue PtrOff;
5544
3.27k
5545
3.27k
    // We re-align the argument offset for each argument, except when using the
5546
3.27k
    // fast calling convention, when we need to make sure we do that only when
5547
3.27k
    // we'll actually use a stack slot.
5548
2.53k
    auto ComputePtrOff = [&]() {
5549
2.53k
      /* Respect alignment of argument on the stack.  */
5550
2.53k
      unsigned Align =
5551
2.53k
        CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5552
2.53k
      ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
5553
2.53k
5554
2.53k
      PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
5555
2.53k
5556
2.53k
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5557
2.53k
    };
5558
3.27k
5559
3.27k
    if (CallConv != CallingConv::Fast) {
5560
2.20k
      ComputePtrOff();
5561
2.20k
5562
2.20k
      /* Compute GPR index associated with argument offset.  */
5563
2.20k
      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
5564
2.20k
      GPR_idx = std::min(GPR_idx, NumGPRs);
5565
2.20k
    }
5566
3.27k
5567
3.27k
    // Promote integers to 64-bit values.
5568
3.27k
    if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
5569
235
      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
5570
235
      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
5571
235
      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
5572
235
    }
5573
3.27k
5574
3.27k
    // FIXME memcpy is used way more than necessary.  Correctness first.
5575
3.27k
    // Note: "by value" is code for passing a structure by value, not
5576
3.27k
    // basic types.
5577
3.27k
    if (Flags.isByVal()) {
5578
57
      // Note: Size includes alignment padding, so
5579
57
      //   struct x { short a; char b; }
5580
57
      // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
5581
57
      // These are the proper values we need for right-justifying the
5582
57
      // aggregate in a parameter register.
5583
57
      unsigned Size = Flags.getByValSize();
5584
57
5585
57
      // An empty aggregate parameter takes up no storage and no
5586
57
      // registers.
5587
57
      if (Size == 0)
5588
2
        continue;
5589
55
5590
55
      if (CallConv == CallingConv::Fast)
5591
1
        ComputePtrOff();
5592
55
5593
55
      // All aggregates smaller than 8 bytes must be passed right-justified.
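      // Illustration (assumed 4-byte aggregate on a big-endian target):
      // right-justified means the aggregate occupies the low-order end of
      // the 8-byte slot, so the in-register case below extends a 4-byte
      // load, while the in-memory case copies to PtrOff + (8 - Size).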
5594
55
      if (Size==1 || Size==2 || Size==4) {
5595
16
        EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
5596
16
        if (GPR_idx != NumGPRs) {
5597
7
          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
5598
7
                                        MachinePointerInfo(), VT);
5599
7
          MemOpChains.push_back(Load.getValue(1));
5600
7
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5601
7
5602
7
          ArgOffset += PtrByteSize;
5603
7
          continue;
5604
7
        }
5605
48
      }
5606
48
5607
48
      if (GPR_idx == NumGPRs && Size < 8) {
5608
13
        SDValue AddPtr = PtrOff;
5609
13
        if (!isLittleEndian) {
5610
12
          SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
5611
12
                                          PtrOff.getValueType());
5612
12
          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5613
12
        }
5614
13
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5615
13
                                                          CallSeqStart,
5616
13
                                                          Flags, DAG, dl);
5617
13
        ArgOffset += PtrByteSize;
5618
13
        continue;
5619
13
      }
5620
35
      // Copy entire object into memory.  There are cases where gcc-generated
5621
35
      // code assumes it is there, even if it could be put entirely into
5622
35
      // registers.  (This is not what the doc says.)
5623
35
5624
35
      // FIXME: The above statement is likely due to a misunderstanding of the
5625
35
      // documents.  All arguments must be copied into the parameter area BY
5626
35
      // THE CALLEE in the event that the callee takes the address of any
5627
35
      // formal argument.  That has not yet been implemented.  However, it is
5628
35
      // reasonable to use the stack area as a staging area for the register
5629
35
      // load.
5630
35
5631
35
      // Skip this for small aggregates, as we will use the same slot for a
5632
35
      // right-justified copy, below.
5633
35
      if (Size >= 8)
5634
27
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
5635
27
                                                          CallSeqStart,
5636
27
                                                          Flags, DAG, dl);
5637
35
5638
35
      // When a register is available, pass a small aggregate right-justified.
5639
35
      if (Size < 8 && GPR_idx != NumGPRs) {
5640
8
        // The easiest way to get this right-justified in a register
5641
8
        // is to copy the structure into the rightmost portion of a
5642
8
        // local variable slot, then load the whole slot into the
5643
8
        // register.
5644
8
        // FIXME: The memcpy seems to produce pretty awful code for
5645
8
        // small aggregates, particularly for packed ones.
5646
8
        // FIXME: It would be preferable to use the slot in the
5647
8
        // parameter save area instead of a new local variable.
5648
8
        SDValue AddPtr = PtrOff;
5649
8
        if (!isLittleEndian) {
5650
8
          SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
5651
8
          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5652
8
        }
5653
8
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5654
8
                                                          CallSeqStart,
5655
8
                                                          Flags, DAG, dl);
5656
8
5657
8
        // Load the slot into the register.
5658
8
        SDValue Load =
5659
8
            DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
5660
8
        MemOpChains.push_back(Load.getValue(1));
5661
8
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5662
8
5663
8
        // Done with this argument.
5664
8
        ArgOffset += PtrByteSize;
5665
8
        continue;
5666
8
      }
5667
27
5668
27
      // For aggregates larger than PtrByteSize, copy the pieces of the
5669
27
      // object that fit into registers from the parameter save area.
5670
87
      for (unsigned j=0; j<Size; j+=PtrByteSize) {
5671
76
        SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
5672
76
        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
5673
76
        if (GPR_idx != NumGPRs) {
5674
60
          SDValue Load =
5675
60
              DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
5676
60
          MemOpChains.push_back(Load.getValue(1));
5677
60
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5678
60
          ArgOffset += PtrByteSize;
5679
76
        } else {
5680
16
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
5681
16
          break;
5682
16
        }
5683
76
      }
5684
57
      continue;
5685
57
    }
5686
3.21k
5687
3.21k
    switch (Arg.getSimpleValueType().SimpleTy) {
5688
0
    default: llvm_unreachable("Unexpected ValueType for argument!");
5689
1.79k
    case MVT::i1:
5690
1.79k
    case MVT::i32:
5691
1.79k
    case MVT::i64:
5692
1.79k
      if (Flags.isNest()) {
5693
2
        // The 'nest' parameter, if any, is passed in R11.
5694
2
        RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
5695
2
        hasNest = true;
5696
2
        break;
5697
2
      }
5698
1.78k
5699
1.78k
      // These can be scalar arguments or elements of an integer array type
5700
1.78k
      // passed directly.  Clang may use those instead of "byval" aggregate
5701
1.78k
      // types to avoid forcing arguments to memory unnecessarily.
5702
1.78k
      if (GPR_idx != NumGPRs) {
5703
1.47k
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
5704
1.78k
      } else {
5705
313
        if (CallConv == CallingConv::Fast)
5706
176
          ComputePtrOff();
5707
313
5708
313
        assert(HasParameterArea &&
5709
313
               "Parameter area must exist to pass an argument in memory.");
5710
313
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5711
313
                         true, isTailCall, false, MemOpChains,
5712
313
                         TailCallArguments, dl);
5713
313
        if (CallConv == CallingConv::Fast)
5714
176
          ArgOffset += PtrByteSize;
5715
313
      }
5716
1.78k
      if (CallConv != CallingConv::Fast)
5717
1.42k
        ArgOffset += PtrByteSize;
5718
1.78k
      break;
5719
973
    case MVT::f32:
5720
973
    case MVT::f64: {
5721
973
      // These can be scalar arguments or elements of a float array type
5722
973
      // passed directly.  The latter are used to implement ELFv2 homogenous
5723
973
      // float aggregates.
5724
973
5725
973
      // Named arguments go into FPRs first, and once they overflow, the
5726
973
      // remaining arguments go into GPRs and then the parameter save area.
5727
973
      // Unnamed arguments for vararg functions always go to GPRs and
5728
973
      // then the parameter save area.  For now, put all arguments to vararg
5729
973
      // routines always in both locations (FPR *and* GPR or stack slot).
5730
965
      bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
5731
973
      bool NeededLoad = false;
5732
973
5733
973
      // First load the argument into the next available FPR.
5734
973
      if (FPR_idx != NumFPRs)
5735
877
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
5736
973
5737
973
      // Next, load the argument into GPR or stack slot if needed.
5738
973
      if (!NeedGPROrStack)
5739
869
        ;
5740
104
      else 
if (104
GPR_idx != NumGPRs && 104
CallConv != CallingConv::Fast33
) {
5741
33
        // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
5742
33
        // once we support fp <-> gpr moves.
5743
33
5744
33
        // In the non-vararg case, this can only ever happen in the
5745
33
        // presence of f32 array types, since otherwise we never run
5746
33
        // out of FPRs before running out of GPRs.
5747
33
        SDValue ArgVal;
5748
33
5749
33
        // Double values are always passed in a single GPR.
5750
33
        if (
Arg.getValueType() != MVT::f3233
) {
5751
8
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
5752
8
5753
8
        // Non-array float values are extended and passed in a GPR.
5754
33
        } else 
if (25
!Flags.isInConsecutiveRegs()25
) {
5755
4
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
5756
4
          ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
5757
4
5758
4
        // If we have an array of floats, we collect every odd element
5759
4
        // together with its predecessor into one GPR.
5760
25
        } else 
if (21
ArgOffset % PtrByteSize != 021
) {
5761
9
          SDValue Lo, Hi;
5762
9
          Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
5763
9
          Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
5764
9
          if (!isLittleEndian)
5765
0
            std::swap(Lo, Hi);
5766
9
          ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
5767
9
5768
9
        // The final element, if even, goes into the first half of a GPR.
5769
21
        } else 
if (12
Flags.isInConsecutiveRegsLast()12
) {
5770
6
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
5771
6
          ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
5772
6
          if (!isLittleEndian)
5773
0
            ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
5774
0
                                 DAG.getConstant(32, dl, MVT::i32));
5775
6
5776
6
        // Non-final even elements are skipped; they will be handled
5777
6
        // together the with subsequent argument on the next go-around.
5778
6
        } else
5779
6
          ArgVal = SDValue();
5780
33
5781
33
        if (ArgVal.getNode())
5782
27
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
5783
104
      } else {
5784
71
        if (CallConv == CallingConv::Fast)
5785
66
          ComputePtrOff();
5786
71
5787
71
        // Single-precision floating-point values are mapped to the
5788
71
        // second (rightmost) word of the stack doubleword.
5789
71
        if (Arg.getValueType() == MVT::f32 &&
5790
71
            
!isLittleEndian3
&&
!Flags.isInConsecutiveRegs()2
) {
5791
2
          SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
5792
2
          PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
5793
2
        }
5794
104
5795
104
        assert(HasParameterArea &&
5796
104
               "Parameter area must exist to pass an argument in memory.");
5797
104
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5798
104
                         true, isTailCall, false, MemOpChains,
5799
104
                         TailCallArguments, dl);
5800
104
5801
104
        NeededLoad = true;
5802
104
      }
5803
973
      // When passing an array of floats, the array occupies consecutive
5804
973
      // space in the argument area; only round up to the next doubleword
5805
973
      // at the end of the array.  Otherwise, each float takes 8 bytes.
5806
973
      if (
CallConv != CallingConv::Fast || 973
NeededLoad352
) {
5807
687
        ArgOffset += (Arg.getValueType() == MVT::f32 &&
5808
687
                      
Flags.isInConsecutiveRegs()402
) ?
4255
:
8432
;
5809
687
        if (Flags.isInConsecutiveRegsLast())
5810
45
          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
5811
687
      }
5812
973
      break;
5813
973
    }
5814
449
    case MVT::v4f32:
5815
449
    case MVT::v4i32:
5816
449
    case MVT::v8i16:
5817
449
    case MVT::v16i8:
5818
449
    case MVT::v2f64:
5819
449
    case MVT::v2i64:
5820
449
    case MVT::v1i128:
5821
449
      if (
!Subtarget.hasQPX()449
) {
5822
449
      // These can be scalar arguments or elements of a vector array type
5823
449
      // passed directly.  The latter are used to implement ELFv2 homogenous
5824
449
      // vector aggregates.
5825
449
5826
449
      // For a varargs call, named arguments go into VRs or on the stack as
5827
449
      // usual; unnamed arguments always go to the stack or the corresponding
5828
449
      // GPRs when within range.  For now, we always put the value in both
5829
449
      // locations (or even all three).
5830
449
      if (
isVarArg449
) {
5831
25
        assert(HasParameterArea &&
5832
25
               "Parameter area must exist if we have a varargs call.");
5833
25
        // We could elide this store in the case where the object fits
5834
25
        // entirely in R registers.  Maybe later.
5835
25
        SDValue Store =
5836
25
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
5837
25
        MemOpChains.push_back(Store);
5838
25
        if (
VR_idx != NumVRs25
) {
5839
25
          SDValue Load =
5840
25
              DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
5841
25
          MemOpChains.push_back(Load.getValue(1));
5842
25
          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
5843
25
        }
5844
25
        ArgOffset += 16;
5845
75
        for (unsigned i=0; 
i<1675
;
i+=PtrByteSize50
) {
5846
50
          if (GPR_idx == NumGPRs)
5847
0
            break;
5848
50
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
5849
50
                                   DAG.getConstant(i, dl, PtrVT));
5850
50
          SDValue Load =
5851
50
              DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
5852
50
          MemOpChains.push_back(Load.getValue(1));
5853
50
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5854
50
        }
5855
25
        break;
5856
25
      }
5857
424
5858
424
      // Non-varargs Altivec params go into VRs or on the stack.
5859
424
      
if (424
VR_idx != NumVRs424
) {
5860
336
        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
5861
424
      } else {
5862
88
        if (CallConv == CallingConv::Fast)
5863
88
          ComputePtrOff();
5864
88
5865
88
        assert(HasParameterArea &&
5866
88
               "Parameter area must exist to pass an argument in memory.");
5867
88
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5868
88
                         true, isTailCall, true, MemOpChains,
5869
88
                         TailCallArguments, dl);
5870
88
        if (CallConv == CallingConv::Fast)
5871
88
          ArgOffset += 16;
5872
88
      }
5873
424
5874
424
      if (CallConv != CallingConv::Fast)
5875
72
        ArgOffset += 16;
5876
449
      break;
5877
449
      } // not QPX
5878
0
5879
449
      assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
5880
0
             "Invalid QPX parameter type");
5881
0
5882
0
      /* fall through */
5883
2
    case MVT::v4f64:
5884
2
    case MVT::v4i1: {
5885
2
      bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
5886
2
      if (
isVarArg2
) {
5887
0
        assert(HasParameterArea &&
5888
0
               "Parameter area must exist if we have a varargs call.");
5889
0
        // We could elide this store in the case where the object fits
5890
0
        // entirely in R registers.  Maybe later.
5891
0
        SDValue Store =
5892
0
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
5893
0
        MemOpChains.push_back(Store);
5894
0
        if (
QFPR_idx != NumQFPRs0
) {
5895
0
          SDValue Load = DAG.getLoad(IsF32 ? 
MVT::v4f320
:
MVT::v4f640
, dl, Store,
5896
0
                                     PtrOff, MachinePointerInfo());
5897
0
          MemOpChains.push_back(Load.getValue(1));
5898
0
          RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
5899
0
        }
5900
0
        ArgOffset += (IsF32 ? 
160
:
320
);
5901
0
        for (unsigned i = 0; 
i < (IsF32 ? 0
16U0
:
32U0
);
i += PtrByteSize0
) {
5902
0
          if (GPR_idx == NumGPRs)
5903
0
            break;
5904
0
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
5905
0
                                   DAG.getConstant(i, dl, PtrVT));
5906
0
          SDValue Load =
5907
0
              DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
5908
0
          MemOpChains.push_back(Load.getValue(1));
5909
0
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5910
0
        }
5911
0
        break;
5912
0
      }
5913
2
5914
2
      // Non-varargs QPX params go into registers or on the stack.
5915
2
      
if (2
QFPR_idx != NumQFPRs2
) {
5916
2
        RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
5917
2
      } else {
5918
0
        if (CallConv == CallingConv::Fast)
5919
0
          ComputePtrOff();
5920
0
5921
0
        assert(HasParameterArea &&
5922
0
               "Parameter area must exist to pass an argument in memory.");
5923
0
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5924
0
                         true, isTailCall, true, MemOpChains,
5925
0
                         TailCallArguments, dl);
5926
0
        if (CallConv == CallingConv::Fast)
5927
0
          
ArgOffset += (IsF32 ? 0
160
:
320
);
5928
0
      }
5929
2
5930
2
      if (CallConv != CallingConv::Fast)
5931
2
        
ArgOffset += (IsF32 ? 2
160
:
322
);
5932
1.79k
      break;
5933
1.79k
      }
5934
3.27k
    }
5935
3.27k
  }
5936
1.20k
5937
1.20k
  assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
5938
1.20k
         "mismatch in size of parameter area");
5939
1.20k
  (void)NumBytesActuallyUsed;
5940
1.20k
5941
1.20k
  if (!MemOpChains.empty())
5942
100
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5943
1.20k
5944
1.20k
  // Check if this is an indirect call (MTCTR/BCTRL).
5945
1.20k
  // See PrepareCall() for more information about calls through function
5946
1.20k
  // pointers in the 64-bit SVR4 ABI.
5947
1.20k
  if (
!isTailCall && 1.20k
!isPatchPoint1.14k
&&
5948
1.11k
      !isFunctionGlobalAddress(Callee) &&
5949
1.20k
      
!isa<ExternalSymbolSDNode>(Callee)134
) {
5950
28
    // Load r2 into a virtual register and store it to the TOC save area.
5951
28
    setUsesTOCBasePtr(DAG);
5952
28
    SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
5953
28
    // TOC save area offset.
5954
28
    unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5955
28
    SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5956
28
    SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5957
28
    Chain = DAG.getStore(
5958
28
        Val.getValue(1), dl, Val, AddPtr,
5959
28
        MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
5960
28
    // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
5961
28
    // This does not mean the MTCTR instruction must use R12; it's easier
5962
28
    // to model this as an extra parameter, so do that.
5963
28
    if (
isELFv2ABI && 28
!isPatchPoint9
)
5964
9
      RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
5965
28
  }
5966
1.20k
5967
1.20k
  // Build a sequence of copy-to-reg nodes chained together with token chain
5968
1.20k
  // and flag operands which copy the outgoing args into the appropriate regs.
5969
1.20k
  SDValue InFlag;
5970
4.07k
  for (unsigned i = 0, e = RegsToPass.size(); 
i != e4.07k
;
++i2.87k
) {
5971
2.87k
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5972
2.87k
                             RegsToPass[i].second, InFlag);
5973
2.87k
    InFlag = Chain.getValue(1);
5974
2.87k
  }
5975
1.20k
5976
1.20k
  if (
isTailCall && 1.20k
!IsSibCall56
)
5977
1
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5978
1
                    TailCallArguments);
5979
1.20k
5980
1.20k
  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, hasNest,
5981
1.20k
                    DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee,
5982
1.20k
                    SPDiff, NumBytes, Ins, InVals, CS);
5983
1.20k
}
5984
5985
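Two details of the argument marshaling above lend themselves to small standalone sketches: the by-value chunking loop that rounds the unconsumed remainder up to a whole pointer slot, and the ELFv2 packing of two consecutive f32 array elements into one 64-bit GPR. The helpers below (chunkAggregate, packFloatPair) are hypothetical illustrations only, with memcpy standing in for the DAG's load, BITCAST, and BUILD_PAIR nodes.

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <utility>
#include <vector>

// Split Size bytes at Src into PtrByteSize-sized register pieces; returns
// the number of parameter-save-area bytes consumed (always slot-aligned).
unsigned chunkAggregate(const char *Src, unsigned Size, unsigned PtrByteSize,
                        unsigned FreeGPRs, std::vector<uint64_t> &Regs) {
  unsigned Used = 0;
  for (unsigned j = 0; j < Size; j += PtrByteSize) {
    if (Regs.size() < FreeGPRs) {
      uint64_t Piece = 0;
      std::memcpy(&Piece, Src + j, std::min(PtrByteSize, Size - j));
      Regs.push_back(Piece);
      Used += PtrByteSize;
    } else {
      // Out of registers: charge the whole remainder, rounded up to a slot,
      // exactly as ((Size - j + PtrByteSize - 1) / PtrByteSize) does above.
      Used += ((Size - j + PtrByteSize - 1) / PtrByteSize) * PtrByteSize;
      break;
    }
  }
  return Used;
}

// Pack an even/odd pair of f32 array elements into one 64-bit GPR image,
// mirroring the BITCAST + BUILD_PAIR (and big-endian swap) sequence above.
uint64_t packFloatPair(float Even, float Odd, bool IsLittleEndian) {
  uint32_t Lo, Hi;
  std::memcpy(&Lo, &Even, sizeof(float)); // ISD::BITCAST f32 -> i32
  std::memcpy(&Hi, &Odd, sizeof(float));
  if (!IsLittleEndian)
    std::swap(Lo, Hi);
  return (uint64_t(Hi) << 32) | Lo;       // ISD::BUILD_PAIR i32,i32 -> i64
}
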
SDValue PPCTargetLowering::LowerCall_Darwin(
    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
    bool isTailCall, bool isPatchPoint,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    ImmutableCallSite CS) const {
  unsigned NumOps = Outs.size();

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence, the frame pointer will be used for dynamic
  // allocation and for restoring the caller's stack pointer in this function's
  // epilog. This is done because, by tail calling, the called function might
  // overwrite the value in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area.  We start with 24/48 bytes, which is
  // prereserved space for [SP][CR][LR][3 x unused].
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  unsigned NumBytes = LinkageSize;

  // Add up all the space actually used.
  // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
  // they all go in registers, but we must reserve stack space for them for
  // possible use by the caller.  In varargs or 64-bit calls, parameters are
  // assigned stack space in order, with padding so Altivec parameters are
  // 16-byte aligned.
  unsigned nAltivecParamsAtEnd = 0;
  for (unsigned i = 0; i != NumOps; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    EVT ArgVT = Outs[i].VT;
    // Varargs Altivec parameters are padded to a 16 byte boundary.
    if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
      if (!isVarArg && !isPPC64) {
        // Non-varargs Altivec parameters go after all the non-Altivec
        // parameters; handle those later so we know how much padding we need.
        nAltivecParamsAtEnd++;
        continue;
      }
      // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
      NumBytes = ((NumBytes+15)/16)*16;
    }
    NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
  }

  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    NumBytes = ((NumBytes+15)/16)*16;
    NumBytes += 16*nAltivecParamsAtEnd;
  }

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if it's varargs.
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed.  As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
  NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);

  // Tail call needs the stack to be aligned.
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);

  // To protect arguments on the stack from being clobbered in a tail call,
  // force all the loads to happen before doing any other lowering.
  if (isTailCall)
    Chain = DAG.getStackArgumentTokenFactor(Chain);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass.
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr;
  if (isPPC64)
    StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
  else
    StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.  Also, if this is a vararg function, floating point operations
  // must be stored to our stack, and loaded into integer regs as well, if
  // any integer regs are available for argument passing.
  unsigned ArgOffset = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };
  const unsigned NumGPRs = array_lengthof(GPR_32);
  const unsigned NumFPRs = 13;
  const unsigned NumVRs  = array_lengthof(VR);

  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;

  SmallVector<SDValue, 8> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff;

    PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());

    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    // On PPC64, promote integers to 64-bit values.
    if (isPPC64 && Arg.getValueType() == MVT::i32) {
      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
    }

    // FIXME: memcpy is used way more than necessary.  Correctness first.
    // Note: "by value" is code for passing a structure by value, not
    // basic types.
    if (Flags.isByVal()) {
      unsigned Size = Flags.getByValSize();
      // Very small objects are passed right-justified.  Everything else is
      // passed left-justified.
      if (Size==1 || Size==2) {
        EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
        if (GPR_idx != NumGPRs) {
          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
                                        MachinePointerInfo(), VT);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

          ArgOffset += PtrByteSize;
        } else {
          SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
                                          PtrOff.getValueType());
          SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
          Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                            CallSeqStart,
                                                            Flags, DAG, dl);
          ArgOffset += PtrByteSize;
        }
        continue;
      }
      // Copy entire object into memory.  There are cases where gcc-generated
      // code assumes it is there, even if it could be put entirely into
      // registers.  (This is not what the doc says.)
      Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
                                                        CallSeqStart,
                                                        Flags, DAG, dl);

      // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
      // copy the pieces of the object that fit into registers from the
      // parameter save area.
      for (unsigned j=0; j<Size; j+=PtrByteSize) {
        SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
        if (GPR_idx != NumGPRs) {
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          ArgOffset += PtrByteSize;
        } else {
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
          break;
        }
      }
      continue;
    }

    switch (Arg.getSimpleValueType().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (GPR_idx != NumGPRs) {
        if (Arg.getValueType() == MVT::i1)
          Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);

        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
      } else {
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      }
      ArgOffset += PtrByteSize;
      break;
    case MVT::f32:
    case MVT::f64:
      if (FPR_idx != NumFPRs) {
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

        if (isVarArg) {
          SDValue Store =
              DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
          MemOpChains.push_back(Store);

          // Float varargs are always shadowed in available integer registers.
          if (GPR_idx != NumGPRs) {
            SDValue Load =
                DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
            SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
            PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
            SDValue Load =
                DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
        } else {
          // If we have any FPRs remaining, we may also have GPRs remaining.
          // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
          // GPRs.
          if (GPR_idx != NumGPRs)
            ++GPR_idx;
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
              !isPPC64)  // PPC64 has 64-bit GPRs, obviously. :)
            ++GPR_idx;
        }
      } else
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      if (isPPC64)
        ArgOffset += 8;
      else
        ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (isVarArg) {
        // These go aligned on the stack, or in the corresponding R registers
        // when within range.  The Darwin PPC ABI doc claims they also go in
        // V registers; in fact gcc does this only for arguments that are
        // prototyped, not for those that match the ...  We do it for all
        // arguments, which seems to work.
        while (ArgOffset % 16 != 0) {
          ArgOffset += PtrByteSize;
          if (GPR_idx != NumGPRs)
            GPR_idx++;
        }
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                             DAG.getConstant(ArgOffset, dl, PtrVT));
        SDValue Store =
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
        MemOpChains.push_back(Store);
        if (VR_idx != NumVRs) {
          SDValue Load =
              DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
        }
        ArgOffset += 16;
        for (unsigned i=0; i<16; i+=PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                   DAG.getConstant(i, dl, PtrVT));
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }

      // Non-varargs Altivec params generally go in registers, but have
      // stack space allocated at the end.
      if (VR_idx != NumVRs) {
        // Doesn't have GPR space allocated.
        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
      } else if (nAltivecParamsAtEnd==0) {
        // We are emitting Altivec params in order.
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, true, MemOpChains,
                         TailCallArguments, dl);
        ArgOffset += 16;
      }
      break;
    }
  }
  // If all Altivec parameters fit in registers, as they usually do,
  // they get stack space following the non-Altivec parameters.  We
  // don't track this here because nobody below needs it.
  // If there are more Altivec parameters than fit in registers, emit
  // the stores here.
  if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
    unsigned j = 0;
    // Offset is aligned; skip 1st 12 params which go in V registers.
    ArgOffset = ((ArgOffset+15)/16)*16;
    ArgOffset += 12*16;
    for (unsigned i = 0; i != NumOps; ++i) {
      SDValue Arg = OutVals[i];
      EVT ArgType = Outs[i].VT;
      if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
          ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
        if (++j > NumVRs) {
          SDValue PtrOff;
          // We are emitting Altivec params in order.
          LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                           isPPC64, isTailCall, true, MemOpChains,
                           TailCallArguments, dl);
          ArgOffset += 16;
        }
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // On Darwin, R12 must contain the address of an indirect callee.  This does
  // not mean the MTCTR instruction must use R12; it's easier to model this as
  // an extra parameter, so do that.
  if (!isTailCall &&
      !isFunctionGlobalAddress(Callee) &&
      !isa<ExternalSymbolSDNode>(Callee) &&
      !isBLACompatibleAddress(Callee, DAG))
    RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
                                                   PPC::R12), Callee));

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isTailCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
                    /* unused except on PPC64 ELFv1 */ false, DAG,
                    RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
                    NumBytes, Ins, InVals, CS);
}

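One Darwin-specific wrinkle above is worth isolating: floats passed in FPRs still shadow GPRs, one slot for any float and a second for f64 on 32-bit targets. A hypothetical helper, shadowedGPRs, showing just that bookkeeping:

#include <vector>

// Count how many argument GPRs a run of float args shadows under the 32-bit
// Darwin rules above: every float consumes one slot, f64 consumes a second.
unsigned shadowedGPRs(const std::vector<bool> &ArgIsF64, unsigned NumGPRs) {
  unsigned GPRIdx = 0;
  for (bool IsF64 : ArgIsF64) {
    if (GPRIdx != NumGPRs)
      ++GPRIdx;                 // first shadowed GPR
    if (IsF64 && GPRIdx != NumGPRs)
      ++GPRIdx;                 // f64 shadows a second GPR on 32-bit targets
  }
  return GPRIdx;
}
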
bool
PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
                                  MachineFunction &MF, bool isVarArg,
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                                  LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
  return CCInfo.CheckReturn(Outs, RetCC_PPC);
}

SDValue
PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &dl, SelectionDAG &DAG) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs, RetCC_PPC);

  SDValue Flag;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = OutVals[i];

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    }

    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
    Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const MCPhysReg *I =
    TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
  if (I) {
    for (; *I; ++I) {
      if (PPC::G8RCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::i64));
      else if (PPC::F8RCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
      else if (PPC::CRRCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::i1));
      else if (PPC::VRRCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::Other));
      else
        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
    }
  }

  RetOps[0] = Chain;  // Update chain.

  // Add the flag if we have it.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
}

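LowerReturn's switch widens each value to its location type per the calling convention's verdict. Reduced to host arithmetic (with a hypothetical LocInfo enum mirroring CCValAssign::LocInfo), the extension kinds look like this:

#include <cstdint>

enum class LocInfo { Full, AExt, ZExt, SExt }; // mirrors CCValAssign::LocInfo

// Widen an i8 return value into its 32-bit location register image, making
// the same choices LowerReturn expresses with ISD::*_EXTEND nodes.
uint32_t extendForReturn(int8_t V, LocInfo LI) {
  switch (LI) {
  case LocInfo::Full: return uint8_t(V);           // already full width
  case LocInfo::AExt: return uint8_t(V);           // any bits fine; use zeros
  case LocInfo::ZExt: return uint8_t(V);           // high bits forced to zero
  case LocInfo::SExt: return uint32_t(int32_t(V)); // high bits copy the sign
  }
  return 0;
}
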
SDValue
PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc dl(Op);

  // Get the correct type for integers.
  EVT IntVT = Op.getValueType();

  // Get the inputs.
  SDValue Chain = Op.getOperand(0);
  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
  // Build a DYNAREAOFFSET node.
  SDValue Ops[2] = {Chain, FPSIdx};
  SDVTList VTs = DAG.getVTList(IntVT);
  return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
}

SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
                                             SelectionDAG &DAG) const {
  // When we pop the dynamic allocation we need to restore the SP link.
  SDLoc dl(Op);

  // Get the correct type for pointers.
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // Construct the stack pointer operand.
  bool isPPC64 = Subtarget.isPPC64();
  unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
  SDValue StackPtr = DAG.getRegister(SP, PtrVT);

  // Get the operands for the STACKRESTORE.
  SDValue Chain = Op.getOperand(0);
  SDValue SaveSP = Op.getOperand(1);

  // Load the old link SP.
  SDValue LoadLinkSP =
      DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());

  // Restore the stack pointer.
  Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);

  // Store the old link SP.
  return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
}

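The restore above is a three-step dance: read the back-chain link at 0(SP), move SP, rewrite the link at the new 0(SP). A toy model on a host pointer (sketch only; the real lowering works on DAG nodes):

#include <cstdint>

// Restore SP to SavedSP while preserving the stack back-chain link, as the
// load / copy-to-reg / store sequence in LowerSTACKRESTORE does.
uintptr_t *stackRestore(uintptr_t *SP, uintptr_t *SavedSP) {
  uintptr_t Link = *SP; // load the old link from 0(SP)
  SP = SavedSP;         // restore the stack pointer
  *SP = Link;           // store the old link at 0(new SP)
  return SP;
}
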
SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Get the current return address save index.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int RASI = FI->getReturnAddrSaveIndex();

  // If the return address save index hasn't been defined yet.
  if (!RASI) {
    // Find out the fixed offset of the return address save area.
    int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
    // Allocate the frame index for the return address save area.
    RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
    // Save the result.
    FI->setReturnAddrSaveIndex(RASI);
  }
  return DAG.getFrameIndex(RASI, PtrVT);
}

SDValue
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Get the current frame pointer save index.  The users of this index will
  // be primarily DYNALLOC instructions.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int FPSI = FI->getFramePointerSaveIndex();

  // If the frame pointer save index hasn't been defined yet.
  if (!FPSI) {
    // Find out the fixed offset of the frame pointer save area.
    int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
    // Allocate the frame index for the frame pointer save area.
    FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
    // Save the result.
    FI->setFramePointerSaveIndex(FPSI);
  }
  return DAG.getFrameIndex(FPSI, PtrVT);
}

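Both frame-index getters share one pattern: an index of 0 means "not created yet", so the fixed object is allocated once and then cached in PPCFunctionInfo. The pattern in miniature (createFixedObject is a hypothetical stand-in for MachineFrameInfo::CreateFixedObject, which hands out negative indices for fixed objects):

// Create-on-first-use caching as in the two getters above.
struct SaveIndexCache {
  int SaveIndex = 0; // 0 means "not allocated yet"

  int get(int Offset, bool IsPPC64) {
    if (!SaveIndex)
      SaveIndex = createFixedObject(IsPPC64 ? 8 : 4, Offset);
    return SaveIndex;
  }

  // Hypothetical stand-in; fixed-object frame indices are negative,
  // hence the countdown, and 0 stays free as the "unset" sentinel.
  int createFixedObject(int /*Size*/, int /*Offset*/) {
    static int Next = -1;
    return Next--;
  }
};
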
SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                                   SelectionDAG &DAG) const {
  // Get the inputs.
  SDValue Chain = Op.getOperand(0);
  SDValue Size  = Op.getOperand(1);
  SDLoc dl(Op);

  // Get the correct type for pointers.
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  // Negate the size.
  SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
                                DAG.getConstant(0, dl, PtrVT), Size);
  // Construct a node for the frame pointer save index.
  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
  // Build a DYNALLOC node.
  SDValue Ops[3] = { Chain, NegSize, FPSIdx };
  SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
  return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
}

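The only arithmetic in LowerDYNAMIC_STACKALLOC is the negation: the PowerPC stack grows downward, so the eventual DYNALLOC can implement allocation as an add of the negative size. In plain pointer terms (sketch):

#include <cstdint>

// Stack grows down: allocating Size bytes means SP + (0 - Size), which is
// the ISD::SUB(0, Size) fed into PPCISD::DYNALLOC above.
uintptr_t dynAlloc(uintptr_t SP, uintptr_t Size) {
  uintptr_t NegSize = 0 - Size; // well-defined unsigned wraparound
  return SP + NegSize;          // == SP - Size
}
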
SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
                                             SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();

  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
  return DAG.getFrameIndex(FI, PtrVT);
}

SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
                     DAG.getVTList(MVT::i32, MVT::Other),
                     Op.getOperand(0), Op.getOperand(1));
}

SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
                     Op.getOperand(0), Op.getOperand(1));
}

SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getValueType().isVector())
    return LowerVectorLoad(Op, DAG);

  assert(Op.getValueType() == MVT::i1 &&
         "Custom lowering only for i1 loads");

  // First, load 8 bits into 32 bits, then truncate to 1 bit.

  SDLoc dl(Op);
  LoadSDNode *LD = cast<LoadSDNode>(Op);

  SDValue Chain = LD->getChain();
  SDValue BasePtr = LD->getBasePtr();
  MachineMemOperand *MMO = LD->getMemOperand();

  SDValue NewLD =
      DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
                     BasePtr, MVT::i8, MMO);
  SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);

  SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
  return DAG.getMergeValues(Ops, dl);
}

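The memory-level effect of the i1 load lowering above, without DAG nodes: fetch a whole byte with an extending load, then truncate back to one bit (sketch):

#include <cstdint>

// Load an i1 as LowerLOAD does: EXTLOAD a full byte, then truncate.
bool loadI1(const uint8_t *Addr) {
  uint32_t Wide = *Addr;  // extending byte load (EXTLOAD to a wider type)
  return (Wide & 1) != 0; // ISD::TRUNCATE back down to i1
}
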
SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getOperand(1).getValueType().isVector())
    return LowerVectorStore(Op, DAG);

  assert(Op.getOperand(1).getValueType() == MVT::i1 &&
         "Custom lowering only for i1 stores");

  // First, zero extend to 32 bits, then use a truncating store to 8 bits.

  SDLoc dl(Op);
  StoreSDNode *ST = cast<StoreSDNode>(Op);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  MachineMemOperand *MMO = ST->getMemOperand();

  Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
                      Value);
  return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
}

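And the mirror image for stores, matching LowerSTORE: zero-extend the bit to a full register, then emit a truncating byte store (sketch):

#include <cstdint>

// Store an i1 as LowerSTORE does: ZERO_EXTEND, then a truncating i8 store.
void storeI1(uint8_t *Addr, bool V) {
  uint32_t Wide = V ? 1u : 0u; // ISD::ZERO_EXTEND to the pointer-width type
  *Addr = uint8_t(Wide);       // truncating store to i8
}
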
// FIXME: Remove this once the ANDI glue bug is fixed:
SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getValueType() == MVT::i1 &&
         "Custom lowering only for i1 results");

  SDLoc DL(Op);
  return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
                     Op.getOperand(0));
}

/// LowerSELECT_CC - Lower floating-point select_cc's into an fsel instruction
/// when possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  // Not FP? Not an fsel.
  if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
      !Op.getOperand(2).getValueType().isFloatingPoint())
    return Op;

  // We might be able to do better than this under some circumstances, but in
  // general, fsel-based lowering of select is a finite-math-only optimization.
  // For more information, see section F.3 of the 2.06 ISA specification.
  if (!DAG.getTarget().Options.NoInfsFPMath ||
      !DAG.getTarget().Options.NoNaNsFPMath)
    return Op;
  // TODO: Propagate flags from the select rather than global settings.
  SDNodeFlags Flags;
  Flags.setNoInfs(true);
  Flags.setNoNaNs(true);

  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

  EVT ResVT = Op.getValueType();
  EVT CmpVT = Op.getOperand(0).getValueType();
  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
  SDLoc dl(Op);

  // If the RHS of the comparison is a 0.0, we don't need to do the
  // subtraction at all.
  SDValue Sel1;
  if (isFloatingPointZero(RHS))
    switch (CC) {
    default: break;       // SETUO etc aren't handled by fsel.
    case ISD::SETNE:
      std::swap(TV, FV);
      LLVM_FALLTHROUGH;
    case ISD::SETEQ:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
      if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
        Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
    case ISD::SETULT:
    case ISD::SETLT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
      LLVM_FALLTHROUGH;
    case ISD::SETOGE:
    case ISD::SETGE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
    case ISD::SETUGT:
    case ISD::SETGT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setgt
      LLVM_FALLTHROUGH;
    case ISD::SETOLE:
    case ISD::SETLE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
    }

  SDValue Cmp;
  switch (CC) {
  default: break;       // SETUO etc aren't handled by fsel.
  case ISD::SETNE:
    std::swap(TV, FV);
    LLVM_FALLTHROUGH;
  case ISD::SETEQ:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
    if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                       DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
  case ISD::SETULT:
  case ISD::SETLT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOGE:
  case ISD::SETGE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  case ISD::SETUGT:
  case ISD::SETGT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOLE:
  case ISD::SETLE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  }
  return Op;
}

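Everything above reduces to one primitive: fsel(Cmp, TV, FV) yields TV when Cmp >= 0.0 and FV otherwise. Under the no-NaNs/no-infs assumption the code insists on, the rewrites can be modeled on host doubles (sketch; selectLT and selectEQ mirror the SETLT and SETEQ paths):

// Scalar model of PPCISD::FSEL: select TV when Cmp >= 0.0, else FV.
double fsel(double Cmp, double TV, double FV) { return Cmp >= 0.0 ? TV : FV; }

// select_cc(LHS < RHS, TV, FV): swap the arms and test LHS - RHS against 0,
// since fsel is natively a >= 0 test.
double selectLT(double LHS, double RHS, double TV, double FV) {
  return fsel(LHS - RHS, FV, TV);
}

// Equality needs Cmp >= 0 and -Cmp >= 0 to both hold, hence the two chained
// FSEL nodes in the SETEQ path above.
double selectEQ(double LHS, double RHS, double TV, double FV) {
  double Cmp = LHS - RHS;
  double Sel1 = fsel(Cmp, TV, FV);
  return fsel(-Cmp, Sel1, FV);
}
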
void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
                                               SelectionDAG &DAG,
                                               const SDLoc &dl) const {
  assert(Op.getOperand(0).getValueType().isFloatingPoint());
  SDValue Src = Op.getOperand(0);
  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);

  SDValue Tmp;
  switch (Op.getSimpleValueType().SimpleTy) {
  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
  case MVT::i32:
    Tmp = DAG.getNode(
        Op.getOpcode() == ISD::FP_TO_SINT
            ? PPCISD::FCTIWZ
            : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
        dl, MVT::f64, Src);
    break;
  case MVT::i64:
    assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
           "i64 FP_TO_UINT is supported only with FPCVT");
    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ,
                      dl, MVT::f64, Src);
    break;
  }

  // Convert the FP value to an int value through memory.
  bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
    (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
  SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
  MachinePointerInfo MPI =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);

  // Emit a store to the stack slot.
  SDValue Chain;
  if (i32Stack) {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MMO =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
    SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
    Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
              DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
  } else
    Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI);

  // Result is a load from the stack slot.  If loading 4 bytes, make sure to
  // add in a bias on big endian.
  if (Op.getValueType() == MVT::i32 && !i32Stack) {
    FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
                        DAG.getConstant(4, dl, FIPtr.getValueType()));
    MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
  }

  RLI.Chain = Chain;
  RLI.Ptr = FIPtr;
  RLI.MPI = MPI;
}

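The conversion above goes through memory because fctiwz/fctidz leave their result in a floating-point register: the value is stored to a stack slot and reloaded as an integer, and a 32-bit reload of a 64-bit slot on a big-endian target needs a +4 byte bias to find the low word. The bias, demonstrated on host memory (sketch):

#include <cstdint>
#include <cstring>

// Read the low 32 bits of a 64-bit slot: byte offset 4 on big-endian,
// offset 0 on little-endian; the same bias applied to the reload above.
uint32_t loadLow32(const uint64_t &Slot, bool IsLittleEndian) {
  uint32_t V;
  const char *P = reinterpret_cast<const char *>(&Slot);
  std::memcpy(&V, P + (IsLittleEndian ? 0 : 4), sizeof(V));
  return V;
}
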
/// \brief Custom lowers floating point to integer conversions to use
/// the direct move instructions available in ISA 2.07 to avoid the
/// need for load/store combinations.
SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
                                                    SelectionDAG &DAG,
                                                    const SDLoc &dl) const {
  assert(Op.getOperand(0).getValueType().isFloatingPoint());
  SDValue Src = Op.getOperand(0);

  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);

  SDValue Tmp;
  switch (Op.getSimpleValueType().SimpleTy) {
  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
  case MVT::i32:
    Tmp = DAG.getNode(
        Op.getOpcode() == ISD::FP_TO_SINT
            ? PPCISD::FCTIWZ
            : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
        dl, MVT::f64, Src);
    Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp);
    break;
  case MVT::i64:
    assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
           "i64 FP_TO_UINT is supported only with FPCVT");
    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ,
                      dl, MVT::f64, Src);
    Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp);
    break;
  }
  return Tmp;
}

SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                                          const SDLoc &dl) const {
  if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
    return LowerFP_TO_INTDirectMove(Op, DAG, dl);

  ReuseLoadInfo RLI;
  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);

  return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                     RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
}

// We're trying to insert a regular store, S, and then a load, L. If the
// incoming value, O, is a load, we might just be able to have our load use the
// address used by O. However, we don't know if anything else will store to
// that address before we can load from it. To prevent this situation, we need
// to insert our load, L, into the chain as a peer of O. To do this, we give L
// the same chain operand as O, we create a token factor from the chain results
// of O and L, and we replace all uses of O's chain result with that token
// factor (see spliceIntoChain below for this last part).
bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
                                            ReuseLoadInfo &RLI,
                                            SelectionDAG &DAG,
                                            ISD::LoadExtType ET) const {
  SDLoc dl(Op);
  if (ET == ISD::NON_EXTLOAD &&
      (Op.getOpcode() == ISD::FP_TO_UINT ||
       Op.getOpcode() == ISD::FP_TO_SINT) &&
      isOperationLegalOrCustom(Op.getOpcode(),
                               Op.getOperand(0).getValueType())) {

    LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
    return true;
  }

  LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
  if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
      LD->isNonTemporal())
    return false;
  if (LD->getMemoryVT() != MemVT)
    return false;

  RLI.Ptr = LD->getBasePtr();
  if (LD->isIndexed() && !LD->getOffset().isUndef()) {
    assert(LD->getAddressingMode() == ISD::PRE_INC &&
           "Non-pre-inc AM on PPC?");
    RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
                          LD->getOffset());
  }

  RLI.Chain = LD->getChain();
  RLI.MPI = LD->getPointerInfo();
  RLI.IsDereferenceable = LD->isDereferenceable();
  RLI.IsInvariant = LD->isInvariant();
  RLI.Alignment = LD->getAlignment();
  RLI.AAInfo = LD->getAAInfo();
  RLI.Ranges = LD->getRanges();

  RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
  return true;
}

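// Note on RLI.ResChain above: a load's chain is result 1, except for
// pre-inc (indexed) loads, where result 1 is the updated pointer and the
// chain moves to result 2 -- hence SDValue(LD, LD->isIndexed() ? 2 : 1).
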
// Given the head of the old chain, ResChain, insert a token factor containing
// it and NewResChain, and make users of ResChain now be users of that token
// factor.
// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
                                        SDValue NewResChain,
                                        SelectionDAG &DAG) const {
  if (!ResChain)
    return;

  SDLoc dl(NewResChain);

  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                           NewResChain, DAG.getUNDEF(MVT::Other));
  assert(TF.getNode() != NewResChain.getNode() &&
         "A new TF really is required here");

  DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
  DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
}

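// Schematically, the splice performs:
//
//   before:  O --chain--> (users of O)
//   after:   O --chain--> TF(O, NewResChain) --chain--> (users of O)
//
// so the new load is ordered as a peer of O rather than after O's users.
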
/// \brief Analyze the profitability of a direct move: prefer a float load
/// over an int load plus a direct move when the int load has no integer use.
bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
  SDNode *Origin = Op.getOperand(0).getNode();
  if (Origin->getOpcode() != ISD::LOAD)
    return true;

  // If there is no LXSIBZX/LXSIHZX (as on Power8),
  // prefer direct move if the memory size is 1 or 2 bytes.
  MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
  if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
    return true;

  for (SDNode::use_iterator UI = Origin->use_begin(),
                            UE = Origin->use_end();
       UI != UE; ++UI) {

    // Only look at the users of the loaded value.
    if (UI.getUse().get().getResNo() != 0)
      continue;

    if (UI->getOpcode() != ISD::SINT_TO_FP &&
        UI->getOpcode() != ISD::UINT_TO_FP)
      return true;
  }

  return false;
}

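// For example, in 'double f(int *p) { return (double)*p; }' the i32 load
// feeds only a SINT_TO_FP, so this returns false and the lowering prefers
// a float-side load (e.g. lfiwax) over an integer load plus direct move.
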
/// \brief Custom lowers integer to floating point conversions to use
/// the direct move instructions available in ISA 2.07 to avoid the
/// need for load/store combinations.
SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
                                                    SelectionDAG &DAG,
                                                    const SDLoc &dl) const {
  assert((Op.getValueType() == MVT::f32 ||
          Op.getValueType() == MVT::f64) &&
         "Invalid floating point type as target of conversion");
  assert(Subtarget.hasFPCVT() &&
         "Int to FP conversions with direct moves require FPCVT");
  SDValue FP;
  SDValue Src = Op.getOperand(0);
  bool SinglePrec = Op.getValueType() == MVT::f32;
  bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
  bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
  unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) :
                             (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);

  if (WordInt) {
    FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ,
                     dl, MVT::f64, Src);
    FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
  } else {
    FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src);
    FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
  }

  return FP;
}

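// For example, a signed i32 -> f64 conversion becomes the node pair
//     t0: f64 = PPCISD::MTVSRA t_src    (GPR -> VSR direct move)
//     t1: f64 = PPCISD::FCFID t0
// which roughly selects to mtvsrwa + xscvsxddp, with no stack traffic.
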
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);

  if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
    if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
      return SDValue();

    SDValue Value = Op.getOperand(0);
    // The values are now known to be -1 (false) or 1 (true). To convert this
    // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
    // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
    Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);

    SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);

    Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);

    if (Op.getValueType() != MVT::v4f64)
      Value = DAG.getNode(ISD::FP_ROUND, dl,
                          Op.getValueType(), Value,
                          DAG.getIntPtrConstant(1, dl));
    return Value;
  }

  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();

  if (Op.getOperand(0).getValueType() == MVT::i1)
    return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
                       DAG.getConstantFP(1.0, dl, Op.getValueType()),
                       DAG.getConstantFP(0.0, dl, Op.getValueType()));

  // If we have direct moves, we can do the whole conversion and skip the
  // store/load; however, without FPCVT we can't do most conversions.
  if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
      Subtarget.isPPC64() && Subtarget.hasFPCVT())
    return LowerINT_TO_FPDirectMove(Op, DAG, dl);

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                       ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
                                                            : PPCISD::FCFIDS)
                       : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
                                                            : PPCISD::FCFID);
  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                  ? MVT::f32
                  : MVT::f64;

  if (Op.getOperand(0).getValueType() == MVT::i64) {
    SDValue SINT = Op.getOperand(0);
    // When converting to single-precision, we actually need to convert
    // to double-precision first and then round to single-precision.
    // To avoid double-rounding effects during that operation, we have
    // to prepare the input operand.  Bits that might be truncated when
    // converting to double-precision are replaced by a bit that won't
    // be lost at this stage, but is below the single-precision rounding
    // position.
    //
    // However, if -enable-unsafe-fp-math is in effect, accept double
    // rounding to avoid the extra overhead.
    if (Op.getValueType() == MVT::f32 &&
        !Subtarget.hasFPCVT() &&
        !DAG.getTarget().Options.UnsafeFPMath) {

      // Twiddle input to make sure the low 11 bits are zero.  (If this
      // is the case, we are guaranteed the value will fit into the 53 bit
      // mantissa of an IEEE double-precision value without rounding.)
      // If any of those low 11 bits were not zero originally, make sure
      // bit 12 (value 2048) is set instead, so that the final rounding
      // to single-precision gets the correct result.
      SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                                  SINT, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
                          Round, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
      Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                          Round, DAG.getConstant(-2048, dl, MVT::i64));

      // However, we cannot use that value unconditionally: if the magnitude
      // of the input value is small, the bit-twiddling we did above might
      // end up visibly changing the output.  Fortunately, in that case, we
      // don't need to twiddle bits since the original input will convert
      // exactly to double-precision floating-point already.  Therefore,
      // construct a conditional to use the original value if the top 11
      // bits are all sign-bit copies, and use the rounded value computed
      // above otherwise.
      SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
                                 SINT, DAG.getConstant(53, dl, MVT::i32));
      Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
                         Cond, DAG.getConstant(1, dl, MVT::i64));
      Cond = DAG.getSetCC(dl, MVT::i32,
                          Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);

      SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
    }
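    // Worked example of the twiddling above (illustrative): for an i64
    // whose low 11 bits are 0x7FF, the AND/ADD/OR/AND sequence clears those
    // bits and sets bit 11 instead. The value now converts to f64 exactly,
    // yet still looks "inexact" below f32's rounding position, so the final
    // round to single-precision produces the correctly rounded result.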

    ReuseLoadInfo RLI;
    SDValue Bits;

    MachineFunction &MF = DAG.getMachineFunction();
    if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
      Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                         RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasLFIWAX() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasFPCVT() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (((Subtarget.hasLFIWAX() &&
                 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
                (Subtarget.hasFPCVT() &&
                 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
               SINT.getOperand(0).getValueType() == MVT::i32) {
      MachineFrameInfo &MFI = MF.getFrameInfo();
      EVT PtrVT = getPointerTy(DAG.getDataLayout());

      int FrameIdx = MFI.CreateStackObject(4, 4, false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store =
          DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
                       MachinePointerInfo::getFixedStack(
                           DAG.getMachineFunction(), FrameIdx));

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Store;
      RLI.MPI =
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
      RLI.Alignment = 4;

      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
                                     PPCISD::LFIWZX : PPCISD::LFIWAX,
                                     dl, DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
    } else
      Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);

    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);

    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
      FP = DAG.getNode(ISD::FP_ROUND, dl,
                       MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
    return FP;
  }

  assert(Op.getOperand(0).getValueType() == MVT::i32 &&
         "Unhandled INT_TO_FP type in custom expander!");
  // Since we only generate this in 64-bit mode, we can take advantage of
  // 64-bit registers.  In particular, sign extend the input value into the
  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
  // then lfd it and fcfid it.
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  SDValue Ld;
  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
    ReuseLoadInfo RLI;
    bool ReusingLoad;
    if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
                                            DAG))) {
      int FrameIdx = MFI.CreateStackObject(4, 4, false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store =
          DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
                       MachinePointerInfo::getFixedStack(
                           DAG.getMachineFunction(), FrameIdx));

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Store;
      RLI.MPI =
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
      RLI.Alignment = 4;
    }

    MachineMemOperand *MMO =
      MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                              RLI.Alignment, RLI.AAInfo, RLI.Ranges);
    SDValue Ops[] = { RLI.Chain, RLI.Ptr };
    Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
                                   PPCISD::LFIWZX : PPCISD::LFIWAX,
                                 dl, DAG.getVTList(MVT::f64, MVT::Other),
                                 Ops, MVT::i32, MMO);
    if (ReusingLoad)
      spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
  } else {
    assert(Subtarget.isPPC64() &&
           "i32->FP without LFIWAX supported only on PPC64");

    int FrameIdx = MFI.CreateStackObject(8, 8, false);
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

    SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
                                Op.getOperand(0));

    // STD the extended value into the stack slot.
    SDValue Store = DAG.getStore(
        DAG.getEntryNode(), dl, Ext64, FIdx,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));

    // Load the value as a double.
    Ld = DAG.getLoad(
        MVT::f64, dl, Store, FIdx,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
  }

  // FCFID it and return it.
  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
    FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                     DAG.getIntPtrConstant(0, dl));
  return FP;
}

SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  /*
   The rounding mode is in bits 30:31 of FPSCR, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

  FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
  */
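  // As a quick check of the formula, for each FPSCR RN setting:
  //   RN = 00 (nearest): (0 & 3) ^ ((~0 & 3) >> 1) = 0 ^ 1 = 1
  //   RN = 01 (to zero): (1 & 3) ^ ((~1 & 3) >> 1) = 1 ^ 1 = 0
  //   RN = 10 (to +inf): (2 & 3) ^ ((~2 & 3) >> 1) = 2 ^ 0 = 2
  //   RN = 11 (to -inf): (3 & 3) ^ ((~3 & 3) >> 1) = 3 ^ 0 = 3
  // which matches the FLT_ROUNDS encoding above.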

  MachineFunction &MF = DAG.getMachineFunction();
  EVT VT = Op.getValueType();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Save FP Control Word to register
  EVT NodeTys[] = {
    MVT::f64,    // return register
    MVT::Glue    // unused in this context
  };
  SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);

  // Save FP register to stack slot
  int SSFI = MF.getFrameInfo().CreateStackObject(8, 8, false);
  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot,
                               MachinePointerInfo());

  // Load FP Control Word from low 32 bits of stack slot.
  SDValue Four = DAG.getConstant(4, dl, PtrVT);
  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
  SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo());

  // Transform as necessary
  SDValue CWD1 =
    DAG.getNode(ISD::AND, dl, MVT::i32,
                CWD, DAG.getConstant(3, dl, MVT::i32));
  SDValue CWD2 =
    DAG.getNode(ISD::SRL, dl, MVT::i32,
                DAG.getNode(ISD::AND, dl, MVT::i32,
                            DAG.getNode(ISD::XOR, dl, MVT::i32,
                                        CWD, DAG.getConstant(3, dl, MVT::i32)),
                            DAG.getConstant(3, dl, MVT::i32)),
                DAG.getConstant(1, dl, MVT::i32));

  SDValue RetVal =
    DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);

  return DAG.getNode((VT.getSizeInBits() < 16 ?
                      ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
}

SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  SDLoc dl(Op);
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SHL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, dl, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, dl, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
  SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
  SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}

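// Worked example (BitWidth = 32, so the pair holds a 64-bit value; Amt = 40):
// Tmp2 = Hi << 40 and Tmp3 = Lo >> (32 - 40) are both 0, since PPC shifts
// by amounts in [32, 63] produce 0 rather than wrapping; Tmp5 = 8, so
// OutHi = Lo << 8 and OutLo = Lo << 40 = 0 -- a correct 64-bit shift by 40.
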
SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, dl, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, dl, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
  SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
  SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}

SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRA!");

  // Expand into a bunch of logical ops, followed by a select_cc.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, dl, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, dl, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
  SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
  SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
                                  Tmp4, Tmp6, ISD::SETLE);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}

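// Unlike SHL/SRL above, SRA cannot OR partial results together for
// oversized amounts: the bits shifted in are sign copies, not zeros. The
// final select_cc therefore picks Tmp6 = Hi >>a (Amt - BitWidth) whenever
// Amt exceeds BitWidth, and the OR-composed Tmp4 otherwise.
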
//===----------------------------------------------------------------------===//
// Vector related lowering.
//

/// BuildSplatI - Build a canonical splati of Val with an element size of
/// SplatSize.  Cast the result to VT.
static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
                           SelectionDAG &DAG, const SDLoc &dl) {
  assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");

  static const MVT VTys[] = { // canonical VT to use for each size.
    MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
  };

  EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];

  // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
  if (Val == -1)
    SplatSize = 1;

  EVT CanonicalVT = VTys[SplatSize-1];

  // Build a canonical splat for this value.
  return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
}

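// For example, BuildSplatI(1, 4, MVT::v4i32, DAG, dl) builds a v4i32 splat
// of 1 (selectable as vspltisw 1), while Val == -1 is always canonicalized
// to the byte splat (vspltisb -1), since all-ones is identical at any
// element size.
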
/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
                                const SDLoc &dl, EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = Op.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), Op);
}

/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
                                SelectionDAG &DAG, const SDLoc &dl,
                                EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
}

/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
                                SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
                                EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
}

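// These helpers are how the lowering below emits AltiVec operations that
// have no generic ISD opcode, e.g.
//     BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, Res, Res, DAG, dl)
// wraps the operands in an INTRINSIC_WO_CHAIN node for vslw.
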
/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
/// amount.  The result has the specified value type.
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
                           SelectionDAG &DAG, const SDLoc &dl) {
  // Force LHS/RHS to be the right type.
  LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
  RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);

  int Ops[16];
  for (unsigned i = 0; i != 16; ++i)
    Ops[i] = i + Amt;
  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
  return DAG.getNode(ISD::BITCAST, dl, VT, T);
}

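// For Amt = 4 this produces the mask <4, 5, ..., 19>, i.e. the 16 bytes of
// LHS:RHS starting at byte 4 -- exactly the vsldoi semantics.
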
/// Do we have an efficient pattern in a .td file for this node?
///
/// \param V - pointer to the BuildVectorSDNode being matched
/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
///
/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
/// the opposite is true (expansion is beneficial) are:
/// - The node builds a vector out of integers that are not 32 or 64-bits
/// - The node builds a vector out of constants
/// - The node is a "load-and-splat"
/// In all other cases, we will choose to keep the BUILD_VECTOR.
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
                                            bool HasDirectMove,
                                            bool HasP8Vector) {
  EVT VecVT = V->getValueType(0);
  bool RightType = VecVT == MVT::v2f64 ||
    (HasP8Vector && VecVT == MVT::v4f32) ||
    (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
  if (!RightType)
    return false;

  bool IsSplat = true;
  bool IsLoad = false;
  SDValue Op0 = V->getOperand(0);

  // This function is called in a block that confirms the node is not a constant
  // splat. So a constant BUILD_VECTOR here means the vector is built out of
  // different constants.
  if (V->isConstant())
    return false;
  for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
    if (V->getOperand(i).isUndef())
      return false;
    // We want to expand nodes that represent load-and-splat even if the
    // loaded value is a floating point truncation or conversion to int.
    if (V->getOperand(i).getOpcode() == ISD::LOAD ||
        (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
        (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
        (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
      IsLoad = true;
    // If the operands are different or the input is not a load and has more
    // uses than just this BV node, then it isn't a splat.
    if (V->getOperand(i) != Op0 ||
        (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
      IsSplat = false;
  }
  return !(IsSplat && IsLoad);
}

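// For example, a v4i32 node built from four distinct loads is not a splat,
// so this returns true (keep the BUILD_VECTOR; there is an efficient
// pattern for it), while a v4i32 splat of a single loaded value is a
// load-and-splat and returns false (expansion is better).
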
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.  If we CAN select this case, and if it
// selects to a single instruction, return Op.  Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc dl(Op);
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");

  if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
    // We first build an i32 vector, load it into a QPX register,
    // then convert it to a floating-point vector and compare it
    // to a zero vector to get the boolean result.
    MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
    int FrameIdx = MFI.CreateStackObject(16, 16, false);
    MachinePointerInfo PtrInfo =
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

    assert(BVN->getNumOperands() == 4 &&
      "BUILD_VECTOR for v4i1 does not have 4 operands");

    bool IsConst = true;
    for (unsigned i = 0; i < 4; ++i) {
      if (BVN->getOperand(i).isUndef()) continue;
      if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
        IsConst = false;
        break;
      }
    }

    if (IsConst) {
      Constant *One =
        ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
      Constant *NegOne =
        ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);

      Constant *CV[4];
      for (unsigned i = 0; i < 4; ++i) {
        if (BVN->getOperand(i).isUndef())
          CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
        else if (isNullConstant(BVN->getOperand(i)))
          CV[i] = NegOne;
        else
          CV[i] = One;
      }

      Constant *CP = ConstantVector::get(CV);
      SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()),
                                          16 /* alignment */);

      SDValue Ops[] = {DAG.getEntryNode(), CPIdx};
      SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other});
      return DAG.getMemIntrinsicNode(
          PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32,
          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
    }

    SmallVector<SDValue, 4> Stores;
    for (unsigned i = 0; i < 4; ++i) {
      if (BVN->getOperand(i).isUndef()) continue;

      unsigned Offset = 4*i;
      SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
      Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);

      unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize();
      if (StoreSize > 4) {
        Stores.push_back(
            DAG.getTruncStore(DAG.getEntryNode(), dl, BVN->getOperand(i), Idx,
                              PtrInfo.getWithOffset(Offset), MVT::i32));
      } else {
        SDValue StoreValue = BVN->getOperand(i);
        if (StoreSize < 4)
          StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue);

        Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, StoreValue, Idx,
                                      PtrInfo.getWithOffset(Offset)));
      }
    }

    SDValue StoreChain;
    if (!Stores.empty())
      StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
    else
      StoreChain = DAG.getEntryNode();

    // Now load from v4i32 into the QPX register; this will extend it to
    // v4i64 but not yet convert it to a floating point. Nevertheless, this
    // is typed as v4f64 because the QPX register integer states are not
    // explicitly represented.

    SDValue Ops[] = {StoreChain,
                     DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32),
                     FIdx};
    SDVTList VTs = DAG.getVTList({MVT::v4f64, /*chain*/ MVT::Other});

    SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,
      dl, VTs, Ops, MVT::v4i32, PtrInfo);
    LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
      DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32),
      LoadedVect);

    SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::v4f64);

    return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ);
  }

  // All other QPX vectors are handled by generic code.
  if (Subtarget.hasQPX())
    return SDValue();

  // Check if this is a splat of a constant value.
  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                             HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
      SplatBitSize > 32) {
    // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
    // lowered to VSX instructions under certain conditions.
    // Without VSX, there is no pattern more efficient than expanding the node.
    if (Subtarget.hasVSX() &&
        haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
                                        Subtarget.hasP8Vector()))
      return Op;
    return SDValue();
  }

  unsigned SplatBits = APSplatBits.getZExtValue();
  unsigned SplatUndef = APSplatUndef.getZExtValue();
  unsigned SplatSize = SplatBitSize / 8;

  // First, handle single instruction cases.

  // All zeros?
  if (SplatBits == 0) {
    // Canonicalize all zero vectors to be v4i32.
    if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
      SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
      Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
    }
    return Op;
  }

  // We have XXSPLTIB for constant splats one byte wide
  if (Subtarget.hasP9Vector() && SplatSize == 1) {
    // This is a splat of 1-byte elements with some elements potentially undef.
    // Rather than trying to match undef in the SDAG patterns, ensure that all
    // elements are the same constant.
    if (HasAnyUndefs || ISD::isBuildVectorAllOnes(BVN)) {
      SmallVector<SDValue, 16> Ops(16, DAG.getConstant(SplatBits,
                                                       dl, MVT::i32));
      SDValue NewBV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
      if (Op.getValueType() != MVT::v16i8)
        return DAG.getBitcast(Op.getValueType(), NewBV);
      return NewBV;
    }

    // BuildVectorSDNode::isConstantSplat() is actually pretty smart. It'll
    // detect that constant splats like v8i16: 0xABAB are really just splats
    // of a 1-byte constant. In this case, we need to convert the node to a
    // splat of v16i8 and a bitcast.
    if (Op.getValueType() != MVT::v16i8)
      return DAG.getBitcast(Op.getValueType(),
                            DAG.getConstant(SplatBits, dl, MVT::v16i8));

    return Op;
  }

  // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
  int32_t SextVal = (int32_t(SplatBits << (32-SplatBitSize)) >>
                     (32-SplatBitSize));
  if (SextVal >= -16 && SextVal <= 15)
    return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);

  // Two instruction sequences.

  // If this value is in the range [-32,30] and is even, use:
  //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
  // If this value is in the range [17,31] and is odd, use:
  //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
  // If this value is in the range [-31,-17] and is odd, use:
  //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
  // Note the last two are three-instruction sequences.
  if (SextVal >= -32 && SextVal <= 31) {
    // To avoid having these optimizations undone by constant folding,
    // we convert to a pseudo that will be expanded later into one of
    // the above forms.
    SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
    EVT VT = (SplatSize == 1 ? MVT::v16i8 :
              (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
    SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
    SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
    if (VT == Op.getValueType())
      return RetVal;
    else
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
  }

  // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
  // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
  // for fneg/fabs.
  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
    // Make -1 and vspltisw -1:
    SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);

    // Make the VSLW intrinsic, computing 0x8000_0000.
    SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
                                   OnesV, DAG, dl);

    // xor by OnesV to invert it.
    Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
  }

  // Check to see if this is a wide variety of vsplti*, binop self cases.
  static const signed char SplatCsts[] = {
    -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
    -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
  };

  for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
    // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
    // cases which are ambiguous (e.g. formation of 0x8000_0000).  'vsplti -1'
    int i = SplatCsts[idx];

    // Figure out what shift amount will be used by altivec if shifted by i in
    // this splat size.
    unsigned TypeShiftAmt = i & (SplatBitSize-1);

    // vsplti + shl self.
    if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
        Intrinsic::ppc_altivec_vslw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + srl self.
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
        Intrinsic::ppc_altivec_vsrw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + sra self.
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
        Intrinsic::ppc_altivec_vsraw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + rol self.
    if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
                         ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
        Intrinsic::ppc_altivec_vrlw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // t = vsplti c, result = vsldoi t, t, 1
    if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 2
    if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 3
    if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
  }

  return SDValue();
}

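// Example of the two-instruction path above: a v8i16 splat of 24 is outside
// the vspltish range [-16, 15], so it becomes VADD_SPLAT(24, 2), later
// expanded to vspltish 12 ; vadduhm -- 12 + 12 = 24 in every element.
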
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
                                      SDValue RHS, SelectionDAG &DAG,
                                      const SDLoc &dl) {
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);

  enum {
    OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
    OP_VMRGHW,
    OP_VMRGLW,
    OP_VSPLTISW0,
    OP_VSPLTISW1,
    OP_VSPLTISW2,
    OP_VSPLTISW3,
    OP_VSLDOI4,
    OP_VSLDOI8,
    OP_VSLDOI12
  };

  if (OpNum == OP_COPY) {
    if (LHSID == (1*9+2)*9+3) return LHS;
    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
    return RHS;
  }

  SDValue OpLHS, OpRHS;
  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);

  int ShufIdxs[16];
  switch (OpNum) {
  default: llvm_unreachable("Unknown i32 permute!");
  case OP_VMRGHW:
    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
    break;
  case OP_VMRGLW:
    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
    break;
  case OP_VSPLTISW0:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+0;
    break;
  case OP_VSPLTISW1:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+4;
    break;
  case OP_VSPLTISW2:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+8;
    break;
  case OP_VSPLTISW3:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+12;
    break;
  case OP_VSLDOI4:
    return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI8:
    return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI12:
    return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
  }
  EVT VT = OpLHS.getValueType();
  OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
  OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
  return DAG.getNode(ISD::BITCAST, dl, VT, T);
}

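// Note on the OP_COPY check above: a shuffle ID packs four base-9 digits,
// one per 4-byte element (8 = undef), so (1*9+2)*9+3 encodes the identity
// mask <0,1,2,3> and ((4*9+5)*9+6)*9+7 encodes <4,5,6,7>, i.e. "copy RHS".
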
/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
/// is a shuffle we can handle in a single instruction, return it.  Otherwise,
/// return the code it can be lowered into.  Worst case, it can always be
/// lowered into a vperm.
SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
  EVT VT = Op.getValueType();
  bool isLittleEndian = Subtarget.isLittleEndian();

  unsigned ShiftElts, InsertAtByte;
  bool Swap = false;
  if (Subtarget.hasP9Vector() &&
      PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
                           isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
    SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
    if (ShiftElts) {
      SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
                                DAG.getConstant(ShiftElts, dl, MVT::i32));
      SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
                                DAG.getConstant(InsertAtByte, dl, MVT::i32));
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
    }
    SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
                              DAG.getConstant(InsertAtByte, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
  }

  if (Subtarget.hasVSX() &&
      PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
    SDValue Conv2 =
        DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);

    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
  }

  if (Subtarget.hasVSX() &&
      PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
    SDValue Conv2 =
        DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);

    SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
  }

  if (Subtarget.hasP9Vector()) {
    if (PPC::isXXBRHShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
      SDValue ReveHWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v8i16, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
    } else if (PPC::isXXBRWShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
      SDValue ReveWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v4i32, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
    } else if (PPC::isXXBRDShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
      SDValue ReveDWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
    } else if (PPC::isXXBRQShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
      SDValue ReveQWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v1i128, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
    }
  }

  if (Subtarget.hasVSX()) {
    if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
      int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);

      // If the source for the shuffle is a scalar_to_vector that came from a
      // 32-bit load, it will have used LXVWSX so we don't need to splat again.
      if (Subtarget.hasP9Vector() &&
          ((isLittleEndian && SplatIdx == 3) ||
           (!isLittleEndian && SplatIdx == 0))) {
        SDValue Src = V1.getOperand(0);
        if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR &&
            Src.getOperand(0).getOpcode() == ISD::LOAD &&
            Src.getOperand(0).hasOneUse())
          return V1;
      }
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
      SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
                                  DAG.getConstant(SplatIdx, dl, MVT::i32));
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
    }

    // Left shifts of 8 bytes are actually swaps. Convert accordingly.
    if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
      SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
    }
  }

  if (Subtarget.hasQPX()) {
    if (VT.getVectorNumElements() != 4)
      return SDValue();

    if (V2.isUndef()) V2 = V1;

    int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp);
    if (AlignIdx != -1) {
      return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2,
                         DAG.getConstant(AlignIdx, dl, MVT::i32));
    } else if (SVOp->isSplat()) {
      int SplatIdx = SVOp->getSplatIndex();
      if (SplatIdx >= 4) {
        std::swap(V1, V2);
        SplatIdx -= 4;
      }

      return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
                         DAG.getConstant(SplatIdx, dl, MVT::i32));
    }

    // Lower this into a qvgpci/qvfperm pair.

    // Compute the qvgpci literal
    unsigned idx = 0;
    for (unsigned i = 0; i < 4; ++i) {
      int m = SVOp->getMaskElt(i);
      unsigned mm = m >= 0 ? (unsigned) m : i;
      idx |= mm << (3-i)*3;
    }

    SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64,
                             DAG.getConstant(idx, dl, MVT::i32));
    return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3);
  }

  // Cases that are handled by instructions that take permute immediates
  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
  // selected by the instruction selector.
  if (V2.isUndef()) {
    if (PPC::isSplatShuffleMask(SVOp, 1) ||
        PPC::isSplatShuffleMask(SVOp, 2) ||
        PPC::isSplatShuffleMask(SVOp, 4) ||
        PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
        PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
        PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
        PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
        PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
        PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
        (Subtarget.hasP8Altivec() && (
         PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
         PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
         PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
      return Op;
    }
  }

  // Altivec has a variety of "shuffle immediates" that take two vector inputs
  // and produce a fixed permutation.  If any of these match, do not lower to
  // VPERM.
  unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
  if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
      PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
      PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
      PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
      (Subtarget.hasP8Altivec() && (
       PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
       PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
       PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
    return Op;

  // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
  // perfect shuffle table to emit an optimal matching sequence.
  ArrayRef<int> PermMask = SVOp->getMask();

  unsigned PFIndexes[4];
  bool isFourElementShuffle = true;
  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
    unsigned EltNo = 8;   // Start out undef.
    for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
      if (PermMask[i*4+j] < 0)
        continue;   // Undef, ignore it.

      unsigned ByteSource = PermMask[i*4+j];
      if ((ByteSource & 3) != j) {
        isFourElementShuffle = false;
        break;
      }

      if (EltNo == 8) {
        EltNo = ByteSource/4;
      } else if (EltNo != ByteSource/4) {
        isFourElementShuffle = false;
        break;
      }
    }
    PFIndexes[i] = EltNo;
  }

  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
  // perfect shuffle vector to determine if it is cost effective to do this as
62
  // discrete instructions, or whether we should use a vperm.
8105
62
  // For now, we skip this for little endian until such time as we have a
8106
62
  // little-endian perfect shuffle table.
8107
62
  if (
isFourElementShuffle && 62
!isLittleEndian50
) {
8108
34
    // Compute the index in the perfect shuffle table.
8109
34
    unsigned PFTableIndex =
8110
34
      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
8111
34
8112
34
    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
8113
34
    unsigned Cost  = (PFEntry >> 30);
8114
34
8115
34
    // Determining when to avoid vperm is tricky.  Many things affect the cost
8116
34
    // of vperm, particularly how many times the perm mask needs to be computed.
8117
34
    // For example, if the perm mask can be hoisted out of a loop or is already
8118
34
    // used (perhaps because there are multiple permutes with the same shuffle
8119
34
    // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
8120
34
    // the loop requires an extra register.
8121
34
    //
8122
34
    // As a compromise, we only emit discrete instructions if the shuffle can be
8123
34
    // generated in 3 or fewer operations.  When we have loop information
8124
34
    // available, if this block is within a loop, we should avoid using vperm
8125
34
    // for 3-operation perms and use a constant pool load instead.
8126
34
    if (Cost < 3)
8127
22
      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
8128
40
  }
8129
40
8130
40
  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
8131
40
  // vector that will get spilled to the constant pool.
8132
40
  
if (40
V2.isUndef()40
)
V2 = V132
;
8133
40
8134
40
  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
8135
40
  // that it is in input element units, not in bytes.  Convert now.
8136
40
8137
40
  // For little endian, the order of the input vectors is reversed, and
8138
40
  // the permutation mask is complemented with respect to 31.  This is
8139
40
  // necessary to produce proper semantics with the big-endian-biased vperm
8140
40
  // instruction.
8141
40
  EVT EltVT = V1.getValueType().getVectorElementType();
8142
40
  unsigned BytesPerElement = EltVT.getSizeInBits()/8;
8143
40
8144
40
  SmallVector<SDValue, 16> ResultMask;
8145
680
  for (unsigned i = 0, e = VT.getVectorNumElements(); 
i != e680
;
++i640
) {
8146
640
    unsigned SrcElt = PermMask[i] < 0 ? 
046
:
PermMask[i]594
;
8147
640
8148
1.28k
    for (unsigned j = 0; 
j != BytesPerElement1.28k
;
++j640
)
8149
640
      
if (640
isLittleEndian640
)
8150
304
        ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
8151
304
                                             dl, MVT::i32));
8152
640
      else
8153
336
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
8154
336
                                             MVT::i32));
8155
640
  }
8156
40
8157
40
  SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
8158
40
  if (isLittleEndian)
8159
19
    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
8160
19
                       V2, V1, VPermMask);
8161
40
  else
8162
21
    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
8163
21
                       V1, V2, VPermMask);
8164
0
}
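
// A standalone sketch, separate from the lowering above (the helper name
// buildVPermMask is hypothetical): it models the ResultMask loop, expanding
// an element-unit shuffle mask into byte units and complementing each index
// against 31 for little endian, which is what makes the big-endian-biased
// vperm pick the intended bytes once V1 and V2 are also swapped.
#include <cstdint>
#include <vector>

static std::vector<uint8_t> buildVPermMask(const std::vector<int> &PermMask,
                                           unsigned BytesPerElement,
                                           bool IsLittleEndian) {
  std::vector<uint8_t> Bytes;
  for (int Elt : PermMask) {
    // Undefined elements are treated as if they read element 0.
    unsigned SrcElt = Elt < 0 ? 0 : static_cast<unsigned>(Elt);
    for (unsigned J = 0; J != BytesPerElement; ++J) {
      unsigned Idx = SrcElt * BytesPerElement + J;
      Bytes.push_back(IsLittleEndian ? 31 - Idx : Idx);
    }
  }
  return Bytes;
}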

/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
/// vector comparison.  If it is, return true and fill in Opc/isDot with
/// information about the intrinsic.
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
                                 bool &isDot, const PPCSubtarget &Subtarget) {
  unsigned IntrinsicID =
      cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
  CompareOpc = -1;
  isDot = false;
  switch (IntrinsicID) {
  default:
    return false;
  // Comparison predicates.
  case Intrinsic::ppc_altivec_vcmpbfp_p:
    CompareOpc = 966;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpeqfp_p:
    CompareOpc = 198;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequb_p:
    CompareOpc = 6;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequh_p:
    CompareOpc = 70;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequw_p:
    CompareOpc = 134;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequd_p:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 199;
      isDot = true;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpneb_p:
  case Intrinsic::ppc_altivec_vcmpneh_p:
  case Intrinsic::ppc_altivec_vcmpnew_p:
  case Intrinsic::ppc_altivec_vcmpnezb_p:
  case Intrinsic::ppc_altivec_vcmpnezh_p:
  case Intrinsic::ppc_altivec_vcmpnezw_p:
    if (Subtarget.hasP9Altivec()) {
      switch (IntrinsicID) {
      default:
        llvm_unreachable("Unknown comparison intrinsic.");
      case Intrinsic::ppc_altivec_vcmpneb_p:
        CompareOpc = 7;
        break;
      case Intrinsic::ppc_altivec_vcmpneh_p:
        CompareOpc = 71;
        break;
      case Intrinsic::ppc_altivec_vcmpnew_p:
        CompareOpc = 135;
        break;
      case Intrinsic::ppc_altivec_vcmpnezb_p:
        CompareOpc = 263;
        break;
      case Intrinsic::ppc_altivec_vcmpnezh_p:
        CompareOpc = 327;
        break;
      case Intrinsic::ppc_altivec_vcmpnezw_p:
        CompareOpc = 391;
        break;
      }
      isDot = true;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp_p:
    CompareOpc = 454;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtfp_p:
    CompareOpc = 710;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsb_p:
    CompareOpc = 774;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsh_p:
    CompareOpc = 838;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsw_p:
    CompareOpc = 902;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsd_p:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 967;
      isDot = true;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub_p:
    CompareOpc = 518;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuh_p:
    CompareOpc = 582;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuw_p:
    CompareOpc = 646;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtud_p:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 711;
      isDot = true;
    } else
      return false;
    break;

  // VSX predicate comparisons use the same infrastructure
  case Intrinsic::ppc_vsx_xvcmpeqdp_p:
  case Intrinsic::ppc_vsx_xvcmpgedp_p:
  case Intrinsic::ppc_vsx_xvcmpgtdp_p:
  case Intrinsic::ppc_vsx_xvcmpeqsp_p:
  case Intrinsic::ppc_vsx_xvcmpgesp_p:
  case Intrinsic::ppc_vsx_xvcmpgtsp_p:
    if (Subtarget.hasVSX()) {
      switch (IntrinsicID) {
      case Intrinsic::ppc_vsx_xvcmpeqdp_p:
        CompareOpc = 99;
        break;
      case Intrinsic::ppc_vsx_xvcmpgedp_p:
        CompareOpc = 115;
        break;
      case Intrinsic::ppc_vsx_xvcmpgtdp_p:
        CompareOpc = 107;
        break;
      case Intrinsic::ppc_vsx_xvcmpeqsp_p:
        CompareOpc = 67;
        break;
      case Intrinsic::ppc_vsx_xvcmpgesp_p:
        CompareOpc = 83;
        break;
      case Intrinsic::ppc_vsx_xvcmpgtsp_p:
        CompareOpc = 75;
        break;
      }
      isDot = true;
    } else
      return false;
    break;

  // Normal Comparisons.
  case Intrinsic::ppc_altivec_vcmpbfp:
    CompareOpc = 966;
    break;
  case Intrinsic::ppc_altivec_vcmpeqfp:
    CompareOpc = 198;
    break;
  case Intrinsic::ppc_altivec_vcmpequb:
    CompareOpc = 6;
    break;
  case Intrinsic::ppc_altivec_vcmpequh:
    CompareOpc = 70;
    break;
  case Intrinsic::ppc_altivec_vcmpequw:
    CompareOpc = 134;
    break;
  case Intrinsic::ppc_altivec_vcmpequd:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 199;
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpneb:
  case Intrinsic::ppc_altivec_vcmpneh:
  case Intrinsic::ppc_altivec_vcmpnew:
  case Intrinsic::ppc_altivec_vcmpnezb:
  case Intrinsic::ppc_altivec_vcmpnezh:
  case Intrinsic::ppc_altivec_vcmpnezw:
    if (Subtarget.hasP9Altivec())
      switch (IntrinsicID) {
      default:
        llvm_unreachable("Unknown comparison intrinsic.");
      case Intrinsic::ppc_altivec_vcmpneb:
        CompareOpc = 7;
        break;
      case Intrinsic::ppc_altivec_vcmpneh:
        CompareOpc = 71;
        break;
      case Intrinsic::ppc_altivec_vcmpnew:
        CompareOpc = 135;
        break;
      case Intrinsic::ppc_altivec_vcmpnezb:
        CompareOpc = 263;
        break;
      case Intrinsic::ppc_altivec_vcmpnezh:
        CompareOpc = 327;
        break;
      case Intrinsic::ppc_altivec_vcmpnezw:
        CompareOpc = 391;
        break;
      }
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp:
    CompareOpc = 454;
    break;
  case Intrinsic::ppc_altivec_vcmpgtfp:
    CompareOpc = 710;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsb:
    CompareOpc = 774;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsh:
    CompareOpc = 838;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsw:
    CompareOpc = 902;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsd:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 967;
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub:
    CompareOpc = 518;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuh:
    CompareOpc = 582;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuw:
    CompareOpc = 646;
    break;
  case Intrinsic::ppc_altivec_vcmpgtud:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 711;
    else
      return false;
    break;
  }
  return true;
}

/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
/// lower, do it, otherwise return null.
SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                   SelectionDAG &DAG) const {
  unsigned IntrinsicID =
    cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  SDLoc dl(Op);

  if (IntrinsicID == Intrinsic::thread_pointer) {
    // Reads the thread pointer register, used for __builtin_thread_pointer.
    if (Subtarget.isPPC64())
      return DAG.getRegister(PPC::X13, MVT::i64);
    return DAG.getRegister(PPC::R2, MVT::i32);
  }

  // We are looking for absolute values here.
  // The idea is to try to fit one of two patterns:
  //  max (a, (0-a))  OR  max ((0-a), a)
  if (Subtarget.hasP9Vector() &&
      (IntrinsicID == Intrinsic::ppc_altivec_vmaxsw ||
       IntrinsicID == Intrinsic::ppc_altivec_vmaxsh ||
       IntrinsicID == Intrinsic::ppc_altivec_vmaxsb)) {
    SDValue V1 = Op.getOperand(1);
    SDValue V2 = Op.getOperand(2);
    if (V1.getSimpleValueType() == V2.getSimpleValueType() &&
        (V1.getSimpleValueType() == MVT::v4i32 ||
         V1.getSimpleValueType() == MVT::v8i16 ||
         V1.getSimpleValueType() == MVT::v16i8)) {
      if (V1.getOpcode() == ISD::SUB &&
          ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
          V1.getOperand(1) == V2) {
        // Generate the abs instruction with the operands
        return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
      }

      if (V2.getOpcode() == ISD::SUB &&
          ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
          V2.getOperand(1) == V1) {
        // Generate the abs instruction with the operands
        return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
      }
    }
  }

  // If this is a lowered altivec predicate compare, CompareOpc is set to the
  // opcode number of the comparison.
  int CompareOpc;
  bool isDot;
  if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
    return SDValue();    // Don't custom lower most intrinsics.

  // If this is a non-dot comparison, make the VCMP node and we are done.
  if (!isDot) {
    SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
                              Op.getOperand(1), Op.getOperand(2),
                              DAG.getConstant(CompareOpc, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
  }

  // Create the PPCISD altivec 'dot' comparison node.
  SDValue Ops[] = {
    Op.getOperand(2),  // LHS
    Op.getOperand(3),  // RHS
    DAG.getConstant(CompareOpc, dl, MVT::i32)
  };
  EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
  SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);

  // Now that we have the comparison, emit a copy from the CR to a GPR.
  // This is flagged to the above dot comparison.
  SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
                              DAG.getRegister(PPC::CR6, MVT::i32),
                              CompNode.getValue(1));

  // Unpack the result based on how the target uses it.
  unsigned BitNo;   // Bit # of CR6.
  bool InvertBit;   // Invert result?
  switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
  default:  // Can't happen, don't crash on invalid number though.
  case 0:   // Return the value of the EQ bit of CR6.
    BitNo = 0; InvertBit = false;
    break;
  case 1:   // Return the inverted value of the EQ bit of CR6.
    BitNo = 0; InvertBit = true;
    break;
  case 2:   // Return the value of the LT bit of CR6.
    BitNo = 2; InvertBit = false;
    break;
  case 3:   // Return the inverted value of the LT bit of CR6.
    BitNo = 2; InvertBit = true;
    break;
  }

  // Shift the bit into the low position.
  Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
                      DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
  // Isolate the bit.
  Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
                      DAG.getConstant(1, dl, MVT::i32));

  // If we are supposed to, toggle the bit.
  if (InvertBit)
    Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
                        DAG.getConstant(1, dl, MVT::i32));
  return Flags;
}
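
// A standalone sketch of the CR6 unpacking above (the helper is hypothetical,
// and the bit layout is an assumption inferred from the shift amounts used):
// after MFOCRF the CR6 field occupies bits 7..4 of the GPR, so
// "8 - (3 - BitNo)" shifts EQ (BitNo == 0) down from bit 5 and LT
// (BitNo == 2) down from bit 7, matching the SRL/AND/XOR sequence.
#include <cstdint>

static uint32_t unpackCR6Bit(uint32_t Flags, unsigned BitNo, bool InvertBit) {
  uint32_t Bit = (Flags >> (8 - (3 - BitNo))) & 1; // shift into LSB, isolate
  return InvertBit ? Bit ^ 1 : Bit;                // optionally toggle it
}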

SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
                                               SelectionDAG &DAG) const {
  // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
  // the beginning of the argument list.
  int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
  SDLoc DL(Op);
  switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
  case Intrinsic::ppc_cfence: {
    assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
    assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
    return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
                                      DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
                                                  Op.getOperand(ArgStart + 1)),
                                      Op.getOperand(0)),
                   0);
  }
  default:
    break;
  }
  return SDValue();
}

SDValue PPCTargetLowering::LowerREM(SDValue Op, SelectionDAG &DAG) const {
  // Check for a DIV with the same operands as this REM.
  for (auto UI : Op.getOperand(1)->uses()) {
    if ((Op.getOpcode() == ISD::SREM && UI->getOpcode() == ISD::SDIV) ||
        (Op.getOpcode() == ISD::UREM && UI->getOpcode() == ISD::UDIV))
      if (UI->getOperand(0) == Op.getOperand(0) &&
          UI->getOperand(1) == Op.getOperand(1))
        return SDValue();
  }
  return Op;
}
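
// A standalone model of why LowerREM declines to keep the REM node when a
// matching DIV exists (illustrative only): expanding a remainder as
// a - (a / b) * b lets the backend reuse the division that is already there,
// whereas a lone remainder is kept so it can be selected directly.
static int remGivenSharedDiv(int A, int B) {
  int Quot = A / B;    // the SDIV already present among the operands' users
  return A - Quot * B; // the expanded SREM rides on it; no second divide
}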

SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc dl(Op);
  // For v2i64 (VSX), we can pattern match the v2i32 case (using fp <-> int
  // instructions), but for smaller types, we need to first extend up to v2i32
  // before going farther.
  if (Op.getValueType() == MVT::v2i64) {
    EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    if (ExtVT != MVT::v2i32) {
      Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0));
      Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op,
                       DAG.getValueType(EVT::getVectorVT(*DAG.getContext(),
                                        ExtVT.getVectorElementType(), 4)));
      Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op);
      Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op,
                       DAG.getValueType(MVT::v2i32));
    }

    return Op;
  }

  return SDValue();
}

SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc dl(Op);
  // Create a stack slot that is 16-byte aligned.
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = MFI.CreateStackObject(16, 16, false);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  // Store the input value into Value#0 of the stack slot.
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
                               MachinePointerInfo());
  // Load it out.
  return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
}

SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                  SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
         "Should only be called for ISD::INSERT_VECTOR_ELT");
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  // We have legal lowering for constant indices but not for variable ones.
  if (C)
    return Op;
  return SDValue();
}

SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDNode *N = Op.getNode();

  assert(N->getOperand(0).getValueType() == MVT::v4i1 &&
         "Unknown extract_vector_elt type");

  SDValue Value = N->getOperand(0);

  // The first part of this is like the store lowering except that we don't
  // need to track the chain.

  // The values are now known to be -1 (false) or 1 (true). To convert this
  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
  // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);

  // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
  // understand how to form the extending load.
  SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);

  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);

  // Now convert to an integer and store.
  Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
    DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
    Value);

  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = MFI.CreateStackObject(16, 16, false);
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  SDValue StoreChain = DAG.getEntryNode();
  SDValue Ops[] = {StoreChain,
                   DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
                   Value, FIdx};
  SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);

  StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
    dl, VTs, Ops, MVT::v4i32, PtrInfo);

  // Extract the value requested.
  unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
  SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
  Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);

  SDValue IntVal =
      DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset));

  if (!Subtarget.useCRBits())
    return IntVal;

  return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal);
}
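
// A standalone model of the QBFLT/FMA step used above and in the v4i1 store
// lowering below: QPX booleans are the floating values -1.0 (false) and 1.0
// (true), and a single fma maps them onto 0.0/1.0 because
// 0.5*V + 0.5 == (V + 1.0) * 0.5.
#include <cassert>

static double qpxBoolToBit(double V) { // V is -1.0 or 1.0
  double Bit = V * 0.5 + 0.5;          // the FMA with the 0.5 splat
  assert(Bit == 0.0 || Bit == 1.0);
  return Bit;
}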

/// Lowering for QPX v4i1 loads
SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc dl(Op);
  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
  SDValue LoadChain = LN->getChain();
  SDValue BasePtr = LN->getBasePtr();

  if (Op.getValueType() == MVT::v4f64 ||
      Op.getValueType() == MVT::v4f32) {
    EVT MemVT = LN->getMemoryVT();
    unsigned Alignment = LN->getAlignment();

    // If this load is properly aligned, then it is legal.
    if (Alignment >= MemVT.getStoreSize())
      return Op;

    EVT ScalarVT = Op.getValueType().getScalarType(),
        ScalarMemVT = MemVT.getScalarType();
    unsigned Stride = ScalarMemVT.getStoreSize();

    SDValue Vals[4], LoadChains[4];
    for (unsigned Idx = 0; Idx < 4; ++Idx) {
      SDValue Load;
      if (ScalarVT != ScalarMemVT)
        Load = DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain,
                              BasePtr,
                              LN->getPointerInfo().getWithOffset(Idx * Stride),
                              ScalarMemVT, MinAlign(Alignment, Idx * Stride),
                              LN->getMemOperand()->getFlags(), LN->getAAInfo());
      else
        Load = DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr,
                           LN->getPointerInfo().getWithOffset(Idx * Stride),
                           MinAlign(Alignment, Idx * Stride),
                           LN->getMemOperand()->getFlags(), LN->getAAInfo());

      if (Idx == 0 && LN->isIndexed()) {
        assert(LN->getAddressingMode() == ISD::PRE_INC &&
               "Unknown addressing mode on vector load");
        Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(),
                                  LN->getAddressingMode());
      }

      Vals[Idx] = Load;
      LoadChains[Idx] = Load.getValue(1);

      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                            DAG.getConstant(Stride, dl,
                                            BasePtr.getValueType()));
    }

    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
    SDValue Value = DAG.getBuildVector(Op.getValueType(), dl, Vals);

    if (LN->isIndexed()) {
      SDValue RetOps[] = { Value, Vals[0].getValue(1), TF };
      return DAG.getMergeValues(RetOps, dl);
    }

    SDValue RetOps[] = { Value, TF };
    return DAG.getMergeValues(RetOps, dl);
  }

  assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower");
  assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported");

  // To lower v4i1 from a byte array, we load the byte elements of the
  // vector and then reuse the BUILD_VECTOR logic.

  SDValue VectElmts[4], VectElmtChains[4];
  for (unsigned i = 0; i < 4; ++i) {
    SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);

    VectElmts[i] = DAG.getExtLoad(
        ISD::EXTLOAD, dl, MVT::i32, LoadChain, Idx,
        LN->getPointerInfo().getWithOffset(i), MVT::i8,
        /* Alignment = */ 1, LN->getMemOperand()->getFlags(), LN->getAAInfo());
    VectElmtChains[i] = VectElmts[i].getValue(1);
  }

  LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains);
  SDValue Value = DAG.getBuildVector(MVT::v4i1, dl, VectElmts);

  SDValue RVals[] = { Value, LoadChain };
  return DAG.getMergeValues(RVals, dl);
}
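
// A standalone restatement (assumption: llvm::MinAlign in MathExtras.h
// behaves this way) of the per-element alignment used in the scalarized path
// above: MinAlign(Alignment, Idx * Stride) is the largest power of two
// dividing both values, i.e. the alignment still guaranteed at that offset.
#include <cstdint>

static uint64_t minAlign(uint64_t A, uint64_t B) {
  return (A | B) & (1 + ~(A | B)); // lowest set bit of A | B
}
// E.g. a 4-byte-aligned v4f64 load with Stride == 8 yields offsets 0, 8, 16,
// 24, so each element load is emitted with only 4-byte alignment.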

/// Lowering for QPX v4i1 stores
SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
  SDValue StoreChain = SN->getChain();
  SDValue BasePtr = SN->getBasePtr();
  SDValue Value = SN->getValue();

  if (Value.getValueType() == MVT::v4f64 ||
      Value.getValueType() == MVT::v4f32) {
    EVT MemVT = SN->getMemoryVT();
    unsigned Alignment = SN->getAlignment();

    // If this store is properly aligned, then it is legal.
    if (Alignment >= MemVT.getStoreSize())
      return Op;

    EVT ScalarVT = Value.getValueType().getScalarType(),
        ScalarMemVT = MemVT.getScalarType();
    unsigned Stride = ScalarMemVT.getStoreSize();

    SDValue Stores[4];
    for (unsigned Idx = 0; Idx < 4; ++Idx) {
      SDValue Ex = DAG.getNode(
          ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
          DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout())));
      SDValue Store;
      if (ScalarVT != ScalarMemVT)
        Store =
            DAG.getTruncStore(StoreChain, dl, Ex, BasePtr,
                              SN->getPointerInfo().getWithOffset(Idx * Stride),
                              ScalarMemVT, MinAlign(Alignment, Idx * Stride),
                              SN->getMemOperand()->getFlags(), SN->getAAInfo());
      else
        Store = DAG.getStore(StoreChain, dl, Ex, BasePtr,
                             SN->getPointerInfo().getWithOffset(Idx * Stride),
                             MinAlign(Alignment, Idx * Stride),
                             SN->getMemOperand()->getFlags(), SN->getAAInfo());

      if (Idx == 0 && SN->isIndexed()) {
        assert(SN->getAddressingMode() == ISD::PRE_INC &&
               "Unknown addressing mode on vector store");
        Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(),
                                    SN->getAddressingMode());
      }

      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                            DAG.getConstant(Stride, dl,
                                            BasePtr.getValueType()));
      Stores[Idx] = Store;
    }

    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    if (SN->isIndexed()) {
      SDValue RetOps[] = { TF, Stores[0].getValue(1) };
      return DAG.getMergeValues(RetOps, dl);
    }

    return TF;
  }

  assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported");
  assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower");

  // The values are now known to be -1 (false) or 1 (true). To convert this
  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
  // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);

  // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
  // understand how to form the extending load.
  SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);

  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);

  // Now convert to an integer and store.
  Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
    DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
    Value);

  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = MFI.CreateStackObject(16, 16, false);
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  SDValue Ops[] = {StoreChain,
                   DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
                   Value, FIdx};
  SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);

  StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
    dl, VTs, Ops, MVT::v4i32, PtrInfo);

  // Move data into the byte array.
  SDValue Loads[4], LoadChains[4];
  for (unsigned i = 0; i < 4; ++i) {
    unsigned Offset = 4*i;
    SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);

    Loads[i] = DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
                           PtrInfo.getWithOffset(Offset));
    LoadChains[i] = Loads[i].getValue(1);
  }

  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);

  SDValue Stores[4];
  for (unsigned i = 0; i < 4; ++i) {
    SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);

    Stores[i] = DAG.getTruncStore(
        StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i),
        MVT::i8, /* Alignment = */ 1, SN->getMemOperand()->getFlags(),
        SN->getAAInfo());
  }

  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

  return StoreChain;
}

SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  if (Op.getValueType() == MVT::v4i32) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);

    SDValue Zero  = BuildSplatI(  0, 1, MVT::v4i32, DAG, dl);
    SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl); // +16 as shift amt.

    SDValue RHSSwap =   // = vrlw RHS, 16
      BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);

    // Shrinkify inputs to v8i16.
    LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
    RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
    RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);

    // Low parts multiplied together, generating 32-bit results (we ignore the
    // top parts).
    SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
                                      LHS, RHS, DAG, dl, MVT::v4i32);

    SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
                                      LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
    // Shift the high parts up 16 bits.
    HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
                              Neg16, DAG, dl);
    return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
  } else if (Op.getValueType() == MVT::v8i16) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);

    SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);

    return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
                            LHS, RHS, Zero, DAG, dl);
  } else if (Op.getValueType() == MVT::v16i8) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
    bool isLittleEndian = Subtarget.isLittleEndian();

    // Multiply the even 8-bit parts, producing 16-bit sums.
    SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
                                         LHS, RHS, DAG, dl, MVT::v8i16);
    EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);

    // Multiply the odd 8-bit parts, producing 16-bit sums.
    SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
                                        LHS, RHS, DAG, dl, MVT::v8i16);
    OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);

    // Merge the results together.  Because vmuleub and vmuloub are
    // instructions with a big-endian bias, we must reverse the
    // element numbering and reverse the meaning of "odd" and "even"
    // when generating little endian code.
    int Ops[16];
    for (unsigned i = 0; i != 8; ++i) {
      if (isLittleEndian) {
        Ops[i*2  ] = 2*i;
        Ops[i*2+1] = 2*i+16;
      } else {
        Ops[i*2  ] = 2*i+1;
        Ops[i*2+1] = 2*i+1+16;
      }
    }
    if (isLittleEndian)
      return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
    else
      return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
  } else {
    llvm_unreachable("Unknown mul to lower!");
  }
}
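
// A standalone scalar model of the v4i32 path above (illustrative, per lane):
// with 16-bit halves A = AH*2^16 + AL and B = BH*2^16 + BL, the low 32 bits
// of A*B are AL*BL + ((AL*BH + AH*BL) << 16). vmulouh forms AL*BL, vmsumuhm
// against the 16-bit-rotated RHS forms AL*BH + AH*BL, and vslw shifts the
// sum into place.
#include <cstdint>

static uint32_t mul32ViaHalves(uint32_t A, uint32_t B) {
  uint32_t AL = A & 0xFFFF, AH = A >> 16;
  uint32_t BL = B & 0xFFFF, BH = B >> 16;
  uint32_t Lo = AL * BL;           // vmulouh: low halves multiplied
  uint32_t Hi = AL * BH + AH * BL; // vmsumuhm with the rotated operand
  return Lo + (Hi << 16);          // vslw by 16, then the final ADD
}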

/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default: llvm_unreachable("Wasn't expecting to be able to lower this!");
  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
  case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
  case ISD::SETCC:              return LowerSETCC(Op, DAG);
  case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
  case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
  case ISD::VASTART:
    return LowerVASTART(Op, DAG);

  case ISD::VAARG:
    return LowerVAARG(Op, DAG);

  case ISD::VACOPY:
    return LowerVACOPY(Op, DAG);

  case ISD::STACKRESTORE:
    return LowerSTACKRESTORE(Op, DAG);

  case ISD::DYNAMIC_STACKALLOC:
    return LowerDYNAMIC_STACKALLOC(Op, DAG);

  case ISD::GET_DYNAMIC_AREA_OFFSET:
    return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);

  case ISD::EH_DWARF_CFA:
    return LowerEH_DWARF_CFA(Op, DAG);

  case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
  case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);

  case ISD::LOAD:               return LowerLOAD(Op, DAG);
  case ISD::STORE:              return LowerSTORE(Op, DAG);
  case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
  case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG,
                                                      SDLoc(Op));
  case ISD::UINT_TO_FP:
  case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
  case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);

  // Lower 64-bit shifts.
  case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
  case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
  case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::SIGN_EXTEND_INREG:  return LowerSIGN_EXTEND_INREG(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::MUL:                return LowerMUL(Op, DAG);

  // For counter-based loop handling.
  case ISD::INTRINSIC_W_CHAIN:  return SDValue();

  // Frame & Return address.
  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);

  case ISD::INTRINSIC_VOID:
    return LowerINTRINSIC_VOID(Op, DAG);
  case ISD::SREM:
  case ISD::UREM:
    return LowerREM(Op, DAG);
  }
}

void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue> &Results,
                                           SelectionDAG &DAG) const {
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Do not know how to custom type legalize this operation!");
  case ISD::READCYCLECOUNTER: {
    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
    SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));

    Results.push_back(RTB);
    Results.push_back(RTB.getValue(1));
    Results.push_back(RTB.getValue(2));
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
        Intrinsic::ppc_is_decremented_ctr_nonzero)
      break;

    assert(N->getValueType(0) == MVT::i1 &&
           "Unexpected result type for CTR decrement intrinsic");
    EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                 N->getValueType(0));
    SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
    SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
                                 N->getOperand(1));

    Results.push_back(NewInt);
    Results.push_back(NewInt.getValue(1));
    break;
  }
  case ISD::VAARG: {
    if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
      return;

    EVT VT = N->getValueType(0);

    if (VT == MVT::i64) {
      SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);

      Results.push_back(NewNode);
      Results.push_back(NewNode.getValue(1));
    }
    return;
  }
  case ISD::FP_ROUND_INREG: {
    assert(N->getValueType(0) == MVT::ppcf128);
    assert(N->getOperand(0).getValueType() == MVT::ppcf128);
    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
                             MVT::f64, N->getOperand(0),
                             DAG.getIntPtrConstant(0, dl));
    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
                             MVT::f64, N->getOperand(0),
                             DAG.getIntPtrConstant(1, dl));

    // Add the two halves of the long double in round-to-zero mode.
    SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);

    // We know the low half is about to be thrown away, so just use something
    // convenient.
    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
                                  FPreg, FPreg));
    return;
  }
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    // LowerFP_TO_INT() can only handle f32 and f64.
    if (N->getOperand(0).getValueType() == MVT::ppcf128)
      return;
    Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
    return;
  }
}
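
// A standalone model of the FP_ROUND_INREG data flow above (illustrative;
// the round-to-zero behavior itself lives in the FADDRTZ hardware add and is
// not modeled here): ppcf128 is a pair of doubles whose value is Hi + Lo, and
// the pair is collapsed with one add whose result also stands in for the
// soon-to-be-discarded low half.
#include <utility>

static std::pair<double, double> roundInRegPPCF128(double Hi, double Lo) {
  double Sum = Hi + Lo; // FADDRTZ performs this add in round-to-zero mode
  return {Sum, Sum};    // BUILD_PAIR reuses Sum for the dead low half
}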

//===----------------------------------------------------------------------===//
//  Other Lowering Code
//===----------------------------------------------------------------------===//

static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  Function *Func = Intrinsic::getDeclaration(M, Id);
  return Builder.CreateCall(Func, {});
}

// The mappings for emitLeading/TrailingFence are taken from
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                                 Instruction *Inst,
                                                 AtomicOrdering Ord) const {
  if (Ord == AtomicOrdering::SequentiallyConsistent)
    return callIntrinsic(Builder, Intrinsic::ppc_sync);
  if (isReleaseOrStronger(Ord))
    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
  return nullptr;
}

Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                                  Instruction *Inst,
                                                  AtomicOrdering Ord) const {
  if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
    // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
    // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
    // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
    if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
      return Builder.CreateCall(
          Intrinsic::getDeclaration(
              Builder.GetInsertBlock()->getParent()->getParent(),
              Intrinsic::ppc_cfence, {Inst->getType()}),
          {Inst});
    // FIXME: Can use isync for rmw operation.
    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
  }
  return nullptr;
}
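
// A standalone decision table for the fences chosen above (illustrative; the
// enum and helpers are hypothetical stand-ins, and the scheme follows the
// cpp0xmappings document cited in the comments): seq_cst gets a leading sync,
// release-or-stronger a leading lwsync, and acquire-or-stronger a trailing
// lwsync, except that a 64-bit atomic load can take the lighter cfence.
enum class Ord { Monotonic, Acquire, Release, AcqRel, SeqCst };

static const char *leadingFence(Ord O) {
  if (O == Ord::SeqCst)
    return "sync";
  if (O == Ord::Release || O == Ord::AcqRel)
    return "lwsync"; // isReleaseOrStronger
  return nullptr;    // no fence needed
}

static const char *trailingFence(Ord O, bool Is64BitLoad) {
  if (O == Ord::Acquire || O == Ord::AcqRel || O == Ord::SeqCst)
    return Is64BitLoad ? "cfence" : "lwsync"; // isAcquireOrStronger
  return nullptr;
}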

MachineBasicBlock *
PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
                                    unsigned AtomicSize,
                                    unsigned BinOpcode,
                                    unsigned CmpOpcode,
                                    unsigned CmpPred) const {
  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  auto LoadMnemonic = PPC::LDARX;
  auto StoreMnemonic = PPC::STDCX;
  switch (AtomicSize) {
  default:
    llvm_unreachable("Unexpected size of atomic entity");
  case 1:
    LoadMnemonic = PPC::LBARX;
    StoreMnemonic = PPC::STBCX;
    assert(Subtarget.hasPartwordAtomics() && "Call this only with partword atomics available");
    break;
  case 2:
    LoadMnemonic = PPC::LHARX;
    StoreMnemonic = PPC::STHCX;
    assert(Subtarget.hasPartwordAtomics() && "Call this only with partword atomics available");
    break;
  case 4:
    LoadMnemonic = PPC::LWARX;
    StoreMnemonic = PPC::STWCX;
    break;
  case 8:
    LoadMnemonic = PPC::LDARX;
    StoreMnemonic = PPC::STDCX;
    break;
  }

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineFunction::iterator It = ++BB->getIterator();

  unsigned dest = MI.getOperand(0).getReg();
  unsigned ptrA = MI.getOperand(1).getReg();
  unsigned ptrB = MI.getOperand(2).getReg();
  unsigned incr = MI.getOperand(3).getReg();
  DebugLoc dl = MI.getDebugLoc();

  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB =
    CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  if (CmpOpcode)
    F->insert(It, loop2MBB);
  F->insert(It, exitMBB);
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  unsigned TmpReg = (!BinOpcode) ? incr :
    RegInfo.createVirtualRegister(AtomicSize == 8 ? &PPC::G8RCRegClass
                                                  : &PPC::GPRCRegClass);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  //  loopMBB:
  //   l[wd]arx dest, ptr
  //   add r0, dest, incr
  //   st[wd]cx. r0, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB

  // For max/min...
  //  loopMBB:
  //   l[wd]arx dest, ptr
  //   cmpl?[wd] incr, dest
  //   bgt exitMBB
  //  loop2MBB:
  //   st[wd]cx. dest, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB

  BB = loopMBB;
  BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
    .addReg(ptrA).addReg(ptrB);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
  if (CmpOpcode) {
    // Signed comparisons of byte or halfword values must be sign-extended.
    if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
      unsigned ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
      BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
              ExtReg).addReg(dest);
      BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
        .addReg(incr).addReg(ExtReg);
    } else
      BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
        .addReg(incr).addReg(dest);

    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(exitMBB);
    BB = loop2MBB;
  }
  BuildMI(BB, dl, TII->get(StoreMnemonic))
    .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
  BuildMI(BB, dl, TII->get(PPC::BCC))
    .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;
  return BB;
}
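
// A standalone C++ model of the loopMBB control flow emitted above
// (illustrative; llsc_load/llsc_store are hypothetical stand-ins for
// l[wd]arx and st[wd]cx., which have no portable C++ equivalent):
#include <cstdint>

extern uint32_t llsc_load(uint32_t *Ptr);          // lwarx: load-reserved
extern bool llsc_store(uint32_t *Ptr, uint32_t V); // stwcx.: store-conditional

static uint32_t atomicAddModel(uint32_t *Ptr, uint32_t Incr) {
  uint32_t Dest;
  do {
    Dest = llsc_load(Ptr);                 // l[wd]arx dest, ptr
  } while (!llsc_store(Ptr, Dest + Incr)); // add; st[wd]cx.; bne- loopMBB
  return Dest;                             // the value seen before the add
}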
9264
9265
MachineBasicBlock *
9266
PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
9267
                                            MachineBasicBlock *BB,
9268
                                            bool is8bit, // operation
9269
                                            unsigned BinOpcode,
9270
                                            unsigned CmpOpcode,
9271
260
                                            unsigned CmpPred) const {
9272
260
  // If we support part-word atomic mnemonics, just use them
9273
260
  if (Subtarget.hasPartwordAtomics())
9274
240
    
return EmitAtomicBinary(MI, BB, is8bit ? 240
1120
:
2120
, BinOpcode,
9275
240
                            CmpOpcode, CmpPred);
9276
20
9277
20
  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
9278
20
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
9279
20
  // In 64 bit mode we have to use 64 bits for addresses, even though the
9280
20
  // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
9281
20
  // registers without caring whether they're 32 or 64, but here we're
9282
20
  // doing actual arithmetic on the addresses.
9283
20
  bool is64bit = Subtarget.isPPC64();
9284
20
  bool isLittleEndian = Subtarget.isLittleEndian();
9285
20
  unsigned ZeroReg = is64bit ? 
PPC::ZERO818
:
PPC::ZERO2
;
9286
20
9287
20
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
9288
20
  MachineFunction *F = BB->getParent();
9289
20
  MachineFunction::iterator It = ++BB->getIterator();
9290
20
9291
20
  unsigned dest = MI.getOperand(0).getReg();
9292
20
  unsigned ptrA = MI.getOperand(1).getReg();
9293
20
  unsigned ptrB = MI.getOperand(2).getReg();
9294
20
  unsigned incr = MI.getOperand(3).getReg();
9295
20
  DebugLoc dl = MI.getDebugLoc();
9296
20
9297
20
  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
9298
20
  MachineBasicBlock *loop2MBB =
9299
20
    CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
9300
20
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
9301
20
  F->insert(It, loopMBB);
9302
20
  if (CmpOpcode)
9303
8
    F->insert(It, loop2MBB);
9304
20
  F->insert(It, exitMBB);
9305
20
  exitMBB->splice(exitMBB->begin(), BB,
9306
20
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
9307
20
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
9308
20
9309
20
  MachineRegisterInfo &RegInfo = F->getRegInfo();
9310
18
  const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
9311
2
                                          : &PPC::GPRCRegClass;
9312
20
  unsigned PtrReg = RegInfo.createVirtualRegister(RC);
9313
20
  unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
9314
20
  unsigned ShiftReg =
9315
20
    isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
9316
20
  unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
9317
20
  unsigned MaskReg = RegInfo.createVirtualRegister(RC);
9318
20
  unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
9319
20
  unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
9320
20
  unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
9321
20
  unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
9322
20
  unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
9323
20
  unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
9324
20
  unsigned Ptr1Reg;
9325
20
  unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);
9326
20
9327
20
  //  thisMBB:
9328
20
  //   ...
9329
20
  //   fallthrough --> loopMBB
9330
20
  BB->addSuccessor(loopMBB);
9331
20
9332
20
  // The 4-byte load must be aligned, while a char or short may be
9333
20
  // anywhere in the word.  Hence all this nasty bookkeeping code.
9334
20
  //   add ptr1, ptrA, ptrB [copy if ptrA==0]
9335
20
  //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
9336
20
  //   xori shift, shift1, 24 [16]
9337
20
  //   rlwinm ptr, ptr1, 0, 0, 29
9338
20
  //   slw incr2, incr, shift
9339
20
  //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
9340
20
  //   slw mask, mask2, shift
9341
20
  //  loopMBB:
9342
20
  //   lwarx tmpDest, ptr
9343
20
  //   add tmp, tmpDest, incr2
9344
20
  //   andc tmp2, tmpDest, mask
9345
20
  //   and tmp3, tmp, mask
9346
20
  //   or tmp4, tmp3, tmp2
9347
20
  //   stwcx. tmp4, ptr
9348
20
  //   bne- loopMBB
9349
20
  //   fallthrough --> exitMBB
9350
20
  //   srw dest, tmpDest, shift
9351
20
  if (ptrA != ZeroReg) {
9352
0
    Ptr1Reg = RegInfo.createVirtualRegister(RC);
9353
0
    BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
9354
0
      .addReg(ptrA).addReg(ptrB);
9355
20
  } else {
9356
20
    Ptr1Reg = ptrB;
9357
20
  }
9358
20
  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
9359
20
      .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
9360
20
  if (!isLittleEndian)
9361
20
    BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
9362
20
        .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
9363
20
  if (is64bit)
9364
18
    BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
9365
18
      .addReg(Ptr1Reg).addImm(0).addImm(61);
9366
20
  else
9367
2
    BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
9368
2
      .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
9369
20
  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
9370
20
      .addReg(incr).addReg(ShiftReg);
9371
20
  if (is8bit)
9372
10
    BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
9373
10
  else {
9374
10
    BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
9375
10
    BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg).addReg(Mask3Reg).addImm(65535);
9376
10
  }
9377
20
  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
9378
20
      .addReg(Mask2Reg).addReg(ShiftReg);
9379
20
9380
20
  BB = loopMBB;
9381
20
  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
9382
20
    .addReg(ZeroReg).addReg(PtrReg);
9383
20
  if (BinOpcode)
9384
8
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
9385
8
      .addReg(Incr2Reg).addReg(TmpDestReg);
9386
20
  BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
9387
20
    .addReg(TmpDestReg).addReg(MaskReg);
9388
20
  BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
9389
20
    .addReg(TmpReg).addReg(MaskReg);
9390
20
  if (CmpOpcode) {
9391
8
    // For unsigned comparisons, we can directly compare the shifted values.
9392
8
    // For signed comparisons we shift and sign extend.
9393
8
    unsigned SReg = RegInfo.createVirtualRegister(RC);
9394
8
    BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), SReg)
9395
8
      .addReg(TmpDestReg).addReg(MaskReg);
9396
8
    unsigned ValueReg = SReg;
9397
8
    unsigned CmpReg = Incr2Reg;
9398
8
    if (CmpOpcode == PPC::CMPW) {
9399
4
      ValueReg = RegInfo.createVirtualRegister(RC);
9400
4
      BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
9401
4
        .addReg(SReg).addReg(ShiftReg);
9402
4
      unsigned ValueSReg = RegInfo.createVirtualRegister(RC);
9403
4
      BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
9404
4
        .addReg(ValueReg);
9405
4
      ValueReg = ValueSReg;
9406
4
      CmpReg = incr;
9407
4
    }
9408
8
    BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
9409
8
      .addReg(CmpReg).addReg(ValueReg);
9410
8
    BuildMI(BB, dl, TII->get(PPC::BCC))
9411
8
      .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
9412
8
    BB->addSuccessor(loop2MBB);
9413
8
    BB->addSuccessor(exitMBB);
9414
8
    BB = loop2MBB;
9415
8
  }
9416
20
  BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
9417
260
    .addReg(Tmp3Reg).addReg(Tmp2Reg);
9418
260
  BuildMI(BB, dl, TII->get(PPC::STWCX))
9419
260
    .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
9420
260
  BuildMI(BB, dl, TII->get(PPC::BCC))
9421
260
    .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
9422
260
  BB->addSuccessor(loopMBB);
9423
260
  BB->addSuccessor(exitMBB);
9424
260
9425
260
  //  exitMBB:
9426
260
  //   ...
9427
260
  BB = exitMBB;
9428
260
  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
9429
260
    .addReg(ShiftReg);
9430
260
  return BB;
9431
260
}
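// The shift/mask bookkeeping above, restated as a self-contained sketch
// (the helper and struct names are illustrative, not part of this file):
//
//   #include <cstdint>
//
//   struct PartwordPos { uint32_t Shift; uint32_t Mask; };
//
//   PartwordPos locatePartword(uintptr_t Addr, bool Is8Bit, bool LittleEndian) {
//     uint32_t Shift = (Addr & 3) * 8;      // rlwinm shift1, ptr1, 3, 27, 2x
//     if (!LittleEndian)
//       Shift ^= Is8Bit ? 24 : 16;          // xori shift, shift1, 24 [16]
//     uint32_t Mask = (Is8Bit ? 0xFFu : 0xFFFFu) << Shift; // slw mask, ...
//     return {Shift, Mask};
//   }
//
// Bits outside Mask are carried through unchanged by the andc/and/or
// sequence, so the stwcx. writes back the neighboring bytes untouched.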
9432
9433
llvm::MachineBasicBlock *
9434
PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
9435
6
                                    MachineBasicBlock *MBB) const {
9436
6
  DebugLoc DL = MI.getDebugLoc();
9437
6
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
9438
6
  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
9439
6
9440
6
  MachineFunction *MF = MBB->getParent();
9441
6
  MachineRegisterInfo &MRI = MF->getRegInfo();
9442
6
9443
6
  const BasicBlock *BB = MBB->getBasicBlock();
9444
6
  MachineFunction::iterator I = ++MBB->getIterator();
9445
6
9446
6
  // Memory Reference
9447
6
  MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
9448
6
  MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
9449
6
9450
6
  unsigned DstReg = MI.getOperand(0).getReg();
9451
6
  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
9452
6
  assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
9453
6
  unsigned mainDstReg = MRI.createVirtualRegister(RC);
9454
6
  unsigned restoreDstReg = MRI.createVirtualRegister(RC);
9455
6
9456
6
  MVT PVT = getPointerTy(MF->getDataLayout());
9457
6
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
9458
6
         "Invalid Pointer Size!");
9459
6
  // For v = setjmp(buf), we generate
9460
6
  //
9461
6
  // thisMBB:
9462
6
  //  SjLjSetup mainMBB
9463
6
  //  bl mainMBB
9464
6
  //  v_restore = 1
9465
6
  //  b sinkMBB
9466
6
  //
9467
6
  // mainMBB:
9468
6
  //  buf[LabelOffset] = LR
9469
6
  //  v_main = 0
9470
6
  //
9471
6
  // sinkMBB:
9472
6
  //  v = phi(main, restore)
9473
6
  //
9474
6
9475
6
  MachineBasicBlock *thisMBB = MBB;
9476
6
  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
9477
6
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
9478
6
  MF->insert(I, mainMBB);
9479
6
  MF->insert(I, sinkMBB);
9480
6
9481
6
  MachineInstrBuilder MIB;
9482
6
9483
6
  // Transfer the remainder of BB and its successor edges to sinkMBB.
9484
6
  sinkMBB->splice(sinkMBB->begin(), MBB,
9485
6
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
9486
6
  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
9487
6
9488
6
  // Note that the structure of the jmp_buf used here is not compatible
9489
6
  // with that used by libc, and is not designed to be. Specifically, it
9490
6
  // stores only those 'reserved' registers that LLVM does not otherwise
9491
6
  // understand how to spill. Also, by convention, by the time this
9492
6
  // intrinsic is called, Clang has already stored the frame address in the
9493
6
  // first slot of the buffer and stack address in the third. Following the
9494
6
  // X86 target code, we'll store the jump address in the second slot. We also
9495
6
  // need to save the TOC pointer (R2) to handle jumps between shared
9496
6
  // libraries, and that will be stored in the fourth slot. The thread
9497
6
  // identifier (R13) is not affected.
9498
6
9499
6
  // thisMBB:
9500
6
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
9501
6
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
9502
6
  const int64_t BPOffset    = 4 * PVT.getStoreSize();
9503
6
9504
6
  // Prepare the IP in a virtual register.
9505
6
  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
9506
6
  unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
9507
6
  unsigned BufReg = MI.getOperand(1).getReg();
9508
6
9509
6
  if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
9510
6
    setUsesTOCBasePtr(*MBB->getParent());
9511
6
    MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
9512
6
            .addReg(PPC::X2)
9513
6
            .addImm(TOCOffset)
9514
6
            .addReg(BufReg);
9515
6
    MIB.setMemRefs(MMOBegin, MMOEnd);
9516
6
  }
9517
6
9518
6
  // Naked functions never have a base pointer, and so we use r1. For all
9519
6
  // other functions, this decision must be deferred until PEI.
9520
6
  unsigned BaseReg;
9521
6
  if (MF->getFunction()->hasFnAttribute(Attribute::Naked))
9522
0
    BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
9523
6
  else
9524
6
    BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
9525
6
9526
6
  MIB = BuildMI(*thisMBB, MI, DL,
9527
6
                TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
9528
6
            .addReg(BaseReg)
9529
6
            .addImm(BPOffset)
9530
6
            .addReg(BufReg);
9531
6
  MIB.setMemRefs(MMOBegin, MMOEnd);
9532
6
9533
6
  // Setup
9534
6
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
9535
6
  MIB.addRegMask(TRI->getNoPreservedMask());
9536
6
9537
6
  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
9538
6
9539
6
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
9540
6
          .addMBB(mainMBB);
9541
6
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
9542
6
9543
6
  thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
9544
6
  thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
9545
6
9546
6
  // mainMBB:
9547
6
  //  mainDstReg = 0
9548
6
  MIB =
9549
6
      BuildMI(mainMBB, DL,
9550
6
              TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
9551
6
9552
6
  // Store IP
9553
6
  if (Subtarget.isPPC64()) {
9554
6
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
9555
6
            .addReg(LabelReg)
9556
6
            .addImm(LabelOffset)
9557
6
            .addReg(BufReg);
9558
6
  } else {
9559
0
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
9560
0
            .addReg(LabelReg)
9561
0
            .addImm(LabelOffset)
9562
0
            .addReg(BufReg);
9563
0
  }
9564
6
9565
6
  MIB.setMemRefs(MMOBegin, MMOEnd);
9566
6
9567
6
  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
9568
6
  mainMBB->addSuccessor(sinkMBB);
9569
6
9570
6
  // sinkMBB:
9571
6
  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
9572
6
          TII->get(PPC::PHI), DstReg)
9573
6
    .addReg(mainDstReg).addMBB(mainMBB)
9574
6
    .addReg(restoreDstReg).addMBB(thisMBB);
9575
6
9576
6
  MI.eraseFromParent();
9577
6
  return sinkMBB;
9578
6
}
9579
9580
MachineBasicBlock *
9581
PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
9582
5
                                     MachineBasicBlock *MBB) const {
9583
5
  DebugLoc DL = MI.getDebugLoc();
9584
5
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
9585
5
9586
5
  MachineFunction *MF = MBB->getParent();
9587
5
  MachineRegisterInfo &MRI = MF->getRegInfo();
9588
5
9589
5
  // Memory Reference
9590
5
  MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
9591
5
  MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
9592
5
9593
5
  MVT PVT = getPointerTy(MF->getDataLayout());
9594
5
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
9595
5
         "Invalid Pointer Size!");
9596
5
9597
5
  const TargetRegisterClass *RC =
9598
5
    (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
9599
5
  unsigned Tmp = MRI.createVirtualRegister(RC);
9600
5
  // Since FP is only updated here but NOT referenced, it's treated as GPR.
9601
5
  unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
9602
5
  unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
9603
5
  unsigned BP =
9604
5
      (PVT == MVT::i64)
9605
5
          ? PPC::X30
9606
0
          : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
9607
0
                                                              : PPC::R30);
9608
5
9609
5
  MachineInstrBuilder MIB;
9610
5
9611
5
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
9612
5
  const int64_t SPOffset    = 2 * PVT.getStoreSize();
9613
5
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
9614
5
  const int64_t BPOffset    = 4 * PVT.getStoreSize();
9615
5
9616
5
  unsigned BufReg = MI.getOperand(0).getReg();
9617
5
9618
5
  // Reload FP (the jumped-to function may not have had a
9619
5
  // frame pointer, and if so, then its r31 will be restored
9620
5
  // as necessary).
9621
5
  if (PVT == MVT::i64) {
9622
5
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
9623
5
            .addImm(0)
9624
5
            .addReg(BufReg);
9625
5
  } else {
9626
0
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
9627
0
            .addImm(0)
9628
0
            .addReg(BufReg);
9629
0
  }
9630
5
  MIB.setMemRefs(MMOBegin, MMOEnd);
9631
5
9632
5
  // Reload IP
9633
5
  if (PVT == MVT::i64) {
9634
5
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
9635
5
            .addImm(LabelOffset)
9636
5
            .addReg(BufReg);
9637
5
  } else {
9638
0
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
9639
0
            .addImm(LabelOffset)
9640
0
            .addReg(BufReg);
9641
0
  }
9642
5
  MIB.setMemRefs(MMOBegin, MMOEnd);
9643
5
9644
5
  // Reload SP
9645
5
  if (PVT == MVT::i64) {
9646
5
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
9647
5
            .addImm(SPOffset)
9648
5
            .addReg(BufReg);
9649
5
  } else {
9650
0
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
9651
0
            .addImm(SPOffset)
9652
0
            .addReg(BufReg);
9653
0
  }
9654
5
  MIB.setMemRefs(MMOBegin, MMOEnd);
9655
5
9656
5
  // Reload BP
9657
5
  if (PVT == MVT::i64) {
9658
5
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
9659
5
            .addImm(BPOffset)
9660
5
            .addReg(BufReg);
9661
5
  } else {
9662
0
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
9663
0
            .addImm(BPOffset)
9664
0
            .addReg(BufReg);
9665
0
  }
9666
5
  MIB.setMemRefs(MMOBegin, MMOEnd);
9667
5
9668
5
  // Reload TOC
9669
5
  if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
9670
5
    setUsesTOCBasePtr(*MBB->getParent());
9671
5
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
9672
5
            .addImm(TOCOffset)
9673
5
            .addReg(BufReg);
9674
5
9675
5
    MIB.setMemRefs(MMOBegin, MMOEnd);
9676
5
  }
9677
5
9678
5
  // Jump
9679
5
  BuildMI(*MBB, MI, DL,
9680
5
          TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
9681
5
  BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
9682
5
9683
5
  MI.eraseFromParent();
9684
5
  return MBB;
9685
5
}
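// Written out as offsets, the (LLVM-private, libc-incompatible) buffer layout
// shared by the two functions above looks like this sketch; the enumerator
// names are invented for illustration, only the offsets come from the code:
//
//   enum SjLjBufSlot {
//     FrameAddrSlot = 0, // frame address, stored by the front end
//     LabelSlot     = 1, // resume IP, written from LR in mainMBB
//     StackAddrSlot = 2, // stack address, stored by the front end
//     TOCSlot       = 3, // r2, so jumps across shared libraries work
//     BPSlot        = 4, // base pointer
//   };
//   // byte offset = slot index * PVT.getStoreSize() (8 bytes on PPC64)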
9686
9687
MachineBasicBlock *
9688
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
9689
1.74k
                                               MachineBasicBlock *BB) const {
9690
1.74k
  if (MI.getOpcode() == TargetOpcode::STACKMAP ||
9691
1.74k
      MI.getOpcode() == TargetOpcode::PATCHPOINT) {
9692
59
    if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() &&
9693
59
        MI.getOpcode() == TargetOpcode::PATCHPOINT) {
9694
40
      // Call lowering should have added an r2 operand to indicate a dependence
9695
40
      // on the TOC base pointer value. It can't however, because there is no
9696
40
      // way to mark the dependence as implicit there, and so the stackmap code
9697
40
      // will confuse it with a regular operand. Instead, add the dependence
9698
40
      // here.
9699
40
      setUsesTOCBasePtr(*BB->getParent());
9700
40
      MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
9701
40
    }
9702
59
9703
59
    return emitPatchPoint(MI, BB);
9704
59
  }
9705
1.68k
9706
1.68k
  if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
9707
1.68k
      MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
9708
6
    return emitEHSjLjSetJmp(MI, BB);
9709
1.68k
  } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
9710
1.68k
             MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
9711
5
    return emitEHSjLjLongJmp(MI, BB);
9712
5
  }
9713
1.67k
9714
1.67k
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
9715
1.67k
9716
1.67k
  // To "insert" these instructions we actually have to insert their
9717
1.67k
  // control-flow patterns.
9718
1.67k
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
9719
1.67k
  MachineFunction::iterator It = ++BB->getIterator();
9720
1.67k
9721
1.67k
  MachineFunction *F = BB->getParent();
9722
1.67k
9723
1.67k
  if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
9724
1.56k
       MI.getOpcode() == PPC::SELECT_CC_I8 ||
9725
1.67k
       MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8) {
9726
703
    SmallVector<MachineOperand, 2> Cond;
9727
703
    if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
9728
595
        MI.getOpcode() == PPC::SELECT_CC_I8)
9729
163
      Cond.push_back(MI.getOperand(4));
9730
703
    else
9731
540
      Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
9732
703
    Cond.push_back(MI.getOperand(1));
9733
703
9734
703
    DebugLoc dl = MI.getDebugLoc();
9735
703
    TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
9736
703
                      MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
9737
1.67k
  } else if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
9738
972
             MI.getOpcode() == PPC::SELECT_CC_I8 ||
9739
972
             MI.getOpcode() == PPC::SELECT_CC_F4 ||
9740
937
             MI.getOpcode() == PPC::SELECT_CC_F8 ||
9741
923
             MI.getOpcode() == PPC::SELECT_CC_QFRC ||
9742
923
             MI.getOpcode() == PPC::SELECT_CC_QSRC ||
9743
923
             MI.getOpcode() == PPC::SELECT_CC_QBRC ||
9744
923
             MI.getOpcode() == PPC::SELECT_CC_VRRC ||
9745
923
             MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
9746
905
             MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
9747
905
             MI.getOpcode() == PPC::SELECT_CC_VSRC ||
9748
905
             MI.getOpcode() == PPC::SELECT_I4 ||
9749
905
             MI.getOpcode() == PPC::SELECT_I8 ||
9750
905
             MI.getOpcode() == PPC::SELECT_F4 ||
9751
884
             MI.getOpcode() == PPC::SELECT_F8 ||
9752
870
             MI.getOpcode() == PPC::SELECT_QFRC ||
9753
850
             MI.getOpcode() == PPC::SELECT_QSRC ||
9754
830
             MI.getOpcode() == PPC::SELECT_QBRC ||
9755
810
             MI.getOpcode() == PPC::SELECT_VRRC ||
9756
770
             MI.getOpcode() == PPC::SELECT_VSFRC ||
9757
744
             MI.getOpcode() == PPC::SELECT_VSSRC ||
9758
972
             MI.getOpcode() == PPC::SELECT_VSRC) {
9759
229
    // The incoming instruction knows the destination vreg to set, the
9760
229
    // condition code register to branch on, the true/false values to
9761
229
    // select between, and a branch opcode to use.
9762
229
9763
229
    //  thisMBB:
9764
229
    //  ...
9765
229
    //   TrueVal = ...
9766
229
    //   cmpTY ccX, r1, r2
9767
229
    //   bCC copy1MBB
9768
229
    //   fallthrough --> copy0MBB
9769
229
    MachineBasicBlock *thisMBB = BB;
9770
229
    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
9771
229
    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
9772
229
    DebugLoc dl = MI.getDebugLoc();
9773
229
    F->insert(It, copy0MBB);
9774
229
    F->insert(It, sinkMBB);
9775
229
9776
229
    // Transfer the remainder of BB and its successor edges to sinkMBB.
9777
229
    sinkMBB->splice(sinkMBB->begin(), BB,
9778
229
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
9779
229
    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
9780
229
9781
229
    // Next, add the true and fallthrough blocks as its successors.
9782
229
    BB->addSuccessor(copy0MBB);
9783
229
    BB->addSuccessor(sinkMBB);
9784
229
9785
229
    if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
9786
229
        MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
9787
194
        MI.getOpcode() == PPC::SELECT_QFRC ||
9788
174
        MI.getOpcode() == PPC::SELECT_QSRC ||
9789
154
        MI.getOpcode() == PPC::SELECT_QBRC ||
9790
134
        MI.getOpcode() == PPC::SELECT_VRRC ||
9791
94
        MI.getOpcode() == PPC::SELECT_VSFRC ||
9792
68
        MI.getOpcode() == PPC::SELECT_VSSRC ||
9793
229
        MI.getOpcode() == PPC::SELECT_VSRC) {
9794
162
      BuildMI(BB, dl, TII->get(PPC::BC))
9795
162
          .addReg(MI.getOperand(1).getReg())
9796
162
          .addMBB(sinkMBB);
9797
229
    } else {
9798
67
      unsigned SelectPred = MI.getOperand(4).getImm();
9799
67
      BuildMI(BB, dl, TII->get(PPC::BCC))
9800
67
          .addImm(SelectPred)
9801
67
          .addReg(MI.getOperand(1).getReg())
9802
67
          .addMBB(sinkMBB);
9803
67
    }
9804
229
9805
229
    //  copy0MBB:
9806
229
    //   %FalseValue = ...
9807
229
    //   # fallthrough to sinkMBB
9808
229
    BB = copy0MBB;
9809
229
9810
229
    // Update machine-CFG edges
9811
229
    BB->addSuccessor(sinkMBB);
9812
229
9813
229
    //  sinkMBB:
9814
229
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
9815
229
    //  ...
9816
229
    BB = sinkMBB;
9817
229
    BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
9818
229
        .addReg(MI.getOperand(3).getReg())
9819
229
        .addMBB(copy0MBB)
9820
229
        .addReg(MI.getOperand(2).getReg())
9821
229
        .addMBB(thisMBB);
9822
972
  } else if (MI.getOpcode() == PPC::ReadTB) {
9823
2
    // To read the 64-bit time-base register on a 32-bit target, we read the
9824
2
    // two halves. Should the counter have wrapped while it was being read, we
9825
2
    // need to try again.
9826
2
    // ...
9827
2
    // readLoop:
9828
2
    // mfspr Rx,TBU # load from TBU
9829
2
    // mfspr Ry,TB  # load from TB
9830
2
    // mfspr Rz,TBU # load from TBU
9831
2
    // cmpw crX,Rx,Rz # check if 'old'='new'
9832
2
    // bne readLoop   # branch if they're not equal
9833
2
    // ...
9834
2
9835
2
    MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
9836
2
    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
9837
2
    DebugLoc dl = MI.getDebugLoc();
9838
2
    F->insert(It, readMBB);
9839
2
    F->insert(It, sinkMBB);
9840
2
9841
2
    // Transfer the remainder of BB and its successor edges to sinkMBB.
9842
2
    sinkMBB->splice(sinkMBB->begin(), BB,
9843
2
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
9844
2
    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
9845
2
9846
2
    BB->addSuccessor(readMBB);
9847
2
    BB = readMBB;
9848
2
9849
2
    MachineRegisterInfo &RegInfo = F->getRegInfo();
9850
2
    unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
9851
2
    unsigned LoReg = MI.getOperand(0).getReg();
9852
2
    unsigned HiReg = MI.getOperand(1).getReg();
9853
2
9854
2
    BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
9855
2
    BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
9856
2
    BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
9857
2
9858
2
    unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
9859
2
9860
2
    BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
9861
2
      .addReg(HiReg).addReg(ReadAgainReg);
9862
2
    BuildMI(BB, dl, TII->get(PPC::BCC))
9863
2
      .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB);
9864
2
9865
2
    BB->addSuccessor(readMBB);
9866
2
    BB->addSuccessor(sinkMBB);
9867
743
  } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
9868
16
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
9869
725
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
9870
14
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
9871
711
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
9872
13
    BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
9873
698
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
9874
14
    BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
9875
698
9876
684
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
9877
10
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
9878
674
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
9879
10
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
9880
664
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
9881
10
    BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
9882
654
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
9883
11
    BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
9884
654
9885
643
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
9886
10
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
9887
633
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
9888
10
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
9889
623
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
9890
10
    BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
9891
613
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
9892
10
    BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
9893
613
9894
603
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
9895
10
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
9896
593
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
9897
12
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
9898
581
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
9899
10
    BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
9900
571
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
9901
10
    BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
9902
571
9903
561
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
9904
10
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
9905
551
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
9906
10
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
9907
541
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
9908
10
    BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
9909
531
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
9910
10
    BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
9911
531
9912
521
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
9913
10
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
9914
511
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
9915
10
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
9916
501
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
9917
10
    BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
9918
491
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
9919
10
    BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
9920
491
9921
481
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
9922
13
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
9923
468
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
9924
13
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
9925
455
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
9926
11
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
9927
444
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
9928
11
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
9929
444
9930
433
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
9931
13
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
9932
420
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
9933
13
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
9934
407
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
9935
11
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
9936
396
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
9937
11
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
9938
396
9939
385
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
9940
12
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
9941
373
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
9942
12
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
9943
361
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
9944
11
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
9945
350
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
9946
11
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
9947
350
9948
339
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
9949
12
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
9950
327
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
9951
12
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
9952
315
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
9953
11
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
9954
304
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
9955
11
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
9956
304
9957
293
  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
9958
14
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
9959
279
  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
9960
14
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
9961
265
  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
9962
13
    BB = EmitAtomicBinary(MI, BB, 4, 0);
9963
252
  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
9964
14
    BB = EmitAtomicBinary(MI, BB, 8, 0);
9965
238
  else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
9966
215
           MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
9967
190
           (Subtarget.hasPartwordAtomics() &&
9968
190
            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
9969
168
           (Subtarget.hasPartwordAtomics() &&
9970
238
            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
9971
92
    bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
9972
92
9973
92
    auto LoadMnemonic = PPC::LDARX;
9974
92
    auto StoreMnemonic = PPC::STDCX;
9975
92
    switch (MI.getOpcode()) {
9976
0
    default:
9977
0
      llvm_unreachable("Compare and swap of unknown size");
9978
22
    case PPC::ATOMIC_CMP_SWAP_I8:
9979
22
      LoadMnemonic = PPC::LBARX;
9980
22
      StoreMnemonic = PPC::STBCX;
9981
22
      assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
9982
22
      break;
9983
22
    case PPC::ATOMIC_CMP_SWAP_I16:
9984
22
      LoadMnemonic = PPC::LHARX;
9985
22
      StoreMnemonic = PPC::STHCX;
9986
22
      assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
9987
22
      break;
9988
23
    case PPC::ATOMIC_CMP_SWAP_I32:
9989
23
      LoadMnemonic = PPC::LWARX;
9990
23
      StoreMnemonic = PPC::STWCX;
9991
23
      break;
9992
25
    case PPC::ATOMIC_CMP_SWAP_I64:
9993
25
      LoadMnemonic = PPC::LDARX;
9994
25
      StoreMnemonic = PPC::STDCX;
9995
25
      break;
9996
92
    }
9997
92
    unsigned dest = MI.getOperand(0).getReg();
9998
92
    unsigned ptrA = MI.getOperand(1).getReg();
9999
92
    unsigned ptrB = MI.getOperand(2).getReg();
10000
92
    unsigned oldval = MI.getOperand(3).getReg();
10001
92
    unsigned newval = MI.getOperand(4).getReg();
10002
92
    DebugLoc dl = MI.getDebugLoc();
10003
92
10004
92
    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
10005
92
    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
10006
92
    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
10007
92
    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
10008
92
    F->insert(It, loop1MBB);
10009
92
    F->insert(It, loop2MBB);
10010
92
    F->insert(It, midMBB);
10011
92
    F->insert(It, exitMBB);
10012
92
    exitMBB->splice(exitMBB->begin(), BB,
10013
92
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
10014
92
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);
10015
92
10016
92
    //  thisMBB:
10017
92
    //   ...
10018
92
    //   fallthrough --> loopMBB
10019
92
    BB->addSuccessor(loop1MBB);
10020
92
10021
92
    // loop1MBB:
10022
92
    //   l[bhwd]arx dest, ptr
10023
92
    //   cmp[wd] dest, oldval
10024
92
    //   bne- midMBB
10025
92
    // loop2MBB:
10026
92
    //   st[bhwd]cx. newval, ptr
10027
92
    //   bne- loopMBB
10028
92
    //   b exitBB
10029
92
    // midMBB:
10030
92
    //   st[bhwd]cx. dest, ptr
10031
92
    // exitBB:
10032
92
    BB = loop1MBB;
10033
92
    BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
10034
92
      .addReg(ptrA).addReg(ptrB);
10035
92
    BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
10036
92
      .addReg(oldval).addReg(dest);
10037
92
    BuildMI(BB, dl, TII->get(PPC::BCC))
10038
92
      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
10039
92
    BB->addSuccessor(loop2MBB);
10040
92
    BB->addSuccessor(midMBB);
10041
92
10042
92
    BB = loop2MBB;
10043
92
    BuildMI(BB, dl, TII->get(StoreMnemonic))
10044
92
      .addReg(newval).addReg(ptrA).addReg(ptrB);
10045
92
    BuildMI(BB, dl, TII->get(PPC::BCC))
10046
92
      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
10047
92
    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
10048
92
    BB->addSuccessor(loop1MBB);
10049
92
    BB->addSuccessor(exitMBB);
10050
92
10051
92
    BB = midMBB;
10052
92
    BuildMI(BB, dl, TII->get(StoreMnemonic))
10053
92
      .addReg(dest).addReg(ptrA).addReg(ptrB);
10054
92
    BB->addSuccessor(exitMBB);
10055
92
10056
92
    //  exitMBB:
10057
92
    //   ...
10058
92
    BB = exitMBB;
10059
238
  } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
10060
146
             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
10061
8
    // We must use 64-bit registers for addresses when targeting 64-bit,
10062
8
    // since we're actually doing arithmetic on them.  Other registers
10063
8
    // can be 32-bit.
10064
8
    bool is64bit = Subtarget.isPPC64();
10065
8
    bool isLittleEndian = Subtarget.isLittleEndian();
10066
8
    bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
10067
8
10068
8
    unsigned dest = MI.getOperand(0).getReg();
10069
8
    unsigned ptrA = MI.getOperand(1).getReg();
10070
8
    unsigned ptrB = MI.getOperand(2).getReg();
10071
8
    unsigned oldval = MI.getOperand(3).getReg();
10072
8
    unsigned newval = MI.getOperand(4).getReg();
10073
8
    DebugLoc dl = MI.getDebugLoc();
10074
8
10075
8
    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
10076
8
    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
10077
8
    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
10078
8
    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
10079
8
    F->insert(It, loop1MBB);
10080
8
    F->insert(It, loop2MBB);
10081
8
    F->insert(It, midMBB);
10082
8
    F->insert(It, exitMBB);
10083
8
    exitMBB->splice(exitMBB->begin(), BB,
10084
8
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
10085
8
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);
10086
8
10087
8
    MachineRegisterInfo &RegInfo = F->getRegInfo();
10088
6
    const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
10089
2
                                            : &PPC::GPRCRegClass;
10090
8
    unsigned PtrReg = RegInfo.createVirtualRegister(RC);
10091
8
    unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
10092
8
    unsigned ShiftReg =
10093
8
      isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
10094
8
    unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
10095
8
    unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
10096
8
    unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
10097
8
    unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
10098
8
    unsigned MaskReg = RegInfo.createVirtualRegister(RC);
10099
8
    unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
10100
8
    unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
10101
8
    unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
10102
8
    unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
10103
8
    unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
10104
8
    unsigned Ptr1Reg;
10105
8
    unsigned TmpReg = RegInfo.createVirtualRegister(RC);
10106
8
    unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
10107
8
    //  thisMBB:
10108
8
    //   ...
10109
8
    //   fallthrough --> loopMBB
10110
8
    BB->addSuccessor(loop1MBB);
10111
8
10112
8
    // The 4-byte load must be aligned, while a char or short may be
10113
8
    // anywhere in the word.  Hence all this nasty bookkeeping code.
10114
8
    //   add ptr1, ptrA, ptrB [copy if ptrA==0]
10115
8
    //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
10116
8
    //   xori shift, shift1, 24 [16]
10117
8
    //   rlwinm ptr, ptr1, 0, 0, 29
10118
8
    //   slw newval2, newval, shift
10119
8
    //   slw oldval2, oldval,shift
10120
8
    //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
10121
8
    //   slw mask, mask2, shift
10122
8
    //   and newval3, newval2, mask
10123
8
    //   and oldval3, oldval2, mask
10124
8
    // loop1MBB:
10125
8
    //   lwarx tmpDest, ptr
10126
8
    //   and tmp, tmpDest, mask
10127
8
    //   cmpw tmp, oldval3
10128
8
    //   bne- midMBB
10129
8
    // loop2MBB:
10130
8
    //   andc tmp2, tmpDest, mask
10131
8
    //   or tmp4, tmp2, newval3
10132
8
    //   stwcx. tmp4, ptr
10133
8
    //   bne- loop1MBB
10134
8
    //   b exitBB
10135
8
    // midMBB:
10136
8
    //   stwcx. tmpDest, ptr
10137
8
    // exitBB:
10138
8
    //   srw dest, tmpDest, shift
10139
8
    if (ptrA != ZeroReg) {
10140
0
      Ptr1Reg = RegInfo.createVirtualRegister(RC);
10141
0
      BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
10142
0
        .addReg(ptrA).addReg(ptrB);
10143
8
    } else {
10144
8
      Ptr1Reg = ptrB;
10145
8
    }
10146
8
    BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
10147
8
        .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
10148
8
    if (!isLittleEndian)
10149
8
      BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
10150
8
          .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
10151
8
    if (is64bit)
10152
6
      BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
10153
6
        .addReg(Ptr1Reg).addImm(0).addImm(61);
10154
8
    else
10155
2
      BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
10156
2
        .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
10157
8
    BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
10158
8
        .addReg(newval).addReg(ShiftReg);
10159
8
    BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
10160
8
        .addReg(oldval).addReg(ShiftReg);
10161
8
    if (is8bit)
10162
4
      BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
10163
4
    else {
10164
4
      BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
10165
4
      BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
10166
4
        .addReg(Mask3Reg).addImm(65535);
10167
4
    }
10168
8
    BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
10169
8
        .addReg(Mask2Reg).addReg(ShiftReg);
10170
8
    BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
10171
8
        .addReg(NewVal2Reg).addReg(MaskReg);
10172
8
    BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
10173
8
        .addReg(OldVal2Reg).addReg(MaskReg);
10174
8
10175
8
    BB = loop1MBB;
10176
8
    BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
10177
8
        .addReg(ZeroReg).addReg(PtrReg);
10178
8
    BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
10179
8
        .addReg(TmpDestReg).addReg(MaskReg);
10180
8
    BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
10181
8
        .addReg(TmpReg).addReg(OldVal3Reg);
10182
8
    BuildMI(BB, dl, TII->get(PPC::BCC))
10183
8
        .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
10184
8
    BB->addSuccessor(loop2MBB);
10185
8
    BB->addSuccessor(midMBB);
10186
8
10187
8
    BB = loop2MBB;
10188
8
    BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
10189
8
        .addReg(TmpDestReg).addReg(MaskReg);
10190
8
    BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
10191
8
        .addReg(Tmp2Reg).addReg(NewVal3Reg);
10192
8
    BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
10193
8
        .addReg(ZeroReg).addReg(PtrReg);
10194
8
    BuildMI(BB, dl, TII->get(PPC::BCC))
10195
8
      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
10196
8
    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
10197
8
    BB->addSuccessor(loop1MBB);
10198
8
    BB->addSuccessor(exitMBB);
10199
8
10200
8
    BB = midMBB;
10201
8
    BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
10202
8
      .addReg(ZeroReg).addReg(PtrReg);
10203
8
    BB->addSuccessor(exitMBB);
10204
8
10205
8
    //  exitMBB:
10206
8
    //   ...
10207
8
    BB = exitMBB;
10208
8
    BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpReg)
10209
8
      .addReg(ShiftReg);
10210
146
  } else if (MI.getOpcode() == PPC::FADDrtz) {
10211
8
    // This pseudo performs an FADD with rounding mode temporarily forced
10212
8
    // to round-to-zero.  We emit this via custom inserter since the FPSCR
10213
8
    // is not modeled at the SelectionDAG level.
10214
8
    unsigned Dest = MI.getOperand(0).getReg();
10215
8
    unsigned Src1 = MI.getOperand(1).getReg();
10216
8
    unsigned Src2 = MI.getOperand(2).getReg();
10217
8
    DebugLoc dl = MI.getDebugLoc();
10218
8
10219
8
    MachineRegisterInfo &RegInfo = F->getRegInfo();
10220
8
    unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
10221
8
10222
8
    // Save FPSCR value.
10223
8
    BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
10224
8
10225
8
    // Set rounding mode to round-to-zero.
10226
8
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
10227
8
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
10228
8
10229
8
    // Perform addition.
10230
8
    BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
10231
8
10232
8
    // Restore FPSCR value.
10233
8
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
10234
138
  } else if (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
10235
129
             MI.getOpcode() == PPC::ANDIo_1_GT_BIT ||
10236
113
             MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
10237
130
             MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) {
10238
130
    unsigned Opcode = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
10239
129
                       MI.getOpcode() == PPC::ANDIo_1_GT_BIT8)
10240
113
                          ? PPC::ANDIo8
10241
17
                          : PPC::ANDIo;
10242
130
    bool isEQ = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
10243
129
                 MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8);
10244
130
10245
130
    MachineRegisterInfo &RegInfo = F->getRegInfo();
10246
130
    unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ?
10247
17
                                                  &PPC::GPRCRegClass :
10248
113
                                                  &PPC::G8RCRegClass);
10249
130
10250
130
    DebugLoc dl = MI.getDebugLoc();
10251
130
    BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
10252
130
        .addReg(MI.getOperand(1).getReg())
10253
130
        .addImm(1);
10254
130
    BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
10255
130
            MI.getOperand(0).getReg())
10256
130
        .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
10257
0
  } else if (MI.getOpcode() == PPC::TCHECK_RET) {
10258
0
    DebugLoc Dl = MI.getDebugLoc();
10259
0
    MachineRegisterInfo &RegInfo = F->getRegInfo();
10260
0
    unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
10261
0
    BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
10262
0
    return BB;
10263
0
  } else {
10264
0
    llvm_unreachable("Unexpected instr type to insert");
10265
972
  }
10266
1.67k
10267
1.67k
  MI.eraseFromParent(); // The pseudo instruction is gone now.
10268
1.67k
  return BB;
10269
1.67k
}
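// The ReadTB expansion above has a simple portable analogue, sketched here;
// readTBU()/readTBL() are assumed stand-ins for the two mfspr forms, not
// real APIs:
//
//   #include <cstdint>
//
//   uint32_t readTBU(); // mfspr Rx, 269
//   uint32_t readTBL(); // mfspr Ry, 268
//
//   uint64_t readTimeBase32() {
//     uint32_t Hi, Lo;
//     do {
//       Hi = readTBU();
//       Lo = readTBL();
//     } while (readTBU() != Hi); // counter wrapped between reads: retry
//     return (uint64_t(Hi) << 32) | Lo;
//   }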
10270
10271
//===----------------------------------------------------------------------===//
10272
// Target Optimization Hooks
10273
//===----------------------------------------------------------------------===//
10274
10275
39
static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
10276
39
  // For the estimates, convergence is quadratic, so we essentially double the
10277
39
  // number of digits correct after every iteration. For both FRE and FRSQRTE,
10278
39
  // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
10279
39
  // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
10280
39
  int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
10281
39
  if (VT.getScalarType() == MVT::f64)
10282
18
    RefinementSteps++;
10283
39
  return RefinementSteps;
10284
39
}
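// The returned counts follow from the doubling argument in the comment; a
// small constexpr check (a sketch, not part of this file) makes the
// arithmetic concrete:
//
//   constexpr int stepsNeeded(int StartBits, int TargetBits) {
//     int Steps = 0;
//     for (int Bits = StartBits; Bits < TargetBits; Bits *= 2)
//       ++Steps;
//     return Steps;
//   }
//   static_assert(stepsNeeded(14, 23) == 1 && stepsNeeded(14, 52) == 2, "");
//   static_assert(stepsNeeded(5, 23) == 3 && stepsNeeded(5, 52) == 4, "");
//
// which matches the "1 (or 3) steps, plus one more for f64" logic above.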
10285
10286
SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
10287
                                           int Enabled, int &RefinementSteps,
10288
                                           bool &UseOneConstNR,
10289
23
                                           bool Reciprocal) const {
10290
23
  EVT VT = Operand.getValueType();
10291
23
  if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
10292
16
      (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
10293
8
      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
10294
6
      (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
10295
6
      (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
10296
23
      (VT == MVT::v4f64 && Subtarget.hasQPX())) {
10297
23
    if (RefinementSteps == ReciprocalEstimate::Unspecified)
10298
21
      RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
10299
23
10300
23
    UseOneConstNR = true;
10301
23
    return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
10302
23
  }
10303
0
  return SDValue();
10304
0
}
10305
10306
SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
10307
                                            int Enabled,
10308
22
                                            int &RefinementSteps) const {
10309
22
  EVT VT = Operand.getValueType();
10310
22
  if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
10311
14
      (VT == MVT::f64 && Subtarget.hasFRE()) ||
10312
7
      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
10313
6
      (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
10314
6
      (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
10315
22
      (VT == MVT::v4f64 && Subtarget.hasQPX())) {
10316
18
    if (RefinementSteps == ReciprocalEstimate::Unspecified)
10317
18
      RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
10318
18
    return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
10319
18
  }
10320
4
  return SDValue();
10321
4
}
10322
10323
6
unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
10324
6
  // Note: This functionality is used only when unsafe-fp-math is enabled, and
10325
6
  // on cores with reciprocal estimates (which are used when unsafe-fp-math is
10326
6
  // enabled for division), this functionality is redundant with the default
10327
6
  // combiner logic (once the division -> reciprocal/multiply transformation
10328
6
  // has taken place). As a result, this matters more for older cores than for
10329
6
  // newer ones.
10330
6
10331
6
  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
10332
6
  // reciprocal if there are two or more FDIVs (for embedded cores with only
10333
6
  // one FP pipeline) or three or more FDIVs (for generic OOO cores).
10334
6
  switch (Subtarget.getDarwinDirective()) {
10335
6
  default:
10336
6
    return 3;
10337
0
  case PPC::DIR_440:
10338
0
  case PPC::DIR_A2:
10339
0
  case PPC::DIR_E500mc:
10340
0
  case PPC::DIR_E5500:
10341
0
    return 2;
10342
0
  }
10343
0
}
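// What the returned threshold gates, as a sketch (profitable only under
// unsafe-fp-math, since the reciprocal form changes rounding):
//
//   void divideAll(double *A, unsigned N, double D) {
//     const double Recip = 1.0 / D; // one FDIV...
//     for (unsigned I = 0; I != N; ++I)
//       A[I] *= Recip;              // ...and FMULs replace the repeated FDIVs
//   }
//
// With at least the returned number of divisions by the same D, the combiner
// prefers this shape.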
10344
10345
// isConsecutiveLSLoc needs to work even if all adds have not yet been
10346
// collapsed, and so we need to look through chains of them.
10347
static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
10348
12.1k
                                     int64_t& Offset, SelectionDAG &DAG) {
10349
12.1k
  if (DAG.isBaseWithConstantOffset(Loc)) {
10350
6.27k
    Base = Loc.getOperand(0);
10351
6.27k
    Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
10352
6.27k
10353
6.27k
    // The base might itself be a base plus an offset, and if so, accumulate
10354
6.27k
    // that as well.
10355
6.27k
    getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
10356
6.27k
  }
10357
12.1k
}
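// A self-contained model of the decomposition above (illustrative types, not
// SelectionDAG): peel nested (base + constant) adds and accumulate the
// constants, so (p + 4) + 8 and p + 12 end up with the same base and offset.
//
//   #include <cstdint>
//
//   struct AddrExpr {          // stand-in for an add chain of SDValues
//     const AddrExpr *Base;    // nullptr at the leaf
//     int64_t Offset;          // constant addend at this level
//   };
//
//   void peelConstantOffsets(const AddrExpr *Loc, const AddrExpr *&Base,
//                            int64_t &Offset) {
//     while (Loc->Base) {      // the real code recurses; a loop is equivalent
//       Offset += Loc->Offset;
//       Loc = Loc->Base;
//     }
//     Base = Loc;
//   }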
10358
10359
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
10360
                            unsigned Bytes, int Dist,
10361
3.08k
                            SelectionDAG &DAG) {
10362
3.08k
  if (VT.getSizeInBits() / 8 != Bytes)
10363
162
    return false;
10364
2.92k
10365
2.92k
  SDValue BaseLoc = Base->getBasePtr();
10366
2.92k
  if (Loc.getOpcode() == ISD::FrameIndex) {
10367
0
    if (BaseLoc.getOpcode() != ISD::FrameIndex)
10368
0
      return false;
10369
0
    const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10370
0
    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
10371
0
    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
10372
0
    int FS  = MFI.getObjectSize(FI);
10373
0
    int BFS = MFI.getObjectSize(BFI);
10374
0
    if (FS != BFS || FS != (int)Bytes) return false;
10375
0
    return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
10376
0
  }
10377
2.92k
10378
2.92k
  SDValue Base1 = Loc, Base2 = BaseLoc;
10379
2.92k
  int64_t Offset1 = 0, Offset2 = 0;
10380
2.92k
  getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
10381
2.92k
  getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
10382
2.92k
  if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
10383
226
    return true;
10384
2.69k
10385
2.69k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10386
2.69k
  const GlobalValue *GV1 = nullptr;
10387
2.69k
  const GlobalValue *GV2 = nullptr;
10388
2.69k
  Offset1 = 0;
10389
2.69k
  Offset2 = 0;
10390
2.69k
  bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
10391
2.69k
  bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
10392
2.69k
  if (isGA1 && isGA2 && GV1 == GV2)
10393
12
    return Offset1 == (Offset2 + Dist*Bytes);
10394
2.68k
  return false;
10395
2.68k
}
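// The checks above all reduce to one predicate on the decomposed offsets; a
// tiny numeric illustration (hypothetical helper, not part of this file):
//
//   bool offsetsConsecutive(int64_t Off1, int64_t Off2, unsigned Bytes,
//                           int Dist) {
//     return Off1 == Off2 + int64_t(Dist) * int64_t(Bytes);
//   }
//   // e.g. a 16-byte access at Base+16 is Dist = 1 from one at Base+0,
//   // while one at Base+8 overlaps and is rejected.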
10396
10397
// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
10398
// not enforce equality of the chain operands.
10399
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
10400
                            unsigned Bytes, int Dist,
10401
3.08k
                            SelectionDAG &DAG) {
10402
3.08k
  if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
10403
2.13k
    EVT VT = LS->getMemoryVT();
10404
2.13k
    SDValue Loc = LS->getBasePtr();
10405
2.13k
    return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
10406
2.13k
  }
10407
955
10408
955
  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
10409
952
    EVT VT;
10410
952
    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
10411
0
    default: return false;
10412
754
    case Intrinsic::ppc_qpx_qvlfd:
10413
754
    case Intrinsic::ppc_qpx_qvlfda:
10414
754
      VT = MVT::v4f64;
10415
754
      break;
10416
4
    case Intrinsic::ppc_qpx_qvlfs:
10417
4
    case Intrinsic::ppc_qpx_qvlfsa:
10418
4
      VT = MVT::v4f32;
10419
4
      break;
10420
0
    case Intrinsic::ppc_qpx_qvlfcd:
10421
0
    case Intrinsic::ppc_qpx_qvlfcda:
10422
0
      VT = MVT::v2f64;
10423
0
      break;
10424
0
    case Intrinsic::ppc_qpx_qvlfcs:
10425
0
    case Intrinsic::ppc_qpx_qvlfcsa:
10426
0
      VT = MVT::v2f32;
10427
0
      break;
10428
194
    case Intrinsic::ppc_qpx_qvlfiwa:
10429
194
    case Intrinsic::ppc_qpx_qvlfiwz:
10430
194
    case Intrinsic::ppc_altivec_lvx:
10431
194
    case Intrinsic::ppc_altivec_lvxl:
10432
194
    case Intrinsic::ppc_vsx_lxvw4x:
10433
194
    case Intrinsic::ppc_vsx_lxvw4x_be:
10434
194
      VT = MVT::v4i32;
10435
194
      break;
10436
0
    case Intrinsic::ppc_vsx_lxvd2x:
10437
0
    case Intrinsic::ppc_vsx_lxvd2x_be:
10438
0
      VT = MVT::v2f64;
10439
0
      break;
10440
0
    case Intrinsic::ppc_altivec_lvebx:
10441
0
      VT = MVT::i8;
10442
0
      break;
10443
0
    case Intrinsic::ppc_altivec_lvehx:
10444
0
      VT = MVT::i16;
10445
0
      break;
10446
0
    case Intrinsic::ppc_altivec_lvewx:
10447
0
      VT = MVT::i32;
10448
0
      break;
10449
952
    }
10450
952
10451
952
    return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
10452
952
  }
10453
3
10454
3
  if (N->getOpcode() == ISD::INTRINSIC_VOID) {
10455
1
    EVT VT;
10456
1
    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
10457
0
    default: return false;
10458
0
    case Intrinsic::ppc_qpx_qvstfd:
10459
0
    case Intrinsic::ppc_qpx_qvstfda:
10460
0
      VT = MVT::v4f64;
10461
0
      break;
10462
0
    case Intrinsic::ppc_qpx_qvstfs:
10463
0
    case Intrinsic::ppc_qpx_qvstfsa:
10464
0
      VT = MVT::v4f32;
10465
0
      break;
10466
0
    case Intrinsic::ppc_qpx_qvstfcd:
10467
0
    case Intrinsic::ppc_qpx_qvstfcda:
10468
0
      VT = MVT::v2f64;
10469
0
      break;
10470
0
    case Intrinsic::ppc_qpx_qvstfcs:
10471
0
    case Intrinsic::ppc_qpx_qvstfcsa:
10472
0
      VT = MVT::v2f32;
10473
0
      break;
10474
1
    case Intrinsic::ppc_qpx_qvstfiw:
10475
1
    case Intrinsic::ppc_qpx_qvstfiwa:
10476
1
    case Intrinsic::ppc_altivec_stvx:
10477
1
    case Intrinsic::ppc_altivec_stvxl:
10478
1
    case Intrinsic::ppc_vsx_stxvw4x:
10479
1
      VT = MVT::v4i32;
10480
1
      break;
10481
0
    case Intrinsic::ppc_vsx_stxvd2x:
10482
0
      VT = MVT::v2f64;
10483
0
      break;
10484
0
    case Intrinsic::ppc_vsx_stxvw4x_be:
10485
0
      VT = MVT::v4i32;
10486
0
      break;
10487
0
    case Intrinsic::ppc_vsx_stxvd2x_be:
10488
0
      VT = MVT::v2f64;
10489
0
      break;
10490
0
    case Intrinsic::ppc_altivec_stvebx:
10491
0
      VT = MVT::i8;
10492
0
      break;
10493
0
    case Intrinsic::ppc_altivec_stvehx:
10494
0
      VT = MVT::i16;
10495
0
      break;
10496
0
    case Intrinsic::ppc_altivec_stvewx:
10497
0
      VT = MVT::i32;
10498
0
      break;
10499
1
    }
10500
1
10501
1
    return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
10502
1
  }
10503
2
10504
2
  return false;
10505
2
}
10506
10507
// Return true if there is a nearby consecutive load to the one provided
10508
// (regardless of alignment). We search up and down the chain, looking through
10509
// token factors and other loads (but nothing else). As a result, a true result
10510
// indicates that it is safe to create a new consecutive load adjacent to the
10511
// load provided.
10512
108
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
10513
108
  SDValue Chain = LD->getChain();
10514
108
  EVT VT = LD->getMemoryVT();
10515
108
10516
108
  SmallSet<SDNode *, 16> LoadRoots;
10517
108
  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
10518
108
  SmallSet<SDNode *, 16> Visited;
10519
108
10520
108
  // First, search up the chain, branching to follow all token-factor operands.
10521
108
  // If we find a consecutive load, then we're done, otherwise, record all
10522
108
  // nodes just above the top-level loads and token factors.
10523
740
  while (!Queue.empty()) {
10524
633
    SDNode *ChainNext = Queue.pop_back_val();
10525
633
    if (!Visited.insert(ChainNext).second)
10526
0
      continue;
10527
633
10528
633
    if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
10529
386
      if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
10530
1
        return true;
10531
385
10532
385
      if (!Visited.count(ChainLD->getChain().getNode()))
10533
21
        Queue.push_back(ChainLD->getChain().getNode());
10534
633
    } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
10535
140
      for (const SDUse &O : ChainNext->ops())
10536
504
        if (!Visited.count(O.getNode()))
10537
504
          Queue.push_back(O.getNode());
10538
140
    } else
10539
107
      LoadRoots.insert(ChainNext);
10540
633
  }
10541
108
10542
108
  // Second, search down the chain, starting from the top-level nodes recorded
10543
108
  // in the first phase. These top-level nodes are the nodes just above all
10544
108
  // loads and token factors. Starting with their uses, recursively look through
10545
108
  // all loads (just the chain uses) and token factors to find a consecutive
10546
108
  // load.
10547
107
  Visited.clear();
10548
107
  Queue.clear();
10549
107
10550
107
  for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
10551
165
       IE = LoadRoots.end(); I != IE; ++I) {
10552
107
    Queue.push_back(*I);
10553
107
10554
2.47k
    while (!Queue.empty()) {
10555
2.41k
      SDNode *LoadRoot = Queue.pop_back_val();
10556
2.41k
      if (!Visited.insert(LoadRoot).second)
10557
8
        continue;
10558
2.40k
10559
2.40k
      if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
10560
1.82k
        if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
10561
49
          return true;
10562
2.35k
10563
2.35k
      for (SDNode::use_iterator UI = LoadRoot->use_begin(),
10564
8.95k
           UE = LoadRoot->use_end(); UI != UE; ++UI)
10565
6.59k
        if (((isa<MemSDNode>(*UI) &&
10566
2.28k
            cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
10567
6.59k
            UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
10568
2.74k
          Queue.push_back(*UI);
10569
2.41k
    }
10570
107
  }
10571
107
10572
58
  return false;
10573
108
}
10574
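As an aside, the two-phase chain walk that findConsecutiveLoad performs can be
modeled without any SelectionDAG machinery. The sketch below is illustrative
only (Node and Kind are hypothetical stand-ins for SDNodes, not LLVM types):
it shows the upward phase, which looks through loads and token factors and
records every other node as a "load root" for the downward search.

    #include <set>
    #include <vector>

    struct Node {
      enum Kind { Load, TokenFactor, Other } K;
      std::vector<Node *> Chains; // chain operands feeding this node
    };

    static std::set<Node *> collectLoadRoots(Node *Chain) {
      std::set<Node *> Roots, Visited;
      std::vector<Node *> Queue{Chain};
      while (!Queue.empty()) {
        Node *N = Queue.back();
        Queue.pop_back();
        if (!Visited.insert(N).second)
          continue; // already reached through another token factor
        if (N->K == Node::Load || N->K == Node::TokenFactor) {
          // Look through loads and token factors; a real implementation
          // would first test each load for a consecutive access.
          for (Node *Op : N->Chains)
            Queue.push_back(Op);
        } else {
          Roots.insert(N); // sits just above all loads and token factors
        }
      }
      return Roots;
    }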
10575
/// This function is called when we have proved that a SETCC node can be replaced
10576
/// by subtraction (and other supporting instructions) so that the result of
10577
/// comparison is kept in a GPR instead of a CR. This function is purely for
10578
/// codegen purposes and has some flags to guide the codegen process.
10579
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
10580
54
                                     bool Swap, SDLoc &DL, SelectionDAG &DAG) {
10581
54
  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
10582
54
10583
54
  // Zero extend the operands to the largest legal integer. Originally, they
10584
54
  // must be of a strictly smaller size.
10585
54
  auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
10586
54
                         DAG.getConstant(Size, DL, MVT::i32));
10587
54
  auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
10588
54
                         DAG.getConstant(Size, DL, MVT::i32));
10589
54
10590
54
  // Swap if needed. Depends on the condition code.
10591
54
  if (Swap)
10592
27
    std::swap(Op0, Op1);
10593
54
10594
54
  // Subtract extended integers.
10595
54
  auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
10596
54
10597
54
  // Move the sign bit to the least significant position and zero out the rest.
10598
54
  // Now the least significant bit carries the result of original comparison.
10599
54
  auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
10600
54
                             DAG.getConstant(Size - 1, DL, MVT::i32));
10601
54
  auto Final = Shifted;
10602
54
10603
54
  // Complement the result if needed. Based on the condition code.
10604
54
  if (Complement)
10605
26
    Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
10606
26
                        DAG.getConstant(1, DL, MVT::i64));
10607
54
10608
54
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
10609
54
}
10610
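The flag combinations passed to generateEquivalentSub above follow from simple
unsigned-comparison identities. A minimal standalone check (plain C++, not DAG
code), assuming 32-bit operands and a 64-bit largest legal type:

    #include <cassert>
    #include <cstdint>

    // SETULT: zext both sides, subtract, and take the sign bit; no swap, no
    // complement. Bit 63 of the 64-bit difference is set exactly when A < B.
    static uint64_t ultViaSub(uint32_t A, uint32_t B) {
      return ((uint64_t)A - (uint64_t)B) >> 63;
    }

    // SETULE: A <= B == !(B < A), hence Swap=true and Complement=true above.
    static uint64_t uleViaSub(uint32_t A, uint32_t B) {
      return (((uint64_t)B - (uint64_t)A) >> 63) ^ 1;
    }

    int main() {
      assert(ultViaSub(1, 2) == 1 && ultViaSub(2, 1) == 0 && ultViaSub(2, 2) == 0);
      assert(uleViaSub(1, 2) == 1 && uleViaSub(2, 1) == 0 && uleViaSub(2, 2) == 1);
      return 0;
    }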
10611
SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
10612
238
                                                  DAGCombinerInfo &DCI) const {
10613
238
  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
10614
238
10615
238
  SelectionDAG &DAG = DCI.DAG;
10616
238
  SDLoc DL(N);
10617
238
10618
238
  // Size of integers being compared has a critical role in the following
10619
238
  // analysis, so we prefer to do this when all types are legal.
10620
238
  if (!DCI.isAfterLegalizeVectorOps())
10621
125
    return SDValue();
10622
113
10623
113
  // If all users of SETCC extend its value to a legal integer type
10624
113
  // then we replace SETCC with a subtraction
10625
113
  for (SDNode::use_iterator UI = N->use_begin(),
10626
170
       UE = N->use_end(); UI != UE; ++UI) {
10627
113
    if (UI->getOpcode() != ISD::ZERO_EXTEND)
10628
56
      return SDValue();
10629
113
  }
10630
113
10631
57
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
10632
57
  auto OpSize = N->getOperand(0).getValueSizeInBits();
10633
57
10634
57
  unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
10635
57
10636
57
  if (OpSize < Size) {
10637
54
    switch (CC) {
10638
0
    default: break;
10639
14
    case ISD::SETULT:
10640
14
      return generateEquivalentSub(N, Size, false, false, DL, DAG);
10641
13
    case ISD::SETULE:
10642
13
      return generateEquivalentSub(N, Size, true, true, DL, DAG);
10643
14
    case ISD::SETUGT:
10644
14
      return generateEquivalentSub(N, Size, false, true, DL, DAG);
10645
13
    case ISD::SETUGE:
10646
13
      return generateEquivalentSub(N, Size, true, false, DL, DAG);
10647
3
    }
10648
3
  }
10649
3
10650
3
  return SDValue();
10651
3
}
10652
10653
SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
10654
10.5k
                                                  DAGCombinerInfo &DCI) const {
10655
10.5k
  SelectionDAG &DAG = DCI.DAG;
10656
10.5k
  SDLoc dl(N);
10657
10.5k
10658
10.5k
  assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
10659
10.5k
  // If we're tracking CR bits, we need to be careful that we don't have:
10660
10.5k
  //   trunc(binary-ops(zext(x), zext(y)))
10661
10.5k
  // or
10662
10.5k
  //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
10663
10.5k
  // such that we're unnecessarily moving things into GPRs when it would be
10664
10.5k
  // better to keep them in CR bits.
10665
10.5k
10666
10.5k
  // Note that trunc here can be an actual i1 trunc, or can be the effective
10667
10.5k
  // truncation that comes from a setcc or select_cc.
10668
10.5k
  if (N->getOpcode() == ISD::TRUNCATE &&
10669
5.97k
      N->getValueType(0) != MVT::i1)
10670
5.63k
    return SDValue();
10671
4.90k
10672
4.90k
  if (N->getOperand(0).getValueType() != MVT::i32 &&
10673
3.54k
      N->getOperand(0).getValueType() != MVT::i64)
10674
2.81k
    return SDValue();
10675
2.09k
10676
2.09k
  if (N->getOpcode() == ISD::SETCC ||
10677
2.09k
      N->getOpcode() == ISD::SELECT_CC) {
10678
1.76k
    // If we're looking at a comparison, then we need to make sure that the
10679
1.76k
    // high bits (all except for the first) don't affect the result.
10680
1.76k
    ISD::CondCode CC =
10681
1.76k
      cast<CondCodeSDNode>(N->getOperand(
10682
1.76k
        N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
10683
1.76k
    unsigned OpBits = N->getOperand(0).getValueSizeInBits();
10684
1.76k
10685
1.76k
    if (ISD::isSignedIntSetCC(CC)) {
10686
398
      if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
10687
0
          DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
10688
398
        return SDValue();
10689
1.36k
    } else if (ISD::isUnsignedIntSetCC(CC)) {
10690
343
      if (!DAG.MaskedValueIsZero(N->getOperand(0),
10691
343
                                 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
10692
10
          !DAG.MaskedValueIsZero(N->getOperand(1),
10693
10
                                 APInt::getHighBitsSet(OpBits, OpBits-1)))
10694
333
        return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
10695
333
                                             : SDValue());
10696
1.02k
    } else {
10697
1.02k
      // This is neither a signed nor an unsigned comparison, just make sure
10698
1.02k
      // that the high bits are equal.
10699
1.02k
      KnownBits Op1Known, Op2Known;
10700
1.02k
      DAG.computeKnownBits(N->getOperand(0), Op1Known);
10701
1.02k
      DAG.computeKnownBits(N->getOperand(1), Op2Known);
10702
1.02k
10703
1.02k
      // We don't really care about what is known about the first bit (if
10704
1.02k
      // anything), so clear it in all masks prior to comparing them.
10705
1.02k
      Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0);
10706
1.02k
      Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0);
10707
1.02k
10708
1.02k
      if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One)
10709
614
        return SDValue();
10710
750
    }
10711
1.76k
  }
10712
750
10713
750
  // We now know that the higher-order bits are irrelevant, we just need to
10714
750
  // make sure that all of the intermediate operations are bit operations, and
10715
750
  // all inputs are extensions.
10716
750
  if (N->getOperand(0).getOpcode() != ISD::AND &&
10717
738
      N->getOperand(0).getOpcode() != ISD::OR  &&
10718
738
      N->getOperand(0).getOpcode() != ISD::XOR &&
10719
738
      N->getOperand(0).getOpcode() != ISD::SELECT &&
10720
738
      N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
10721
738
      N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
10722
560
      N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
10723
559
      N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
10724
559
      N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
10725
559
    return SDValue();
10726
191
10727
191
  if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
10728
189
      N->getOperand(1).getOpcode() != ISD::AND &&
10729
179
      N->getOperand(1).getOpcode() != ISD::OR  &&
10730
179
      N->getOperand(1).getOpcode() != ISD::XOR &&
10731
179
      N->getOperand(1).getOpcode() != ISD::SELECT &&
10732
179
      N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
10733
179
      N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
10734
1
      N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
10735
1
      N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
10736
1
      N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
10737
1
    return SDValue();
10738
190
10739
190
  SmallVector<SDValue, 4> Inputs;
10740
190
  SmallVector<SDValue, 8> BinOps, PromOps;
10741
190
  SmallPtrSet<SDNode *, 16> Visited;
10742
190
10743
566
  for (unsigned i = 0; i < 2; ++i) {
10744
378
    if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
10745
378
          N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
10746
378
          N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
10747
0
          N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
10748
378
        isa<ConstantSDNode>(N->getOperand(i)))
10749
0
      Inputs.push_back(N->getOperand(i));
10750
378
    else
10751
378
      BinOps.push_back(N->getOperand(i));
10752
378
10753
378
    if (N->getOpcode() == ISD::TRUNCATE)
10754
2
      break;
10755
378
  }
10756
190
10757
190
  // Visit all inputs, collect all binary operations (and, or, xor and
10758
190
  // select) that are all fed by extensions.
10759
196
  while (!BinOps.empty()) {
10760
196
    SDValue BinOp = BinOps.back();
10761
196
    BinOps.pop_back();
10762
196
10763
196
    if (!Visited.insert(BinOp.getNode()).second)
10764
0
      continue;
10765
196
10766
196
    PromOps.push_back(BinOp);
10767
196
10768
208
    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
10769
202
      // The condition of the select is not promoted.
10770
202
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
10771
0
        continue;
10772
202
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
10773
0
        continue;
10774
202
10775
202
      if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
10776
202
            BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
10777
202
            BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
10778
0
           BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
10779
202
          
isa<ConstantSDNode>(BinOp.getOperand(i))202
) {
10780
6
        Inputs.push_back(BinOp.getOperand(i));
10781
202
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
10782
196
                 BinOp.getOperand(i).getOpcode() == ISD::OR  ||
10783
196
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
10784
196
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
10785
196
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
10786
196
                 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
10787
190
                 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
10788
190
                 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
10789
196
                 
BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND190
) {
10790
6
        BinOps.push_back(BinOp.getOperand(i));
10791
196
      } else {
10792
190
        // We have an input that is not an extension or another binary
10793
190
        // operation; we'll abort this transformation.
10794
190
        return SDValue();
10795
190
      }
10796
202
    }
10797
196
  }
10798
190
10799
190
  // Make sure that this is a self-contained cluster of operations (which
10800
190
  // is not quite the same thing as saying that everything has only one
10801
190
  // use).
10802
0
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
10803
0
    if (isa<ConstantSDNode>(Inputs[i]))
10804
0
      continue;
10805
0
10806
0
    for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
10807
0
                              UE = Inputs[i].getNode()->use_end();
10808
0
         UI != UE; ++UI) {
10809
0
      SDNode *User = *UI;
10810
0
      if (User != N && !Visited.count(User))
10811
0
        return SDValue();
10812
0
10813
0
      // Make sure that we're not going to promote the non-output-value
10814
0
      // operand(s) or SELECT or SELECT_CC.
10815
0
      // FIXME: Although we could sometimes handle this, and it does occur in
10816
0
      // practice that one of the condition inputs to the select is also one of
10817
0
      // the outputs, we currently can't deal with this.
10818
0
      if (User->getOpcode() == ISD::SELECT) {
10819
0
        if (User->getOperand(0) == Inputs[i])
10820
0
          return SDValue();
10821
0
      } else if (User->getOpcode() == ISD::SELECT_CC) {
10822
0
        if (User->getOperand(0) == Inputs[i] ||
10823
0
            User->getOperand(1) == Inputs[i])
10824
0
          return SDValue();
10825
0
      }
10826
0
    }
10827
0
  }
10828
0
10829
0
  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
10830
0
    for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
10831
0
                              UE = PromOps[i].getNode()->use_end();
10832
0
         UI != UE; ++UI) {
10833
0
      SDNode *User = *UI;
10834
0
      if (User != N && !Visited.count(User))
10835
0
        return SDValue();
10836
0
10837
0
      // Make sure that we're not going to promote the non-output-value
10838
0
      // operand(s) or SELECT or SELECT_CC.
10839
0
      // FIXME: Although we could sometimes handle this, and it does occur in
10840
0
      // practice that one of the condition inputs to the select is also one of
10841
0
      // the outputs, we currently can't deal with this.
10842
0
      if (User->getOpcode() == ISD::SELECT) {
10843
0
        if (User->getOperand(0) == PromOps[i])
10844
0
          return SDValue();
10845
0
      } else if (User->getOpcode() == ISD::SELECT_CC) {
10846
0
        if (User->getOperand(0) == PromOps[i] ||
10847
0
            User->getOperand(1) == PromOps[i])
10848
0
          return SDValue();
10849
0
      }
10850
0
    }
10851
0
  }
10852
0
10853
0
  // Replace all inputs with the extension operand.
10854
0
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
10855
0
    // Constants may have users outside the cluster of to-be-promoted nodes,
10856
0
    // and so we need to replace those as we do the promotions.
10857
0
    if (isa<ConstantSDNode>(Inputs[i]))
10858
0
      continue;
10859
0
    else
10860
0
      DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
10861
0
  }
10862
0
10863
0
  std::list<HandleSDNode> PromOpHandles;
10864
0
  for (auto &PromOp : PromOps)
10865
0
    PromOpHandles.emplace_back(PromOp);
10866
0
10867
0
  // Replace all operations (these are all the same, but have a different
10868
0
  // (i1) return type). DAG.getNode will validate that the types of
10869
0
  // a binary operator match, so go through the list in reverse so that
10870
0
  // we've likely promoted both operands first. Any intermediate truncations or
10871
0
  // extensions disappear.
10872
0
  while (!PromOpHandles.empty()) {
10873
0
    SDValue PromOp = PromOpHandles.back().getValue();
10874
0
    PromOpHandles.pop_back();
10875
0
10876
0
    if (PromOp.getOpcode() == ISD::TRUNCATE ||
10877
0
        PromOp.getOpcode() == ISD::SIGN_EXTEND ||
10878
0
        PromOp.getOpcode() == ISD::ZERO_EXTEND ||
10879
0
        PromOp.getOpcode() == ISD::ANY_EXTEND) {
10880
0
      if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
10881
0
          PromOp.getOperand(0).getValueType() != MVT::i1) {
10882
0
        // The operand is not yet ready (see comment below).
10883
0
        PromOpHandles.emplace_front(PromOp);
10884
0
        continue;
10885
0
      }
10886
0
10887
0
      SDValue RepValue = PromOp.getOperand(0);
10888
0
      if (isa<ConstantSDNode>(RepValue))
10889
0
        RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
10890
0
10891
0
      DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
10892
0
      continue;
10893
0
    }
10894
0
10895
0
    unsigned C;
10896
0
    switch (PromOp.getOpcode()) {
10897
0
    default:             C = 0; break;
10898
0
    case ISD::SELECT:    C = 1; break;
10899
0
    case ISD::SELECT_CC: C = 2; break;
10900
0
    }
10901
0
10902
0
    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
10903
0
         PromOp.getOperand(C).getValueType() != MVT::i1) ||
10904
0
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
10905
0
         PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
10906
0
      // The to-be-promoted operands of this node have not yet been
10907
0
      // promoted (this should be rare because we're going through the
10908
0
      // list backward, but if one of the operands has several users in
10909
0
      // this cluster of to-be-promoted nodes, it is possible).
10910
0
      PromOpHandles.emplace_front(PromOp);
10911
0
      continue;
10912
0
    }
10913
0
10914
0
    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
10915
0
                                PromOp.getNode()->op_end());
10916
0
10917
0
    // If there are any constant inputs, make sure they're replaced now.
10918
0
    for (unsigned i = 0; i < 2; ++i)
10919
0
      if (isa<ConstantSDNode>(Ops[C+i]))
10920
0
        Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
10921
0
10922
0
    DAG.ReplaceAllUsesOfValueWith(PromOp,
10923
0
      DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
10924
0
  }
10925
0
10926
0
  // Now we're left with the initial truncation itself.
10927
0
  if (N->getOpcode() == ISD::TRUNCATE)
10928
0
    return N->getOperand(0);
10929
0
10930
0
  // Otherwise, this is a comparison. The operands to be compared have just
10931
0
  // changed type (to i1), but everything else is the same.
10932
0
  return SDValue(N, 0);
10933
0
}
10934
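The identity this combine depends on can be checked in isolation: for i1
inputs, truncating a bitwise operation on zero-extended values gives the same
bit as performing the operation on the original i1 values, which is why the
whole cluster can stay in CR bits. A small illustrative check (plain C++, XOR
chosen arbitrarily as the binary op):

    #include <cassert>
    #include <cstdint>

    static bool truncOfZextOp(bool X, bool Y) {
      uint32_t ZX = X, ZY = Y;  // zext(x), zext(y)
      return (ZX ^ ZY) & 1;     // trunc(binary-op(zext(x), zext(y)))
    }

    int main() {
      for (bool X : {false, true})
        for (bool Y : {false, true})
          assert(truncOfZextOp(X, Y) == (X != Y)); // same as the i1 op
      return 0;
    }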
10935
SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
10936
4.43k
                                                  DAGCombinerInfo &DCI) const {
10937
4.43k
  SelectionDAG &DAG = DCI.DAG;
10938
4.43k
  SDLoc dl(N);
10939
4.43k
10940
4.43k
  // If we're tracking CR bits, we need to be careful that we don't have:
10941
4.43k
  //   zext(binary-ops(trunc(x), trunc(y)))
10942
4.43k
  // or
10943
4.43k
  //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
10944
4.43k
  // such that we're unnecessarily moving things into CR bits that can more
10945
4.43k
  // efficiently stay in GPRs. Note that if we're not certain that the high
10946
4.43k
  // bits are set as required by the final extension, we still may need to do
10947
4.43k
  // some masking to get the proper behavior.
10948
4.43k
10949
4.43k
  // This same functionality is important on PPC64 when dealing with
10950
4.43k
  // 32-to-64-bit extensions; these occur often when 32-bit values are used as
10951
4.43k
  // the return values of functions. Because it is so similar, it is handled
10952
4.43k
  // here as well.
10953
4.43k
10954
4.43k
  if (N->getValueType(0) != MVT::i32 &&
10955
3.76k
      N->getValueType(0) != MVT::i64)
10956
246
    return SDValue();
10957
4.19k
10958
4.19k
  if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
10959
2.80k
        (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
10960
774
    return SDValue();
10961
3.41k
10962
3.41k
  if (N->getOperand(0).getOpcode() != ISD::AND &&
10963
3.35k
      N->getOperand(0).getOpcode() != ISD::OR  &&
10964
3.31k
      N->getOperand(0).getOpcode() != ISD::XOR &&
10965
3.29k
      N->getOperand(0).getOpcode() != ISD::SELECT &&
10966
3.17k
      N->getOperand(0).getOpcode() != ISD::SELECT_CC)
10967
3.13k
    return SDValue();
10968
286
10969
286
  SmallVector<SDValue, 4> Inputs;
10970
286
  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
10971
286
  SmallPtrSet<SDNode *, 16> Visited;
10972
286
10973
286
  // Visit all inputs, collect all binary operations (and, or, xor and
10974
286
  // select) that are all fed by truncations.
10975
472
  while (!BinOps.empty()) {
10976
330
    SDValue BinOp = BinOps.back();
10977
330
    BinOps.pop_back();
10978
330
10979
330
    if (!Visited.insert(BinOp.getNode()).second)
10980
0
      continue;
10981
330
10982
330
    PromOps.push_back(BinOp);
10983
330
10984
958
    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
10985
772
      // The condition of the select is not promoted.
10986
772
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
10987
139
        continue;
10988
633
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
10989
111
        continue;
10990
522
10991
522
      if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
10992
522
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
10993
324
        Inputs.push_back(BinOp.getOperand(i));
10994
522
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
10995
184
                 BinOp.getOperand(i).getOpcode() == ISD::OR  ||
10996
172
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
10997
172
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
10998
198
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
10999
54
        BinOps.push_back(BinOp.getOperand(i));
11000
198
      } else {
11001
144
        // We have an input that is not a truncation or another binary
11002
144
        // operation; we'll abort this transformation.
11003
144
        return SDValue();
11004
144
      }
11005
772
    }
11006
330
  }
11007
286
11008
286
  // The operands of a select that must be truncated when the select is
11009
286
  // promoted because the operand is actually part of the to-be-promoted set.
11010
142
  DenseMap<SDNode *, EVT> SelectTruncOp[2];
11011
142
11012
142
  // Make sure that this is a self-contained cluster of operations (which
11013
142
  // is not quite the same thing as saying that everything has only one
11014
142
  // use).
11015
435
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
11016
306
    if (isa<ConstantSDNode>(Inputs[i]))
11017
212
      continue;
11018
94
11019
94
    for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
11020
94
                              UE = Inputs[i].getNode()->use_end();
11021
195
         UI != UE; ++UI) {
11022
114
      SDNode *User = *UI;
11023
114
      if (User != N && !Visited.count(User))
11024
13
        return SDValue();
11025
101
11026
101
      // If we're going to promote the non-output-value operand(s) or SELECT or
11027
101
      // SELECT_CC, record them for truncation.
11028
101
      if (User->getOpcode() == ISD::SELECT) {
11029
60
        if (User->getOperand(0) == Inputs[i])
11030
0
          SelectTruncOp[0].insert(std::make_pair(User,
11031
0
                                    User->getOperand(0).getValueType()));
11032
101
      } else if (User->getOpcode() == ISD::SELECT_CC) {
11033
16
        if (User->getOperand(0) == Inputs[i])
11034
12
          SelectTruncOp[0].insert(std::make_pair(User,
11035
12
                                    User->getOperand(0).getValueType()));
11036
16
        if (User->getOperand(1) == Inputs[i])
11037
2
          SelectTruncOp[1].insert(std::make_pair(User,
11038
2
                                    User->getOperand(1).getValueType()));
11039
41
      }
11040
114
    }
11041
306
  }
11042
142
11043
277
  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
11044
153
    for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
11045
153
                              UE = PromOps[i].getNode()->use_end();
11046
306
         UI != UE; ++UI) {
11047
158
      SDNode *User = *UI;
11048
158
      if (User != N && !Visited.count(User))
11049
5
        return SDValue();
11050
153
11051
153
      // If we're going to promote the non-output-value operand(s) or SELECT or
11052
153
      // SELECT_CC, record them for truncation.
11053
153
      if (User->getOpcode() == ISD::SELECT) {
11054
0
        if (User->getOperand(0) == PromOps[i])
11055
0
          SelectTruncOp[0].insert(std::make_pair(User,
11056
0
                                    User->getOperand(0).getValueType()));
11057
153
      } else if (User->getOpcode() == ISD::SELECT_CC) {
11058
0
        if (User->getOperand(0) == PromOps[i])
11059
0
          SelectTruncOp[0].insert(std::make_pair(User,
11060
0
                                    User->getOperand(0).getValueType()));
11061
0
        if (User->getOperand(1) == PromOps[i])
11062
0
          SelectTruncOp[1].insert(std::make_pair(User,
11063
0
                                    User->getOperand(1).getValueType()));
11064
153
      }
11065
158
    }
11066
153
  }
11067
129
11068
124
  unsigned PromBits = N->getOperand(0).getValueSizeInBits();
11069
124
  bool ReallyNeedsExt = false;
11070
124
  if (N->getOpcode() != ISD::ANY_EXTEND) {
11071
51
    // If all of the inputs are not already sign/zero extended, then
11072
51
    // we'll still need to do that at the end.
11073
166
    for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
11074
121
      if (isa<ConstantSDNode>(Inputs[i]))
11075
61
        continue;
11076
60
11077
60
      unsigned OpBits =
11078
60
        Inputs[i].getOperand(0).getValueSizeInBits();
11079
60
      assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
11080
60
11081
60
      if ((N->getOpcode() == ISD::ZERO_EXTEND &&
11082
14
           !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
11083
14
                                  APInt::getHighBitsSet(OpBits,
11084
14
                                                        OpBits-PromBits))) ||
11085
54
          (N->getOpcode() == ISD::SIGN_EXTEND &&
11086
46
           DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
11087
60
             (OpBits-(PromBits-1)))) {
11088
6
        ReallyNeedsExt = true;
11089
6
        break;
11090
6
      }
11091
121
    }
11092
51
  }
11093
124
11094
124
  // Replace all inputs, either with the truncation operand, or a
11095
124
  // truncation or extension to the final output type.
11096
396
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
11097
272
    // Constant inputs need to be replaced with the to-be-promoted nodes that
11098
272
    // use them because they might have users outside of the cluster of
11099
272
    // promoted nodes.
11100
272
    if (isa<ConstantSDNode>(Inputs[i]))
11101
195
      continue;
11102
77
11103
77
    SDValue InSrc = Inputs[i].getOperand(0);
11104
77
    if (Inputs[i].getValueType() == N->getValueType(0))
11105
0
      DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
11106
77
    else if (N->getOpcode() == ISD::SIGN_EXTEND)
11107
46
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
11108
46
        DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
11109
31
    else if (N->getOpcode() == ISD::ZERO_EXTEND)
11110
15
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
11111
15
        DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
11112
31
    else
11113
16
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
11114
16
        DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
11115
272
  }
11116
124
11117
124
  std::list<HandleSDNode> PromOpHandles;
11118
124
  for (auto &PromOp : PromOps)
11119
148
    PromOpHandles.emplace_back(PromOp);
11120
124
11121
124
  // Replace all operations (these are all the same, but have a different
11122
124
  // (promoted) return type). DAG.getNode will validate that the types of
11123
124
  // a binary operator match, so go through the list in reverse so that
11124
124
  // we've likely promoted both operands first.
11125
272
  while (!PromOpHandles.empty()) {
11126
148
    SDValue PromOp = PromOpHandles.back().getValue();
11127
148
    PromOpHandles.pop_back();
11128
148
11129
148
    unsigned C;
11130
148
    switch (PromOp.getOpcode()) {
11131
25
    default:             C = 0; break;
11132
110
    case ISD::SELECT:    C = 1; break;
11133
13
    case ISD::SELECT_CC: C = 2; break;
11134
148
    }
11135
148
11136
148
    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
11137
55
         PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
11138
148
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
11139
148
         PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
11140
0
      // The to-be-promoted operands of this node have not yet been
11141
0
      // promoted (this should be rare because we're going through the
11142
0
      // list backward, but if one of the operands has several users in
11143
0
      // this cluster of to-be-promoted nodes, it is possible).
11144
0
      PromOpHandles.emplace_front(PromOp);
11145
0
      continue;
11146
0
    }
11147
148
11148
148
    // For SELECT and SELECT_CC nodes, we do a similar check for any
11149
148
    // to-be-promoted comparison inputs.
11150
148
    if (PromOp.getOpcode() == ISD::SELECT ||
11151
148
        PromOp.getOpcode() == ISD::SELECT_CC) {
11152
123
      if ((SelectTruncOp[0].count(PromOp.getNode()) &&
11153
4
           PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
11154
123
          (SelectTruncOp[1].count(PromOp.getNode()) &&
11155
123
           PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
11156
0
        PromOpHandles.emplace_front(PromOp);
11157
0
        continue;
11158
0
      }
11159
148
    }
11160
148
11161
148
    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
11162
148
                                PromOp.getNode()->op_end());
11163
148
11164
148
    // If this node has constant inputs, then they'll need to be promoted here.
11165
444
    for (unsigned i = 0; i < 2; ++i) {
11166
296
      if (!isa<ConstantSDNode>(Ops[C+i]))
11167
101
        continue;
11168
195
      if (Ops[C+i].getValueType() == N->getValueType(0))
11169
0
        continue;
11170
195
11171
195
      if (N->getOpcode() == ISD::SIGN_EXTEND)
11172
0
        Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
11173
195
      else if (N->getOpcode() == ISD::ZERO_EXTEND)
11174
65
        Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
11175
195
      else
11176
130
        Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
11177
296
    }
11178
148
11179
148
    // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
11180
148
    // truncate them again to the original value type.
11181
148
    if (PromOp.getOpcode() == ISD::SELECT ||
11182
148
        PromOp.getOpcode() == ISD::SELECT_CC) {
11183
123
      auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
11184
123
      if (SI0 != SelectTruncOp[0].end())
11185
4
        Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
11186
123
      auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
11187
123
      if (SI1 != SelectTruncOp[1].end())
11188
1
        Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
11189
123
    }
11190
148
11191
148
    DAG.ReplaceAllUsesOfValueWith(PromOp,
11192
148
      DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
11193
148
  }
11194
124
11195
124
  // Now we're left with the initial extension itself.
11196
124
  if (!ReallyNeedsExt)
11197
118
    return N->getOperand(0);
11198
6
11199
6
  // To zero extend, just mask off everything except for the first bit (in the
11200
6
  // i1 case).
11201
6
  if (N->getOpcode() == ISD::ZERO_EXTEND)
11202
6
    return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
11203
6
                       DAG.getConstant(APInt::getLowBitsSet(
11204
6
                                         N->getValueSizeInBits(0), PromBits),
11205
6
                                       dl, N->getValueType(0)));
11206
0
11207
6
  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
11208
0
         "Invalid extension type");
11209
0
  EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
11210
0
  SDValue ShiftCst =
11211
0
      DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
11212
0
  return DAG.getNode(
11213
0
      ISD::SRA, dl, N->getValueType(0),
11214
0
      DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
11215
0
      ShiftCst);
11216
0
}
11217
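The shl/sra pair emitted at the end of this function is the classic in-register
sign extension. A standalone sketch of the arithmetic (plain C++; it assumes an
arithmetic right shift on signed values, which holds on the relevant targets):

    #include <cassert>
    #include <cstdint>

    // Sign-extend the low PromBits of V to a full 64-bit value.
    static int64_t sextViaShifts(uint64_t V, unsigned PromBits) {
      unsigned Shift = 64 - PromBits;         // value width minus PromBits
      return (int64_t)(V << Shift) >> Shift;  // ISD::SHL followed by ISD::SRA
    }

    int main() {
      assert(sextViaShifts(1, 1) == -1);      // i1 1 -> all ones
      assert(sextViaShifts(0x80, 8) == -128); // i8 0x80 -> -128
      assert(sextViaShifts(0x7F, 8) == 127);  // positive values unchanged
      return 0;
    }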
11218
/// \brief Reduces the number of fp-to-int conversion when building a vector.
11219
///
11220
/// If this vector is built out of floating to integer conversions,
11221
/// transform it to a vector built out of floating point values followed by a
11222
/// single floating to integer conversion of the vector.
11223
/// Namely  (build_vector (fptosi $A), (fptosi $B), ...)
11224
/// becomes (fptosi (build_vector ($A, $B, ...)))
11225
SDValue PPCTargetLowering::
11226
combineElementTruncationToVectorTruncation(SDNode *N,
11227
178
                                           DAGCombinerInfo &DCI) const {
11228
178
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
11229
178
         "Should be called with a BUILD_VECTOR node");
11230
178
11231
178
  SelectionDAG &DAG = DCI.DAG;
11232
178
  SDLoc dl(N);
11233
178
11234
178
  SDValue FirstInput = N->getOperand(0);
11235
178
  assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
11236
178
         "The input operand must be an fp-to-int conversion.");
11237
178
11238
178
  // This combine happens after legalization so the fp_to_[su]i nodes are
11239
178
  // already converted to PPCSISD nodes.
11240
178
  unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
11241
178
  if (FirstConversion == PPCISD::FCTIDZ ||
11242
133
      FirstConversion == PPCISD::FCTIDUZ ||
11243
88
      FirstConversion == PPCISD::FCTIWZ ||
11244
178
      FirstConversion == PPCISD::FCTIWUZ) {
11245
178
    bool IsSplat = true;
11246
178
    bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
11247
134
      FirstConversion == PPCISD::FCTIWUZ;
11248
178
    EVT SrcVT = FirstInput.getOperand(0).getValueType();
11249
178
    SmallVector<SDValue, 4> Ops;
11250
178
    EVT TargetVT = N->getValueType(0);
11251
710
    for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
11252
532
      if (N->getOperand(i).getOpcode() != PPCISD::MFVSR)
11253
0
        return SDValue();
11254
532
      unsigned NextConversion = N->getOperand(i).getOperand(0).getOpcode();
11255
532
      if (NextConversion != FirstConversion)
11256
0
        return SDValue();
11257
532
      if (N->getOperand(i) != FirstInput)
11258
290
        IsSplat = false;
11259
532
    }
11260
178
11261
178
    // If this is a splat, we leave it as-is since there will be only a single
11262
178
    // fp-to-int conversion followed by a splat of the integer. This is better
11263
178
    // for 32-bit and smaller ints and neutral for 64-bit ints.
11264
178
    if (IsSplat)
11265
32
      return SDValue();
11266
146
11267
146
    // Now that we know we have the right type of node, get its operands
11268
582
    for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
11269
436
      SDValue In = N->getOperand(i).getOperand(0);
11270
436
      // For 32-bit values, we need to add an FP_ROUND node.
11271
436
      if (Is32Bit) {
11272
288
        if (In.isUndef())
11273
0
          Ops.push_back(DAG.getUNDEF(SrcVT));
11274
288
        else {
11275
288
          SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
11276
288
                                      MVT::f32, In.getOperand(0),
11277
288
                                      DAG.getIntPtrConstant(1, dl));
11278
288
          Ops.push_back(Trunc);
11279
288
        }
11280
288
      } else
11281
148
        Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
11282
436
    }
11283
146
11284
146
    unsigned Opcode;
11285
146
    if (FirstConversion == PPCISD::FCTIDZ ||
11286
109
        FirstConversion == PPCISD::FCTIWZ)
11287
73
      Opcode = ISD::FP_TO_SINT;
11288
146
    else
11289
73
      Opcode = ISD::FP_TO_UINT;
11290
146
11291
146
    EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
11292
178
    SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
11293
178
    return DAG.getNode(Opcode, dl, TargetVT, BV);
11294
178
  }
11295
0
  return SDValue();
11296
0
}
11297
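The splat test inside the loop above is the interesting special case: when
every operand is the very first conversion, one scalar fp-to-int plus a splat
is already optimal, so the combine backs off. A trivial standalone model of
that check (illustrative only):

    #include <cassert>
    #include <vector>

    static bool isSplat(const std::vector<int> &Ops) {
      for (int Op : Ops)
        if (Op != Ops[0]) // any operand differing from the first input
          return false;
      return true;
    }

    int main() {
      assert(isSplat({7, 7, 7, 7}));   // leave as-is: one conversion + splat
      assert(!isSplat({7, 5, 7, 7}));  // rewrite to a single vector conversion
      return 0;
    }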
11298
/// \brief Reduce the number of loads when building a vector.
11299
///
11300
/// Building a vector out of multiple loads can be converted to a load
11301
/// of the vector type if the loads are consecutive. If the loads are
11302
/// consecutive but in descending order, a shuffle is added at the end
11303
/// to reorder the vector.
11304
2.04k
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
11305
2.04k
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
11306
2.04k
         "Should be called with a BUILD_VECTOR node");
11307
2.04k
11308
2.04k
  SDLoc dl(N);
11309
2.04k
  bool InputsAreConsecutiveLoads = true;
11310
2.04k
  bool InputsAreReverseConsecutive = true;
11311
2.04k
  unsigned ElemSize = N->getValueType(0).getScalarSizeInBits() / 8;
11312
2.04k
  SDValue FirstInput = N->getOperand(0);
11313
2.04k
  bool IsRoundOfExtLoad = false;
11314
2.04k
11315
2.04k
  if (FirstInput.getOpcode() == ISD::FP_ROUND &&
11316
2.04k
      FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
11317
48
    LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
11318
48
    IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
11319
48
  }
11320
2.04k
  // Not a build vector of (possibly fp_rounded) loads.
11321
2.04k
  if (!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD)
11322
1.68k
    return SDValue();
11323
360
11324
536
  for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
11325
440
    // If any inputs are fp_round(extload), they all must be.
11326
440
    if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
11327
0
      return SDValue();
11328
440
11329
440
    SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
11330
400
      N->getOperand(i);
11331
440
    if (NextInput.getOpcode() != ISD::LOAD)
11332
0
      return SDValue();
11333
440
11334
440
    SDValue PreviousInput =
11335
440
      IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
11336
440
    LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
11337
440
    LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
11338
440
11339
440
    // If any inputs are fp_round(extload), they all must be.
11340
440
    if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
11341
0
      return SDValue();
11342
440
11343
440
    
if (440
!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG)440
)
11344
368
      InputsAreConsecutiveLoads = false;
11345
440
    if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
11346
336
      InputsAreReverseConsecutive = false;
11347
440
11348
440
    // Exit early if the loads are neither consecutive nor reverse consecutive.
11349
440
    if (
!InputsAreConsecutiveLoads && 440
!InputsAreReverseConsecutive368
)
11350
264
      return SDValue();
11351
440
  }
11352
360
11353
96
  assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
11354
96
         "The loads cannot be both consecutive and reverse consecutive.");
11355
96
11356
96
  SDValue FirstLoadOp =
11357
96
    IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
11358
96
  SDValue LastLoadOp =
11359
8
    IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
11360
88
                       N->getOperand(N->getNumOperands()-1);
11361
96
11362
96
  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
11363
96
  LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
11364
96
  if (InputsAreConsecutiveLoads) {
11365
40
    assert(LD1 && "Input needs to be a LoadSDNode.");
11366
40
    return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
11367
40
                       LD1->getBasePtr(), LD1->getPointerInfo(),
11368
40
                       LD1->getAlignment());
11369
40
  }
11370
56
  if (InputsAreReverseConsecutive) {
11371
56
    assert(LDL && "Input needs to be a LoadSDNode.");
11372
56
    SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
11373
56
                               LDL->getBasePtr(), LDL->getPointerInfo(),
11374
56
                               LDL->getAlignment());
11375
56
    SmallVector<int, 16> Ops;
11376
216
    for (int i = N->getNumOperands() - 1; i >= 0; i--)
11377
160
      Ops.push_back(i);
11378
56
11379
56
    return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
11380
56
                                DAG.getUNDEF(N->getValueType(0)), Ops);
11381
56
  }
11382
0
  return SDValue();
11383
0
}
11384
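For the reverse-consecutive case, the shuffle mask built above is simply
(NumOperands-1, ..., 1, 0). A plain-array model of why one vector load plus
that reversing shuffle reproduces the original build_vector (illustrative
only, with four consecutive scalar memory slots):

    #include <cassert>

    int main() {
      int Memory[4] = {10, 20, 30, 40};
      // build_vector of loads taken in descending address order:
      int Wanted[4] = {Memory[3], Memory[2], Memory[1], Memory[0]};
      // one vector load of Memory plus the reversing mask:
      int Mask[4] = {3, 2, 1, 0};
      for (int i = 0; i < 4; ++i)
        assert(Wanted[i] == Memory[Mask[i]]);
      return 0;
    }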
11385
// This function adds the required vector_shuffle needed to get
11386
// the elements of the vector extract in the correct position
11387
// as specified by the CorrectElems encoding.
11388
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
11389
                                      SDValue Input, uint64_t Elems,
11390
10
                                      uint64_t CorrectElems) {
11391
10
  SDLoc dl(N);
11392
10
11393
10
  unsigned NumElems = Input.getValueType().getVectorNumElements();
11394
10
  SmallVector<int, 16> ShuffleMask(NumElems, -1);
11395
10
11396
10
  // Knowing the element indices being extracted from the original
11397
10
  // vector and the order in which they're being inserted, just put
11398
10
  // them at element indices required for the instruction.
11399
38
  for (unsigned i = 0; i < N->getNumOperands(); i++) {
11400
28
    if (DAG.getDataLayout().isLittleEndian())
11401
14
      ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
11402
28
    else
11403
14
      ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
11404
28
    CorrectElems = CorrectElems >> 8;
11405
28
    Elems = Elems >> 8;
11406
28
  }
11407
10
11408
10
  SDValue Shuffle =
11409
10
      DAG.getVectorShuffle(Input.getValueType(), dl, Input,
11410
10
                           DAG.getUNDEF(Input.getValueType()), ShuffleMask);
11411
10
11412
10
  EVT Ty = N->getValueType(0);
11413
10
  SDValue BV = DAG.getNode(PPCISD::SExtVElems, dl, Ty, Shuffle);
11414
10
  return BV;
11415
10
}
11416
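The Elems/CorrectElems values threaded through here use a per-byte nibble
encoding that is documented in the table of combineBVOfVecSExt below: each
byte stores the BE element index in its high nibble and the LE index in its
low nibble, with the low byte describing the last operand. A standalone
decoder (illustrative only) that recovers the documented indices:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    static std::vector<unsigned> decodeIndices(uint64_t Elems, unsigned Count,
                                               bool LittleEndian) {
      std::vector<unsigned> Idx(Count);
      for (unsigned i = Count; i-- > 0;) { // consume low byte first
        Idx[i] = LittleEndian ? (Elems & 0xF) : ((Elems & 0xF0) >> 4);
        Elems >>= 8;
      }
      return Idx;
    }

    int main() {
      // 0x3074B8FC encodes byte->word:
      assert(decodeIndices(0x3074B8FC, 4, true) ==
             (std::vector<unsigned>{0x0, 0x4, 0x8, 0xC})); // LE indices
      assert(decodeIndices(0x3074B8FC, 4, false) ==
             (std::vector<unsigned>{0x3, 0x7, 0xB, 0xF})); // BE indices
      return 0;
    }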
11417
// Look for build vector patterns where input operands come from sign
11418
// extended vector_extract elements of specific indices. If the correct indices
11419
// aren't used, add a vector shuffle to fix up the indices and create a new
11420
// PPCISD:SExtVElems node which selects the vector sign extend instructions
11421
// during instruction selection.
11422
806
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
11423
806
  // This array encodes the indices that the vector sign extend instructions
11424
806
  // extract from when extending from one type to another for both BE and LE.
11425
806
  // The right nibble of each byte corresponds to the LE indices,
11426
806
  // and the left nibble of each byte corresponds to the BE indices.
11427
806
  // For example: 0x3074B8FC  byte->word
11428
806
  // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
11429
806
  // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
11430
806
  // For example: 0x000070F8  byte->double word
11431
806
  // For LE: the allowed indices are: 0x0,0x8
11432
806
  // For BE: the allowed indices are: 0x7,0xF
11433
806
  uint64_t TargetElems[] = {
11434
806
      0x3074B8FC, // b->w
11435
806
      0x000070F8, // b->d
11436
806
      0x10325476, // h->w
11437
806
      0x00003074, // h->d
11438
806
      0x00001032, // w->d
11439
806
  };
11440
806
11441
806
  uint64_t Elems = 0;
11442
806
  int Index;
11443
806
  SDValue Input;
11444
806
11445
862
  auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
11446
862
    if (!Op)
11447
0
      return false;
11448
862
    if (Op.getOpcode() != ISD::SIGN_EXTEND)
11449
762
      return false;
11450
100
11451
100
    SDValue Extract = Op.getOperand(0);
11452
100
    if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
11453
16
      return false;
11454
84
11455
84
    ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
11456
84
    if (!ExtOp)
11457
0
      return false;
11458
84
11459
84
    Index = ExtOp->getZExtValue();
11460
84
    if (Input && Input != Extract.getOperand(0))
11461
4
      return false;
11462
80
11463
80
    if (!Input)
11464
28
      Input = Extract.getOperand(0);
11465
80
11466
80
    Elems = Elems << 8;
11467
80
    Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
11468
862
    Elems |= Index;
11469
862
11470
862
    return true;
11471
862
  };
11472
806
11473
806
  // If the build vector operands aren't sign extended vector extracts,
11474
806
  // of the same input vector, then return.
11475
886
  for (unsigned i = 0; i < N->getNumOperands(); i++) {
11476
862
    if (!isSExtOfVecExtract(N->getOperand(i))) {
11477
782
      return SDValue();
11478
782
    }
11479
862
  }
11480
806
11481
806
  // If the vector extract indices are not correct, add the appropriate
11482
806
  // vector_shuffle.
11483
24
  int TgtElemArrayIdx;
11484
24
  int InputSize = Input.getValueType().getScalarSizeInBits();
11485
24
  int OutputSize = N->getValueType(0).getScalarSizeInBits();
11486
24
  if (InputSize + OutputSize == 40)
11487
4
    TgtElemArrayIdx = 0;
11488
20
  else if (InputSize + OutputSize == 72)
11489
4
    TgtElemArrayIdx = 1;
11490
16
  else if (InputSize + OutputSize == 48)
11491
4
    TgtElemArrayIdx = 2;
11492
12
  else if (InputSize + OutputSize == 80)
11493
4
    TgtElemArrayIdx = 3;
11494
8
  else if (InputSize + OutputSize == 96)
11495
6
    TgtElemArrayIdx = 4;
11496
8
  else
11497
2
    return SDValue();
11498
22
11499
22
  uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
11500
22
  CorrectElems = DAG.getDataLayout().isLittleEndian()
11501
11
                     ? CorrectElems & 0x0F0F0F0F0F0F0F0F
11502
11
                     : CorrectElems & 0xF0F0F0F0F0F0F0F0;
11503
22
  if (Elems != CorrectElems) {
11504
10
    return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
11505
10
  }
11506
12
11507
12
  // Regular lowering will catch cases where a shuffle is not needed.
11508
12
  return SDValue();
11509
12
}
11510
11511
SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
11512
2.59k
                                                 DAGCombinerInfo &DCI) const {
11513
2.59k
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
11514
2.59k
         "Should be called with a BUILD_VECTOR node");
11515
2.59k
11516
2.59k
  SelectionDAG &DAG = DCI.DAG;
11517
2.59k
  SDLoc dl(N);
11518
2.59k
11519
2.59k
  if (!Subtarget.hasVSX())
11520
400
    return SDValue();
11521
2.19k
11522
2.19k
  // The target independent DAG combiner will leave a build_vector of
11523
2.19k
  // float-to-int conversions intact. We can generate MUCH better code for
11524
2.19k
  // a float-to-int conversion of a vector of floats.
11525
2.19k
  SDValue FirstInput = N->getOperand(0);
11526
2.19k
  if (FirstInput.getOpcode() == PPCISD::MFVSR) {
11527
178
    SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
11528
178
    if (Reduced)
11529
146
      return Reduced;
11530
2.04k
  }
11531
2.04k
11532
2.04k
  // If we're building a vector out of consecutive loads, just load that
11533
2.04k
  // vector type.
11534
2.04k
  SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
11535
2.04k
  if (Reduced)
11536
96
    return Reduced;
11537
1.95k
11538
1.95k
  // If we're building a vector out of extended elements from another vector
11539
1.95k
  // we have P9 vector integer extend instructions.
11540
1.95k
  if (Subtarget.hasP9Altivec()) {
11541
806
    Reduced = combineBVOfVecSExt(N, DAG);
11542
806
    if (Reduced)
11543
10
      return Reduced;
11544
1.94k
  }
11545
1.94k
11546
1.94k
11547
1.94k
  if (N->getValueType(0) != MVT::v2f64)
11548
1.78k
    return SDValue();
11549
153
11550
153
  // Looking for:
11551
153
  // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
11552
153
  if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
11553
134
      FirstInput.getOpcode() != ISD::UINT_TO_FP)
11554
130
    return SDValue();
11555
23
  if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
11556
4
      N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
11557
0
    return SDValue();
11558
23
  if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
11559
0
    return SDValue();
11560
23
11561
23
  SDValue Ext1 = FirstInput.getOperand(0);
11562
23
  SDValue Ext2 = N->getOperand(1).getOperand(0);
11563
23
  if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
11564
5
     Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
11565
18
    return SDValue();
11566
5
11567
5
  ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
11568
5
  ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
11569
5
  if (!Ext1Op || !Ext2Op)
11570
0
    return SDValue();
11571
5
  if (Ext1.getValueType() != MVT::i32 ||
11572
5
      Ext2.getValueType() != MVT::i32)
11573
0
  if (Ext1.getOperand(0) != Ext2.getOperand(0))
11574
0
    return SDValue();
11575
5
11576
5
  int FirstElem = Ext1Op->getZExtValue();
11577
5
  int SecondElem = Ext2Op->getZExtValue();
11578
5
  int SubvecIdx;
11579
5
  if (FirstElem == 0 && SecondElem == 1)
11580
5
    SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
11581
0
  else if (FirstElem == 2 && SecondElem == 3)
11582
0
    SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
11583
0
  else
11584
0
    return SDValue();
11585
5
11586
5
  SDValue SrcVec = Ext1.getOperand(0);
11587
5
  auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
11588
5
    PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
11589
2.59k
  return DAG.getNode(NodeType, dl, MVT::v2f64,
11590
2.59k
                     SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
11591
2.59k
}
11592
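The closing pattern of DAGCombineBuildVector converts a pair of extracted i32
lanes straight to v2f64; SubvecIdx picks which half of the source vector feeds
the conversion. A scalar model of the equivalence (illustrative only):

    #include <cassert>

    int main() {
      int Src[4] = {1, 2, 3, 4};  // the v4i32 source vector
      // build_vector (sint_to_fp (extractelt 0)), (sint_to_fp (extractelt 1)):
      double PerLane[2] = {(double)Src[0], (double)Src[1]};
      int SubvecIdx = 0;          // selects elements {0,1}, per the code above
      double Whole[2];
      for (int i = 0; i < 2; ++i) // one vector int->fp over the selected pair
        Whole[i] = (double)Src[SubvecIdx * 2 + i];
      for (int i = 0; i < 2; ++i)
        assert(PerLane[i] == Whole[i]);
      return 0;
    }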
11593
SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
11594
353
                                              DAGCombinerInfo &DCI) const {
11595
353
  assert((N->getOpcode() == ISD::SINT_TO_FP ||
11596
353
          N->getOpcode() == ISD::UINT_TO_FP) &&
11597
353
         "Need an int -> FP conversion node here");
11598
353
11599
353
  if (useSoftFloat() || !Subtarget.has64BitSupport())
11600
17
    return SDValue();
11601
336
11602
336
  SelectionDAG &DAG = DCI.DAG;
11603
336
  SDLoc dl(N);
11604
336
  SDValue Op(N, 0);
11605
336
11606
336
  SDValue FirstOperand(Op.getOperand(0));
11607
336
  bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
11608
59
    (FirstOperand.getValueType() == MVT::i8 ||
11609
59
     FirstOperand.getValueType() == MVT::i16);
11610
336
  if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
11611
32
    bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
11612
32
    bool DstDouble = Op.getValueType() == MVT::f64;
11613
32
    unsigned ConvOp = Signed ?
11614
16
      (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
11615
16
      (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
11616
32
    SDValue WidthConst =
11617
32
      DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
11618
32
                            dl, false);
11619
32
    LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
11620
32
    SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
11621
32
    SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
11622
32
                                         DAG.getVTList(MVT::f64, MVT::Other),
11623
32
                                         Ops, MVT::i8, LDN->getMemOperand());
11624
32
11625
32
    // For signed conversion, we need to sign-extend the value in the VSR
11626
32
    if (Signed) {
11627
16
      SDValue ExtOps[] = { Ld, WidthConst };
11628
16
      SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
11629
16
      return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
11630
16
    } else
11631
16
      return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
11632
304
  }
11633
304
11634
304
  // Don't handle ppc_fp128 here or i1 conversions.
11635
304
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
11636
52
    return SDValue();
11637
252
  if (Op.getOperand(0).getValueType() == MVT::i1)
11638
0
    return SDValue();
11639
252
11640
252
  // For i32 intermediate values, unfortunately, the conversion functions
11641
252
  // leave the upper 32 bits of the value undefined. Within the set of
11642
252
  // scalar instructions, we have no method for zero- or sign-extending the
11643
252
  // value. Thus, we cannot handle i32 intermediate values here.
11644
252
  
if (252
Op.getOperand(0).getValueType() == MVT::i32252
)
11645
131
    return SDValue();
11646
121
11647
252
  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
11648
121
         "UINT_TO_FP is supported only with FPCVT");
11649
121
11650
121
  // If we have FCFIDS, then use it when converting to single-precision.
11651
121
  // Otherwise, convert to double-precision and then round.
11652
103
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
11653
54
                       ? 
(Op.getOpcode() == ISD::UINT_TO_FP ? 54
PPCISD::FCFIDUS32
11654
54
                                                            : PPCISD::FCFIDS)
11655
67
                       : 
(Op.getOpcode() == ISD::UINT_TO_FP ? 67
PPCISD::FCFIDU26
11656
67
                                                            : PPCISD::FCFID);
11657
103
  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
11658
54
                  ? MVT::f32
11659
67
                  : MVT::f64;
11660
121
11661
121
  // If we're converting from a float, to an int, and back to a float again,
11662
121
  // then we don't need the store/load pair at all.
11663
121
  if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
11664
3
       Subtarget.hasFPCVT()) ||
11665
121
      
(Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)118
) {
11666
16
    SDValue Src = Op.getOperand(0).getOperand(0);
11667
16
    if (
Src.getValueType() == MVT::f3216
) {
11668
5
      Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
11669
5
      DCI.AddToWorklist(Src.getNode());
11670
16
    } else 
if (11
Src.getValueType() != MVT::f6411
) {
11671
1
      // Make sure that we don't pick up a ppc_fp128 source value.
11672
1
      return SDValue();
11673
1
    }
11674
15
11675
15
    unsigned FCTOp =
11676
12
      Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
11677
3
                                                        PPCISD::FCTIDUZ;
11678
15
11679
15
    SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
11680
15
    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
11681
15
11682
15
    if (
Op.getValueType() == MVT::f32 && 15
!Subtarget.hasFPCVT()5
) {
11683
3
      FP = DAG.getNode(ISD::FP_ROUND, dl,
11684
3
                       MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
11685
3
      DCI.AddToWorklist(FP.getNode());
11686
3
    }
11687
16
11688
16
    return FP;
11689
16
  }
11690
105
11691
105
  return SDValue();
11692
105
}
11693
11694
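// On little-endian subtargets that lack ISA 3.0's non-permuting vector
// memory operations, a VSX vector load becomes, roughly:
//   (v2f64 (load X)) -> (v2f64 (xxswapd (lxvd2x X)))
// lxvd2x loads the two doublewords in big-endian element order, and the
// xxswapd then swaps them into the order little-endian code expects.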
// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
// builtins) into loads with swaps.
SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Chain;
  SDValue Base;
  MachineMemOperand *MMO;

  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode for little endian VSX load");
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(N);
    Chain = LD->getChain();
    Base = LD->getBasePtr();
    MMO = LD->getMemOperand();
    // If the MMO suggests this isn't a load of a full vector, leave
    // things alone.  For a built-in, we have to make the change for
    // correctness, so if there is a size problem that will be a bug.
    if (MMO->getSize() < 16)
      return SDValue();
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
    Chain = Intrin->getChain();
    // Similarly to the store case below, Intrin->getBasePtr() doesn't get
    // us what we want. Get operand 2 instead.
    Base = Intrin->getOperand(2);
    MMO = Intrin->getMemOperand();
    break;
  }
  }

  MVT VecTy = N->getValueType(0).getSimpleVT();

  // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
  // aligned and the type is a vector with elements up to 4 bytes.
  if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment() % 16) &&
      VecTy.getScalarSizeInBits() <= 32) {
    return SDValue();
  }

  SDValue LoadOps[] = { Chain, Base };
  SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
                                         DAG.getVTList(MVT::v2f64, MVT::Other),
                                         LoadOps, MVT::v2f64, MMO);

  DCI.AddToWorklist(Load.getNode());
  Chain = Load.getValue(1);
  SDValue Swap = DAG.getNode(
      PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
  DCI.AddToWorklist(Swap.getNode());

  // Add a bitcast if the resulting load type doesn't match v2f64.
  if (VecTy != MVT::v2f64) {
    SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
    DCI.AddToWorklist(N.getNode());
    // Package {bitcast value, swap's chain} to match Load's shape.
    return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
                       N, Swap.getValue(1));
  }

  return Swap;
}

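// The store path mirrors the load expansion above: the source is swapped
// first and then stored with the permuting store, roughly:
//   (store v, X) -> (stxvd2x (xxswapd v), X)
// with a bitcast to v2f64 beforehand when the source has some other vector
// type.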
// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
// builtins) into stores with swaps.
SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Chain;
  SDValue Base;
  unsigned SrcOpnd;
  MachineMemOperand *MMO;

  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode for little endian VSX store");
  case ISD::STORE: {
    StoreSDNode *ST = cast<StoreSDNode>(N);
    Chain = ST->getChain();
    Base = ST->getBasePtr();
    MMO = ST->getMemOperand();
    SrcOpnd = 1;
    // If the MMO suggests this isn't a store of a full vector, leave
    // things alone.  For a built-in, we have to make the change for
    // correctness, so if there is a size problem that will be a bug.
    if (MMO->getSize() < 16)
      return SDValue();
    break;
  }
  case ISD::INTRINSIC_VOID: {
    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
    Chain = Intrin->getChain();
    // Intrin->getBasePtr() oddly does not get what we want.
    Base = Intrin->getOperand(3);
    MMO = Intrin->getMemOperand();
    SrcOpnd = 2;
    break;
  }
  }

  SDValue Src = N->getOperand(SrcOpnd);
  MVT VecTy = Src.getValueType().getSimpleVT();

  // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the store is
  // aligned and the type is a vector with elements up to 4 bytes.
  if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment() % 16) &&
      VecTy.getScalarSizeInBits() <= 32) {
    return SDValue();
  }

  // All stores are done as v2f64 and possible bit cast.
  if (VecTy != MVT::v2f64) {
    Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
    DCI.AddToWorklist(Src.getNode());
  }

  SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
                             DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
  DCI.AddToWorklist(Swap.getNode());
  Chain = Swap.getValue(1);
  SDValue StoreOps[] = { Chain, Swap, Base };
  SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
                                          DAG.getVTList(MVT::Other),
                                          StoreOps, VecTy, MMO);
  DCI.AddToWorklist(Store.getNode());
  return Store;
}

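// PerformDAGCombine - This is the target hook the generic DAG combiner
// invokes for each node. Every case below either returns a replacement
// value or falls through to the empty SDValue() at the bottom, which
// signals "no change".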
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default: break;
  case ISD::SHL:
    return combineSHL(N, DCI);
  case ISD::SRA:
    return combineSRA(N, DCI);
  case ISD::SRL:
    return combineSRL(N, DCI);
  case PPCISD::SHL:
    if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
      return N->getOperand(0);
    break;
  case PPCISD::SRL:
    if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
      return N->getOperand(0);
    break;
  case PPCISD::SRA:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->isNullValue() ||   //  0 >>s V -> 0.
          C->isAllOnesValue())  // -1 >>s V -> -1.
        return N->getOperand(0);
    }
    break;
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND:
    return DAGCombineExtBoolTrunc(N, DCI);
  case ISD::TRUNCATE:
  case ISD::SETCC:
  case ISD::SELECT_CC:
    return DAGCombineTruncBoolExt(N, DCI);
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
    return combineFPToIntToFP(N, DCI);
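  // The STORE combines below turn a store of FP_TO_SINT into stfiwx (or a
  // Power9 sub-word store via PPCISD::STXSIX), a store of BSWAP into
  // sthbrx/stwbrx (stdbrx given LDBRX on a 64-bit target), and little-endian
  // VSX vector stores into the xxswapd+stxvd2x form built by
  // expandVSXStoreForLE().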
  case ISD::STORE: {
    EVT Op1VT = N->getOperand(1).getValueType();
    bool ValidTypeForStoreFltAsInt = (Op1VT == MVT::i32) ||
      (Subtarget.hasP9Vector() && (Op1VT == MVT::i8 || Op1VT == MVT::i16));

    // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
    if (Subtarget.hasSTFIWX() && !cast<StoreSDNode>(N)->isTruncatingStore() &&
        N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
        ValidTypeForStoreFltAsInt &&
        N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
      SDValue Val = N->getOperand(1).getOperand(0);
      if (Val.getValueType() == MVT::f32) {
        Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
        DCI.AddToWorklist(Val.getNode());
      }
      Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
      DCI.AddToWorklist(Val.getNode());

      if (Op1VT == MVT::i32) {
        SDValue Ops[] = {
          N->getOperand(0), Val, N->getOperand(2),
          DAG.getValueType(N->getOperand(1).getValueType())
        };

        Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
                DAG.getVTList(MVT::Other), Ops,
                cast<StoreSDNode>(N)->getMemoryVT(),
                cast<StoreSDNode>(N)->getMemOperand());
      } else {
        unsigned WidthInBytes =
          N->getOperand(1).getValueType() == MVT::i8 ? 1 : 2;
        SDValue WidthConst = DAG.getIntPtrConstant(WidthInBytes, dl, false);

        SDValue Ops[] = {
          N->getOperand(0), Val, N->getOperand(2), WidthConst,
          DAG.getValueType(N->getOperand(1).getValueType())
        };
        Val = DAG.getMemIntrinsicNode(PPCISD::STXSIX, dl,
                                      DAG.getVTList(MVT::Other), Ops,
                                      cast<StoreSDNode>(N)->getMemoryVT(),
                                      cast<StoreSDNode>(N)->getMemOperand());
      }

      DCI.AddToWorklist(Val.getNode());
      return Val;
    }

    // Turn STORE (BSWAP) -> sthbrx/stwbrx.
    if (cast<StoreSDNode>(N)->isUnindexed() &&
        N->getOperand(1).getOpcode() == ISD::BSWAP &&
        N->getOperand(1).getNode()->hasOneUse() &&
        (N->getOperand(1).getValueType() == MVT::i32 ||
         N->getOperand(1).getValueType() == MVT::i16 ||
         (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
          N->getOperand(1).getValueType() == MVT::i64))) {
      SDValue BSwapOp = N->getOperand(1).getOperand(0);
      // Do an any-extend to 32-bits if this is a half-word input.
      if (BSwapOp.getValueType() == MVT::i16)
        BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);

      // If the type of the BSWAP operand is wider than the stored memory
      // width, it needs to be shifted to the right side before STBRX.
      EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
      if (Op1VT.bitsGT(mVT)) {
        int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
        BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
                              DAG.getConstant(Shift, dl, MVT::i32));
        // Need to truncate if this is a bswap of i64 stored as i32/i16.
        if (Op1VT == MVT::i64)
          BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
      }

      SDValue Ops[] = {
        N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
      };
      return
        DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
                                Ops, cast<StoreSDNode>(N)->getMemoryVT(),
                                cast<StoreSDNode>(N)->getMemOperand());
    }

    // For little endian, VSX stores require generating xxswapd/stxvd2x.
    // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
    EVT VT = N->getOperand(1).getValueType();
    if (VT.isSimple()) {
      MVT StoreVT = VT.getSimpleVT();
      if (Subtarget.needsSwapsForVSXMemOps() &&
          (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
           StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
        return expandVSXStoreForLE(N, DCI);
    }
    break;
  }
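  // The LOAD combines below handle three independent patterns: little-endian
  // VSX vector loads (lxvd2x+xxswapd via expandVSXLoadForLE()), an i64 load
  // whose only users are two f32 bitcast halves (split into two direct f32
  // loads), and type-legal but under-aligned Altivec/QPX loads (expanded
  // with a permutation-based sequence).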
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT VT = LD->getValueType(0);

    // For little endian, VSX loads require generating lxvd2x/xxswapd.
    // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
    if (VT.isSimple()) {
      MVT LoadVT = VT.getSimpleVT();
      if (Subtarget.needsSwapsForVSXMemOps() &&
          (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
           LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
        return expandVSXLoadForLE(N, DCI);
    }

    // We sometimes end up with a 64-bit integer load, from which we extract
    // two single-precision floating-point numbers. This happens with
    // std::complex<float>, and other similar structures, because of the way
    // we canonicalize structure copies. However, if we lack direct moves,
    // then the final bitcasts from the extracted integer values to the
    // floating-point numbers turn into store/load pairs. Even with direct
    // moves, just loading the two floating-point numbers is likely better.
    auto ReplaceTwoFloatLoad = [&]() {
      if (VT != MVT::i64)
        return false;

      if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
          LD->isVolatile())
        return false;

      //  We're looking for a sequence like this:
      //  t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
      //      t16: i64 = srl t13, Constant:i32<32>
      //    t17: i32 = truncate t16
      //  t18: f32 = bitcast t17
      //    t19: i32 = truncate t13
      //  t20: f32 = bitcast t19

      if (!LD->hasNUsesOfValue(2, 0))
        return false;

      auto UI = LD->use_begin();
      while (UI.getUse().getResNo() != 0) ++UI;
      SDNode *Trunc = *UI++;
      while (UI.getUse().getResNo() != 0) ++UI;
      SDNode *RightShift = *UI;
      if (Trunc->getOpcode() != ISD::TRUNCATE)
        std::swap(Trunc, RightShift);

      if (Trunc->getOpcode() != ISD::TRUNCATE ||
          Trunc->getValueType(0) != MVT::i32 ||
          !Trunc->hasOneUse())
        return false;
      if (RightShift->getOpcode() != ISD::SRL ||
          !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
          RightShift->getConstantOperandVal(1) != 32 ||
          !RightShift->hasOneUse())
        return false;

      SDNode *Trunc2 = *RightShift->use_begin();
      if (Trunc2->getOpcode() != ISD::TRUNCATE ||
          Trunc2->getValueType(0) != MVT::i32 ||
          !Trunc2->hasOneUse())
        return false;

      SDNode *Bitcast = *Trunc->use_begin();
      SDNode *Bitcast2 = *Trunc2->use_begin();

      if (Bitcast->getOpcode() != ISD::BITCAST ||
          Bitcast->getValueType(0) != MVT::f32)
        return false;
      if (Bitcast2->getOpcode() != ISD::BITCAST ||
          Bitcast2->getValueType(0) != MVT::f32)
        return false;

      if (Subtarget.isLittleEndian())
        std::swap(Bitcast, Bitcast2);

      // Bitcast has the second float (in memory-layout order) and Bitcast2
      // has the first one.

      SDValue BasePtr = LD->getBasePtr();
      if (LD->isIndexed()) {
        assert(LD->getAddressingMode() == ISD::PRE_INC &&
               "Non-pre-inc AM on PPC?");
        BasePtr =
          DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                      LD->getOffset());
      }

      auto MMOFlags =
          LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
      SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
                                      LD->getPointerInfo(), LD->getAlignment(),
                                      MMOFlags, LD->getAAInfo());
      SDValue AddPtr =
        DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
                    BasePtr, DAG.getIntPtrConstant(4, dl));
      SDValue FloatLoad2 = DAG.getLoad(
          MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
          LD->getPointerInfo().getWithOffset(4),
          MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());

      if (LD->isIndexed()) {
        // Note that DAGCombine should re-form any pre-increment load(s) from
        // what is produced here if that makes sense.
        DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
      }

      DCI.CombineTo(Bitcast2, FloatLoad);
      DCI.CombineTo(Bitcast, FloatLoad2);

      DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
                                    SDValue(FloatLoad2.getNode(), 1));
      return true;
    };

    if (ReplaceTwoFloatLoad())
      return SDValue(N, 0);

    EVT MemVT = LD->getMemoryVT();
    Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
    unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
    Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
    unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy);
    if (LD->isUnindexed() && VT.isVector() &&
        ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
          // P8 and later hardware should just use LOAD.
          !Subtarget.hasP8Vector() &&
          (VT == MVT::v16i8 || VT == MVT::v8i16 ||
           VT == MVT::v4i32 || VT == MVT::v4f32)) ||
         (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
          LD->getAlignment() >= ScalarABIAlignment)) &&
        LD->getAlignment() < ABIAlignment) {
      // This is a type-legal unaligned Altivec or QPX load.
      SDValue Chain = LD->getChain();
      SDValue Ptr = LD->getBasePtr();
      bool isLittleEndian = Subtarget.isLittleEndian();

      // This implements the loading of unaligned vectors as described in
      // the venerable Apple Velocity Engine overview. Specifically:
      // https://developer.apple.com/hardwaredrivers/ve/alignment.html
      // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
      //
      // The general idea is to expand a sequence of one or more unaligned
      // loads into an alignment-based permutation-control instruction (lvsl
      // or lvsr), a series of regular vector loads (which always truncate
      // their input address to an aligned address), and a series of
      // permutations.  The results of these permutations are the requested
      // loaded values.  The trick is that the last "extra" load is not taken
      // from the address you might suspect (sizeof(vector) bytes after the
      // last requested load), but rather sizeof(vector) - 1 bytes after the
      // last requested vector. The point of this is to avoid a page fault if
      // the base address happened to be aligned. This works because if the
      // base address is aligned, then adding less than a full vector length
      // will cause the last vector in the sequence to be (re)loaded.
      // Otherwise, the next vector will be fetched as you might suspect was
      // necessary.

      // We might be able to reuse the permutation generation from
      // a different base address offset from this one by an aligned amount.
      // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
      // optimization later.
      Intrinsic::ID Intr, IntrLD, IntrPerm;
      MVT PermCntlTy, PermTy, LDTy;
      if (Subtarget.hasAltivec()) {
        Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
                              : Intrinsic::ppc_altivec_lvsl;
        IntrLD = Intrinsic::ppc_altivec_lvx;
        IntrPerm = Intrinsic::ppc_altivec_vperm;
        PermCntlTy = MVT::v16i8;
        PermTy = MVT::v4i32;
        LDTy = MVT::v4i32;
      } else {
        Intr = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld
                                   : Intrinsic::ppc_qpx_qvlpcls;
        IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd
                                     : Intrinsic::ppc_qpx_qvlfs;
        IntrPerm = Intrinsic::ppc_qpx_qvfperm;
        PermCntlTy = MVT::v4f64;
        PermTy = MVT::v4f64;
        LDTy = MemVT.getSimpleVT();
      }

      SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);

      // Create the new MMO for the new base load. It is like the original
      // MMO, but represents an area in memory almost twice the vector size
      // centered on the original address. If the address is unaligned, we
      // might start reading up to (sizeof(vector)-1) bytes below the address
      // of the original unaligned load.
      MachineFunction &MF = DAG.getMachineFunction();
      MachineMemOperand *BaseMMO =
        MF.getMachineMemOperand(LD->getMemOperand(),
                                -(long)MemVT.getStoreSize()+1,
                                2*MemVT.getStoreSize()-1);

      // Create the new base load.
      SDValue LDXIntID =
          DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
      SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
      SDValue BaseLoad =
        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
                                DAG.getVTList(PermTy, MVT::Other),
                                BaseLoadOps, LDTy, BaseMMO);

      // Note that the value of IncOffset (which is provided to the next
      // load's pointer info offset value, and thus used to calculate the
      // alignment), and the value of IncValue (which is actually used to
      // increment the pointer value) are different! This is because we
      // require the next load to appear to be aligned, even though it
      // is actually offset from the base pointer by a lesser amount.
      int IncOffset = VT.getSizeInBits() / 8;
      int IncValue = IncOffset;

      // Walk (both up and down) the chain looking for another load at the
      // real (aligned) offset (the alignment of the other load does not
      // matter in this case). If found, then do not use the offset reduction
      // trick, as that will prevent the loads from being later combined (as
      // they would otherwise be duplicates).
      if (!findConsecutiveLoad(LD, DAG))
        --IncValue;

      SDValue Increment =
          DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
      Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);

      MachineMemOperand *ExtraMMO =
        MF.getMachineMemOperand(LD->getMemOperand(),
                                1, 2*MemVT.getStoreSize()-1);
      SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
      SDValue ExtraLoad =
        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
                                DAG.getVTList(PermTy, MVT::Other),
                                ExtraLoadOps, LDTy, ExtraMMO);

      SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
        BaseLoad.getValue(1), ExtraLoad.getValue(1));

      // Because vperm has a big-endian bias, we must reverse the order
      // of the input vectors and complement the permute control vector
      // when generating little endian code.  We have already handled the
      // latter by using lvsr instead of lvsl, so just reverse BaseLoad
      // and ExtraLoad here.
      SDValue Perm;
      if (isLittleEndian)
        Perm = BuildIntrinsicOp(IntrPerm,
                                ExtraLoad, BaseLoad, PermCntl, DAG, dl);
      else
        Perm = BuildIntrinsicOp(IntrPerm,
                                BaseLoad, ExtraLoad, PermCntl, DAG, dl);

      if (VT != PermTy)
        Perm = Subtarget.hasAltivec() ?
                 DAG.getNode(ISD::BITCAST, dl, VT, Perm) :
                 DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX
                             DAG.getTargetConstant(1, dl, MVT::i64));
                             // second argument is 1 because this rounding
                             // is always exact.

      // The output of the permutation is our loaded result, the TokenFactor
      // is our new chain.
      DCI.CombineTo(N, Perm, TF);
      return SDValue(N, 0);
    }
    break;
  }
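  // This INTRINSIC_WO_CHAIN case de-duplicates lvsl/lvsr (and the QPX
  // qvlpcld/qvlpcls) permute-control computations: when two addresses differ
  // by a multiple of the vector alignment they yield the same control
  // vector, so an existing node can be reused instead of creating another.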
  case ISD::INTRINSIC_WO_CHAIN: {
    bool isLittleEndian = Subtarget.isLittleEndian();
    unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
                                         : Intrinsic::ppc_altivec_lvsl);
    if ((IID == Intr ||
         IID == Intrinsic::ppc_qpx_qvlpcld ||
         IID == Intrinsic::ppc_qpx_qvlpcls) &&
        N->getOperand(1)->getOpcode() == ISD::ADD) {
      SDValue Add = N->getOperand(1);

      int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ?
                 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;

      if (DAG.MaskedValueIsZero(Add->getOperand(1),
                                APInt::getAllOnesValue(Bits /* alignment */)
                                    .zext(Add.getScalarValueSizeInBits()))) {
        SDNode *BasePtr = Add->getOperand(0).getNode();
        for (SDNode::use_iterator UI = BasePtr->use_begin(),
                                  UE = BasePtr->use_end();
             UI != UE; ++UI) {
          if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
              cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
                  IID) {
            // We've found another LVSL/LVSR, and this address is an aligned
            // multiple of that one. The results will be the same, so use the
            // one we've just found instead.
            return SDValue(*UI, 0);
          }
        }
      }

      if (isa<ConstantSDNode>(Add->getOperand(1))) {
        SDNode *BasePtr = Add->getOperand(0).getNode();
        for (SDNode::use_iterator UI = BasePtr->use_begin(),
             UE = BasePtr->use_end(); UI != UE; ++UI) {
          if (UI->getOpcode() == ISD::ADD &&
              isa<ConstantSDNode>(UI->getOperand(1)) &&
              (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
               cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
              (1ULL << Bits) == 0) {
            SDNode *OtherAdd = *UI;
            for (SDNode::use_iterator VI = OtherAdd->use_begin(),
                 VE = OtherAdd->use_end(); VI != VE; ++VI) {
              if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
                  cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() ==
                      IID) {
                return SDValue(*VI, 0);
              }
            }
          }
        }
      }
    }
    break;
  }
  case ISD::INTRINSIC_W_CHAIN:
    // For little endian, VSX loads require generating lxvd2x/xxswapd.
    // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
    if (Subtarget.needsSwapsForVSXMemOps()) {
      switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
      default:
        break;
      case Intrinsic::ppc_vsx_lxvw4x:
      case Intrinsic::ppc_vsx_lxvd2x:
        return expandVSXLoadForLE(N, DCI);
      }
    }
    break;
  case ISD::INTRINSIC_VOID:
    // For little endian, VSX stores require generating xxswapd/stxvd2x.
    // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
    if (Subtarget.needsSwapsForVSXMemOps()) {
      switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
      default:
        break;
      case Intrinsic::ppc_vsx_stxvw4x:
      case Intrinsic::ppc_vsx_stxvd2x:
        return expandVSXStoreForLE(N, DCI);
      }
    }
    break;
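  // e.g. (i32 (bswap (i32 (load X)))) becomes a single lwbrx; the i16 form
  // maps onto lhbrx, and the i64 form onto ldbrx when LDBRX is available.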
  case ISD::BSWAP:
    // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
    if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
        N->getOperand(0).hasOneUse() &&
        (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
         (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
          N->getValueType(0) == MVT::i64))) {
      SDValue Load = N->getOperand(0);
      LoadSDNode *LD = cast<LoadSDNode>(Load);
      // Create the byte-swapping load.
      SDValue Ops[] = {
        LD->getChain(),    // Chain
        LD->getBasePtr(),  // Ptr
        DAG.getValueType(N->getValueType(0)) // VT
      };
      SDValue BSLoad =
        DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
                                DAG.getVTList(N->getValueType(0) == MVT::i64 ?
                                              MVT::i64 : MVT::i32, MVT::Other),
                                Ops, LD->getMemoryVT(), LD->getMemOperand());

      // If this is an i16 load, insert the truncate.
      SDValue ResVal = BSLoad;
      if (N->getValueType(0) == MVT::i16)
        ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);

      // First, combine the bswap away.  This makes the value produced by the
      // load dead.
      DCI.CombineTo(N, ResVal);

      // Next, combine the load away; we give it a bogus result value but a
      // real chain result.  The result value is dead because the bswap is
      // dead.
      DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));

      // Return N so it doesn't get rechecked!
      return SDValue(N, 0);
    }
    break;
  case PPCISD::VCMP:
    // If a VCMPo node already exists with exactly the same operands as this
    // node, use its result instead of this node (VCMPo computes both a CR6
    // and a normal output).
    if (!N->getOperand(0).hasOneUse() &&
        !N->getOperand(1).hasOneUse() &&
        !N->getOperand(2).hasOneUse()) {

      // Scan all of the users of the LHS, looking for VCMPo's that match.
      SDNode *VCMPoNode = nullptr;

      SDNode *LHSN = N->getOperand(0).getNode();
      for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
           UI != E; ++UI)
        if (UI->getOpcode() == PPCISD::VCMPo &&
            UI->getOperand(1) == N->getOperand(1) &&
            UI->getOperand(2) == N->getOperand(2) &&
            UI->getOperand(0) == N->getOperand(0)) {
          VCMPoNode = *UI;
          break;
        }

      // If there is no VCMPo node, or if the flag value has a single use,
      // don't transform this.
      if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
        break;

      // Look at the (necessarily single) use of the flag value.  If it has a
      // chain, this transformation is more complex.  Note that multiple
      // things could use the value result, which we should ignore.
      SDNode *FlagUser = nullptr;
      for (SDNode::use_iterator UI = VCMPoNode->use_begin();
           FlagUser == nullptr; ++UI) {
        assert(UI != VCMPoNode->use_end() && "Didn't find user!");
        SDNode *User = *UI;
        for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
          if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
            FlagUser = User;
            break;
          }
        }
      }

      // If the user is a MFOCRF instruction, we know this is safe.
      // Otherwise we give up for right now.
      if (FlagUser->getOpcode() == PPCISD::MFOCRF)
        return SDValue(VCMPoNode, 0);
    }
    break;
  case ISD::BRCOND: {
    SDValue Cond = N->getOperand(1);
    SDValue Target = N->getOperand(2);

    if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
          Intrinsic::ppc_is_decremented_ctr_nonzero) {

      // We now need to make the intrinsic dead (it cannot be instruction
      // selected).
      DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
      assert(Cond.getNode()->hasOneUse() &&
             "Counter decrement has more than one use");

      return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
                         N->getOperand(0), Target);
    }
    break;
  }
  case ISD::BR_CC: {
    // If this is a branch on an altivec predicate comparison, lower this so
    // that we don't have to do a MFOCRF: instead, branch directly on CR6.
    // This lowering is done pre-legalize, because the legalizer lowers the
    // predicate compare down to code that is difficult to reassemble.
    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
    SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);

    // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
    // value. If so, pass-through the AND to get to the intrinsic.
    if (LHS.getOpcode() == ISD::AND &&
        LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
          Intrinsic::ppc_is_decremented_ctr_nonzero &&
        isa<ConstantSDNode>(LHS.getOperand(1)) &&
        !isNullConstant(LHS.getOperand(1)))
      LHS = LHS.getOperand(0);

    if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
          Intrinsic::ppc_is_decremented_ctr_nonzero &&
        isa<ConstantSDNode>(RHS)) {
      assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
             "Counter decrement comparison is not EQ or NE");

      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
      bool isBDNZ = (CC == ISD::SETEQ && Val) ||
                    (CC == ISD::SETNE && !Val);

      // We now need to make the intrinsic dead (it cannot be instruction
      // selected).
      DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
      assert(LHS.getNode()->hasOneUse() &&
             "Counter decrement has more than one use");

      return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
                         N->getOperand(0), N->getOperand(4));
    }

    int CompareOpc;
    bool isDot;

    if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
        isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
        getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
      assert(isDot && "Can't compare against a vector result!");

      // If this is a comparison against something other than 0/1, then we
      // know that the condition is never/always true.
      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
      if (Val != 0 && Val != 1) {
        if (CC == ISD::SETEQ)      // Cond never true, remove branch.
          return N->getOperand(0);
        // Always !=, turn it into an unconditional branch.
        return DAG.getNode(ISD::BR, dl, MVT::Other,
                           N->getOperand(0), N->getOperand(4));
      }

      bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);

      // Create the PPCISD altivec 'dot' comparison node.
      SDValue Ops[] = {
        LHS.getOperand(2),  // LHS of compare
        LHS.getOperand(3),  // RHS of compare
        DAG.getConstant(CompareOpc, dl, MVT::i32)
      };
      EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
      SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);

      // Unpack the result based on how the target uses it.
      PPC::Predicate CompOpc;
      switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
      default:  // Can't happen, don't crash on invalid number though.
      case 0:   // Branch on the value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
        break;
      case 1:   // Branch on the inverted value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
        break;
      case 2:   // Branch on the value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
        break;
      case 3:   // Branch on the inverted value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
        break;
      }

      return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
                         DAG.getConstant(CompOpc, dl, MVT::i32),
                         DAG.getRegister(PPC::CR6, MVT::i32),
                         N->getOperand(4), CompNode.getValue(1));
    }
    break;
  }
  case ISD::BUILD_VECTOR:
    return DAGCombineBuildVector(N, DCI);
  }

  return SDValue();
}

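// BuildSDIVPow2 - Build a signed divide by a power of two as a shift plus
// an add-with-carry rather than the generic expansion. The classic PPC
// idiom for e.g. x / 4 is:
//   srawi r3, r3, 2
//   addze r3, r3
// srawi sets CA exactly when the input was negative and nonzero bits were
// shifted out, and addze then adjusts the quotient so it rounds toward
// zero; PPCISD::SRA_ADDZE stands for that pair. A negative divisor takes
// the same path followed by a subtract-from-zero.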
SDValue
PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                 SelectionDAG &DAG,
                                 std::vector<SDNode *> *Created) const {
  // fold (sdiv X, pow2)
  EVT VT = N->getValueType(0);
  if (VT == MVT::i64 && !Subtarget.isPPC64())
    return SDValue();
  if ((VT != MVT::i32 && VT != MVT::i64) ||
      !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
    return SDValue();

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);

  bool IsNegPow2 = (-Divisor).isPowerOf2();
  unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
  SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);

  SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
  if (Created)
    Created->push_back(Op.getNode());

  if (IsNegPow2) {
    Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
    if (Created)
      Created->push_back(Op.getNode());
  }

  return Op;
}

//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

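// computeKnownBitsForTargetNode - Teach the generic known-bits analysis
// about PPC-specific nodes: an i16 PPCISD::LBRX (lhbrx) zero-extends, so
// its top 16 bits are known zero, and the Altivec vcmp*_p predicate
// intrinsics only produce values with all bits above bit 0 clear.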
void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                      KnownBits &Known,
                                                      const APInt &DemandedElts,
                                                      const SelectionDAG &DAG,
                                                      unsigned Depth) const {
  Known.resetAll();
  switch (Op.getOpcode()) {
  default: break;
  case PPCISD::LBRX: {
    // lhbrx is known to have the top bits cleared out.
    if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
      Known.Zero = 0xFFFF0000;
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
    default: break;
    case Intrinsic::ppc_altivec_vcmpbfp_p:
    case Intrinsic::ppc_altivec_vcmpeqfp_p:
    case Intrinsic::ppc_altivec_vcmpequb_p:
    case Intrinsic::ppc_altivec_vcmpequh_p:
    case Intrinsic::ppc_altivec_vcmpequw_p:
    case Intrinsic::ppc_altivec_vcmpequd_p:
    case Intrinsic::ppc_altivec_vcmpgefp_p:
    case Intrinsic::ppc_altivec_vcmpgtfp_p:
    case Intrinsic::ppc_altivec_vcmpgtsb_p:
    case Intrinsic::ppc_altivec_vcmpgtsh_p:
    case Intrinsic::ppc_altivec_vcmpgtsw_p:
    case Intrinsic::ppc_altivec_vcmpgtsd_p:
    case Intrinsic::ppc_altivec_vcmpgtub_p:
    case Intrinsic::ppc_altivec_vcmpgtuh_p:
    case Intrinsic::ppc_altivec_vcmpgtuw_p:
    case Intrinsic::ppc_altivec_vcmpgtud_p:
      Known.Zero = ~1U;  // All bits but the low one are known to be zero.
      break;
    }
  }
  }
}

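// getPrefLoopAlignment - The returned value is interpreted as the log2 of
// the alignment in bytes, so the 'return 5' below requests the 32-byte
// (one instruction-cache line) boundary that the in-body comment describes.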
unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
  switch (Subtarget.getDarwinDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
  case PPC::DIR_PWR9: {
    if (!ML)
      break;

    const PPCInstrInfo *TII = Subtarget.getInstrInfo();

    // For small loops (between 5 and 8 instructions), align to a 32-byte
    // boundary so that the entire loop fits in one instruction-cache line.
    uint64_t LoopSize = 0;
    for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
      for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
        LoopSize += TII->getInstSizeInBytes(*J);
        if (LoopSize > 32)
          break;
      }

    if (LoopSize > 16 && LoopSize <= 32)
      return 5;

    break;
  }
  }

  return TargetLowering::getPrefLoopAlignment(ML);
}

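// The single-letter constraints handled here follow the GCC RS6000
// conventions: 'b' is a base (non-r0) GPR, 'r' a GPR, 'f'/'d' an FPR,
// 'v' a vector register, 'y' a condition register, and 'Z' an r+r memory
// operand; see getRegForInlineAsmConstraint() below for the register-class
// mapping.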
/// getConstraintType - Given a constraint, return the type of
/// constraint it is for this target.
PPCTargetLowering::ConstraintType
PPCTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'd':
    case 'v':
    case 'y':
      return C_RegisterClass;
    case 'Z':
      // FIXME: While Z does indicate a memory constraint, it specifically
      // indicates an r+r address (used in conjunction with the 'y' modifier
      // in the replacement string). Currently, we're forcing the base
      // register to be r0 in the asm printer (which is interpreted as zero)
      // and forming the complete address in the second register. This is
      // suboptimal.
      return C_Memory;
    }
  } else if (Constraint == "wc") { // individual CR bits.
    return C_RegisterClass;
  } else if (Constraint == "wa" || Constraint == "wd" ||
             Constraint == "wf" || Constraint == "ws") {
    return C_RegisterClass; // VSX registers.
  }
  return TargetLowering::getConstraintType(Constraint);
}

/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
PPCTargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  Type *type = CallOperandVal->getType();

  // Look at the constraint type.
  if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
    return CW_Register; // an individual CR bit.
  else if ((StringRef(constraint) == "wa" ||
            StringRef(constraint) == "wd" ||
            StringRef(constraint) == "wf") &&
           type->isVectorTy())
    return CW_Register;
  else if (StringRef(constraint) == "ws" && type->isDoubleTy())
    return CW_Register;

  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;
  case 'b':
    if (type->isIntegerTy())
      weight = CW_Register;
    break;
  case 'f':
    if (type->isFloatTy())
      weight = CW_Register;
    break;
  case 'd':
    if (type->isDoubleTy())
      weight = CW_Register;
    break;
  case 'v':
    if (type->isVectorTy())
      weight = CW_Register;
    break;
  case 'y':
    weight = CW_Register;
    break;
  case 'Z':
    weight = CW_Memory;
    break;
  }
  return weight;
}

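// As a hypothetical example, C source such as
//   asm ("add %0, %1, %2" : "=r"(d) : "r"(a), "r"(b));
// reaches this hook with Constraint == "r"; on a 64-bit subtarget with
// VT == MVT::i64 that selects the G8RC class below, otherwise GPRC.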
std::pair<unsigned, const TargetRegisterClass *>
12715
PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
12716
                                                StringRef Constraint,
12717
2.77k
                                                MVT VT) const {
12718
2.77k
  if (
Constraint.size() == 12.77k
) {
12719
290
    // GCC RS6000 Constraint Letters
12720
290
    switch (Constraint[0]) {
12721
7
    case 'b':   // R1-R31
12722
7
      if (
VT == MVT::i64 && 7
Subtarget.isPPC64()5
)
12723
5
        return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
12724
2
      return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
12725
270
    case 'r':   // R0-R31
12726
270
      if (
VT == MVT::i64 && 270
Subtarget.isPPC64()27
)
12727
27
        return std::make_pair(0U, &PPC::G8RCRegClass);
12728
243
      return std::make_pair(0U, &PPC::GPRCRegClass);
12729
243
    // 'd' and 'f' constraints are both defined to be "the floating point
12730
243
    // registers", where one is for 32-bit and the other for 64-bit. We don't
12731
243
    // really care overly much here so just give them all the same reg classes.
12732
13
    case 'd':
12733
13
    case 'f':
12734
13
      if (
VT == MVT::f32 || 13
VT == MVT::i3213
)
12735
0
        return std::make_pair(0U, &PPC::F4RCRegClass);
12736
13
      
if (13
VT == MVT::f64 || 13
VT == MVT::i642
)
12737
13
        return std::make_pair(0U, &PPC::F8RCRegClass);
12738
0
      
if (0
VT == MVT::v4f64 && 0
Subtarget.hasQPX()0
)
12739
0
        return std::make_pair(0U, &PPC::QFRCRegClass);
12740
0
      
if (0
VT == MVT::v4f32 && 0
Subtarget.hasQPX()0
)
12741
0
        return std::make_pair(0U, &PPC::QSRCRegClass);
12742
0
      break;
12743
0
    case 'v':
12744
0
      if (
VT == MVT::v4f64 && 0
Subtarget.hasQPX()0
)
12745
0
        return std::make_pair(0U, &PPC::QFRCRegClass);
12746
0
      
if (0
VT == MVT::v4f32 && 0
Subtarget.hasQPX()0
)
12747
0
        return std::make_pair(0U, &PPC::QSRCRegClass);
12748
0
      
if (0
Subtarget.hasAltivec()0
)
12749
0
        return std::make_pair(0U, &PPC::VRRCRegClass);
12750
0
    case 'y':   // crrc
12751
0
      return std::make_pair(0U, &PPC::CRRCRegClass);
12752
2.77k
    }
12753
2.48k
  } else 
if (2.48k
Constraint == "wc" && 2.48k
Subtarget.useCRBits()39
) {
12754
36
    // An individual CR bit.
12755
36
    return std::make_pair(0U, &PPC::CRBITRCRegClass);
12756
2.45k
  } else 
if (2.45k
(Constraint == "wa" || 2.45k
Constraint == "wd"2.45k
||
12757
2.45k
             
Constraint == "wf"2.44k
) &&
Subtarget.hasVSX()3
) {
12758
0
    return std::make_pair(0U, &PPC::VSRCRegClass);
12759
2.45k
  } else 
if (2.45k
Constraint == "ws" && 2.45k
Subtarget.hasVSX()0
) {
12760
0
    if (
VT == MVT::f32 && 0
Subtarget.hasP8Vector()0
)
12761
0
      return std::make_pair(0U, &PPC::VSSRCRegClass);
12762
0
    else
12763
0
      return std::make_pair(0U, &PPC::VSFRCRegClass);
12764
2.45k
  }
12765
2.45k
12766
2.45k
  std::pair<unsigned, const TargetRegisterClass *> R =
12767
2.45k
      TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
12768
2.45k
12769
2.45k
  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
12770
2.45k
  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
12771
2.45k
  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
12772
2.45k
  // register.
12773
2.45k
  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
12774
2.45k
  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
12775
2.45k
  if (
R.first && 2.45k
VT == MVT::i642.35k
&&
Subtarget.isPPC64()76
&&
12776
76
      PPC::GPRCRegClass.contains(R.first))
12777
76
    return std::make_pair(TRI->getMatchingSuperReg(R.first,
12778
76
                            PPC::sub_32, &PPC::G8RCRegClass),
12779
76
                          &PPC::G8RCRegClass);
12780
2.37k
12781
2.37k
  // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
12782
2.37k
  if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
12783
16
    R.first = PPC::CR0;
12784
16
    R.second = &PPC::CRRCRegClass;
12785
16
  }
12786
2.77k
12787
2.77k
  return R;
12788
2.77k
}
12789
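A minimal usage sketch (illustrative, not part of the measured source; the function name and operands are hypothetical, and GCC/Clang inline-asm syntax on a PowerPC target is assumed): "b" requests a base register (the GPRC_NOR0/G8RC_NOX0 classes above, since r0 reads as a literal zero in a base slot), "r" requests any GPR, and the "cc" clobber reaches the StringRef("{cc}").equals_lower special case, which maps it onto CR0.
long AddSmallOffset(long Base) {
  long Out;
  // "=r" -> GPRC/G8RC; "b" -> no-r0 class; "cc" -> CR0 via the {cc} alias.
  asm("addi %0, %1, 8" : "=r"(Out) : "b"(Base) : "cc");
  return Out;
}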
12790
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
12791
/// vector.  If it is invalid, don't add anything to Ops.
12792
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
12793
                                                     std::string &Constraint,
12794
                                                     std::vector<SDValue>&Ops,
12795
78
                                                     SelectionDAG &DAG) const {
12796
78
  SDValue Result;
12797
78
12798
78
  // Only support length 1 constraints.
12799
78
  if (Constraint.length() > 1) return;
12800
78
12801
78
  char Letter = Constraint[0];
12802
78
  switch (Letter) {
12803
54
  default: break;
12804
24
  case 'I':
12805
24
  case 'J':
12806
24
  case 'K':
12807
24
  case 'L':
12808
24
  case 'M':
12809
24
  case 'N':
12810
24
  case 'O':
12811
24
  case 'P': {
12812
24
    ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
12813
24
    if (!CST) return; // Must be an immediate to match.
12814
20
    SDLoc dl(Op);
12815
20
    int64_t Value = CST->getSExtValue();
12816
20
    EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
12817
20
                         // numbers are printed as such.
12818
20
    switch (Letter) {
12819
0
    default: llvm_unreachable("Unknown constraint letter!");
12820
6
    case 'I':  // "I" is a signed 16-bit constant.
12821
6
      if (isInt<16>(Value))
12822
6
        Result = DAG.getTargetConstant(Value, dl, TCVT);
12823
6
      break;
12824
0
    case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
12825
0
      if (isShiftedUInt<16, 16>(Value))
12826
0
        Result = DAG.getTargetConstant(Value, dl, TCVT);
12827
0
      break;
12828
0
    case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
12829
0
      if (isShiftedInt<16, 16>(Value))
12830
0
        Result = DAG.getTargetConstant(Value, dl, TCVT);
12831
0
      break;
12832
0
    case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
12833
0
      if (isUInt<16>(Value))
12834
0
        Result = DAG.getTargetConstant(Value, dl, TCVT);
12835
0
      break;
12836
0
    case 'M':  // "M" is a constant that is greater than 31.
12837
0
      if (Value > 31)
12838
0
        Result = DAG.getTargetConstant(Value, dl, TCVT);
12839
0
      break;
12840
0
    case 'N':  // "N" is a positive constant that is an exact power of two.
12841
0
      if (Value > 0 && isPowerOf2_64(Value))
12842
0
        Result = DAG.getTargetConstant(Value, dl, TCVT);
12843
0
      break;
12844
14
    case 'O':  // "O" is the constant zero.
12845
14
      if (Value == 0)
12846
14
        Result = DAG.getTargetConstant(Value, dl, TCVT);
12847
14
      break;
12848
0
    case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
12849
0
      if (isInt<16>(-Value))
12850
0
        Result = DAG.getTargetConstant(Value, dl, TCVT);
12851
0
      break;
12852
20
    }
12853
20
    break;
12854
20
  }
12855
74
  }
12856
74
12857
74
  if (Result.getNode()) {
12858
20
    Ops.push_back(Result);
12859
20
    return;
12860
20
  }
12861
54
12862
54
  // Handle standard constraint letters.
12863
54
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
12864
54
}
12865
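For illustration (hypothetical snippet, GCC/Clang inline asm assumed): an immediate operand that the letters above validate. "I" accepts any signed 16-bit constant, so 42 becomes a target constant; a value outside that range would leave Ops empty and fail constraint matching.
int AddImmediate(int X) {
  int R;
  asm("addi %0, %1, %2" : "=r"(R) : "r"(X), "I"(42)); // isInt<16>(42) holds
  return R;
}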
12866
// isLegalAddressingMode - Return true if the addressing mode represented
12867
// by AM is legal for this target, for a load/store of the specified type.
12868
bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
12869
                                              const AddrMode &AM, Type *Ty,
12870
44.7k
                                              unsigned AS, Instruction *I) const {
12871
44.7k
  // PPC does not allow r+i addressing modes for vectors!
12872
44.7k
  if (Ty->isVectorTy() && AM.BaseOffs != 0)
12873
2.89k
    return false;
12874
41.8k
12875
41.8k
  // PPC allows a sign-extended 16-bit immediate field.
12876
41.8k
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
12877
28
    return false;
12878
41.7k
12879
41.7k
  // No global is ever allowed as a base.
12880
41.7k
  if (AM.BaseGV)
12881
2.32k
    return false;
12882
39.4k
12883
39.4k
  // PPC only supports r+r,
12884
39.4k
  switch (AM.Scale) {
12885
11.3k
  case 0:  // "r+i" or just "i", depending on HasBaseReg.
12886
11.3k
    break;
12887
23.3k
  case 1:
12888
23.3k
    if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
12889
2.71k
      return false;
12890
20.6k
    // Otherwise we have r+r or r+i.
12891
20.6k
    break;
12892
226
  case 2:
12893
226
    if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
12894
102
      return false;
12895
124
    // Allow 2*r as r+r.
12896
124
    break;
12897
4.53k
  default:
12898
4.53k
    // No other scales are supported.
12899
4.53k
    return false;
12900
32.1k
  }
12901
32.1k
12902
32.1k
  return true;
12903
32.1k
}
12904
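The decision above condenses into a standalone sketch (hypothetical helper, simplified: it omits the vector and GlobalValue checks) that mirrors the displacement-range and scale rules for the addressing modes LSR may form:
// r+i with a displacement inside the checked range, r+r (Scale 1 with no
// displacement), and 2*r (re-expressible as r+r) are legal; nothing else is.
static bool isLegalPPCAddrModeSketch(long long Offs, int Scale, bool HasBaseReg) {
  if (Offs <= -(1LL << 16) || Offs >= (1LL << 16) - 1)
    return false;                                  // displacement out of range
  if (Scale == 0) return true;                     // "r+i" or plain "i"
  if (Scale == 1) return !(HasBaseReg && Offs);    // r+r, but never r+r+i
  if (Scale == 2) return !HasBaseReg && Offs == 0; // 2*r folds to r+r
  return false;                                    // no other scales supported
}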
12905
SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
12906
6
                                           SelectionDAG &DAG) const {
12907
6
  MachineFunction &MF = DAG.getMachineFunction();
12908
6
  MachineFrameInfo &MFI = MF.getFrameInfo();
12909
6
  MFI.setReturnAddressIsTaken(true);
12910
6
12911
6
  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
12912
0
    return SDValue();
12913
6
12914
6
  SDLoc dl(Op);
12915
6
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
12916
6
12917
6
  // Make sure the function does not optimize away the store of the RA to
12918
6
  // the stack.
12919
6
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
12920
6
  FuncInfo->setLRStoreRequired();
12921
6
  bool isPPC64 = Subtarget.isPPC64();
12922
6
  auto PtrVT = getPointerTy(MF.getDataLayout());
12923
6
12924
6
  if (Depth > 0) {
12925
2
    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
12926
2
    SDValue Offset =
12927
2
        DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
12928
2
                        isPPC64 ? MVT::i64 : MVT::i32);
12929
2
    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
12930
2
                       DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
12931
2
                       MachinePointerInfo());
12932
2
  }
12933
4
12934
4
  // Just load the return address off the stack.
12935
4
  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
12936
4
  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
12937
4
                     MachinePointerInfo());
12938
4
}
12939
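Illustrative source that reaches this lowering (hypothetical snippet): __builtin_return_address(0) becomes the frame-index load at the end of the function, while a nonzero depth first walks frames through LowerFRAMEADDR and then loads LR's save slot at the return-save offset.
void *CallerPC()        { return __builtin_return_address(0); } // Depth 0
void *CallersCallerPC() { return __builtin_return_address(1); } // Depth 1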
12940
SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
12941
12
                                          SelectionDAG &DAG) const {
12942
12
  SDLoc dl(Op);
12943
12
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
12944
12
12945
12
  MachineFunction &MF = DAG.getMachineFunction();
12946
12
  MachineFrameInfo &MFI = MF.getFrameInfo();
12947
12
  MFI.setFrameAddressIsTaken(true);
12948
12
12949
12
  EVT PtrVT = getPointerTy(MF.getDataLayout());
12950
12
  bool isPPC64 = PtrVT == MVT::i64;
12951
12
12952
12
  // Naked functions never have a frame pointer, and so we use r1. For all
12953
12
  // other functions, this decision must be delayed until during PEI.
12954
12
  unsigned FrameReg;
12955
12
  if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
12956
1
    FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
12957
12
  else
12958
11
    FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
12959
12
12960
12
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
12961
12
                                         PtrVT);
12962
16
  while (Depth--)
12963
4
    FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
12964
4
                            FrameAddr, MachinePointerInfo());
12965
12
  return FrameAddr;
12966
12
}
12967
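Correspondingly (hypothetical snippet), each extra depth level adds one iteration of the while (Depth--) chain of loads above:
void *MyFrame()     { return __builtin_frame_address(0); } // copy of FrameReg
void *CallerFrame() { return __builtin_frame_address(1); } // one load deeper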
12968
// FIXME? Maybe this could be a TableGen attribute on some registers and
12969
// this table could be generated automatically from RegInfo.
12970
unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
12971
19
                                              SelectionDAG &DAG) const {
12972
19
  bool isPPC64 = Subtarget.isPPC64();
12973
19
  bool isDarwinABI = Subtarget.isDarwinABI();
12974
19
12975
19
  if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
12976
19
      (!isPPC64 && VT != MVT::i32))
12977
0
    report_fatal_error("Invalid register global variable type");
12978
19
12979
19
  bool is64Bit = isPPC64 && VT == MVT::i64;
12980
19
  unsigned Reg = StringSwitch<unsigned>(RegName)
12981
19
                   .Case("r1", is64Bit ? 
PPC::X16
:
PPC::R113
)
12982
19
                   .Case("r2", (isDarwinABI || 
isPPC6411
) ?
015
:
PPC::R24
)
12983
19
                   .Case("r13", (!isPPC64 && 
isDarwinABI8
) ?
04
:
12984
15
                                  
(is64Bit ? 15
PPC::X136
:
PPC::R139
))
12985
19
                   .Default(0);
12986
19
12987
19
  if (Reg)
12988
11
    return Reg;
12989
8
  report_fatal_error("Invalid register name global variable");
12990
8
}
12991
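A sketch of what feeds this table (hypothetical, using the GCC global register-variable extension): naming a reserved register pins a global to it, and the StringSwitch above resolves the name, e.g. "r1" to X1 on 64-bit targets, while names the ABI reserves for other purposes (such as "r2" on 64-bit ELF) fall through to the fatal error.
register unsigned long StackPointer asm("r1"); // resolves to PPC::X1 on ppc64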
12992
bool
12993
1.33k
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
12994
1.33k
  // The PowerPC target isn't yet aware of offsets.
12995
1.33k
  return false;
12996
1.33k
}
12997
12998
bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
12999
                                           const CallInst &I,
13000
1.35k
                                           unsigned Intrinsic) const {
13001
1.35k
  switch (Intrinsic) {
13002
22
  case Intrinsic::ppc_qpx_qvlfd:
13003
22
  case Intrinsic::ppc_qpx_qvlfs:
13004
22
  case Intrinsic::ppc_qpx_qvlfcd:
13005
22
  case Intrinsic::ppc_qpx_qvlfcs:
13006
22
  case Intrinsic::ppc_qpx_qvlfiwa:
13007
22
  case Intrinsic::ppc_qpx_qvlfiwz:
13008
22
  case Intrinsic::ppc_altivec_lvx:
13009
22
  case Intrinsic::ppc_altivec_lvxl:
13010
22
  case Intrinsic::ppc_altivec_lvebx:
13011
22
  case Intrinsic::ppc_altivec_lvehx:
13012
22
  case Intrinsic::ppc_altivec_lvewx:
13013
22
  case Intrinsic::ppc_vsx_lxvd2x:
13014
22
  case Intrinsic::ppc_vsx_lxvw4x: {
13015
22
    EVT VT;
13016
22
    switch (Intrinsic) {
13017
0
    case Intrinsic::ppc_altivec_lvebx:
13018
0
      VT = MVT::i8;
13019
0
      break;
13020
0
    case Intrinsic::ppc_altivec_lvehx:
13021
0
      VT = MVT::i16;
13022
0
      break;
13023
0
    case Intrinsic::ppc_altivec_lvewx:
13024
0
      VT = MVT::i32;
13025
0
      break;
13026
9
    case Intrinsic::ppc_vsx_lxvd2x:
13027
9
      VT = MVT::v2f64;
13028
9
      break;
13029
0
    case Intrinsic::ppc_qpx_qvlfd:
13030
0
      VT = MVT::v4f64;
13031
0
      break;
13032
0
    case Intrinsic::ppc_qpx_qvlfs:
13033
0
      VT = MVT::v4f32;
13034
0
      break;
13035
0
    case Intrinsic::ppc_qpx_qvlfcd:
13036
0
      VT = MVT::v2f64;
13037
0
      break;
13038
0
    case Intrinsic::ppc_qpx_qvlfcs:
13039
0
      VT = MVT::v2f32;
13040
0
      break;
13041
13
    default:
13042
13
      VT = MVT::v4i32;
13043
13
      break;
13044
22
    }
13045
22
13046
22
    Info.opc = ISD::INTRINSIC_W_CHAIN;
13047
22
    Info.memVT = VT;
13048
22
    Info.ptrVal = I.getArgOperand(0);
13049
22
    Info.offset = -VT.getStoreSize()+1;
13050
22
    Info.size = 2*VT.getStoreSize()-1;
13051
22
    Info.align = 1;
13052
22
    Info.vol = false;
13053
22
    Info.readMem = true;
13054
22
    Info.writeMem = false;
13055
22
    return true;
13056
22
  }
13057
0
  case Intrinsic::ppc_qpx_qvlfda:
13058
0
  case Intrinsic::ppc_qpx_qvlfsa:
13059
0
  case Intrinsic::ppc_qpx_qvlfcda:
13060
0
  case Intrinsic::ppc_qpx_qvlfcsa:
13061
0
  case Intrinsic::ppc_qpx_qvlfiwaa:
13062
0
  case Intrinsic::ppc_qpx_qvlfiwza: {
13063
0
    EVT VT;
13064
0
    switch (Intrinsic) {
13065
0
    case Intrinsic::ppc_qpx_qvlfda:
13066
0
      VT = MVT::v4f64;
13067
0
      break;
13068
0
    case Intrinsic::ppc_qpx_qvlfsa:
13069
0
      VT = MVT::v4f32;
13070
0
      break;
13071
0
    case Intrinsic::ppc_qpx_qvlfcda:
13072
0
      VT = MVT::v2f64;
13073
0
      break;
13074
0
    case Intrinsic::ppc_qpx_qvlfcsa:
13075
0
      VT = MVT::v2f32;
13076
0
      break;
13077
0
    default:
13078
0
      VT = MVT::v4i32;
13079
0
      break;
13080
0
    }
13081
0
13082
0
    Info.opc = ISD::INTRINSIC_W_CHAIN;
13083
0
    Info.memVT = VT;
13084
0
    Info.ptrVal = I.getArgOperand(0);
13085
0
    Info.offset = 0;
13086
0
    Info.size = VT.getStoreSize();
13087
0
    Info.align = 1;
13088
0
    Info.vol = false;
13089
0
    Info.readMem = true;
13090
0
    Info.writeMem = false;
13091
0
    return true;
13092
0
  }
13093
20
  case Intrinsic::ppc_qpx_qvstfd:
13094
20
  case Intrinsic::ppc_qpx_qvstfs:
13095
20
  case Intrinsic::ppc_qpx_qvstfcd:
13096
20
  case Intrinsic::ppc_qpx_qvstfcs:
13097
20
  case Intrinsic::ppc_qpx_qvstfiw:
13098
20
  case Intrinsic::ppc_altivec_stvx:
13099
20
  case Intrinsic::ppc_altivec_stvxl:
13100
20
  case Intrinsic::ppc_altivec_stvebx:
13101
20
  case Intrinsic::ppc_altivec_stvehx:
13102
20
  case Intrinsic::ppc_altivec_stvewx:
13103
20
  case Intrinsic::ppc_vsx_stxvd2x:
13104
20
  case Intrinsic::ppc_vsx_stxvw4x: {
13105
20
    EVT VT;
13106
20
    switch (Intrinsic) {
13107
0
    case Intrinsic::ppc_altivec_stvebx:
13108
0
      VT = MVT::i8;
13109
0
      break;
13110
0
    case Intrinsic::ppc_altivec_stvehx:
13111
0
      VT = MVT::i16;
13112
0
      break;
13113
0
    case Intrinsic::ppc_altivec_stvewx:
13114
0
      VT = MVT::i32;
13115
0
      break;
13116
9
    case Intrinsic::ppc_vsx_stxvd2x:
13117
9
      VT = MVT::v2f64;
13118
9
      break;
13119
0
    case Intrinsic::ppc_qpx_qvstfd:
13120
0
      VT = MVT::v4f64;
13121
0
      break;
13122
0
    case Intrinsic::ppc_qpx_qvstfs:
13123
0
      VT = MVT::v4f32;
13124
0
      break;
13125
0
    case Intrinsic::ppc_qpx_qvstfcd:
13126
0
      VT = MVT::v2f64;
13127
0
      break;
13128
0
    case Intrinsic::ppc_qpx_qvstfcs:
13129
0
      VT = MVT::v2f32;
13130
0
      break;
13131
11
    default:
13132
11
      VT = MVT::v4i32;
13133
11
      break;
13134
20
    }
13135
20
13136
20
    Info.opc = ISD::INTRINSIC_VOID;
13137
20
    Info.memVT = VT;
13138
20
    Info.ptrVal = I.getArgOperand(1);
13139
20
    Info.offset = -VT.getStoreSize()+1;
13140
20
    Info.size = 2*VT.getStoreSize()-1;
13141
20
    Info.align = 1;
13142
20
    Info.vol = false;
13143
20
    Info.readMem = false;
13144
20
    Info.writeMem = true;
13145
20
    return true;
13146
20
  }
13147
0
  case Intrinsic::ppc_qpx_qvstfda:
13148
0
  case Intrinsic::ppc_qpx_qvstfsa:
13149
0
  case Intrinsic::ppc_qpx_qvstfcda:
13150
0
  case Intrinsic::ppc_qpx_qvstfcsa:
13151
0
  case Intrinsic::ppc_qpx_qvstfiwa: {
13152
0
    EVT VT;
13153
0
    switch (Intrinsic) {
13154
0
    case Intrinsic::ppc_qpx_qvstfda:
13155
0
      VT = MVT::v4f64;
13156
0
      break;
13157
0
    case Intrinsic::ppc_qpx_qvstfsa:
13158
0
      VT = MVT::v4f32;
13159
0
      break;
13160
0
    case Intrinsic::ppc_qpx_qvstfcda:
13161
0
      VT = MVT::v2f64;
13162
0
      break;
13163
0
    case Intrinsic::ppc_qpx_qvstfcsa:
13164
0
      VT = MVT::v2f32;
13165
0
      break;
13166
0
    default:
13167
0
      VT = MVT::v4i32;
13168
0
      break;
13169
0
    }
13170
0
13171
0
    Info.opc = ISD::INTRINSIC_VOID;
13172
0
    Info.memVT = VT;
13173
0
    Info.ptrVal = I.getArgOperand(1);
13174
0
    Info.offset = 0;
13175
0
    Info.size = VT.getStoreSize();
13176
0
    Info.align = 1;
13177
0
    Info.vol = false;
13178
0
    Info.readMem = false;
13179
0
    Info.writeMem = true;
13180
0
    return true;
13181
0
  }
13182
1.30k
  default:
13183
1.30k
    break;
13184
1.30k
  }
13185
1.30k
13186
1.30k
  return false;
13187
1.30k
}
13188
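The conservative offset/size pair above encodes lvx-style address truncation: the instruction clears the low 4 address bits, so for a 16-byte type the touched bytes lie somewhere in [ptr-15, ptr+15], which is exactly offset -VT.getStoreSize()+1 with size 2*VT.getStoreSize()-1. A hypothetical call that takes this path (assumes -maltivec and <altivec.h>):
#include <altivec.h>
__vector int LoadQuadword(const int *P) {
  // vec_ld lowers to llvm.ppc.altivec.lvx; v4i32 => offset -15, size 31.
  return vec_ld(0, P);
}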
13189
/// getOptimalMemOpType - Returns the target specific optimal type for load
13190
/// and store operations as a result of memset, memcpy, and memmove
13191
/// lowering. If DstAlign is zero that means it's safe to destination
13192
/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
13193
/// means there isn't a need to check it against alignment requirement,
13194
/// probably because the source does not need to be loaded. If 'IsMemset' is
13195
/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
13196
/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
13197
/// source is constant so it does not need to be loaded.
13198
/// It returns EVT::Other if the type should be determined using generic
13199
/// target-independent logic.
13200
EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
13201
                                           unsigned DstAlign, unsigned SrcAlign,
13202
                                           bool IsMemset, bool ZeroMemset,
13203
                                           bool MemcpyStrSrc,
13204
131
                                           MachineFunction &MF) const {
13205
131
  if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
13206
54
    const Function *F = MF.getFunction();
13207
54
    // When expanding a memset, require at least two QPX instructions to cover
13208
54
    // the cost of loading the value to be stored from the constant pool.
13209
54
    if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
13210
54
        (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
13211
54
        !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
13212
2
      return MVT::v4f64;
13213
2
    }
13214
52
13215
52
    // We should use Altivec/VSX loads and stores when available. For unaligned
13216
52
    // addresses, unaligned VSX loads are only fast starting with the P8.
13217
52
    if (Subtarget.hasAltivec() && Size >= 16 &&
13218
33
        (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
13219
22
         ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
13220
30
      return MVT::v4i32;
13221
99
  }
13222
99
13223
99
  if (Subtarget.isPPC64()) {
13224
97
    return MVT::i64;
13225
97
  }
13226
2
13227
2
  return MVT::i32;
13228
2
}
13229
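For example (hypothetical snippet, compiled with optimization on an Altivec-capable subtarget): a 32-byte copy between 16-byte-aligned objects satisfies the Altivec branch above, so the memcpy expands with MVT::v4i32 as two vector load/store pairs rather than four scalar i64 pairs.
#include <string.h>
struct alignas(16) Block { char Bytes[32]; };
void CopyBlock(Block *Dst, const Block *Src) {
  memcpy(Dst, Src, sizeof(Block)); // Size=32, DstAlign=SrcAlign=16 -> v4i32
}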
13230
/// \brief Returns true if it is beneficial to convert a load of a constant
13231
/// to just the constant itself.
13232
bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
13233
0
                                                          Type *Ty) const {
13234
0
  assert(Ty->isIntegerTy());
13235
0
13236
0
  unsigned BitSize = Ty->getPrimitiveSizeInBits();
13237
0
  return !(BitSize == 0 || BitSize > 64);
13238
0
}
13239
13240
1.63k
bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
13241
1.63k
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
13242
1
    return false;
13243
1.63k
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
13244
1.63k
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
13245
729
  return NumBits1 == 64 && NumBits2 == 32;
13246
1.63k
}
13247
13248
2.99k
bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
13249
2.99k
  if (!VT1.isInteger() || !VT2.isInteger())
13250
0
    return false;
13251
2.99k
  unsigned NumBits1 = VT1.getSizeInBits();
13252
2.99k
  unsigned NumBits2 = VT2.getSizeInBits();
13253
440
  return NumBits1 == 64 && NumBits2 == 32;
13254
2.99k
}
13255
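Both overloads state the same fact, shown here with a hypothetical snippet: on PPC64 an i64-to-i32 truncation costs nothing, because the 32-bit value is simply the low half of the 64-bit register.
int LowWord(long long X) { return (int)X; } // no instruction needed on ppc64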
13256
2.57k
bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
13257
2.57k
  // Generally speaking, zexts are not free, but they are free when they can be
13258
2.57k
  // folded with other operations.
13259
2.57k
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
13260
171
    EVT MemVT = LD->getMemoryVT();
13261
171
    if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
13262
156
         (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
13263
48
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
13264
0
         LD->getExtensionType() == ISD::ZEXTLOAD))
13265
48
      return true;
13266
2.52k
  }
13267
2.52k
13268
2.52k
  // FIXME: Add other cases...
13269
2.52k
  //  - 32-bit shifts with a zext to i64
13270
2.52k
  //  - zext after ctlz, bswap, etc.
13271
2.52k
  //  - zext after and by a constant mask
13272
2.52k
13273
2.52k
  return TargetLowering::isZExtFree(Val, VT2);
13274
2.52k
}
13275
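As a hypothetical example of the load case above: lbz/lhz/lwz already zero-extend their result, so the zext implied below folds into the load and costs nothing.
unsigned long long LoadByte(const unsigned char *P) {
  return *P; // lbz zero-extends; the zext to i64 is free
}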
13276
540
bool PPCTargetLowering::isFPExtFree(EVT VT) const {
13277
540
  assert(VT.isFloatingPoint());
13278
540
  return true;
13279
540
}
13280
13281
6.05k
bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
13282
148
  return isInt<16>(Imm) || isUInt<16>(Imm);
13283
6.05k
}
13284
13285
3.04k
bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
13286
159
  return isInt<16>(Imm) || isUInt<16>(Imm);
13287
3.04k
}
13288
13289
bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
13290
                                                       unsigned,
13291
                                                       unsigned,
13292
972
                                                       bool *Fast) const {
13293
972
  if (DisablePPCUnaligned)
13294
12
    return false;
13295
960
13296
960
  // PowerPC supports unaligned memory access for simple non-vector types.
13297
960
  // Although accessing unaligned addresses is not as efficient as accessing
13298
960
  // aligned addresses, it is generally more efficient than manual expansion,
13299
960
  // and generally only traps for software emulation when crossing page
13300
960
  // boundaries.
13301
960
13302
960
  if (!VT.isSimple())
13303
0
    return false;
13304
960
13305
960
  if (VT.getSimpleVT().isVector()) {
13306
601
    if (Subtarget.hasVSX()) {
13307
581
      if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
13308
581
          VT != MVT::v4f32 && VT != MVT::v4i32)
13309
0
        return false;
13310
20
    } else {
13311
20
      return false;
13312
20
    }
13313
940
  }
13314
940
13315
940
  if (VT == MVT::ppcf128)
13316
0
    return false;
13317
940
13318
940
  if (Fast)
13319
32
    *Fast = true;
13320
972
13321
972
  return true;
13322
972
}
13323
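A hypothetical consequence of returning true here: the type-punning idiom below stays a single 64-bit load instead of being expanded byte by byte, even when P is not 8-byte aligned.
#include <string.h>
unsigned long long LoadUnaligned64(const void *P) {
  unsigned long long V;
  memcpy(&V, P, sizeof(V)); // kept as one ld; reported fast when queried
  return V;
}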
13324
1.30k
bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
13325
1.30k
  VT = VT.getScalarType();
13326
1.30k
13327
1.30k
  if (!VT.isSimple())
13328
0
    return false;
13329
1.30k
13330
1.30k
  switch (VT.getSimpleVT().SimpleTy) {
13331
1.28k
  case MVT::f32:
13332
1.28k
  case MVT::f64:
13333
1.28k
    return true;
13334
15
  default:
13335
15
    break;
13336
15
  }
13337
15
13338
15
  return false;
13339
15
}
13340
13341
const MCPhysReg *
13342
59
PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
13343
59
  // LR is a callee-save register, but we must treat it as clobbered by any call
13344
59
  // site. Hence we include LR in the scratch registers, which are in turn added
13345
59
  // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
13346
59
  // to CTR, which is used by any indirect call.
13347
59
  static const MCPhysReg ScratchRegs[] = {
13348
59
    PPC::X12, PPC::LR8, PPC::CTR8, 0
13349
59
  };
13350
59
13351
59
  return ScratchRegs;
13352
59
}
13353
13354
unsigned PPCTargetLowering::getExceptionPointerRegister(
13355
66
    const Constant *PersonalityFn) const {
13356
66
  return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
13357
66
}
13358
13359
unsigned PPCTargetLowering::getExceptionSelectorRegister(
13360
33
    const Constant *PersonalityFn) const {
13361
33
  return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
13362
33
}
13363
13364
bool
13365
PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
13366
238
                     EVT VT , unsigned DefinedValues) const {
13367
238
  if (VT == MVT::v2i64)
13368
73
    return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
13369
165
13370
165
  if (Subtarget.hasVSX() || Subtarget.hasQPX())
13371
143
    return true;
13372
22
13373
22
  return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
13374
22
}
13375
13376
74.6k
Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
13377
74.6k
  if (DisableILPPref || Subtarget.enableMachineScheduler())
13378
56.5k
    return TargetLowering::getSchedulingPreference(N);
13379
18.0k
13380
18.0k
  return Sched::ILP;
13381
18.0k
}
13382
13383
// Create a fast isel object.
13384
FastISel *
13385
PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
13386
596
                                  const TargetLibraryInfo *LibInfo) const {
13387
596
  return PPC::createFastISel(FuncInfo, LibInfo);
13388
596
}
13389
13390
3
void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
13391
3
  if (Subtarget.isDarwinABI()) return;
13392
3
  if (!Subtarget.isPPC64()) return;
13393
3
13394
3
  // Update IsSplitCSR in PPCFunctionInfo
13395
3
  PPCFunctionInfo *PFI = Entry->getParent()->getInfo<PPCFunctionInfo>();
13396
3
  PFI->setIsSplitCSR(true);
13397
3
}
13398
13399
void PPCTargetLowering::insertCopiesSplitCSR(
13400
  MachineBasicBlock *Entry,
13401
3
  const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
13402
3
  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
13403
3
  const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
13404
3
  if (!IStart)
13405
0
    return;
13406
3
13407
3
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13408
3
  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
13409
3
  MachineBasicBlock::iterator MBBI = Entry->begin();
13410
158
  for (const MCPhysReg *I = IStart; *I; ++I) {
13411
155
    const TargetRegisterClass *RC = nullptr;
13412
155
    if (PPC::G8RCRegClass.contains(*I))
13413
56
      RC = &PPC::G8RCRegClass;
13414
99
    else if (PPC::F8RCRegClass.contains(*I))
13415
54
      RC = &PPC::F8RCRegClass;
13416
45
    else if (PPC::CRRCRegClass.contains(*I))
13417
9
      RC = &PPC::CRRCRegClass;
13418
36
    else if (PPC::VRRCRegClass.contains(*I))
13419
36
      RC = &PPC::VRRCRegClass;
13420
36
    else
13421
0
      llvm_unreachable("Unexpected register class in CSRsViaCopy!");
13422
155
13423
155
    unsigned NewVR = MRI->createVirtualRegister(RC);
13424
155
    // Create copy from CSR to a virtual register.
13425
155
    // FIXME: this currently does not emit CFI pseudo-instructions, it works
13426
155
    // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
13427
155
    // nounwind. If we want to generalize this later, we may need to emit
13428
155
    // CFI pseudo-instructions.
13429
155
    assert(Entry->getParent()->getFunction()->hasFnAttribute(
13430
155
             Attribute::NoUnwind) &&
13431
155
           "Function should be nounwind in insertCopiesSplitCSR!");
13432
155
    Entry->addLiveIn(*I);
13433
155
    BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
13434
155
      .addReg(*I);
13435
155
13436
155
    // Insert the copy-back instructions right before the terminator
13437
155
    for (auto *Exit : Exits)
13438
155
      BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
13439
155
              TII->get(TargetOpcode::COPY), *I)
13440
155
        .addReg(NewVR);
13441
155
  }
13442
3
}
13443
13444
// Override to enable LOAD_STACK_GUARD lowering on Linux.
13445
12
bool PPCTargetLowering::useLoadStackGuardNode() const {
13446
12
  if (!Subtarget.isTargetLinux())
13447
6
    return TargetLowering::useLoadStackGuardNode();
13448
6
  return true;
13449
6
}
13450
13451
// Override to disable global variable loading on Linux.
13452
4
void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
13453
4
  if (!Subtarget.isTargetLinux())
13454
2
    return TargetLowering::insertSSPDeclarations(M);
13455
2
}
13456
13457
364
bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
13458
364
  if (!VT.isSimple() || !Subtarget.hasVSX())
13459
210
    return false;
13460
154
13461
154
  switch(VT.getSimpleVT().SimpleTy) {
13462
0
  default:
13463
0
    // For FP types that are currently not supported by PPC backend, return
13464
0
    // false. Examples: f16, f80.
13465
0
    return false;
13466
154
  case MVT::f32:
13467
154
  case MVT::f64:
13468
154
  case MVT::ppcf128:
13469
154
    return Imm.isPosZero();
13470
0
  }
13471
0
}
13472
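Illustrative contrast (hypothetical snippet): with VSX, positive zero is the only legal FP immediate (an xxlxor can materialize it), while other constants such as 1.0 still come from the constant pool.
double Zero() { return 0.0; } // legal immediate: Imm.isPosZero()
double One()  { return 1.0; } // not an immediate: loaded from memory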
13473
// For vector shift operation op, fold
13474
// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
13475
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
13476
2.37k
                                  SelectionDAG &DAG) {
13477
2.37k
  SDValue N0 = N->getOperand(0);
13478
2.37k
  SDValue N1 = N->getOperand(1);
13479
2.37k
  EVT VT = N0.getValueType();
13480
2.37k
  unsigned OpSizeInBits = VT.getScalarSizeInBits();
13481
2.37k
  unsigned Opcode = N->getOpcode();
13482
2.37k
  unsigned TargetOpcode;
13483
2.37k
13484
2.37k
  switch (Opcode) {
13485
0
  default:
13486
0
    llvm_unreachable("Unexpected shift operation");
13487
1.40k
  case ISD::SHL:
13488
1.40k
    TargetOpcode = PPCISD::SHL;
13489
1.40k
    break;
13490
569
  case ISD::SRL:
13491
569
    TargetOpcode = PPCISD::SRL;
13492
569
    break;
13493
400
  case ISD::SRA:
13494
400
    TargetOpcode = PPCISD::SRA;
13495
400
    break;
13496
2.37k
  }
13497
2.37k
13498
2.37k
  if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
13499
144
      N1->getOpcode() == ISD::AND)
13500
12
    if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
13501
12
      if (Mask->getZExtValue() == OpSizeInBits - 1)
13502
12
        return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
13503
2.36k
13504
2.36k
  return SDValue();
13505
2.36k
}
13506
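The fold above removes a redundant modulo mask on vector shift amounts. A hypothetical source pattern that produces it (GCC/Clang vector extensions assumed): the explicit "& 31" matches (and y, numbits-1), so the combine emits the PPCISD shift node directly, since an instruction like vslw already interprets each lane's amount modulo 32.
typedef unsigned int v4u __attribute__((vector_size(16)));
v4u ShiftEach(v4u X, v4u N) {
  return X << (N & 31); // mask dropped by combineSHL via stripModuloOnShift
}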
13507
1.40k
SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
13508
1.40k
  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
13509
4
    return Value;
13510
1.40k
13511
1.40k
  return SDValue();
13512
1.40k
}
13513
13514
400
SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
13515
400
  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
13516
4
    return Value;
13517
396
13518
396
  return SDValue();
13519
396
}
13520
13521
569
SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
13522
569
  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
13523
4
    return Value;
13524
565
13525
565
  return SDValue();
13526
565
}