Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the SystemZTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SystemZISelLowering.h"
#include "SystemZCallingConv.h"
#include "SystemZConstantPoolValue.h"
#include "SystemZMachineFunctionInfo.h"
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
#include <cctype>

using namespace llvm;

#define DEBUG_TYPE "systemz-lower"

namespace {
// Represents information about a comparison.
struct Comparison {
  Comparison(SDValue Op0In, SDValue Op1In)
    : Op0(Op0In), Op1(Op1In), Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}

  // The operands to the comparison.
  SDValue Op0, Op1;

  // The opcode that should be used to compare Op0 and Op1.
  unsigned Opcode;

  // A SystemZICMP value.  Only used for integer comparisons.
  unsigned ICmpType;

  // The mask of CC values that Opcode can produce.
  unsigned CCValid;

  // The mask of CC values for which the original condition is true.
  unsigned CCMask;
};
} // end anonymous namespace
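
// For illustration: the z/Architecture condition code is a 2-bit value
// (0-3), so CCValid and CCMask are 4-bit masks with one bit per possible CC
// value.  An integer compare sets CC 0 for "equal", CC 1 for "less" and
// CC 2 for "greater"; a signed "a < b" test would therefore use a CCValid
// covering CC 0-2 and a CCMask containing only the CC 1 bit, and the
// condition holds exactly when the CC produced by Opcode lands in CCMask.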

// Classify VT as either 32 or 64 bit.
static bool is32Bit(EVT VT) {
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::i32:
    return true;
  case MVT::i64:
    return false;
  default:
    llvm_unreachable("Unsupported type");
  }
}

// Return a version of MachineOperand that can be safely used before the
// final use.
static MachineOperand earlyUseOperand(MachineOperand Op) {
  if (Op.isReg())
    Op.setIsKill(false);
  return Op;
}

SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
                                             const SystemZSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize(0));

  // Set up the register classes.
  if (Subtarget.hasHighWord())
    addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
  else
    addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
  addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
  if (Subtarget.hasVector()) {
    addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
    addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
  } else {
    addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
    addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
  }
  if (Subtarget.hasVectorEnhancements1())
    addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
  else
    addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);

  if (Subtarget.hasVector()) {
    addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
  }

  // Compute derived properties from the register classes
  computeRegisterProperties(Subtarget.getRegisterInfo());

  // Set up special registers.
  setStackPointerRegisterToSaveRestore(SystemZ::R15D);

  // TODO: It may be better to default to latency-oriented scheduling, however
  // LLVM's current latency-oriented scheduler can't handle physreg definitions
  // such as SystemZ has with CC, so set this to the register-pressure
  // scheduler, because it can.
  setSchedulingPreference(Sched::RegPressure);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  // Instructions are strings of 2-byte aligned 2-byte values.
  setMinFunctionAlignment(2);
  // For performance reasons we prefer 16-byte alignment.
  setPrefFunctionAlignment(4);

  // Handle operations that are handled in a similar way for all types.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // Lower SET_CC into an IPM-based sequence.
      setOperationAction(ISD::SETCC, VT, Custom);

      // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
      setOperationAction(ISD::SELECT, VT, Expand);

      // Lower SELECT_CC and BR_CC into separate comparisons and branches.
      setOperationAction(ISD::SELECT_CC, VT, Custom);
      setOperationAction(ISD::BR_CC,     VT, Custom);
    }
  }

  // Expand jump table branches as address arithmetic followed by an
  // indirect jump.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // Expand BRCOND into a BR_CC (see above).
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  // Handle integer types.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_INTEGER_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // Expand individual DIV and REMs into DIVREMs.
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Custom);
      setOperationAction(ISD::UDIVREM, VT, Custom);

      // Support addition/subtraction with overflow.
      setOperationAction(ISD::SADDO, VT, Custom);
      setOperationAction(ISD::SSUBO, VT, Custom);

      // Support addition/subtraction with carry.
      setOperationAction(ISD::UADDO, VT, Custom);
      setOperationAction(ISD::USUBO, VT, Custom);

      // Support carry in as value rather than glue.
      setOperationAction(ISD::ADDCARRY, VT, Custom);
      setOperationAction(ISD::SUBCARRY, VT, Custom);

      // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
      // stores, putting a serialization instruction after the stores.
      setOperationAction(ISD::ATOMIC_LOAD,  VT, Custom);
      setOperationAction(ISD::ATOMIC_STORE, VT, Custom);

      // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
      // available, or if the operand is constant.
      setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);

      // Use POPCNT on z196 and above.
      if (Subtarget.hasPopulationCount())
        setOperationAction(ISD::CTPOP, VT, Custom);
      else
        setOperationAction(ISD::CTPOP, VT, Expand);

      // No special instructions for these.
      setOperationAction(ISD::CTTZ,            VT, Expand);
      setOperationAction(ISD::ROTR,            VT, Expand);

      // Use *MUL_LOHI where possible instead of MULH*.
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Custom);
      setOperationAction(ISD::UMUL_LOHI, VT, Custom);

      // Only z196 and above have native support for conversions to unsigned.
      // On z10, promoting to i64 doesn't generate an inexact condition for
      // values that are outside the i32 range but in the i64 range, so use
      // the default expansion.
      if (!Subtarget.hasFPExtension())
        setOperationAction(ISD::FP_TO_UINT, VT, Expand);
    }
  }

  // Type legalization will convert 8- and 16-bit atomic operations into
  // forms that operate on i32s (but still keeping the original memory VT).
  // Lower them into full i32 operations.
  setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MIN,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MAX,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);

  // Even though i128 is not a legal type, we still need to custom lower
  // the atomic operations in order to exploit SystemZ instructions.
  setOperationAction(ISD::ATOMIC_LOAD,     MVT::i128, Custom);
  setOperationAction(ISD::ATOMIC_STORE,    MVT::i128, Custom);

  // We can use the CC result of compare-and-swap to implement
  // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  // Traps are legal, as we will convert them to "j .+2".
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // z10 has instructions for signed but not unsigned FP conversion.
  // Handle unsigned 32-bit types as signed 64-bit types.
  if (!Subtarget.hasFPExtension()) {
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
  }

  // We have native support for a 64-bit CTLZ, via FLOGR.
  setOperationAction(ISD::CTLZ, MVT::i32, Promote);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
  setOperationAction(ISD::CTLZ, MVT::i64, Legal);

  // On arch13 we have native support for a 64-bit CTPOP.
  if (Subtarget.hasMiscellaneousExtensions3()) {
    setOperationAction(ISD::CTPOP, MVT::i32, Promote);
    setOperationAction(ISD::CTPOP, MVT::i64, Legal);
  }

  // Give LowerOperation the chance to replace 64-bit ORs with subregs.
  setOperationAction(ISD::OR, MVT::i64, Custom);

  // FIXME: Can we support these natively?
  setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);

  // We have native instructions for i8, i16 and i32 extensions, but not i1.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD,  VT, MVT::i1, Promote);
  }

  // Handle the various types of symbolic address.
  setOperationAction(ISD::ConstantPool,     PtrVT, Custom);
  setOperationAction(ISD::GlobalAddress,    PtrVT, Custom);
  setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
  setOperationAction(ISD::BlockAddress,     PtrVT, Custom);
  setOperationAction(ISD::JumpTable,        PtrVT, Custom);

  // We need to handle dynamic allocations specially because of the
  // 160-byte area at the bottom of the stack.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom);

  // Use custom expanders so that we can force the function to use
  // a frame pointer.
  setOperationAction(ISD::STACKSAVE,    MVT::Other, Custom);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);

  // Handle prefetches with PFD or PFDRL.
  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);

  for (MVT VT : MVT::vector_valuetypes()) {
    // Assume by default that all vector operations need to be expanded.
    for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
      if (getOperationAction(Opcode, VT) == Legal)
        setOperationAction(Opcode, VT, Expand);

    // Likewise all truncating stores and extending loads.
    for (MVT InnerVT : MVT::vector_valuetypes()) {
      setTruncStoreAction(VT, InnerVT, Expand);
      setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
    }

    if (isTypeLegal(VT)) {
      // These operations are legal for anything that can be stored in a
      // vector register, even if there is no native support for the format
      // as such.  In particular, we can do these for v4f32 even though there
      // are no specific instructions for that format.
      setOperationAction(ISD::LOAD, VT, Legal);
      setOperationAction(ISD::STORE, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      // Likewise, except that we need to replace the nodes with something
      // more specific.
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
  }

  // Handle integer vector types.
  for (MVT VT : MVT::integer_vector_valuetypes()) {
    if (isTypeLegal(VT)) {
      // These operations have direct equivalents.
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);
      if (VT != MVT::v2i64)
        setOperationAction(ISD::MUL, VT, Legal);
      setOperationAction(ISD::AND, VT, Legal);
      setOperationAction(ISD::OR, VT, Legal);
      setOperationAction(ISD::XOR, VT, Legal);
      if (Subtarget.hasVectorEnhancements1())
        setOperationAction(ISD::CTPOP, VT, Legal);
      else
        setOperationAction(ISD::CTPOP, VT, Custom);
      setOperationAction(ISD::CTTZ, VT, Legal);
      setOperationAction(ISD::CTLZ, VT, Legal);

      // Convert a GPR scalar to a vector by inserting it into element 0.
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);

      // Use a series of unpacks for extensions.
      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);

      // Detect shifts by a scalar amount and convert them into
      // V*_BY_SCALAR.
      setOperationAction(ISD::SHL, VT, Custom);
      setOperationAction(ISD::SRA, VT, Custom);
      setOperationAction(ISD::SRL, VT, Custom);

      // At present ROTL isn't matched by DAGCombiner.  ROTR should be
      // converted into ROTL.
      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
      // and inverting the result as necessary.
      setOperationAction(ISD::SETCC, VT, Custom);
    }
  }

  if (Subtarget.hasVector()) {
    // There should be no need to check for float types other than v2f64
    // since <2 x f32> isn't a legal type.
    setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal);
  }

  if (Subtarget.hasVectorEnhancements2()) {
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal);
  }

  // Handle floating-point types.
  for (unsigned I = MVT::FIRST_FP_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // We can use FI for FRINT.
      setOperationAction(ISD::FRINT, VT, Legal);

      // We can use the extended form of FI for other rounding operations.
      if (Subtarget.hasFPExtension()) {
        setOperationAction(ISD::FNEARBYINT, VT, Legal);
        setOperationAction(ISD::FFLOOR, VT, Legal);
        setOperationAction(ISD::FCEIL, VT, Legal);
        setOperationAction(ISD::FTRUNC, VT, Legal);
        setOperationAction(ISD::FROUND, VT, Legal);
      }

      // No special instructions for these.
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FSINCOS, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);

      // Handle constrained floating-point operations.
      setOperationAction(ISD::STRICT_FADD, VT, Legal);
      setOperationAction(ISD::STRICT_FSUB, VT, Legal);
      setOperationAction(ISD::STRICT_FMUL, VT, Legal);
      setOperationAction(ISD::STRICT_FDIV, VT, Legal);
      setOperationAction(ISD::STRICT_FMA, VT, Legal);
      setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
      setOperationAction(ISD::STRICT_FRINT, VT, Legal);
      setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal);
      setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
      if (Subtarget.hasFPExtension()) {
        setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
        setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
        setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
        setOperationAction(ISD::STRICT_FROUND, VT, Legal);
        setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
      }
    }
  }

  // Handle floating-point vector types.
  if (Subtarget.hasVector()) {
    // Scalar-to-vector conversion is just a subreg.
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);

    // Some insertions and extractions can be done directly but others
    // need to go via integers.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);

    // These operations have direct equivalents.
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FMA, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::FABS, MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v2f64, Legal);

    // Handle constrained floating-point operations.
    setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
  }

  // The vector enhancements facility 1 has instructions for these.
  if (Subtarget.hasVectorEnhancements1()) {
    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
    setOperationAction(ISD::FABS, MVT::v4f32, Legal);
    setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::f64, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::v2f64, Legal);
    setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::v2f64, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::f128, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::f128, Legal);

    // Handle constrained floating-point operations.
    setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
    for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
                     MVT::v4f32, MVT::v2f64 }) {
      setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal);
      setOperationAction(ISD::STRICT_FMINNUM, VT, Legal);
    }
  }

  // We have fused multiply-addition for f32 and f64 but not f128.
  setOperationAction(ISD::FMA, MVT::f32,  Legal);
  setOperationAction(ISD::FMA, MVT::f64,  Legal);
  if (Subtarget.hasVectorEnhancements1())
    setOperationAction(ISD::FMA, MVT::f128, Legal);
  else
    setOperationAction(ISD::FMA, MVT::f128, Expand);

  // We don't have a copysign instruction on vector registers.
  if (Subtarget.hasVectorEnhancements1())
    setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);

  // Needed so that we don't try to implement f128 constant loads using
  // a load-and-extend of a f80 constant (in cases where the constant
  // would fit in an f80).
  for (MVT VT : MVT::fp_valuetypes())
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);

  // We don't have extending load instruction on vector registers.
  if (Subtarget.hasVectorEnhancements1()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
  }

  // Floating-point truncation and stores need to be done separately.
  setTruncStoreAction(MVT::f64,  MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f64, Expand);

  // We have 64-bit FPR<->GPR moves, but need special handling for
  // 32-bit forms.
  if (!Subtarget.hasVector()) {
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::f32, Custom);
  }

  // VASTART and VACOPY need to deal with the SystemZ-specific varargs
  // structure, but VAEND is a no-op.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VACOPY,  MVT::Other, Custom);
  setOperationAction(ISD::VAEND,   MVT::Other, Expand);

  // Codes for which we want to perform some z-specific combinations.
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
  setTargetDAGCombine(ISD::LOAD);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_EXTEND);
  setTargetDAGCombine(ISD::BSWAP);
  setTargetDAGCombine(ISD::SDIV);
  setTargetDAGCombine(ISD::UDIV);
  setTargetDAGCombine(ISD::SREM);
  setTargetDAGCombine(ISD::UREM);

  // Handle intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We want to use MVC in preference to even a single load/store pair.
  MaxStoresPerMemcpy = 0;
  MaxStoresPerMemcpyOptSize = 0;

  // The main memset sequence is a byte store followed by an MVC.
  // Two STC or MV..I stores win over that, but the kind of fused stores
  // generated by target-independent code don't when the byte value is
  // variable.  E.g.  "STC <reg>;MHI <reg>,257;STH <reg>" is not better
  // than "STC;MVC".  Handle the choice in target-specific code instead.
  MaxStoresPerMemset = 0;
  MaxStoresPerMemsetOptSize = 0;
}
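
// A minimal sketch of how the Custom markings above take effect: during DAG
// legalization, any node whose action is Custom is handed to this target's
// LowerOperation() hook, which dispatches on the opcode roughly as follows
// (abridged; the full dispatch appears later in this file):
//
//   SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
//                                                 SelectionDAG &DAG) const {
//     switch (Op.getOpcode()) {
//     case ISD::BR_CC:     return lowerBR_CC(Op, DAG);
//     case ISD::SELECT_CC: return lowerSELECT_CC(Op, DAG);
//     case ISD::SETCC:     return lowerSETCC(Op, DAG);
//     // ... one case per Custom opcode registered above ...
//     default:
//       llvm_unreachable("Unexpected node to lower");
//     }
//   }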

EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
                                              LLVMContext &, EVT VT) const {
  if (!VT.isVector())
    return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  case MVT::f128:
    return Subtarget.hasVectorEnhancements1();
  default:
    break;
  }

  return false;
}
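
// For reference: the f32/f64 cases above map onto the fused multiply-and-add
// instructions (e.g. MAEBR for f32 and MADBR for f64), while an f128 fused
// multiply-add only exists as a vector-register operation introduced with
// the vector enhancements facility 1, hence the subtarget check.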

// Return true if the constant can be generated with a vector instruction,
// such as VGM, VGMB or VREPI.
bool SystemZVectorConstantInfo::isVectorConstantLegal(
    const SystemZSubtarget &Subtarget) {
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  if (!Subtarget.hasVector() ||
      (isFP128 && !Subtarget.hasVectorEnhancements1()))
    return false;

  // Try using VECTOR GENERATE BYTE MASK.  This is the architecturally-
  // preferred way of creating all-zero and all-one vectors so give it
  // priority over other methods below.
  unsigned Mask = 0;
  unsigned I = 0;
  for (; I < SystemZ::VectorBytes; ++I) {
    uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
    if (Byte == 0xff)
      Mask |= 1ULL << I;
    else if (Byte != 0)
      break;
  }
  if (I == SystemZ::VectorBytes) {
    Opcode = SystemZISD::BYTE_MASK;
    OpVals.push_back(Mask);
    VecVT = MVT::getVectorVT(MVT::getIntegerVT(8), 16);
    return true;
  }

  if (SplatBitSize > 64)
    return false;

  auto tryValue = [&](uint64_t Value) -> bool {
    // Try VECTOR REPLICATE IMMEDIATE
    int64_t SignedValue = SignExtend64(Value, SplatBitSize);
    if (isInt<16>(SignedValue)) {
      OpVals.push_back(((unsigned) SignedValue));
      Opcode = SystemZISD::REPLICATE;
      VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
                               SystemZ::VectorBits / SplatBitSize);
      return true;
    }
    // Try VECTOR GENERATE MASK
    unsigned Start, End;
    if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
      // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
      // denoting 1 << 63 and 63 denoting 1.  Convert them to bit numbers for
      // an SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
      OpVals.push_back(Start - (64 - SplatBitSize));
      OpVals.push_back(End - (64 - SplatBitSize));
      Opcode = SystemZISD::ROTATE_MASK;
      VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
                               SystemZ::VectorBits / SplatBitSize);
      return true;
    }
    return false;
  };

  // First try assuming that any undefined bits above the highest set bit
  // and below the lowest set bit are 1s.  This increases the likelihood of
  // being able to use a sign-extended element value in VECTOR REPLICATE
  // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
  uint64_t SplatBitsZ = SplatBits.getZExtValue();
  uint64_t SplatUndefZ = SplatUndef.getZExtValue();
  uint64_t Lower =
      (SplatUndefZ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
  uint64_t Upper =
      (SplatUndefZ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
  if (tryValue(SplatBitsZ | Upper | Lower))
    return true;

  // Now try assuming that any undefined bits between the first and
  // last defined set bits are set.  This increases the chances of
  // using a non-wraparound mask.
  uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
  return tryValue(SplatBitsZ | Middle);
}
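
// Worked example for the VECTOR GENERATE BYTE MASK path above: a v2i64
// splat of 0xffffffff00000000 gives IntBits whose bytes, from least to most
// significant, are 00 00 00 00 ff ff ff ff repeated twice.  The loop
// therefore sets Mask bits 4-7 and 12-15 (Mask == 0xf0f0), runs to
// I == SystemZ::VectorBytes, and selects SystemZISD::BYTE_MASK, i.e. a
// single VGBM instead of a constant-pool load.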

SystemZVectorConstantInfo::SystemZVectorConstantInfo(APFloat FPImm) {
  IntBits = FPImm.bitcastToAPInt().zextOrSelf(128);
  isFP128 = (&FPImm.getSemantics() == &APFloat::IEEEquad());

  // Find the smallest splat.
  SplatBits = FPImm.bitcastToAPInt();
  unsigned Width = SplatBits.getBitWidth();
  while (Width > 8) {
    unsigned HalfSize = Width / 2;
    APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
    APInt LowValue = SplatBits.trunc(HalfSize);

    // If the two halves do not match, stop here.
    if (HighValue != LowValue || 8 > HalfSize)
      break;

    SplatBits = HighValue;
    Width = HalfSize;
  }
  SplatUndef = 0;
  SplatBitSize = Width;
}
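
// Worked example of the halving loop above: an f64 immediate whose bit
// pattern is 0x5555555555555555 splits into matching halves at every step,
// so SplatBits narrows 64 -> 32 -> 16 -> 8 and ends with SplatBitSize == 8
// and SplatBits == 0x55.  For 1.0 (0x3ff0000000000000) the first halves
// 0x3ff00000 and 0x00000000 already differ, so SplatBitSize stays 64.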

SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) {
  assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
  bool HasAnyUndefs;

  // Get IntBits by finding the 128 bit splat.
  BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
                       true);

  // Get SplatBits by finding the 8 bit or greater splat.
  BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
                       true);
}

bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                         bool ForCodeSize) const {
  // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
  if (Imm.isZero() || Imm.isNegZero())
    return true;

  return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
}
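
// Example of the zero cases above: +0.0 is materialized with a single
// load-zero instruction (LZER/LZDR/LZXR depending on width), and -0.0 with
// a load-zero followed by a load-complement such as LCDBR, so both are
// cheap enough to treat as legal immediates.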

bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  // We can use CGFI or CLGFI.
  return isInt<32>(Imm) || isUInt<32>(Imm);
}

bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  // We can use ALGFI or SLGFI.
  return isUInt<32>(Imm) || isUInt<32>(-Imm);
}
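
// For example, Imm == 0x100000000 needs 33 bits and is rejected, while
// Imm == -4294967295 is accepted: -Imm fits in 32 unsigned bits, so the
// addition can be emitted as a subtract-logical-immediate (SLGFI) of
// 4294967295.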

bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned, unsigned, MachineMemOperand::Flags, bool *Fast) const {
  // Unaligned accesses should never be slower than the expanded version.
  // We check specifically for aligned accesses in the few cases where
  // they are required.
  if (Fast)
    *Fast = true;
  return true;
}

// Information about the addressing mode for a memory access.
struct AddressingMode {
  // True if a long displacement is supported.
  bool LongDisplacement;

  // True if use of index register is supported.
  bool IndexReg;

  AddressingMode(bool LongDispl, bool IdxReg) :
    LongDisplacement(LongDispl), IndexReg(IdxReg) {}
};

// Return the desired addressing mode for a Load which has only one use (in
// the same block) which is a Store.
static AddressingMode getLoadStoreAddrMode(bool HasVector,
                                          Type *Ty) {
  // With vector support a Load->Store combination may be combined to either
  // an MVC or vector operations and it seems to work best to allow the
  // vector addressing mode.
  if (HasVector)
    return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);

  // Otherwise only the MVC case is special.
  bool MVC = Ty->isIntegerTy(8);
  return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
}

// Return the addressing mode which seems most desirable given an LLVM
// Instruction pointer.
static AddressingMode
supportedAddressingMode(Instruction *I, bool HasVector) {
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default: break;
    case Intrinsic::memset:
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
    }
  }

  if (isa<LoadInst>(I) && I->hasOneUse()) {
    auto *SingleUser = dyn_cast<Instruction>(*I->user_begin());
    if (SingleUser->getParent() == I->getParent()) {
      if (isa<ICmpInst>(SingleUser)) {
        if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
          if (C->getBitWidth() <= 64 &&
              (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
            // Comparison of memory with 16 bit signed / unsigned immediate
            return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
      } else if (isa<StoreInst>(SingleUser))
        // Load->Store
        return getLoadStoreAddrMode(HasVector, I->getType());
    }
  } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
    if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
      if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
        // Load->Store
        return getLoadStoreAddrMode(HasVector, LoadI->getType());
  }

  if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {

    // * Use LDE instead of LE/LEY for z13 to avoid partial register
    //   dependencies (LDE only supports small offsets).
    // * Utilize the vector registers to hold floating point
    //   values (vector load / store instructions only support small
    //   offsets).

    Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
                         I->getOperand(0)->getType());
    bool IsFPAccess = MemAccessTy->isFloatingPointTy();
    bool IsVectorAccess = MemAccessTy->isVectorTy();

    // A store of an extracted vector element will be combined into a VSTE type
    // instruction.
    if (!IsVectorAccess && isa<StoreInst>(I)) {
      Value *DataOp = I->getOperand(0);
      if (isa<ExtractElementInst>(DataOp))
        IsVectorAccess = true;
    }

    // A load which gets inserted into a vector element will be combined into a
    // VLE type instruction.
    if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
      User *LoadUser = *I->user_begin();
      if (isa<InsertElementInst>(LoadUser))
        IsVectorAccess = true;
    }

    if (IsFPAccess || IsVectorAccess)
      return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
  }

  return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
}
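
// Background for the choices above: classic z/Architecture memory operands
// are base register + unsigned 12-bit displacement, optionally + index
// register; the long-displacement facility widens this to a signed 20-bit
// displacement.  Storage-to-storage instructions such as MVC accept only the
// short form and no index, which is why the memcpy/memset intrinsics and the
// MVC-able Load->Store pattern return AddressingMode(false, false).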

bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
       const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
  // Punt on globals for now, although they can be used in limited
  // RELATIVE LONG cases.
  if (AM.BaseGV)
    return false;

  // Require a 20-bit signed offset.
  if (!isInt<20>(AM.BaseOffs))
    return false;

  AddressingMode SupportedAM(true, true);
  if (I != nullptr)
    SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());

  if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
    return false;

  if (!SupportedAM.IndexReg)
    // No indexing allowed.
    return AM.Scale == 0;
  else
    // Indexing is OK but no scale factor can be applied.
    return AM.Scale == 0 || AM.Scale == 1;
}

bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
  if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
    return false;
  unsigned FromBits = FromType->getPrimitiveSizeInBits();
  unsigned ToBits = ToType->getPrimitiveSizeInBits();
  return FromBits > ToBits;
}

bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
  if (!FromVT.isInteger() || !ToVT.isInteger())
    return false;
  unsigned FromBits = FromVT.getSizeInBits();
  unsigned ToBits = ToVT.getSizeInBits();
  return FromBits > ToBits;
}
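
// Truncation between GPR-sized integers is free here because a narrower
// operation simply reads the low bits of the register holding the wider
// value; no instruction is needed for, say, i64 -> i32.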

//===----------------------------------------------------------------------===//
// Inline asm support
//===----------------------------------------------------------------------===//

TargetLowering::ConstraintType
SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'a': // Address register
    case 'd': // Data register (equivalent to 'r')
    case 'f': // Floating-point register
    case 'h': // High-part register
    case 'r': // General-purpose register
    case 'v': // Vector register
      return C_RegisterClass;

    case 'Q': // Memory with base and unsigned 12-bit displacement
    case 'R': // Likewise, plus an index
    case 'S': // Memory with base and signed 20-bit displacement
    case 'T': // Likewise, plus an index
    case 'm': // Equivalent to 'T'.
      return C_Memory;

    case 'I': // Unsigned 8-bit constant
    case 'J': // Unsigned 12-bit constant
    case 'K': // Signed 16-bit constant
    case 'L': // Signed 20-bit displacement (on all targets we support)
    case 'M': // 0x7fffffff
      return C_Other;

    default:
      break;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}
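
// Illustrative use of these constraint letters from C source:
//
//   int Res;
//   asm("ahi %0,%1" : "+d"(Res) : "K"(42));
//
// 'd' places Res in a general-purpose register and 'K' checks that 42 fits
// in the signed 16-bit immediate field of AHI.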

TargetLowering::ConstraintWeight SystemZTargetLowering::
getSingleConstraintMatchWeight(AsmOperandInfo &info,
                               const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  Type *type = CallOperandVal->getType();
  // Look at the constraint type.
  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;

  case 'a': // Address register
  case 'd': // Data register (equivalent to 'r')
  case 'h': // High-part register
  case 'r': // General-purpose register
    if (CallOperandVal->getType()->isIntegerTy())
      weight = CW_Register;
    break;

  case 'f': // Floating-point register
    if (type->isFloatingPointTy())
      weight = CW_Register;
    break;

  case 'v': // Vector register
    if ((type->isVectorTy() || type->isFloatingPointTy()) &&
        Subtarget.hasVector())
      weight = CW_Register;
    break;

  case 'I': // Unsigned 8-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isUInt<8>(C->getZExtValue()))
        weight = CW_Constant;
    break;

  case 'J': // Unsigned 12-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isUInt<12>(C->getZExtValue()))
        weight = CW_Constant;
    break;

  case 'K': // Signed 16-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isInt<16>(C->getSExtValue()))
        weight = CW_Constant;
    break;

  case 'L': // Signed 20-bit displacement (on all targets we support)
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isInt<20>(C->getSExtValue()))
        weight = CW_Constant;
    break;

  case 'M': // 0x7fffffff
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (C->getZExtValue() == 0x7fffffff)
        weight = CW_Constant;
    break;
  }
  return weight;
}

// Parse a "{tNNN}" register constraint for which the register type "t"
// has already been verified.  RC is the class associated with "t" and
// Map maps 0-based register numbers to LLVM register numbers.
static std::pair<unsigned, const TargetRegisterClass *>
parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC,
                    const unsigned *Map, unsigned Size) {
  assert(*(Constraint.end()-1) == '}' && "Missing '}'");
  if (isdigit(Constraint[2])) {
    unsigned Index;
    bool Failed =
        Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
    if (!Failed && Index < Size && Map[Index])
      return std::make_pair(Map[Index], RC);
  }
  return std::make_pair(0U, nullptr);
}
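
// For example, given the constraint "{r5}" with the GR64 class and
// SystemZMC::GR64Regs as Map, Index parses as 5 and the function returns
// SystemZ::R5D; an out-of-range "{r16}" falls through to (0U, nullptr).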

std::pair<unsigned, const TargetRegisterClass *>
SystemZTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    default: break;
    case 'd': // Data register (equivalent to 'r')
    case 'r': // General-purpose register
      if (VT == MVT::i64)
        return std::make_pair(0U, &SystemZ::GR64BitRegClass);
      else if (VT == MVT::i128)
        return std::make_pair(0U, &SystemZ::GR128BitRegClass);
      return std::make_pair(0U, &SystemZ::GR32BitRegClass);

    case 'a': // Address register
      if (VT == MVT::i64)
        return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
      else if (VT == MVT::i128)
        return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
      return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);

    case 'h': // High-part register (an LLVM extension)
      return std::make_pair(0U, &SystemZ::GRH32BitRegClass);

    case 'f': // Floating-point register
      if (VT == MVT::f64)
        return std::make_pair(0U, &SystemZ::FP64BitRegClass);
      else if (VT == MVT::f128)
        return std::make_pair(0U, &SystemZ::FP128BitRegClass);
      return std::make_pair(0U, &SystemZ::FP32BitRegClass);

    case 'v': // Vector register
      if (Subtarget.hasVector()) {
        if (VT == MVT::f32)
          return std::make_pair(0U, &SystemZ::VR32BitRegClass);
        if (VT == MVT::f64)
          return std::make_pair(0U, &SystemZ::VR64BitRegClass);
        return std::make_pair(0U, &SystemZ::VR128BitRegClass);
      }
      break;
    }
  }
  if (Constraint.size() > 0 && Constraint[0] == '{') {
    // We need to override the default register parsing for GPRs and FPRs
    // because the interpretation depends on VT.  The internal names of
    // the registers are also different from the external names
    // (F0D and F0S instead of F0, etc.).
    if (Constraint[1] == 'r') {
      if (VT == MVT::i32)
        return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
                                   SystemZMC::GR32Regs, 16);
      if (VT == MVT::i128)
        return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
                                   SystemZMC::GR128Regs, 16);
      return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
                                 SystemZMC::GR64Regs, 16);
    }
    if (Constraint[1] == 'f') {
      if (VT == MVT::f32)
        return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
                                   SystemZMC::FP32Regs, 16);
      if (VT == MVT::f128)
        return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
                                   SystemZMC::FP128Regs, 16);
      return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
                                 SystemZMC::FP64Regs, 16);
    }
    if (Constraint[1] == 'v') {
      if (VT == MVT::f32)
        return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
                                   SystemZMC::VR32Regs, 32);
      if (VT == MVT::f64)
        return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
                                   SystemZMC::VR64Regs, 32);
      return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
                                 SystemZMC::VR128Regs, 32);
    }
  }
  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

void SystemZTargetLowering::
LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                             std::vector<SDValue> &Ops,
                             SelectionDAG &DAG) const {
  // Only support length 1 constraints for now.
  if (Constraint.length() == 1) {
    switch (Constraint[0]) {
    case 'I': // Unsigned 8-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isUInt<8>(C->getZExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'J': // Unsigned 12-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isUInt<12>(C->getZExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'K': // Signed 16-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isInt<16>(C->getSExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'L': // Signed 20-bit displacement (on all targets we support)
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isInt<20>(C->getSExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'M': // 0x7fffffff
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0x7fffffff)
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

//===----------------------------------------------------------------------===//
// Calling conventions
//===----------------------------------------------------------------------===//

#include "SystemZGenCallingConv.inc"
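
// The TableGen-generated include above supplies the calling-convention
// functions described in SystemZCallingConv.td (e.g. CC_SystemZ and
// RetCC_SystemZ) that the argument-lowering code in this file passes to
// CCState::AnalyzeFormalArguments and friends.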
1186
1187
const MCPhysReg *SystemZTargetLowering::getScratchRegisters(
1188
56
  CallingConv::ID) const {
1189
56
  static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1190
56
                                           SystemZ::R14D, 0 };
1191
56
  return ScratchRegs;
1192
56
}
1193
1194
bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
1195
3
                                                     Type *ToType) const {
1196
3
  return isTruncateFree(FromType, ToType);
1197
3
}
1198
1199
105
bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
1200
105
  return CI->isTailCall();
1201
105
}
1202
1203
// We do not yet support 128-bit single-element vector types.  If the user
1204
// attempts to use such types as function argument or return type, prefer
1205
// to error out instead of emitting code violating the ABI.
1206
11.8k
static void VerifyVectorType(MVT VT, EVT ArgVT) {
1207
11.8k
  if (ArgVT.isVector() && !VT.isVector())
1208
8
    report_fatal_error("Unsupported vector argument or return type");
1209
11.8k
}
1210
1211
3.22k
static void VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> &Ins) {
1212
9.21k
  for (unsigned i = 0; i < Ins.size(); ++i)
1213
5.99k
    VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
1214
3.22k
}
1215
1216
6.42k
static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) {
1217
12.2k
  for (unsigned i = 0; i < Outs.size(); ++i)
1218
5.86k
    VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
1219
6.42k
}
1220
1221
// Value is a value that has been passed to us in the location described by VA
1222
// (and so has type VA.getLocVT()).  Convert Value to VA.getValVT(), chaining
1223
// any loads onto Chain.
1224
static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL,
1225
                                   CCValAssign &VA, SDValue Chain,
1226
16.1k
                                   SDValue Value) {
1227
16.1k
  // If the argument has been promoted from a smaller type, insert an
1228
16.1k
  // assertion to capture this.
1229
16.1k
  if (VA.getLocInfo() == CCValAssign::SExt)
1230
39
    Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
1231
39
                        DAG.getValueType(VA.getValVT()));
1232
16.0k
  else if (VA.getLocInfo() == CCValAssign::ZExt)
1233
17
    Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
1234
17
                        DAG.getValueType(VA.getValVT()));
1235
16.1k
1236
16.1k
  if (VA.isExtInLoc())
1237
56
    Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1238
16.0k
  else if (VA.getLocInfo() == CCValAssign::BCvt) {
1239
2
    // If this is a short vector argument loaded from the stack,
1240
2
    // extend from i64 to full vector size and then bitcast.
1241
2
    assert(VA.getLocVT() == MVT::i64);
1242
2
    assert(VA.getValVT().isVector());
1243
2
    Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1244
2
    Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1245
2
  } else
1246
16.0k
    assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1247
16.1k
  return Value;
1248
16.1k
}
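
One way to read the LocVT/ValVT round trip above: the calling convention widens a small integer to a 64-bit location, AssertSext/AssertZext record that the upper bits are redundant, and the TRUNCATE recovers the original value. A standalone sketch with plain integer types standing in for EVTs; this illustrates the invariant, it is not the LLVM API.

#include <cassert>
#include <cstdint>

// Promote an i8 argument to an i64 "location" the way CCValAssign::SExt does.
int64_t promoteSExt(int8_t Val) { return int64_t(Val); }

// Recover the value type, as the ISD::TRUNCATE above does.
int8_t truncateToValVT(int64_t Loc) { return int8_t(Loc); }

int main() {
  int8_t Arg = -5;
  int64_t Loc = promoteSExt(Arg);       // what travels in a 64-bit GPR
  assert(int64_t(int8_t(Loc)) == Loc);  // the AssertSext invariant
  assert(truncateToValVT(Loc) == Arg);  // truncation recovers the argument
}
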
1249
1250
// Value is a value of type VA.getValVT() that we need to copy into
1251
// the location described by VA.  Return a copy of Value converted to
1252
// VA.getLocVT().  The caller is responsible for handling indirect values.
1253
static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
1254
7.77k
                                   CCValAssign &VA, SDValue Value) {
1255
7.77k
  switch (VA.getLocInfo()) {
1256
7.77k
  case CCValAssign::SExt:
1257
55
    return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1258
7.77k
  case CCValAssign::ZExt:
1259
446
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1260
7.77k
  case CCValAssign::AExt:
1261
0
    return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1262
7.77k
  case CCValAssign::BCvt:
1263
12
    // If this is a short vector argument to be stored to the stack,
1264
12
    // bitcast to v2i64 and then extract first element.
1265
12
    assert(VA.getLocVT() == MVT::i64);
1266
12
    assert(VA.getValVT().isVector());
1267
12
    Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value);
1268
12
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1269
12
                       DAG.getConstant(0, DL, MVT::i32));
1270
7.77k
  case CCValAssign::Full:
1271
7.26k
    return Value;
1272
7.77k
  default:
1273
0
    llvm_unreachable("Unhandled getLocInfo()");
1274
7.77k
  }
1275
7.77k
}
1276
1277
SDValue SystemZTargetLowering::LowerFormalArguments(
1278
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1279
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1280
8.11k
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1281
8.11k
  MachineFunction &MF = DAG.getMachineFunction();
1282
8.11k
  MachineFrameInfo &MFI = MF.getFrameInfo();
1283
8.11k
  MachineRegisterInfo &MRI = MF.getRegInfo();
1284
8.11k
  SystemZMachineFunctionInfo *FuncInfo =
1285
8.11k
      MF.getInfo<SystemZMachineFunctionInfo>();
1286
8.11k
  auto *TFL =
1287
8.11k
      static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
1288
8.11k
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
1289
8.11k
1290
8.11k
  // Detect unsupported vector argument types.
1291
8.11k
  if (Subtarget.hasVector())
1292
3.00k
    VerifyVectorTypes(Ins);
1293
8.11k
1294
8.11k
  // Assign locations to all of the incoming arguments.
1295
8.11k
  SmallVector<CCValAssign, 16> ArgLocs;
1296
8.11k
  SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1297
8.11k
  CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
1298
8.11k
1299
8.11k
  unsigned NumFixedGPRs = 0;
1300
8.11k
  unsigned NumFixedFPRs = 0;
1301
23.6k
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1302
15.4k
    SDValue ArgValue;
1303
15.4k
    CCValAssign &VA = ArgLocs[I];
1304
15.4k
    EVT LocVT = VA.getLocVT();
1305
15.4k
    if (VA.isRegLoc()) {
1306
15.3k
      // Arguments passed in registers
1307
15.3k
      const TargetRegisterClass *RC;
1308
15.3k
      switch (LocVT.getSimpleVT().SimpleTy) {
1309
15.3k
      default:
1310
0
        // Integers smaller than i64 should be promoted to i64.
1311
0
        llvm_unreachable("Unexpected argument type");
1312
15.3k
      case MVT::i32:
1313
2.35k
        NumFixedGPRs += 1;
1314
2.35k
        RC = &SystemZ::GR32BitRegClass;
1315
2.35k
        break;
1316
15.3k
      case MVT::i64:
1317
7.38k
        NumFixedGPRs += 1;
1318
7.38k
        RC = &SystemZ::GR64BitRegClass;
1319
7.38k
        break;
1320
15.3k
      case MVT::f32:
1321
780
        NumFixedFPRs += 1;
1322
780
        RC = &SystemZ::FP32BitRegClass;
1323
780
        break;
1324
15.3k
      case MVT::f64:
1325
1.37k
        NumFixedFPRs += 1;
1326
1.37k
        RC = &SystemZ::FP64BitRegClass;
1327
1.37k
        break;
1328
15.3k
      case MVT::v16i8:
1329
3.44k
      case MVT::v8i16:
1330
3.44k
      case MVT::v4i32:
1331
3.44k
      case MVT::v2i64:
1332
3.44k
      case MVT::v4f32:
1333
3.44k
      case MVT::v2f64:
1334
3.44k
        RC = &SystemZ::VR128BitRegClass;
1335
3.44k
        break;
1336
15.3k
      }
1337
15.3k
1338
15.3k
      unsigned VReg = MRI.createVirtualRegister(RC);
1339
15.3k
      MRI.addLiveIn(VA.getLocReg(), VReg);
1340
15.3k
      ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1341
15.3k
    } else {
1342
155
      assert(VA.isMemLoc() && "Argument not register or memory");
1343
155
1344
155
      // Create the frame index object for this incoming parameter.
1345
155
      int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8,
1346
155
                                     VA.getLocMemOffset(), true);
1347
155
1348
155
      // Create the SelectionDAG nodes corresponding to a load
1349
155
      // from this parameter.  Unpromoted ints and floats are
1350
155
      // passed as right-justified 8-byte values.
1351
155
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1352
155
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1353
10
        FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
1354
10
                          DAG.getIntPtrConstant(4, DL));
1355
155
      ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
1356
155
                             MachinePointerInfo::getFixedStack(MF, FI));
1357
155
    }
1358
15.4k
1359
15.4k
    // Convert the value of the argument register into the value that's
1360
15.4k
    // being passed.
1361
15.4k
    if (VA.getLocInfo() == CCValAssign::Indirect) {
1362
63
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1363
63
                                   MachinePointerInfo()));
1364
63
      // If the original argument was split (e.g. i128), we need
1365
63
      // to load all parts of it here (using the same address).
1366
63
      unsigned ArgIndex = Ins[I].OrigArgIndex;
1367
63
      assert (Ins[I].PartOffset == 0);
1368
92
      while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
1369
29
        CCValAssign &PartVA = ArgLocs[I + 1];
1370
29
        unsigned PartOffset = Ins[I + 1].PartOffset;
1371
29
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1372
29
                                      DAG.getIntPtrConstant(PartOffset, DL));
1373
29
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1374
29
                                     MachinePointerInfo()));
1375
29
        ++I;
1376
29
      }
1377
63
    } else
1378
15.4k
      InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
1379
15.4k
  }
1380
8.11k
1381
8.11k
  if (IsVarArg) {
1382
0
    // Save the number of non-varargs registers for later use by va_start, etc.
1383
0
    FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1384
0
    FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1385
0
1386
0
    // Likewise the address (in the form of a frame index) of where the
1387
0
    // first stack vararg would be.  The 1-byte size here is arbitrary.
1388
0
    int64_t StackSize = CCInfo.getNextStackOffset();
1389
0
    FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
1390
0
1391
0
    // ...and a similar frame index for the caller-allocated save area
1392
0
    // that will be used to store the incoming registers.
1393
0
    int64_t RegSaveOffset = TFL->getOffsetOfLocalArea();
1394
0
    unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
1395
0
    FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
1396
0
1397
0
    // Store the FPR varargs in the reserved frame slots.  (We store the
1398
0
    // GPRs as part of the prologue.)
1399
0
    if (NumFixedFPRs < SystemZ::NumArgFPRs) {
1400
0
      SDValue MemOps[SystemZ::NumArgFPRs];
1401
0
      for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) {
1402
0
        unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]);
1403
0
        int FI = MFI.CreateFixedObject(8, RegSaveOffset + Offset, true);
1404
0
        SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1405
0
        unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I],
1406
0
                                     &SystemZ::FP64BitRegClass);
1407
0
        SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
1408
0
        MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
1409
0
                                 MachinePointerInfo::getFixedStack(MF, FI));
1410
0
      }
1411
0
      // Join the stores, which are independent of one another.
1412
0
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
1413
0
                          makeArrayRef(&MemOps[NumFixedFPRs],
1414
0
                                       SystemZ::NumArgFPRs-NumFixedFPRs));
1415
0
    }
1416
0
  }
1417
8.11k
1418
8.11k
  return Chain;
1419
8.11k
}
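
The +4 adjustment when loading an i32/f32 stack argument above reflects the big-endian SystemZ rule that sub-8-byte values sit right-justified in an 8-byte slot. A small arithmetic sketch of that addressing rule; argLoadOffset and the 160-byte base are illustrative, not taken from the ABI code here.

#include <cstdio>

// A 4-byte value stored right-justified in an 8-byte big-endian stack slot
// lives at slot base + 4; 8-byte values start at the slot base.
unsigned argLoadOffset(unsigned SlotBase, unsigned ValueBytes) {
  return ValueBytes == 4 ? SlotBase + 4 : SlotBase;
}

int main() {
  printf("%u\n", argLoadOffset(160, 8)); // i64/f64: 160
  printf("%u\n", argLoadOffset(160, 4)); // i32/f32: 164
}
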
1420
1421
static bool canUseSiblingCall(const CCState &ArgCCInfo,
1422
                              SmallVectorImpl<CCValAssign> &ArgLocs,
1423
173
                              SmallVectorImpl<ISD::OutputArg> &Outs) {
1424
173
  // Punt if there are any indirect or stack arguments, or if the call
1425
173
  // needs the callee-saved argument register R6, or if the call uses
1426
173
  // the callee-saved register arguments SwiftSelf and SwiftError.
1427
243
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1428
75
    CCValAssign &VA = ArgLocs[I];
1429
75
    if (VA.getLocInfo() == CCValAssign::Indirect)
1430
1
      return false;
1431
74
    if (!VA.isRegLoc())
1432
1
      return false;
1433
73
    unsigned Reg = VA.getLocReg();
1434
73
    if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
1435
1
      return false;
1436
72
    if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
1437
2
      return false;
1438
72
  }
1439
173
  return true;
1440
173
}
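
A condensed restatement of the loop above, over a hypothetical simplified argument descriptor instead of CCValAssign/ISD::OutputArg, to make the sibling-call conditions easy to scan; the ArgLoc struct here is invented for illustration.

#include <vector>

// Hypothetical, simplified view of one lowered call argument.
struct ArgLoc {
  bool Indirect;          // passed via a pointer to a spill slot
  bool InRegister;        // assigned a register rather than a stack slot
  bool UsesR6;            // lands in callee-saved argument register %r6
  bool SwiftSelfOrError;  // uses the callee-saved Swift registers
};

// Mirrors canUseSiblingCall: every argument must be a plain register
// argument that touches neither %r6 nor the Swift special registers.
bool canUseSiblingCallSketch(const std::vector<ArgLoc> &Args) {
  for (const ArgLoc &A : Args)
    if (A.Indirect || !A.InRegister || A.UsesR6 || A.SwiftSelfOrError)
      return false;
  return true;
}

int main() {
  std::vector<ArgLoc> Args = {{false, true, false, false}};
  return canUseSiblingCallSketch(Args) ? 0 : 1;
}
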
1441
1442
SDValue
1443
SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
1444
1.03k
                                 SmallVectorImpl<SDValue> &InVals) const {
1445
1.03k
  SelectionDAG &DAG = CLI.DAG;
1446
1.03k
  SDLoc &DL = CLI.DL;
1447
1.03k
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1448
1.03k
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1449
1.03k
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1450
1.03k
  SDValue Chain = CLI.Chain;
1451
1.03k
  SDValue Callee = CLI.Callee;
1452
1.03k
  bool &IsTailCall = CLI.IsTailCall;
1453
1.03k
  CallingConv::ID CallConv = CLI.CallConv;
1454
1.03k
  bool IsVarArg = CLI.IsVarArg;
1455
1.03k
  MachineFunction &MF = DAG.getMachineFunction();
1456
1.03k
  EVT PtrVT = getPointerTy(MF.getDataLayout());
1457
1.03k
1458
1.03k
  // Detect unsupported vector argument and return types.
1459
1.03k
  if (Subtarget.hasVector()) {
1460
218
    VerifyVectorTypes(Outs);
1461
218
    VerifyVectorTypes(Ins);
1462
218
  }
1463
1.03k
1464
1.03k
  // Analyze the operands of the call, assigning locations to each operand.
1465
1.03k
  SmallVector<CCValAssign, 16> ArgLocs;
1466
1.03k
  SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1467
1.03k
  ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
1468
1.03k
1469
1.03k
  // We don't support GuaranteedTailCallOpt, only automatically-detected
1470
1.03k
  // sibling calls.
1471
1.03k
  if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
1472
5
    IsTailCall = false;
1473
1.03k
1474
1.03k
  // Get a count of how many bytes are to be pushed on the stack.
1475
1.03k
  unsigned NumBytes = ArgCCInfo.getNextStackOffset();
1476
1.03k
1477
1.03k
  // Mark the start of the call.
1478
1.03k
  if (!IsTailCall)
1479
868
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
1480
1.03k
1481
1.03k
  // Copy argument values to their designated locations.
1482
1.03k
  SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
1483
1.03k
  SmallVector<SDValue, 8> MemOpChains;
1484
1.03k
  SDValue StackPtr;
1485
2.50k
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1486
1.46k
    CCValAssign &VA = ArgLocs[I];
1487
1.46k
    SDValue ArgValue = OutVals[I];
1488
1.46k
1489
1.46k
    if (VA.getLocInfo() == CCValAssign::Indirect) {
1490
80
      // Store the argument in a stack slot and pass its address.
1491
80
      SDValue SpillSlot = DAG.CreateStackTemporary(Outs[I].ArgVT);
1492
80
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
1493
80
      MemOpChains.push_back(
1494
80
          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
1495
80
                       MachinePointerInfo::getFixedStack(MF, FI)));
1496
80
      // If the original argument was split (e.g. i128), we need
1497
80
      // to store all parts of it here (and pass just one address).
1498
80
      unsigned ArgIndex = Outs[I].OrigArgIndex;
1499
80
      assert (Outs[I].PartOffset == 0);
1500
89
      while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1501
9
        SDValue PartValue = OutVals[I + 1];
1502
9
        unsigned PartOffset = Outs[I + 1].PartOffset;
1503
9
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
1504
9
                                      DAG.getIntPtrConstant(PartOffset, DL));
1505
9
        MemOpChains.push_back(
1506
9
            DAG.getStore(Chain, DL, PartValue, Address,
1507
9
                         MachinePointerInfo::getFixedStack(MF, FI)));
1508
9
        ++I;
1509
9
      }
1510
80
      ArgValue = SpillSlot;
1511
80
    } else
1512
1.38k
      ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
1513
1.46k
1514
1.46k
    if (VA.isRegLoc())
1515
1.26k
      // Queue up the argument copies and emit them at the end.
1516
1.26k
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
1517
200
    else {
1518
200
      assert(VA.isMemLoc() && "Argument not register or memory");
1519
200
1520
200
      // Work out the address of the stack slot.  Unpromoted ints and
1521
200
      // floats are passed as right-justified 8-byte values.
1522
200
      if (!StackPtr.getNode())
1523
38
        StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT);
1524
200
      unsigned Offset = SystemZMC::CallFrameSize + VA.getLocMemOffset();
1525
200
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1526
37
        Offset += 4;
1527
200
      SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
1528
200
                                    DAG.getIntPtrConstant(Offset, DL));
1529
200
1530
200
      // Emit the store.
1531
200
      MemOpChains.push_back(
1532
200
          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
1533
200
    }
1534
1.46k
  }
1535
1.03k
1536
1.03k
  // Join the stores, which are independent of one another.
1537
1.03k
  if (!MemOpChains.empty())
1538
69
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
1539
1.03k
1540
1.03k
  // Accept direct calls by converting symbolic call addresses to the
1541
1.03k
  // associated Target* opcodes.  Force %r1 to be used for indirect
1542
1.03k
  // tail calls.
1543
1.03k
  SDValue Glue;
1544
1.03k
  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1545
536
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
1546
536
    Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1547
536
  } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1548
442
    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
1549
442
    Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1550
442
  } else if (IsTailCall) {
1551
28
    Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
1552
28
    Glue = Chain.getValue(1);
1553
28
    Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
1554
28
  }
1555
1.03k
1556
1.03k
  // Build a sequence of copy-to-reg nodes, chained and glued together.
1557
2.30k
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
1558
1.26k
    Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
1559
1.26k
                             RegsToPass[I].second, Glue);
1560
1.26k
    Glue = Chain.getValue(1);
1561
1.26k
  }
1562
1.03k
1563
1.03k
  // The first call operand is the chain and the second is the target address.
1564
1.03k
  SmallVector<SDValue, 8> Ops;
1565
1.03k
  Ops.push_back(Chain);
1566
1.03k
  Ops.push_back(Callee);
1567
1.03k
1568
1.03k
  // Add argument registers to the end of the list so that they are
1569
1.03k
  // known live into the call.
1570
2.30k
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
1571
1.26k
    Ops.push_back(DAG.getRegister(RegsToPass[I].first,
1572
1.26k
                                  RegsToPass[I].second.getValueType()));
1573
1.03k
1574
1.03k
  // Add a register mask operand representing the call-preserved registers.
1575
1.03k
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1576
1.03k
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
1577
1.03k
  assert(Mask && "Missing call preserved mask for calling convention");
1578
1.03k
  Ops.push_back(DAG.getRegisterMask(Mask));
1579
1.03k
1580
1.03k
  // Glue the call to the argument copies, if any.
1581
1.03k
  if (Glue.getNode())
1582
703
    Ops.push_back(Glue);
1583
1.03k
1584
1.03k
  // Emit the call.
1585
1.03k
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1586
1.03k
  if (IsTailCall)
1587
168
    return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
1588
870
  Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
1589
870
  Glue = Chain.getValue(1);
1590
870
1591
870
  // Mark the end of the call, which is glued to the call itself.
1592
870
  Chain = DAG.getCALLSEQ_END(Chain,
1593
870
                             DAG.getConstant(NumBytes, DL, PtrVT, true),
1594
870
                             DAG.getConstant(0, DL, PtrVT, true),
1595
870
                             Glue, DL);
1596
870
  Glue = Chain.getValue(1);
1597
870
1598
870
  // Assign locations to each value returned by this call.
1599
870
  SmallVector<CCValAssign, 16> RetLocs;
1600
870
  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
1601
870
  RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
1602
870
1603
870
  // Copy all of the result registers out of their specified physreg.
1604
1.56k
  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1605
695
    CCValAssign &VA = RetLocs[I];
1606
695
1607
695
    // Copy the value out, gluing the copy to the end of the call sequence.
1608
695
    SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
1609
695
                                          VA.getLocVT(), Glue);
1610
695
    Chain = RetValue.getValue(1);
1611
695
    Glue = RetValue.getValue(2);
1612
695
1613
695
    // Convert the value of the return register into the value that's
1614
695
    // being returned.
1615
695
    InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
1616
695
  }
1617
870
1618
870
  return Chain;
1619
870
}
1620
1621
bool SystemZTargetLowering::
1622
CanLowerReturn(CallingConv::ID CallConv,
1623
               MachineFunction &MF, bool isVarArg,
1624
               const SmallVectorImpl<ISD::OutputArg> &Outs,
1625
9.15k
               LLVMContext &Context) const {
1626
9.15k
  // Detect unsupported vector return types.
1627
9.15k
  if (Subtarget.hasVector())
1628
3.22k
    VerifyVectorTypes(Outs);
1629
9.15k
1630
9.15k
  // Special case that we cannot easily detect in RetCC_SystemZ since
1631
9.15k
  // i128 is not a legal type.
1632
9.15k
  for (auto &Out : Outs)
1633
7.28k
    if (Out.ArgVT == MVT::i128)
1634
13
      return false;
1635
9.15k
1636
9.15k
  SmallVector<CCValAssign, 16> RetLocs;
1637
9.14k
  CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
1638
9.14k
  return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
1639
9.15k
}
1640
1641
SDValue
1642
SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
1643
                                   bool IsVarArg,
1644
                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
1645
                                   const SmallVectorImpl<SDValue> &OutVals,
1646
8.07k
                                   const SDLoc &DL, SelectionDAG &DAG) const {
1647
8.07k
  MachineFunction &MF = DAG.getMachineFunction();
1648
8.07k
1649
8.07k
  // Detect unsupported vector return types.
1650
8.07k
  if (Subtarget.hasVector())
1651
2.97k
    VerifyVectorTypes(Outs);
1652
8.07k
1653
8.07k
  // Assign locations to each returned value.
1654
8.07k
  SmallVector<CCValAssign, 16> RetLocs;
1655
8.07k
  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
1656
8.07k
  RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
1657
8.07k
1658
8.07k
  // Quick exit for void returns
1659
8.07k
  if (RetLocs.empty())
1660
1.90k
    return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);
1661
6.17k
1662
6.17k
  // Copy the result values into the output registers.
1663
6.17k
  SDValue Glue;
1664
6.17k
  SmallVector<SDValue, 4> RetOps;
1665
6.17k
  RetOps.push_back(Chain);
1666
12.5k
  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1667
6.38k
    CCValAssign &VA = RetLocs[I];
1668
6.38k
    SDValue RetValue = OutVals[I];
1669
6.38k
1670
6.38k
    // Make the return register live on exit.
1671
6.38k
    assert(VA.isRegLoc() && "Can only return in registers!");
1672
6.38k
1673
6.38k
    // Promote the value as required.
1674
6.38k
    RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
1675
6.38k
1676
6.38k
    // Chain and glue the copies together.
1677
6.38k
    unsigned Reg = VA.getLocReg();
1678
6.38k
    Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
1679
6.38k
    Glue = Chain.getValue(1);
1680
6.38k
    RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
1681
6.38k
  }
1682
6.17k
1683
6.17k
  // Update chain and glue.
1684
6.17k
  RetOps[0] = Chain;
1685
6.17k
  if (Glue.getNode())
1686
6.17k
    RetOps.push_back(Glue);
1687
6.17k
1688
6.17k
  return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
1689
6.17k
}
1690
1691
// Return true if Op is an intrinsic node with chain that returns the CC value
1692
// as its only (other) argument.  Provide the associated SystemZISD opcode and
1693
// the mask of valid CC values if so.
1694
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
1695
124
                                      unsigned &CCValid) {
1696
124
  unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1697
124
  switch (Id) {
1698
124
  case Intrinsic::s390_tbegin:
1699
1
    Opcode = SystemZISD::TBEGIN;
1700
1
    CCValid = SystemZ::CCMASK_TBEGIN;
1701
1
    return true;
1702
124
1703
124
  case Intrinsic::s390_tbegin_nofloat:
1704
10
    Opcode = SystemZISD::TBEGIN_NOFLOAT;
1705
10
    CCValid = SystemZ::CCMASK_TBEGIN;
1706
10
    return true;
1707
124
1708
124
  case Intrinsic::s390_tend:
1709
3
    Opcode = SystemZISD::TEND;
1710
3
    CCValid = SystemZ::CCMASK_TEND;
1711
3
    return true;
1712
124
1713
124
  default:
1714
110
    return false;
1715
124
  }
1716
124
}
1717
1718
// Return true if Op is an intrinsic node without chain that returns the
1719
// CC value as its final argument.  Provide the associated SystemZISD
1720
// opcode and the mask of valid CC values if so.
1721
2.34k
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
1722
2.34k
  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1723
2.34k
  switch (Id) {
1724
2.34k
  case Intrinsic::s390_vpkshs:
1725
19
  case Intrinsic::s390_vpksfs:
1726
19
  case Intrinsic::s390_vpksgs:
1727
19
    Opcode = SystemZISD::PACKS_CC;
1728
19
    CCValid = SystemZ::CCMASK_VCMP;
1729
19
    return true;
1730
19
1731
21
  case Intrinsic::s390_vpklshs:
1732
21
  case Intrinsic::s390_vpklsfs:
1733
21
  case Intrinsic::s390_vpklsgs:
1734
21
    Opcode = SystemZISD::PACKLS_CC;
1735
21
    CCValid = SystemZ::CCMASK_VCMP;
1736
21
    return true;
1737
21
1738
156
  case Intrinsic::s390_vceqbs:
1739
156
  case Intrinsic::s390_vceqhs:
1740
156
  case Intrinsic::s390_vceqfs:
1741
156
  case Intrinsic::s390_vceqgs:
1742
156
    Opcode = SystemZISD::VICMPES;
1743
156
    CCValid = SystemZ::CCMASK_VCMP;
1744
156
    return true;
1745
156
1746
156
  case Intrinsic::s390_vchbs:
1747
136
  case Intrinsic::s390_vchhs:
1748
136
  case Intrinsic::s390_vchfs:
1749
136
  case Intrinsic::s390_vchgs:
1750
136
    Opcode = SystemZISD::VICMPHS;
1751
136
    CCValid = SystemZ::CCMASK_VCMP;
1752
136
    return true;
1753
136
1754
177
  case Intrinsic::s390_vchlbs:
1755
177
  case Intrinsic::s390_vchlhs:
1756
177
  case Intrinsic::s390_vchlfs:
1757
177
  case Intrinsic::s390_vchlgs:
1758
177
    Opcode = SystemZISD::VICMPHLS;
1759
177
    CCValid = SystemZ::CCMASK_VCMP;
1760
177
    return true;
1761
177
1762
177
  case Intrinsic::s390_vtm:
1763
13
    Opcode = SystemZISD::VTM;
1764
13
    CCValid = SystemZ::CCMASK_VCMP;
1765
13
    return true;
1766
177
1767
177
  case Intrinsic::s390_vfaebs:
1768
39
  case Intrinsic::s390_vfaehs:
1769
39
  case Intrinsic::s390_vfaefs:
1770
39
    Opcode = SystemZISD::VFAE_CC;
1771
39
    CCValid = SystemZ::CCMASK_ANY;
1772
39
    return true;
1773
39
1774
39
  case Intrinsic::s390_vfaezbs:
1775
21
  case Intrinsic::s390_vfaezhs:
1776
21
  case Intrinsic::s390_vfaezfs:
1777
21
    Opcode = SystemZISD::VFAEZ_CC;
1778
21
    CCValid = SystemZ::CCMASK_ANY;
1779
21
    return true;
1780
21
1781
21
  case Intrinsic::s390_vfeebs:
1782
12
  case Intrinsic::s390_vfeehs:
1783
12
  case Intrinsic::s390_vfeefs:
1784
12
    Opcode = SystemZISD::VFEE_CC;
1785
12
    CCValid = SystemZ::CCMASK_ANY;
1786
12
    return true;
1787
12
1788
12
  case Intrinsic::s390_vfeezbs:
1789
12
  case Intrinsic::s390_vfeezhs:
1790
12
  case Intrinsic::s390_vfeezfs:
1791
12
    Opcode = SystemZISD::VFEEZ_CC;
1792
12
    CCValid = SystemZ::CCMASK_ANY;
1793
12
    return true;
1794
12
1795
12
  case Intrinsic::s390_vfenebs:
1796
12
  case Intrinsic::s390_vfenehs:
1797
12
  case Intrinsic::s390_vfenefs:
1798
12
    Opcode = SystemZISD::VFENE_CC;
1799
12
    CCValid = SystemZ::CCMASK_ANY;
1800
12
    return true;
1801
12
1802
12
  case Intrinsic::s390_vfenezbs:
1803
12
  case Intrinsic::s390_vfenezhs:
1804
12
  case Intrinsic::s390_vfenezfs:
1805
12
    Opcode = SystemZISD::VFENEZ_CC;
1806
12
    CCValid = SystemZ::CCMASK_ANY;
1807
12
    return true;
1808
12
1809
12
  case Intrinsic::s390_vistrbs:
1810
12
  case Intrinsic::s390_vistrhs:
1811
12
  case Intrinsic::s390_vistrfs:
1812
12
    Opcode = SystemZISD::VISTR_CC;
1813
12
    CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
1814
12
    return true;
1815
12
1816
15
  case Intrinsic::s390_vstrcbs:
1817
15
  case Intrinsic::s390_vstrchs:
1818
15
  case Intrinsic::s390_vstrcfs:
1819
15
    Opcode = SystemZISD::VSTRC_CC;
1820
15
    CCValid = SystemZ::CCMASK_ANY;
1821
15
    return true;
1822
15
1823
15
  case Intrinsic::s390_vstrczbs:
1824
9
  case Intrinsic::s390_vstrczhs:
1825
9
  case Intrinsic::s390_vstrczfs:
1826
9
    Opcode = SystemZISD::VSTRCZ_CC;
1827
9
    CCValid = SystemZ::CCMASK_ANY;
1828
9
    return true;
1829
9
1830
12
  case Intrinsic::s390_vstrsb:
1831
12
  case Intrinsic::s390_vstrsh:
1832
12
  case Intrinsic::s390_vstrsf:
1833
12
    Opcode = SystemZISD::VSTRS_CC;
1834
12
    CCValid = SystemZ::CCMASK_ANY;
1835
12
    return true;
1836
12
1837
12
  case Intrinsic::s390_vstrszb:
1838
12
  case Intrinsic::s390_vstrszh:
1839
12
  case Intrinsic::s390_vstrszf:
1840
12
    Opcode = SystemZISD::VSTRSZ_CC;
1841
12
    CCValid = SystemZ::CCMASK_ANY;
1842
12
    return true;
1843
12
1844
23
  case Intrinsic::s390_vfcedbs:
1845
23
  case Intrinsic::s390_vfcesbs:
1846
23
    Opcode = SystemZISD::VFCMPES;
1847
23
    CCValid = SystemZ::CCMASK_VCMP;
1848
23
    return true;
1849
23
1850
38
  case Intrinsic::s390_vfchdbs:
1851
38
  case Intrinsic::s390_vfchsbs:
1852
38
    Opcode = SystemZISD::VFCMPHS;
1853
38
    CCValid = SystemZ::CCMASK_VCMP;
1854
38
    return true;
1855
38
1856
38
  case Intrinsic::s390_vfchedbs:
1857
38
  case Intrinsic::s390_vfchesbs:
1858
38
    Opcode = SystemZISD::VFCMPHES;
1859
38
    CCValid = SystemZ::CCMASK_VCMP;
1860
38
    return true;
1861
38
1862
45
  case Intrinsic::s390_vftcidb:
1863
45
  case Intrinsic::s390_vftcisb:
1864
45
    Opcode = SystemZISD::VFTCI;
1865
45
    CCValid = SystemZ::CCMASK_VCMP;
1866
45
    return true;
1867
45
1868
45
  case Intrinsic::s390_tdc:
1869
36
    Opcode = SystemZISD::TDC;
1870
36
    CCValid = SystemZ::CCMASK_TDC;
1871
36
    return true;
1872
45
1873
1.47k
  default:
1874
1.47k
    return false;
1875
2.34k
  }
1876
2.34k
}
1877
1878
// Emit an intrinsic with chain and an explicit CC register result.
1879
static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
1880
14
                                           unsigned Opcode) {
1881
14
  // Copy all operands except the intrinsic ID.
1882
14
  unsigned NumOps = Op.getNumOperands();
1883
14
  SmallVector<SDValue, 6> Ops;
1884
14
  Ops.reserve(NumOps - 1);
1885
14
  Ops.push_back(Op.getOperand(0));
1886
36
  for (unsigned I = 2; I < NumOps; ++I)
1887
22
    Ops.push_back(Op.getOperand(I));
1888
14
1889
14
  assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
1890
14
  SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
1891
14
  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
1892
14
  SDValue OldChain = SDValue(Op.getNode(), 1);
1893
14
  SDValue NewChain = SDValue(Intr.getNode(), 1);
1894
14
  DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
1895
14
  return Intr.getNode();
1896
14
}
1897
1898
// Emit an intrinsic with an explicit CC register result.
1899
static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
1900
766
                                   unsigned Opcode) {
1901
766
  // Copy all operands except the intrinsic ID.
1902
766
  unsigned NumOps = Op.getNumOperands();
1903
766
  SmallVector<SDValue, 6> Ops;
1904
766
  Ops.reserve(NumOps - 1);
1905
2.41k
  for (unsigned I = 1; I < NumOps; ++I)
1906
1.65k
    Ops.push_back(Op.getOperand(I));
1907
766
1908
766
  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
1909
766
  return Intr.getNode();
1910
766
}
1911
1912
// CC is a comparison that will be implemented using an integer or
1913
// floating-point comparison.  Return the condition code mask for
1914
// a branch on true.  In the integer case, CCMASK_CMP_UO is set for
1915
// unsigned comparisons and clear for signed ones.  In the floating-point
1916
// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
1917
1.63k
static unsigned CCMaskForCondCode(ISD::CondCode CC) {
1918
1.63k
#define CONV(X) \
1919
1.63k
  case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
1920
160
  case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
1921
496
  case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
1922
1.63k
1923
1.63k
  switch (CC) {
1924
1.63k
  default:
1925
0
    llvm_unreachable("Invalid integer condition!");
1926
1.63k
1927
1.63k
  CONV(EQ);
1928
1.63k
  CONV(NE);
1929
1.63k
  CONV(GT);
1930
1.63k
  CONV(GE);
1931
1.63k
  CONV(LT);
1932
1.63k
  CONV(LE);
1933
1.63k
1934
1.63k
  case ISD::SETO: return SystemZ::CCMASK_CMP_O;
1935
1.63k
  case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
1936
1.63k
  }
1937
1.63k
#undef CONV
1938
1.63k
}
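
CONV expands each ISD condition into an ordered mask plus an unordered variant with CCMASK_CMP_UO added. A standalone sketch with the one-hot CC masks written out locally; the bit assignments mirror the "bit (3 - CC)" convention used later in getIntrinsicCmp and are an assumption here, not the SystemZ.h definitions.

#include <cstdio>

// Assumed one-hot CC masks: bit (3 - CC) is set when condition code CC holds.
enum : unsigned {
  CCMASK_CMP_EQ = 1 << 3,  // CC 0
  CCMASK_CMP_LT = 1 << 2,  // CC 1
  CCMASK_CMP_GT = 1 << 1,  // CC 2
  CCMASK_CMP_UO = 1 << 0,  // CC 3: unordered / "unsigned" marker
};

// What CONV(LE) yields for the ordered and unordered "less or equal" forms.
unsigned maskForSETLE()  { return CCMASK_CMP_EQ | CCMASK_CMP_LT; }
unsigned maskForSETULE() { return CCMASK_CMP_UO | CCMASK_CMP_EQ | CCMASK_CMP_LT; }

int main() {
  printf("SETLE  -> %#x\n", maskForSETLE());   // 0xc
  printf("SETULE -> %#x\n", maskForSETULE());  // 0xd
}
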
1939
1940
// If C can be converted to a comparison against zero, adjust the operands
1941
// as necessary.
1942
1.38k
static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
1943
1.38k
  if (C.ICmpType == SystemZICMP::UnsignedOnly)
1944
374
    return;
1945
1.01k
1946
1.01k
  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
1947
1.01k
  if (!ConstOp1)
1948
195
    return;
1949
819
1950
819
  int64_t Value = ConstOp1->getSExtValue();
1951
819
  if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
1952
819
      (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
1953
819
      (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
1954
819
      (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
1955
35
    C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
1956
35
    C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
1957
35
  }
1958
819
}
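
The CCMask ^= SystemZ::CCMASK_CMP_EQ toggle is sound because nudging the constant by one toward zero changes the comparison by exactly the equality outcome: x > -1 and x >= 0 agree for every x. A standalone property check under the same assumed one-hot masks as above:

#include <cassert>

enum : unsigned {
  CCMASK_CMP_EQ = 1 << 3,
  CCMASK_CMP_LT = 1 << 2,
  CCMASK_CMP_GT = 1 << 1,
};

// Evaluate a signed comparison of Val against Bound under a CC mask.
bool testCmp(int Val, int Bound, unsigned CCMask) {
  unsigned CC = Val == Bound ? CCMASK_CMP_EQ
              : Val < Bound  ? CCMASK_CMP_LT
                             : CCMASK_CMP_GT;
  return (CCMask & CC) != 0;
}

int main() {
  // "x > -1" under GT equals "x >= 0" under GT ^ EQ (== GT | EQ) for all x.
  for (int X = -4; X <= 4; ++X)
    assert(testCmp(X, -1, CCMASK_CMP_GT) ==
           testCmp(X, 0, CCMASK_CMP_GT ^ CCMASK_CMP_EQ));
}
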
1959
1960
// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
1961
// adjust the operands as necessary.
1962
static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
1963
1.38k
                             Comparison &C) {
1964
1.38k
  // For us to make any changes, it must be a comparison between a single-use
1965
1.38k
  // load and a constant.
1966
1.38k
  if (!C.Op0.hasOneUse() ||
1967
1.38k
      C.Op0.getOpcode() != ISD::LOAD ||
1968
1.38k
      C.Op1.getOpcode() != ISD::Constant)
1969
1.15k
    return;
1970
229
1971
229
  // We must have an 8- or 16-bit load.
1972
229
  auto *Load = cast<LoadSDNode>(C.Op0);
1973
229
  unsigned NumBits = Load->getMemoryVT().getStoreSizeInBits();
1974
229
  if (NumBits != 8 && NumBits != 16)
1975
63
    return;
1976
166
1977
166
  // The load must be an extending one and the constant must be within the
1978
166
  // range of the unextended value.
1979
166
  auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
1980
166
  uint64_t Value = ConstOp1->getZExtValue();
1981
166
  uint64_t Mask = (1 << NumBits) - 1;
1982
166
  if (Load->getExtensionType() == ISD::SEXTLOAD) {
1983
101
    // Make sure that ConstOp1 is in range of C.Op0.
1984
101
    int64_t SignedValue = ConstOp1->getSExtValue();
1985
101
    if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
1986
28
      return;
1987
73
    if (C.ICmpType != SystemZICMP::SignedOnly) {
1988
40
      // Unsigned comparison between two sign-extended values is equivalent
1989
40
      // to unsigned comparison between two zero-extended values.
1990
40
      Value &= Mask;
1991
40
    } else if (NumBits == 8) {
1992
16
      // Try to treat the comparison as unsigned, so that we can use CLI.
1993
16
      // Adjust CCMask and Value as necessary.
1994
16
      if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
1995
6
        // Test whether the high bit of the byte is set.
1996
6
        Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
1997
10
      else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
1998
6
        // Test whether the high bit of the byte is clear.
1999
6
        Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2000
4
      else
2001
4
        // No instruction exists for this combination.
2002
4
        return;
2003
12
      C.ICmpType = SystemZICMP::UnsignedOnly;
2004
12
    }
2005
73
  } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2006
65
    if (Value > Mask)
2007
0
      return;
2008
65
    // If the constant is in range, we can use any comparison.
2009
65
    C.ICmpType = SystemZICMP::Any;
2010
65
  } else
2011
0
    return;
2012
134
2013
134
  // Make sure that the first operand is an i32 of the right extension type.
2014
134
  ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2015
17
                              ISD::SEXTLOAD :
2016
134
                              ISD::ZEXTLOAD);
2017
134
  if (C.Op0.getValueType() != MVT::i32 ||
2018
134
      Load->getExtensionType() != ExtType) {
2019
61
    C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2020
61
                           Load->getBasePtr(), Load->getPointerInfo(),
2021
61
                           Load->getMemoryVT(), Load->getAlignment(),
2022
61
                           Load->getMemOperand()->getFlags());
2023
61
    // Update the chain uses.
2024
61
    DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2025
61
  }
2026
134
2027
134
  // Make sure that the second operand is an i32 with the right value.
2028
134
  if (C.Op1.getValueType() != MVT::i32 ||
2029
134
      Value != ConstOp1->getZExtValue())
2030
51
    C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
2031
134
}
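
The in-range test for a sign-extending load above uses a biased-range trick: adding 2^(NumBits-1) maps the signed range [-2^(NumBits-1), 2^(NumBits-1)) onto [0, 2^NumBits), so a single unsigned compare against Mask suffices. A standalone verification for 8-bit loads; fitsSignedBits is a hypothetical helper.

#include <cassert>
#include <cstdint>

// True iff V fits in an N-bit signed integer, via the biased-range trick.
// Unsigned wraparound makes the addition safe for negative V.
bool fitsSignedBits(int64_t V, unsigned N) {
  uint64_t Mask = (uint64_t(1) << N) - 1;
  return uint64_t(V) + (uint64_t(1) << (N - 1)) <= Mask;
}

int main() {
  assert(fitsSignedBits(127, 8));   // largest sign-extended byte value
  assert(fitsSignedBits(-128, 8));  // smallest
  assert(!fitsSignedBits(128, 8));  // out of range on either side
  assert(!fitsSignedBits(-129, 8));
}
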
2032
2033
// Return true if Op is either an unextended load, or a load suitable
2034
// for integer register-memory comparisons of type ICmpType.
2035
2.01k
static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2036
2.01k
  auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2037
2.01k
  if (Load) {
2038
444
    // There are no instructions to compare a register with a memory byte.
2039
444
    if (Load->getMemoryVT() == MVT::i8)
2040
73
      return false;
2041
371
    // Otherwise decide on extension type.
2042
371
    switch (Load->getExtensionType()) {
2043
371
    case ISD::NON_EXTLOAD:
2044
222
      return true;
2045
371
    case ISD::SEXTLOAD:
2046
75
      return ICmpType != SystemZICMP::UnsignedOnly;
2047
371
    case ISD::ZEXTLOAD:
2048
72
      return ICmpType != SystemZICMP::SignedOnly;
2049
371
    default:
2050
2
      break;
2051
1.57k
    }
2052
1.57k
  }
2053
1.57k
  return false;
2054
1.57k
}
2055
2056
// Return true if it is better to swap the operands of C.
2057
1.63k
static bool shouldSwapCmpOperands(const Comparison &C) {
2058
1.63k
  // Leave f128 comparisons alone, since they have no memory forms.
2059
1.63k
  if (C.Op0.getValueType() == MVT::f128)
2060
7
    return false;
2061
1.63k
2062
1.63k
  // Always keep a floating-point constant second, since comparisons with
2063
1.63k
  // zero can use LOAD TEST and comparisons with other constants make a
2064
1.63k
  // natural memory operand.
2065
1.63k
  if (isa<ConstantFPSDNode>(C.Op1))
2066
47
    return false;
2067
1.58k
2068
1.58k
  // Never swap comparisons with zero since there are many ways to optimize
2069
1.58k
  // those later.
2070
1.58k
  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2071
1.58k
  if (ConstOp1 && ConstOp1->getZExtValue() == 0)
2072
477
    return false;
2073
1.10k
2074
1.10k
  // Also keep natural memory operands second if the loaded value is
2075
1.10k
  // only used here.  Several comparisons have memory forms.
2076
1.10k
  if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
2077
204
    return false;
2078
904
2079
904
  // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
2080
904
  // In that case we generally prefer the memory to be second.
2081
904
  if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
2082
128
    // The only exceptions are when the second operand is a constant and
2083
128
    // we can use things like CHHSI.
2084
128
    if (!ConstOp1)
2085
43
      return true;
2086
85
    // The unsigned memory-immediate instructions can handle 16-bit
2087
85
    // unsigned integers.
2088
85
    if (C.ICmpType != SystemZICMP::SignedOnly &&
2089
85
        isUInt<16>(ConstOp1->getZExtValue()))
2090
49
      return false;
2091
36
    // The signed memory-immediate instructions can handle 16-bit
2092
36
    // signed integers.
2093
36
    if (C.ICmpType != SystemZICMP::UnsignedOnly &&
2094
36
        isInt<16>(ConstOp1->getSExtValue()))
2095
18
      return false;
2096
18
    return true;
2097
18
  }
2098
776
2099
776
  // Try to promote the use of CGFR and CLGFR.
2100
776
  unsigned Opcode0 = C.Op0.getOpcode();
2101
776
  if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
2102
1
    return true;
2103
775
  if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
2104
1
    return true;
2105
774
  if (C.ICmpType != SystemZICMP::SignedOnly &&
2106
774
      Opcode0 == ISD::AND &&
2107
774
      C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
2108
774
      cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
2109
1
    return true;
2110
773
2111
773
  return false;
2112
773
}
2113
2114
// Return a version of comparison CC mask CCMask in which the LT and GT
2115
// actions are swapped.
2116
68
static unsigned reverseCCMask(unsigned CCMask) {
2117
68
  return ((CCMask & SystemZ::CCMASK_CMP_EQ) |
2118
68
          (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
2119
68
          (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
2120
68
          (CCMask & SystemZ::CCMASK_CMP_UO));
2121
68
}
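
reverseCCMask is what makes the operand swap chosen by shouldSwapCmpOperands legal: exchanging operands flips the LT and GT outcomes while EQ and unordered are symmetric, so reversing twice is the identity. A standalone property check with the same assumed local mask constants:

#include <cassert>

enum : unsigned {
  CCMASK_CMP_EQ = 1 << 3,
  CCMASK_CMP_LT = 1 << 2,
  CCMASK_CMP_GT = 1 << 1,
  CCMASK_CMP_UO = 1 << 0,
};

unsigned reverseCCMask(unsigned CCMask) {
  return ((CCMask & CCMASK_CMP_EQ) |
          (CCMask & CCMASK_CMP_GT ? CCMASK_CMP_LT : 0) |
          (CCMask & CCMASK_CMP_LT ? CCMASK_CMP_GT : 0) |
          (CCMask & CCMASK_CMP_UO));
}

int main() {
  // "a <= b" (LT|EQ) becomes "b >= a" (GT|EQ); reversing twice is the identity.
  assert(reverseCCMask(CCMASK_CMP_LT | CCMASK_CMP_EQ) ==
         (CCMASK_CMP_GT | CCMASK_CMP_EQ));
  for (unsigned M = 0; M < 16; ++M)
    assert(reverseCCMask(reverseCCMask(M)) == M);
}
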
2122
2123
// Check whether C tests for equality between X and Y and whether X - Y
2124
// or Y - X is also computed.  In that case it's better to compare the
2125
// result of the subtraction against zero.
2126
static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
2127
1.38k
                                 Comparison &C) {
2128
1.38k
  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2129
1.38k
      C.CCMask == SystemZ::CCMASK_CMP_NE) {
2130
1.57k
    for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
2131
933
      SDNode *N = *I;
2132
933
      if (N->getOpcode() == ISD::SUB &&
2133
933
          ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
2134
2
           (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
2135
1
        C.Op0 = SDValue(N, 0);
2136
1
        C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
2137
1
        return;
2138
1
      }
2139
933
    }
2140
638
  }
2141
1.38k
}
2142
2143
// Check whether C compares a floating-point value with zero and if that
2144
// floating-point value is also negated.  In this case we can use the
2145
// negation to set CC, so avoiding separate LOAD AND TEST and
2146
// LOAD (NEGATIVE/COMPLEMENT) instructions.
2147
251
static void adjustForFNeg(Comparison &C) {
2148
251
  auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
2149
251
  if (C1 && C1->isZero()) {
2150
98
    for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
2151
62
      SDNode *N = *I;
2152
62
      if (N->getOpcode() == ISD::FNEG) {
2153
4
        C.Op0 = SDValue(N, 0);
2154
4
        C.CCMask = reverseCCMask(C.CCMask);
2155
4
        return;
2156
4
      }
2157
62
    }
2158
40
  }
2159
251
}
2160
2161
// Check whether C compares (shl X, 32) with 0 and whether X is
2162
// also sign-extended.  In that case it is better to test the result
2163
// of the sign extension using LTGFR.
2164
//
2165
// This case is important because InstCombine transforms a comparison
2166
// with (sext (trunc X)) into a comparison with (shl X, 32).
2167
1.38k
static void adjustForLTGFR(Comparison &C) {
2168
1.38k
  // Check for a comparison between (shl X, 32) and 0.
2169
1.38k
  if (C.Op0.getOpcode() == ISD::SHL &&
2170
1.38k
      C.Op0.getValueType() == MVT::i64 &&
2171
1.38k
      C.Op1.getOpcode() == ISD::Constant &&
2172
1.38k
      cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2173
13
    auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2174
13
    if (C1 && C1->getZExtValue() == 32) {
2175
13
      SDValue ShlOp0 = C.Op0.getOperand(0);
2176
13
      // See whether X has any SIGN_EXTEND_INREG uses.
2177
13
      for (auto I = ShlOp0->use_begin(), E = ShlOp0->use_end(); I != E; ++I) {
2178
13
        SDNode *N = *I;
2179
13
        if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
2180
13
            cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
2181
13
          C.Op0 = SDValue(N, 0);
2182
13
          return;
2183
13
        }
2184
13
      }
2185
13
    }
2186
13
  }
2187
1.38k
}
2188
2189
// If C compares the truncation of an extending load, try to compare
2190
// the untruncated value instead.  This exposes more opportunities to
2191
// reuse CC.
2192
static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
2193
1.38k
                               Comparison &C) {
2194
1.38k
  if (C.Op0.getOpcode() == ISD::TRUNCATE &&
2195
1.38k
      C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
2196
1.38k
      C.Op1.getOpcode() == ISD::Constant &&
2197
1.38k
      cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2198
7
    auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
2199
7
    if (L->getMemoryVT().getStoreSizeInBits() <= C.Op0.getValueSizeInBits()) {
2200
7
      unsigned Type = L->getExtensionType();
2201
7
      if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
2202
7
          (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
2203
7
        C.Op0 = C.Op0.getOperand(0);
2204
7
        C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
2205
7
      }
2206
7
    }
2207
7
  }
2208
1.38k
}
2209
2210
// Return true if shift operation N has an in-range constant shift value.
2211
// Store it in ShiftVal if so.
2212
19
static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
2213
19
  auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
2214
19
  if (!Shift)
2215
0
    return false;
2216
19
2217
19
  uint64_t Amount = Shift->getZExtValue();
2218
19
  if (Amount >= N.getValueSizeInBits())
2219
0
    return false;
2220
19
2221
19
  ShiftVal = Amount;
2222
19
  return true;
2223
19
}
2224
2225
// Check whether an AND with Mask is suitable for a TEST UNDER MASK
2226
// instruction and whether the CC value is descriptive enough to handle
2227
// a comparison of type Opcode between the AND result and CmpVal.
2228
// CCMask says which comparison result is being tested and BitSize is
2229
// the number of bits in the operands.  If TEST UNDER MASK can be used,
2230
// return the corresponding CC mask, otherwise return 0.
2231
static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
2232
                                     uint64_t Mask, uint64_t CmpVal,
2233
162
                                     unsigned ICmpType) {
2234
162
  assert(Mask != 0 && "ANDs with zero should have been removed by now");
2235
162
2236
162
  // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
2237
162
  if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
2238
162
      !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
2239
63
    return 0;
2240
99
2241
99
  // Work out the masks for the lowest and highest bits.
2242
99
  unsigned HighShift = 63 - countLeadingZeros(Mask);
2243
99
  uint64_t High = uint64_t(1) << HighShift;
2244
99
  uint64_t Low = uint64_t(1) << countTrailingZeros(Mask);
2245
99
2246
99
  // Signed ordered comparisons are effectively unsigned if the sign
2247
99
  // bit is dropped.
2248
99
  bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
2249
99
2250
99
  // Check for equality comparisons with 0, or the equivalent.
2251
99
  if (CmpVal == 0) {
2252
60
    if (CCMask == SystemZ::CCMASK_CMP_EQ)
2253
28
      return SystemZ::CCMASK_TM_ALL_0;
2254
32
    if (CCMask == SystemZ::CCMASK_CMP_NE)
2255
31
      return SystemZ::CCMASK_TM_SOME_1;
2256
40
  }
2257
40
  if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
2258
8
    if (CCMask == SystemZ::CCMASK_CMP_LT)
2259
2
      return SystemZ::CCMASK_TM_ALL_0;
2260
6
    if (CCMask == SystemZ::CCMASK_CMP_GE)
2261
0
      return SystemZ::CCMASK_TM_SOME_1;
2262
38
  }
2263
38
  if (EffectivelyUnsigned && CmpVal < Low) {
2264
3
    if (CCMask == SystemZ::CCMASK_CMP_LE)
2265
0
      return SystemZ::CCMASK_TM_ALL_0;
2266
3
    if (CCMask == SystemZ::CCMASK_CMP_GT)
2267
2
      return SystemZ::CCMASK_TM_SOME_1;
2268
36
  }
2269
36
2270
36
  // Check for equality comparisons with the mask, or the equivalent.
2271
36
  if (CmpVal == Mask) {
2272
12
    if (CCMask == SystemZ::CCMASK_CMP_EQ)
2273
2
      return SystemZ::CCMASK_TM_ALL_1;
2274
10
    if (CCMask == SystemZ::CCMASK_CMP_NE)
2275
2
      return SystemZ::CCMASK_TM_SOME_0;
2276
32
  }
2277
32
  if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
2278
8
    if (CCMask == SystemZ::CCMASK_CMP_GT)
2279
2
      return SystemZ::CCMASK_TM_ALL_1;
2280
6
    if (CCMask == SystemZ::CCMASK_CMP_LE)
2281
0
      return SystemZ::CCMASK_TM_SOME_0;
2282
30
  }
2283
30
  if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
2284
10
    if (CCMask == SystemZ::CCMASK_CMP_GE)
2285
4
      return SystemZ::CCMASK_TM_ALL_1;
2286
6
    if (CCMask == SystemZ::CCMASK_CMP_LT)
2287
6
      return SystemZ::CCMASK_TM_SOME_0;
2288
20
  }
2289
20
2290
20
  // Check for ordered comparisons with the top bit.
2291
20
  if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
2292
7
    if (CCMask == SystemZ::CCMASK_CMP_LE)
2293
0
      return SystemZ::CCMASK_TM_MSB_0;
2294
7
    if (CCMask == SystemZ::CCMASK_CMP_GT)
2295
2
      return SystemZ::CCMASK_TM_MSB_1;
2296
18
  }
2297
18
  if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
2298
6
    if (CCMask == SystemZ::CCMASK_CMP_LT)
2299
2
      return SystemZ::CCMASK_TM_MSB_0;
2300
4
    if (CCMask == SystemZ::CCMASK_CMP_GE)
2301
0
      return SystemZ::CCMASK_TM_MSB_1;
2302
16
  }
2303
16
2304
16
  // If there are just two bits, we can do equality checks for Low and High
2305
16
  // as well.
2306
16
  if (Mask == Low + High) {
2307
6
    if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
2308
2
      return SystemZ::CCMASK_TM_MIXED_MSB_0;
2309
4
    if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
2310
1
      return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
2311
3
    if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
2312
2
      return SystemZ::CCMASK_TM_MIXED_MSB_1;
2313
1
    if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
2314
1
      return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
2315
10
  }
2316
10
2317
10
  // Looks like we've exhausted our options.
2318
10
  return 0;
2319
10
}
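
Every range test above reasons about Low and High, the lowest and highest set bits of the mask. A standalone sketch of that extraction, with compiler builtins standing in for llvm::countTrailingZeros/countLeadingZeros (GCC/Clang builtins assumed):

#include <cassert>
#include <cstdint>

// Lowest and highest set bits of a nonzero 64-bit mask, as computed above.
uint64_t lowBit(uint64_t Mask)  { return uint64_t(1) << __builtin_ctzll(Mask); }
uint64_t highBit(uint64_t Mask) { return uint64_t(1) << (63 - __builtin_clzll(Mask)); }

int main() {
  uint64_t Mask = 0x00f0;  // contiguous run covering bits 4..7
  assert(lowBit(Mask) == 0x0010);
  assert(highBit(Mask) == 0x0080);
  // With CmpVal == 0 and CCMASK_CMP_EQ, the function above then returns
  // TM_ALL_0: "all masked bits are zero".
}
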
2320
2321
// See whether C can be implemented as a TEST UNDER MASK instruction.
2322
// Update the arguments with the TM version if so.
2323
static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
2324
1.63k
                                   Comparison &C) {
2325
1.63k
  // Check that we have a comparison with a constant.
2326
1.63k
  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2327
1.63k
  if (!ConstOp1)
2328
563
    return;
2329
1.07k
  uint64_t CmpVal = ConstOp1->getZExtValue();
2330
1.07k
2331
1.07k
  // Check whether the nonconstant input is an AND with a constant mask.
2332
1.07k
  Comparison NewC(C);
2333
1.07k
  uint64_t MaskVal;
2334
1.07k
  ConstantSDNode *Mask = nullptr;
2335
1.07k
  if (C.Op0.getOpcode() == ISD::AND) {
2336
106
    NewC.Op0 = C.Op0.getOperand(0);
2337
106
    NewC.Op1 = C.Op0.getOperand(1);
2338
106
    Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
2339
106
    if (!Mask)
2340
4
      return;
2341
102
    MaskVal = Mask->getZExtValue();
2342
970
  } else {
2343
970
    // There is no instruction to compare with a 64-bit immediate
2344
970
    // so use TMHH instead if possible.  We need an unsigned ordered
2345
970
    // comparison with an i64 immediate.
2346
970
    if (NewC.Op0.getValueType() != MVT::i64 ||
2347
970
        NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
2348
970
        NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
2349
970
        NewC.ICmpType == SystemZICMP::SignedOnly)
2350
911
      return;
2351
59
    // Convert LE and GT comparisons into LT and GE.
2352
59
    if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
2353
59
        NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
2354
21
      if (CmpVal == uint64_t(-1))
2355
0
        return;
2356
21
      CmpVal += 1;
2357
21
      NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2358
21
    }
2359
59
    // If the low N bits of Op1 are zero then the low N bits of Op0 can
2360
59
    // be masked off without changing the result.
2361
59
    MaskVal = -(CmpVal & -CmpVal);
2362
59
    NewC.ICmpType = SystemZICMP::UnsignedOnly;
2363
59
  }
2364
1.07k
  if (!MaskVal)
2365
2
    return;
2366
159
2367
159
  // Check whether the combination of mask, comparison value and comparison
2368
159
  // type are suitable.
2369
159
  unsigned BitSize = NewC.Op0.getValueSizeInBits();
2370
159
  unsigned NewCCMask, ShiftVal;
2371
159
  if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2372
159
      
NewC.Op0.getOpcode() == ISD::SHL158
&&
2373
159
      
isSimpleShift(NewC.Op0, ShiftVal)3
&&
2374
159
      
(MaskVal >> ShiftVal != 0)3
&&
2375
159
      
((CmpVal >> ShiftVal) << ShiftVal) == CmpVal3
&&
2376
159
      (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2377
3
                                        MaskVal >> ShiftVal,
2378
3
                                        CmpVal >> ShiftVal,
2379
3
                                        SystemZICMP::Any))) {
2380
3
    NewC.Op0 = NewC.Op0.getOperand(0);
2381
3
    MaskVal >>= ShiftVal;
2382
156
  } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2383
156
             NewC.Op0.getOpcode() == ISD::SRL &&
2384
156
             isSimpleShift(NewC.Op0, ShiftVal) &&
2385
156
             (MaskVal << ShiftVal != 0) &&
2386
156
             ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
2387
156
             (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2388
14
                                               MaskVal << ShiftVal,
2389
14
                                               CmpVal << ShiftVal,
2390
14
                                               SystemZICMP::UnsignedOnly))) {
2391
11
    NewC.Op0 = NewC.Op0.getOperand(0);
2392
11
    MaskVal <<= ShiftVal;
2393
145
  } else {
2394
145
    NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
2395
145
                                     NewC.ICmpType);
2396
145
    if (!NewCCMask)
2397
70
      return;
2398
89
  }
2399
89
2400
89
  // Go ahead and make the change.
2401
89
  C.Opcode = SystemZISD::TM;
2402
89
  C.Op0 = NewC.Op0;
2403
89
  if (Mask && Mask->getZExtValue() == MaskVal)
2404
75
    C.Op1 = SDValue(Mask, 0);
2405
14
  else
2406
14
    C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
2407
89
  C.CCValid = SystemZ::CCMASK_TM;
2408
89
  C.CCMask = NewCCMask;
2409
89
}
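
The step MaskVal = -(CmpVal & -CmpVal) is worth unpacking: CmpVal & -CmpVal isolates the lowest set bit of CmpVal, and negating that keeps every bit from there upward, which is exactly the set of low bits that can be cleared without affecting an unsigned ordered comparison against CmpVal. A standalone check of the identity:

#include <cassert>
#include <cstdint>

int main() {
  for (uint64_t CmpVal : {1ull, 2ull, 12ull, 0x80ull, 0xff00ull}) {
    uint64_t LowBit = CmpVal & -CmpVal;    // lowest set bit of CmpVal
    uint64_t MaskVal = -LowBit;            // that bit and everything above it
    assert((CmpVal & MaskVal) == CmpVal);  // CmpVal itself survives the mask
    assert((MaskVal & (LowBit - 1)) == 0); // bits below the lowest set bit drop
  }
}
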

// See whether the comparison argument contains a redundant AND
// and remove it if so.  This sometimes happens due to the generic
// BRCOND expansion.
static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
                                  Comparison &C) {
  if (C.Op0.getOpcode() != ISD::AND)
    return;
  auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
  if (!Mask)
    return;
  KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
  if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
    return;

  C.Op0 = C.Op0.getOperand(0);
}

// Return a Comparison that tests the condition-code result of intrinsic
// node Call against constant integer CC using comparison code Cond.
// Opcode is the opcode of the SystemZISD operation for the intrinsic
// and CCValid is the set of possible condition-code results.
static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
                                  SDValue Call, unsigned CCValid, uint64_t CC,
                                  ISD::CondCode Cond) {
  Comparison C(Call, SDValue());
  C.Opcode = Opcode;
  C.CCValid = CCValid;
  if (Cond == ISD::SETEQ)
    // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
    C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
  else if (Cond == ISD::SETNE)
    // ...and the inverse of that.
    C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
  else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
    // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
    // always true for CC>3.
    C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
  else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
    // ...and the inverse of that.
    C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
  else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
    // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
    // always true for CC>3.
    C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
  else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
    // ...and the inverse of that.
    C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
  else
    llvm_unreachable("Unexpected integer comparison type");
  C.CCMask &= CCValid;
  return C;
}
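
// Illustrative sketch, not from this file: the CC-mask encoding used above,
// checked with plain integers.  A condition code CC in [0, 3] is represented
// as the single bit 1 << (3 - CC), so a mask of acceptable CC values is just
// a bitwise OR of those bits.  The helper names are hypothetical.
#include <cassert>

static unsigned ccMaskForEq(unsigned CC) { return CC < 4 ? 1u << (3 - CC) : 0; }
static unsigned ccMaskForLt(unsigned CC) { return CC < 4 ? ~0u << (4 - CC) : ~0u; }

int main() {
  assert(ccMaskForEq(0) == 8);           // only bit 3 set: matches CC==0
  assert(ccMaskForEq(3) == 1);           // only bit 0 set: matches CC==3
  assert((ccMaskForLt(0) & 0xf) == 0);   // CC < 0 is impossible: no CC bits set
  assert((ccMaskForLt(2) & 0xf) == 0xc); // CC < 2 accepts CC==0 and CC==1
}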

// Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
                         ISD::CondCode Cond, const SDLoc &DL) {
  if (CmpOp1.getOpcode() == ISD::Constant) {
    uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
    unsigned Opcode, CCValid;
    if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
        isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
      return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
    if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
        CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
        isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
      return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
  }
  Comparison C(CmpOp0, CmpOp1);
  C.CCMask = CCMaskForCondCode(Cond);
  if (C.Op0.getValueType().isFloatingPoint()) {
    C.CCValid = SystemZ::CCMASK_FCMP;
    C.Opcode = SystemZISD::FCMP;
    adjustForFNeg(C);
  } else {
    C.CCValid = SystemZ::CCMASK_ICMP;
    C.Opcode = SystemZISD::ICMP;
    // Choose the type of comparison.  Equality and inequality tests can
    // use either signed or unsigned comparisons.  The choice also doesn't
    // matter if both sign bits are known to be clear.  In those cases we
    // want to give the main isel code the freedom to choose whichever
    // form fits best.
    if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
        C.CCMask == SystemZ::CCMASK_CMP_NE ||
        (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
      C.ICmpType = SystemZICMP::Any;
    else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
      C.ICmpType = SystemZICMP::UnsignedOnly;
    else
      C.ICmpType = SystemZICMP::SignedOnly;
    C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
    adjustForRedundantAnd(DAG, DL, C);
    adjustZeroCmp(DAG, DL, C);
    adjustSubwordCmp(DAG, DL, C);
    adjustForSubtraction(DAG, DL, C);
    adjustForLTGFR(C);
    adjustICmpTruncate(DAG, DL, C);
  }

  if (shouldSwapCmpOperands(C)) {
    std::swap(C.Op0, C.Op1);
    C.CCMask = reverseCCMask(C.CCMask);
  }

  adjustForTestUnderMask(DAG, DL, C);
  return C;
}

// Emit the comparison instruction described by C.
static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
  if (!C.Op1.getNode()) {
    SDNode *Node;
    switch (C.Op0.getOpcode()) {
    case ISD::INTRINSIC_W_CHAIN:
      Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
      return SDValue(Node, 0);
    case ISD::INTRINSIC_WO_CHAIN:
      Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
      return SDValue(Node, Node->getNumValues() - 1);
    default:
      llvm_unreachable("Invalid comparison operands");
    }
  }
  if (C.Opcode == SystemZISD::ICMP)
    return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
                       DAG.getConstant(C.ICmpType, DL, MVT::i32));
  if (C.Opcode == SystemZISD::TM) {
    bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
                         bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
    return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
                       DAG.getConstant(RegisterOnly, DL, MVT::i32));
  }
  return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
}

// Implement a 32-bit *MUL_LOHI operation by extending both operands to
// 64 bits.  Extend is the extension type to use.  Store the high part
// in Hi and the low part in Lo.
static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
                            SDValue Op0, SDValue Op1, SDValue &Hi,
                            SDValue &Lo) {
  Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
  Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
  SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
  Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
                   DAG.getConstant(32, DL, MVT::i64));
  Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
  Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
}
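
// Illustrative sketch, not from this file: the same widening trick in plain
// C++ -- one 32x32->64 multiply whose top and bottom halves are the
// MUL_LOHI results.  The function name is hypothetical.
#include <cassert>
#include <cstdint>

static void mulLoHi32(uint32_t A, uint32_t B, uint32_t &Hi, uint32_t &Lo) {
  uint64_t Mul = uint64_t(A) * uint64_t(B); // extend, then a single multiply
  Hi = uint32_t(Mul >> 32);                 // high part
  Lo = uint32_t(Mul);                       // low part (truncate)
}

int main() {
  uint32_t Hi, Lo;
  mulLoHi32(0xffffffffu, 2, Hi, Lo);
  assert(Hi == 1 && Lo == 0xfffffffeu); // 0xffffffff * 2 == 0x1fffffffe
}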

// Lower a binary operation that produces two VT results, one in each
// half of a GR128 pair.  Op0 and Op1 are the VT operands to the operation,
// and Opcode performs the GR128 operation.  Store the even register result
// in Even and the odd register result in Odd.
static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                             unsigned Opcode, SDValue Op0, SDValue Op1,
                             SDValue &Even, SDValue &Odd) {
  SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
  bool Is32Bit = is32Bit(VT);
  Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
  Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
}

// Return an i32 value that is 1 if the CC value produced by CCReg is
// in the mask CCMask and 0 otherwise.  CC is known to have a value
// in CCValid, so other values can be ignored.
static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
                         unsigned CCValid, unsigned CCMask) {
  SDValue Ops[] = { DAG.getConstant(1, DL, MVT::i32),
                    DAG.getConstant(0, DL, MVT::i32),
                    DAG.getConstant(CCValid, DL, MVT::i32),
                    DAG.getConstant(CCMask, DL, MVT::i32), CCReg };
  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
}

// Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
// be done directly.  IsFP is true if CC is for a floating-point rather than
// integer comparison.
static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) {
  switch (CC) {
  case ISD::SETOEQ:
  case ISD::SETEQ:
    return IsFP ? SystemZISD::VFCMPE : SystemZISD::VICMPE;

  case ISD::SETOGE:
  case ISD::SETGE:
    return IsFP ? SystemZISD::VFCMPHE : static_cast<SystemZISD::NodeType>(0);

  case ISD::SETOGT:
  case ISD::SETGT:
    return IsFP ? SystemZISD::VFCMPH : SystemZISD::VICMPH;

  case ISD::SETUGT:
    return IsFP ? static_cast<SystemZISD::NodeType>(0) : SystemZISD::VICMPHL;

  default:
    return 0;
  }
}

// Return the SystemZISD vector comparison operation for CC or its inverse,
// or 0 if neither can be done directly.  Indicate in Invert whether the
// result is for the inverse of CC.  IsFP is true if CC is for a
// floating-point rather than integer comparison.
static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP,
                                            bool &Invert) {
  if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
    Invert = false;
    return Opcode;
  }

  CC = ISD::getSetCCInverse(CC, !IsFP);
  if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
    Invert = true;
    return Opcode;
  }

  return 0;
}

// Return a v2f64 that contains the extended form of elements Start and Start+1
// of v4f32 value Op.
static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
                                  SDValue Op) {
  int Mask[] = { Start, -1, Start + 1, -1 };
  Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
  return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
}

// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
// producing a result of type VT.
SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
                                            const SDLoc &DL, EVT VT,
                                            SDValue CmpOp0,
                                            SDValue CmpOp1) const {
  // There is no hardware support for v4f32 (unless we have the vector
  // enhancements facility 1), so extend the vector into two v2f64s
  // and compare those.
  if (CmpOp0.getValueType() == MVT::v4f32 &&
      !Subtarget.hasVectorEnhancements1()) {
    SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0);
    SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0);
    SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1);
    SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1);
    SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
    SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
    return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
  }
  return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
}

// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
// an integer mask of type VT.
SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
                                                const SDLoc &DL, EVT VT,
                                                ISD::CondCode CC,
                                                SDValue CmpOp0,
                                                SDValue CmpOp1) const {
  bool IsFP = CmpOp0.getValueType().isFloatingPoint();
  bool Invert = false;
  SDValue Cmp;
  switch (CC) {
    // Handle tests for order using (or (ogt y x) (oge x y)).
  case ISD::SETUO:
    Invert = true;
    LLVM_FALLTHROUGH;
  case ISD::SETO: {
    assert(IsFP && "Unexpected integer comparison");
    SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
    SDValue GE = getVectorCmp(DAG, SystemZISD::VFCMPHE, DL, VT, CmpOp0, CmpOp1);
    Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
    break;
  }

    // Handle <> tests using (or (ogt y x) (ogt x y)).
  case ISD::SETUEQ:
    Invert = true;
    LLVM_FALLTHROUGH;
  case ISD::SETONE: {
    assert(IsFP && "Unexpected integer comparison");
    SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
    SDValue GT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp0, CmpOp1);
    Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
    break;
  }

    // Otherwise a single comparison is enough.  It doesn't really
    // matter whether we try the inversion or the swap first, since
    // there are no cases where both work.
  default:
    if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
      Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1);
    else {
      CC = ISD::getSetCCSwappedOperands(CC);
      if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
        Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0);
      else
        llvm_unreachable("Unhandled comparison");
    }
    break;
  }
  if (Invert) {
    SDValue Mask =
      DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
    Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
  }
  return Cmp;
}

SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDValue CmpOp0   = Op.getOperand(0);
  SDValue CmpOp1   = Op.getOperand(1);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  if (VT.isVector())
    return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);

  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
  SDValue CCReg = emitCmp(DAG, DL, C);
  return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
}

SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue CmpOp0   = Op.getOperand(2);
  SDValue CmpOp1   = Op.getOperand(3);
  SDValue Dest     = Op.getOperand(4);
  SDLoc DL(Op);

  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
  SDValue CCReg = emitCmp(DAG, DL, C);
  return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(),
                     Op.getOperand(0), DAG.getConstant(C.CCValid, DL, MVT::i32),
                     DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
}

// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
// allowing Pos and Neg to be wider than CmpOp.
static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
  return (Neg.getOpcode() == ISD::SUB &&
          Neg.getOperand(0).getOpcode() == ISD::Constant &&
          cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 &&
          Neg.getOperand(1) == Pos &&
          (Pos == CmpOp ||
           (Pos.getOpcode() == ISD::SIGN_EXTEND &&
            Pos.getOperand(0) == CmpOp)));
}

// Return the absolute or negative absolute of Op; IsNegative decides which.
static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
                           bool IsNegative) {
  Op = DAG.getNode(SystemZISD::IABS, DL, Op.getValueType(), Op);
  if (IsNegative)
    Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
                     DAG.getConstant(0, DL, Op.getValueType()), Op);
  return Op;
}

SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDValue CmpOp0   = Op.getOperand(0);
  SDValue CmpOp1   = Op.getOperand(1);
  SDValue TrueOp   = Op.getOperand(2);
  SDValue FalseOp  = Op.getOperand(3);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  SDLoc DL(Op);

  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));

  // Check for absolute and negative-absolute selections, including those
  // where the comparison value is sign-extended (for LPGFR and LNGFR).
  // This check supplements the one in DAGCombiner.
  if (C.Opcode == SystemZISD::ICMP &&
      C.CCMask != SystemZ::CCMASK_CMP_EQ &&
      C.CCMask != SystemZ::CCMASK_CMP_NE &&
      C.Op1.getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
    if (isAbsolute(C.Op0, TrueOp, FalseOp))
      return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
    if (isAbsolute(C.Op0, FalseOp, TrueOp))
      return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
  }

  SDValue CCReg = emitCmp(DAG, DL, C);
  SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32),
                   DAG.getConstant(C.CCMask, DL, MVT::i32), CCReg};

  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
}

SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Node);
  const GlobalValue *GV = Node->getGlobal();
  int64_t Offset = Node->getOffset();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  CodeModel::Model CM = DAG.getTarget().getCodeModel();

  SDValue Result;
  if (Subtarget.isPC32DBLSymbol(GV, CM)) {
    // Assign anchors at 1<<12 byte boundaries.
    uint64_t Anchor = Offset & ~uint64_t(0xfff);
    Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
    Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);

    // The offset can be folded into the address if it is aligned to a halfword.
    Offset -= Anchor;
    if (Offset != 0 && (Offset & 1) == 0) {
      SDValue Full = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
      Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
      Offset = 0;
    }
  } else {
    Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
    Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
    Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
                         MachinePointerInfo::getGOT(DAG.getMachineFunction()));
  }

  // If there was a non-zero offset that we didn't fold, create an explicit
  // addition for it.
  if (Offset != 0)
    Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
                         DAG.getConstant(Offset, DL, PtrVT));

  return Result;
}
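
// Illustrative sketch, not from this file: the anchor arithmetic above on
// plain integers.  Offsets are split into a 4096-byte-aligned anchor plus a
// remainder; the remainder can be folded into the address only when it is
// halfword (2-byte) aligned.
#include <cassert>
#include <cstdint>

int main() {
  int64_t Offset = 0x1234;
  uint64_t Anchor = Offset & ~uint64_t(0xfff); // 1<<12 byte boundary
  int64_t Rem = Offset - int64_t(Anchor);
  assert(Anchor == 0x1000 && Rem == 0x234);
  assert((Rem & 1) == 0); // halfword aligned, so it can be folded
}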

SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
                                                 SelectionDAG &DAG,
                                                 unsigned Opcode,
                                                 SDValue GOTOffset) const {
  SDLoc DL(Node);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue Chain = DAG.getEntryNode();
  SDValue Glue;

  // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
  Glue = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
  Glue = Chain.getValue(1);

  // The first call operand is the chain and the second is the TLS symbol.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
                                           Node->getValueType(0),
                                           0, 0));

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
  Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Glue the call to the argument copies.
  Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
  Glue = Chain.getValue(1);

  // Copy the return value from %r2.
  return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
}

SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
                                                  SelectionDAG &DAG) const {
  SDValue Chain = DAG.getEntryNode();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // The high part of the thread pointer is in access register 0.
  SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
  TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);

  // The low part of the thread pointer is in access register 1.
  SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
  TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);

  // Merge them into a single 64-bit address.
  SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
                                    DAG.getConstant(32, DL, PtrVT));
  return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
}
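
// Illustrative sketch, not from this file: merging two 32-bit halves into a
// single 64-bit value, as done above for the thread pointer.  The function
// name is hypothetical.
#include <cassert>
#include <cstdint>

static uint64_t combineHalves(uint32_t Hi, uint32_t Lo) {
  return (uint64_t(Hi) << 32) | uint64_t(Lo); // shift high, OR in low
}

int main() {
  assert(combineHalves(0x12345678u, 0x9abcdef0u) == 0x123456789abcdef0ULL);
}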

SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
                                                     SelectionDAG &DAG) const {
  if (DAG.getTarget().useEmulatedTLS())
    return LowerToTLSEmulatedModel(Node, DAG);
  SDLoc DL(Node);
  const GlobalValue *GV = Node->getGlobal();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  TLSModel::Model model = DAG.getTarget().getTLSModel(GV);

  SDValue TP = lowerThreadPointer(DL, DAG);

  // Get the offset of GA from the thread pointer, based on the TLS model.
  SDValue Offset;
  switch (model) {
    case TLSModel::GeneralDynamic: {
      // Load the GOT offset of the tls_index (module ID / per-symbol offset).
      SystemZConstantPoolValue *CPV =
        SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);

      Offset = DAG.getConstantPool(CPV, PtrVT, 8);
      Offset = DAG.getLoad(
          PtrVT, DL, DAG.getEntryNode(), Offset,
          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));

      // Call __tls_get_offset to retrieve the offset.
      Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
      break;
    }

    case TLSModel::LocalDynamic: {
      // Load the GOT offset of the module ID.
      SystemZConstantPoolValue *CPV =
        SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);

      Offset = DAG.getConstantPool(CPV, PtrVT, 8);
      Offset = DAG.getLoad(
          PtrVT, DL, DAG.getEntryNode(), Offset,
          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));

      // Call __tls_get_offset to retrieve the module base offset.
      Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);

      // Note: The SystemZLDCleanupPass will remove redundant computations
      // of the module base offset.  Count total number of local-dynamic
      // accesses to trigger execution of that pass.
      SystemZMachineFunctionInfo* MFI =
        DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
      MFI->incNumLocalDynamicTLSAccesses();

      // Add the per-symbol offset.
      CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);

      SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8);
      DTPOffset = DAG.getLoad(
          PtrVT, DL, DAG.getEntryNode(), DTPOffset,
          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));

      Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
      break;
    }

    case TLSModel::InitialExec: {
      // Load the offset from the GOT.
      Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
                                          SystemZII::MO_INDNTPOFF);
      Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
      Offset =
          DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
                      MachinePointerInfo::getGOT(DAG.getMachineFunction()));
      break;
    }

    case TLSModel::LocalExec: {
      // Force the offset into the constant pool and load it from there.
      SystemZConstantPoolValue *CPV =
        SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);

      Offset = DAG.getConstantPool(CPV, PtrVT, 8);
      Offset = DAG.getLoad(
          PtrVT, DL, DAG.getEntryNode(), Offset,
          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
      break;
    }
  }

  // Add the base and offset together.
  return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
}

SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Node);
  const BlockAddress *BA = Node->getBlockAddress();
  int64_t Offset = Node->getOffset();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
  return Result;
}

SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
                                              SelectionDAG &DAG) const {
  SDLoc DL(JT);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);

  // Use LARL to load the address of the table.
  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
}

SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(CP);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  SDValue Result;
  if (CP->isMachineConstantPoolEntry())
    Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
                                       CP->getAlignment());
  else
    Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
                                       CP->getAlignment(), CP->getOffset());

  // Use LARL to load the address of the constant pool entry.
  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
}

SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
                                              SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setFrameAddressIsTaken(true);

  SDLoc DL(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // If the back chain frame index has not been allocated yet, do so.
  SystemZMachineFunctionInfo *FI = MF.getInfo<SystemZMachineFunctionInfo>();
  int BackChainIdx = FI->getFramePointerSaveIndex();
  if (!BackChainIdx) {
    // By definition, the frame address is the address of the back chain.
    BackChainIdx = MFI.CreateFixedObject(8, -SystemZMC::CallFrameSize, false);
    FI->setFramePointerSaveIndex(BackChainIdx);
  }
  SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);

  // FIXME The frontend should detect this case.
  if (Depth > 0) {
    report_fatal_error("Unsupported stack frame traversal count");
  }

  return BackChain;
}

SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
                                               SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  SDLoc DL(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // FIXME The frontend should detect this case.
  if (Depth > 0) {
    report_fatal_error("Unsupported stack frame traversal count");
  }

  // Return R14D, which has the return address. Mark it an implicit live-in.
  unsigned LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
}

SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue In = Op.getOperand(0);
  EVT InVT = In.getValueType();
  EVT ResVT = Op.getValueType();

  // Convert loads directly.  This is normally done by DAGCombiner,
  // but we need this case for bitcasts that are created during lowering
  // and which are then lowered themselves.
  if (auto *LoadN = dyn_cast<LoadSDNode>(In))
    if (ISD::isNormalLoad(LoadN)) {
      SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
                                    LoadN->getBasePtr(), LoadN->getMemOperand());
      // Update the chain uses.
      DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
      return NewLoad;
    }

  if (InVT == MVT::i32 && ResVT == MVT::f32) {
    SDValue In64;
    if (Subtarget.hasHighWord()) {
      SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
                                       MVT::i64);
      In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
                                       MVT::i64, SDValue(U64, 0), In);
    } else {
      In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
      In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
                         DAG.getConstant(32, DL, MVT::i64));
    }
    SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
    return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
                                      DL, MVT::f32, Out64);
  }
  if (InVT == MVT::f32 && ResVT == MVT::i32) {
    SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
    SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
                                             MVT::f64, SDValue(U64, 0), In);
    SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
    if (Subtarget.hasHighWord())
      return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
                                        MVT::i32, Out64);
    SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
                                DAG.getConstant(32, DL, MVT::i64));
    return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
  }
  llvm_unreachable("Unexpected bitcast combination");
}

SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
                                            SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  SystemZMachineFunctionInfo *FuncInfo =
    MF.getInfo<SystemZMachineFunctionInfo>();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  SDValue Chain   = Op.getOperand(0);
  SDValue Addr    = Op.getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  SDLoc DL(Op);

  // The initial values of each field.
  const unsigned NumFields = 4;
  SDValue Fields[NumFields] = {
    DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
    DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
    DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
    DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
  };

  // Store each field into its respective slot.
  SDValue MemOps[NumFields];
  unsigned Offset = 0;
  for (unsigned I = 0; I < NumFields; ++I) {
    SDValue FieldAddr = Addr;
    if (Offset != 0)
      FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
                              DAG.getIntPtrConstant(Offset, DL));
    MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
                             MachinePointerInfo(SV, Offset));
    Offset += 8;
  }
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}

SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDValue Chain      = Op.getOperand(0);
  SDValue DstPtr     = Op.getOperand(1);
  SDValue SrcPtr     = Op.getOperand(2);
  const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
  SDLoc DL(Op);

  return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL),
                       /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false,
                       /*isTailCall*/false,
                       MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}

SDValue SystemZTargetLowering::
lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
  MachineFunction &MF = DAG.getMachineFunction();
  bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
  bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");

  SDValue Chain = Op.getOperand(0);
  SDValue Size  = Op.getOperand(1);
  SDValue Align = Op.getOperand(2);
  SDLoc DL(Op);

  // If the user has set the no-realign-stack function attribute, ignore
  // alloca alignments.
  uint64_t AlignVal = (RealignOpt ?
                       dyn_cast<ConstantSDNode>(Align)->getZExtValue() : 0);

  uint64_t StackAlign = TFI->getStackAlignment();
  uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
  uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;

  unsigned SPReg = getStackPointerRegisterToSaveRestore();
  SDValue NeededSpace = Size;

  // Get a reference to the stack pointer.
  SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);

  // If we need a backchain, save it now.
  SDValue Backchain;
  if (StoreBackchain)
    Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo());

  // Add extra space for alignment if needed.
  if (ExtraAlignSpace)
    NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
                              DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));

  // Get the new stack pointer value.
  SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);

  // Copy the new stack pointer back.
  Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);

  // The allocated data lives above the 160 bytes allocated for the standard
  // frame, plus any outgoing stack arguments.  We don't know how much that
  // amounts to yet, so emit a special ADJDYNALLOC placeholder.
  SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
  SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);

  // Dynamically realign if needed.
  if (RequiredAlign > StackAlign) {
    Result =
      DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
                  DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
    Result =
      DAG.getNode(ISD::AND, DL, MVT::i64, Result,
                  DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
  }

  if (StoreBackchain)
    Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo());

  SDValue Ops[2] = { Result, Chain };
  return DAG.getMergeValues(Ops, DL);
}
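
// Illustrative sketch, not from this file: the over-allocate-and-realign
// arithmetic above on plain integers (ignoring the ADJDYNALLOC placeholder).
// Allocating RequiredAlign - StackAlign extra bytes guarantees that some
// address inside the block can be rounded up to RequiredAlign without
// leaving the allocation.  All values here are made up for the check.
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t StackAlign = 8, RequiredAlign = 64;
  const uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
  uint64_t OldSP = 0x10028; // 8-byte aligned only
  uint64_t Size = 100;
  uint64_t NewSP = OldSP - (Size + ExtraAlignSpace);
  uint64_t Result = (NewSP + ExtraAlignSpace) & ~(RequiredAlign - 1);
  assert(Result % RequiredAlign == 0);                // aligned as requested
  assert(Result >= NewSP && Result + Size <= OldSP);  // stays inside the block
}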

SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
    SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);

  return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
}

SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue Ops[2];
  if (is32Bit(VT))
    // Just do a normal 64-bit multiplication and extract the results.
    // We define this so that it can be used for constant division.
    lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
                    Op.getOperand(1), Ops[1], Ops[0]);
  else if (Subtarget.hasMiscellaneousExtensions2())
    // SystemZISD::SMUL_LOHI returns the low result in the odd register and
    // the high result in the even register.  ISD::SMUL_LOHI is defined to
    // return the low half first, so the results are in reverse order.
    lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
                     Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
  else {
    // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
    //
    //   (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
    //
    // but using the fact that the upper halves are either all zeros
    // or all ones:
    //
    //   (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
    //
    // and grouping the right terms together since they are quicker than the
    // multiplication:
    //
    //   (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
    SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
    SDValue LL = Op.getOperand(0);
    SDValue RL = Op.getOperand(1);
    SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
    SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
    // SystemZISD::UMUL_LOHI returns the low result in the odd register and
    // the high result in the even register.  ISD::SMUL_LOHI is defined to
    // return the low half first, so the results are in reverse order.
    lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
                     LL, RL, Ops[1], Ops[0]);
    SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
    SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
    SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
    Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
  }
  return DAG.getMergeValues(Ops, DL);
}
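
// Illustrative sketch, not from this file: the signed-from-unsigned
// correction above, scaled down to a 32x32->64 multiply so it can be checked
// directly.  With lh equal to all zeros or all ones depending on the sign of
// ll, only the high half of the product needs the two subtractions.  The
// function name is hypothetical.
#include <cassert>
#include <cstdint>

static int64_t signedMulViaUnsigned(int32_t L, int32_t R) {
  uint32_t LL = uint32_t(L), RL = uint32_t(R);
  uint32_t LH = L < 0 ? 0xffffffffu : 0u; // sign word: all zeros or all ones
  uint32_t RH = R < 0 ? 0xffffffffu : 0u;
  uint64_t Prod = uint64_t(LL) * uint64_t(RL); // unsigned full product
  uint32_t Hi = uint32_t(Prod >> 32) - ((LH & RL) + (LL & RH));
  return int64_t((uint64_t(Hi) << 32) | uint32_t(Prod));
}

int main() {
  assert(signedMulViaUnsigned(-3, 5) == -15);
  assert(signedMulViaUnsigned(-100000, -100000) == 10000000000LL);
}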

SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue Ops[2];
  if (is32Bit(VT))
    // Just do a normal 64-bit multiplication and extract the results.
    // We define this so that it can be used for constant division.
    lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
                    Op.getOperand(1), Ops[1], Ops[0]);
  else
    // SystemZISD::UMUL_LOHI returns the low result in the odd register and
    // the high result in the even register.  ISD::UMUL_LOHI is defined to
    // return the low half first, so the results are in reverse order.
    lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
                     Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
  return DAG.getMergeValues(Ops, DL);
}

SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  EVT VT = Op.getValueType();
  SDLoc DL(Op);

  // We use DSGF for 32-bit division.  This means the first operand must
  // always be 64-bit, and the second operand should be 32-bit whenever
  // that is possible, to improve performance.
  if (is32Bit(VT))
    Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
  else if (DAG.ComputeNumSignBits(Op1) > 32)
    Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);

  // DSG(F) returns the remainder in the even register and the
  // quotient in the odd register.
  SDValue Ops[2];
  lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
  return DAG.getMergeValues(Ops, DL);
}

SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
                                            SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);

  // DL(G) returns the remainder in the even register and the
  // quotient in the odd register.
  SDValue Ops[2];
  lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
                   Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
  return DAG.getMergeValues(Ops, DL);
}

SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");

  // Get the known-zero masks for each operand.
  SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
  KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
                        DAG.computeKnownBits(Ops[1])};

  // See if the upper 32 bits of one operand and the lower 32 bits of the
  // other are known zero.  They are the low and high operands respectively.
  uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
                       Known[1].Zero.getZExtValue() };
  unsigned High, Low;
  if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
    High = 1, Low = 0;
  else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
    High = 0, Low = 1;
  else
    return Op;

  SDValue LowOp = Ops[Low];
  SDValue HighOp = Ops[High];

  // If the high part is a constant, we're better off using IILH.
  if (HighOp.getOpcode() == ISD::Constant)
    return Op;

  // If the low part is a constant that is outside the range of LHI,
  // then we're better off using IILF.
  if (LowOp.getOpcode() == ISD::Constant) {
    int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
    if (!isInt<16>(Value))
      return Op;
  }

  // Check whether the high part is an AND that doesn't change the
  // high 32 bits and just masks out low bits.  We can skip it if so.
  if (HighOp.getOpcode() == ISD::AND &&
      HighOp.getOperand(1).getOpcode() == ISD::Constant) {
    SDValue HighOp0 = HighOp.getOperand(0);
    uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
    if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
      HighOp = HighOp0;
  }

  // Take advantage of the fact that all GR32 operations only change the
  // low 32 bits by truncating Low to an i32 and inserting it directly
  // using a subreg.  The interesting cases are those where the truncation
  // can be folded.
  SDLoc DL(Op);
  SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
  return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
                                   MVT::i64, HighOp, Low32);
}

// Lower SADDO/SSUBO/UADDO/USUBO nodes.
SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDNode *N = Op.getNode();
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDLoc DL(N);
  unsigned BaseOp = 0;
  unsigned CCValid = 0;
  unsigned CCMask = 0;

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown instruction!");
  case ISD::SADDO:
    BaseOp = SystemZISD::SADDO;
    CCValid = SystemZ::CCMASK_ARITH;
    CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
    break;
  case ISD::SSUBO:
    BaseOp = SystemZISD::SSUBO;
    CCValid = SystemZ::CCMASK_ARITH;
    CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
    break;
  case ISD::UADDO:
    BaseOp = SystemZISD::UADDO;
    CCValid = SystemZ::CCMASK_LOGICAL;
    CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
    break;
  case ISD::USUBO:
    BaseOp = SystemZISD::USUBO;
    CCValid = SystemZ::CCMASK_LOGICAL;
    CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
    break;
  }

  SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
  SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);

  SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
  if (N->getValueType(1) == MVT::i1)
    SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);

  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
}

static bool isAddCarryChain(SDValue Carry) {
  while (Carry.getOpcode() == ISD::ADDCARRY)
    Carry = Carry.getOperand(2);
  return Carry.getOpcode() == ISD::UADDO;
}

static bool isSubBorrowChain(SDValue Carry) {
  while (Carry.getOpcode() == ISD::SUBCARRY)
    Carry = Carry.getOperand(2);
  return Carry.getOpcode() == ISD::USUBO;
}

// Lower ADDCARRY/SUBCARRY nodes.
SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op,
                                                SelectionDAG &DAG) const {

  SDNode *N = Op.getNode();
  MVT VT = N->getSimpleValueType(0);

  // Let legalize expand this if it isn't a legal type yet.
  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return SDValue();

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue Carry = Op.getOperand(2);
  SDLoc DL(N);
  unsigned BaseOp = 0;
  unsigned CCValid = 0;
  unsigned CCMask = 0;

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown instruction!");
  case ISD::ADDCARRY:
    if (!isAddCarryChain(Carry))
      return SDValue();

    BaseOp = SystemZISD::ADDCARRY;
    CCValid = SystemZ::CCMASK_LOGICAL;
    CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
    break;
  case ISD::SUBCARRY:
    if (!isSubBorrowChain(Carry))
      return SDValue();

    BaseOp = SystemZISD::SUBCARRY;
    CCValid = SystemZ::CCMASK_LOGICAL;
    CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
    break;
  }

  // Set the condition code from the carry flag.
  Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
                      DAG.getConstant(CCValid, DL, MVT::i32),
                      DAG.getConstant(CCMask, DL, MVT::i32));

  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
  SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);

  SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
  if (N->getValueType(1) == MVT::i1)
    SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);

  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
}

SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
                                          SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  Op = Op.getOperand(0);

  // Handle vector types via VPOPCT.
  if (VT.isVector()) {
    Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
    Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
    switch (VT.getScalarSizeInBits()) {
    case 8:
      break;
    case 16: {
      Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
      SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
      SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
      Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
      Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
      break;
    }
    case 32: {
      SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
                                            DAG.getConstant(0, DL, MVT::i32));
      Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
      break;
    }
    case 64: {
      SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
                                            DAG.getConstant(0, DL, MVT::i32));
      Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
      Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
      break;
    }
    default:
      llvm_unreachable("Unexpected type");
    }
    return Op;
  }

  // Get the known-zero mask for the operand.
  KnownBits Known = DAG.computeKnownBits(Op);
  unsigned NumSignificantBits = (~Known.Zero).getActiveBits();
  if (NumSignificantBits == 0)
    return DAG.getConstant(0, DL, VT);

  // Skip known-zero high parts of the operand.
  int64_t OrigBitSize = VT.getSizeInBits();
  int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits);
  BitSize = std::min(BitSize, OrigBitSize);

  // The POPCNT instruction counts the number of bits in each byte.
  Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
  Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);

  // Add up per-byte counts in a binary tree.  All bits of Op at
  // position larger than BitSize remain zero throughout.
  for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
    SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
    if (BitSize != OrigBitSize)
      Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
                        DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
    Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
  }

  // Extract overall result from high byte.
  if (BitSize > 8)
    Op = DAG.getNode(ISD::SRL, DL, VT, Op,
                     DAG.getConstant(BitSize - 8, DL, VT));

  return Op;
}
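
// Illustrative sketch, not from this file: the scalar algorithm above in
// plain C++.  bytePopcounts stands in for the POPCNT instruction, which
// counts bits within each byte; the shift-and-add tree then folds the byte
// counts together so the total ends up in the top byte.  Both function names
// are hypothetical.
#include <cassert>
#include <cstdint>

static uint64_t bytePopcounts(uint64_t X) { // per-byte bit counts
  X = X - ((X >> 1) & 0x5555555555555555ULL);
  X = (X & 0x3333333333333333ULL) + ((X >> 2) & 0x3333333333333333ULL);
  return (X + (X >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
}

static unsigned popcount64(uint64_t X) {
  uint64_t Op = bytePopcounts(X);
  for (int I = 32; I >= 8; I /= 2) // add up per-byte counts in a binary tree
    Op += Op << I;
  return unsigned(Op >> 56);       // extract overall result from high byte
}

int main() {
  assert(popcount64(0) == 0);
  assert(popcount64(0xffffffffffffffffULL) == 64);
  assert(popcount64(0x8000000000000001ULL) == 2);
}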
3601
3602
SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3603
7
                                                 SelectionDAG &DAG) const {
3604
7
  SDLoc DL(Op);
3605
7
  AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
3606
7
    cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
3607
7
  SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
3608
7
    cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
3609
7
3610
7
  // The only fence that needs an instruction is a sequentially-consistent
3611
7
  // cross-thread fence.
3612
7
  if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
3613
7
      FenceSSID == SyncScope::System) {
3614
4
    return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
3615
4
                                      Op.getOperand(0)),
3616
4
                   0);
3617
4
  }
3618
3
3619
3
  // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3620
3
  return DAG.getNode(SystemZISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
3621
3
}
3622
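For reference, the two paths above correspond to the usual <atomic> fences, assuming the standard Clang mapping of C++ fences to ISD::ATOMIC_FENCE (a sketch, not taken from this file):

#include <atomic>

void crossThread() {
  // Sequentially consistent, cross-thread: takes the Serialize path above.
  std::atomic_thread_fence(std::memory_order_seq_cst);
}

void compilerOnly() {
  // Signal fences have single-thread scope, so they take the MEMBARRIER
  // path and emit no machine instruction.
  std::atomic_signal_fence(std::memory_order_seq_cst);
}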
3623
// Op is an atomic load.  Lower it into a normal volatile load.
3624
SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
3625
17
                                                SelectionDAG &DAG) const {
3626
17
  auto *Node = cast<AtomicSDNode>(Op.getNode());
3627
17
  return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
3628
17
                        Node->getChain(), Node->getBasePtr(),
3629
17
                        Node->getMemoryVT(), Node->getMemOperand());
3630
17
}
3631
3632
// Op is an atomic store.  Lower it into a normal volatile store.
3633
SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
3634
21
                                                 SelectionDAG &DAG) const {
3635
21
  auto *Node = cast<AtomicSDNode>(Op.getNode());
3636
21
  SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
3637
21
                                    Node->getBasePtr(), Node->getMemoryVT(),
3638
21
                                    Node->getMemOperand());
3639
21
  // We have to enforce sequential consistency by performing a
3640
21
  // serialization operation after the store.
3641
21
  if (Node->getOrdering() == AtomicOrdering::SequentiallyConsistent)
3642
4
    Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op),
3643
4
                                       MVT::Other, Chain), 0);
3644
21
  return Chain;
3645
21
}
3646
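At the source level the two lowerings above behave as in this sketch, assuming the usual mapping of std::atomic loads and stores to ATOMIC_LOAD/ATOMIC_STORE nodes:

#include <atomic>

std::atomic<int> Flag;

// Becomes a normal (volatile-marked) store; the seq_cst ordering adds a
// serialization instruction after it, per lowerATOMIC_STORE.
void publish() { Flag.store(1, std::memory_order_seq_cst); }

// Becomes a normal (volatile-marked) extending load; no fence is added
// on the load side.
int observe() { return Flag.load(std::memory_order_seq_cst); }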
3647
// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation.  Lower the first
3648
// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
3649
SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
3650
                                                   SelectionDAG &DAG,
3651
468
                                                   unsigned Opcode) const {
3652
468
  auto *Node = cast<AtomicSDNode>(Op.getNode());
3653
468
3654
468
  // 32-bit operations need no code outside the main loop.
3655
468
  EVT NarrowVT = Node->getMemoryVT();
3656
468
  EVT WideVT = MVT::i32;
3657
468
  if (NarrowVT == WideVT)
3658
196
    return Op;
3659
272
3660
272
  int64_t BitSize = NarrowVT.getSizeInBits();
3661
272
  SDValue ChainIn = Node->getChain();
3662
272
  SDValue Addr = Node->getBasePtr();
3663
272
  SDValue Src2 = Node->getVal();
3664
272
  MachineMemOperand *MMO = Node->getMemOperand();
3665
272
  SDLoc DL(Node);
3666
272
  EVT PtrVT = Addr.getValueType();
3667
272
3668
272
  // Convert atomic subtracts of constants into additions.
3669
272
  if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
3670
36
    if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
3671
30
      Opcode = SystemZISD::ATOMIC_LOADW_ADD;
3672
30
      Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
3673
30
    }
3674
272
3675
272
  // Get the address of the containing word.
3676
272
  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
3677
272
                                    DAG.getConstant(-4, DL, PtrVT));
3678
272
3679
272
  // Get the number of bits that the word must be rotated left in order
3680
272
  // to bring the field to the top bits of a GR32.
3681
272
  SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
3682
272
                                 DAG.getConstant(3, DL, PtrVT));
3683
272
  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
3684
272
3685
272
  // Get the complementing shift amount, for rotating a field in the top
3686
272
  // bits back to its proper position.
3687
272
  SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
3688
272
                                    DAG.getConstant(0, DL, WideVT), BitShift);
3689
272
3690
272
  // Extend the source operand to 32 bits and prepare it for the inner loop.
3691
272
  // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
3692
272
  // operations require the source to be shifted in advance.  (This shift
3693
272
  // can be folded if the source is constant.)  For AND and NAND, the lower
3694
272
  // bits must be set, while for other opcodes they should be left clear.
3695
272
  if (Opcode != SystemZISD::ATOMIC_SWAPW)
3696
264
    Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
3697
264
                       DAG.getConstant(32 - BitSize, DL, WideVT));
3698
272
  if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
3699
272
      Opcode == SystemZISD::ATOMIC_LOADW_NAND)
3700
72
    Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
3701
72
                       DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
3702
272
3703
272
  // Construct the ATOMIC_LOADW_* node.
3704
272
  SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
3705
272
  SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
3706
272
                    DAG.getConstant(BitSize, DL, WideVT) };
3707
272
  SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
3708
272
                                             NarrowVT, MMO);
3709
272
3710
272
  // Rotate the result of the final CS so that the field is in the lower
3711
272
  // bits of a GR32, then truncate it.
3712
272
  SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
3713
272
                                    DAG.getConstant(BitSize, DL, WideVT));
3714
272
  SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
3715
272
3716
272
  SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
3717
272
  return DAG.getMergeValues(RetOps, DL);
3718
272
}
3719
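The address arithmetic above, restated as a small runnable C++ model for a subword atomic at address Addr (big-endian byte numbering; the struct and function names are invented):

#include <cassert>
#include <cstdint>

struct WordAccess {
  uint64_t AlignedAddr; // containing 4-byte word: Addr & -4
  unsigned BitShift;    // left-rotate amount bringing the field to the top
  unsigned NegBitShift; // complementing rotation: (0 - BitShift) mod 32
};

static WordAccess locateField(uint64_t Addr) {
  WordAccess W;
  W.AlignedAddr = Addr & ~uint64_t(3);
  // The DAG computes Addr << 3 and lets the rotate ignore the high bits;
  // masking here makes that explicit.
  W.BitShift = unsigned(Addr << 3) & 31;
  W.NegBitShift = (32 - W.BitShift) & 31;
  return W;
}

int main() {
  // A halfword at byte offset 2 of its word is 16 bits from the top.
  WordAccess W = locateField(0x1002);
  assert(W.AlignedAddr == 0x1000);
  assert(W.BitShift == 16 && W.NegBitShift == 16);
}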
3720
// Op is an ATOMIC_LOAD_SUB operation.  Lower 8- and 16-bit operations
3721
// into ATOMIC_LOADW_SUBs and decide whether to convert 32- and 64-bit
3722
// operations into additions.
3723
SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
3724
72
                                                    SelectionDAG &DAG) const {
3725
72
  auto *Node = cast<AtomicSDNode>(Op.getNode());
3726
72
  EVT MemVT = Node->getMemoryVT();
3727
72
  if (MemVT == MVT::i32 || MemVT == MVT::i64) {
3728
36
    // A full-width operation.
3729
36
    assert(Op.getValueType() == MemVT && "Mismatched VTs");
3730
36
    SDValue Src2 = Node->getVal();
3731
36
    SDValue NegSrc2;
3732
36
    SDLoc DL(Src2);
3733
36
3734
36
    if (auto *Op2 = dyn_cast<ConstantSDNode>(Src2)) {
3735
22
      // Use an addition if the operand is constant and either LAA(G) is
3736
22
      // available or the negative value is in the range of A(G)FHI.
3737
22
      int64_t Value = (-Op2->getAPIntValue()).getSExtValue();
3738
22
      if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1())
3739
18
        NegSrc2 = DAG.getConstant(Value, DL, MemVT);
3740
22
    } else if (Subtarget.hasInterlockedAccess1())
3741
10
      // Use LAA(G) if available.
3742
10
      NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT),
3743
10
                            Src2);
3744
36
3745
36
    if (NegSrc2.getNode())
3746
28
      return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
3747
28
                           Node->getChain(), Node->getBasePtr(), NegSrc2,
3748
28
                           Node->getMemOperand());
3749
8
3750
8
    // Use the node as-is.
3751
8
    return Op;
3752
8
  }
3753
36
3754
36
  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
3755
36
}
3756
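A source-level case that exercises the constant path above: the fetch_sub of 1 is rewritten to an atomic add of -1, which subtargets with the interlocked-access facility can select as a single load-and-add instruction (a sketch, not from this file):

#include <atomic>

std::atomic<int> Counter;

int takeTicket() {
  return Counter.fetch_sub(1); // lowered as fetch_add(-1)
}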
3757
// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
3758
SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
3759
51
                                                    SelectionDAG &DAG) const {
3760
51
  auto *Node = cast<AtomicSDNode>(Op.getNode());
3761
51
  SDValue ChainIn = Node->getOperand(0);
3762
51
  SDValue Addr = Node->getOperand(1);
3763
51
  SDValue CmpVal = Node->getOperand(2);
3764
51
  SDValue SwapVal = Node->getOperand(3);
3765
51
  MachineMemOperand *MMO = Node->getMemOperand();
3766
51
  SDLoc DL(Node);
3767
51
3768
51
  // We have native support for 32-bit and 64-bit compare and swap, but we
3769
51
  // still need to expand extracting the "success" result from the CC.
3770
51
  EVT NarrowVT = Node->getMemoryVT();
3771
51
  EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
3772
51
  if (NarrowVT == WideVT) {
3773
27
    SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
3774
27
    SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
3775
27
    SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
3776
27
                                               DL, Tys, Ops, NarrowVT, MMO);
3777
27
    SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
3778
27
                                SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
3779
27
3780
27
    DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
3781
27
    DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
3782
27
    DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
3783
27
    return SDValue();
3784
27
  }
3785
24
3786
24
  // Convert 8-bit and 16-bit compare and swap to a loop, implemented
3787
24
  // via a fullword ATOMIC_CMP_SWAPW operation.
3788
24
  int64_t BitSize = NarrowVT.getSizeInBits();
3789
24
  EVT PtrVT = Addr.getValueType();
3790
24
3791
24
  // Get the address of the containing word.
3792
24
  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
3793
24
                                    DAG.getConstant(-4, DL, PtrVT));
3794
24
3795
24
  // Get the number of bits that the word must be rotated left in order
3796
24
  // to bring the field to the top bits of a GR32.
3797
24
  SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
3798
24
                                 DAG.getConstant(3, DL, PtrVT));
3799
24
  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
3800
24
3801
24
  // Get the complementing shift amount, for rotating a field in the top
3802
24
  // bits back to its proper position.
3803
24
  SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
3804
24
                                    DAG.getConstant(0, DL, WideVT), BitShift);
3805
24
3806
24
  // Construct the ATOMIC_CMP_SWAPW node.
3807
24
  SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
3808
24
  SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
3809
24
                    NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
3810
24
  SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
3811
24
                                             VTList, Ops, NarrowVT, MMO);
3812
24
  SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
3813
24
                              SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);
3814
24
3815
24
  DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
3816
24
  DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
3817
24
  DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
3818
24
  return SDValue();
3819
24
}
3820
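A 32-bit compare-exchange that takes the native path above: one ATOMIC_CMP_SWAP node, with the boolean success result recovered from the condition code by the emitSETCC expansion (illustrative sketch):

#include <atomic>

std::atomic<int> Word;

bool claim(int Expected, int Desired) {
  return Word.compare_exchange_strong(Expected, Desired);
}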
3821
MachineMemOperand::Flags
3822
15.9k
SystemZTargetLowering::getMMOFlags(const Instruction &I) const {
3823
15.9k
  // Because of how we convert atomic_load and atomic_store to normal loads and
3824
15.9k
  // stores in the DAG, we need to ensure that the MMOs are marked volatile
3825
15.9k
  // since DAGCombine hasn't been updated to account for atomic, but non-
3826
15.9k
  // volatile loads.  (See D57601)
3827
15.9k
  if (auto *SI = dyn_cast<StoreInst>(&I))
3828
6.10k
    if (SI->isAtomic())
3829
23
      return MachineMemOperand::MOVolatile;
3830
15.9k
  if (auto *LI = dyn_cast<LoadInst>(&I))
3831
9.29k
    if (LI->isAtomic())
3832
18
      return MachineMemOperand::MOVolatile;
3833
15.9k
  if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
3834
497
    if (AI->isAtomic())
3835
497
      return MachineMemOperand::MOVolatile;
3836
15.4k
  if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
3837
63
    if (AI->isAtomic())
3838
63
      return MachineMemOperand::MOVolatile;
3839
15.3k
  return MachineMemOperand::MONone;
3840
15.3k
}
3841
3842
SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
3843
3
                                              SelectionDAG &DAG) const {
3844
3
  MachineFunction &MF = DAG.getMachineFunction();
3845
3
  MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
3846
3
  return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
3847
3
                            SystemZ::R15D, Op.getValueType());
3848
3
}
3849
3850
SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
3851
2
                                                 SelectionDAG &DAG) const {
3852
2
  MachineFunction &MF = DAG.getMachineFunction();
3853
2
  MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
3854
2
  bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
3855
2
3856
2
  SDValue Chain = Op.getOperand(0);
3857
2
  SDValue NewSP = Op.getOperand(1);
3858
2
  SDValue Backchain;
3859
2
  SDLoc DL(Op);
3860
2
3861
2
  if (StoreBackchain) {
3862
1
    SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, MVT::i64);
3863
1
    Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo());
3864
1
  }
3865
2
3866
2
  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R15D, NewSP);
3867
2
3868
2
  if (StoreBackchain)
3869
1
    Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo());
3870
2
3871
2
  return Chain;
3872
2
}
3873
3874
SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
3875
17
                                             SelectionDAG &DAG) const {
3876
17
  bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3877
17
  if (!IsData)
3878
2
    // Just preserve the chain.
3879
2
    return Op.getOperand(0);
3880
15
3881
15
  SDLoc DL(Op);
3882
15
  bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
3883
15
  unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
3884
15
  auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
3885
15
  SDValue Ops[] = {
3886
15
    Op.getOperand(0),
3887
15
    DAG.getConstant(Code, DL, MVT::i32),
3888
15
    Op.getOperand(1)
3889
15
  };
3890
15
  return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
3891
15
                                 Node->getVTList(), Ops,
3892
15
                                 Node->getMemoryVT(), Node->getMemOperand());
3893
15
}
3894
3895
// Convert condition code in CCReg to an i32 value.
3896
293
static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
3897
293
  SDLoc DL(CCReg);
3898
293
  SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
3899
293
  return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
3900
293
                     DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
3901
293
}
3902
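What the IPM + SRL pair computes, modelled in plain C++: IPM deposits the 2-bit condition code at bit position SystemZ::IPM_CC (28) with the two bits above it zeroed, so a single right shift leaves CC in the range 0-3:

#include <cstdint>

static unsigned ccFromIPM(uint32_t IPMValue) {
  const unsigned IPM_CC = 28; // mirrors SystemZ::IPM_CC
  return IPMValue >> IPM_CC;  // no mask needed: the bits above CC are zero
}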
3903
SDValue
3904
SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
3905
122
                                              SelectionDAG &DAG) const {
3906
122
  unsigned Opcode, CCValid;
3907
122
  if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
3908
12
    assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
3909
12
    SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
3910
12
    SDValue CC = getCCResult(DAG, SDValue(Node, 0));
3911
12
    DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
3912
12
    return SDValue();
3913
12
  }
3914
110
3915
110
  return SDValue();
3916
110
}
3917
3918
SDValue
3919
SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
3920
1.75k
                                               SelectionDAG &DAG) const {
3921
1.75k
  unsigned Opcode, CCValid;
3922
1.75k
  if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
3923
281
    SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
3924
281
    if (Op->getNumValues() == 1)
3925
16
      return getCCResult(DAG, SDValue(Node, 0));
3926
265
    assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
3927
265
    return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
3928
265
                       SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
3929
265
  }
3930
1.47k
3931
1.47k
  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3932
1.47k
  switch (Id) {
3933
1.47k
  case Intrinsic::thread_pointer:
3934
1
    return lowerThreadPointer(SDLoc(Op), DAG);
3935
1.47k
3936
1.47k
  case Intrinsic::s390_vpdi:
3937
20
    return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
3938
20
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3939
1.47k
3940
1.47k
  case Intrinsic::s390_vperm:
3941
18
    return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
3942
18
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3943
1.47k
3944
1.47k
  case Intrinsic::s390_vuphb:
3945
15
  case Intrinsic::s390_vuphh:
3946
15
  case Intrinsic::s390_vuphf:
3947
15
    return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
3948
15
                       Op.getOperand(1));
3949
15
3950
15
  case Intrinsic::s390_vuplhb:
3951
9
  case Intrinsic::s390_vuplhh:
3952
9
  case Intrinsic::s390_vuplhf:
3953
9
    return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
3954
9
                       Op.getOperand(1));
3955
9
3956
15
  case Intrinsic::s390_vuplb:
3957
15
  case Intrinsic::s390_vuplhw:
3958
15
  case Intrinsic::s390_vuplf:
3959
15
    return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
3960
15
                       Op.getOperand(1));
3961
15
3962
15
  case Intrinsic::s390_vupllb:
3963
9
  case Intrinsic::s390_vupllh:
3964
9
  case Intrinsic::s390_vupllf:
3965
9
    return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
3966
9
                       Op.getOperand(1));
3967
9
3968
12
  case Intrinsic::s390_vsumb:
3969
12
  case Intrinsic::s390_vsumh:
3970
12
  case Intrinsic::s390_vsumgh:
3971
12
  case Intrinsic::s390_vsumgf:
3972
12
  case Intrinsic::s390_vsumqf:
3973
12
  case Intrinsic::s390_vsumqg:
3974
12
    return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
3975
12
                       Op.getOperand(1), Op.getOperand(2));
3976
1.37k
  }
3977
1.37k
3978
1.37k
  return SDValue();
3979
1.37k
}
3980
3981
namespace {
3982
// Says that SystemZISD operation Opcode can be used to perform the equivalent
3983
// of a VPERM with permute vector Bytes.  If Opcode takes three operands,
3984
// Operand is the constant third operand, otherwise it is the number of
3985
// bytes in each element of the result.
3986
struct Permute {
3987
  unsigned Opcode;
3988
  unsigned Operand;
3989
  unsigned char Bytes[SystemZ::VectorBytes];
3990
};
3991
}
3992
3993
static const Permute PermuteForms[] = {
3994
  // VMRHG
3995
  { SystemZISD::MERGE_HIGH, 8,
3996
    { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
3997
  // VMRHF
3998
  { SystemZISD::MERGE_HIGH, 4,
3999
    { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
4000
  // VMRHH
4001
  { SystemZISD::MERGE_HIGH, 2,
4002
    { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
4003
  // VMRHB
4004
  { SystemZISD::MERGE_HIGH, 1,
4005
    { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
4006
  // VMRLG
4007
  { SystemZISD::MERGE_LOW, 8,
4008
    { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
4009
  // VMRLF
4010
  { SystemZISD::MERGE_LOW, 4,
4011
    { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
4012
  // VMRLH
4013
  { SystemZISD::MERGE_LOW, 2,
4014
    { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
4015
  // VMRLB
4016
  { SystemZISD::MERGE_LOW, 1,
4017
    { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
4018
  // VPKG
4019
  { SystemZISD::PACK, 4,
4020
    { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
4021
  // VPKF
4022
  { SystemZISD::PACK, 2,
4023
    { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
4024
  // VPKH
4025
  { SystemZISD::PACK, 1,
4026
    { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
4027
  // VPDI V1, V2, 4  (low half of V1, high half of V2)
4028
  { SystemZISD::PERMUTE_DWORDS, 4,
4029
    { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
4030
  // VPDI V1, V2, 1  (high half of V1, low half of V2)
4031
  { SystemZISD::PERMUTE_DWORDS, 1,
4032
    { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
4033
};
4034
4035
// Called after matching a vector shuffle against a particular pattern.
4036
// Both the original shuffle and the pattern have two vector operands.
4037
// OpNos[0] is the operand of the original shuffle that should be used for
4038
// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
4039
// OpNos[1] is the same for operand 1 of the pattern.  Resolve these -1s and
4040
// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
4041
// for operands 0 and 1 of the pattern.
4042
385
static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
4043
385
  if (OpNos[0] < 0) {
4044
2
    if (OpNos[1] < 0)
4045
0
      return false;
4046
2
    OpNo0 = OpNo1 = OpNos[1];
4047
383
  } else if (OpNos[1] < 0) {
4048
230
    OpNo0 = OpNo1 = OpNos[0];
4049
230
  } else {
4050
153
    OpNo0 = OpNos[0];
4051
153
    OpNo1 = OpNos[1];
4052
153
  }
4053
385
  return true;
4054
385
}
4055
4056
// Bytes is a VPERM-like permute vector, except that -1 is used for
4057
// undefined bytes.  Return true if the VPERM can be implemented using P.
4058
// When returning true set OpNo0 to the VPERM operand that should be
4059
// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
4060
//
4061
// For example, if swapping the VPERM operands allows P to match, OpNo0
4062
// will be 1 and OpNo1 will be 0.  If instead Bytes only refers to one
4063
// operand, but rewriting it to use two duplicated operands allows it to
4064
// match P, then OpNo0 and OpNo1 will be the same.
4065
static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
4066
3.03k
                         unsigned &OpNo0, unsigned &OpNo1) {
4067
3.03k
  int OpNos[] = { -1, -1 };
4068
11.3k
  for (unsigned I = 0; I < SystemZ::VectorBytes; 
++I8.34k
) {
4069
11.0k
    int Elt = Bytes[I];
4070
11.0k
    if (Elt >= 0) {
4071
8.03k
      // Make sure that the two permute vectors use the same suboperand
4072
8.03k
      // byte number.  Only the operand numbers (the high bits) are
4073
8.03k
      // allowed to differ.
4074
8.03k
      if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
4075
2.64k
        return false;
4076
5.39k
      int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
4077
5.39k
      int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
4078
5.39k
      // Make sure that the operand mappings are consistent with previous
4079
5.39k
      // elements.
4080
5.39k
      if (OpNos[ModelOpNo] == 1 - RealOpNo)
4081
16
        return false;
4082
5.37k
      OpNos[ModelOpNo] = RealOpNo;
4083
5.37k
    }
4084
11.0k
  }
4085
3.03k
  return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
4086
3.03k
}
4087
4088
// As above, but search for a matching permute.
4089
static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
4090
433
                                   unsigned &OpNo0, unsigned &OpNo1) {
4091
433
  for (auto &P : PermuteForms)
4092
3.03k
    if (matchPermute(Bytes, P, OpNo0, OpNo1))
4093
373
      return &P;
4094
433
  return nullptr;
4095
433
}
4096
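The byte-compatibility test at the heart of matchPermute, extracted as standalone C++ (the operand-number consistency bookkeeping is omitted here; names are invented):

#include <array>

// A shuffle byte may differ from the model pattern only in which operand
// it selects (the high bits), never in the byte index within the operand
// (the low four bits, since SystemZ::VectorBytes is 16).
static bool bytesMatchModel(const std::array<int, 16> &Bytes,
                            const std::array<unsigned char, 16> &Model) {
  for (unsigned I = 0; I < 16; ++I)
    if (Bytes[I] >= 0 && ((Bytes[I] ^ Model[I]) & 15))
      return false;
  return true;
}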
4097
// Bytes is a VPERM-like permute vector, except that -1 is used for
4098
// undefined bytes.  This permute is an operand of an outer permute.
4099
// See whether redistributing the -1 bytes gives a shuffle that can be
4100
// implemented using P.  If so, set Transform to a VPERM-like permute vector
4101
// that, when applied to the result of P, gives the original permute in Bytes.
4102
static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
4103
                               const Permute &P,
4104
96
                               SmallVectorImpl<int> &Transform) {
4105
96
  unsigned To = 0;
4106
728
  for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
4107
718
    int Elt = Bytes[From];
4108
718
    if (Elt < 0)
4109
504
      // Byte number From of the result is undefined.
4110
504
      Transform[From] = -1;
4111
214
    else {
4112
1.65k
      while (P.Bytes[To] != Elt) {
4113
1.52k
        To += 1;
4114
1.52k
        if (To == SystemZ::VectorBytes)
4115
86
          return false;
4116
1.52k
      }
4117
214
      Transform[From] = To;
4118
128
    }
4119
718
  }
4120
96
  return true;
4121
96
}
4122
4123
// As above, but search for a matching permute.
4124
static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
4125
10
                                         SmallVectorImpl<int> &Transform) {
4126
10
  for (auto &P : PermuteForms)
4127
96
    if (matchDoublePermute(Bytes, P, Transform))
4128
10
      return &P;
4129
10
  return nullptr;
4130
10
}
4131
4132
// Convert the mask of the given shuffle op into a byte-level mask,
4133
// as if it had type vNi8.
4134
static bool getVPermMask(SDValue ShuffleOp,
4135
116
                         SmallVectorImpl<int> &Bytes) {
4136
116
  EVT VT = ShuffleOp.getValueType();
4137
116
  unsigned NumElements = VT.getVectorNumElements();
4138
116
  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
4139
116
4140
116
  if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
4141
116
    Bytes.resize(NumElements * BytesPerElement, -1);
4142
838
    for (unsigned I = 0; I < NumElements; ++I) {
4143
722
      int Index = VSN->getMaskElt(I);
4144
722
      if (Index >= 0)
4145
1.33k
        for (unsigned J = 0; J < BytesPerElement; ++J)
4146
966
          Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
4147
722
    }
4148
116
    return true;
4149
116
  }
4150
0
  if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
4151
0
      isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
4152
0
    unsigned Index = ShuffleOp.getConstantOperandVal(1);
4153
0
    Bytes.resize(NumElements * BytesPerElement, -1);
4154
0
    for (unsigned I = 0; I < NumElements; ++I)
4155
0
      for (unsigned J = 0; J < BytesPerElement; ++J)
4156
0
        Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
4157
0
    return true;
4158
0
  }
4159
0
  return false;
4160
0
}
4161
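The element-to-byte mask expansion that getVPermMask performs, as a standalone sketch:

#include <vector>

// Each element selector widens to BytesPerElement consecutive byte
// selectors; undef elements (-1) leave their bytes undefined.
static std::vector<int> toByteMask(const std::vector<int> &Mask,
                                   unsigned BytesPerElement) {
  std::vector<int> Bytes(Mask.size() * BytesPerElement, -1);
  for (unsigned I = 0; I < Mask.size(); ++I)
    if (Mask[I] >= 0)
      for (unsigned J = 0; J < BytesPerElement; ++J)
        Bytes[I * BytesPerElement + J] = Mask[I] * BytesPerElement + J;
  return Bytes;
}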
4162
// Bytes is a VPERM-like permute vector, except that -1 is used for
4163
// undefined bytes.  See whether bytes [Start, Start + BytesPerElement) of
4164
// the result come from a contiguous sequence of bytes from one input.
4165
// Set Base to the selector for the first byte if so.
4166
static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
4167
116
                            unsigned BytesPerElement, int &Base) {
4168
116
  Base = -1;
4169
296
  for (unsigned I = 0; I < BytesPerElement; ++I) {
4170
180
    if (Bytes[Start + I] >= 0) {
4171
180
      unsigned Elem = Bytes[Start + I];
4172
180
      if (Base < 0) {
4173
116
        Base = Elem - I;
4174
116
        // Make sure the bytes would come from one input operand.
4175
116
        if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
4176
0
          return false;
4177
64
      } else if (unsigned(Base) != Elem - I)
4178
0
        return false;
4179
180
    }
4180
180
  }
4181
116
  return true;
4182
116
}
4183
4184
// Bytes is a VPERM-like permute vector, except that -1 is used for
4185
// undefined bytes.  Return true if it can be performed using VSLDI.
4186
// When returning true, set StartIndex to the shift amount and OpNo0
4187
// and OpNo1 to the VPERM operands that should be used as the first
4188
// and second shift operand respectively.
4189
static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
4190
                               unsigned &StartIndex, unsigned &OpNo0,
4191
60
                               unsigned &OpNo1) {
4192
60
  int OpNos[] = { -1, -1 };
4193
60
  int Shift = -1;
4194
376
  for (unsigned I = 0; I < 16; ++I) {
4195
364
    int Index = Bytes[I];
4196
364
    if (Index >= 0) {
4197
341
      int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
4198
341
      int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
4199
341
      int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
4200
341
      if (Shift < 0)
4201
60
        Shift = ExpectedShift;
4202
281
      else if (Shift != ExpectedShift)
4203
43
        return false;
4204
298
      // Make sure that the operand mappings are consistent with previous
4205
298
      // elements.
4206
298
      if (OpNos[ModelOpNo] == 1 - RealOpNo)
4207
5
        return false;
4208
293
      OpNos[ModelOpNo] = RealOpNo;
4209
293
    }
4210
364
  }
4211
60
  StartIndex = Shift;
4212
12
  return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
4213
60
}
4214
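The VSLDI semantics that isShlDoublePermute matches against, written out directly (a model, not the instruction selector):

#include <array>
#include <cstdint>

// Result byte I is byte Shift + I of the 32-byte concatenation of the two
// operands, for Shift in 0..15.
static std::array<uint8_t, 16> vsldi(const std::array<uint8_t, 16> &A,
                                     const std::array<uint8_t, 16> &B,
                                     unsigned Shift) {
  std::array<uint8_t, 16> R{};
  for (unsigned I = 0; I < 16; ++I) {
    unsigned Src = Shift + I;
    R[I] = Src < 16 ? A[Src] : B[Src - 16];
  }
  return R;
}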
4215
// Create a node that performs P on operands Op0 and Op1, casting the
4216
// operands to the appropriate type.  The type of the result is determined by P.
4217
static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
4218
383
                              const Permute &P, SDValue Op0, SDValue Op1) {
4219
383
  // VPDI (PERMUTE_DWORDS) always operates on v2i64s.  The input
4220
383
  // elements of a PACK are twice as wide as the outputs.
4221
383
  unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
4222
383
                      P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
4223
363
                      P.Operand);
4224
383
  // Cast both operands to the appropriate type.
4225
383
  MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
4226
383
                              SystemZ::VectorBytes / InBytes);
4227
383
  Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
4228
383
  Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
4229
383
  SDValue Op;
4230
383
  if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
4231
20
    SDValue Op2 = DAG.getConstant(P.Operand, DL, MVT::i32);
4232
20
    Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
4233
363
  } else if (P.Opcode == SystemZISD::PACK) {
4234
102
    MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
4235
102
                                 SystemZ::VectorBytes / P.Operand);
4236
102
    Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
4237
261
  } else {
4238
261
    Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
4239
261
  }
4240
383
  return Op;
4241
383
}
4242
4243
// Bytes is a VPERM-like permute vector, except that -1 is used for
4244
// undefined bytes.  Implement it on operands Ops[0] and Ops[1] using
4245
// VSLDI or VPERM.
4246
static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
4247
                                     SDValue *Ops,
4248
60
                                     const SmallVectorImpl<int> &Bytes) {
4249
180
  for (unsigned I = 0; I < 2; ++I)
4250
120
    Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
4251
60
4252
60
  // First see whether VSLDI can be used.
4253
60
  unsigned StartIndex, OpNo0, OpNo1;
4254
60
  if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
4255
12
    return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
4256
12
                       Ops[OpNo1], DAG.getConstant(StartIndex, DL, MVT::i32));
4257
48
4258
48
  // Fall back on VPERM.  Construct an SDNode for the permute vector.
4259
48
  SDValue IndexNodes[SystemZ::VectorBytes];
4260
816
  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
4261
768
    if (Bytes[I] >= 0)
4262
552
      IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
4263
216
    else
4264
216
      IndexNodes[I] = DAG.getUNDEF(MVT::i32);
4265
48
  SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
4266
48
  return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2);
4267
48
}
4268
4269
namespace {
4270
// Describes a general N-operand vector shuffle.
4271
struct GeneralShuffle {
4272
787
  GeneralShuffle(EVT vt) : VT(vt) {}
4273
  void addUndef();
4274
  bool add(SDValue, unsigned);
4275
  SDValue getNode(SelectionDAG &, const SDLoc &);
4276
4277
  // The operands of the shuffle.
4278
  SmallVector<SDValue, SystemZ::VectorBytes> Ops;
4279
4280
  // Index I is -1 if byte I of the result is undefined.  Otherwise the
4281
  // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
4282
  // Bytes[I] / SystemZ::VectorBytes.
4283
  SmallVector<int, SystemZ::VectorBytes> Bytes;
4284
4285
  // The type of the shuffle result.
4286
  EVT VT;
4287
};
4288
}
4289
4290
// Add an extra undefined element to the shuffle.
4291
1.23k
void GeneralShuffle::addUndef() {
4292
1.23k
  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
4293
4.65k
  for (unsigned I = 0; I < BytesPerElement; ++I)
4294
3.42k
    Bytes.push_back(-1);
4295
1.23k
}
4296
4297
// Add an extra element to the shuffle, taking it from element Elem of Op.
4298
// A null Op indicates a vector input whose value will be calculated later;
4299
// there is at most one such input per shuffle and it always has the same
4300
// type as the result. Aborts and returns false if the source vector elements
4301
// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
4302
// LLVM they become implicitly extended, but this is rare and not optimized.
4303
3.41k
bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
4304
3.41k
  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
4305
3.41k
4306
3.41k
  // The source vector can have wider elements than the result,
4307
3.41k
  // either through an explicit TRUNCATE or because of type legalization.
4308
3.41k
  // We want the least significant part.
4309
3.41k
  EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
4310
3.41k
  unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
4311
3.41k
4312
3.41k
  // Return false if the source elements are smaller than their destination
4313
3.41k
  // elements.
4314
3.41k
  if (FromBytesPerElement < BytesPerElement)
4315
1
    return false;
4316
3.41k
4317
3.41k
  unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
4318
3.41k
                   (FromBytesPerElement - BytesPerElement));
4319
3.41k
4320
3.41k
  // Look through things like shuffles and bitcasts.
4321
3.61k
  while (Op.getNode()) {
4322
2.32k
    if (Op.getOpcode() == ISD::BITCAST)
4323
150
      Op = Op.getOperand(0);
4324
2.17k
    else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
4325
45
      // See whether the bytes we need come from a contiguous part of one
4326
45
      // operand.
4327
45
      SmallVector<int, SystemZ::VectorBytes> OpBytes;
4328
45
      if (!getVPermMask(Op, OpBytes))
4329
0
        break;
4330
45
      int NewByte;
4331
45
      if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
4332
0
        break;
4333
45
      if (NewByte < 0) {
4334
0
        addUndef();
4335
0
        return true;
4336
0
      }
4337
45
      Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
4338
45
      Byte = unsigned(NewByte) % SystemZ::VectorBytes;
4339
2.13k
    } else if (Op.isUndef()) {
4340
0
      addUndef();
4341
0
      return true;
4342
0
    } else
4343
2.13k
      break;
4344
2.32k
  }
4345
3.41k
4346
3.41k
  // Make sure that the source of the extraction is in Ops.
4347
3.41k
  unsigned OpNo = 0;
4348
4.06k
  for (; OpNo < Ops.size(); ++OpNo)
4349
3.10k
    if (Ops[OpNo] == Op)
4350
2.46k
      break;
4351
3.41k
  if (OpNo == Ops.size())
4352
954
    Ops.push_back(Op);
4353
3.41k
4354
3.41k
  // Add the element to Bytes.
4355
3.41k
  unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
4356
12.5k
  for (unsigned I = 0; I < BytesPerElement; ++I)
4357
9.16k
    Bytes.push_back(Base + I);
4358
3.41k
4359
3.41k
  return true;
4360
3.41k
}
4361
4362
// Return SDNodes for the completed shuffle.
4363
433
SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
4364
433
  assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
4365
433
4366
433
  if (Ops.size() == 0)
4367
0
    return DAG.getUNDEF(VT);
4368
433
4369
433
  // Make sure that there are at least two shuffle operands.
4370
433
  if (Ops.size() == 1)
4371
275
    Ops.push_back(DAG.getUNDEF(MVT::v16i8));
4372
433
4373
433
  // Create a tree of shuffles, deferring root node until after the loop.
4374
433
  // Try to redistribute the undefined elements of non-root nodes so that
4375
433
  // the non-root shuffles match something like a pack or merge, then adjust
4376
433
  // the parent node's permute vector to compensate for the new order.
4377
433
  // Among other things, this copes with vectors like <2 x i16> that were
4378
433
  // padded with undefined elements during type legalization.
4379
433
  //
4380
433
  // In the best case this redistribution will lead to the whole tree
4381
433
  // using packs and merges.  It should rarely be a loss in other cases.
4382
433
  unsigned Stride = 1;
4383
437
  for (; Stride * 2 < Ops.size(); Stride *= 2) {
4384
14
    for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
4385
10
      SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
4386
10
4387
10
      // Create a mask for just these two operands.
4388
10
      SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
4389
170
      for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
4390
160
        unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
4391
160
        unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
4392
160
        if (OpNo == I)
4393
32
          NewBytes[J] = Byte;
4394
128
        else if (OpNo == I + Stride)
4395
32
          NewBytes[J] = SystemZ::VectorBytes + Byte;
4396
96
        else
4397
96
          NewBytes[J] = -1;
4398
160
      }
4399
10
      // See if it would be better to reorganize NewMask to avoid using VPERM.
4400
10
      SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
4401
10
      if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
4402
10
        Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
4403
10
        // Applying NewBytesMap to Ops[I] gets back to NewBytes.
4404
170
        for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
4405
160
          if (NewBytes[J] >= 0) {
4406
64
            assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
4407
64
                   "Invalid double permute");
4408
64
            Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
4409
64
          } else
4410
160
            assert(NewBytesMap[J] < 0 && "Invalid double permute");
4411
160
        }
4412
10
      } else {
4413
0
        // Just use NewBytes on the operands.
4414
0
        Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
4415
0
        for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
4416
0
          if (NewBytes[J] >= 0)
4417
0
            Bytes[J] = I * SystemZ::VectorBytes + J;
4418
0
      }
4419
10
    }
4420
4
  }
4421
433
4422
433
  // Now we just have 2 inputs.  Put the second operand in Ops[1].
4423
433
  if (Stride > 1) {
4424
3
    Ops[1] = Ops[Stride];
4425
51
    for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
4426
48
      if (Bytes[I] >= int(SystemZ::VectorBytes))
4427
24
        Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
4428
3
  }
4429
433
4430
433
  // Look for an instruction that can do the permute without resorting
4431
433
  // to VPERM.
4432
433
  unsigned OpNo0, OpNo1;
4433
433
  SDValue Op;
4434
433
  if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
4435
373
    Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
4436
60
  else
4437
60
    Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
4438
433
  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
4439
433
}
4440
4441
// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
4442
158
static bool isScalarToVector(SDValue Op) {
4443
244
  for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
4444
210
    if (!Op.getOperand(I).isUndef())
4445
124
      return false;
4446
158
  return true;
4447
158
}
4448
4449
// Return a vector of type VT that contains Value in the first element.
4450
// The other elements don't matter.
4451
static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
4452
254
                                   SDValue Value) {
4453
254
  // If we have a constant, replicate it to all elements and let the
4454
254
  // BUILD_VECTOR lowering take care of it.
4455
254
  if (Value.getOpcode() == ISD::Constant ||
4456
254
      Value.getOpcode() == ISD::ConstantFP) {
4457
23
    SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
4458
23
    return DAG.getBuildVector(VT, DL, Ops);
4459
23
  }
4460
231
  if (Value.isUndef())
4461
0
    return DAG.getUNDEF(VT);
4462
231
  return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
4463
231
}
4464
4465
// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
4466
// element 1.  Used for cases in which replication is cheap.
4467
static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
4468
141
                                 SDValue Op0, SDValue Op1) {
4469
141
  if (Op0.isUndef()) {
4470
7
    if (Op1.isUndef())
4471
5
      return DAG.getUNDEF(VT);
4472
2
    return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
4473
2
  }
4474
134
  if (Op1.isUndef())
4475
24
    return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
4476
110
  return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
4477
110
                     buildScalarToVector(DAG, DL, VT, Op0),
4478
110
                     buildScalarToVector(DAG, DL, VT, Op1));
4479
110
}
4480
4481
// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
4482
// vector for them.
4483
static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
4484
78
                          SDValue Op1) {
4485
78
  if (Op0.isUndef() && Op1.isUndef())
4486
0
    return DAG.getUNDEF(MVT::v2i64);
4487
78
  // If one of the two inputs is undefined then replicate the other one,
4488
78
  // in order to avoid using another register unnecessarily.
4489
78
  if (Op0.isUndef())
4490
2
    Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
4491
76
  else if (Op1.isUndef())
4492
5
    Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
4493
71
  else {
4494
71
    Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
4495
71
    Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
4496
71
  }
4497
78
  return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
4498
78
}
4499
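A source-level shape that reaches joinDwords, shown with the Clang/GCC vector extension for brevity: two i64 GPRs become one v2i64 through a single JOIN_DWORDS (VLVGP) node (illustrative only):

#include <cstdint>

typedef int64_t v2i64 __attribute__((vector_size(16)));

v2i64 pairDwords(int64_t Hi, int64_t Lo) {
  return (v2i64){Hi, Lo};
}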
4500
// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
4501
// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
4502
// the non-EXTRACT_VECTOR_ELT elements.  See if the given BUILD_VECTOR
4503
// would benefit from this representation and return it if so.
4504
static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
4505
402
                                     BuildVectorSDNode *BVN) {
4506
402
  EVT VT = BVN->getValueType(0);
4507
402
  unsigned NumElements = VT.getVectorNumElements();
4508
402
4509
402
  // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
4510
402
  // on byte vectors.  If there are non-EXTRACT_VECTOR_ELT elements that still
4511
402
  // need a BUILD_VECTOR, add an additional placeholder operand for that
4512
402
  // BUILD_VECTOR and store its operands in ResidueOps.
4513
402
  GeneralShuffle GS(VT);
4514
402
  SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
4515
402
  bool FoundOne = false;
4516
2.66k
  for (unsigned I = 0; I < NumElements; ++I) {
4517
2.26k
    SDValue Op = BVN->getOperand(I);
4518
2.26k
    if (Op.getOpcode() == ISD::TRUNCATE)
4519
122
      Op = Op.getOperand(0);
4520
2.26k
    if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4521
2.26k
        Op.getOperand(1).getOpcode() == ISD::Constant) {
4522
333
      unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
4523
333
      if (!GS.add(Op.getOperand(0), Elem))
4524
1
        return SDValue();
4525
332
      FoundOne = true;
4526
1.93k
    } else if (Op.isUndef()) {
4527
648
      GS.addUndef();
4528
1.28k
    } else {
4529
1.28k
      if (!GS.add(SDValue(), ResidueOps.size()))
4530
0
        return SDValue();
4531
1.28k
      ResidueOps.push_back(BVN->getOperand(I));
4532
1.28k
    }
4533
2.26k
  }
4534
402
4535
402
  // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
4536
402
  if (!FoundOne)
4537
353
    return SDValue();
4538
48
4539
48
  // Create the BUILD_VECTOR for the remaining elements, if any.
4540
48
  if (!ResidueOps.empty()) {
4541
36
    while (ResidueOps.size() < NumElements)
4542
30
      ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
4543
10
    for (auto &Op : GS.Ops) {
4544
10
      if (!Op.getNode()) {
4545
6
        Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
4546
6
        break;
4547
6
      }
4548
10
    }
4549
6
  }
4550
48
  return GS.getNode(DAG, SDLoc(BVN));
4551
48
}
4552
4553
855
bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
4554
855
  if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
4555
65
    return true;
4556
790
  if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
4557
10
    return true;
4558
780
  return false;
4559
780
}
4560
4561
// Combine GPR scalar values Elems into a vector of type VT.
4562
SDValue
4563
SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
4564
320
                                   SmallVectorImpl<SDValue> &Elems) const {
4565
320
  // See whether there is a single replicated value.
4566
320
  SDValue Single;
4567
320
  unsigned int NumElements = Elems.size();
4568
320
  unsigned int Count = 0;
4569
1.19k
  for (auto Elem : Elems) {
4570
1.19k
    if (!Elem.isUndef()) {
4571
912
      if (!Single.getNode())
4572
320
        Single = Elem;
4573
592
      else if (Elem != Single) {
4574
190
        Single = SDValue();
4575
190
        break;
4576
190
      }
4577
722
      Count += 1;
4578
722
    }
4579
1.19k
  }
4580
320
  // There are three cases here:
4581
320
  //
4582
320
  // - if the only defined element is a loaded one, the best sequence
4583
320
  //   is a replicating load.
4584
320
  //
4585
320
  // - otherwise, if the only defined element is an i64 value, we will
4586
320
  //   end up with the same VLVGP sequence regardless of whether we short-cut
4587
320
  //   for replication or fall through to the later code.
4588
320
  //
4589
320
  // - otherwise, if the only defined element is an i32 or smaller value,
4590
320
  //   we would need 2 instructions to replicate it: VLVGP followed by VREPx.
4591
320
  //   This is only a win if the single defined element is used more than once.
4592
320
  //   In other cases we're better off using a single VLVGx.
4593
320
  if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
4594
95
    return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
4595
225
4596
225
  // If all elements are loads, use VLREP/VLEs (below).
4597
225
  bool AllLoads = true;
4598
225
  for (auto Elem : Elems)
4599
254
    if (!isVectorElementLoad(Elem)) {
4600
220
      AllLoads = false;
4601
220
      break;
4602
220
    }
4603
225
4604
225
  // The best way of building a v2i64 from two i64s is to use VLVGP.
4605
225
  if (VT == MVT::v2i64 && !AllLoads)
4606
32
    return joinDwords(DAG, DL, Elems[0], Elems[1]);
4607
193
4608
193
  // Use a 64-bit merge high to combine two doubles.
4609
193
  if (VT == MVT::v2f64 && !AllLoads)
4610
67
    return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
4611
126
4612
126
  // Build v4f32 values directly from the FPRs:
4613
126
  //
4614
126
  //   <Axxx> <Bxxx> <Cxxxx> <Dxxx>
4615
126
  //         V              V         VMRHF
4616
126
  //      <ABxx>         <CDxx>
4617
126
  //                V                 VMRHG
4618
126
  //              <ABCD>
4619
126
  if (VT == MVT::v4f32 && !AllLoads) {
4620
37
    SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
4621
37
    SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
4622
37
    // Avoid unnecessary undefs by reusing the other operand.
4623
37
    if (Op01.isUndef())
4624
1
      Op01 = Op23;
4625
36
    else if (Op23.isUndef())
4626
4
      Op23 = Op01;
4627
37
    // Merging identical replications is a no-op.
4628
37
    if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
4629
1
      return Op01;
4630
36
    Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
4631
36
    Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
4632
36
    SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
4633
36
                             DL, MVT::v2i64, Op01, Op23);
4634
36
    return DAG.getNode(ISD::BITCAST, DL, VT, Op);
4635
36
  }
4636
89
4637
89
  // Collect the constant terms.
4638
89
  SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
4639
89
  SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false);
4640
89
4641
89
  unsigned NumConstants = 0;
4642
811
  for (unsigned I = 0; I < NumElements; ++I) {
4643
722
    SDValue Elem = Elems[I];
4644
722
    if (Elem.getOpcode() == ISD::Constant ||
4645
722
        Elem.getOpcode() == ISD::ConstantFP) {
4646
147
      NumConstants += 1;
4647
147
      Constants[I] = Elem;
4648
147
      Done[I] = true;
4649
147
    }
4650
722
  }
4651
89
  // If there was at least one constant, fill in the other elements of
4652
89
  // Constants with undefs to get a full vector constant and use that
4653
89
  // as the starting point.
4654
89
  SDValue Result;
4655
89
  SDValue ReplicatedVal;
4656
89
  if (NumConstants > 0) {
4657
197
    for (unsigned I = 0; I < NumElements; ++I)
4658
176
      if (!Constants[I].getNode())
4659
29
        Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
4660
21
    Result = DAG.getBuildVector(VT, DL, Constants);
4661
68
  } else {
4662
68
    // Otherwise try to use VLREP or VLVGP to start the sequence in order to
4663
68
    // avoid a false dependency on any previous contents of the vector
4664
68
    // register.
4665
68
4666
68
    // Use a VLREP if at least one element is a load. Make sure to replicate
4667
68
    // the load with the most elements having its value.
4668
68
    std::map<const SDNode*, unsigned> UseCounts;
4669
68
    SDNode *LoadMaxUses = nullptr;
4670
614
    for (unsigned I = 0; I < NumElements; ++I)
4671
546
      if (isVectorElementLoad(Elems[I])) {
4672
21
        SDNode *Ld = Elems[I].getNode();
4673
21
        UseCounts[Ld]++;
4674
21
        if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
4675
7
          LoadMaxUses = Ld;
4676
21
      }
4677
68
    if (LoadMaxUses != nullptr) {
4678
6
      ReplicatedVal = SDValue(LoadMaxUses, 0);
4679
6
      Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
4680
62
    } else {
4681
62
      // Try to use VLVGP.
4682
62
      unsigned I1 = NumElements / 2 - 1;
4683
62
      unsigned I2 = NumElements - 1;
4684
62
      bool Def1 = !Elems[I1].isUndef();
4685
62
      bool Def2 = !Elems[I2].isUndef();
4686
62
      if (Def1 || Def2) {
4687
46
        SDValue Elem1 = Elems[Def1 ? I1 : I2];
4688
46
        SDValue Elem2 = Elems[Def2 ? I2 : I1];
4689
46
        Result = DAG.getNode(ISD::BITCAST, DL, VT,
4690
46
                             joinDwords(DAG, DL, Elem1, Elem2));
4691
46
        Done[I1] = true;
4692
46
        Done[I2] = true;
4693
46
      } else
4694
16
        Result = DAG.getUNDEF(VT);
4695
62
    }
4696
68
  }
4697
89
4698
89
  // Use VLVGx to insert the other elements.
4699
811
  for (unsigned I = 0; I < NumElements; ++I)
4700
722
    if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
4701
265
      Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
4702
265
                           DAG.getConstant(I, DL, MVT::i32));
4703
89
  return Result;
4704
89
}
4705
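The replication short-cut above at the source level (Clang/GCC vector extension, illustrative only): a non-constant value used in every lane has Count > 1 and is built with a single REPLICATE rather than per-element insertions:

#include <cstdint>

typedef int32_t v4i32 __attribute__((vector_size(16)));

v4i32 splat4(int32_t X) {
  return (v4i32){X, X, X, X};
}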
4706
SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
4707
1.95k
                                                 SelectionDAG &DAG) const {
4708
1.95k
  auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
4709
1.95k
  SDLoc DL(Op);
4710
1.95k
  EVT VT = Op.getValueType();
4711
1.95k
4712
1.95k
  if (BVN->isConstant()) {
4713
1.54k
    if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
4714
1.40k
      return Op;
4715
141
4716
141
    // Fall back to loading it from memory.
4717
141
    return SDValue();
4718
141
  }
4719
402
4720
402
  // See if we should use shuffles to construct the vector from other vectors.
4721
402
  if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
4722
48
    return Res;
4723
354
4724
354
  // Detect SCALAR_TO_VECTOR conversions.
4725
354
  if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op))
4726
34
    return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
4727
320
4728
320
  // Otherwise use buildVector to build the vector up from GPRs.
4729
320
  unsigned NumElements = Op.getNumOperands();
4730
320
  SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
4731
1.94k
  for (unsigned I = 0; I < NumElements; ++I)
4732
1.62k
    Ops[I] = Op.getOperand(I);
4733
320
  return buildVector(DAG, DL, VT, Ops);
4734
320
}
4735
4736
SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
4737
431
                                                   SelectionDAG &DAG) const {
4738
431
  auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
4739
431
  SDLoc DL(Op);
4740
431
  EVT VT = Op.getValueType();
4741
431
  unsigned NumElements = VT.getVectorNumElements();
4742
431
4743
431
  if (VSN->isSplat()) {
4744
46
    SDValue Op0 = Op.getOperand(0);
4745
46
    unsigned Index = VSN->getSplatIndex();
4746
46
    assert(Index < VT.getVectorNumElements() &&
4747
46
           "Splat index should be defined and in first operand");
4748
46
    // See whether the value we're splatting is directly available as a scalar.
4749
46
    if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
4750
46
        Op0.getOpcode() == ISD::BUILD_VECTOR)
4751
0
      return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
4752
46
    // Otherwise keep it as a vector-to-vector operation.
4753
46
    return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
4754
46
                       DAG.getConstant(Index, DL, MVT::i32));
4755
46
  }
4756
385
4757
385
  GeneralShuffle GS(VT);
4758
2.76k
  for (unsigned I = 0; I < NumElements; ++I) {
4759
2.38k
    int Elt = VSN->getMaskElt(I);
4760
2.38k
    if (Elt < 0)
4761
584
      GS.addUndef();
4762
1.79k
    else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
4763
1.79k
                     unsigned(Elt) % NumElements))
4764
0
      return SDValue();
4765
2.38k
  }
4766
385
  return GS.getNode(DAG, SDLoc(VSN));
4767
385
}
4768
4769
SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
4770
26
                                                     SelectionDAG &DAG) const {
4771
26
  SDLoc DL(Op);
4772
26
  // Just insert the scalar into element 0 of an undefined vector.
4773
26
  return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
4774
26
                     Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
4775
26
                     Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
4776
26
}
4777
4778
SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
4779
59
                                                      SelectionDAG &DAG) const {
4780
59
  // Handle insertions of floating-point values.
4781
59
  SDLoc DL(Op);
4782
59
  SDValue Op0 = Op.getOperand(0);
4783
59
  SDValue Op1 = Op.getOperand(1);
4784
59
  SDValue Op2 = Op.getOperand(2);
4785
59
  EVT VT = Op.getValueType();
4786
59
4787
59
  // Insertions into constant indices of a v2f64 can be done using VPDI.
4788
59
  // However, if the inserted value is a bitcast or a constant then it's
4789
59
  // better to use GPRs, as below.
4790
59
  if (VT == MVT::v2f64 &&
4791
59
      
Op1.getOpcode() != ISD::BITCAST38
&&
4792
59
      
Op1.getOpcode() != ISD::ConstantFP38
&&
4793
59
      
Op2.getOpcode() == ISD::Constant34
) {
4794
28
    uint64_t Index = cast<ConstantSDNode>(Op2)->getZExtValue();
4795
28
    unsigned Mask = VT.getVectorNumElements() - 1;
4796
28
    if (Index <= Mask)
4797
28
      return Op;
4798
31
  }
4799
31
4800
31
  // Otherwise bitcast to the equivalent integer form and insert via a GPR.
4801
31
  MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
4802
31
  MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
4803
31
  SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
4804
31
                            DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
4805
31
                            DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
4806
31
  return DAG.getNode(ISD::BITCAST, DL, VT, Res);
4807
31
}
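The fallback above is the classic reinterpret/operate/reinterpret pattern: move the float bit pattern through an integer vector, where the insertion is directly supported. A standalone C++ analogue for a v4f32-like case, with memcpy standing in for ISD::BITCAST:

#include <cassert>
#include <cstdint>
#include <cstring>

void insertElementViaGPR(float Vec[4], float Val, unsigned Index) {
  assert(Index < 4 && "lane index must be in range");
  uint32_t Bits;                           // integer image of the value (the "GPR")
  std::memcpy(&Bits, &Val, sizeof Bits);   // BITCAST f32 -> i32
  uint32_t IntVec[4];
  std::memcpy(IntVec, Vec, sizeof IntVec); // BITCAST v4f32 -> v4i32
  IntVec[Index] = Bits;                    // integer INSERT_VECTOR_ELT
  std::memcpy(Vec, IntVec, sizeof IntVec); // BITCAST back to v4f32
}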
4808
4809
SDValue
4810
SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
4811
323
                                               SelectionDAG &DAG) const {
4812
323
  // Handle extractions of floating-point values.
4813
323
  SDLoc DL(Op);
4814
323
  SDValue Op0 = Op.getOperand(0);
4815
323
  SDValue Op1 = Op.getOperand(1);
4816
323
  EVT VT = Op.getValueType();
4817
323
  EVT VecVT = Op0.getValueType();
4818
323
4819
323
  // Extractions of constant indices can be done directly.
4820
323
  if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
4821
316
    uint64_t Index = CIndexN->getZExtValue();
4822
316
    unsigned Mask = VecVT.getVectorNumElements() - 1;
4823
316
    if (Index <= Mask)
4824
316
      return Op;
4825
7
  }
4826
7
4827
7
  // Otherwise bitcast to the equivalent integer form and extract via a GPR.
4828
7
  MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
4829
7
  MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
4830
7
  SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
4831
7
                            DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
4832
7
  return DAG.getNode(ISD::BITCAST, DL, VT, Res);
4833
7
}
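Both element accessors gate the fast path on the same constant-index test: with Mask = NumElements - 1, the check Index <= Mask is simply Index < NumElements phrased against a precomputed mask. A one-function sketch:

#include <cstdint>

bool isLegalConstantIndex(uint64_t Index, unsigned NumElements) {
  unsigned Mask = NumElements - 1; // e.g. 3 for a four-lane vector
  return Index <= Mask;            // equivalent to Index < NumElements
}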
4834
4835
SDValue
4836
SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
4837
103
                                              unsigned UnpackHigh) const {
4838
103
  SDValue PackedOp = Op.getOperand(0);
4839
103
  EVT OutVT = Op.getValueType();
4840
103
  EVT InVT = PackedOp.getValueType();
4841
103
  unsigned ToBits = OutVT.getScalarSizeInBits();
4842
103
  unsigned FromBits = InVT.getScalarSizeInBits();
4843
128
  do {
4844
128
    FromBits *= 2;
4845
128
    EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
4846
128
                                 SystemZ::VectorBits / FromBits);
4847
128
    PackedOp = DAG.getNode(UnpackHigh, SDLoc(PackedOp), OutVT, PackedOp);
4848
128
  } while (FromBits != ToBits);
4849
103
  return PackedOp;
4850
103
}
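The do/while above widens in steps: every unpack doubles the element width and recomputes the vector type from the fixed register width, stopping once the target width is reached. A runnable trace of the i8-to-i32 case, assuming the 128-bit SystemZ vector register width (SystemZ::VectorBits):

#include <cstdio>

int main() {
  const unsigned VectorBits = 128; // SystemZ vector register width
  unsigned FromBits = 8, ToBits = 32;
  do {
    FromBits *= 2; // each unpack doubles the element width
    std::printf("unpack to v%ui%u\n", VectorBits / FromBits, FromBits);
  } while (FromBits != ToBits);
}

This prints "unpack to v8i16" and then "unpack to v4i32", i.e. two unpack steps for an i8-to-i32 extension.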
4851
4852
SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
4853
221
                                          unsigned ByScalar) const {
4854
221
  // Look for cases where a vector shift can use the *_BY_SCALAR form.
4855
221
  SDValue Op0 = Op.getOperand(0);
4856
221
  SDValue Op1 = Op.getOperand(1);
4857
221
  SDLoc DL(Op);
4858
221
  EVT VT = Op.getValueType();
4859
221
  unsigned ElemBitSize = VT.getScalarSizeInBits();
4860
221
4861
221
  // See whether the shift vector is a splat represented as BUILD_VECTOR.
4862
221
  if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
4863
185
    APInt SplatBits, SplatUndef;
4864
185
    unsigned SplatBitSize;
4865
185
    bool HasAnyUndefs;
4866
185
    // Check for constant splats.  Use ElemBitSize as the minimum element
4867
185
    // width and reject splats that need wider elements.
4868
185
    if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
4869
185
                             ElemBitSize, true) &&
4870
185
        SplatBitSize == ElemBitSize) {
4871
173
      SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
4872
173
                                      DL, MVT::i32);
4873
173
      return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
4874
173
    }
4875
12
    // Check for variable splats.
4876
12
    BitVector UndefElements;
4877
12
    SDValue Splat = BVN->getSplatValue(&UndefElements);
4878
12
    if (Splat) {
4879
12
      // Since i32 is the smallest legal type, we either need a no-op
4880
12
      // or a truncation.
4881
12
      SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
4882
12
      return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
4883
12
    }
4884
36
  }
4885
36
4886
36
  // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
4887
36
  // and the shift amount is directly available in a GPR.
4888
36
  if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
4889
0
    if (VSN->isSplat()) {
4890
0
      SDValue VSNOp0 = VSN->getOperand(0);
4891
0
      unsigned Index = VSN->getSplatIndex();
4892
0
      assert(Index < VT.getVectorNumElements() &&
4893
0
             "Splat index should be defined and in first operand");
4894
0
      if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
4895
0
          VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
4896
0
        // Since i32 is the smallest legal type, we either need a no-op
4897
0
        // or a truncation.
4898
0
        SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
4899
0
                                    VSNOp0.getOperand(Index));
4900
0
        return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
4901
0
      }
4902
36
    }
4903
0
  }
4904
36
4905
36
  // Otherwise just treat the current form as legal.
4906
36
  return Op;
4907
36
}
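Both splat checks above implement the same idea: if every lane shifts by the same amount, one scalar amount drives the *_BY_SCALAR node. A minimal standalone version of the constant-splat case, keeping the 12-bit mask applied in the code (SplatBits & 0xfff):

#include <cassert>
#include <cstdint>
#include <optional>

std::optional<uint32_t> splatShiftAmount(const uint32_t Amounts[], unsigned N) {
  assert(N > 0 && "need at least one lane");
  for (unsigned I = 1; I < N; ++I)
    if (Amounts[I] != Amounts[0])
      return std::nullopt;   // not a splat: keep the generic vector shift
  return Amounts[0] & 0xfff; // single per-element shift amount
}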
4908
4909
SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
4910
11.5k
                                              SelectionDAG &DAG) const {
4911
11.5k
  switch (Op.getOpcode()) {
4912
11.5k
  case ISD::FRAMEADDR:
4913
2
    return lowerFRAMEADDR(Op, DAG);
4914
11.5k
  case ISD::RETURNADDR:
4915
1
    return lowerRETURNADDR(Op, DAG);
4916
11.5k
  case ISD::BR_CC:
4917
719
    return lowerBR_CC(Op, DAG);
4918
11.5k
  case ISD::SELECT_CC:
4919
875
    return lowerSELECT_CC(Op, DAG);
4920
11.5k
  case ISD::SETCC:
4921
1.18k
    return lowerSETCC(Op, DAG);
4922
11.5k
  case ISD::GlobalAddress:
4923
617
    return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
4924
11.5k
  case ISD::GlobalTLSAddress:
4925
14
    return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
4926
11.5k
  case ISD::BlockAddress:
4927
1
    return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
4928
11.5k
  case ISD::JumpTable:
4929
3
    return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
4930
11.5k
  case ISD::ConstantPool:
4931
843
    return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
4932
11.5k
  case ISD::BITCAST:
4933
17
    return lowerBITCAST(Op, DAG);
4934
11.5k
  case ISD::VASTART:
4935
0
    return lowerVASTART(Op, DAG);
4936
11.5k
  case ISD::VACOPY:
4937
0
    return lowerVACOPY(Op, DAG);
4938
11.5k
  case ISD::DYNAMIC_STACKALLOC:
4939
28
    return lowerDYNAMIC_STACKALLOC(Op, DAG);
4940
11.5k
  case ISD::GET_DYNAMIC_AREA_OFFSET:
4941
3
    return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
4942
11.5k
  case ISD::SMUL_LOHI:
4943
18
    return lowerSMUL_LOHI(Op, DAG);
4944
11.5k
  case ISD::UMUL_LOHI:
4945
29
    return lowerUMUL_LOHI(Op, DAG);
4946
11.5k
  case ISD::SDIVREM:
4947
93
    return lowerSDIVREM(Op, DAG);
4948
11.5k
  case ISD::UDIVREM:
4949
56
    return lowerUDIVREM(Op, DAG);
4950
11.5k
  case ISD::SADDO:
4951
884
  case ISD::SSUBO:
4952
884
  case ISD::UADDO:
4953
884
  case ISD::USUBO:
4954
884
    return lowerXALUO(Op, DAG);
4955
884
  case ISD::ADDCARRY:
4956
89
  case ISD::SUBCARRY:
4957
89
    return lowerADDSUBCARRY(Op, DAG);
4958
478
  case ISD::OR:
4959
478
    return lowerOR(Op, DAG);
4960
89
  case ISD::CTPOP:
4961
19
    return lowerCTPOP(Op, DAG);
4962
89
  case ISD::ATOMIC_FENCE:
4963
7
    return lowerATOMIC_FENCE(Op, DAG);
4964
89
  case ISD::ATOMIC_SWAP:
4965
28
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
4966
89
  case ISD::ATOMIC_STORE:
4967
21
    return lowerATOMIC_STORE(Op, DAG);
4968
89
  case ISD::ATOMIC_LOAD:
4969
17
    return lowerATOMIC_LOAD(Op, DAG);
4970
94
  case ISD::ATOMIC_LOAD_ADD:
4971
94
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
4972
89
  case ISD::ATOMIC_LOAD_SUB:
4973
72
    return lowerATOMIC_LOAD_SUB(Op, DAG);
4974
89
  case ISD::ATOMIC_LOAD_AND:
4975
64
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
4976
89
  case ISD::ATOMIC_LOAD_OR:
4977
64
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
4978
89
  case ISD::ATOMIC_LOAD_XOR:
4979
56
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
4980
89
  case ISD::ATOMIC_LOAD_NAND:
4981
52
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
4982
89
  case ISD::ATOMIC_LOAD_MIN:
4983
32
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
4984
89
  case ISD::ATOMIC_LOAD_MAX:
4985
14
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
4986
89
  case ISD::ATOMIC_LOAD_UMIN:
4987
14
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
4988
89
  case ISD::ATOMIC_LOAD_UMAX:
4989
14
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
4990
89
  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
4991
51
    return lowerATOMIC_CMP_SWAP(Op, DAG);
4992
89
  case ISD::STACKSAVE:
4993
3
    return lowerSTACKSAVE(Op, DAG);
4994
89
  case ISD::STACKRESTORE:
4995
2
    return lowerSTACKRESTORE(Op, DAG);
4996
89
  case ISD::PREFETCH:
4997
17
    return lowerPREFETCH(Op, DAG);
4998
122
  case ISD::INTRINSIC_W_CHAIN:
4999
122
    return lowerINTRINSIC_W_CHAIN(Op, DAG);
5000
1.75k
  case ISD::INTRINSIC_WO_CHAIN:
5001
1.75k
    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
5002
1.95k
  case ISD::BUILD_VECTOR:
5003
1.95k
    return lowerBUILD_VECTOR(Op, DAG);
5004
431
  case ISD::VECTOR_SHUFFLE:
5005
431
    return lowerVECTOR_SHUFFLE(Op, DAG);
5006
89
  case ISD::SCALAR_TO_VECTOR:
5007
26
    return lowerSCALAR_TO_VECTOR(Op, DAG);
5008
89
  case ISD::INSERT_VECTOR_ELT:
5009
59
    return lowerINSERT_VECTOR_ELT(Op, DAG);
5010
323
  case ISD::EXTRACT_VECTOR_ELT:
5011
323
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
5012
89
  case ISD::SIGN_EXTEND_VECTOR_INREG:
5013
85
    return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACK_HIGH);
5014
89
  case ISD::ZERO_EXTEND_VECTOR_INREG:
5015
18
    return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACKL_HIGH);
5016
89
  case ISD::SHL:
5017
43
    return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
5018
89
  case ISD::SRL:
5019
70
    return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
5020
108
  case ISD::SRA:
5021
108
    return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
5022
89
  default:
5023
0
    llvm_unreachable("Unexpected node to lower");
5024
11.5k
  }
5025
11.5k
}
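LowerOperation is a pure dispatcher: each opcode the constructor registered as Custom maps one-to-one onto a lower* helper, and the unreachable default asserts that nothing else can arrive here. A skeletal sketch of the pattern with an invented three-opcode enum:

#include <cstdlib>

enum class Opcode { FrameAddr, ReturnAddr, Shl };

int lowerOperation(Opcode Op) {
  switch (Op) {
  case Opcode::FrameAddr:  return 1; // stands in for lowerFRAMEADDR
  case Opcode::ReturnAddr: return 2; // stands in for lowerRETURNADDR
  case Opcode::Shl:        return 3; // stands in for lowerShift
  }
  std::abort(); // mirrors llvm_unreachable("Unexpected node to lower")
}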
5026
5027
// Lower operations with invalid operand or result types (currently used
5028
// only for 128-bit integer types).
5029
5030
26
static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) {
5031
26
  SDLoc DL(In);
5032
26
  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
5033
26
                           DAG.getIntPtrConstant(0, DL));
5034
26
  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
5035
26
                           DAG.getIntPtrConstant(1, DL));
5036
26
  SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
5037
26
                                    MVT::Untyped, Hi, Lo);
5038
26
  return SDValue(Pair, 0);
5039
26
}
5040
5041
13
static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) {
5042
13
  SDLoc DL(In);
5043
13
  SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
5044
13
                                          DL, MVT::i64, In);
5045
13
  SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
5046
13
                                          DL, MVT::i64, In);
5047
13
  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
5048
13
}
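These two helpers are inverses: lowerI128ToGR128 splits an i128 into high and low 64-bit halves and pairs them into one untyped 128-bit register, and lowerGR128ToI128 extracts the subregisters and rebuilds the i128. A standalone illustration of the same split, using the unsigned __int128 GCC/Clang extension purely for the demo:

#include <cstdint>

struct GR128 { uint64_t Hi, Lo; }; // stand-in for the subreg_h64/subreg_l64 pair

GR128 toGR128(unsigned __int128 In) {
  return { uint64_t(In >> 64), uint64_t(In) }; // high half, low half
}

unsigned __int128 toI128(GR128 Pair) {
  return (unsigned __int128)Pair.Hi << 64 | Pair.Lo; // BUILD_PAIR(Lo, Hi)
}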
5049
5050
void
5051
SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
5052
                                             SmallVectorImpl<SDValue> &Results,
5053
15
                                             SelectionDAG &DAG) const {
5054
15
  switch (N->getOpcode()) {
5055
15
  case ISD::ATOMIC_LOAD: {
5056
1
    SDLoc DL(N);
5057
1
    SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
5058
1
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
5059
1
    MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
5060
1
    SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
5061
1
                                          DL, Tys, Ops, MVT::i128, MMO);
5062
1
    Results.push_back(lowerGR128ToI128(DAG, Res));
5063
1
    Results.push_back(Res.getValue(1));
5064
1
    break;
5065
15
  }
5066
15
  case ISD::ATOMIC_STORE: {