Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// This implements routines for translating from LLVM IR into SelectionDAG IR.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "SelectionDAGBuilder.h"
15
#include "llvm/ADT/APFloat.h"
16
#include "llvm/ADT/APInt.h"
17
#include "llvm/ADT/ArrayRef.h"
18
#include "llvm/ADT/BitVector.h"
19
#include "llvm/ADT/DenseMap.h"
20
#include "llvm/ADT/None.h"
21
#include "llvm/ADT/Optional.h"
22
#include "llvm/ADT/STLExtras.h"
23
#include "llvm/ADT/SmallPtrSet.h"
24
#include "llvm/ADT/SmallSet.h"
25
#include "llvm/ADT/SmallVector.h"
26
#include "llvm/ADT/StringRef.h"
27
#include "llvm/ADT/Triple.h"
28
#include "llvm/ADT/Twine.h"
29
#include "llvm/Analysis/AliasAnalysis.h"
30
#include "llvm/Analysis/BranchProbabilityInfo.h"
31
#include "llvm/Analysis/ConstantFolding.h"
32
#include "llvm/Analysis/EHPersonalities.h"
33
#include "llvm/Analysis/Loads.h"
34
#include "llvm/Analysis/MemoryLocation.h"
35
#include "llvm/Analysis/TargetLibraryInfo.h"
36
#include "llvm/Analysis/ValueTracking.h"
37
#include "llvm/Analysis/VectorUtils.h"
38
#include "llvm/CodeGen/Analysis.h"
39
#include "llvm/CodeGen/FunctionLoweringInfo.h"
40
#include "llvm/CodeGen/GCMetadata.h"
41
#include "llvm/CodeGen/ISDOpcodes.h"
42
#include "llvm/CodeGen/MachineBasicBlock.h"
43
#include "llvm/CodeGen/MachineFrameInfo.h"
44
#include "llvm/CodeGen/MachineFunction.h"
45
#include "llvm/CodeGen/MachineInstr.h"
46
#include "llvm/CodeGen/MachineInstrBuilder.h"
47
#include "llvm/CodeGen/MachineJumpTableInfo.h"
48
#include "llvm/CodeGen/MachineMemOperand.h"
49
#include "llvm/CodeGen/MachineModuleInfo.h"
50
#include "llvm/CodeGen/MachineOperand.h"
51
#include "llvm/CodeGen/MachineRegisterInfo.h"
52
#include "llvm/CodeGen/MachineValueType.h"
53
#include "llvm/CodeGen/RuntimeLibcalls.h"
54
#include "llvm/CodeGen/SelectionDAG.h"
55
#include "llvm/CodeGen/SelectionDAGNodes.h"
56
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
57
#include "llvm/CodeGen/StackMaps.h"
58
#include "llvm/CodeGen/ValueTypes.h"
59
#include "llvm/CodeGen/WinEHFuncInfo.h"
60
#include "llvm/IR/Argument.h"
61
#include "llvm/IR/Attributes.h"
62
#include "llvm/IR/BasicBlock.h"
63
#include "llvm/IR/CFG.h"
64
#include "llvm/IR/CallSite.h"
65
#include "llvm/IR/CallingConv.h"
66
#include "llvm/IR/Constant.h"
67
#include "llvm/IR/ConstantRange.h"
68
#include "llvm/IR/Constants.h"
69
#include "llvm/IR/DataLayout.h"
70
#include "llvm/IR/DebugInfoMetadata.h"
71
#include "llvm/IR/DebugLoc.h"
72
#include "llvm/IR/DerivedTypes.h"
73
#include "llvm/IR/Function.h"
74
#include "llvm/IR/GetElementPtrTypeIterator.h"
75
#include "llvm/IR/InlineAsm.h"
76
#include "llvm/IR/InstrTypes.h"
77
#include "llvm/IR/Instruction.h"
78
#include "llvm/IR/Instructions.h"
79
#include "llvm/IR/IntrinsicInst.h"
80
#include "llvm/IR/Intrinsics.h"
81
#include "llvm/IR/LLVMContext.h"
82
#include "llvm/IR/Metadata.h"
83
#include "llvm/IR/Module.h"
84
#include "llvm/IR/Operator.h"
85
#include "llvm/IR/Statepoint.h"
86
#include "llvm/IR/Type.h"
87
#include "llvm/IR/User.h"
88
#include "llvm/IR/Value.h"
89
#include "llvm/MC/MCContext.h"
90
#include "llvm/MC/MCSymbol.h"
91
#include "llvm/Support/AtomicOrdering.h"
92
#include "llvm/Support/BranchProbability.h"
93
#include "llvm/Support/Casting.h"
94
#include "llvm/Support/CodeGen.h"
95
#include "llvm/Support/CommandLine.h"
96
#include "llvm/Support/Compiler.h"
97
#include "llvm/Support/Debug.h"
98
#include "llvm/Support/ErrorHandling.h"
99
#include "llvm/Support/MathExtras.h"
100
#include "llvm/Support/raw_ostream.h"
101
#include "llvm/Target/TargetFrameLowering.h"
102
#include "llvm/Target/TargetInstrInfo.h"
103
#include "llvm/Target/TargetIntrinsicInfo.h"
104
#include "llvm/Target/TargetLowering.h"
105
#include "llvm/Target/TargetMachine.h"
106
#include "llvm/Target/TargetOpcodes.h"
107
#include "llvm/Target/TargetOptions.h"
108
#include "llvm/Target/TargetRegisterInfo.h"
109
#include "llvm/Target/TargetSubtargetInfo.h"
110
#include <algorithm>
111
#include <cassert>
112
#include <cstddef>
113
#include <cstdint>
114
#include <cstring>
115
#include <iterator>
116
#include <limits>
117
#include <numeric>
118
#include <tuple>
119
#include <utility>
120
#include <vector>
121
122
using namespace llvm;
123
124
#define DEBUG_TYPE "isel"
125
126
/// LimitFloatPrecision - Generate low-precision inline sequences for
127
/// some float libcalls (6, 8 or 12 bits).
128
static unsigned LimitFloatPrecision;
129
130
static cl::opt<unsigned, true>
131
LimitFPPrecision("limit-float-precision",
132
                 cl::desc("Generate low-precision inline sequences "
133
                          "for some float libcalls"),
134
                 cl::location(LimitFloatPrecision),
135
                 cl::init(0));
136
137
// Limit the width of DAG chains. This is important in general to prevent
138
// DAG-based analysis from blowing up. For example, alias analysis and
139
// load clustering may not complete in reasonable time. It is difficult to
140
// recognize and avoid this situation within each individual analysis, and
141
// future analyses are likely to have the same behavior. Limiting DAG width is
142
// the safe approach and will be especially important with global DAGs.
143
//
144
// MaxParallelChains default is arbitrarily high to avoid affecting
145
// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
146
// sequence over this should have been converted to llvm.memcpy by the
147
// frontend. It is easy to induce this behavior with .ll code such as:
148
// %buffer = alloca [4096 x i8]
149
// %data = load [4096 x i8]* %argPtr
150
// store [4096 x i8] %data, [4096 x i8]* %buffer
151
static const unsigned MaxParallelChains = 64;
152
153
// True if the Value passed requires ABI mangling as it is a parameter to a
154
// function or a return value from a function which is not an intrinsic.
155
8.18M
static bool isABIRegCopy(const Value *V) {
156
8.18M
  const bool IsRetInst = V && isa<ReturnInst>(V);
157
8.18M
  const bool IsCallInst = V && isa<CallInst>(V);
158
8.18M
  const bool IsInLineAsm =
159
686k
      IsCallInst && static_cast<const CallInst *>(V)->isInlineAsm();
160
8.18M
  const bool IsIndirectFunctionCall =
161
686k
      IsCallInst && !IsInLineAsm &&
162
686k
      !static_cast<const CallInst *>(V)->getCalledFunction();
163
8.18M
  // It is possible that the call instruction is an inline asm statement or an
164
8.18M
  // indirect function call in which case the return value of
165
8.18M
  // getCalledFunction() would be nullptr.
166
8.18M
  const bool IsInstrinsicCall =
167
8.18M
      IsCallInst && 
!IsInLineAsm686k
&&
!IsIndirectFunctionCall686k
&&
168
668k
      static_cast<const CallInst *>(V)->getCalledFunction()->getIntrinsicID() !=
169
668k
          Intrinsic::not_intrinsic;
170
8.18M
171
8.18M
  return IsRetInst || 
(IsCallInst && 8.18M
(!IsInLineAsm && 686k
!IsInstrinsicCall686k
));
172
8.18M
}
173
174
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
175
                                      const SDValue *Parts, unsigned NumParts,
176
                                      MVT PartVT, EVT ValueVT, const Value *V,
177
                                      bool IsABIRegCopy);
178
179
/// getCopyFromParts - Create a value that contains the specified legal parts
180
/// combined into the value they represent.  If the parts combine to a type
181
/// larger than ValueVT then AssertOp can be used to specify whether the extra
182
/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
183
/// (ISD::AssertSext).
184
static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
185
                                const SDValue *Parts, unsigned NumParts,
186
                                MVT PartVT, EVT ValueVT, const Value *V,
187
                                Optional<ISD::NodeType> AssertOp = None,
188
7.66M
                                bool IsABIRegCopy = false) {
189
7.66M
  if (ValueVT.isVector())
190
168k
    return getCopyFromPartsVector(DAG, DL, Parts, NumParts,
191
168k
                                  PartVT, ValueVT, V, IsABIRegCopy);
192
7.49M
193
7.66M
  assert(NumParts > 0 && "No parts to assemble!");
194
7.49M
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
195
7.49M
  SDValue Val = Parts[0];
196
7.49M
197
7.49M
  if (
NumParts > 17.49M
) {
198
28.5k
    // Assemble the value from multiple parts.
199
28.5k
    if (
ValueVT.isInteger()28.5k
) {
200
26.0k
      unsigned PartBits = PartVT.getSizeInBits();
201
26.0k
      unsigned ValueBits = ValueVT.getSizeInBits();
202
26.0k
203
26.0k
      // Assemble the power of 2 part.
204
26.0k
      unsigned RoundParts = NumParts & (NumParts - 1) ?
205
26.0k
        
1 << Log2_32(NumParts)214
:
NumParts25.8k
;
206
26.0k
      unsigned RoundBits = PartBits * RoundParts;
207
26.0k
      EVT RoundVT = RoundBits == ValueBits ?
208
26.0k
        
ValueVT25.4k
:
EVT::getIntegerVT(*DAG.getContext(), RoundBits)652
;
209
26.0k
      SDValue Lo, Hi;
210
26.0k
211
26.0k
      EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
212
26.0k
213
26.0k
      if (
RoundParts > 226.0k
) {
214
1.63k
        Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
215
1.63k
                              PartVT, HalfVT, V);
216
1.63k
        Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
217
1.63k
                              RoundParts / 2, PartVT, HalfVT, V);
218
26.0k
      } else {
219
24.4k
        Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
220
24.4k
        Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
221
24.4k
      }
222
26.0k
223
26.0k
      if (DAG.getDataLayout().isBigEndian())
224
3.73k
        std::swap(Lo, Hi);
225
26.0k
226
26.0k
      Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);
227
26.0k
228
26.0k
      if (
RoundParts < NumParts26.0k
) {
229
214
        // Assemble the trailing non-power-of-2 part.
230
214
        unsigned OddParts = NumParts - RoundParts;
231
214
        EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
232
214
        Hi = getCopyFromParts(DAG, DL,
233
214
                              Parts + RoundParts, OddParts, PartVT, OddVT, V);
234
214
235
214
        // Combine the round and odd parts.
236
214
        Lo = Val;
237
214
        if (DAG.getDataLayout().isBigEndian())
238
5
          std::swap(Lo, Hi);
239
214
        EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
240
214
        Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
241
214
        Hi =
242
214
            DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
243
214
                        DAG.getConstant(Lo.getValueSizeInBits(), DL,
244
214
                                        TLI.getPointerTy(DAG.getDataLayout())));
245
214
        Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
246
214
        Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
247
214
      }
248
28.5k
    } else 
if (2.53k
PartVT.isFloatingPoint()2.53k
) {
249
167
      // FP split into multiple FP parts (for ppcf128)
250
167
      assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 &&
251
167
             "Unexpected split");
252
167
      SDValue Lo, Hi;
253
167
      Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
254
167
      Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
255
167
      if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
256
167
        std::swap(Lo, Hi);
257
167
      Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
258
2.53k
    } else {
259
2.37k
      // FP split into integer parts (soft fp)
260
2.37k
      assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
261
2.37k
             !PartVT.isVector() && "Unexpected split");
262
2.37k
      EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
263
2.37k
      Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V);
264
2.37k
    }
265
28.5k
  }
266
7.49M
267
7.49M
  // There is now one part, held in Val.  Correct it to match ValueVT.
268
7.49M
  // PartEVT is the type of the register class that holds the value.
269
7.49M
  // ValueVT is the type of the inline asm operation.
270
7.49M
  EVT PartEVT = Val.getValueType();
271
7.49M
272
7.49M
  if (PartEVT == ValueVT)
273
7.34M
    return Val;
274
149k
275
149k
  
if (149k
PartEVT.isInteger() && 149k
ValueVT.isFloatingPoint()148k
&&
276
149k
      
ValueVT.bitsLT(PartEVT)4.30k
) {
277
25
    // For an FP value in an integer part, we need to truncate to the right
278
25
    // width first.
279
25
    PartEVT = EVT::getIntegerVT(*DAG.getContext(),  ValueVT.getSizeInBits());
280
25
    Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val);
281
25
  }
282
149k
283
149k
  // Handle types that have the same size.
284
149k
  if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
285
4.37k
    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
286
144k
287
144k
  // Handle types with different sizes.
288
144k
  
if (144k
PartEVT.isInteger() && 144k
ValueVT.isInteger()144k
) {
289
144k
    if (
ValueVT.bitsLT(PartEVT)144k
) {
290
144k
      // For a truncate, see if we have any information to
291
144k
      // indicate whether the truncated bits will always be
292
144k
      // zero or sign-extension.
293
144k
      if (AssertOp.hasValue())
294
46.4k
        Val = DAG.getNode(*AssertOp, DL, PartEVT, Val,
295
46.4k
                          DAG.getValueType(ValueVT));
296
144k
      return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
297
144k
    }
298
22
    return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
299
22
  }
300
520
301
520
  
if (520
PartEVT.isFloatingPoint() && 520
ValueVT.isFloatingPoint()520
) {
302
520
    // FP_ROUND's are always exact here.
303
520
    if (ValueVT.bitsLT(Val.getValueType()))
304
520
      return DAG.getNode(
305
520
          ISD::FP_ROUND, DL, ValueVT, Val,
306
520
          DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())));
307
0
308
0
    return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
309
0
  }
310
0
311
0
  
llvm_unreachable0
("Unknown mismatch!");
312
0
}
313
314
static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
315
12
                                              const Twine &ErrMsg) {
316
12
  const Instruction *I = dyn_cast_or_null<Instruction>(V);
317
12
  if (!V)
318
0
    return Ctx.emitError(ErrMsg);
319
12
320
12
  const char *AsmError = ", possible invalid constraint for vector type";
321
12
  if (const CallInst *CI = dyn_cast<CallInst>(I))
322
12
    
if (12
isa<InlineAsm>(CI->getCalledValue())12
)
323
12
      return Ctx.emitError(I, ErrMsg + AsmError);
324
0
325
0
  return Ctx.emitError(I, ErrMsg);
326
0
}
327
328
/// getCopyFromPartsVector - Create a value that contains the specified legal
329
/// parts combined into the value they represent.  If the parts combine to a
330
/// type larger than ValueVT then AssertOp can be used to specify whether the
331
/// extra bits are known to be zero (ISD::AssertZext) or sign extended from
332
/// ValueVT (ISD::AssertSext).
333
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
334
                                      const SDValue *Parts, unsigned NumParts,
335
                                      MVT PartVT, EVT ValueVT, const Value *V,
336
168k
                                      bool IsABIRegCopy) {
337
168k
  assert(ValueVT.isVector() && "Not a vector value");
338
168k
  assert(NumParts > 0 && "No parts to assemble!");
339
168k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
340
168k
  SDValue Val = Parts[0];
341
168k
342
168k
  // Handle a multi-element vector.
343
168k
  if (
NumParts > 1168k
) {
344
8.26k
    EVT IntermediateVT;
345
8.26k
    MVT RegisterVT;
346
8.26k
    unsigned NumIntermediates;
347
8.26k
    unsigned NumRegs;
348
8.26k
349
8.26k
    if (
IsABIRegCopy8.26k
) {
350
6.96k
      NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
351
6.96k
          *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates,
352
6.96k
          RegisterVT);
353
8.26k
    } else {
354
1.30k
      NumRegs =
355
1.30k
          TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
356
1.30k
                                     NumIntermediates, RegisterVT);
357
1.30k
    }
358
8.26k
359
8.26k
    assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
360
8.26k
    NumParts = NumRegs; // Silence a compiler warning.
361
8.26k
    assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
362
8.26k
    assert(RegisterVT.getSizeInBits() ==
363
8.26k
           Parts[0].getSimpleValueType().getSizeInBits() &&
364
8.26k
           "Part type sizes don't match!");
365
8.26k
366
8.26k
    // Assemble the parts into intermediate operands.
367
8.26k
    SmallVector<SDValue, 8> Ops(NumIntermediates);
368
8.26k
    if (
NumIntermediates == NumParts8.26k
) {
369
7.63k
      // If the register was not expanded, truncate or copy the value,
370
7.63k
      // as appropriate.
371
32.4k
      for (unsigned i = 0; 
i != NumParts32.4k
;
++i24.8k
)
372
24.8k
        Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
373
24.8k
                                  PartVT, IntermediateVT, V);
374
8.26k
    } else 
if (632
NumParts > 0632
) {
375
632
      // If the intermediate type was expanded, build the intermediate
376
632
      // operands from the parts.
377
632
      assert(NumParts % NumIntermediates == 0 &&
378
632
             "Must expand into a divisible number of parts!");
379
632
      unsigned Factor = NumParts / NumIntermediates;
380
2.05k
      for (unsigned i = 0; 
i != NumIntermediates2.05k
;
++i1.42k
)
381
1.42k
        Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
382
1.42k
                                  PartVT, IntermediateVT, V);
383
632
    }
384
8.26k
385
8.26k
    // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
386
8.26k
    // intermediate operands.
387
8.26k
    EVT BuiltVectorTy =
388
8.26k
        EVT::getVectorVT(*DAG.getContext(), IntermediateVT.getScalarType(),
389
8.26k
                         (IntermediateVT.isVector()
390
6.11k
                              ? IntermediateVT.getVectorNumElements() * NumParts
391
2.14k
                              : NumIntermediates));
392
6.11k
    Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS
393
2.14k
                                                : ISD::BUILD_VECTOR,
394
8.26k
                      DL, BuiltVectorTy, Ops);
395
8.26k
  }
396
168k
397
168k
  // There is now one part, held in Val.  Correct it to match ValueVT.
398
168k
  EVT PartEVT = Val.getValueType();
399
168k
400
168k
  if (PartEVT == ValueVT)
401
163k
    return Val;
402
4.94k
403
4.94k
  
if (4.94k
PartEVT.isVector()4.94k
) {
404
4.21k
    // If the element type of the source/dest vectors are the same, but the
405
4.21k
    // parts vector has more elements than the value vector, then we have a
406
4.21k
    // vector widening case (e.g. <2 x float> -> <4 x float>).  Extract the
407
4.21k
    // elements we want.
408
4.21k
    if (
PartEVT.getVectorElementType() == ValueVT.getVectorElementType()4.21k
) {
409
1.21k
      assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
410
1.21k
             "Cannot narrow, it would be a lossy transformation");
411
1.21k
      return DAG.getNode(
412
1.21k
          ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
413
1.21k
          DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
414
1.21k
    }
415
2.99k
416
2.99k
    // Vector/Vector bitcast.
417
2.99k
    
if (2.99k
ValueVT.getSizeInBits() == PartEVT.getSizeInBits()2.99k
)
418
1.75k
      return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
419
1.24k
420
0
    assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
421
1.24k
      "Cannot handle this kind of promotion");
422
1.24k
    // Promoted vector extract
423
1.24k
    return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
424
1.24k
425
1.24k
  }
426
735
427
735
  // Trivial bitcast if the types are the same size and the destination
428
735
  // vector type is legal.
429
735
  
if (735
PartEVT.getSizeInBits() == ValueVT.getSizeInBits() &&
430
564
      TLI.isTypeLegal(ValueVT))
431
46
    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
432
689
433
689
  
if (689
ValueVT.getVectorNumElements() != 1689
) {
434
194
     // Certain ABIs require that vectors are passed as integers. For vectors
435
194
     // are the same size, this is an obvious bitcast.
436
194
     if (
ValueVT.getSizeInBits() == PartEVT.getSizeInBits()194
) {
437
106
       return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
438
88
     } else 
if (88
ValueVT.getSizeInBits() < PartEVT.getSizeInBits()88
) {
439
80
       // Bitcast Val back the original type and extract the corresponding
440
80
       // vector we want.
441
80
       unsigned Elts = PartEVT.getSizeInBits() / ValueVT.getScalarSizeInBits();
442
80
       EVT WiderVecType = EVT::getVectorVT(*DAG.getContext(),
443
80
                                           ValueVT.getVectorElementType(), Elts);
444
80
       Val = DAG.getBitcast(WiderVecType, Val);
445
80
       return DAG.getNode(
446
80
           ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
447
80
           DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
448
80
     }
449
8
450
8
     diagnosePossiblyInvalidConstraint(
451
8
         *DAG.getContext(), V, "non-trivial scalar-to-vector conversion");
452
8
     return DAG.getUNDEF(ValueVT);
453
8
  }
454
495
455
495
  // Handle cases such as i8 -> <1 x i1>
456
495
  EVT ValueSVT = ValueVT.getVectorElementType();
457
495
  if (
ValueVT.getVectorNumElements() == 1 && 495
ValueSVT != PartEVT495
)
458
83
    
Val = ValueVT.isFloatingPoint() ? 83
DAG.getFPExtendOrRound(Val, DL, ValueSVT)6
459
77
                                    : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT);
460
168k
461
168k
  return DAG.getBuildVector(ValueVT, DL, Val);
462
168k
}
463
464
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
465
                                 SDValue Val, SDValue *Parts, unsigned NumParts,
466
                                 MVT PartVT, const Value *V, bool IsABIRegCopy);
467
468
/// getCopyToParts - Create a series of nodes that contain the specified value
469
/// split into legal parts.  If the parts contain more bits than Val, then, for
470
/// integers, ExtendKind can be used to specify how to generate the extra bits.
471
static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
472
                           SDValue *Parts, unsigned NumParts, MVT PartVT,
473
                           const Value *V,
474
                           ISD::NodeType ExtendKind = ISD::ANY_EXTEND,
475
6.89M
                           bool IsABIRegCopy = false) {
476
6.89M
  EVT ValueVT = Val.getValueType();
477
6.89M
478
6.89M
  // Handle the vector case separately.
479
6.89M
  if (ValueVT.isVector())
480
101k
    return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V,
481
101k
                                IsABIRegCopy);
482
6.79M
483
6.79M
  unsigned PartBits = PartVT.getSizeInBits();
484
6.79M
  unsigned OrigNumParts = NumParts;
485
6.79M
  assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) &&
486
6.79M
         "Copying to an illegal type!");
487
6.79M
488
6.79M
  if (NumParts == 0)
489
0
    return;
490
6.79M
491
6.79M
  assert(!ValueVT.isVector() && "Vector case handled elsewhere");
492
6.79M
  EVT PartEVT = PartVT;
493
6.79M
  if (
PartEVT == ValueVT6.79M
) {
494
6.62M
    assert(NumParts == 1 && "No-op copy with multiple parts!");
495
6.62M
    Parts[0] = Val;
496
6.62M
    return;
497
6.62M
  }
498
163k
499
163k
  
if (163k
NumParts * PartBits > ValueVT.getSizeInBits()163k
) {
500
138k
    // If the parts cover more bits than the value has, promote the value.
501
138k
    if (
PartVT.isFloatingPoint() && 138k
ValueVT.isFloatingPoint()110
) {
502
110
      assert(NumParts == 1 && "Do not know what to promote to!");
503
110
      Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
504
138k
    } else {
505
138k
      if (
ValueVT.isFloatingPoint()138k
) {
506
22
        // FP values need to be bitcast, then extended if they are being put
507
22
        // into a larger container.
508
22
        ValueVT = EVT::getIntegerVT(*DAG.getContext(),  ValueVT.getSizeInBits());
509
22
        Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
510
22
      }
511
138k
      assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
512
138k
             ValueVT.isInteger() &&
513
138k
             "Unknown mismatch!");
514
138k
      ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
515
138k
      Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
516
138k
      if (PartVT == MVT::x86mmx)
517
1
        Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
518
138k
    }
519
163k
  } else 
if (24.5k
PartBits == ValueVT.getSizeInBits()24.5k
) {
520
1.49k
    // Different types of the same size.
521
1.49k
    assert(NumParts == 1 && PartEVT != ValueVT);
522
1.49k
    Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
523
24.5k
  } else 
if (23.0k
NumParts * PartBits < ValueVT.getSizeInBits()23.0k
) {
524
60
    // If the parts cover less bits than value has, truncate the value.
525
60
    assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
526
60
           ValueVT.isInteger() &&
527
60
           "Unknown mismatch!");
528
60
    ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
529
60
    Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
530
60
    if (PartVT == MVT::x86mmx)
531
0
      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
532
24.5k
  }
533
163k
534
163k
  // The value may have changed - recompute ValueVT.
535
163k
  ValueVT = Val.getValueType();
536
163k
  assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
537
163k
         "Failed to tile the value with PartVT!");
538
163k
539
163k
  if (
NumParts == 1163k
) {
540
140k
    if (
PartEVT != ValueVT140k
) {
541
4
      diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
542
4
                                        "scalar-to-vector conversion failed");
543
4
      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
544
4
    }
545
140k
546
140k
    Parts[0] = Val;
547
140k
    return;
548
140k
  }
549
23.0k
550
23.0k
  // Expand the value into multiple parts.
551
23.0k
  
if (23.0k
NumParts & (NumParts - 1)23.0k
) {
552
22
    // The number of parts is not a power of 2.  Split off and copy the tail.
553
22
    assert(PartVT.isInteger() && ValueVT.isInteger() &&
554
22
           "Do not know what to expand to!");
555
22
    unsigned RoundParts = 1 << Log2_32(NumParts);
556
22
    unsigned RoundBits = RoundParts * PartBits;
557
22
    unsigned OddParts = NumParts - RoundParts;
558
22
    SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
559
22
                                 DAG.getIntPtrConstant(RoundBits, DL));
560
22
    getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V);
561
22
562
22
    if (DAG.getDataLayout().isBigEndian())
563
22
      // The odd parts were reversed by getCopyToParts - unreverse them.
564
3
      std::reverse(Parts + RoundParts, Parts + NumParts);
565
22
566
22
    NumParts = RoundParts;
567
22
    ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
568
22
    Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
569
22
  }
570
23.0k
571
23.0k
  // The number of parts is a power of 2.  Repeatedly bisect the value using
572
23.0k
  // EXTRACT_ELEMENT.
573
23.0k
  Parts[0] = DAG.getNode(ISD::BITCAST, DL,
574
23.0k
                         EVT::getIntegerVT(*DAG.getContext(),
575
23.0k
                                           ValueVT.getSizeInBits()),
576
23.0k
                         Val);
577
23.0k
578
47.2k
  for (unsigned StepSize = NumParts; 
StepSize > 147.2k
;
StepSize /= 224.2k
) {
579
50.4k
    for (unsigned i = 0; 
i < NumParts50.4k
;
i += StepSize26.1k
) {
580
26.1k
      unsigned ThisBits = StepSize * PartBits / 2;
581
26.1k
      EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
582
26.1k
      SDValue &Part0 = Parts[i];
583
26.1k
      SDValue &Part1 = Parts[i+StepSize/2];
584
26.1k
585
26.1k
      Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
586
26.1k
                          ThisVT, Part0, DAG.getIntPtrConstant(1, DL));
587
26.1k
      Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
588
26.1k
                          ThisVT, Part0, DAG.getIntPtrConstant(0, DL));
589
26.1k
590
26.1k
      if (
ThisBits == PartBits && 26.1k
ThisVT != PartVT24.5k
) {
591
169
        Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
592
169
        Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
593
169
      }
594
26.1k
    }
595
24.2k
  }
596
23.0k
597
23.0k
  if (DAG.getDataLayout().isBigEndian())
598
2.41k
    std::reverse(Parts, Parts + OrigNumParts);
599
6.89M
}
600
601
602
/// getCopyToPartsVector - Create a series of nodes that contain the specified
603
/// value split into legal parts.
604
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
605
                                 SDValue Val, SDValue *Parts, unsigned NumParts,
606
                                 MVT PartVT, const Value *V,
607
101k
                                 bool IsABIRegCopy) {
608
101k
  EVT ValueVT = Val.getValueType();
609
101k
  assert(ValueVT.isVector() && "Not a vector");
610
101k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
611
101k
612
101k
  if (
NumParts == 1101k
) {
613
96.8k
    EVT PartEVT = PartVT;
614
96.8k
    if (
PartEVT == ValueVT96.8k
) {
615
94.9k
      // Nothing to do.
616
96.8k
    } else 
if (1.84k
PartVT.getSizeInBits() == ValueVT.getSizeInBits()1.84k
) {
617
332
      // Bitconvert vector->vector case.
618
332
      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
619
1.84k
    } else 
if (1.51k
PartVT.isVector() &&
620
1.42k
               PartEVT.getVectorElementType() == ValueVT.getVectorElementType() &&
621
1.51k
               
PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements()567
) {
622
567
      EVT ElementVT = PartVT.getVectorElementType();
623
567
      // Vector widening case, e.g. <2 x float> -> <4 x float>.  Shuffle in
624
567
      // undef elements.
625
567
      SmallVector<SDValue, 16> Ops;
626
2.12k
      for (unsigned i = 0, e = ValueVT.getVectorNumElements(); 
i != e2.12k
;
++i1.55k
)
627
1.55k
        Ops.push_back(DAG.getNode(
628
1.55k
            ISD::EXTRACT_VECTOR_ELT, DL, ElementVT, Val,
629
1.55k
            DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))));
630
567
631
567
      for (unsigned i = ValueVT.getVectorNumElements(),
632
2.98k
           e = PartVT.getVectorNumElements(); 
i != e2.98k
;
++i2.41k
)
633
2.41k
        Ops.push_back(DAG.getUNDEF(ElementVT));
634
567
635
567
      Val = DAG.getBuildVector(PartVT, DL, Ops);
636
567
637
567
      // FIXME: Use CONCAT for 2x -> 4x.
638
567
639
567
      //SDValue UndefElts = DAG.getUNDEF(VectorTy);
640
567
      //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
641
1.51k
    } else 
if (950
PartVT.isVector() &&
642
856
               PartEVT.getVectorElementType().bitsGE(
643
856
                 ValueVT.getVectorElementType()) &&
644
950
               
PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()856
) {
645
856
646
856
      // Promoted vector extract
647
856
      Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
648
950
    } else {
649
94
      if (
ValueVT.getVectorNumElements() == 194
) {
650
54
        Val = DAG.getNode(
651
54
            ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
652
54
            DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
653
94
      } else {
654
40
        assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() &&
655
40
               "lossy conversion of vector to scalar type");
656
40
        EVT IntermediateType =
657
40
            EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
658
40
        Val = DAG.getBitcast(IntermediateType, Val);
659
40
        Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
660
40
      }
661
1.84k
    }
662
96.8k
663
96.8k
    assert(Val.getValueType() == PartVT && "Unexpected vector part value type");
664
96.8k
    Parts[0] = Val;
665
96.8k
    return;
666
96.8k
  }
667
4.50k
668
4.50k
  // Handle a multi-element vector.
669
4.50k
  EVT IntermediateVT;
670
4.50k
  MVT RegisterVT;
671
4.50k
  unsigned NumIntermediates;
672
4.50k
  unsigned NumRegs;
673
4.50k
  if (
IsABIRegCopy4.50k
) {
674
3.60k
    NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
675
3.60k
        *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates,
676
3.60k
        RegisterVT);
677
4.50k
  } else {
678
901
    NumRegs =
679
901
        TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
680
901
                                   NumIntermediates, RegisterVT);
681
901
  }
682
4.50k
  unsigned NumElements = ValueVT.getVectorNumElements();
683
4.50k
684
4.50k
  assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
685
4.50k
  NumParts = NumRegs; // Silence a compiler warning.
686
4.50k
  assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
687
4.50k
688
4.50k
  // Convert the vector to the appropiate type if necessary.
689
4.50k
  unsigned DestVectorNoElts =
690
4.50k
      NumIntermediates *
691
4.50k
      (IntermediateVT.isVector() ? 
IntermediateVT.getVectorNumElements()3.53k
:
1968
);
692
4.50k
  EVT BuiltVectorTy = EVT::getVectorVT(
693
4.50k
      *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts);
694
4.50k
  if (Val.getValueType() != BuiltVectorTy)
695
189
    Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);
696
4.50k
697
4.50k
  // Split the vector into intermediate operands.
698
4.50k
  SmallVector<SDValue, 8> Ops(NumIntermediates);
699
16.2k
  for (unsigned i = 0; 
i != NumIntermediates16.2k
;
++i11.7k
) {
700
11.7k
    if (IntermediateVT.isVector())
701
8.61k
      Ops[i] =
702
8.61k
          DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val,
703
8.61k
                      DAG.getConstant(i * (NumElements / NumIntermediates), DL,
704
8.61k
                                      TLI.getVectorIdxTy(DAG.getDataLayout())));
705
11.7k
    else
706
3.15k
      Ops[i] = DAG.getNode(
707
3.15k
          ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val,
708
3.15k
          DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
709
11.7k
  }
710
4.50k
711
4.50k
  // Split the intermediate operands into legal parts.
712
4.50k
  if (
NumParts == NumIntermediates4.50k
) {
713
4.37k
    // If the register was not expanded, promote or copy the value,
714
4.37k
    // as appropriate.
715
15.7k
    for (unsigned i = 0; 
i != NumParts15.7k
;
++i11.3k
)
716
11.3k
      getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V);
717
4.50k
  } else 
if (127
NumParts > 0127
) {
718
127
    // If the intermediate type was expanded, split each the value into
719
127
    // legal parts.
720
127
    assert(NumIntermediates != 0 && "division by zero");
721
127
    assert(NumParts % NumIntermediates == 0 &&
722
127
           "Must expand into a divisible number of parts!");
723
127
    unsigned Factor = NumParts / NumIntermediates;
724
571
    for (unsigned i = 0; 
i != NumIntermediates571
;
++i444
)
725
444
      getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V);
726
127
  }
727
101k
}
728
729
RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
730
                           EVT valuevt, bool IsABIMangledValue)
731
    : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
732
54.0k
      RegCount(1, regs.size()), IsABIMangled(IsABIMangledValue) {}
733
734
RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
735
                           const DataLayout &DL, unsigned Reg, Type *Ty,
736
8.18M
                           bool IsABIMangledValue) {
737
8.18M
  ComputeValueVTs(TLI, DL, Ty, ValueVTs);
738
8.18M
739
8.18M
  IsABIMangled = IsABIMangledValue;
740
8.18M
741
8.19M
  for (EVT ValueVT : ValueVTs) {
742
8.19M
    unsigned NumRegs = IsABIMangledValue
743
642k
                           ? TLI.getNumRegistersForCallingConv(Context, ValueVT)
744
7.55M
                           : TLI.getNumRegisters(Context, ValueVT);
745
8.19M
    MVT RegisterVT = IsABIMangledValue
746
642k
                         ? TLI.getRegisterTypeForCallingConv(Context, ValueVT)
747
7.55M
                         : TLI.getRegisterType(Context, ValueVT);
748
16.4M
    for (unsigned i = 0; 
i != NumRegs16.4M
;
++i8.22M
)
749
8.22M
      Regs.push_back(Reg + i);
750
8.19M
    RegVTs.push_back(RegisterVT);
751
8.19M
    RegCount.push_back(NumRegs);
752
8.19M
    Reg += NumRegs;
753
8.19M
  }
754
8.18M
}
755
756
SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
757
                                      FunctionLoweringInfo &FuncInfo,
758
                                      const SDLoc &dl, SDValue &Chain,
759
5.66M
                                      SDValue *Flag, const Value *V) const {
760
5.66M
  // A Value with type {} or [0 x %t] needs no registers.
761
5.66M
  if (ValueVTs.empty())
762
0
    return SDValue();
763
5.66M
764
5.66M
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
765
5.66M
766
5.66M
  // Assemble the legal parts into the final values.
767
5.66M
  SmallVector<SDValue, 4> Values(ValueVTs.size());
768
5.66M
  SmallVector<SDValue, 8> Parts;
769
11.3M
  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); 
Value != e11.3M
;
++Value5.66M
) {
770
5.66M
    // Copy the legal parts from the registers.
771
5.66M
    EVT ValueVT = ValueVTs[Value];
772
5.66M
    unsigned NumRegs = RegCount[Value];
773
5.66M
    MVT RegisterVT = IsABIMangled
774
351k
                         ? TLI.getRegisterTypeForCallingConv(RegVTs[Value])
775
5.31M
                         : RegVTs[Value];
776
5.66M
777
5.66M
    Parts.resize(NumRegs);
778
11.3M
    for (unsigned i = 0; 
i != NumRegs11.3M
;
++i5.68M
) {
779
5.68M
      SDValue P;
780
5.68M
      if (
!Flag5.68M
) {
781
5.68M
        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
782
5.68M
      } else {
783
3.70k
        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
784
3.70k
        *Flag = P.getValue(2);
785
3.70k
      }
786
5.68M
787
5.68M
      Chain = P.getValue(1);
788
5.68M
      Parts[i] = P;
789
5.68M
790
5.68M
      // If the source register was virtual and if we know something about it,
791
5.68M
      // add an assert node.
792
5.68M
      if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) ||
793
5.68M
          
!RegisterVT.isInteger()5.68M
||
RegisterVT.isVector()5.57M
)
794
151k
        continue;
795
5.53M
796
5.53M
      const FunctionLoweringInfo::LiveOutInfo *LOI =
797
5.53M
        FuncInfo.GetLiveOutRegInfo(Regs[Part+i]);
798
5.53M
      if (!LOI)
799
2.27M
        continue;
800
3.25M
801
3.25M
      unsigned RegSize = RegisterVT.getSizeInBits();
802
3.25M
      unsigned NumSignBits = LOI->NumSignBits;
803
3.25M
      unsigned NumZeroBits = LOI->Known.countMinLeadingZeros();
804
3.25M
805
3.25M
      if (
NumZeroBits == RegSize3.25M
) {
806
2.60k
        // The current value is a zero.
807
2.60k
        // Explicitly express that as it would be easier for
808
2.60k
        // optimizations to kick in.
809
2.60k
        Parts[i] = DAG.getConstant(0, dl, RegisterVT);
810
2.60k
        continue;
811
2.60k
      }
812
3.25M
813
3.25M
      // FIXME: We capture more information than the dag can represent.  For
814
3.25M
      // now, just use the tightest assertzext/assertsext possible.
815
3.25M
      bool isSExt = true;
816
3.25M
      EVT FromVT(MVT::Other);
817
3.25M
      if (
NumSignBits == RegSize3.25M
) {
818
1.61k
        isSExt = true;   // ASSERT SEXT 1
819
1.61k
        FromVT = MVT::i1;
820
3.25M
      } else 
if (3.25M
NumZeroBits >= RegSize - 13.25M
) {
821
71.5k
        isSExt = false;  // ASSERT ZEXT 1
822
71.5k
        FromVT = MVT::i1;
823
3.25M
      } else 
if (3.18M
NumSignBits > RegSize - 83.18M
) {
824
43.4k
        isSExt = true;   // ASSERT SEXT 8
825
43.4k
        FromVT = MVT::i8;
826
3.18M
      } else 
if (3.14M
NumZeroBits >= RegSize - 83.14M
) {
827
49.2k
        isSExt = false;  // ASSERT ZEXT 8
828
49.2k
        FromVT = MVT::i8;
829
3.14M
      } else 
if (3.09M
NumSignBits > RegSize - 163.09M
) {
830
53.2k
        isSExt = true;   // ASSERT SEXT 16
831
53.2k
        FromVT = MVT::i16;
832
3.09M
      } else 
if (3.03M
NumZeroBits >= RegSize - 163.03M
) {
833
24.2k
        isSExt = false;  // ASSERT ZEXT 16
834
24.2k
        FromVT = MVT::i16;
835
3.03M
      } else 
if (3.01M
NumSignBits > RegSize - 323.01M
) {
836
414k
        isSExt = true;   // ASSERT SEXT 32
837
414k
        FromVT = MVT::i32;
838
3.01M
      } else 
if (2.59M
NumZeroBits >= RegSize - 322.59M
) {
839
572k
        isSExt = false;  // ASSERT ZEXT 32
840
572k
        FromVT = MVT::i32;
841
2.59M
      } else {
842
2.02M
        continue;
843
2.02M
      }
844
1.23M
      // Add an assertion node.
845
3.25M
      assert(FromVT != MVT::Other);
846
1.23M
      Parts[i] = DAG.getNode(isSExt ? 
ISD::AssertSext512k
:
ISD::AssertZext717k
, dl,
847
5.68M
                             RegisterVT, P, DAG.getValueType(FromVT));
848
5.68M
    }
849
5.66M
850
5.66M
    Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
851
5.66M
                                     NumRegs, RegisterVT, ValueVT, V);
852
5.66M
    Part += NumRegs;
853
5.66M
    Parts.clear();
854
5.66M
  }
855
5.66M
856
5.66M
  return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values);
857
5.66M
}
858
859
void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
860
                                 const SDLoc &dl, SDValue &Chain, SDValue *Flag,
861
                                 const Value *V,
862
2.52M
                                 ISD::NodeType PreferredExtendType) const {
863
2.52M
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
864
2.52M
  ISD::NodeType ExtendKind = PreferredExtendType;
865
2.52M
866
2.52M
  // Get the list of the values's legal parts.
867
2.52M
  unsigned NumRegs = Regs.size();
868
2.52M
  SmallVector<SDValue, 8> Parts(NumRegs);
869
5.06M
  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); 
Value != e5.06M
;
++Value2.53M
) {
870
2.53M
    unsigned NumParts = RegCount[Value];
871
2.53M
872
2.53M
    MVT RegisterVT = IsABIMangled
873
290k
                         ? TLI.getRegisterTypeForCallingConv(RegVTs[Value])
874
2.24M
                         : RegVTs[Value];
875
2.53M
876
2.53M
    if (
ExtendKind == ISD::ANY_EXTEND && 2.53M
TLI.isZExtFree(Val, RegisterVT)2.37M
)
877
116k
      ExtendKind = ISD::ZERO_EXTEND;
878
2.53M
879
2.53M
    getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
880
2.53M
                   &Parts[Part], NumParts, RegisterVT, V, ExtendKind);
881
2.53M
    Part += NumParts;
882
2.53M
  }
883
2.52M
884
2.52M
  // Copy the parts into the registers.
885
2.52M
  SmallVector<SDValue, 8> Chains(NumRegs);
886
5.07M
  for (unsigned i = 0; 
i != NumRegs5.07M
;
++i2.54M
) {
887
2.54M
    SDValue Part;
888
2.54M
    if (
!Flag2.54M
) {
889
2.54M
      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
890
2.54M
    } else {
891
2.54k
      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
892
2.54k
      *Flag = Part.getValue(1);
893
2.54k
    }
894
2.54M
895
2.54M
    Chains[i] = Part.getValue(0);
896
2.54M
  }
897
2.52M
898
2.52M
  if (
NumRegs == 1 || 2.52M
Flag14.5k
)
899
2.52M
    // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
900
2.52M
    // flagged to it. That is the CopyToReg nodes and the user are considered
901
2.52M
    // a single scheduling unit. If we create a TokenFactor and return it as
902
2.52M
    // chain, then the TokenFactor is both a predecessor (operand) of the
903
2.52M
    // user as well as a successor (the TF operands are flagged to the user).
904
2.52M
    // c1, f1 = CopyToReg
905
2.52M
    // c2, f2 = CopyToReg
906
2.52M
    // c3     = TokenFactor c1, c2
907
2.52M
    // ...
908
2.52M
    //        = op c3, ..., f2
909
2.51M
    Chain = Chains[NumRegs-1];
910
2.52M
  else
911
14.4k
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
912
2.52M
}
913
914
void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
915
                                        unsigned MatchingIdx, const SDLoc &dl,
916
                                        SelectionDAG &DAG,
917
54.0k
                                        std::vector<SDValue> &Ops) const {
918
54.0k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
919
54.0k
920
54.0k
  unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
921
54.0k
  if (HasMatching)
922
193
    Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
923
53.8k
  else 
if (53.8k
!Regs.empty() &&
924
53.8k
           
TargetRegisterInfo::isVirtualRegister(Regs.front())53.8k
) {
925
4.41k
    // Put the register class of the virtual registers in the flag word.  That
926
4.41k
    // way, later passes can recompute register class constraints for inline
927
4.41k
    // assembly as well as normal instructions.
928
4.41k
    // Don't do this for tied operands that can use the regclass information
929
4.41k
    // from the def.
930
4.41k
    const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
931
4.41k
    const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
932
4.41k
    Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
933
4.41k
  }
934
54.0k
935
54.0k
  SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32);
936
54.0k
  Ops.push_back(Res);
937
54.0k
938
54.0k
  unsigned SP = TLI.getStackPointerRegisterToSaveRestore();
939
108k
  for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); 
Value != e108k
;
++Value54.0k
) {
940
54.0k
    unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
941
54.0k
    MVT RegisterVT = RegVTs[Value];
942
108k
    for (unsigned i = 0; 
i != NumRegs108k
;
++i54.1k
) {
943
54.1k
      assert(Reg < Regs.size() && "Mismatch in # registers expected");
944
54.1k
      unsigned TheReg = Regs[Reg++];
945
54.1k
      Ops.push_back(DAG.getRegister(TheReg, RegisterVT));
946
54.1k
947
54.1k
      if (
TheReg == SP && 54.1k
Code == InlineAsm::Kind_Clobber60
) {
948
60
        // If we clobbered the stack pointer, MFI should know about it.
949
60
        assert(DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment());
950
60
      }
951
54.1k
    }
952
54.0k
  }
953
54.0k
}
954
955
void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
956
437k
                               const TargetLibraryInfo *li) {
957
437k
  AA = aa;
958
437k
  GFI = gfi;
959
437k
  LibInfo = li;
960
437k
  DL = &DAG.getDataLayout();
961
437k
  Context = DAG.getContext();
962
437k
  LPadToCallSiteMap.clear();
963
437k
}
964
965
3.42M
void SelectionDAGBuilder::clear() {
966
3.42M
  NodeMap.clear();
967
3.42M
  UnusedArgNodeMap.clear();
968
3.42M
  PendingLoads.clear();
969
3.42M
  PendingExports.clear();
970
3.42M
  CurInst = nullptr;
971
3.42M
  HasTailCall = false;
972
3.42M
  SDNodeOrder = LowestSDNodeOrder;
973
3.42M
  StatepointLowering.clear();
974
3.42M
}
975
976
436k
void SelectionDAGBuilder::clearDanglingDebugInfo() {
977
436k
  DanglingDebugInfoMap.clear();
978
436k
}
979
980
3.79M
SDValue SelectionDAGBuilder::getRoot() {
981
3.79M
  if (PendingLoads.empty())
982
3.08M
    return DAG.getRoot();
983
706k
984
706k
  
if (706k
PendingLoads.size() == 1706k
) {
985
559k
    SDValue Root = PendingLoads[0];
986
559k
    DAG.setRoot(Root);
987
559k
    PendingLoads.clear();
988
559k
    return Root;
989
559k
  }
990
146k
991
146k
  // Otherwise, we have to make a token factor node.
992
146k
  SDValue Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
993
146k
                             PendingLoads);
994
146k
  PendingLoads.clear();
995
146k
  DAG.setRoot(Root);
996
146k
  return Root;
997
146k
}
998
999
5.84M
SDValue SelectionDAGBuilder::getControlRoot() {
1000
5.84M
  SDValue Root = DAG.getRoot();
1001
5.84M
1002
5.84M
  if (PendingExports.empty())
1003
4.26M
    return Root;
1004
1.58M
1005
1.58M
  // Turn all of the CopyToReg chains into one factored node.
1006
1.58M
  
if (1.58M
Root.getOpcode() != ISD::EntryToken1.58M
) {
1007
805k
    unsigned i = 0, e = PendingExports.size();
1008
2.01M
    for (; 
i != e2.01M
;
++i1.21M
) {
1009
1.21M
      assert(PendingExports[i].getNode()->getNumOperands() > 1);
1010
1.21M
      if (PendingExports[i].getNode()->getOperand(0) == Root)
1011
0
        break;  // Don't add the root if we already indirectly depend on it.
1012
1.21M
    }
1013
805k
1014
805k
    if (i == e)
1015
805k
      PendingExports.push_back(Root);
1016
805k
  }
1017
5.84M
1018
5.84M
  Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
1019
5.84M
                     PendingExports);
1020
5.84M
  PendingExports.clear();
1021
5.84M
  DAG.setRoot(Root);
1022
5.84M
  return Root;
1023
5.84M
}
1024
1025
18.0M
void SelectionDAGBuilder::visit(const Instruction &I) {
1026
18.0M
  // Set up outgoing PHI node register values before emitting the terminator.
1027
18.0M
  if (
isa<TerminatorInst>(&I)18.0M
) {
1028
3.06M
    HandlePHINodesInSuccessorBlocks(I.getParent());
1029
3.06M
  }
1030
18.0M
1031
18.0M
  // Increase the SDNodeOrder if dealing with a non-debug instruction.
1032
18.0M
  if (!isa<DbgInfoIntrinsic>(I))
1033
18.0M
    ++SDNodeOrder;
1034
18.0M
1035
18.0M
  CurInst = &I;
1036
18.0M
1037
18.0M
  visit(I.getOpcode(), I);
1038
18.0M
1039
18.0M
  if (
!isa<TerminatorInst>(&I) && 18.0M
!HasTailCall14.9M
&&
1040
14.7M
      !isStatepoint(&I)) // statepoints handle their exports internally
1041
14.7M
    CopyToExportRegsIfNeeded(&I);
1042
18.0M
1043
18.0M
  CurInst = nullptr;
1044
18.0M
}
1045
1046
0
void SelectionDAGBuilder::visitPHI(const PHINode &) {
1047
0
  llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!");
1048
0
}
1049
1050
19.0M
void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
1051
19.0M
  // Note: this doesn't use InstVisitor, because it has to work with
1052
19.0M
  // ConstantExpr's in addition to instructions.
1053
19.0M
  switch (Opcode) {
1054
0
  
default: 0
llvm_unreachable0
("Unknown instruction type encountered!");
1055
19.0M
    // Build the switch statement using the Instruction.def file.
1056
19.0M
#define HANDLE_INST(NUM, OPCODE, CLASS) \
1057
19.0M
    case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break;
1058
373k
#include "llvm/IR/Instruction.def"
1059
19.0M
  }
1060
19.0M
}
1061
1062
// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
1063
// generate the debug data structures now that we've seen its definition.
1064
void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
1065
13.9M
                                                   SDValue Val) {
1066
13.9M
  DanglingDebugInfo &DDI = DanglingDebugInfoMap[V];
1067
13.9M
  if (
DDI.getDI()13.9M
) {
1068
35
    const DbgValueInst *DI = DDI.getDI();
1069
35
    DebugLoc dl = DDI.getdl();
1070
35
    unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
1071
35
    DILocalVariable *Variable = DI->getVariable();
1072
35
    DIExpression *Expr = DI->getExpression();
1073
35
    assert(Variable->isValidLocationForIntrinsic(dl) &&
1074
35
           "Expected inlined-at fields to agree");
1075
35
    SDDbgValue *SDV;
1076
35
    if (
Val.getNode()35
) {
1077
35
      if (
!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)35
) {
1078
32
        SDV = getDbgValue(Val, Variable, Expr, dl, DbgSDNodeOrder);
1079
32
        DAG.AddDbgValue(SDV, Val.getNode(), false);
1080
32
      }
1081
35
    } else
1082
35
      DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
1083
35
    DanglingDebugInfoMap[V] = DanglingDebugInfo();
1084
35
  }
1085
13.9M
}
1086
1087
/// getCopyFromRegs - If there was virtual register allocated for the value V
1088
/// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise.
1089
13.4M
SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
1090
13.4M
  DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
1091
13.4M
  SDValue Result;
1092
13.4M
1093
13.4M
  if (
It != FuncInfo.ValueMap.end()13.4M
) {
1094
5.65M
    unsigned InReg = It->second;
1095
5.65M
1096
5.65M
    RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
1097
5.65M
                     DAG.getDataLayout(), InReg, Ty, isABIRegCopy(V));
1098
5.65M
    SDValue Chain = DAG.getEntryNode();
1099
5.65M
    Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,
1100
5.65M
                                 V);
1101
5.65M
    resolveDanglingDebugInfo(V, Result);
1102
5.65M
  }
1103
13.4M
1104
13.4M
  return Result;
1105
13.4M
}
1106
1107
/// getValue - Return an SDValue for the given Value.
1108
28.5M
SDValue SelectionDAGBuilder::getValue(const Value *V) {
1109
28.5M
  // If we already have an SDValue for this value, use it. It's important
1110
28.5M
  // to do this first, so that we don't create a CopyFromReg if we already
1111
28.5M
  // have a regular SDValue.
1112
28.5M
  SDValue &N = NodeMap[V];
1113
28.5M
  if (
N.getNode()28.5M
)
return N15.0M
;
1114
13.4M
1115
13.4M
  // If there's a virtual register allocated and initialized for this
1116
13.4M
  // value, use it.
1117
13.4M
  
if (SDValue 13.4M
copyFromReg13.4M
= getCopyFromRegs(V, V->getType()))
1118
5.65M
    return copyFromReg;
1119
7.78M
1120
7.78M
  // Otherwise create a new SDValue and remember it.
1121
7.78M
  SDValue Val = getValueImpl(V);
1122
7.78M
  NodeMap[V] = Val;
1123
7.78M
  resolveDanglingDebugInfo(V, Val);
1124
7.78M
  return Val;
1125
7.78M
}
1126
1127
// Return true if SDValue exists for the given Value
1128
304
bool SelectionDAGBuilder::findValue(const Value *V) const {
1129
304
  return (NodeMap.find(V) != NodeMap.end()) ||
1130
38
    (FuncInfo.ValueMap.find(V) != FuncInfo.ValueMap.end());
1131
304
}
1132
1133
/// getNonRegisterValue - Return an SDValue for the given Value, but
1134
/// don't look in FuncInfo.ValueMap for a virtual register.
1135
2.52M
SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
1136
2.52M
  // If we already have an SDValue for this value, use it.
1137
2.52M
  SDValue &N = NodeMap[V];
1138
2.52M
  if (
N.getNode()2.52M
) {
1139
2.01M
    if (
isa<ConstantSDNode>(N) || 2.01M
isa<ConstantFPSDNode>(N)1.95M
) {
1140
53.8k
      // Remove the debug location from the node as the node is about to be used
1141
53.8k
      // in a location which may differ from the original debug location.  This
1142
53.8k
      // is relevant to Constant and ConstantFP nodes because they can appear
1143
53.8k
      // as constant expressions inside PHI nodes.
1144
53.8k
      N->setDebugLoc(DebugLoc());
1145
53.8k
    }
1146
2.01M
    return N;
1147
2.01M
  }
1148
515k
1149
515k
  // Otherwise create a new SDValue and remember it.
1150
515k
  SDValue Val = getValueImpl(V);
1151
515k
  NodeMap[V] = Val;
1152
515k
  resolveDanglingDebugInfo(V, Val);
1153
515k
  return Val;
1154
515k
}
1155
1156
/// getValueImpl - Helper function for getValue and getNonRegisterValue.
1157
/// Create an SDValue for the given value.
1158
8.30M
SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
1159
8.30M
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1160
8.30M
1161
8.30M
  if (const Constant *
C8.30M
= dyn_cast<Constant>(V)) {
1162
7.87M
    EVT VT = TLI.getValueType(DAG.getDataLayout(), V->getType(), true);
1163
7.87M
1164
7.87M
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
1165
3.33M
      return DAG.getConstant(*CI, getCurSDLoc(), VT);
1166
4.53M
1167
4.53M
    
if (const GlobalValue *4.53M
GV4.53M
= dyn_cast<GlobalValue>(C))
1168
2.61M
      return DAG.getGlobalAddress(GV, getCurSDLoc(), VT);
1169
1.92M
1170
1.92M
    
if (1.92M
isa<ConstantPointerNull>(C)1.92M
) {
1171
586k
      unsigned AS = V->getType()->getPointerAddressSpace();
1172
586k
      return DAG.getConstant(0, getCurSDLoc(),
1173
586k
                             TLI.getPointerTy(DAG.getDataLayout(), AS));
1174
586k
    }
1175
1.33M
1176
1.33M
    
if (const ConstantFP *1.33M
CFP1.33M
= dyn_cast<ConstantFP>(C))
1177
117k
      return DAG.getConstantFP(*CFP, getCurSDLoc(), VT);
1178
1.21M
1179
1.21M
    
if (1.21M
isa<UndefValue>(C) && 1.21M
!V->getType()->isAggregateType()44.7k
)
1180
43.8k
      return DAG.getUNDEF(VT);
1181
1.17M
1182
1.17M
    
if (const ConstantExpr *1.17M
CE1.17M
= dyn_cast<ConstantExpr>(C)) {
1183
1.04M
      visit(CE->getOpcode(), *CE);
1184
1.04M
      SDValue N1 = NodeMap[V];
1185
1.04M
      assert(N1.getNode() && "visit didn't populate the NodeMap!");
1186
1.04M
      return N1;
1187
1.04M
    }
1188
128k
1189
128k
    
if (128k
isa<ConstantStruct>(C) || 128k
isa<ConstantArray>(C)128k
) {
1190
47
      SmallVector<SDValue, 4> Constants;
1191
47
      for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
1192
175
           
OI != OE175
;
++OI128
) {
1193
128
        SDNode *Val = getValue(*OI).getNode();
1194
128
        // If the operand is an empty aggregate, there are no values.
1195
128
        if (
!Val128
)
continue1
;
1196
127
        // Add each leaf value from the operand to the Constants list
1197
127
        // to form a flattened list of all the values.
1198
265
        
for (unsigned i = 0, e = Val->getNumValues(); 127
i != e265
;
++i138
)
1199
138
          Constants.push_back(SDValue(Val, i));
1200
128
      }
1201
47
1202
47
      return DAG.getMergeValues(Constants, getCurSDLoc());
1203
47
    }
1204
128k
1205
128k
    
if (const ConstantDataSequential *128k
CDS128k
=
1206
95.0k
          dyn_cast<ConstantDataSequential>(C)) {
1207
95.0k
      SmallVector<SDValue, 4> Ops;
1208
442k
      for (unsigned i = 0, e = CDS->getNumElements(); 
i != e442k
;
++i346k
) {
1209
346k
        SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode();
1210
346k
        // Add each leaf value from the operand to the Constants list
1211
346k
        // to form a flattened list of all the values.
1212
693k
        for (unsigned i = 0, e = Val->getNumValues(); 
i != e693k
;
++i346k
)
1213
346k
          Ops.push_back(SDValue(Val, i));
1214
346k
      }
1215
95.0k
1216
95.0k
      if (isa<ArrayType>(CDS->getType()))
1217
29
        return DAG.getMergeValues(Ops, getCurSDLoc());
1218
95.0k
      return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
1219
95.0k
    }
1220
32.9k
1221
32.9k
    
if (32.9k
C->getType()->isStructTy() || 32.9k
C->getType()->isArrayTy()32.1k
) {
1222
983
      assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
1223
983
             "Unknown struct or array constant!");
1224
983
1225
983
      SmallVector<EVT, 4> ValueVTs;
1226
983
      ComputeValueVTs(TLI, DAG.getDataLayout(), C->getType(), ValueVTs);
1227
983
      unsigned NumElts = ValueVTs.size();
1228
983
      if (NumElts == 0)
1229
1
        return SDValue(); // empty struct
1230
982
      SmallVector<SDValue, 4> Constants(NumElts);
1231
3.86k
      for (unsigned i = 0; 
i != NumElts3.86k
;
++i2.88k
) {
1232
2.88k
        EVT EltVT = ValueVTs[i];
1233
2.88k
        if (isa<UndefValue>(C))
1234
2.56k
          Constants[i] = DAG.getUNDEF(EltVT);
1235
322
        else 
if (322
EltVT.isFloatingPoint()322
)
1236
30
          Constants[i] = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
1237
322
        else
1238
292
          Constants[i] = DAG.getConstant(0, getCurSDLoc(), EltVT);
1239
2.88k
      }
1240
983
1241
983
      return DAG.getMergeValues(Constants, getCurSDLoc());
1242
983
    }
1243
32.0k
1244
32.0k
    
if (const BlockAddress *32.0k
BA32.0k
= dyn_cast<BlockAddress>(C))
1245
110
      return DAG.getBlockAddress(BA, VT);
1246
31.8k
1247
31.8k
    VectorType *VecTy = cast<VectorType>(V->getType());
1248
31.8k
    unsigned NumElements = VecTy->getNumElements();
1249
31.8k
1250
31.8k
    // Now that we know the number and type of the elements, get that number of
1251
31.8k
    // elements into the Ops array based on what kind of constant it is.
1252
31.8k
    SmallVector<SDValue, 16> Ops;
1253
31.8k
    if (const ConstantVector *
CV31.8k
= dyn_cast<ConstantVector>(C)) {
1254
41.6k
      for (unsigned i = 0; 
i != NumElements41.6k
;
++i37.6k
)
1255
37.6k
        Ops.push_back(getValue(CV->getOperand(i)));
1256
31.8k
    } else {
1257
27.9k
      assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
1258
27.9k
      EVT EltVT =
1259
27.9k
          TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType());
1260
27.9k
1261
27.9k
      SDValue Op;
1262
27.9k
      if (EltVT.isFloatingPoint())
1263
2.16k
        Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
1264
27.9k
      else
1265
25.7k
        Op = DAG.getConstant(0, getCurSDLoc(), EltVT);
1266
27.9k
      Ops.assign(NumElements, Op);
1267
27.9k
    }
1268
7.87M
1269
7.87M
    // Create a BUILD_VECTOR node.
1270
7.87M
    return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
1271
7.87M
  }
1272
426k
1273
426k
  // If this is a static alloca, generate it as the frameindex instead of
1274
426k
  // computation.
1275
426k
  
if (const AllocaInst *426k
AI426k
= dyn_cast<AllocaInst>(V)) {
1276
425k
    DenseMap<const AllocaInst*, int>::iterator SI =
1277
425k
      FuncInfo.StaticAllocaMap.find(AI);
1278
425k
    if (SI != FuncInfo.StaticAllocaMap.end())
1279
425k
      return DAG.getFrameIndex(SI->second,
1280
425k
                               TLI.getFrameIndexTy(DAG.getDataLayout()));
1281
1.18k
  }
1282
1.18k
1283
1.18k
  // If this is an instruction which fast-isel has deferred, select it now.
1284
1.18k
  
if (const Instruction *1.18k
Inst1.18k
= dyn_cast<Instruction>(V)) {
1285
1.18k
    unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
1286
1.18k
1287
1.18k
    RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
1288
1.18k
                     Inst->getType(), isABIRegCopy(V));
1289
1.18k
    SDValue Chain = DAG.getEntryNode();
1290
1.18k
    return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
1291
1.18k
  }
1292
0
1293
0
  
llvm_unreachable0
("Can't get register for value!");
1294
0
}
1295
1296
104
void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) {
1297
104
  auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
1298
104
  bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX;
1299
104
  bool IsCoreCLR = Pers == EHPersonality::CoreCLR;
1300
104
  MachineBasicBlock *CatchPadMBB = FuncInfo.MBB;
1301
104
  // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues.
1302
104
  if (
IsMSVCCXX || 104
IsCoreCLR39
)
1303
76
    CatchPadMBB->setIsEHFuncletEntry();
1304
104
1305
104
  DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other, getControlRoot()));
1306
104
}
1307
1308
87
void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
1309
87
  // Update machine-CFG edge.
1310
87
  MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()];
1311
87
  FuncInfo.MBB->addSuccessor(TargetMBB);
1312
87
1313
87
  auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
1314
87
  bool IsSEH = isAsynchronousEHPersonality(Pers);
1315
87
  if (
IsSEH87
) {
1316
26
    // If this is not a fall-through branch or optimizations are switched off,
1317
26
    // emit the branch.
1318
26
    if (TargetMBB != NextBlock(FuncInfo.MBB) ||
1319
17
        TM.getOptLevel() == CodeGenOpt::None)
1320
10
      DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
1321
10
                              getControlRoot(), DAG.getBasicBlock(TargetMBB)));
1322
26
    return;
1323
26
  }
1324
61
1325
61
  // Figure out the funclet membership for the catchret's successor.
1326
61
  // This will be used by the FuncletLayout pass to determine how to order the
1327
61
  // BB's.
1328
61
  // A 'catchret' returns to the outer scope's color.
1329
61
  Value *ParentPad = I.getCatchSwitchParentPad();
1330
61
  const BasicBlock *SuccessorColor;
1331
61
  if (isa<ConstantTokenNone>(ParentPad))
1332
55
    SuccessorColor = &FuncInfo.Fn->getEntryBlock();
1333
61
  else
1334
6
    SuccessorColor = cast<Instruction>(ParentPad)->getParent();
1335
87
  assert(SuccessorColor && "No parent funclet for catchret!");
1336
87
  MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor];
1337
87
  assert(SuccessorColorMBB && "No MBB for SuccessorColor!");
1338
87
1339
87
  // Create the terminator node.
1340
87
  SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other,
1341
87
                            getControlRoot(), DAG.getBasicBlock(TargetMBB),
1342
87
                            DAG.getBasicBlock(SuccessorColorMBB));
1343
87
  DAG.setRoot(Ret);
1344
87
}
1345
1346
38
void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
1347
38
  // Don't emit any special code for the cleanuppad instruction. It just marks
1348
38
  // the start of a funclet.
1349
38
  FuncInfo.MBB->setIsEHFuncletEntry();
1350
38
  FuncInfo.MBB->setIsCleanupFuncletEntry();
1351
38
}
1352
1353
/// When an invoke or a cleanupret unwinds to the next EH pad, there are
1354
/// many places it could ultimately go. In the IR, we have a single unwind
1355
/// destination, but in the machine CFG, we enumerate all the possible blocks.
1356
/// This function skips over imaginary basic blocks that hold catchswitch
1357
/// instructions, and finds all the "real" machine
1358
/// basic block destinations. As those destinations may not be successors of
1359
/// EHPadBB, here we also calculate the edge probability to those destinations.
1360
/// The passed-in Prob is the edge probability to EHPadBB.
1361
static void findUnwindDestinations(
1362
    FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
1363
    BranchProbability Prob,
1364
    SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
1365
18.5k
        &UnwindDests) {
1366
18.5k
  EHPersonality Personality =
1367
18.5k
    classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
1368
18.5k
  bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
1369
18.5k
  bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
1370
18.5k
1371
18.6k
  while (
EHPadBB18.6k
) {
1372
18.5k
    const Instruction *Pad = EHPadBB->getFirstNonPHI();
1373
18.5k
    BasicBlock *NewEHPadBB = nullptr;
1374
18.5k
    if (
isa<LandingPadInst>(Pad)18.5k
) {
1375
18.4k
      // Stop on landingpads. They are not funclets.
1376
18.4k
      UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
1377
18.4k
      break;
1378
179
    } else 
if (179
isa<CleanupPadInst>(Pad)179
) {
1379
62
      // Stop on cleanup pads. Cleanups are always funclet entries for all known
1380
62
      // personalities.
1381
62
      UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
1382
62
      UnwindDests.back().first->setIsEHFuncletEntry();
1383
62
      break;
1384
117
    } else 
if (auto *117
CatchSwitch117
= dyn_cast<CatchSwitchInst>(Pad)) {
1385
117
      // Add the catchpad handlers to the possible destinations.
1386
128
      for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
1387
128
        UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
1388
128
        // For MSVC++ and the CLR, catchblocks are funclets and need prologues.
1389
128
        if (
IsMSVCCXX || 128
IsCoreCLR49
)
1390
90
          UnwindDests.back().first->setIsEHFuncletEntry();
1391
128
      }
1392
117
      NewEHPadBB = CatchSwitch->getUnwindDest();
1393
0
    } else {
1394
0
      continue;
1395
0
    }
1396
117
1397
117
    BranchProbabilityInfo *BPI = FuncInfo.BPI;
1398
117
    if (
BPI && 117
NewEHPadBB116
)
1399
28
      Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB);
1400
18.5k
    EHPadBB = NewEHPadBB;
1401
18.5k
  }
1402
18.5k
}
1403
1404
31
void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) {
1405
31
  // Update successor info.
1406
31
  SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
1407
31
  auto UnwindDest = I.getUnwindDest();
1408
31
  BranchProbabilityInfo *BPI = FuncInfo.BPI;
1409
31
  BranchProbability UnwindDestProb =
1410
31
      (BPI && UnwindDest)
1411
10
          ? BPI->getEdgeProbability(FuncInfo.MBB->getBasicBlock(), UnwindDest)
1412
21
          : BranchProbability::getZero();
1413
31
  findUnwindDestinations(FuncInfo, UnwindDest, UnwindDestProb, UnwindDests);
1414
11
  for (auto &UnwindDest : UnwindDests) {
1415
11
    UnwindDest.first->setIsEHPad();
1416
11
    addSuccessorWithProb(FuncInfo.MBB, UnwindDest.first, UnwindDest.second);
1417
11
  }
1418
31
  FuncInfo.MBB->normalizeSuccProbs();
1419
31
1420
31
  // Create the terminator node.
1421
31
  SDValue Ret =
1422
31
      DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot());
1423
31
  DAG.setRoot(Ret);
1424
31
}
1425
1426
0
void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) {
1427
0
  report_fatal_error("visitCatchSwitch not yet implemented!");
1428
0
}
1429
1430
373k
void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
1431
373k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1432
373k
  auto &DL = DAG.getDataLayout();
1433
373k
  SDValue Chain = getControlRoot();
1434
373k
  SmallVector<ISD::OutputArg, 8> Outs;
1435
373k
  SmallVector<SDValue, 8> OutVals;
1436
373k
1437
373k
  // Calls to @llvm.experimental.deoptimize don't generate a return value, so
1438
373k
  // lower
1439
373k
  //
1440
373k
  //   %val = call <ty> @llvm.experimental.deoptimize()
1441
373k
  //   ret <ty> %val
1442
373k
  //
1443
373k
  // differently.
1444
373k
  if (
I.getParent()->getTerminatingDeoptimizeCall()373k
) {
1445
0
    LowerDeoptimizingReturn();
1446
0
    return;
1447
0
  }
1448
373k
1449
373k
  
if (373k
!FuncInfo.CanLowerReturn373k
) {
1450
516
    unsigned DemoteReg = FuncInfo.DemoteRegister;
1451
516
    const Function *F = I.getParent()->getParent();
1452
516
1453
516
    // Emit a store of the return value through the virtual register.
1454
516
    // Leave Outs empty so that LowerReturn won't try to load return
1455
516
    // registers the usual way.
1456
516
    SmallVector<EVT, 1> PtrValueVTs;
1457
516
    ComputeValueVTs(TLI, DL, PointerType::getUnqual(F->getReturnType()),
1458
516
                    PtrValueVTs);
1459
516
1460
516
    SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
1461
516
                                        DemoteReg, PtrValueVTs[0]);
1462
516
    SDValue RetOp = getValue(I.getOperand(0));
1463
516
1464
516
    SmallVector<EVT, 4> ValueVTs;
1465
516
    SmallVector<uint64_t, 4> Offsets;
1466
516
    ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets);
1467
516
    unsigned NumValues = ValueVTs.size();
1468
516
1469
516
    // An aggregate return value cannot wrap around the address space, so
1470
516
    // offsets to its parts don't wrap either.
1471
516
    SDNodeFlags Flags;
1472
516
    Flags.setNoUnsignedWrap(true);
1473
516
1474
516
    SmallVector<SDValue, 4> Chains(NumValues);
1475
1.25k
    for (unsigned i = 0; 
i != NumValues1.25k
;
++i741
) {
1476
741
      SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(),
1477
741
                                RetPtr.getValueType(), RetPtr,
1478
741
                                DAG.getIntPtrConstant(Offsets[i],
1479
741
                                                      getCurSDLoc()),
1480
741
                                Flags);
1481
741
      Chains[i] = DAG.getStore(Chain, getCurSDLoc(),
1482
741
                               SDValue(RetOp.getNode(), RetOp.getResNo() + i),
1483
741
                               // FIXME: better loc info would be nice.
1484
741
                               Add, MachinePointerInfo());
1485
741
    }
1486
516
1487
516
    Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
1488
516
                        MVT::Other, Chains);
1489
373k
  } else 
if (372k
I.getNumOperands() != 0372k
) {
1490
271k
    SmallVector<EVT, 4> ValueVTs;
1491
271k
    ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs);
1492
271k
    unsigned NumValues = ValueVTs.size();
1493
271k
    if (
NumValues271k
) {
1494
271k
      SDValue RetOp = getValue(I.getOperand(0));
1495
271k
1496
271k
      const Function *F = I.getParent()->getParent();
1497
271k
1498
271k
      ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
1499
271k
      if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
1500
271k
                                          Attribute::SExt))
1501
3.66k
        ExtendKind = ISD::SIGN_EXTEND;
1502
268k
      else 
if (268k
F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
1503
268k
                                               Attribute::ZExt))
1504
12.6k
        ExtendKind = ISD::ZERO_EXTEND;
1505
271k
1506
271k
      LLVMContext &Context = F->getContext();
1507
271k
      bool RetInReg = F->getAttributes().hasAttribute(
1508
271k
          AttributeList::ReturnIndex, Attribute::InReg);
1509
271k
1510
545k
      for (unsigned j = 0; 
j != NumValues545k
;
++j273k
) {
1511
273k
        EVT VT = ValueVTs[j];
1512
273k
1513
273k
        if (
ExtendKind != ISD::ANY_EXTEND && 273k
VT.isInteger()16.3k
)
1514
16.3k
          VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);
1515
273k
1516
273k
        unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, VT);
1517
273k
        MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, VT);
1518
273k
        SmallVector<SDValue, 4> Parts(NumParts);
1519
273k
        getCopyToParts(DAG, getCurSDLoc(),
1520
273k
                       SDValue(RetOp.getNode(), RetOp.getResNo() + j),
1521
273k
                       &Parts[0], NumParts, PartVT, &I, ExtendKind, true);
1522
273k
1523
273k
        // 'inreg' on function refers to return value
1524
273k
        ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
1525
273k
        if (RetInReg)
1526
37
          Flags.setInReg();
1527
273k
1528
273k
        // Propagate extension type if any
1529
273k
        if (ExtendKind == ISD::SIGN_EXTEND)
1530
3.66k
          Flags.setSExt();
1531
270k
        else 
if (270k
ExtendKind == ISD::ZERO_EXTEND270k
)
1532
12.6k
          Flags.setZExt();
1533
273k
1534
556k
        for (unsigned i = 0; 
i < NumParts556k
;
++i282k
) {
1535
282k
          Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
1536
282k
                                        VT, /*isfixed=*/true, 0, 0));
1537
282k
          OutVals.push_back(Parts[i]);
1538
282k
        }
1539
273k
      }
1540
271k
    }
1541
372k
  }
1542
373k
1543
373k
  // Push in swifterror virtual register as the last element of Outs. This makes
1544
373k
  // sure swifterror virtual register will be returned in the swifterror
1545
373k
  // physical register.
1546
373k
  const Function *F = I.getParent()->getParent();
1547
373k
  if (TLI.supportSwiftError() &&
1548
373k
      
F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)319k
) {
1549
112
    assert(FuncInfo.SwiftErrorArg && "Need a swift error argument");
1550
112
    ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
1551
112
    Flags.setSwiftError();
1552
112
    Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/,
1553
112
                                  EVT(TLI.getPointerTy(DL)) /*argvt*/,
1554
112
                                  true /*isfixed*/, 1 /*origidx*/,
1555
112
                                  0 /*partOffs*/));
1556
112
    // Create SDNode for the swifterror virtual register.
1557
112
    OutVals.push_back(
1558
112
        DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVRegUseAt(
1559
112
                            &I, FuncInfo.MBB, FuncInfo.SwiftErrorArg).first,
1560
112
                        EVT(TLI.getPointerTy(DL))));
1561
112
  }
1562
373k
1563
373k
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1564
373k
  CallingConv::ID CallConv =
1565
373k
    DAG.getMachineFunction().getFunction()->getCallingConv();
1566
373k
  Chain = DAG.getTargetLoweringInfo().LowerReturn(
1567
373k
      Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG);
1568
373k
1569
373k
  // Verify that the target's LowerReturn behaved as expected.
1570
373k
  assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
1571
373k
         "LowerReturn didn't return a valid chain!");
1572
373k
1573
373k
  // Update the DAG with the new chain value resulting from return lowering.
1574
373k
  DAG.setRoot(Chain);
1575
373k
}
1576
1577
/// CopyToExportRegsIfNeeded - If the given value has virtual registers
1578
/// created for it, emit nodes to copy the value into the virtual
1579
/// registers.
1580
14.7M
void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
1581
14.7M
  // Skip empty types
1582
14.7M
  if (V->getType()->isEmptyTy())
1583
16
    return;
1584
14.7M
1585
14.7M
  DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
1586
14.7M
  if (
VMI != FuncInfo.ValueMap.end()14.7M
) {
1587
1.90M
    assert(!V->use_empty() && "Unused value assigned virtual registers!");
1588
1.90M
    CopyValueToVirtualRegister(V, VMI->second);
1589
1.90M
  }
1590
14.7M
}
1591
1592
/// ExportFromCurrentBlock - If this condition isn't known to be exported from
1593
/// the current basic block, add it to ValueMap now so that we'll get a
1594
/// CopyTo/FromReg.
1595
197k
void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) {
1596
197k
  // No need to export constants.
1597
197k
  if (
!isa<Instruction>(V) && 197k
!isa<Argument>(V)70.0k
)
return52.0k
;
1598
145k
1599
145k
  // Already exported?
1600
145k
  
if (145k
FuncInfo.isExportedInst(V)145k
)
return88.2k
;
1601
57.5k
1602
57.5k
  unsigned Reg = FuncInfo.InitializeRegForValue(V);
1603
57.5k
  CopyValueToVirtualRegister(V, Reg);
1604
57.5k
}
1605
1606
bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
1607
161k
                                                     const BasicBlock *FromBB) {
1608
161k
  // The operands of the setcc have to be in this block.  We don't know
1609
161k
  // how to export them from some other block.
1610
161k
  if (const Instruction *
VI161k
= dyn_cast<Instruction>(V)) {
1611
100k
    // Can export from current BB.
1612
100k
    if (VI->getParent() == FromBB)
1613
83.5k
      return true;
1614
16.9k
1615
16.9k
    // Is already exported, noop.
1616
16.9k
    return FuncInfo.isExportedInst(V);
1617
16.9k
  }
1618
60.6k
1619
60.6k
  // If this is an argument, we can export it if the BB is the entry block or
1620
60.6k
  // if it is already exported.
1621
60.6k
  
if (60.6k
isa<Argument>(V)60.6k
) {
1622
15.9k
    if (FromBB == &FromBB->getParent()->getEntryBlock())
1623
11.5k
      return true;
1624
4.40k
1625
4.40k
    // Otherwise, can only export this if it is already exported.
1626
4.40k
    return FuncInfo.isExportedInst(V);
1627
4.40k
  }
1628
44.7k
1629
44.7k
  // Otherwise, constants can always be exported.
1630
44.7k
  return true;
1631
44.7k
}
1632
1633
/// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
1634
BranchProbability
1635
SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src,
1636
3.30M
                                        const MachineBasicBlock *Dst) const {
1637
3.30M
  BranchProbabilityInfo *BPI = FuncInfo.BPI;
1638
3.30M
  const BasicBlock *SrcBB = Src->getBasicBlock();
1639
3.30M
  const BasicBlock *DstBB = Dst->getBasicBlock();
1640
3.30M
  if (
!BPI3.30M
) {
1641
42
    // If BPI is not available, set the default probability as 1 / N, where N is
1642
42
    // the number of successors.
1643
42
    auto SuccSize = std::max<uint32_t>(
1644
42
        std::distance(succ_begin(SrcBB), succ_end(SrcBB)), 1);
1645
42
    return BranchProbability(1, SuccSize);
1646
42
  }
1647
3.30M
  return BPI->getEdgeProbability(SrcBB, DstBB);
1648
3.30M
}
1649
1650
void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src,
1651
                                               MachineBasicBlock *Dst,
1652
3.60M
                                               BranchProbability Prob) {
1653
3.60M
  if (!FuncInfo.BPI)
1654
1.26k
    Src->addSuccessorWithoutProb(Dst);
1655
3.60M
  else {
1656
3.60M
    if (Prob.isUnknown())
1657
3.09M
      Prob = getEdgeProbability(Src, Dst);
1658
3.60M
    Src->addSuccessor(Dst, Prob);
1659
3.60M
  }
1660
3.60M
}
1661
1662
177k
static bool InBlock(const Value *V, const BasicBlock *BB) {
1663
177k
  if (const Instruction *I = dyn_cast<Instruction>(V))
1664
177k
    return I->getParent() == BB;
1665
188
  return true;
1666
188
}
1667
1668
/// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
1669
/// This function emits a branch and is used at the leaves of an OR or an
1670
/// AND operator tree.
1671
void
1672
SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
1673
                                                  MachineBasicBlock *TBB,
1674
                                                  MachineBasicBlock *FBB,
1675
                                                  MachineBasicBlock *CurBB,
1676
                                                  MachineBasicBlock *SwitchBB,
1677
                                                  BranchProbability TProb,
1678
                                                  BranchProbability FProb,
1679
177k
                                                  bool InvertCond) {
1680
177k
  const BasicBlock *BB = CurBB->getBasicBlock();
1681
177k
1682
177k
  // If the leaf of the tree is a comparison, merge the condition into
1683
177k
  // the caseblock.
1684
177k
  if (const CmpInst *
BOp177k
= dyn_cast<CmpInst>(Cond)) {
1685
163k
    // The operands of the cmp have to be in this block.  We don't know
1686
163k
    // how to export them from some other block.  If this is the first block
1687
163k
    // of the sequence, no exporting is needed.
1688
163k
    if (CurBB == SwitchBB ||
1689
80.5k
        (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
1690
163k
         
isExportableFromCurrentBlock(BOp->getOperand(1), BB)80.5k
)) {
1691
163k
      ISD::CondCode Condition;
1692
163k
      if (const ICmpInst *
IC163k
= dyn_cast<ICmpInst>(Cond)) {
1693
156k
        ICmpInst::Predicate Pred =
1694
156k
            InvertCond ? 
IC->getInversePredicate()1.08k
:
IC->getPredicate()155k
;
1695
156k
        Condition = getICmpCondCode(Pred);
1696
163k
      } else {
1697
7.17k
        const FCmpInst *FC = cast<FCmpInst>(Cond);
1698
7.17k
        FCmpInst::Predicate Pred =
1699
7.17k
            InvertCond ? 
FC->getInversePredicate()8
:
FC->getPredicate()7.16k
;
1700
7.17k
        Condition = getFCmpCondCode(Pred);
1701
7.17k
        if (TM.Options.NoNaNsFPMath)
1702
0
          Condition = getFCmpCodeWithoutNaN(Condition);
1703
7.17k
      }
1704
163k
1705
163k
      CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
1706
163k
                   TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
1707
163k
      SwitchCases.push_back(CB);
1708
163k
      return;
1709
163k
    }
1710
14.3k
  }
1711
14.3k
1712
14.3k
  // Create a CaseBlock record representing this branch.
1713
14.3k
  
ISD::CondCode Opc = InvertCond ? 14.3k
ISD::SETNE195
:
ISD::SETEQ14.1k
;
1714
177k
  CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()),
1715
177k
               nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
1716
177k
  SwitchCases.push_back(CB);
1717
177k
}
1718
1719
/// FindMergedConditions - If Cond is an expression like
1720
void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
1721
                                               MachineBasicBlock *TBB,
1722
                                               MachineBasicBlock *FBB,
1723
                                               MachineBasicBlock *CurBB,
1724
                                               MachineBasicBlock *SwitchBB,
1725
                                               Instruction::BinaryOps Opc,
1726
                                               BranchProbability TProb,
1727
                                               BranchProbability FProb,
1728
266k
                                               bool InvertCond) {
1729
266k
  // Skip over not part of the tree and remember to invert op and operands at
1730
266k
  // next level.
1731
266k
  if (
BinaryOperator::isNot(Cond) && 266k
Cond->hasOneUse()949
) {
1732
900
    const Value *CondOp = BinaryOperator::getNotArgument(Cond);
1733
900
    if (
InBlock(CondOp, CurBB->getBasicBlock())900
) {
1734
873
      FindMergedConditions(CondOp, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
1735
873
                           !InvertCond);
1736
873
      return;
1737
873
    }
1738
265k
  }
1739
265k
1740
265k
  const Instruction *BOp = dyn_cast<Instruction>(Cond);
1741
265k
  // Compute the effective opcode for Cond, taking into account whether it needs
1742
265k
  // to be inverted, e.g.
1743
265k
  //   and (not (or A, B)), C
1744
265k
  // gets lowered as
1745
265k
  //   and (and (not A, not B), C)
1746
265k
  unsigned BOpc = 0;
1747
265k
  if (
BOp265k
) {
1748
265k
    BOpc = BOp->getOpcode();
1749
265k
    if (
InvertCond265k
) {
1750
1.67k
      if (BOpc == Instruction::And)
1751
377
        BOpc = Instruction::Or;
1752
1.30k
      else 
if (1.30k
BOpc == Instruction::Or1.30k
)
1753
151
        BOpc = Instruction::And;
1754
1.67k
    }
1755
265k
  }
1756
265k
1757
265k
  // If this node is not part of the or/and tree, emit it as a branch.
1758
265k
  if (
!BOp || 265k
!(isa<BinaryOperator>(BOp) || 265k
isa<CmpInst>(BOp)169k
) ||
1759
265k
      
BOpc != Opc259k
||
!BOp->hasOneUse()94.0k
||
1760
93.9k
      BOp->getParent() != CurBB->getBasicBlock() ||
1761
88.3k
      !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
1762
265k
      
!InBlock(BOp->getOperand(1), CurBB->getBasicBlock())87.9k
) {
1763
177k
    EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
1764
177k
                                 TProb, FProb, InvertCond);
1765
177k
    return;
1766
177k
  }
1767
87.7k
1768
87.7k
  //  Create TmpBB after CurBB.
1769
87.7k
  MachineFunction::iterator BBI(CurBB);
1770
87.7k
  MachineFunction &MF = DAG.getMachineFunction();
1771
87.7k
  MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
1772
87.7k
  CurBB->getParent()->insert(++BBI, TmpBB);
1773
87.7k
1774
87.7k
  if (
Opc == Instruction::Or87.7k
) {
1775
29.2k
    // Codegen X | Y as:
1776
29.2k
    // BB1:
1777
29.2k
    //   jmp_if_X TBB
1778
29.2k
    //   jmp TmpBB
1779
29.2k
    // TmpBB:
1780
29.2k
    //   jmp_if_Y TBB
1781
29.2k
    //   jmp FBB
1782
29.2k
    //
1783
29.2k
1784
29.2k
    // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
1785
29.2k
    // The requirement is that
1786
29.2k
    //   TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
1787
29.2k
    //     = TrueProb for original BB.
1788
29.2k
    // Assuming the original probabilities are A and B, one choice is to set
1789
29.2k
    // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
1790
29.2k
    // A/(1+B) and 2B/(1+B). This choice assumes that
1791
29.2k
    //   TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
1792
29.2k
    // Another choice is to assume TrueProb for BB1 equals to TrueProb for
1793
29.2k
    // TmpBB, but the math is more complicated.
1794
29.2k
1795
29.2k
    auto NewTrueProb = TProb / 2;
1796
29.2k
    auto NewFalseProb = TProb / 2 + FProb;
1797
29.2k
    // Emit the LHS condition.
1798
29.2k
    FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc,
1799
29.2k
                         NewTrueProb, NewFalseProb, InvertCond);
1800
29.2k
1801
29.2k
    // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
1802
29.2k
    SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
1803
29.2k
    BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
1804
29.2k
    // Emit the RHS condition into TmpBB.
1805
29.2k
    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
1806
29.2k
                         Probs[0], Probs[1], InvertCond);
1807
87.7k
  } else {
1808
58.5k
    assert(Opc == Instruction::And && "Unknown merge op!");
1809
58.5k
    // Codegen X & Y as:
1810
58.5k
    // BB1:
1811
58.5k
    //   jmp_if_X TmpBB
1812
58.5k
    //   jmp FBB
1813
58.5k
    // TmpBB:
1814
58.5k
    //   jmp_if_Y TBB
1815
58.5k
    //   jmp FBB
1816
58.5k
    //
1817
58.5k
    //  This requires creation of TmpBB after CurBB.
1818
58.5k
1819
58.5k
    // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
1820
58.5k
    // The requirement is that
1821
58.5k
    //   FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
1822
58.5k
    //     = FalseProb for original BB.
1823
58.5k
    // Assuming the original probabilities are A and B, one choice is to set
1824
58.5k
    // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
1825
58.5k
    // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
1826
58.5k
    // TrueProb for BB1 * FalseProb for TmpBB.
1827
58.5k
1828
58.5k
    auto NewTrueProb = TProb + FProb / 2;
1829
58.5k
    auto NewFalseProb = FProb / 2;
1830
58.5k
    // Emit the LHS condition.
1831
58.5k
    FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc,
1832
58.5k
                         NewTrueProb, NewFalseProb, InvertCond);
1833
58.5k
1834
58.5k
    // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
1835
58.5k
    SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
1836
58.5k
    BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
1837
58.5k
    // Emit the RHS condition into TmpBB.
1838
58.5k
    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
1839
58.5k
                         Probs[0], Probs[1], InvertCond);
1840
58.5k
  }
1841
266k
}
1842
1843
/// If the set of cases should be emitted as a series of branches, return true.
1844
/// If we should emit this as a bunch of and/or'd together conditions, return
1845
/// false.
1846
bool
1847
90.0k
SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases) {
1848
90.0k
  if (
Cases.size() != 290.0k
)
return true9.24k
;
1849
80.7k
1850
80.7k
  // If this is two comparisons of the same values or'd or and'd together, they
1851
80.7k
  // will get folded into a single comparison, so don't emit two blocks.
1852
80.7k
  
if (80.7k
(Cases[0].CmpLHS == Cases[1].CmpLHS &&
1853
6.73k
       Cases[0].CmpRHS == Cases[1].CmpRHS) ||
1854
80.7k
      (Cases[0].CmpRHS == Cases[1].CmpLHS &&
1855
80.7k
       
Cases[0].CmpLHS == Cases[1].CmpRHS67
)) {
1856
9
    return false;
1857
9
  }
1858
80.7k
1859
80.7k
  // Handle: (X != null) | (Y != null) --> (X|Y) != 0
1860
80.7k
  // Handle: (X == null) & (Y == null) --> (X|Y) == 0
1861
80.7k
  
if (80.7k
Cases[0].CmpRHS == Cases[1].CmpRHS &&
1862
7.20k
      Cases[0].CC == Cases[1].CC &&
1863
3.58k
      isa<Constant>(Cases[0].CmpRHS) &&
1864
80.7k
      
cast<Constant>(Cases[0].CmpRHS)->isNullValue()3.25k
) {
1865
1.86k
    if (
Cases[0].CC == ISD::SETEQ && 1.86k
Cases[0].TrueBB == Cases[1].ThisBB949
)
1866
10
      return false;
1867
1.85k
    
if (1.85k
Cases[0].CC == ISD::SETNE && 1.85k
Cases[0].FalseBB == Cases[1].ThisBB387
)
1868
36
      return false;
1869
80.7k
  }
1870
80.7k
1871
80.7k
  return true;
1872
80.7k
}
1873
1874
2.62M
void SelectionDAGBuilder::visitBr(const BranchInst &I) {
1875
2.62M
  MachineBasicBlock *BrMBB = FuncInfo.MBB;
1876
2.62M
1877
2.62M
  // Update machine-CFG edges.
1878
2.62M
  MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
1879
2.62M
1880
2.62M
  if (
I.isUnconditional()2.62M
) {
1881
993k
    // Update machine-CFG edges.
1882
993k
    BrMBB->addSuccessor(Succ0MBB);
1883
993k
1884
993k
    // If this is not a fall-through branch or optimizations are switched off,
1885
993k
    // emit the branch.
1886
993k
    if (
Succ0MBB != NextBlock(BrMBB) || 993k
TM.getOptLevel() == CodeGenOpt::None639k
)
1887
354k
      DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
1888
354k
                              MVT::Other, getControlRoot(),
1889
354k
                              DAG.getBasicBlock(Succ0MBB)));
1890
993k
1891
993k
    return;
1892
993k
  }
1893
1.62M
1894
1.62M
  // If this condition is one of the special cases we handle, do special stuff
1895
1.62M
  // now.
1896
1.62M
  const Value *CondVal = I.getCondition();
1897
1.62M
  MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
1898
1.62M
1899
1.62M
  // If this is a series of conditions that are or'd or and'd together, emit
1900
1.62M
  // this as a sequence of branches instead of setcc's with and/or operations.
1901
1.62M
  // As long as jumps are not expensive, this should improve performance.
1902
1.62M
  // For example, instead of something like:
1903
1.62M
  //     cmp A, B
1904
1.62M
  //     C = seteq
1905
1.62M
  //     cmp D, E
1906
1.62M
  //     F = setle
1907
1.62M
  //     or C, F
1908
1.62M
  //     jnz foo
1909
1.62M
  // Emit:
1910
1.62M
  //     cmp A, B
1911
1.62M
  //     je foo
1912
1.62M
  //     cmp D, E
1913
1.62M
  //     jle foo
1914
1.62M
  if (const BinaryOperator *
BOp1.62M
= dyn_cast<BinaryOperator>(CondVal)) {
1915
100k
    Instruction::BinaryOps Opcode = BOp->getOpcode();
1916
100k
    if (
!DAG.getTargetLoweringInfo().isJumpExpensive() && 100k
BOp->hasOneUse()100k
&&
1917
90.1k
        !I.getMetadata(LLVMContext::MD_unpredictable) &&
1918
100k
        
(Opcode == Instruction::And || 90.1k
Opcode == Instruction::Or28.1k
)) {
1919
90.0k
      FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
1920
90.0k
                           Opcode,
1921
90.0k
                           getEdgeProbability(BrMBB, Succ0MBB),
1922
90.0k
                           getEdgeProbability(BrMBB, Succ1MBB),
1923
90.0k
                           /*InvertCond=*/false);
1924
90.0k
      // If the compares in later blocks need to use values not currently
1925
90.0k
      // exported from this block, export them now.  This block should always
1926
90.0k
      // be the first entry.
1927
90.0k
      assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
1928
90.0k
1929
90.0k
      // Allow some cases to be rejected.
1930
90.0k
      if (
ShouldEmitAsBranches(SwitchCases)90.0k
) {
1931
177k
        for (unsigned i = 1, e = SwitchCases.size(); 
i != e177k
;
++i87.7k
) {
1932
87.7k
          ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
1933
87.7k
          ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
1934
87.7k
        }
1935
89.9k
1936
89.9k
        // Emit the branch for this block.
1937
89.9k
        visitSwitchCase(SwitchCases[0], BrMBB);
1938
89.9k
        SwitchCases.erase(SwitchCases.begin());
1939
89.9k
        return;
1940
89.9k
      }
1941
55
1942
55
      // Okay, we decided not to do this, remove any inserted MBB's and clear
1943
55
      // SwitchCases.
1944
110
      
for (unsigned i = 1, e = SwitchCases.size(); 55
i != e110
;
++i55
)
1945
55
        FuncInfo.MF->erase(SwitchCases[i].ThisBB);
1946
90.0k
1947
90.0k
      SwitchCases.clear();
1948
90.0k
    }
1949
100k
  }
1950
1.62M
1951
1.62M
  // Create a CaseBlock record representing this branch.
1952
1.53M
  CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
1953
1.53M
               nullptr, Succ0MBB, Succ1MBB, BrMBB, getCurSDLoc());
1954
1.53M
1955
1.53M
  // Use visitSwitchCase to actually insert the fast branch sequence for this
1956
1.53M
  // cond branch.
1957
1.53M
  visitSwitchCase(CB, BrMBB);
1958
1.53M
}
1959
1960
/// visitSwitchCase - Emits the necessary code to represent a single node in
1961
/// the binary search tree resulting from lowering a switch instruction.
1962
void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
1963
1.75M
                                          MachineBasicBlock *SwitchBB) {
1964
1.75M
  SDValue Cond;
1965
1.75M
  SDValue CondLHS = getValue(CB.CmpLHS);
1966
1.75M
  SDLoc dl = CB.DL;
1967
1.75M
1968
1.75M
  // Build the setcc now.
1969
1.75M
  if (
!CB.CmpMHS1.75M
) {
1970
1.75M
    // Fold "(X == true)" to X and "(X == false)" to !X to
1971
1.75M
    // handle common cases produced by branch lowering.
1972
1.75M
    if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
1973
1.55M
        CB.CC == ISD::SETEQ)
1974
1.55M
      Cond = CondLHS;
1975
201k
    else 
if (201k
CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
1976
201k
             
CB.CC == ISD::SETEQ3
) {
1977
3
      SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType());
1978
3
      Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
1979
3
    } else
1980
201k
      Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
1981
1.75M
  } else {
1982
549
    assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
1983
549
1984
549
    const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
1985
549
    const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
1986
549
1987
549
    SDValue CmpOp = getValue(CB.CmpMHS);
1988
549
    EVT VT = CmpOp.getValueType();
1989
549
1990
549
    if (
cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)549
) {
1991
1
      Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, dl, VT),
1992
1
                          ISD::SETLE);
1993
549
    } else {
1994
548
      SDValue SUB = DAG.getNode(ISD::SUB, dl,
1995
548
                                VT, CmpOp, DAG.getConstant(Low, dl, VT));
1996
548
      Cond = DAG.getSetCC(dl, MVT::i1, SUB,
1997
548
                          DAG.getConstant(High-Low, dl, VT), ISD::SETULE);
1998
548
    }
1999
549
  }
2000
1.75M
2001
1.75M
  // Update successor info
2002
1.75M
  addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
2003
1.75M
  // TrueBB and FalseBB are always different unless the incoming IR is
2004
1.75M
  // degenerate. This only happens when running llc on weird IR.
2005
1.75M
  if (CB.TrueBB != CB.FalseBB)
2006
1.75M
    addSuccessorWithProb(SwitchBB, CB.FalseBB, CB.FalseProb);
2007
1.75M
  SwitchBB->normalizeSuccProbs();
2008
1.75M
2009
1.75M
  // If the lhs block is the next block, invert the condition so that we can
2010
1.75M
  // fall through to the lhs instead of the rhs block.
2011
1.75M
  if (
CB.TrueBB == NextBlock(SwitchBB)1.75M
) {
2012
804k
    std::swap(CB.TrueBB, CB.FalseBB);
2013
804k
    SDValue True = DAG.getConstant(1, dl, Cond.getValueType());
2014
804k
    Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
2015
804k
  }
2016
1.75M
2017
1.75M
  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
2018
1.75M
                               MVT::Other, getControlRoot(), Cond,
2019
1.75M
                               DAG.getBasicBlock(CB.TrueBB));
2020
1.75M
2021
1.75M
  // Insert the false branch. Do this even if it's a fall through branch,
2022
1.75M
  // this makes it easier to do DAG optimizations which require inverting
2023
1.75M
  // the branch condition.
2024
1.75M
  BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
2025
1.75M
                       DAG.getBasicBlock(CB.FalseBB));
2026
1.75M
2027
1.75M
  DAG.setRoot(BrCond);
2028
1.75M
}
2029
2030
/// visitJumpTable - Emit JumpTable node in the current MBB
2031
5.16k
void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
2032
5.16k
  // Emit the code for the jump table
2033
5.16k
  assert(JT.Reg != -1U && "Should lower JT Header first!");
2034
5.16k
  EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2035
5.16k
  SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(),
2036
5.16k
                                     JT.Reg, PTy);
2037
5.16k
  SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
2038
5.16k
  SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(),
2039
5.16k
                                    MVT::Other, Index.getValue(1),
2040
5.16k
                                    Table, Index);
2041
5.16k
  DAG.setRoot(BrJumpTable);
2042
5.16k
}
2043
2044
/// visitJumpTableHeader - This function emits necessary code to produce index
2045
/// in the JumpTable from switch case.
2046
void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
2047
                                               JumpTableHeader &JTH,
2048
5.16k
                                               MachineBasicBlock *SwitchBB) {
2049
5.16k
  SDLoc dl = getCurSDLoc();
2050
5.16k
2051
5.16k
  // Subtract the lowest switch case value from the value being switched on and
2052
5.16k
  // conditional branch to default mbb if the result is greater than the
2053
5.16k
  // difference between smallest and largest cases.
2054
5.16k
  SDValue SwitchOp = getValue(JTH.SValue);
2055
5.16k
  EVT VT = SwitchOp.getValueType();
2056
5.16k
  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
2057
5.16k
                            DAG.getConstant(JTH.First, dl, VT));
2058
5.16k
2059
5.16k
  // The SDNode we just created, which holds the value being switched on minus
2060
5.16k
  // the smallest case value, needs to be copied to a virtual register so it
2061
5.16k
  // can be used as an index into the jump table in a subsequent basic block.
2062
5.16k
  // This value may be smaller or larger than the target's pointer type, and
2063
5.16k
  // therefore require extension or truncating.
2064
5.16k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2065
5.16k
  SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy(DAG.getDataLayout()));
2066
5.16k
2067
5.16k
  unsigned JumpTableReg =
2068
5.16k
      FuncInfo.CreateReg(TLI.getPointerTy(DAG.getDataLayout()));
2069
5.16k
  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl,
2070
5.16k
                                    JumpTableReg, SwitchOp);
2071
5.16k
  JT.Reg = JumpTableReg;
2072
5.16k
2073
5.16k
  // Emit the range check for the jump table, and branch to the default block
2074
5.16k
  // for the switch statement if the value being switched on exceeds the largest
2075
5.16k
  // case in the switch.
2076
5.16k
  SDValue CMP = DAG.getSetCC(
2077
5.16k
      dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
2078
5.16k
                                 Sub.getValueType()),
2079
5.16k
      Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT);
2080
5.16k
2081
5.16k
  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
2082
5.16k
                               MVT::Other, CopyTo, CMP,
2083
5.16k
                               DAG.getBasicBlock(JT.Default));
2084
5.16k
2085
5.16k
  // Avoid emitting unnecessary branches to the next block.
2086
5.16k
  if (JT.MBB != NextBlock(SwitchBB))
2087
226
    BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
2088
226
                         DAG.getBasicBlock(JT.MBB));
2089
5.16k
2090
5.16k
  DAG.setRoot(BrCond);
2091
5.16k
}
2092
2093
/// Create a LOAD_STACK_GUARD node, and let it carry the target specific global
2094
/// variable if there exists one.
2095
static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
2096
9.67k
                                 SDValue &Chain) {
2097
9.67k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2098
9.67k
  EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
2099
9.67k
  MachineFunction &MF = DAG.getMachineFunction();
2100
9.67k
  Value *Global = TLI.getSDagStackGuard(*MF.getFunction()->getParent());
2101
9.67k
  MachineSDNode *Node =
2102
9.67k
      DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain);
2103
9.67k
  if (
Global9.67k
) {
2104
9.66k
    MachinePointerInfo MPInfo(Global);
2105
9.66k
    MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1);
2106
9.66k
    auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
2107
9.66k
                 MachineMemOperand::MODereferenceable;
2108
9.66k
    *MemRefs = MF.getMachineMemOperand(MPInfo, Flags, PtrTy.getSizeInBits() / 8,
2109
9.66k
                                       DAG.getEVTAlignment(PtrTy));
2110
9.66k
    Node->setMemRefs(MemRefs, MemRefs + 1);
2111
9.66k
  }
2112
9.67k
  return SDValue(Node, 0);
2113
9.67k
}
2114
2115
/// Codegen a new tail for a stack protector check ParentMBB which has had its
2116
/// tail spliced into a stack protector check success bb.
2117
///
2118
/// For a high level explanation of how this fits into the stack protector
2119
/// generation see the comment on the declaration of class
2120
/// StackProtectorDescriptor.
2121
void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
2122
3.40k
                                                  MachineBasicBlock *ParentBB) {
2123
3.40k
2124
3.40k
  // First create the loads to the guard/stack slot for the comparison.
2125
3.40k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2126
3.40k
  EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
2127
3.40k
2128
3.40k
  MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
2129
3.40k
  int FI = MFI.getStackProtectorIndex();
2130
3.40k
2131
3.40k
  SDValue Guard;
2132
3.40k
  SDLoc dl = getCurSDLoc();
2133
3.40k
  SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
2134
3.40k
  const Module &M = *ParentBB->getParent()->getFunction()->getParent();
2135
3.40k
  unsigned Align = DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext()));
2136
3.40k
2137
3.40k
  // Generate code to load the content of the guard slot.
2138
3.40k
  SDValue StackSlot = DAG.getLoad(
2139
3.40k
      PtrTy, dl, DAG.getEntryNode(), StackSlotPtr,
2140
3.40k
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align,
2141
3.40k
      MachineMemOperand::MOVolatile);
2142
3.40k
2143
3.40k
  // Retrieve guard check function, nullptr if instrumentation is inlined.
2144
3.40k
  if (const Value *
GuardCheck3.40k
= TLI.getSSPStackGuardCheck(M)) {
2145
66
    // The target provides a guard check function to validate the guard value.
2146
66
    // Generate a call to that function with the content of the guard slot as
2147
66
    // argument.
2148
66
    auto *Fn = cast<Function>(GuardCheck);
2149
66
    FunctionType *FnTy = Fn->getFunctionType();
2150
66
    assert(FnTy->getNumParams() == 1 && "Invalid function signature");
2151
66
2152
66
    TargetLowering::ArgListTy Args;
2153
66
    TargetLowering::ArgListEntry Entry;
2154
66
    Entry.Node = StackSlot;
2155
66
    Entry.Ty = FnTy->getParamType(0);
2156
66
    if (Fn->hasAttribute(1, Attribute::AttrKind::InReg))
2157
66
      Entry.IsInReg = true;
2158
66
    Args.push_back(Entry);
2159
66
2160
66
    TargetLowering::CallLoweringInfo CLI(DAG);
2161
66
    CLI.setDebugLoc(getCurSDLoc())
2162
66
      .setChain(DAG.getEntryNode())
2163
66
      .setCallee(Fn->getCallingConv(), FnTy->getReturnType(),
2164
66
                 getValue(GuardCheck), std::move(Args));
2165
66
2166
66
    std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
2167
66
    DAG.setRoot(Result.second);
2168
66
    return;
2169
66
  }
2170
3.34k
2171
3.34k
  // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
2172
3.34k
  // Otherwise, emit a volatile load to retrieve the stack guard value.
2173
3.34k
  SDValue Chain = DAG.getEntryNode();
2174
3.34k
  if (
TLI.useLoadStackGuardNode()3.34k
) {
2175
3.26k
    Guard = getLoadStackGuard(DAG, dl, Chain);
2176
3.34k
  } else {
2177
78
    const Value *IRGuard = TLI.getSDagStackGuard(M);
2178
78
    SDValue GuardPtr = getValue(IRGuard);
2179
78
2180
78
    Guard =
2181
78
        DAG.getLoad(PtrTy, dl, Chain, GuardPtr, MachinePointerInfo(IRGuard, 0),
2182
78
                    Align, MachineMemOperand::MOVolatile);
2183
78
  }
2184
3.40k
2185
3.40k
  // Perform the comparison via a subtract/getsetcc.
2186
3.40k
  EVT VT = Guard.getValueType();
2187
3.40k
  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, StackSlot);
2188
3.40k
2189
3.40k
  SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(),
2190
3.40k
                                                        *DAG.getContext(),
2191
3.40k
                                                        Sub.getValueType()),
2192
3.40k
                             Sub, DAG.getConstant(0, dl, VT), ISD::SETNE);
2193
3.40k
2194
3.40k
  // If the sub is not 0, then we know the guard/stackslot do not equal, so
2195
3.40k
  // branch to failure MBB.
2196
3.40k
  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
2197
3.40k
                               MVT::Other, StackSlot.getOperand(0),
2198
3.40k
                               Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
2199
3.40k
  // Otherwise branch to success MBB.
2200
3.40k
  SDValue Br = DAG.getNode(ISD::BR, dl,
2201
3.40k
                           MVT::Other, BrCond,
2202
3.40k
                           DAG.getBasicBlock(SPD.getSuccessMBB()));
2203
3.40k
2204
3.40k
  DAG.setRoot(Br);
2205
3.40k
}
2206
2207
/// Codegen the failure basic block for a stack protector check.
2208
///
2209
/// A failure stack protector machine basic block consists simply of a call to
2210
/// __stack_chk_fail().
2211
///
2212
/// For a high level explanation of how this fits into the stack protector
2213
/// generation see the comment on the declaration of class
2214
/// StackProtectorDescriptor.
2215
void
2216
3.26k
SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
2217
3.26k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2218
3.26k
  SDValue Chain =
2219
3.26k
      TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
2220
3.26k
                      None, false, getCurSDLoc(), false, false).second;
2221
3.26k
  DAG.setRoot(Chain);
2222
3.26k
}
2223
2224
/// visitBitTestHeader - This function emits necessary code to produce value
2225
/// suitable for "bit tests"
2226
void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
2227
621
                                             MachineBasicBlock *SwitchBB) {
2228
621
  SDLoc dl = getCurSDLoc();
2229
621
2230
621
  // Subtract the minimum value
2231
621
  SDValue SwitchOp = getValue(B.SValue);
2232
621
  EVT VT = SwitchOp.getValueType();
2233
621
  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
2234
621
                            DAG.getConstant(B.First, dl, VT));
2235
621
2236
621
  // Check range
2237
621
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2238
621
  SDValue RangeCmp = DAG.getSetCC(
2239
621
      dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
2240
621
                                 Sub.getValueType()),
2241
621
      Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT);
2242
621
2243
621
  // Determine the type of the test operands.
2244
621
  bool UsePtrType = false;
2245
621
  if (!TLI.isTypeLegal(VT))
2246
0
    UsePtrType = true;
2247
621
  else {
2248
1.00k
    for (unsigned i = 0, e = B.Cases.size(); 
i != e1.00k
;
++i386
)
2249
702
      
if (702
!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)702
) {
2250
316
        // Switch table case range are encoded into series of masks.
2251
316
        // Just use pointer type, it's guaranteed to fit.
2252
316
        UsePtrType = true;
2253
316
        break;
2254
316
      }
2255
621
  }
2256
621
  if (
UsePtrType621
) {
2257
316
    VT = TLI.getPointerTy(DAG.getDataLayout());
2258
316
    Sub = DAG.getZExtOrTrunc(Sub, dl, VT);
2259
316
  }
2260
621
2261
621
  B.RegVT = VT.getSimpleVT();
2262
621
  B.Reg = FuncInfo.CreateReg(B.RegVT);
2263
621
  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, B.Reg, Sub);
2264
621
2265
621
  MachineBasicBlock* MBB = B.Cases[0].ThisBB;
2266
621
2267
621
  addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
2268
621
  addSuccessorWithProb(SwitchBB, MBB, B.Prob);
2269
621
  SwitchBB->normalizeSuccProbs();
2270
621
2271
621
  SDValue BrRange = DAG.getNode(ISD::BRCOND, dl,
2272
621
                                MVT::Other, CopyTo, RangeCmp,
2273
621
                                DAG.getBasicBlock(B.Default));
2274
621
2275
621
  // Avoid emitting unnecessary branches to the next block.
2276
621
  if (MBB != NextBlock(SwitchBB))
2277
30
    BrRange = DAG.getNode(ISD::BR, dl, MVT::Other, BrRange,
2278
30
                          DAG.getBasicBlock(MBB));
2279
621
2280
621
  DAG.setRoot(BrRange);
2281
621
}
2282
2283
/// visitBitTestCase - this function produces one "bit test"
2284
void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
2285
                                           MachineBasicBlock* NextMBB,
2286
                                           BranchProbability BranchProbToNext,
2287
                                           unsigned Reg,
2288
                                           BitTestCase &B,
2289
768
                                           MachineBasicBlock *SwitchBB) {
2290
768
  SDLoc dl = getCurSDLoc();
2291
768
  MVT VT = BB.RegVT;
2292
768
  SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT);
2293
768
  SDValue Cmp;
2294
768
  unsigned PopCount = countPopulation(B.Mask);
2295
768
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2296
768
  if (
PopCount == 1768
) {
2297
90
    // Testing for a single bit; just compare the shift count with what it
2298
90
    // would need to be to shift a 1 bit in that position.
2299
90
    Cmp = DAG.getSetCC(
2300
90
        dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
2301
90
        ShiftOp, DAG.getConstant(countTrailingZeros(B.Mask), dl, VT),
2302
90
        ISD::SETEQ);
2303
768
  } else 
if (678
PopCount == BB.Range678
) {
2304
37
    // There is only one zero bit in the range, test for it directly.
2305
37
    Cmp = DAG.getSetCC(
2306
37
        dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
2307
37
        ShiftOp, DAG.getConstant(countTrailingOnes(B.Mask), dl, VT),
2308
37
        ISD::SETNE);
2309
678
  } else {
2310
641
    // Make desired shift
2311
641
    SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT,
2312
641
                                    DAG.getConstant(1, dl, VT), ShiftOp);
2313
641
2314
641
    // Emit bit tests and jumps
2315
641
    SDValue AndOp = DAG.getNode(ISD::AND, dl,
2316
641
                                VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT));
2317
641
    Cmp = DAG.getSetCC(
2318
641
        dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
2319
641
        AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE);
2320
641
  }
2321
768
2322
768
  // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
2323
768
  addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
2324
768
  // The branch probability from SwitchBB to NextMBB is BranchProbToNext.
2325
768
  addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
2326
768
  // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is
2327
768
  // one as they are relative probabilities (and thus work more like weights),
2328
768
  // and hence we need to normalize them to let the sum of them become one.
2329
768
  SwitchBB->normalizeSuccProbs();
2330
768
2331
768
  SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl,
2332
768
                              MVT::Other, getControlRoot(),
2333
768
                              Cmp, DAG.getBasicBlock(B.TargetBB));
2334
768
2335
768
  // Avoid emitting unnecessary branches to the next block.
2336
768
  if (NextMBB != NextBlock(SwitchBB))
2337
426
    BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd,
2338
426
                        DAG.getBasicBlock(NextMBB));
2339
768
2340
768
  DAG.setRoot(BrAnd);
2341
768
}
2342
2343
18.5k
void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
2344
18.5k
  MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
2345
18.5k
2346
18.5k
  // Retrieve successors. Look through artificial IR level blocks like
2347
18.5k
  // catchswitch for successors.
2348
18.5k
  MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
2349
18.5k
  const BasicBlock *EHPadBB = I.getSuccessor(1);
2350
18.5k
2351
18.5k
  // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
2352
18.5k
  // have to do anything here to lower funclet bundles.
2353
18.5k
  assert(!I.hasOperandBundlesOtherThan(
2354
18.5k
             {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
2355
18.5k
         "Cannot lower invokes with arbitrary operand bundles yet!");
2356
18.5k
2357
18.5k
  const Value *Callee(I.getCalledValue());
2358
18.5k
  const Function *Fn = dyn_cast<Function>(Callee);
2359
18.5k
  if (isa<InlineAsm>(Callee))
2360
1
    visitInlineAsm(&I);
2361
18.5k
  else 
if (18.5k
Fn && 18.5k
Fn->isIntrinsic()14.7k
) {
2362
10
    switch (Fn->getIntrinsicID()) {
2363
0
    default:
2364
0
      llvm_unreachable("Cannot invoke this intrinsic");
2365
1
    case Intrinsic::donothing:
2366
1
      // Ignore invokes to @llvm.donothing: jump directly to the next BB.
2367
1
      break;
2368
1
    case Intrinsic::experimental_patchpoint_void:
2369
1
    case Intrinsic::experimental_patchpoint_i64:
2370
1
      visitPatchpoint(&I, EHPadBB);
2371
1
      break;
2372
8
    case Intrinsic::experimental_gc_statepoint:
2373
8
      LowerStatepoint(ImmutableStatepoint(&I), EHPadBB);
2374
8
      break;
2375
18.5k
    }
2376
18.5k
  } else 
if (18.5k
I.countOperandBundlesOfType(LLVMContext::OB_deopt)18.5k
) {
2377
0
    // Currently we do not lower any intrinsic calls with deopt operand bundles.
2378
0
    // Eventually we will support lowering the @llvm.experimental.deoptimize
2379
0
    // intrinsic, and right now there are no plans to support other intrinsics
2380
0
    // with deopt state.
2381
0
    LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB);
2382
18.5k
  } else {
2383
18.5k
    LowerCallTo(&I, getValue(Callee), false, EHPadBB);
2384
18.5k
  }
2385
18.5k
2386
18.5k
  // If the value of the invoke is used outside of its defining block, make it
2387
18.5k
  // available as a virtual register.
2388
18.5k
  // We already took care of the exported value for the statepoint instruction
2389
18.5k
  // during call to the LowerStatepoint.
2390
18.5k
  
if (18.5k
!isStatepoint(I)18.5k
) {
2391
18.5k
    CopyToExportRegsIfNeeded(&I);
2392
18.5k
  }
2393
18.5k
2394
18.5k
  SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
2395
18.5k
  BranchProbabilityInfo *BPI = FuncInfo.BPI;
2396
18.5k
  BranchProbability EHPadBBProb =
2397
18.4k
      BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB)
2398
124
          : BranchProbability::getZero();
2399
18.5k
  findUnwindDestinations(FuncInfo, EHPadBB, EHPadBBProb, UnwindDests);
2400
18.5k
2401
18.5k
  // Update successor info.
2402
18.5k
  addSuccessorWithProb(InvokeMBB, Return);
2403
18.5k
  for (auto &UnwindDest : UnwindDests) {
2404
18.5k
    UnwindDest.first->setIsEHPad();
2405
18.5k
    addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second);
2406
18.5k
  }
2407
18.5k
  InvokeMBB->normalizeSuccProbs();
2408
18.5k
2409
18.5k
  // Drop into normal successor.
2410
18.5k
  DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
2411
18.5k
                          MVT::Other, getControlRoot(),
2412
18.5k
                          DAG.getBasicBlock(Return)));
2413
18.5k
}
2414
2415
0
void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
2416
0
  llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
2417
0
}
2418
2419
12.8k
void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
2420
12.8k
  assert(FuncInfo.MBB->isEHPad() &&
2421
12.8k
         "Call to landingpad not in landing pad!");
2422
12.8k
2423
12.8k
  MachineBasicBlock *MBB = FuncInfo.MBB;
2424
12.8k
  addLandingPadInfo(LP, *MBB);
2425
12.8k
2426
12.8k
  // If there aren't registers to copy the values into (e.g., during SjLj
2427
12.8k
  // exceptions), then don't bother to create these DAG nodes.
2428
12.8k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2429
12.8k
  const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn();
2430
12.8k
  if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
2431
123
      TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
2432
123
    return;
2433
12.7k
2434
12.7k
  // If landingpad's return type is token type, we don't create DAG nodes
2435
12.7k
  // for its exception pointer and selector value. The extraction of exception
2436
12.7k
  // pointer or selector value from token type landingpads is not currently
2437
12.7k
  // supported.
2438
12.7k
  
if (12.7k
LP.getType()->isTokenTy()12.7k
)
2439
8
    return;
2440
12.7k
2441
12.7k
  SmallVector<EVT, 2> ValueVTs;
2442
12.7k
  SDLoc dl = getCurSDLoc();
2443
12.7k
  ComputeValueVTs(TLI, DAG.getDataLayout(), LP.getType(), ValueVTs);
2444
12.7k
  assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported");
2445
12.7k
2446
12.7k
  // Get the two live-in registers as SDValues. The physregs have already been
2447
12.7k
  // copied into virtual registers.
2448
12.7k
  SDValue Ops[2];
2449
12.7k
  if (
FuncInfo.ExceptionPointerVirtReg12.7k
) {
2450
12.7k
    Ops[0] = DAG.getZExtOrTrunc(
2451
12.7k
        DAG.getCopyFromReg(DAG.getEntryNode(), dl,
2452
12.7k
                           FuncInfo.ExceptionPointerVirtReg,
2453
12.7k
                           TLI.getPointerTy(DAG.getDataLayout())),
2454
12.7k
        dl, ValueVTs[0]);
2455
12.7k
  } else {
2456
0
    Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout()));
2457
0
  }
2458
12.8k
  Ops[1] = DAG.getZExtOrTrunc(
2459
12.8k
      DAG.getCopyFromReg(DAG.getEntryNode(), dl,
2460
12.8k
                         FuncInfo.ExceptionSelectorVirtReg,
2461
12.8k
                         TLI.getPointerTy(DAG.getDataLayout())),
2462
12.8k
      dl, ValueVTs[1]);
2463
12.8k
2464
12.8k
  // Merge into one.
2465
12.8k
  SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
2466
12.8k
                            DAG.getVTList(ValueVTs), Ops);
2467
12.8k
  setValue(&LP, Res);
2468
12.8k
}
2469
2470
30.5k
void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) {
2471
#ifndef NDEBUG
2472
  for (const CaseCluster &CC : Clusters)
2473
    assert(CC.Low == CC.High && "Input clusters must be single-case");
2474
#endif
2475
2476
30.5k
  std::sort(Clusters.begin(), Clusters.end(),
2477
122k
            [](const CaseCluster &a, const CaseCluster &b) {
2478
122k
    return a.Low->getValue().slt(b.Low->getValue());
2479
122k
  });
2480
30.5k
2481
30.5k
  // Merge adjacent clusters with the same destination.
2482
30.5k
  const unsigned N = Clusters.size();
2483
30.5k
  unsigned DstIndex = 0;
2484
123k
  for (unsigned SrcIndex = 0; 
SrcIndex < N123k
;
++SrcIndex93.2k
) {
2485
93.2k
    CaseCluster &CC = Clusters[SrcIndex];
2486
93.2k
    const ConstantInt *CaseVal = CC.Low;
2487
93.2k
    MachineBasicBlock *Succ = CC.MBB;
2488
93.2k
2489
93.2k
    if (
DstIndex != 0 && 93.2k
Clusters[DstIndex - 1].MBB == Succ62.6k
&&
2490
93.2k
        
(CaseVal->getValue() - Clusters[DstIndex - 1].High->getValue()) == 120.6k
) {
2491
5.63k
      // If this case has the same successor and is a neighbour, merge it into
2492
5.63k
      // the previous cluster.
2493
5.63k
      Clusters[DstIndex - 1].High = CaseVal;
2494
5.63k
      Clusters[DstIndex - 1].Prob += CC.Prob;
2495
93.2k
    } else {
2496
87.6k
      std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex],
2497
87.6k
                   sizeof(Clusters[SrcIndex]));
2498
87.6k
    }
2499
93.2k
  }
2500
30.5k
  Clusters.resize(DstIndex);
2501
30.5k
}
2502
2503
void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
2504
0
                                           MachineBasicBlock *Last) {
2505
0
  // Update JTCases.
2506
0
  for (unsigned i = 0, e = JTCases.size(); 
i != e0
;
++i0
)
2507
0
    
if (0
JTCases[i].first.HeaderBB == First0
)
2508
0
      JTCases[i].first.HeaderBB = Last;
2509
0
2510
0
  // Update BitTestCases.
2511
0
  for (unsigned i = 0, e = BitTestCases.size(); 
i != e0
;
++i0
)
2512
0
    
if (0
BitTestCases[i].Parent == First0
)
2513
0
      BitTestCases[i].Parent = Last;
2514
0
}
2515
2516
78
void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
2517
78
  MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
2518
78
2519
78
  // Update machine-CFG edges with unique successors.
2520
78
  SmallSet<BasicBlock*, 32> Done;
2521
344
  for (unsigned i = 0, e = I.getNumSuccessors(); 
i != e344
;
++i266
) {
2522
266
    BasicBlock *BB = I.getSuccessor(i);
2523
266
    bool Inserted = Done.insert(BB).second;
2524
266
    if (!Inserted)
2525
6
        continue;
2526
260
2527
260
    MachineBasicBlock *Succ = FuncInfo.MBBMap[BB];
2528
260
    addSuccessorWithProb(IndirectBrMBB, Succ);
2529
260
  }
2530
78
  IndirectBrMBB->normalizeSuccProbs();
2531
78
2532
78
  DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(),
2533
78
                          MVT::Other, getControlRoot(),
2534
78
                          getValue(I.getAddress())));
2535
78
}
2536
2537
21.2k
void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
2538
21.2k
  if (DAG.getTarget().Options.TrapUnreachable)
2539
71
    DAG.setRoot(
2540
71
        DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
2541
21.2k
}
2542
2543
18.8k
void SelectionDAGBuilder::visitFSub(const User &I) {
2544
18.8k
  // -0.0 - X --> fneg
2545
18.8k
  Type *Ty = I.getType();
2546
18.8k
  if (isa<Constant>(I.getOperand(0)) &&
2547
18.8k
      
I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)8.23k
) {
2548
5.78k
    SDValue Op2 = getValue(I.getOperand(1));
2549
5.78k
    setValue(&I, DAG.getNode(ISD::FNEG, getCurSDLoc(),
2550
5.78k
                             Op2.getValueType(), Op2));
2551
5.78k
    return;
2552
5.78k
  }
2553
13.1k
2554
13.1k
  visitBinary(I, ISD::FSUB);
2555
13.1k
}
2556
2557
/// Checks if the given instruction performs a vector reduction, in which case
2558
/// we have the freedom to alter the elements in the result as long as the
2559
/// reduction of them stays unchanged.
2560
1.45M
static bool isVectorReductionOp(const User *I) {
2561
1.45M
  const Instruction *Inst = dyn_cast<Instruction>(I);
2562
1.45M
  if (
!Inst || 1.45M
!Inst->getType()->isVectorTy()1.45M
)
2563
1.36M
    return false;
2564
87.9k
2565
87.9k
  auto OpCode = Inst->getOpcode();
2566
87.9k
  switch (OpCode) {
2567
50.0k
  case Instruction::Add:
2568
50.0k
  case Instruction::Mul:
2569
50.0k
  case Instruction::And:
2570
50.0k
  case Instruction::Or:
2571
50.0k
  case Instruction::Xor:
2572
50.0k
    break;
2573
18.8k
  case Instruction::FAdd:
2574
18.8k
  case Instruction::FMul:
2575
18.8k
    if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
2576
18.8k
      
if (18.8k
FPOp->getFastMathFlags().unsafeAlgebra()18.8k
)
2577
50
        break;
2578
18.8k
    
LLVM_FALLTHROUGH18.8k
;
2579
37.8k
  default:
2580
37.8k
    return false;
2581
50.1k
  }
2582
50.1k
2583
50.1k
  unsigned ElemNum = Inst->getType()->getVectorNumElements();
2584
50.1k
  unsigned ElemNumToReduce = ElemNum;
2585
50.1k
2586
50.1k
  // Do DFS search on the def-use chain from the given instruction. We only
2587
50.1k
  // allow four kinds of operations during the search until we reach the
2588
50.1k
  // instruction that extracts the first element from the vector:
2589
50.1k
  //
2590
50.1k
  //   1. The reduction operation of the same opcode as the given instruction.
2591
50.1k
  //
2592
50.1k
  //   2. PHI node.
2593
50.1k
  //
2594
50.1k
  //   3. ShuffleVector instruction together with a reduction operation that
2595
50.1k
  //      does a partial reduction.
2596
50.1k
  //
2597
50.1k
  //   4. ExtractElement that extracts the first element from the vector, and we
2598
50.1k
  //      stop searching the def-use chain here.
2599
50.1k
  //
2600
50.1k
  // 3 & 4 above perform a reduction on all elements of the vector. We push defs
2601
50.1k
  // from 1-3 to the stack to continue the DFS. The given instruction is not
2602
50.1k
  // a reduction operation if we meet any other instructions other than those
2603
50.1k
  // listed above.
2604
50.1k
2605
50.1k
  SmallVector<const User *, 16> UsersToVisit{Inst};
2606
50.1k
  SmallPtrSet<const User *, 16> Visited;
2607
50.1k
  bool ReduxExtracted = false;
2608
50.1k
2609
86.0k
  while (
!UsersToVisit.empty()86.0k
) {
2610
85.0k
    auto User = UsersToVisit.back();
2611
85.0k
    UsersToVisit.pop_back();
2612
85.0k
    if (!Visited.insert(User).second)
2613
5.35k
      continue;
2614
79.6k
2615
79.6k
    
for (const auto &U : User->users()) 79.6k
{
2616
93.9k
      auto Inst = dyn_cast<Instruction>(U);
2617
93.9k
      if (!Inst)
2618
0
        return false;
2619
93.9k
2620
93.9k
      
if (93.9k
Inst->getOpcode() == OpCode || 93.9k
isa<PHINode>(U)63.7k
) {
2621
42.1k
        if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
2622
7
          
if (7
!isa<PHINode>(FPOp) && 7
!FPOp->getFastMathFlags().unsafeAlgebra()7
)
2623
0
            return false;
2624
42.1k
        UsersToVisit.push_back(U);
2625
93.9k
      } else 
if (const ShuffleVectorInst *51.7k
ShufInst51.7k
=
2626
3.23k
                     dyn_cast<ShuffleVectorInst>(U)) {
2627
3.23k
        // Detect the following pattern: A ShuffleVector instruction together
2628
3.23k
        // with a reduction that do partial reduction on the first and second
2629
3.23k
        // ElemNumToReduce / 2 elements, and store the result in
2630
3.23k
        // ElemNumToReduce / 2 elements in another vector.
2631
3.23k
2632
3.23k
        unsigned ResultElements = ShufInst->getType()->getVectorNumElements();
2633
3.23k
        if (ResultElements < ElemNum)
2634
49
          return false;
2635
3.18k
2636
3.18k
        
if (3.18k
ElemNumToReduce == 13.18k
)
2637
0
          return false;
2638
3.18k
        
if (3.18k
!isa<UndefValue>(U->getOperand(1))3.18k
)
2639
924
          return false;
2640
5.34k
        
for (unsigned i = 0; 2.25k
i < ElemNumToReduce / 25.34k
;
++i3.09k
)
2641
3.64k
          
if (3.64k
ShufInst->getMaskValue(i) != int(i + ElemNumToReduce / 2)3.64k
)
2642
552
            return false;
2643
8.56k
        
for (unsigned i = ElemNumToReduce / 2; 1.70k
i < ElemNum8.56k
;
++i6.86k
)
2644
6.86k
          
if (6.86k
ShufInst->getMaskValue(i) != -16.86k
)
2645
6
            return false;
2646
1.70k
2647
1.70k
        // There is only one user of this ShuffleVector instruction, which
2648
1.70k
        // must be a reduction operation.
2649
1.70k
        
if (1.70k
!U->hasOneUse()1.70k
)
2650
0
          return false;
2651
1.70k
2652
1.70k
        auto U2 = dyn_cast<Instruction>(*U->user_begin());
2653
1.70k
        if (
!U2 || 1.70k
U2->getOpcode() != OpCode1.70k
)
2654
2
          return false;
2655
1.69k
2656
1.69k
        // Check operands of the reduction operation.
2657
1.69k
        
if (1.69k
(U2->getOperand(0) == U->getOperand(0) && 1.69k
U2->getOperand(1) == U1.69k
) ||
2658
1.69k
            
(U2->getOperand(1) == U->getOperand(0) && 0
U2->getOperand(0) == U0
)) {
2659
1.69k
          UsersToVisit.push_back(U2);
2660
1.69k
          ElemNumToReduce /= 2;
2661
1.69k
        } else
2662
0
          return false;
2663
48.5k
      } else 
if (48.5k
isa<ExtractElementInst>(U)48.5k
) {
2664
1.66k
        // At this moment we should have reduced all elements in the vector.
2665
1.66k
        if (ElemNumToReduce != 1)
2666
771
          return false;
2667
892
2668
892
        const ConstantInt *Val = dyn_cast<ConstantInt>(U->getOperand(1));
2669
892
        if (
!Val || 892
Val->getZExtValue() != 0892
)
2670
0
          return false;
2671
892
2672
892
        ReduxExtracted = true;
2673
892
      } else
2674
46.8k
        return false;
2675
93.9k
    }
2676
85.0k
  }
2677
971
  return ReduxExtracted;
2678
1.45M
}
2679
2680
1.45M
void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
2681
1.45M
  SDValue Op1 = getValue(I.getOperand(0));
2682
1.45M
  SDValue Op2 = getValue(I.getOperand(1));
2683
1.45M
2684
1.45M
  bool nuw = false;
2685
1.45M
  bool nsw = false;
2686
1.45M
  bool exact = false;
2687
1.45M
  bool vec_redux = false;
2688
1.45M
  FastMathFlags FMF;
2689
1.45M
2690
1.45M
  if (const OverflowingBinaryOperator *OFBinOp =
2691
955k
          dyn_cast<const OverflowingBinaryOperator>(&I)) {
2692
955k
    nuw = OFBinOp->hasNoUnsignedWrap();
2693
955k
    nsw = OFBinOp->hasNoSignedWrap();
2694
955k
  }
2695
1.45M
  if (const PossiblyExactOperator *ExactOp =
2696
1.45M
          dyn_cast<const PossiblyExactOperator>(&I))
2697
13.3k
    exact = ExactOp->isExact();
2698
1.45M
  if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(&I))
2699
177k
    FMF = FPOp->getFastMathFlags();
2700
1.45M
2701
1.45M
  if (
isVectorReductionOp(&I)1.45M
) {
2702
892
    vec_redux = true;
2703
892
    DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n");
2704
892
  }
2705
1.45M
2706
1.45M
  SDNodeFlags Flags;
2707
1.45M
  Flags.setExact(exact);
2708
1.45M
  Flags.setNoSignedWrap(nsw);
2709
1.45M
  Flags.setNoUnsignedWrap(nuw);
2710
1.45M
  Flags.setVectorReduction(vec_redux);
2711
1.45M
  Flags.setAllowReciprocal(FMF.allowReciprocal());
2712
1.45M
  Flags.setAllowContract(FMF.allowContract());
2713
1.45M
  Flags.setNoInfs(FMF.noInfs());
2714
1.45M
  Flags.setNoNaNs(FMF.noNaNs());
2715
1.45M
  Flags.setNoSignedZeros(FMF.noSignedZeros());
2716
1.45M
  Flags.setUnsafeAlgebra(FMF.unsafeAlgebra());
2717
1.45M
2718
1.45M
  SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(),
2719
1.45M
                                     Op1, Op2, Flags);
2720
1.45M
  setValue(&I, BinNodeValue);
2721
1.45M
}
2722
2723
205k
void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
2724
205k
  SDValue Op1 = getValue(I.getOperand(0));
2725
205k
  SDValue Op2 = getValue(I.getOperand(1));
2726
205k
2727
205k
  EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy(
2728
205k
      Op2.getValueType(), DAG.getDataLayout());
2729
205k
2730
205k
  // Coerce the shift amount to the right type if we can.
2731
205k
  if (
!I.getType()->isVectorTy() && 205k
Op2.getValueType() != ShiftTy198k
) {
2732
102k
    unsigned ShiftSize = ShiftTy.getSizeInBits();
2733
102k
    unsigned Op2Size = Op2.getValueSizeInBits();
2734
102k
    SDLoc DL = getCurSDLoc();
2735
102k
2736
102k
    // If the operand is smaller than the shift count type, promote it.
2737
102k
    if (ShiftSize > Op2Size)
2738
87.0k
      Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2);
2739
102k
2740
102k
    // If the operand is larger than the shift count type but the shift
2741
102k
    // count type has enough bits to represent any shift value, truncate
2742
102k
    // it now. This is a common case and it exposes the truncate to
2743
102k
    // optimization early.
2744
15.9k
    else 
if (15.9k
ShiftSize >= Log2_32_Ceil(Op2.getValueSizeInBits())15.9k
)
2745
15.9k
      Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
2746
15.9k
    // Otherwise we'll need to temporarily settle for some other convenient
2747
15.9k
    // type.  Type legalization will make adjustments once the shiftee is split.
2748
15.9k
    else
2749
9
      Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
2750
102k
  }
2751
205k
2752
205k
  bool nuw = false;
2753
205k
  bool nsw = false;
2754
205k
  bool exact = false;
2755
205k
2756
205k
  if (
Opcode == ISD::SRL || 205k
Opcode == ISD::SRA137k
||
Opcode == ISD::SHL110k
) {
2757
205k
2758
205k
    if (const OverflowingBinaryOperator *OFBinOp =
2759
110k
            dyn_cast<const OverflowingBinaryOperator>(&I)) {
2760
110k
      nuw = OFBinOp->hasNoUnsignedWrap();
2761
110k
      nsw = OFBinOp->hasNoSignedWrap();
2762
110k
    }
2763
205k
    if (const PossiblyExactOperator *ExactOp =
2764
205k
            dyn_cast<const PossiblyExactOperator>(&I))
2765
94.2k
      exact = ExactOp->isExact();
2766
205k
  }
2767
205k
  SDNodeFlags Flags;
2768
205k
  Flags.setExact(exact);
2769
205k
  Flags.setNoSignedWrap(nsw);
2770
205k
  Flags.setNoUnsignedWrap(nuw);
2771
205k
  SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2,
2772
205k
                            Flags);
2773
205k
  setValue(&I, Res);
2774
205k
}
2775
2776
8.61k
void SelectionDAGBuilder::visitSDiv(const User &I) {
2777
8.61k
  SDValue Op1 = getValue(I.getOperand(0));
2778
8.61k
  SDValue Op2 = getValue(I.getOperand(1));
2779
8.61k
2780
8.61k
  SDNodeFlags Flags;
2781
8.61k
  Flags.setExact(isa<PossiblyExactOperator>(&I) &&
2782
8.61k
                 cast<PossiblyExactOperator>(&I)->isExact());
2783
8.61k
  setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1,
2784
8.61k
                           Op2, Flags));
2785
8.61k
}
2786
2787
1.80M
void SelectionDAGBuilder::visitICmp(const User &I) {
2788
1.80M
  ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
2789
1.80M
  if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
2790
1.80M
    predicate = IC->getPredicate();
2791
240
  else 
if (const ConstantExpr *240
IC240
= dyn_cast<ConstantExpr>(&I))
2792
240
    predicate = ICmpInst::Predicate(IC->getPredicate());
2793
1.80M
  SDValue Op1 = getValue(I.getOperand(0));
2794
1.80M
  SDValue Op2 = getValue(I.getOperand(1));
2795
1.80M
  ISD::CondCode Opcode = getICmpCondCode(predicate);
2796
1.80M
2797
1.80M
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2798
1.80M
                                                        I.getType());
2799
1.80M
  setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
2800
1.80M
}
2801
2802
35.1k
void SelectionDAGBuilder::visitFCmp(const User &I) {
2803
35.1k
  FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
2804
35.1k
  if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
2805
35.1k
    predicate = FC->getPredicate();
2806
4
  else 
if (const ConstantExpr *4
FC4
= dyn_cast<ConstantExpr>(&I))
2807
4
    predicate = FCmpInst::Predicate(FC->getPredicate());
2808
35.1k
  SDValue Op1 = getValue(I.getOperand(0));
2809
35.1k
  SDValue Op2 = getValue(I.getOperand(1));
2810
35.1k
  ISD::CondCode Condition = getFCmpCondCode(predicate);
2811
35.1k
2812
35.1k
  // FIXME: Fcmp instructions have fast-math-flags in IR, so we should use them.
2813
35.1k
  // FIXME: We should propagate the fast-math-flags to the DAG node itself for
2814
35.1k
  // further optimization, but currently FMF is only applicable to binary nodes.
2815
35.1k
  if (TM.Options.NoNaNsFPMath)
2816
285
    Condition = getFCmpCodeWithoutNaN(Condition);
2817
35.1k
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2818
35.1k
                                                        I.getType());
2819
35.1k
  setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition));
2820
35.1k
}
2821
2822
// Check if the condition of the select has one use or two users that are both
2823
// selects with the same condition.
2824
4.17k
static bool hasOnlySelectUsers(const Value *Cond) {
2825
4.32k
  return llvm::all_of(Cond->users(), [](const Value *V) {
2826
4.32k
    return isa<SelectInst>(V);
2827
4.32k
  });
2828
4.17k
}
2829
2830
140k
void SelectionDAGBuilder::visitSelect(const User &I) {
2831
140k
  SmallVector<EVT, 4> ValueVTs;
2832
140k
  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
2833
140k
                  ValueVTs);
2834
140k
  unsigned NumValues = ValueVTs.size();
2835
140k
  if (
NumValues == 0140k
)
return7
;
2836
140k
2837
140k
  SmallVector<SDValue, 4> Values(NumValues);
2838
140k
  SDValue Cond     = getValue(I.getOperand(0));
2839
140k
  SDValue LHSVal   = getValue(I.getOperand(1));
2840
140k
  SDValue RHSVal   = getValue(I.getOperand(2));
2841
140k
  auto BaseOps = {Cond};
2842
140k
  ISD::NodeType OpCode = Cond.getValueType().isVector() ?
2843
140k
    
ISD::VSELECT10.6k
:
ISD::SELECT130k
;
2844
140k
2845
140k
  // Min/max matching is only viable if all output VTs are the same.
2846
140k
  if (
std::equal(ValueVTs.begin(), ValueVTs.end(), ValueVTs.begin())140k
) {
2847
140k
    EVT VT = ValueVTs[0];
2848
140k
    LLVMContext &Ctx = *DAG.getContext();
2849
140k
    auto &TLI = DAG.getTargetLoweringInfo();
2850
140k
2851
140k
    // We care about the legality of the operation after it has been type
2852
140k
    // legalized.
2853
157k
    while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal &&
2854
16.9k
           VT != TLI.getTypeToTransformTo(Ctx, VT))
2855
16.9k
      VT = TLI.getTypeToTransformTo(Ctx, VT);
2856
140k
2857
140k
    // If the vselect is legal, assume we want to leave this as a vector setcc +
2858
140k
    // vselect. Otherwise, if this is going to be scalarized, we want to see if
2859
140k
    // min/max is legal on the scalar type.
2860
140k
    bool UseScalarMinMax = VT.isVector() &&
2861
10.9k
      !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT);
2862
140k
2863
140k
    Value *LHS, *RHS;
2864
140k
    auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);
2865
140k
    ISD::NodeType Opc = ISD::DELETED_NODE;
2866
140k
    switch (SPR.Flavor) {
2867
16.1k
    case SPF_UMAX:    Opc = ISD::UMAX; break;
2868
3.26k
    case SPF_UMIN:    Opc = ISD::UMIN; break;
2869
20.5k
    case SPF_SMAX:    Opc = ISD::SMAX; break;
2870
5.82k
    case SPF_SMIN:    Opc = ISD::SMIN; break;
2871
436
    case SPF_FMINNUM:
2872
436
      switch (SPR.NaNBehavior) {
2873
0
      
case SPNB_NA: 0
llvm_unreachable0
("No NaN behavior for FP op?");
2874
332
      case SPNB_RETURNS_NAN:   Opc = ISD::FMINNAN; break;
2875
67
      case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
2876
37
      case SPNB_RETURNS_ANY: {
2877
37
        if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT))
2878
30
          Opc = ISD::FMINNUM;
2879
7
        else 
if (7
TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT)7
)
2880
0
          Opc = ISD::FMINNAN;
2881
7
        else 
if (7
UseScalarMinMax7
)
2882
0
          Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ?
2883
0
            
ISD::FMINNUM0
:
ISD::FMINNAN0
;
2884
37
        break;
2885
436
      }
2886
436
      }
2887
436
      break;
2888
234
    case SPF_FMAXNUM:
2889
234
      switch (SPR.NaNBehavior) {
2890
0
      
case SPNB_NA: 0
llvm_unreachable0
("No NaN behavior for FP op?");
2891
113
      case SPNB_RETURNS_NAN:   Opc = ISD::FMAXNAN; break;
2892
83
      case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
2893
38
      case SPNB_RETURNS_ANY:
2894
38
2895
38
        if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT))
2896
32
          Opc = ISD::FMAXNUM;
2897
6
        else 
if (6
TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT)6
)
2898
0
          Opc = ISD::FMAXNAN;
2899
6
        else 
if (6
UseScalarMinMax6
)
2900
0
          Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ?
2901
0
            
ISD::FMAXNUM0
:
ISD::FMAXNAN0
;
2902
38
        break;
2903
234
      }
2904
234
      break;
2905
94.2k
    default: break;
2906
140k
    }
2907
140k
2908
140k
    
if (140k
Opc != ISD::DELETED_NODE &&
2909
46.4k
        (TLI.isOperationLegalOrCustom(Opc, VT) ||
2910
42.3k
         (UseScalarMinMax &&
2911
46.4k
          TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
2912
140k
        // If the underlying comparison instruction is used by any other
2913
140k
        // instruction, the consumed instructions won't be destroyed, so it is
2914
140k
        // not profitable to convert to a min/max.
2915
140k
        
hasOnlySelectUsers(cast<SelectInst>(I).getCondition())4.17k
) {
2916
4.15k
      OpCode = Opc;
2917
4.15k
      LHSVal = getValue(LHS);
2918
4.15k
      RHSVal = getValue(RHS);
2919
4.15k
      BaseOps = {};
2920
4.15k
    }
2921
140k
  }
2922
140k
2923
281k
  
for (unsigned i = 0; 140k
i != NumValues281k
;
++i140k
) {
2924
140k
    SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());
2925
140k
    Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
2926
140k
    Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
2927
140k
    Values[i] = DAG.getNode(OpCode, getCurSDLoc(),
2928
140k
                            LHSVal.getNode()->getValueType(LHSVal.getResNo()+i),
2929
140k
                            Ops);
2930
140k
  }
2931
140k
2932
140k
  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
2933
140k
                           DAG.getVTList(ValueVTs), Values));
2934
140k
}
2935
2936
302k
void SelectionDAGBuilder::visitTrunc(const User &I) {
2937
302k
  // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
2938
302k
  SDValue N = getValue(I.getOperand(0));
2939
302k
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2940
302k
                                                        I.getType());
2941
302k
  setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N));
2942
302k
}
2943
2944
200k
void SelectionDAGBuilder::visitZExt(const User &I) {
2945
200k
  // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
2946
200k
  // ZExt also can't be a cast to bool for same reason. So, nothing much to do
2947
200k
  SDValue N = getValue(I.getOperand(0));
2948
200k
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2949
200k
                                                        I.getType());
2950
200k
  setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N));
2951
200k
}
2952
2953
278k
void SelectionDAGBuilder::visitSExt(const User &I) {
2954
278k
  // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
2955
278k
  // SExt also can't be a cast to bool for same reason. So, nothing much to do
2956
278k
  SDValue N = getValue(I.getOperand(0));
2957
278k
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2958
278k
                                                        I.getType());
2959
278k
  setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N));
2960
278k
}
2961
2962
8.33k
void SelectionDAGBuilder::visitFPTrunc(const User &I) {
2963
8.33k
  // FPTrunc is never a no-op cast, no need to check
2964
8.33k
  SDValue N = getValue(I.getOperand(0));
2965
8.33k
  SDLoc dl = getCurSDLoc();
2966
8.33k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2967
8.33k
  EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
2968
8.33k
  setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N,
2969
8.33k
                           DAG.getTargetConstant(
2970
8.33k
                               0, dl, TLI.getPointerTy(DAG.getDataLayout()))));
2971
8.33k
}
2972
2973
25.0k
void SelectionDAGBuilder::visitFPExt(const User &I) {
2974
25.0k
  // FPExt is never a no-op cast, no need to check
2975
25.0k
  SDValue N = getValue(I.getOperand(0));
2976
25.0k
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2977
25.0k
                                                        I.getType());
2978
25.0k
  setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N));
2979
25.0k
}
2980
2981
6.96k
void SelectionDAGBuilder::visitFPToUI(const User &I) {
2982
6.96k
  // FPToUI is never a no-op cast, no need to check
2983
6.96k
  SDValue N = getValue(I.getOperand(0));
2984
6.96k
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2985
6.96k
                                                        I.getType());
2986
6.96k
  setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N));
2987
6.96k
}
2988
2989
5.24k
void SelectionDAGBuilder::visitFPToSI(const User &I) {
2990
5.24k
  // FPToSI is never a no-op cast, no need to check
2991
5.24k
  SDValue N = getValue(I.getOperand(0));
2992
5.24k
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2993
5.24k
                                                        I.getType());
2994
5.24k
  setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N));
2995
5.24k
}
2996
2997
29.6k
void SelectionDAGBuilder::visitUIToFP(const User &I) {
2998
29.6k
  // UIToFP is never a no-op cast, no need to check
2999
29.6k
  SDValue N = getValue(I.getOperand(0));
3000
29.6k
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3001
29.6k
                                                        I.getType());
3002
29.6k
  setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N));
3003
29.6k
}
3004
3005
46.8k
void SelectionDAGBuilder::visitSIToFP(const User &I) {
3006
46.8k
  // SIToFP is never a no-op cast, no need to check
3007
46.8k
  SDValue N = getValue(I.getOperand(0));
3008
46.8k
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3009
46.8k
                                                        I.getType());
3010
46.8k
  setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N));
3011
46.8k
}
3012
3013
101k
void SelectionDAGBuilder::visitPtrToInt(const User &I) {
3014
101k
  // What to do depends on the size of the integer and the size of the pointer.
3015
101k
  // We can either truncate, zero extend, or no-op, accordingly.
3016
101k
  SDValue N = getValue(I.getOperand(0));
3017
101k
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3018
101k
                                                        I.getType());
3019
101k
  setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
3020
101k
}
3021
3022
52.3k
void SelectionDAGBuilder::visitIntToPtr(const User &I) {
3023
52.3k
  // What to do depends on the size of the integer and the size of the pointer.
3024
52.3k
  // We can either truncate, zero extend, or no-op, accordingly.
3025
52.3k
  SDValue N = getValue(I.getOperand(0));
3026
52.3k
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3027
52.3k
                                                        I.getType());
3028
52.3k
  setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
3029
52.3k
}
3030
3031
1.90M
void SelectionDAGBuilder::visitBitCast(const User &I) {
3032
1.90M
  SDValue N = getValue(I.getOperand(0));
3033
1.90M
  SDLoc dl = getCurSDLoc();
3034
1.90M
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3035
1.90M
                                                        I.getType());
3036
1.90M
3037
1.90M
  // BitCast assures us that source and destination are the same size so this is
3038
1.90M
  // either a BITCAST or a no-op.
3039
1.90M
  if (DestVT != N.getValueType())
3040
31.1k
    setValue(&I, DAG.getNode(ISD::BITCAST, dl,
3041
31.1k
                             DestVT, N)); // convert types.
3042
1.90M
  // Check if the original LLVM IR Operand was a ConstantInt, because getValue()
3043
1.90M
  // might fold any kind of constant expression to an integer constant and that
3044
1.90M
  // is not what we are looking for. Only recognize a bitcast of a genuine
3045
1.90M
  // constant integer as an opaque constant.
3046
1.87M
  else 
if(ConstantInt *1.87M
C1.87M
= dyn_cast<ConstantInt>(I.getOperand(0)))
3047
19.5k
    setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false,
3048
19.5k
                                 /*isOpaque*/true));
3049
1.87M
  else
3050
1.85M
    setValue(&I, N);            // noop cast.
3051
1.90M
}
3052
3053
290
void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) {
3054
290
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3055
290
  const Value *SV = I.getOperand(0);
3056
290
  SDValue N = getValue(SV);
3057
290
  EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3058
290
3059
290
  unsigned SrcAS = SV->getType()->getPointerAddressSpace();
3060
290
  unsigned DestAS = I.getType()->getPointerAddressSpace();
3061
290
3062
290
  if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
3063
199
    N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS);
3064
290
3065
290
  setValue(&I, N);
3066
290
}
3067
3068
26.3k
void SelectionDAGBuilder::visitInsertElement(const User &I) {
3069
26.3k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3070
26.3k
  SDValue InVec = getValue(I.getOperand(0));
3071
26.3k
  SDValue InVal = getValue(I.getOperand(1));
3072
26.3k
  SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(),
3073
26.3k
                                     TLI.getVectorIdxTy(DAG.getDataLayout()));
3074
26.3k
  setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(),
3075
26.3k
                           TLI.getValueType(DAG.getDataLayout(), I.getType()),
3076
26.3k
                           InVec, InVal, InIdx));
3077
26.3k
}
3078
3079
20.8k
void SelectionDAGBuilder::visitExtractElement(const User &I) {
3080
20.8k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3081
20.8k
  SDValue InVec = getValue(I.getOperand(0));
3082
20.8k
  SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(),
3083
20.8k
                                     TLI.getVectorIdxTy(DAG.getDataLayout()));
3084
20.8k
  setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(),
3085
20.8k
                           TLI.getValueType(DAG.getDataLayout(), I.getType()),
3086
20.8k
                           InVec, InIdx));
3087
20.8k
}
3088
3089
58.6k
void SelectionDAGBuilder::visitShuffleVector(const User &I) {
3090
58.6k
  SDValue Src1 = getValue(I.getOperand(0));
3091
58.6k
  SDValue Src2 = getValue(I.getOperand(1));
3092
58.6k
  SDLoc DL = getCurSDLoc();
3093
58.6k
3094
58.6k
  SmallVector<int, 8> Mask;
3095
58.6k
  ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
3096
58.6k
  unsigned MaskNumElts = Mask.size();
3097
58.6k
3098
58.6k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3099
58.6k
  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3100
58.6k
  EVT SrcVT = Src1.getValueType();
3101
58.6k
  unsigned SrcNumElts = SrcVT.getVectorNumElements();
3102
58.6k
3103
58.6k
  if (
SrcNumElts == MaskNumElts58.6k
) {
3104
22.9k
    setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask));
3105
22.9k
    return;
3106
22.9k
  }
3107
35.7k
3108
35.7k
  // Normalize the shuffle vector since mask and vector length don't match.
3109
35.7k
  
if (35.7k
SrcNumElts < MaskNumElts35.7k
) {
3110
9.81k
    // Mask is longer than the source vectors. We can use concatenate vector to
3111
9.81k
    // make the mask and vectors lengths match.
3112
9.81k
3113
9.81k
    if (
MaskNumElts % SrcNumElts == 09.81k
) {
3114
8.75k
      // Mask length is a multiple of the source vector length.
3115
8.75k
      // Check if the shuffle is some kind of concatenation of the input
3116
8.75k
      // vectors.
3117
8.75k
      unsigned NumConcat = MaskNumElts / SrcNumElts;
3118
8.75k
      bool IsConcat = true;
3119
8.75k
      SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
3120
148k
      for (unsigned i = 0; 
i != MaskNumElts148k
;
++i139k
) {
3121
140k
        int Idx = Mask[i];
3122
140k
        if (Idx < 0)
3123
7.01k
          continue;
3124
133k
        // Ensure the indices in each SrcVT sized piece are sequential and that
3125
133k
        // the same source is used for the whole piece.
3126
133k
        
if (133k
(Idx % SrcNumElts != (i % SrcNumElts)) ||
3127
132k
            (ConcatSrcs[i / SrcNumElts] >= 0 &&
3128
133k
             
ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)109k
)) {
3129
1.05k
          IsConcat = false;
3130
1.05k
          break;
3131
1.05k
        }
3132
132k
        // Remember which source this index came from.
3133
132k
        ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
3134
132k
      }
3135
8.75k
3136
8.75k
      // The shuffle is concatenating multiple vectors together. Just emit
3137
8.75k
      // a CONCAT_VECTORS operation.
3138
8.75k
      if (
IsConcat8.75k
) {
3139
7.70k
        SmallVector<SDValue, 8> ConcatOps;
3140
23.4k
        for (auto Src : ConcatSrcs) {
3141
23.4k
          if (Src < 0)
3142
1.55k
            ConcatOps.push_back(DAG.getUNDEF(SrcVT));
3143
21.9k
          else 
if (21.9k
Src == 021.9k
)
3144
9.29k
            ConcatOps.push_back(Src1);
3145
21.9k
          else
3146
12.6k
            ConcatOps.push_back(Src2);
3147
23.4k
        }
3148
7.70k
        setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps));
3149
7.70k
        return;
3150
7.70k
      }
3151
2.10k
    }
3152
2.10k
3153
2.10k
    unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
3154
2.10k
    unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
3155
2.10k
    EVT PaddedVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
3156
2.10k
                                    PaddedMaskNumElts);
3157
2.10k
3158
2.10k
    // Pad both vectors with undefs to make them the same length as the mask.
3159
2.10k
    SDValue UndefVal = DAG.getUNDEF(SrcVT);
3160
2.10k
3161
2.10k
    SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
3162
2.10k
    SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
3163
2.10k
    MOps1[0] = Src1;
3164
2.10k
    MOps2[0] = Src2;
3165
2.10k
3166
2.10k
    Src1 = Src1.isUndef()
3167
6
               ? DAG.getUNDEF(PaddedVT)
3168
2.09k
               : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1);
3169
2.10k
    Src2 = Src2.isUndef()
3170
824
               ? DAG.getUNDEF(PaddedVT)
3171
1.28k
               : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2);
3172
2.10k
3173
2.10k
    // Readjust mask for new input vector length.
3174
2.10k
    SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
3175
34.6k
    for (unsigned i = 0; 
i != MaskNumElts34.6k
;
++i32.5k
) {
3176
32.5k
      int Idx = Mask[i];
3177
32.5k
      if (Idx >= (int)SrcNumElts)
3178
8.27k
        Idx -= SrcNumElts - PaddedMaskNumElts;
3179
32.5k
      MappedOps[i] = Idx;
3180
32.5k
    }
3181
2.10k
3182
2.10k
    SDValue Result = DAG.getVectorShuffle(PaddedVT, DL, Src1, Src2, MappedOps);
3183
2.10k
3184
2.10k
    // If the concatenated vector was padded, extract a subvector with the
3185
2.10k
    // correct number of elements.
3186
2.10k
    if (MaskNumElts != PaddedMaskNumElts)
3187
1.05k
      Result = DAG.getNode(
3188
1.05k
          ISD::EXTRACT_SUBVECTOR, DL, VT, Result,
3189
1.05k
          DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
3190
9.81k
3191
9.81k
    setValue(&I, Result);
3192
9.81k
    return;
3193
9.81k
  }
3194
25.8k
3195
25.8k
  
if (25.8k
SrcNumElts > MaskNumElts25.8k
) {
3196
25.8k
    // Analyze the access pattern of the vector to see if we can extract
3197
25.8k
    // two subvectors and do the shuffle.
3198
25.8k
    int StartIdx[2] = { -1, -1 };  // StartIdx to extract from
3199
25.8k
    bool CanExtract = true;
3200
66.9k
    for (int Idx : Mask) {
3201
66.9k
      unsigned Input = 0;
3202
66.9k
      if (Idx < 0)
3203
19
        continue;
3204
66.8k
3205
66.8k
      
if (66.8k
Idx >= (int)SrcNumElts66.8k
) {
3206
1.11k
        Input = 1;
3207
1.11k
        Idx -= SrcNumElts;
3208
1.11k
      }
3209
66.8k
3210
66.8k
      // If all the indices come from the same MaskNumElts sized portion of
3211
66.8k
      // the sources we can use extract. Also make sure the extract wouldn't
3212
66.8k
      // extract past the end of the source.
3213
66.8k
      int NewStartIdx = alignDown(Idx, MaskNumElts);
3214
66.8k
      if (NewStartIdx + MaskNumElts > SrcNumElts ||
3215
66.8k
          
(StartIdx[Input] >= 0 && 66.8k
StartIdx[Input] != NewStartIdx40.9k
))
3216
2.43k
        CanExtract = false;
3217
66.9k
      // Make sure we always update StartIdx as we use it to track if all
3218
66.9k
      // elements are undef.
3219
66.9k
      StartIdx[Input] = NewStartIdx;
3220
66.9k
    }
3221
25.8k
3222
25.8k
    if (
StartIdx[0] < 0 && 25.8k
StartIdx[1] < 0123
) {
3223
0
      setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
3224
0
      return;
3225
0
    }
3226
25.8k
    
if (25.8k
CanExtract25.8k
) {
3227
24.6k
      // Extract appropriate subvector and generate a vector shuffle
3228
73.9k
      for (unsigned Input = 0; 
Input < 273.9k
;
++Input49.3k
) {
3229
49.3k
        SDValue &Src = Input == 0 ? 
Src124.6k
:
Src224.6k
;
3230
49.3k
        if (StartIdx[Input] < 0)
3231
24.6k
          Src = DAG.getUNDEF(VT);
3232
24.6k
        else {
3233
24.6k
          Src = DAG.getNode(
3234
24.6k
              ISD::EXTRACT_SUBVECTOR, DL, VT, Src,
3235
24.6k
              DAG.getConstant(StartIdx[Input], DL,
3236
24.6k
                              TLI.getVectorIdxTy(DAG.getDataLayout())));
3237
24.6k
        }
3238
49.3k
      }
3239
24.6k
3240
24.6k
      // Calculate new mask.
3241
24.6k
      SmallVector<int, 8> MappedOps(Mask.begin(), Mask.end());
3242
59.7k
      for (int &Idx : MappedOps) {
3243
59.7k
        if (Idx >= (int)SrcNumElts)
3244
1.06k
          Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
3245
58.6k
        else 
if (58.6k
Idx >= 058.6k
)
3246
58.6k
          Idx -= StartIdx[0];
3247
59.7k
      }
3248
24.6k
3249
24.6k
      setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps));
3250
24.6k
      return;
3251
24.6k
    }
3252
1.23k
  }
3253
1.23k
3254
1.23k
  // We can't use either concat vectors or extract subvectors so fall back to
3255
1.23k
  // replacing the shuffle with extract and build vector.
3256
1.23k
  // to insert and build vector.
3257
1.23k
  EVT EltVT = VT.getVectorElementType();
3258
1.23k
  EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
3259
1.23k
  SmallVector<SDValue,8> Ops;
3260
7.17k
  for (int Idx : Mask) {
3261
7.17k
    SDValue Res;
3262
7.17k
3263
7.17k
    if (
Idx < 07.17k
) {
3264
10
      Res = DAG.getUNDEF(EltVT);
3265
7.17k
    } else {
3266
7.16k
      SDValue &Src = Idx < (int)SrcNumElts ? 
Src17.11k
:
Src251
;
3267
7.16k
      if (
Idx >= (int)SrcNumElts7.16k
)
Idx -= SrcNumElts51
;
3268
7.16k
3269
7.16k
      Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
3270
7.16k
                        EltVT, Src, DAG.getConstant(Idx, DL, IdxVT));
3271
7.16k
    }
3272
7.17k
3273
7.17k
    Ops.push_back(Res);
3274
7.17k
  }
3275
58.6k
3276
58.6k
  setValue(&I, DAG.getBuildVector(VT, DL, Ops));
3277
58.6k
}
3278
3279
2.02k
void SelectionDAGBuilder::visitInsertValue(const User &I) {
3280
2.02k
  ArrayRef<unsigned> Indices;
3281
2.02k
  if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(&I))
3282
2.02k
    Indices = IV->getIndices();
3283
2.02k
  else
3284
0
    Indices = cast<ConstantExpr>(&I)->getIndices();
3285
2.02k
3286
2.02k
  const Value *Op0 = I.getOperand(0);
3287
2.02k
  const Value *Op1 = I.getOperand(1);
3288
2.02k
  Type *AggTy = I.getType();
3289
2.02k
  Type *ValTy = Op1->getType();
3290
2.02k
  bool IntoUndef = isa<UndefValue>(Op0);
3291
2.02k
  bool FromUndef = isa<UndefValue>(Op1);
3292
2.02k
3293
2.02k
  unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
3294
2.02k
3295
2.02k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3296
2.02k
  SmallVector<EVT, 4> AggValueVTs;
3297
2.02k
  ComputeValueVTs(TLI, DAG.getDataLayout(), AggTy, AggValueVTs);
3298
2.02k
  SmallVector<EVT, 4> ValValueVTs;
3299
2.02k
  ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
3300
2.02k
3301
2.02k
  unsigned NumAggValues = AggValueVTs.size();
3302
2.02k
  unsigned NumValValues = ValValueVTs.size();
3303
2.02k
  SmallVector<SDValue, 4> Values(NumAggValues);
3304
2.02k
3305
2.02k
  // Ignore an insertvalue that produces an empty object
3306
2.02k
  if (
!NumAggValues2.02k
) {
3307
1
    setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
3308
1
    return;
3309
1
  }
3310
2.02k
3311
2.02k
  SDValue Agg = getValue(Op0);
3312
2.02k
  unsigned i = 0;
3313
2.02k
  // Copy the beginning value(s) from the original aggregate.
3314
3.95k
  for (; 
i != LinearIndex3.95k
;
++i1.92k
)
3315
1.92k
    
Values[i] = IntoUndef ? 1.92k
DAG.getUNDEF(AggValueVTs[i])62
:
3316
1.86k
                SDValue(Agg.getNode(), Agg.getResNo() + i);
3317
2.02k
  // Copy values from the inserted value(s).
3318
2.02k
  if (
NumValValues2.02k
) {
3319
2.02k
    SDValue Val = getValue(Op1);
3320
4.07k
    for (; 
i != LinearIndex + NumValValues4.07k
;
++i2.04k
)
3321
2.04k
      
Values[i] = FromUndef ? 2.04k
DAG.getUNDEF(AggValueVTs[i])17
:
3322
2.03k
                  SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
3323
2.02k
  }
3324
2.02k
  // Copy remaining value(s) from the original aggregate.
3325
3.86k
  for (; 
i != NumAggValues3.86k
;
++i1.83k
)
3326
1.83k
    
Values[i] = IntoUndef ? 1.83k
DAG.getUNDEF(AggValueVTs[i])1.09k
:
3327
741
                SDValue(Agg.getNode(), Agg.getResNo() + i);
3328
2.02k
3329
2.02k
  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
3330
2.02k
                           DAG.getVTList(AggValueVTs), Values));
3331
2.02k
}
3332
3333
30.1k
void SelectionDAGBuilder::visitExtractValue(const User &I) {
3334
30.1k
  ArrayRef<unsigned> Indices;
3335
30.1k
  if (const ExtractValueInst *EV = dyn_cast<ExtractValueInst>(&I))
3336
30.1k
    Indices = EV->getIndices();
3337
30.1k
  else
3338
1
    Indices = cast<ConstantExpr>(&I)->getIndices();
3339
30.1k
3340
30.1k
  const Value *Op0 = I.getOperand(0);
3341
30.1k
  Type *AggTy = Op0->getType();
3342
30.1k
  Type *ValTy = I.getType();
3343
30.1k
  bool OutOfUndef = isa<UndefValue>(Op0);
3344
30.1k
3345
30.1k
  unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
3346
30.1k
3347
30.1k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3348
30.1k
  SmallVector<EVT, 4> ValValueVTs;
3349
30.1k
  ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
3350
30.1k
3351
30.1k
  unsigned NumValValues = ValValueVTs.size();
3352
30.1k
3353
30.1k
  // Ignore a extractvalue that produces an empty object
3354
30.1k
  if (
!NumValValues30.1k
) {
3355
4
    setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
3356
4
    return;
3357
4
  }
3358
30.1k
3359
30.1k
  SmallVector<SDValue, 4> Values(NumValValues);
3360
30.1k
3361
30.1k
  SDValue Agg = getValue(Op0);
3362
30.1k
  // Copy out the selected value(s).
3363
60.3k
  for (unsigned i = LinearIndex; 
i != LinearIndex + NumValValues60.3k
;
++i30.1k
)
3364
30.1k
    Values[i - LinearIndex] =
3365
30.1k
      OutOfUndef ?
3366
14
        DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
3367
30.1k
        SDValue(Agg.getNode(), Agg.getResNo() + i);
3368
30.1k
3369
30.1k
  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
3370
30.1k
                           DAG.getVTList(ValValueVTs), Values));
3371
30.1k
}
3372
3373
3.58M
void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
3374
3.58M
  Value *Op0 = I.getOperand(0);
3375
3.58M
  // Note that the pointer operand may be a vector of pointers. Take the scalar
3376
3.58M
  // element which holds a pointer.
3377
3.58M
  unsigned AS = Op0->getType()->getScalarType()->getPointerAddressSpace();
3378
3.58M
  SDValue N = getValue(Op0);
3379
3.58M
  SDLoc dl = getCurSDLoc();
3380
3.58M
3381
3.58M
  // Normalize Vector GEP - all scalar operands should be converted to the
3382
3.58M
  // splat vector.
3383
3.58M
  unsigned VectorWidth = I.getType()->isVectorTy() ?
3384
3.58M
    
cast<VectorType>(I.getType())->getVectorNumElements()173
:
03.58M
;
3385
3.58M
3386
3.58M
  if (
VectorWidth && 3.58M
!N.getValueType().isVector()173
) {
3387
72
    LLVMContext &Context = *DAG.getContext();
3388
72
    EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorWidth);
3389
72
    N = DAG.getSplatBuildVector(VT, dl, N);
3390
72
  }
3391
3.58M
3392
3.58M
  for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I);
3393
10.1M
       
GTI != E10.1M
;
++GTI6.53M
) {
3394
6.53M
    const Value *Idx = GTI.getOperand();
3395
6.53M
    if (StructType *
StTy6.53M
= GTI.getStructTypeOrNull()) {
3396
1.53M
      unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
3397
1.53M
      if (
Field1.53M
) {
3398
1.30M
        // N = N + Offset
3399
1.30M
        uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field);
3400
1.30M
3401
1.30M
        // In an inbounds GEP with an offset that is nonnegative even when
3402
1.30M
        // interpreted as signed, assume there is no unsigned overflow.
3403
1.30M
        SDNodeFlags Flags;
3404
1.30M
        if (
int64_t(Offset) >= 0 && 1.30M
cast<GEPOperator>(I).isInBounds()1.30M
)
3405
1.28M
          Flags.setNoUnsignedWrap(true);
3406
1.30M
3407
1.30M
        N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N,
3408
1.30M
                        DAG.getConstant(Offset, dl, N.getValueType()), Flags);
3409
1.30M
      }
3410
6.53M
    } else {
3411
4.99M
      MVT PtrTy =
3412
4.99M
          DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout(), AS);
3413
4.99M
      unsigned PtrSize = PtrTy.getSizeInBits();
3414
4.99M
      APInt ElementSize(PtrSize, DL->getTypeAllocSize(GTI.getIndexedType()));
3415
4.99M
3416
4.99M
      // If this is a scalar constant or a splat vector of constants,
3417
4.99M
      // handle it quickly.
3418
4.99M
      const auto *CI = dyn_cast<ConstantInt>(Idx);
3419
4.99M
      if (
!CI && 4.99M
isa<ConstantDataVector>(Idx)444k
&&
3420
37
          cast<ConstantDataVector>(Idx)->getSplatValue())
3421
31
        CI = cast<ConstantInt>(cast<ConstantDataVector>(Idx)->getSplatValue());
3422
4.99M
3423
4.99M
      if (
CI4.99M
) {
3424
4.55M
        if (CI->isZero())
3425
2.93M
          continue;
3426
1.61M
        APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize);
3427
1.61M
        LLVMContext &Context = *DAG.getContext();
3428
1.61M
        SDValue OffsVal = VectorWidth ?
3429
37
          DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, PtrTy, VectorWidth)) :
3430
1.61M
          DAG.getConstant(Offs, dl, PtrTy);
3431
1.61M
3432
1.61M
        // In an inbouds GEP with an offset that is nonnegative even when
3433
1.61M
        // interpreted as signed, assume there is no unsigned overflow.
3434
1.61M
        SDNodeFlags Flags;
3435
1.61M
        if (
Offs.isNonNegative() && 1.61M
cast<GEPOperator>(I).isInBounds()1.51M
)
3436
811k
          Flags.setNoUnsignedWrap(true);
3437
4.55M
3438
4.55M
        N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, Flags);
3439
4.55M
        continue;
3440
4.55M
      }
3441
444k
3442
444k
      // N = N + Idx * ElementSize;
3443
444k
      SDValue IdxN = getValue(Idx);
3444
444k
3445
444k
      if (
!IdxN.getValueType().isVector() && 444k
VectorWidth444k
) {
3446
10
        EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorWidth);
3447
10
        IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
3448
10
      }
3449
444k
3450
444k
      // If the index is smaller or larger than intptr_t, truncate or extend
3451
444k
      // it.
3452
444k
      IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
3453
444k
3454
444k
      // If this is a multiply by a power of two, turn it into a shl
3455
444k
      // immediately.  This is a very common case.
3456
444k
      if (
ElementSize != 1444k
) {
3457
277k
        if (
ElementSize.isPowerOf2()277k
) {
3458
207k
          unsigned Amt = ElementSize.logBase2();
3459
207k
          IdxN = DAG.getNode(ISD::SHL, dl,
3460
207k
                             N.getValueType(), IdxN,
3461
207k
                             DAG.getConstant(Amt, dl, IdxN.getValueType()));
3462
277k
        } else {
3463
70.2k
          SDValue Scale = DAG.getConstant(ElementSize, dl, IdxN.getValueType());
3464
70.2k
          IdxN = DAG.getNode(ISD::MUL, dl,
3465
70.2k
                             N.getValueType(), IdxN, Scale);
3466
70.2k
        }
3467
277k
      }
3468
4.99M
3469
4.99M
      N = DAG.getNode(ISD::ADD, dl,
3470
4.99M
                      N.getValueType(), N, IdxN);
3471
4.99M
    }
3472
6.53M
  }
3473
3.58M
3474
3.58M
  setValue(&I, N);
3475
3.58M
}
3476
3477
84.9k
void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
3478
84.9k
  // If this is a fixed sized alloca in the entry block of the function,
3479
84.9k
  // allocate it statically on the stack.
3480
84.9k
  if (FuncInfo.StaticAllocaMap.count(&I))
3481
84.3k
    return;   // getValue will auto-populate this.
3482
634
3483
634
  SDLoc dl = getCurSDLoc();
3484
634
  Type *Ty = I.getAllocatedType();
3485
634
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3486
634
  auto &DL = DAG.getDataLayout();
3487
634
  uint64_t TySize = DL.getTypeAllocSize(Ty);
3488
634
  unsigned Align =
3489
634
      std::max((unsigned)DL.getPrefTypeAlignment(Ty), I.getAlignment());
3490
634
3491
634
  SDValue AllocSize = getValue(I.getArraySize());
3492
634
3493
634
  EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
3494
634
  if (AllocSize.getValueType() != IntPtr)
3495
108
    AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr);
3496
634
3497
634
  AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr,
3498
634
                          AllocSize,
3499
634
                          DAG.getConstant(TySize, dl, IntPtr));
3500
634
3501
634
  // Handle alignment.  If the requested alignment is less than or equal to
3502
634
  // the stack alignment, ignore it.  If the size is greater than or equal to
3503
634
  // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
3504
634
  unsigned StackAlign =
3505
634
      DAG.getSubtarget().getFrameLowering()->getStackAlignment();
3506
634
  if (Align <= StackAlign)
3507
555
    Align = 0;
3508
84.9k
3509
84.9k
  // Round the size of the allocation up to the stack alignment size
3510
84.9k
  // by add SA-1 to the size. This doesn't overflow because we're computing
3511
84.9k
  // an address inside an alloca.
3512
84.9k
  SDNodeFlags Flags;
3513
84.9k
  Flags.setNoUnsignedWrap(true);
3514
84.9k
  AllocSize = DAG.getNode(ISD::ADD, dl,
3515
84.9k
                          AllocSize.getValueType(), AllocSize,
3516
84.9k
                          DAG.getIntPtrConstant(StackAlign - 1, dl), Flags);
3517
84.9k
3518
84.9k
  // Mask out the low bits for alignment purposes.
3519
84.9k
  AllocSize = DAG.getNode(ISD::AND, dl,
3520
84.9k
                          AllocSize.getValueType(), AllocSize,
3521
84.9k
                          DAG.getIntPtrConstant(~(uint64_t)(StackAlign - 1),
3522
84.9k
                                                dl));
3523
84.9k
3524
84.9k
  SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align, dl) };
3525
84.9k
  SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
3526
84.9k
  SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops);
3527
84.9k
  setValue(&I, DSA);
3528
84.9k
  DAG.setRoot(DSA.getValue(1));
3529
84.9k
3530
84.9k
  assert(FuncInfo.MF->getFrameInfo().hasVarSizedObjects());
3531
84.9k
}
3532
3533
1.91M
void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
3534
1.91M
  if (I.isAtomic())
3535
1.01k
    return visitAtomicLoad(I);
3536
1.91M
3537
1.91M
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3538
1.91M
  const Value *SV = I.getOperand(0);
3539
1.91M
  if (
TLI.supportSwiftError()1.91M
) {
3540
1.87M
    // Swifterror values can come from either a function parameter with
3541
1.87M
    // swifterror attribute or an alloca with swifterror attribute.
3542
1.87M
    if (const Argument *
Arg1.87M
= dyn_cast<Argument>(SV)) {
3543
30.3k
      if (Arg->hasSwiftErrorAttr())
3544
6
        return visitLoadFromSwiftError(I);
3545
1.87M
    }
3546
1.87M
3547
1.87M
    
if (const AllocaInst *1.87M
Alloca1.87M
= dyn_cast<AllocaInst>(SV)) {
3548
24.7k
      if (Alloca->isSwiftError())
3549
47
        return visitLoadFromSwiftError(I);
3550
1.91M
    }
3551
1.87M
  }
3552
1.91M
3553
1.91M
  SDValue Ptr = getValue(SV);
3554
1.91M
3555
1.91M
  Type *Ty = I.getType();
3556
1.91M
3557
1.91M
  bool isVolatile = I.isVolatile();
3558
1.91M
  bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr;
3559
1.91M
  bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr;
3560
1.91M
  bool isDereferenceable = isDereferenceablePointer(SV, DAG.getDataLayout());
3561
1.91M
  unsigned Alignment = I.getAlignment();
3562
1.91M
3563
1.91M
  AAMDNodes AAInfo;
3564
1.91M
  I.getAAMetadata(AAInfo);
3565
1.91M
  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
3566
1.91M
3567
1.91M
  SmallVector<EVT, 4> ValueVTs;
3568
1.91M
  SmallVector<uint64_t, 4> Offsets;
3569
1.91M
  ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &Offsets);
3570
1.91M
  unsigned NumValues = ValueVTs.size();
3571
1.91M
  if (NumValues == 0)
3572
3
    return;
3573
1.91M
3574
1.91M
  SDValue Root;
3575
1.91M
  bool ConstantMemory = false;
3576
1.91M
  if (
isVolatile || 1.91M
NumValues > MaxParallelChains1.90M
)
3577
1.91M
    // Serialize volatile loads with other side effects.
3578
9.45k
    Root = getRoot();
3579
1.90M
  else 
if (1.90M
AA && 1.90M
AA->pointsToConstantMemory(MemoryLocation(
3580
1.90M
               SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) {
3581
13.4k
    // Do not serialize (non-volatile) loads of constant memory with anything.
3582
13.4k
    Root = DAG.getEntryNode();
3583
13.4k
    ConstantMemory = true;
3584
1.90M
  } else {
3585
1.89M
    // Do not serialize non-volatile loads against each other.
3586
1.89M
    Root = DAG.getRoot();
3587
1.89M
  }
3588
1.91M
3589
1.91M
  SDLoc dl = getCurSDLoc();
3590
1.91M
3591
1.91M
  if (isVolatile)
3592
9.45k
    Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG);
3593
1.91M
3594
1.91M
  // An aggregate load cannot wrap around the address space, so offsets to its
3595
1.91M
  // parts don't wrap either.
3596
1.91M
  SDNodeFlags Flags;
3597
1.91M
  Flags.setNoUnsignedWrap(true);
3598
1.91M
3599
1.91M
  SmallVector<SDValue, 4> Values(NumValues);
3600
1.91M
  SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
3601
1.91M
  EVT PtrVT = Ptr.getValueType();
3602
1.91M
  unsigned ChainI = 0;
3603
3.82M
  for (unsigned i = 0; 
i != NumValues3.82M
;
++i, ++ChainI1.91M
) {
3604
1.91M
    // Serializing loads here may result in excessive register pressure, and
3605
1.91M
    // TokenFactor places arbitrary choke points on the scheduler. SD scheduling
3606
1.91M
    // could recover a bit by hoisting nodes upward in the chain by recognizing
3607
1.91M
    // they are side-effect free or do not alias. The optimizer should really
3608
1.91M
    // avoid this case by converting large object/array copies to llvm.memcpy
3609
1.91M
    // (MaxParallelChains should always remain as failsafe).
3610
1.91M
    if (
ChainI == MaxParallelChains1.91M
) {
3611
0
      assert(PendingLoads.empty() && "PendingLoads must be serialized first");
3612
0
      SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3613
0
                                  makeArrayRef(Chains.data(), ChainI));
3614
0
      Root = Chain;
3615
0
      ChainI = 0;
3616
0
    }
3617
1.91M
    SDValue A = DAG.getNode(ISD::ADD, dl,
3618
1.91M
                            PtrVT, Ptr,
3619
1.91M
                            DAG.getConstant(Offsets[i], dl, PtrVT),
3620
1.91M
                            Flags);
3621
1.91M
    auto MMOFlags = MachineMemOperand::MONone;
3622
1.91M
    if (isVolatile)
3623
9.73k
      MMOFlags |= MachineMemOperand::MOVolatile;
3624
1.91M
    if (isNonTemporal)
3625
491
      MMOFlags |= MachineMemOperand::MONonTemporal;
3626
1.91M
    if (isInvariant)
3627
409
      MMOFlags |= MachineMemOperand::MOInvariant;
3628
1.91M
    if (isDereferenceable)
3629
390k
      MMOFlags |= MachineMemOperand::MODereferenceable;
3630
1.91M
    MMOFlags |= TLI.getMMOFlags(I);
3631
1.91M
3632
1.91M
    SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A,
3633
1.91M
                            MachinePointerInfo(SV, Offsets[i]), Alignment,
3634
1.91M
                            MMOFlags, AAInfo, Ranges);
3635
1.91M
3636
1.91M
    Values[i] = L;
3637
1.91M
    Chains[ChainI] = L.getValue(1);
3638
1.91M
  }
3639
1.91M
3640
1.91M
  if (
!ConstantMemory1.91M
) {
3641
1.90M
    SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3642
1.90M
                                makeArrayRef(Chains.data(), ChainI));
3643
1.90M
    if (isVolatile)
3644
9.45k
      DAG.setRoot(Chain);
3645
1.90M
    else
3646
1.89M
      PendingLoads.push_back(Chain);
3647
1.90M
  }
3648
1.91M
3649
1.91M
  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, dl,
3650
1.91M
                           DAG.getVTList(ValueVTs), Values));
3651
1.91M
}
3652
3653
108
void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
3654
108
  assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
3655
108
         "call visitStoreToSwiftError when backend supports swifterror");
3656
108
3657
108
  SmallVector<EVT, 4> ValueVTs;
3658
108
  SmallVector<uint64_t, 4> Offsets;
3659
108
  const Value *SrcV = I.getOperand(0);
3660
108
  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
3661
108
                  SrcV->getType(), ValueVTs, &Offsets);
3662
108
  assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
3663
108
         "expect a single EVT for swifterror");
3664
108
3665
108
  SDValue Src = getValue(SrcV);
3666
108
  // Create a virtual register, then update the virtual register.
3667
108
  unsigned VReg; bool CreatedVReg;
3668
108
  std::tie(VReg, CreatedVReg) = FuncInfo.getOrCreateSwiftErrorVRegDefAt(&I);
3669
108
  // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue
3670
108
  // Chain can be getRoot or getControlRoot.
3671
108
  SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg,
3672
108
                                      SDValue(Src.getNode(), Src.getResNo()));
3673
108
  DAG.setRoot(CopyNode);
3674
108
  if (CreatedVReg)
3675
69
    FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg);
3676
108
}
3677
3678
53
void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
3679
53
  assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
3680
53
         "call visitLoadFromSwiftError when backend supports swifterror");
3681
53
3682
53
  assert(!I.isVolatile() &&
3683
53
         I.getMetadata(LLVMContext::MD_nontemporal) == nullptr &&
3684
53
         I.getMetadata(LLVMContext::MD_invariant_load) == nullptr &&
3685
53
         "Support volatile, non temporal, invariant for load_from_swift_error");
3686
53
3687
53
  const Value *SV = I.getOperand(0);
3688
53
  Type *Ty = I.getType();
3689
53
  AAMDNodes AAInfo;
3690
53
  I.getAAMetadata(AAInfo);
3691
53
  assert((!AA || !AA->pointsToConstantMemory(MemoryLocation(
3692
53
             SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) &&
3693
53
         "load_from_swift_error should not be constant memory");
3694
53
3695
53
  SmallVector<EVT, 4> ValueVTs;
3696
53
  SmallVector<uint64_t, 4> Offsets;
3697
53
  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty,
3698
53
                  ValueVTs, &Offsets);
3699
53
  assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
3700
53
         "expect a single EVT for swifterror");
3701
53
3702
53
  // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT
3703
53
  SDValue L = DAG.getCopyFromReg(
3704
53
      getRoot(), getCurSDLoc(),
3705
53
      FuncInfo.getOrCreateSwiftErrorVRegUseAt(&I, FuncInfo.MBB, SV).first,
3706
53
      ValueVTs[0]);
3707
53
3708
53
  setValue(&I, L);
3709
53
}
3710
3711
1.46M
void SelectionDAGBuilder::visitStore(const StoreInst &I) {
3712
1.46M
  if (I.isAtomic())
3713
12.1k
    return visitAtomicStore(I);
3714
1.45M
3715
1.45M
  const Value *SrcV = I.getOperand(0);
3716
1.45M
  const Value *PtrV = I.getOperand(1);
3717
1.45M
3718
1.45M
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3719
1.45M
  if (
TLI.supportSwiftError()1.45M
) {
3720
1.41M
    // Swifterror values can come from either a function parameter with
3721
1.41M
    // swifterror attribute or an alloca with swifterror attribute.
3722
1.41M
    if (const Argument *
Arg1.41M
= dyn_cast<Argument>(PtrV)) {
3723
16.4k
      if (Arg->hasSwiftErrorAttr())
3724
45
        return visitStoreToSwiftError(I);
3725
1.41M
    }
3726
1.41M
3727
1.41M
    
if (const AllocaInst *1.41M
Alloca1.41M
= dyn_cast<AllocaInst>(PtrV)) {
3728
40.0k
      if (Alloca->isSwiftError())
3729
63
        return visitStoreToSwiftError(I);
3730
1.45M
    }
3731
1.41M
  }
3732
1.45M
3733
1.45M
  SmallVector<EVT, 4> ValueVTs;
3734
1.45M
  SmallVector<uint64_t, 4> Offsets;
3735
1.45M
  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
3736
1.45M
                  SrcV->getType(), ValueVTs, &Offsets);
3737
1.45M
  unsigned NumValues = ValueVTs.size();
3738
1.45M
  if (NumValues == 0)
3739
17
    return;
3740
1.45M
3741
1.45M
  // Get the lowered operands. Note that we do this after
3742
1.45M
  // checking if NumResults is zero, because with zero results
3743
1.45M
  // the operands won't have values in the map.
3744
1.45M
  SDValue Src = getValue(SrcV);
3745
1.45M
  SDValue Ptr = getValue(PtrV);
3746
1.45M
3747
1.45M
  SDValue Root = getRoot();
3748
1.45M
  SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
3749
1.45M
  SDLoc dl = getCurSDLoc();
3750
1.45M
  EVT PtrVT = Ptr.getValueType();
3751
1.45M
  unsigned Alignment = I.getAlignment();
3752
1.45M
  AAMDNodes AAInfo;
3753
1.45M
  I.getAAMetadata(AAInfo);
3754
1.45M
3755
1.45M
  auto MMOFlags = MachineMemOperand::MONone;
3756
1.45M
  if (I.isVolatile())
3757
9.97k
    MMOFlags |= MachineMemOperand::MOVolatile;
3758
1.45M
  if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
3759
538
    MMOFlags |= MachineMemOperand::MONonTemporal;
3760
1.45M
  MMOFlags |= TLI.getMMOFlags(I);
3761
1.45M
3762
1.45M
  // An aggregate load cannot wrap around the address space, so offsets to its
3763
1.45M
  // parts don't wrap either.
3764
1.45M
  SDNodeFlags Flags;
3765
1.45M
  Flags.setNoUnsignedWrap(true);
3766
1.45M
3767
1.45M
  unsigned ChainI = 0;
3768
2.91M
  for (unsigned i = 0; 
i != NumValues2.91M
;
++i, ++ChainI1.45M
) {
3769
1.45M
    // See visitLoad comments.
3770
1.45M
    if (
ChainI == MaxParallelChains1.45M
) {
3771
0
      SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3772
0
                                  makeArrayRef(Chains.data(), ChainI));
3773
0
      Root = Chain;
3774
0
      ChainI = 0;
3775
0
    }
3776
1.45M
    SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
3777
1.45M
                              DAG.getConstant(Offsets[i], dl, PtrVT), Flags);
3778
1.45M
    SDValue St = DAG.getStore(
3779
1.45M
        Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add,
3780
1.45M
        MachinePointerInfo(PtrV, Offsets[i]), Alignment, MMOFlags, AAInfo);
3781
1.45M
    Chains[ChainI] = St;
3782
1.45M
  }
3783
1.46M
3784
1.46M
  SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3785
1.46M
                                  makeArrayRef(Chains.data(), ChainI));
3786
1.46M
  DAG.setRoot(StoreNode);
3787
1.46M
}
3788
3789
void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
3790
120
                                           bool IsCompressing) {
3791
120
  SDLoc sdl = getCurSDLoc();
3792
120
3793
120
  auto getMaskedStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
3794
100
                           unsigned& Alignment) {
3795
100
    // llvm.masked.store.*(Src0, Ptr, alignment, Mask)
3796
100
    Src0 = I.getArgOperand(0);
3797
100
    Ptr = I.getArgOperand(1);
3798
100
    Alignment = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
3799
100
    Mask = I.getArgOperand(3);
3800
100
  };
3801
120
  auto getCompressingStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
3802
20
                           unsigned& Alignment) {
3803
20
    // llvm.masked.compressstore.*(Src0, Ptr, Mask)
3804
20
    Src0 = I.getArgOperand(0);
3805
20
    Ptr = I.getArgOperand(1);
3806
20
    Mask = I.getArgOperand(2);
3807
20
    Alignment = 0;
3808
20
  };
3809
120
3810
120
  Value  *PtrOperand, *MaskOperand, *Src0Operand;
3811
120
  unsigned Alignment;
3812
120
  if (IsCompressing)
3813
20
    getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
3814
120
  else
3815
100
    getMaskedStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
3816
120
3817
120
  SDValue Ptr = getValue(PtrOperand);
3818
120
  SDValue Src0 = getValue(Src0Operand);
3819
120
  SDValue Mask = getValue(MaskOperand);
3820
120
3821
120
  EVT VT = Src0.getValueType();
3822
120
  if (!Alignment)
3823
20
    Alignment = DAG.getEVTAlignment(VT);
3824
120
3825
120
  AAMDNodes AAInfo;
3826
120
  I.getAAMetadata(AAInfo);
3827
120
3828
120
  MachineMemOperand *MMO =
3829
120
    DAG.getMachineFunction().
3830
120
    getMachineMemOperand(MachinePointerInfo(PtrOperand),
3831
120
                          MachineMemOperand::MOStore,  VT.getStoreSize(),
3832
120
                          Alignment, AAInfo);
3833
120
  SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT,
3834
120
                                         MMO, false /* Truncating */,
3835
120
                                         IsCompressing);
3836
120
  DAG.setRoot(StoreNode);
3837
120
  setValue(&I, StoreNode);
3838
120
}
3839
3840
// Get a uniform base for the Gather/Scatter intrinsic.
3841
// The first argument of the Gather/Scatter intrinsic is a vector of pointers.
3842
// We try to represent it as a base pointer + vector of indices.
3843
// Usually, the vector of pointers comes from a 'getelementptr' instruction.
3844
// The first operand of the GEP may be a single pointer or a vector of pointers
3845
// Example:
3846
//   %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind
3847
//  or
3848
//   %gep.ptr = getelementptr i32, i32* %ptr,        <8 x i32> %ind
3849
// %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, ..
3850
//
3851
// When the first GEP operand is a single pointer - it is the uniform base we
3852
// are looking for. If first operand of the GEP is a splat vector - we
3853
// extract the spalt value and use it as a uniform base.
3854
// In all other cases the function returns 'false'.
3855
static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
3856
236
                           SelectionDAGBuilder* SDB) {
3857
236
  SelectionDAG& DAG = SDB->DAG;
3858
236
  LLVMContext &Context = *DAG.getContext();
3859
236
3860
236
  assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type");
3861
236
  const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
3862
236
  if (
!GEP || 236
GEP->getNumOperands() > 2135
)
3863
111
    return false;
3864
125
3865
125
  const Value *GEPPtr = GEP->getPointerOperand();
3866
125
  if (!GEPPtr->getType()->isVectorTy())
3867
65
    Ptr = GEPPtr;
3868
60
  else 
if (60
!(Ptr = getSplatValue(GEPPtr))60
)
3869
5
    return false;
3870
120
3871
120
  Value *IndexVal = GEP->getOperand(1);
3872
120
3873
120
  // The operands of the GEP may be defined in another basic block.
3874
120
  // In this case we'll not find nodes for the operands.
3875
120
  if (
!SDB->findValue(Ptr) || 120
!SDB->findValue(IndexVal)120
)
3876
11
    return false;
3877
109
3878
109
  Base = SDB->getValue(Ptr);
3879
109
  Index = SDB->getValue(IndexVal);
3880
109
3881
109
  // Suppress sign extension.
3882
109
  if (SExtInst* 
Sext109
= dyn_cast<SExtInst>(IndexVal)) {
3883
64
    if (
SDB->findValue(Sext->getOperand(0))64
) {
3884
64
      IndexVal = Sext->getOperand(0);
3885
64
      Index = SDB->getValue(IndexVal);
3886
64
    }
3887
64
  }
3888
109
  if (
!Index.getValueType().isVector()109
) {
3889
5
    unsigned GEPWidth = GEP->getType()->getVectorNumElements();
3890
5
    EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth);
3891
5
    Index = DAG.getSplatBuildVector(VT, SDLoc(Index), Index);
3892
5
  }
3893
236
  return true;
3894
236
}
3895
3896
61
void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
3897
61
  SDLoc sdl = getCurSDLoc();
3898
61
3899
61
  // llvm.masked.scatter.*(Src0, Ptrs, alignemt, Mask)
3900
61
  const Value *Ptr = I.getArgOperand(1);
3901
61
  SDValue Src0 = getValue(I.getArgOperand(0));
3902
61
  SDValue Mask = getValue(I.getArgOperand(3));
3903
61
  EVT VT = Src0.getValueType();
3904
61
  unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue();
3905
61
  if (!Alignment)
3906
0
    Alignment = DAG.getEVTAlignment(VT);
3907
61
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3908
61
3909
61
  AAMDNodes AAInfo;
3910
61
  I.getAAMetadata(AAInfo);
3911
61
3912
61
  SDValue Base;
3913
61
  SDValue Index;
3914
61
  const Value *BasePtr = Ptr;
3915
61
  bool UniformBase = getUniformBase(BasePtr, Base, Index, this);
3916
61
3917
61
  const Value *MemOpBasePtr = UniformBase ? 
BasePtr15
:
nullptr46
;
3918
61
  MachineMemOperand *MMO = DAG.getMachineFunction().
3919
61
    getMachineMemOperand(MachinePointerInfo(MemOpBasePtr),
3920
61
                         MachineMemOperand::MOStore,  VT.getStoreSize(),
3921
61
                         Alignment, AAInfo);
3922
61
  if (
!UniformBase61
) {
3923
46
    Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
3924
46
    Index = getValue(Ptr);
3925
46
  }
3926
61
  SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index };
3927
61
  SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,
3928
61
                                         Ops, MMO);
3929
61
  DAG.setRoot(Scatter);
3930
61
  setValue(&I, Scatter);
3931
61
}
3932
3933
227
void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
3934
227
  SDLoc sdl = getCurSDLoc();
3935
227
3936
227
  auto getMaskedLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
3937
211
                           unsigned& Alignment) {
3938
211
    // @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
3939
211
    Ptr = I.getArgOperand(0);
3940
211
    Alignment = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
3941
211
    Mask = I.getArgOperand(2);
3942
211
    Src0 = I.getArgOperand(3);
3943
211
  };
3944
227
  auto getExpandingLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
3945
16
                           unsigned& Alignment) {
3946
16
    // @llvm.masked.expandload.*(Ptr, Mask, Src0)
3947
16
    Ptr = I.getArgOperand(0);
3948
16
    Alignment = 0;
3949
16
    Mask = I.getArgOperand(1);
3950
16
    Src0 = I.getArgOperand(2);
3951
16
  };
3952
227
3953
227
  Value  *PtrOperand, *MaskOperand, *Src0Operand;
3954
227
  unsigned Alignment;
3955
227
  if (IsExpanding)
3956
16
    getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
3957
227
  else
3958
211
    getMaskedLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
3959
227
3960
227
  SDValue Ptr = getValue(PtrOperand);
3961
227
  SDValue Src0 = getValue(Src0Operand);
3962
227
  SDValue Mask = getValue(MaskOperand);
3963
227
3964
227
  EVT VT = Src0.getValueType();
3965
227
  if (!Alignment)
3966
16
    Alignment = DAG.getEVTAlignment(VT);
3967
227
3968
227
  AAMDNodes AAInfo;
3969
227
  I.getAAMetadata(AAInfo);
3970
227
  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
3971
227
3972
227
  // Do not serialize masked loads of constant memory with anything.
3973
227
  bool AddToChain = !AA || !AA->pointsToConstantMemory(MemoryLocation(
3974
227
      PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), AAInfo));
3975
227
  SDValue InChain = AddToChain ? 
DAG.getRoot()226
:
DAG.getEntryNode()1
;
3976
227
3977
227
  MachineMemOperand *MMO =
3978
227
    DAG.getMachineFunction().
3979
227
    getMachineMemOperand(MachinePointerInfo(PtrOperand),
3980
227
                          MachineMemOperand::MOLoad,  VT.getStoreSize(),
3981
227
                          Alignment, AAInfo, Ranges);
3982
227
3983
227
  SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO,
3984
227
                                   ISD::NON_EXTLOAD, IsExpanding);
3985
227
  if (
AddToChain227
) {
3986
226
    SDValue OutChain = Load.getValue(1);
3987
226
    DAG.setRoot(OutChain);
3988
226
  }
3989
227
  setValue(&I, Load);
3990
227
}
3991
3992
175
void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
3993
175
  SDLoc sdl = getCurSDLoc();
3994
175
3995
175
  // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
3996
175
  const Value *Ptr = I.getArgOperand(0);
3997
175
  SDValue Src0 = getValue(I.getArgOperand(3));
3998
175
  SDValue Mask = getValue(I.getArgOperand(2));
3999
175
4000
175
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4001
175
  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
4002
175
  unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue();
4003
175
  if (!Alignment)
4004
0
    Alignment = DAG.getEVTAlignment(VT);
4005
175
4006
175
  AAMDNodes AAInfo;
4007
175
  I.getAAMetadata(AAInfo);
4008
175
  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
4009
175
4010
175
  SDValue Root = DAG.getRoot();
4011
175
  SDValue Base;
4012
175
  SDValue Index;
4013
175
  const Value *BasePtr = Ptr;
4014
175
  bool UniformBase = getUniformBase(BasePtr, Base, Index, this);
4015
175
  bool ConstantMemory = false;
4016
175
  if (UniformBase &&
4017
175
      
AA94
&& AA->pointsToConstantMemory(MemoryLocation(
4018
84
          BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()),
4019
175
          AAInfo))) {
4020
0
    // Do not serialize (non-volatile) loads of constant memory with anything.
4021
0
    Root = DAG.getEntryNode();
4022
0
    ConstantMemory = true;
4023
0
  }
4024
175
4025
175
  MachineMemOperand *MMO =
4026
175
    DAG.getMachineFunction().
4027
175
    getMachineMemOperand(MachinePointerInfo(UniformBase ? 
BasePtr94
:
nullptr81
),
4028
175
                         MachineMemOperand::MOLoad,  VT.getStoreSize(),
4029
175
                         Alignment, AAInfo, Ranges);
4030
175
4031
175
  if (
!UniformBase175
) {
4032
81
    Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
4033
81
    Index = getValue(Ptr);
4034
81
  }
4035
175
  SDValue Ops[] = { Root, Src0, Mask, Base, Index };
4036
175
  SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
4037
175
                                       Ops, MMO);
4038
175
4039
175
  SDValue OutChain = Gather.getValue(1);
4040
175
  if (!ConstantMemory)
4041
175
    PendingLoads.push_back(OutChain);
4042
175
  setValue(&I, Gather);
4043
175
}
4044
4045
2.47k
void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
4046
2.47k
  SDLoc dl = getCurSDLoc();
4047
2.47k
  AtomicOrdering SuccessOrder = I.getSuccessOrdering();
4048
2.47k
  AtomicOrdering FailureOrder = I.getFailureOrdering();
4049
2.47k
  SyncScope::ID SSID = I.getSyncScopeID();
4050
2.47k
4051
2.47k
  SDValue InChain = getRoot();
4052
2.47k
4053
2.47k
  MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
4054
2.47k
  SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);
4055
2.47k
  SDValue L = DAG.getAtomicCmpSwap(
4056
2.47k
      ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain,
4057
2.47k
      getValue(I.getPointerOperand()), getValue(I.getCompareOperand()),
4058
2.47k
      getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()),
4059
2.47k
      /*Alignment=*/ 0, SuccessOrder, FailureOrder, SSID);
4060
2.47k
4061
2.47k
  SDValue OutChain = L.getValue(2);
4062
2.47k
4063
2.47k
  setValue(&I, L);
4064
2.47k
  DAG.setRoot(OutChain);
4065
2.47k
}
4066
4067
5.10k
void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
4068
5.10k
  SDLoc dl = getCurSDLoc();
4069
5.10k
  ISD::NodeType NT;
4070
5.10k
  switch (I.getOperation()) {
4071
0
  
default: 0
llvm_unreachable0
("Unknown atomicrmw operation");
4072
1.07k
  case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
4073
860
  case AtomicRMWInst::Add:  NT = ISD::ATOMIC_LOAD_ADD; break;
4074
693
  case AtomicRMWInst::Sub:  NT = ISD::ATOMIC_LOAD_SUB; break;
4075
382
  case AtomicRMWInst::And:  NT = ISD::ATOMIC_LOAD_AND; break;
4076
122
  case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break;
4077
383
  case AtomicRMWInst::Or:   NT = ISD::ATOMIC_LOAD_OR; break;
4078
368
  case AtomicRMWInst::Xor:  NT = ISD::ATOMIC_LOAD_XOR; break;
4079
300
  case AtomicRMWInst::Max:  NT = ISD::ATOMIC_LOAD_MAX; break;
4080
313
  case AtomicRMWInst::Min:  NT = ISD::ATOMIC_LOAD_MIN; break;
4081
304
  case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
4082
304
  case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
4083
5.10k
  }
4084
5.10k
  AtomicOrdering Order = I.getOrdering();
4085
5.10k
  SyncScope::ID SSID = I.getSyncScopeID();
4086
5.10k
4087
5.10k
  SDValue InChain = getRoot();
4088
5.10k
4089
5.10k
  SDValue L =
4090
5.10k
    DAG.getAtomic(NT, dl,
4091
5.10k
                  getValue(I.getValOperand()).getSimpleValueType(),
4092
5.10k
                  InChain,
4093
5.10k
                  getValue(I.getPointerOperand()),
4094
5.10k
                  getValue(I.getValOperand()),
4095
5.10k
                  I.getPointerOperand(),
4096
5.10k
                  /* Alignment=*/ 0, Order, SSID);
4097
5.10k
4098
5.10k
  SDValue OutChain = L.getValue(1);
4099
5.10k
4100
5.10k
  setValue(&I, L);
4101
5.10k
  DAG.setRoot(OutChain);
4102
5.10k
}
4103
4104
3.75k
void SelectionDAGBuilder::visitFence(const FenceInst &I) {
4105
3.75k
  SDLoc dl = getCurSDLoc();
4106
3.75k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4107
3.75k
  SDValue Ops[3];
4108
3.75k
  Ops[0] = getRoot();
4109
3.75k
  Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl,
4110
3.75k
                           TLI.getFenceOperandTy(DAG.getDataLayout()));
4111
3.75k
  Ops[2] = DAG.getConstant(I.getSyncScopeID(), dl,
4112
3.75k
                           TLI.getFenceOperandTy(DAG.getDataLayout()));
4113
3.75k
  DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops));
4114
3.75k
}
4115
4116
1.01k
void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
4117
1.01k
  SDLoc dl = getCurSDLoc();
4118
1.01k
  AtomicOrdering Order = I.getOrdering();
4119
1.01k
  SyncScope::ID SSID = I.getSyncScopeID();
4120
1.01k
4121
1.01k
  SDValue InChain = getRoot();
4122
1.01k
4123
1.01k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4124
1.01k
  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
4125
1.01k
4126
1.01k
  if (I.getAlignment() < VT.getSizeInBits() / 8)
4127
0
    report_fatal_error("Cannot generate unaligned atomic load");
4128
1.01k
4129
1.01k
  MachineMemOperand *MMO =
4130
1.01k
      DAG.getMachineFunction().
4131
1.01k
      getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
4132
1.01k
                           MachineMemOperand::MOVolatile |
4133
1.01k
                           MachineMemOperand::MOLoad,
4134
1.01k
                           VT.getStoreSize(),
4135
1.01k
                           I.getAlignment() ? I.getAlignment() :
4136
0
                                              DAG.getEVTAlignment(VT),
4137
1.01k
                           AAMDNodes(), nullptr, SSID, Order);
4138
1.01k
4139
1.01k
  InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
4140
1.01k
  SDValue L =
4141
1.01k
      DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
4142
1.01k
                    getValue(I.getPointerOperand()), MMO);
4143
1.01k
4144
1.01k
  SDValue OutChain = L.getValue(1);
4145
1.01k
4146
1.01k
  setValue(&I, L);
4147
1.01k
  DAG.setRoot(OutChain);
4148
1.01k
}
4149
4150
12.1k
void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
4151
12.1k
  SDLoc dl = getCurSDLoc();
4152
12.1k
4153
12.1k
  AtomicOrdering Order = I.getOrdering();
4154
12.1k
  SyncScope::ID SSID = I.getSyncScopeID();
4155
12.1k
4156
12.1k
  SDValue InChain = getRoot();
4157
12.1k
4158
12.1k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4159
12.1k
  EVT VT =
4160
12.1k
      TLI.getValueType(DAG.getDataLayout(), I.getValueOperand()->getType());
4161
12.1k
4162
12.1k
  if (I.getAlignment() < VT.getSizeInBits() / 8)
4163
0
    report_fatal_error("Cannot generate unaligned atomic store");
4164
12.1k
4165
12.1k
  SDValue OutChain =
4166
12.1k
    DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT,
4167
12.1k
                  InChain,
4168
12.1k
                  getValue(I.getPointerOperand()),
4169
12.1k
                  getValue(I.getValueOperand()),
4170
12.1k
                  I.getPointerOperand(), I.getAlignment(),
4171
12.1k
                  Order, SSID);
4172
12.1k
4173
12.1k
  DAG.setRoot(OutChain);
4174
12.1k
}
4175
4176
/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
4177
/// node.
4178
void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
4179
171k
                                               unsigned Intrinsic) {
4180
171k
  // Ignore the callsite's attributes. A specific call site may be marked with
4181
171k
  // readnone, but the lowering code will expect the chain based on the
4182
171k
  // definition.
4183
171k
  const Function *F = I.getCalledFunction();
4184
171k
  bool HasChain = !F->doesNotAccessMemory();
4185
139k
  bool OnlyLoad = HasChain && F->onlyReadsMemory();
4186
171k
4187
171k
  // Build the operand list.
4188
171k
  SmallVector<SDValue, 8> Ops;
4189
171k
  if (
HasChain171k
) { // If this intrinsic has side-effects, chainify it.
4190
139k
    if (
OnlyLoad139k
) {
4191
2.96k
      // We don't need to serialize loads against other loads.
4192
2.96k
      Ops.push_back(DAG.getRoot());
4193
139k
    } else {
4194
136k
      Ops.push_back(getRoot());
4195
136k
    }
4196
139k
  }
4197
171k
4198
171k
  // Info is set by getTgtMemInstrinsic
4199
171k
  TargetLowering::IntrinsicInfo Info;
4200
171k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4201
171k
  bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
4202
171k
4203
171k
  // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
4204
171k
  if (
!IsTgtIntrinsic || 171k
Info.opc == ISD::INTRINSIC_VOID122k
||
4205
96.5k
      Info.opc == ISD::INTRINSIC_W_CHAIN)
4206
171k
    Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(),
4207
171k
                                        TLI.getPointerTy(DAG.getDataLayout())));
4208
171k
4209
171k
  // Add all operands of the call to the operand list.
4210
500k
  for (unsigned i = 0, e = I.getNumArgOperands(); 
i != e500k
;
++i328k
) {
4211
328k
    SDValue Op = getValue(I.getArgOperand(i));
4212
328k
    Ops.push_back(Op);
4213
328k
  }
4214
171k
4215
171k
  SmallVector<EVT, 4> ValueVTs;
4216
171k
  ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
4217
171k
4218
171k
  if (HasChain)
4219
139k
    ValueVTs.push_back(MVT::Other);
4220
171k
4221
171k
  SDVTList VTs = DAG.getVTList(ValueVTs);
4222
171k
4223
171k
  // Create the node.
4224
171k
  SDValue Result;
4225
171k
  if (
IsTgtIntrinsic171k
) {
4226
122k
    // This is target intrinsic that touches memory
4227
122k
    Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(),
4228
122k
                                     VTs, Ops, Info.memVT,
4229
122k
                                   MachinePointerInfo(Info.ptrVal, Info.offset),
4230
122k
                                     Info.align, Info.vol,
4231
122k
                                     Info.readMem, Info.writeMem, Info.size);
4232
171k
  } else 
if (49.5k
!HasChain49.5k
) {
4233
32.5k
    Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
4234
49.5k
  } else 
if (17.0k
!I.getType()->isVoidTy()17.0k
) {
4235
2.15k
    Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
4236
17.0k
  } else {
4237
14.8k
    Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
4238
14.8k
  }
4239
171k
4240
171k
  if (
HasChain171k
) {
4241
139k
    SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
4242
139k
    if (OnlyLoad)
4243
2.96k
      PendingLoads.push_back(Chain);
4244
139k
    else
4245
136k
      DAG.setRoot(Chain);
4246
139k
  }
4247
171k
4248
171k
  if (
!I.getType()->isVoidTy()171k
) {
4249
131k
    if (VectorType *
PTy131k
= dyn_cast<VectorType>(I.getType())) {
4250
22.5k
      EVT VT = TLI.getValueType(DAG.getDataLayout(), PTy);
4251
22.5k
      Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result);
4252
22.5k
    } else
4253
108k
      Result = lowerRangeToAssertZExt(DAG, I, Result);
4254
131k
4255
131k
    setValue(&I, Result);
4256
131k
  }
4257
171k
}
4258
4259
/// GetSignificand - Get the significand and build it into a floating-point
4260
/// number with exponent of 1:
4261
///
4262
///   Op = (Op & 0x007fffff) | 0x3f800000;
4263
///
4264
/// where Op is the hexadecimal representation of floating point value.
4265
9
static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, const SDLoc &dl) {
4266
9
  SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
4267
9
                           DAG.getConstant(0x007fffff, dl, MVT::i32));
4268
9
  SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
4269
9
                           DAG.getConstant(0x3f800000, dl, MVT::i32));
4270
9
  return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
4271
9
}
4272
4273
/// GetExponent - Get the exponent:
4274
///
4275
///   (float)(int)(((Op & 0x7f800000) >> 23) - 127);
4276
///
4277
/// where Op is the hexadecimal representation of floating point value.
4278
static SDValue GetExponent(SelectionDAG &DAG, SDValue Op,
4279
9
                           const TargetLowering &TLI, const SDLoc &dl) {
4280
9
  SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
4281
9
                           DAG.getConstant(0x7f800000, dl, MVT::i32));
4282
9
  SDValue t1 = DAG.getNode(
4283
9
      ISD::SRL, dl, MVT::i32, t0,
4284
9
      DAG.getConstant(23, dl, TLI.getPointerTy(DAG.getDataLayout())));
4285
9
  SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
4286
9
                           DAG.getConstant(127, dl, MVT::i32));
4287
9
  return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
4288
9
}
4289
4290
/// getF32Constant - Get 32-bit floating point constant.
4291
static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt,
4292
97
                              const SDLoc &dl) {
4293
97
  return DAG.getConstantFP(APFloat(APFloat::IEEEsingle(), APInt(32, Flt)), dl,
4294
97
                           MVT::f32);
4295
97
}
4296
4297
static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl,
4298
9
                                       SelectionDAG &DAG) {
4299
9
  // TODO: What fast-math-flags should be set on the floating-point nodes?
4300
9
4301
9
  //   IntegerPartOfX = ((int32_t)(t0);
4302
9
  SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
4303
9
4304
9
  //   FractionalPartOfX = t0 - (float)IntegerPartOfX;
4305
9
  SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
4306
9
  SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
4307
9
4308
9
  //   IntegerPartOfX <<= 23;
4309
9
  IntegerPartOfX = DAG.getNode(
4310
9
      ISD::SHL, dl, MVT::i32, IntegerPartOfX,
4311
9
      DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy(
4312
9
                                  DAG.getDataLayout())));
4313
9
4314
9
  SDValue TwoToFractionalPartOfX;
4315
9
  if (
LimitFloatPrecision <= 69
) {
4316
3
    // For floating-point precision of 6:
4317
3
    //
4318
3
    //   TwoToFractionalPartOfX =
4319
3
    //     0.997535578f +
4320
3
    //       (0.735607626f + 0.252464424f * x) * x;
4321
3
    //
4322
3
    // error 0.0144103317, which is 6 bits
4323
3
    SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4324
3
                             getF32Constant(DAG, 0x3e814304, dl));
4325
3
    SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4326
3
                             getF32Constant(DAG, 0x3f3c50c8, dl));
4327
3
    SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4328
3
    TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4329
3
                                         getF32Constant(DAG, 0x3f7f5e7e, dl));
4330
9
  } else 
if (6
LimitFloatPrecision <= 126
) {
4331
3
    // For floating-point precision of 12:
4332
3
    //
4333
3
    //   TwoToFractionalPartOfX =
4334
3
    //     0.999892986f +
4335
3
    //       (0.696457318f +
4336
3
    //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
4337
3
    //
4338
3
    // error 0.000107046256, which is 13 to 14 bits
4339
3
    SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4340
3
                             getF32Constant(DAG, 0x3da235e3, dl));
4341
3
    SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4342
3
                             getF32Constant(DAG, 0x3e65b8f3, dl));
4343
3
    SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4344
3
    SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4345
3
                             getF32Constant(DAG, 0x3f324b07, dl));
4346
3
    SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4347
3
    TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4348
3
                                         getF32Constant(DAG, 0x3f7ff8fd, dl));
4349
6
  } else { // LimitFloatPrecision <= 18
4350
3
    // For floating-point precision of 18:
4351
3
    //
4352
3
    //   TwoToFractionalPartOfX =
4353
3
    //     0.999999982f +
4354
3
    //       (0.693148872f +
4355
3
    //         (0.240227044f +
4356
3
    //           (0.554906021e-1f +
4357
3
    //             (0.961591928e-2f +
4358
3
    //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
4359
3
    // error 2.47208000*10^(-7), which is better than 18 bits
4360
3
    SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4361
3
                             getF32Constant(DAG, 0x3924b03e, dl));
4362
3
    SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4363
3
                             getF32Constant(DAG, 0x3ab24b87, dl));
4364
3
    SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4365
3
    SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4366
3
                             getF32Constant(DAG, 0x3c1d8c17, dl));
4367
3
    SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4368
3
    SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4369
3
                             getF32Constant(DAG, 0x3d634a1d, dl));
4370
3
    SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4371
3
    SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4372
3
                             getF32Constant(DAG, 0x3e75fe14, dl));
4373
3
    SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4374
3
    SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
4375
3
                              getF32Constant(DAG, 0x3f317234, dl));
4376
3
    SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
4377
3
    TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
4378
3
                                         getF32Constant(DAG, 0x3f800000, dl));
4379
3
  }
4380
9
4381
9
  // Add the exponent into the result in integer domain.
4382
9
  SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX);
4383
9
  return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
4384
9
                     DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX));
4385
9
}
4386
4387
/// expandExp - Lower an exp intrinsic. Handles the special sequences for
4388
/// limited-precision mode.
4389
static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4390
75
                         const TargetLowering &TLI) {
4391
75
  if (Op.getValueType() == MVT::f32 &&
4392
75
      
LimitFloatPrecision > 020
&&
LimitFloatPrecision <= 183
) {
4393
3
4394
3
    // Put the exponent in the right bit position for later addition to the
4395
3
    // final result:
4396
3
    //
4397
3
    //   #define LOG2OFe 1.4426950f
4398
3
    //   t0 = Op * LOG2OFe
4399
3
4400
3
    // TODO: What fast-math-flags should be set here?
4401
3
    SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
4402
3
                             getF32Constant(DAG, 0x3fb8aa3b, dl));
4403
3
    return getLimitedPrecisionExp2(t0, dl, DAG);
4404
3
  }
4405
72
4406
72
  // No special expansion.
4407
72
  return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op);
4408
72
}
4409
4410
/// expandLog - Lower a log intrinsic. Handles the special sequences for
4411
/// limited-precision mode.
4412
static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4413
43
                         const TargetLowering &TLI) {
4414
43
  // TODO: What fast-math-flags should be set on the floating-point nodes?
4415
43
4416
43
  if (Op.getValueType() == MVT::f32 &&
4417
43
      
LimitFloatPrecision > 014
&&
LimitFloatPrecision <= 183
) {
4418
3
    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
4419
3
4420
3
    // Scale the exponent by log(2) [0.69314718f].
4421
3
    SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
4422
3
    SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
4423
3
                                        getF32Constant(DAG, 0x3f317218, dl));
4424
3
4425
3
    // Get the significand and build it into a floating-point number with
4426
3
    // exponent of 1.
4427
3
    SDValue X = GetSignificand(DAG, Op1, dl);
4428
3
4429
3
    SDValue LogOfMantissa;
4430
3
    if (
LimitFloatPrecision <= 63
) {
4431
1
      // For floating-point precision of 6:
4432
1
      //
4433
1
      //   LogofMantissa =
4434
1
      //     -1.1609546f +
4435
1
      //       (1.4034025f - 0.23903021f * x) * x;
4436
1
      //
4437
1
      // error 0.0034276066, which is better than 8 bits
4438
1
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4439
1
                               getF32Constant(DAG, 0xbe74c456, dl));
4440
1
      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4441
1
                               getF32Constant(DAG, 0x3fb3a2b1, dl));
4442
1
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4443
1
      LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4444
1
                                  getF32Constant(DAG, 0x3f949a29, dl));
4445
3
    } else 
if (2
LimitFloatPrecision <= 122
) {
4446
1
      // For floating-point precision of 12:
4447
1
      //
4448
1
      //   LogOfMantissa =
4449
1
      //     -1.7417939f +
4450
1
      //       (2.8212026f +
4451
1
      //         (-1.4699568f +
4452
1
      //           (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
4453
1
      //
4454
1
      // error 0.000061011436, which is 14 bits
4455
1
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4456
1
                               getF32Constant(DAG, 0xbd67b6d6, dl));
4457
1
      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4458
1
                               getF32Constant(DAG, 0x3ee4f4b8, dl));
4459
1
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4460
1
      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4461
1
                               getF32Constant(DAG, 0x3fbc278b, dl));
4462
1
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4463
1
      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4464
1
                               getF32Constant(DAG, 0x40348e95, dl));
4465
1
      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4466
1
      LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4467
1
                                  getF32Constant(DAG, 0x3fdef31a, dl));
4468
2
    } else { // LimitFloatPrecision <= 18
4469
1
      // For floating-point precision of 18:
4470
1
      //
4471
1
      //   LogOfMantissa =
4472
1
      //     -2.1072184f +
4473
1
      //       (4.2372794f +
4474
1
      //         (-3.7029485f +
4475
1
      //           (2.2781945f +
4476
1
      //             (-0.87823314f +
4477
1
      //               (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
4478
1
      //
4479
1
      // error 0.0000023660568, which is better than 18 bits
4480
1
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4481
1
                               getF32Constant(DAG, 0xbc91e5ac, dl));
4482
1
      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4483
1
                               getF32Constant(DAG, 0x3e4350aa, dl));
4484
1
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4485
1
      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4486
1
                               getF32Constant(DAG, 0x3f60d3e3, dl));
4487
1
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4488
1
      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4489
1
                               getF32Constant(DAG, 0x4011cdf0, dl));
4490
1
      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4491
1
      SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4492
1
                               getF32Constant(DAG, 0x406cfd1c, dl));
4493
1
      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4494
1
      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4495
1
                               getF32Constant(DAG, 0x408797cb, dl));
4496
1
      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4497
1
      LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
4498
1
                                  getF32Constant(DAG, 0x4006dcab, dl));
4499
1
    }
4500
3
4501
3
    return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa);
4502
3
  }
4503
40
4504
40
  // No special expansion.
4505
40
  return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op);
4506
40
}
4507
4508
/// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
4509
/// limited-precision mode.
4510
static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4511
67
                          const TargetLowering &TLI) {
4512
67
  // TODO: What fast-math-flags should be set on the floating-point nodes?
4513
67
4514
67
  if (Op.getValueType() == MVT::f32 &&
4515
67
      
LimitFloatPrecision > 023
&&
LimitFloatPrecision <= 183
) {
4516
3
    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
4517
3
4518
3
    // Get the exponent.
4519
3
    SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
4520
3
4521
3
    // Get the significand and build it into a floating-point number with
4522
3
    // exponent of 1.
4523
3
    SDValue X = GetSignificand(DAG, Op1, dl);
4524
3
4525
3
    // Different possible minimax approximations of significand in
4526
3
    // floating-point for various degrees of accuracy over [1,2].
4527
3
    SDValue Log2ofMantissa;
4528
3
    if (
LimitFloatPrecision <= 63
) {
4529
1
      // For floating-point precision of 6:
4530
1
      //
4531
1
      //   Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
4532
1
      //
4533
1
      // error 0.0049451742, which is more than 7 bits
4534
1
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4535
1
                               getF32Constant(DAG, 0xbeb08fe0, dl));
4536
1
      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4537
1
                               getF32Constant(DAG, 0x40019463, dl));
4538
1
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4539
1
      Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4540
1
                                   getF32Constant(DAG, 0x3fd6633d, dl));
4541
3
    } else 
if (2
LimitFloatPrecision <= 122
) {
4542
1
      // For floating-point precision of 12:
4543
1
      //
4544
1
      //   Log2ofMantissa =
4545
1
      //     -2.51285454f +
4546
1
      //       (4.07009056f +
4547
1
      //         (-2.12067489f +
4548
1
      //           (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
4549
1
      //
4550
1
      // error 0.0000876136000, which is better than 13 bits
4551
1
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4552
1
                               getF32Constant(DAG, 0xbda7262e, dl));
4553
1
      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4554
1
                               getF32Constant(DAG, 0x3f25280b, dl));
4555
1
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4556
1
      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4557
1
                               getF32Constant(DAG, 0x4007b923, dl));
4558
1
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4559
1
      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4560
1
                               getF32Constant(DAG, 0x40823e2f, dl));
4561
1
      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4562
1
      Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4563
1
                                   getF32Constant(DAG, 0x4020d29c, dl));
4564
2
    } else { // LimitFloatPrecision <= 18
4565
1
      // For floating-point precision of 18:
4566
1
      //
4567
1
      //   Log2ofMantissa =
4568
1
      //     -3.0400495f +
4569
1
      //       (6.1129976f +
4570
1
      //         (-5.3420409f +
4571
1
      //           (3.2865683f +
4572
1
      //             (-1.2669343f +
4573
1
      //               (0.27515199f -
4574
1
      //                 0.25691327e-1f * x) * x) * x) * x) * x) * x;
4575
1
      //
4576
1
      // error 0.0000018516, which is better than 18 bits
4577
1
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4578
1
                               getF32Constant(DAG, 0xbcd2769e, dl));
4579
1
      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4580
1
                               getF32Constant(DAG, 0x3e8ce0b9, dl));
4581
1
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4582
1
      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4583
1
                               getF32Constant(DAG, 0x3fa22ae7, dl));
4584
1
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4585
1
      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4586
1
                               getF32Constant(DAG, 0x40525723, dl));
4587
1
      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4588
1
      SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4589
1
                               getF32Constant(DAG, 0x40aaf200, dl));
4590
1
      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4591
1
      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4592
1
                               getF32Constant(DAG, 0x40c39dad, dl));
4593
1
      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4594
1
      Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
4595
1
                                   getF32Constant(DAG, 0x4042902c, dl));
4596
1
    }
4597
3
4598
3
    return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa);
4599
3
  }
4600
64
4601
64
  // No special expansion.
4602
64
  return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op);
4603
64
}
4604
4605
/// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
4606
/// limited-precision mode.
4607
static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4608
48
                           const TargetLowering &TLI) {
4609
48
  // TODO: What fast-math-flags should be set on the floating-point nodes?
4610
48
4611
48
  if (Op.getValueType() == MVT::f32 &&
4612
48
      
LimitFloatPrecision > 020
&&
LimitFloatPrecision <= 183
) {
4613
3
    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
4614
3
4615
3
    // Scale the exponent by log10(2) [0.30102999f].
4616
3
    SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
4617
3
    SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
4618
3
                                        getF32Constant(DAG, 0x3e9a209a, dl));
4619
3
4620
3
    // Get the significand and build it into a floating-point number with
4621
3
    // exponent of 1.
4622
3
    SDValue X = GetSignificand(DAG, Op1, dl);
4623
3
4624
3
    SDValue Log10ofMantissa;
4625
3
    if (
LimitFloatPrecision <= 63
) {
4626
1
      // For floating-point precision of 6:
4627
1
      //
4628
1
      //   Log10ofMantissa =
4629
1
      //     -0.50419619f +
4630
1
      //       (0.60948995f - 0.10380950f * x) * x;
4631
1
      //
4632
1
      // error 0.0014886165, which is 6 bits
4633
1
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4634
1
                               getF32Constant(DAG, 0xbdd49a13, dl));
4635
1
      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4636
1
                               getF32Constant(DAG, 0x3f1c0789, dl));
4637
1
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4638
1
      Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4639
1
                                    getF32Constant(DAG, 0x3f011300, dl));
4640
3
    } else 
if (2
LimitFloatPrecision <= 122
) {
4641
1
      // For floating-point precision of 12:
4642
1
      //
4643
1
      //   Log10ofMantissa =
4644
1
      //     -0.64831180f +
4645
1
      //       (0.91751397f +
4646
1
      //         (-0.31664806f + 0.47637168e-1f * x) * x) * x;
4647
1
      //
4648
1
      // error 0.00019228036, which is better than 12 bits
4649
1
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4650
1
                               getF32Constant(DAG, 0x3d431f31, dl));
4651
1
      SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
4652
1
                               getF32Constant(DAG, 0x3ea21fb2, dl));
4653
1
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4654
1
      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4655
1
                               getF32Constant(DAG, 0x3f6ae232, dl));
4656
1
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4657
1
      Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
4658
1
                                    getF32Constant(DAG, 0x3f25f7c3, dl));
4659
2
    } else { // LimitFloatPrecision <= 18
4660
1
      // For floating-point precision of 18:
4661
1
      //
4662
1
      //   Log10ofMantissa =
4663
1
      //     -0.84299375f +
4664
1
      //       (1.5327582f +
4665
1
      //         (-1.0688956f +
4666
1
      //           (0.49102474f +
4667
1
      //             (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
4668
1
      //
4669
1
      // error 0.0000037995730, which is better than 18 bits
4670
1
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4671
1
                               getF32Constant(DAG, 0x3c5d51ce, dl));
4672
1
      SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
4673
1
                               getF32Constant(DAG, 0x3e00685a, dl));
4674
1
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4675
1
      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4676
1
                               getF32Constant(DAG, 0x3efb6798, dl));
4677
1
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4678
1
      SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
4679
1
                               getF32Constant(DAG, 0x3f88d192, dl));
4680
1
      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4681
1
      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4682
1
                               getF32Constant(DAG, 0x3fc4316c, dl));
4683
1
      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4684
1
      Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
4685
1
                                    getF32Constant(DAG, 0x3f57ce70, dl));
4686
1
    }
4687
3
4688
3
    return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa);
4689
3
  }
4690
45
4691
45
  // No special expansion.
4692
45
  return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op);
4693
45
}
4694
4695
/// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
4696
/// limited-precision mode.
4697
static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4698
105
                          const TargetLowering &TLI) {
4699
105
  if (Op.getValueType() == MVT::f32 &&
4700
105
      
LimitFloatPrecision > 033
&&
LimitFloatPrecision <= 183
)
4701
3
    return getLimitedPrecisionExp2(Op, dl, DAG);
4702
102
4703
102
  // No special expansion.
4704
102
  return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op);
4705
102
}
4706
4707
/// visitPow - Lower a pow intrinsic. Handles the special sequences for
4708
/// limited-precision mode with x == 10.0f.
4709
static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS,
4710
162
                         SelectionDAG &DAG, const TargetLowering &TLI) {
4711
162
  bool IsExp10 = false;
4712
162
  if (
LHS.getValueType() == MVT::f32 && 162
RHS.getValueType() == MVT::f3230
&&
4713
162
      
LimitFloatPrecision > 030
&&
LimitFloatPrecision <= 183
) {
4714
3
    if (ConstantFPSDNode *
LHSC3
= dyn_cast<ConstantFPSDNode>(LHS)) {
4715
3
      APFloat Ten(10.0f);
4716
3
      IsExp10 = LHSC->isExactlyValue(Ten);
4717
3
    }
4718
3
  }
4719
162
4720
162
  // TODO: What fast-math-flags should be set on the FMUL node?
4721
162
  if (
IsExp10162
) {
4722
3
    // Put the exponent in the right bit position for later addition to the
4723
3
    // final result:
4724
3
    //
4725
3
    //   #define LOG2OF10 3.3219281f
4726
3
    //   t0 = Op * LOG2OF10;
4727
3
    SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS,
4728
3
                             getF32Constant(DAG, 0x40549a78, dl));
4729
3
    return getLimitedPrecisionExp2(t0, dl, DAG);
4730
3
  }
4731
159
4732
159
  // No special expansion.
4733
159
  return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS);
4734
159
}
4735
4736
/// ExpandPowI - Expand a llvm.powi intrinsic.
4737
static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
4738
97
                          SelectionDAG &DAG) {
4739
97
  // If RHS is a constant, we can expand this out to a multiplication tree,
4740
97
  // otherwise we end up lowering to a call to __powidf2 (for example).  When
4741
97
  // optimizing for size, we only want to do this if the expansion would produce
4742
97
  // a small number of multiplies, otherwise we do the full expansion.
4743
97
  if (ConstantSDNode *
RHSC97
= dyn_cast<ConstantSDNode>(RHS)) {
4744
12
    // Get the exponent as a positive value.
4745
12
    unsigned Val = RHSC->getSExtValue();
4746
12
    if (
(int)Val < 012
)
Val = -Val1
;
4747
12
4748
12
    // powi(x, 0) -> 1.0
4749
12
    if (Val == 0)
4750
0
      return DAG.getConstantFP(1.0, DL, LHS.getValueType());
4751
12
4752
12
    const Function *F = DAG.getMachineFunction().getFunction();
4753
12
    if (!F->optForSize() ||
4754
12
        // If optimizing for size, don't insert too many multiplies.
4755
12
        // This inserts up to 5 multiplies.
4756
12
        
countPopulation(Val) + Log2_32(Val) < 72
) {
4757
10
      // We use the simple binary decomposition method to generate the multiply
4758
10
      // sequence.  There are more optimal ways to do this (for example,
4759
10
      // powi(x,15) generates one more multiply than it should), but this has
4760
10
      // the benefit of being both really simple and much better than a libcall.
4761
10
      SDValue Res;  // Logically starts equal to 1.0
4762
10
      SDValue CurSquare = LHS;
4763
10
      // TODO: Intrinsics should have fast-math-flags that propagate to these
4764
10
      // nodes.
4765
37
      while (
Val37
) {
4766
27
        if (
Val & 127
) {
4767
14
          if (Res.getNode())
4768
4
            Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare);
4769
14
          else
4770
10
            Res = CurSquare;  // 1.0*CurSquare.
4771
14
        }
4772
27
4773
27
        CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
4774
27
                                CurSquare, CurSquare);
4775
27
        Val >>= 1;
4776
27
      }
4777
10
4778
10
      // If the original was negative, invert the result, producing 1/(x*x*x).
4779
10
      if (RHSC->getSExtValue() < 0)
4780
1
        Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
4781
1
                          DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res);
4782
10
      return Res;
4783
10
    }
4784
87
  }
4785
87
4786
87
  // Otherwise, expand to a libcall.
4787
87
  return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
4788
87
}
4789
4790
// getUnderlyingArgReg - Find underlying register used for a truncated or
4791
// bitcasted argument.
4792
233
static unsigned getUnderlyingArgReg(const SDValue &N) {
4793
233
  switch (N.getOpcode()) {
4794
178
  case ISD::CopyFromReg:
4795
178
    return cast<RegisterSDNode>(N.getOperand(1))->getReg();
4796
30
  case ISD::BITCAST:
4797
30
  case ISD::AssertZext:
4798
30
  case ISD::AssertSext:
4799
30
  case ISD::TRUNCATE:
4800
30
    return getUnderlyingArgReg(N.getOperand(0));
4801
25
  default:
4802
25
    return 0;
4803
0
  }
4804
0
}
4805
4806
/// If the DbgValueInst is a dbg_value of a function argument, create the
4807
/// corresponding DBG_VALUE machine instruction for it now.  At the end of
4808
/// instruction selection, they will be inserted to the entry BB.
4809
bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
4810
    const Value *V, DILocalVariable *Variable, DIExpression *Expr,
4811
466
    DILocation *DL, bool IsDbgDeclare, const SDValue &N) {
4812
466
  const Argument *Arg = dyn_cast<Argument>(V);
4813
466
  if (!Arg)
4814
256
    return false;
4815
210
4816
210
  MachineFunction &MF = DAG.getMachineFunction();
4817
210
  const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
4818
210
4819
210
  // Ignore inlined function arguments here.
4820
210
  //
4821
210
  // FIXME: Should we be checking DL->inlinedAt() to determine this?
4822
210
  if (!Variable->getScope()->getSubprogram()->describes(MF.getFunction()))
4823
3
    return false;
4824
207
4825
207
  bool IsIndirect = false;
4826
207
  Optional<MachineOperand> Op;
4827
207
  // Some arguments' frame index is recorded during argument lowering.
4828
207
  int FI = FuncInfo.getArgumentFrameIndex(Arg);
4829
207
  if (FI != std::numeric_limits<int>::max())
4830
4
    Op = MachineOperand::CreateFI(FI);
4831
207
4832
207
  if (
!Op && 207
N.getNode()203
) {
4833
203
    unsigned Reg = getUnderlyingArgReg(N);
4834
203
    if (
Reg && 203
TargetRegisterInfo::isVirtualRegister(Reg)178
) {
4835
178
      MachineRegisterInfo &RegInfo = MF.getRegInfo();
4836
178
      unsigned PR = RegInfo.getLiveInPhysReg(Reg);
4837
178
      if (PR)
4838
177
        Reg = PR;
4839
178
    }
4840
203
    if (
Reg203
) {
4841
178
      Op = MachineOperand::CreateReg(Reg, false);
4842
178
      IsIndirect = IsDbgDeclare;
4843
178
    }
4844
203
  }
4845
207
4846
207
  if (
!Op207
) {
4847
25
    // Check if ValueMap has reg number.
4848
25
    DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
4849
25
    if (
VMI != FuncInfo.ValueMap.end()25
) {
4850
8
      const auto &TLI = DAG.getTargetLoweringInfo();
4851
8
      RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second,
4852
8
                       V->getType(), isABIRegCopy(V));
4853
8
      unsigned NumRegs =
4854
8
          std::accumulate(RFV.RegCount.begin(), RFV.RegCount.end(), 0);
4855
8
      if (
NumRegs > 18
) {
4856
3
        unsigned I = 0;
4857
3
        unsigned Offset = 0;
4858
3
        auto RegisterVT = RFV.RegVTs.begin();
4859
7
        for (auto RegCount : RFV.RegCount) {
4860
7
          unsigned RegisterSize = (RegisterVT++)->getSizeInBits();
4861
16
          for (unsigned E = I + RegCount; 
I != E16
;
++I9
) {
4862
9
            // The vregs are guaranteed to be allocated in sequence.
4863
9
            Op = MachineOperand::CreateReg(VMI->second + I, false);
4864
9
            auto *FragmentExpr = DIExpression::createFragmentExpression(
4865
9
                Expr, Offset, RegisterSize);
4866
9
            FuncInfo.ArgDbgValues.push_back(
4867
9
                BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare,
4868
9
                        Op->getReg(), Variable, FragmentExpr));
4869
9
            Offset += RegisterSize;
4870
9
          }
4871
7
        }
4872
3
        return true;
4873
3
      }
4874
5
      Op = MachineOperand::CreateReg(VMI->second, false);
4875
5
      IsIndirect = IsDbgDeclare;
4876
5
    }
4877
25
  }
4878
207
4879
204
  
if (204
!Op && 204
N.getNode()17
)
4880
204
    // Check if frame index is available.
4881
17
    
if (LoadSDNode *17
LNode17
= dyn_cast<LoadSDNode>(N.getNode()))
4882
9
      
if (FrameIndexSDNode *9
FINode9
=
4883
9
          dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
4884
9
        Op = MachineOperand::CreateFI(FINode->getIndex());
4885
204
4886
204
  if (!Op)
4887
8
    return false;
4888
196
4889
204
  assert(Variable->isValidLocationForIntrinsic(DL) &&
4890
196
         "Expected inlined-at fields to agree");
4891
196
  if (Op->isReg())
4892
183
    FuncInfo.ArgDbgValues.push_back(
4893
183
        BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
4894
183
                Op->getReg(), Variable, Expr));
4895
196
  else
4896
13
    FuncInfo.ArgDbgValues.push_back(
4897
13
        BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE))
4898
13
            .add(*Op)
4899
13
            .addImm(0)
4900
13
            .addMetadata(Variable)
4901
13
            .addMetadata(Expr));
4902
466
4903
466
  return true;
4904
466
}
4905
4906
/// Return the appropriate SDDbgValue based on N.
4907
SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
4908
                                             DILocalVariable *Variable,
4909
                                             DIExpression *Expr,
4910
                                             const DebugLoc &dl,
4911
267
                                             unsigned DbgSDNodeOrder) {
4912
267
  if (auto *
FISDN267
= dyn_cast<FrameIndexSDNode>(N.getNode())) {
4913
43
    // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe
4914
43
    // stack slot locations as such instead of as indirectly addressed
4915
43
    // locations.
4916
43
    return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), dl,
4917
43
                                     DbgSDNodeOrder);
4918
43
  }
4919
224
  return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false, dl,
4920
224
                         DbgSDNodeOrder);
4921
224
}
4922
4923
// VisualStudio defines setjmp as _setjmp
4924
#if defined(_MSC_VER) && defined(setjmp) && \
4925
                         !defined(setjmp_undefined_for_msvc)
4926
#  pragma push_macro("setjmp")
4927
#  undef setjmp
4928
#  define setjmp_undefined_for_msvc
4929
#endif
4930
4931
/// Lower the call to the specified intrinsic function. If we want to emit this
4932
/// as a call to a named external function, return the name. Otherwise, lower it
4933
/// and return null.
4934
const char *
4935
408k
SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
4936
408k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4937
408k
  SDLoc sdl = getCurSDLoc();
4938
408k
  DebugLoc dl = getCurDebugLoc();
4939
408k
  SDValue Res;
4940
408k
4941
408k
  switch (Intrinsic) {
4942
171k
  default:
4943
171k
    // By default, turn this into a target intrinsic node.
4944
171k
    visitTargetIntrinsic(I, Intrinsic);
4945
171k
    return nullptr;
4946
369
  case Intrinsic::vastart:  visitVAStart(I); return nullptr;
4947
350
  case Intrinsic::vaend:    visitVAEnd(I); return nullptr;
4948
66
  case Intrinsic::vacopy:   visitVACopy(I); return nullptr;
4949
80
  case Intrinsic::returnaddress:
4950
80
    setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl,
4951
80
                             TLI.getPointerTy(DAG.getDataLayout()),
4952
80
                             getValue(I.getArgOperand(0))));
4953
80
    return nullptr;
4954
6
  case Intrinsic::addressofreturnaddress:
4955
6
    setValue(&I, DAG.getNode(ISD::ADDROFRETURNADDR, sdl,
4956
6
                             TLI.getPointerTy(DAG.getDataLayout())));
4957
6
    return nullptr;
4958
109
  case Intrinsic::frameaddress:
4959
109
    setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl,
4960
109
                             TLI.getPointerTy(DAG.getDataLayout()),
4961
109
                             getValue(I.getArgOperand(0))));
4962
109
    return nullptr;
4963
195
  case Intrinsic::read_register: {
4964
195
    Value *Reg = I.getArgOperand(0);
4965
195
    SDValue Chain = getRoot();
4966
195
    SDValue RegName =
4967
195
        DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
4968
195
    EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
4969
195
    Res = DAG.getNode(ISD::READ_REGISTER, sdl,
4970
195
      DAG.getVTList(VT, MVT::Other), Chain, RegName);
4971
195
    setValue(&I, Res);
4972
195
    DAG.setRoot(Res.getValue(1));
4973
195
    return nullptr;
4974
408k
  }
4975
181
  case Intrinsic::write_register: {
4976
181
    Value *Reg = I.getArgOperand(0);
4977
181
    Value *RegValue = I.getArgOperand(1);
4978
181
    SDValue Chain = getRoot();
4979
181
    SDValue RegName =
4980
181
        DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
4981
181
    DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain,
4982
181
                            RegName, getValue(RegValue)));
4983
181
    return nullptr;
4984
408k
  }
4985
0
  case Intrinsic::setjmp:
4986
0
    return &"_setjmp"[!TLI.usesUnderscoreSetJmp()];
4987
0
  case Intrinsic::longjmp:
4988
0
    return &"_longjmp"[!TLI.usesUnderscoreLongJmp()];
4989
15.1k
  case Intrinsic::memcpy: {
4990
15.1k
    SDValue Op1 = getValue(I.getArgOperand(0));
4991
15.1k
    SDValue Op2 = getValue(I.getArgOperand(1));
4992
15.1k
    SDValue Op3 = getValue(I.getArgOperand(2));
4993
15.1k
    unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
4994
15.1k
    if (!Align)
4995
31
      Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment.
4996
15.1k
    bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
4997
6.36k
    bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
4998
15.1k
    SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
4999
15.1k
                               false, isTC,
5000
15.1k
                               MachinePointerInfo(I.getArgOperand(0)),
5001
15.1k
                               MachinePointerInfo(I.getArgOperand(1)));
5002
15.1k
    updateDAGForMaybeTailCall(MC);
5003
15.1k
    return nullptr;
5004
408k
  }
5005
29.5k
  case Intrinsic::memset: {
5006
29.5k
    SDValue Op1 = getValue(I.getArgOperand(0));
5007
29.5k
    SDValue Op2 = getValue(I.getArgOperand(1));
5008
29.5k
    SDValue Op3 = getValue(I.getArgOperand(2));
5009
29.5k
    unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
5010
29.5k
    if (!Align)
5011
3
      Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment.
5012
29.5k
    bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
5013
2.32k
    bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
5014
29.5k
    SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
5015
29.5k
                               isTC, MachinePointerInfo(I.getArgOperand(0)));
5016
29.5k
    updateDAGForMaybeTailCall(MS);
5017
29.5k
    return nullptr;
5018
408k
  }
5019
441
  case Intrinsic::memmove: {
5020
441
    SDValue Op1 = getValue(I.getArgOperand(0));
5021
441
    SDValue Op2 = getValue(I.getArgOperand(1));
5022
441
    SDValue Op3 = getValue(I.getArgOperand(2));
5023
441
    unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
5024
441
    if (!Align)
5025
0
      Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment.
5026
441
    bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
5027
90
    bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
5028
441
    SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
5029
441
                                isTC, MachinePointerInfo(I.getArgOperand(0)),
5030
441
                                MachinePointerInfo(I.getArgOperand(1)));
5031
441
    updateDAGForMaybeTailCall(MM);
5032
441
    return nullptr;
5033
408k
  }
5034
6
  case Intrinsic::memcpy_element_unordered_atomic: {
5035
6
    const ElementUnorderedAtomicMemCpyInst &MI =
5036
6
        cast<ElementUnorderedAtomicMemCpyInst>(I);
5037
6
    SDValue Dst = getValue(MI.getRawDest());
5038
6
    SDValue Src = getValue(MI.getRawSource());
5039
6
    SDValue Length = getValue(MI.getLength());
5040
6
5041
6
    // Emit a library call.
5042
6
    TargetLowering::ArgListTy Args;
5043
6
    TargetLowering::ArgListEntry Entry;
5044
6
    Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
5045
6
    Entry.Node = Dst;
5046
6
    Args.push_back(Entry);
5047
6
5048
6
    Entry.Node = Src;
5049
6
    Args.push_back(Entry);
5050
6
5051
6
    Entry.Ty = MI.getLength()->getType();
5052
6
    Entry.Node = Length;
5053
6
    Args.push_back(Entry);
5054
6
5055
6
    uint64_t ElementSizeConstant = MI.getElementSizeInBytes();
5056
6
    RTLIB::Libcall LibraryCall =
5057
6
        RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant);
5058
6
    if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
5059
0
      report_fatal_error("Unsupported element size");
5060
6
5061
6
    TargetLowering::CallLoweringInfo CLI(DAG);
5062
6
    CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
5063
6
        TLI.getLibcallCallingConv(LibraryCall),
5064
6
        Type::getVoidTy(*DAG.getContext()),
5065
6
        DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall),
5066
6
                              TLI.getPointerTy(DAG.getDataLayout())),
5067
6
        std::move(Args));
5068
6
5069
6
    std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
5070
6
    DAG.setRoot(CallResult.second);
5071
6
    return nullptr;
5072
6
  }
5073
6
  case Intrinsic::memmove_element_unordered_atomic: {
5074
6
    auto &MI = cast<ElementUnorderedAtomicMemMoveInst>(I);
5075
6
    SDValue Dst = getValue(MI.getRawDest());
5076
6
    SDValue Src = getValue(MI.getRawSource());
5077
6
    SDValue Length = getValue(MI.getLength());
5078
6
5079
6
    // Emit a library call.
5080
6
    TargetLowering::ArgListTy Args;
5081
6
    TargetLowering::ArgListEntry Entry;
5082
6
    Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
5083
6
    Entry.Node = Dst;
5084
6
    Args.push_back(Entry);
5085
6
5086
6
    Entry.Node = Src;
5087
6
    Args.push_back(Entry);
5088
6
5089
6
    Entry.Ty = MI.getLength()->getType();
5090
6
    Entry.Node = Length;
5091
6
    Args.push_back(Entry);
5092
6
5093
6
    uint64_t ElementSizeConstant = MI.getElementSizeInBytes();
5094
6
    RTLIB::Libcall LibraryCall =
5095
6
        RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant);
5096
6
    if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
5097
0
      report_fatal_error("Unsupported element size");
5098
6
5099
6
    TargetLowering::CallLoweringInfo CLI(DAG);
5100
6
    CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
5101
6
        TLI.getLibcallCallingConv(LibraryCall),
5102
6
        Type::getVoidTy(*DAG.getContext()),
5103
6
        DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall),
5104
6
                              TLI.getPointerTy(DAG.getDataLayout())),
5105
6
        std::move(Args));
5106
6
5107
6
    std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
5108
6
    DAG.setRoot(CallResult.second);
5109
6
    return nullptr;
5110
6
  }
5111
6
  case Intrinsic::memset_element_unordered_atomic: {
5112
6
    auto &MI = cast<ElementUnorderedAtomicMemSetInst>(I);
5113
6
    SDValue Dst = getValue(MI.getRawDest());
5114
6
    SDValue Val = getValue(MI.getValue());
5115
6
    SDValue Length = getValue(MI.getLength());
5116
6
5117
6
    // Emit a library call.
5118
6
    TargetLowering::ArgListTy Args;
5119
6
    TargetLowering::ArgListEntry Entry;
5120
6
    Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
5121
6
    Entry.Node = Dst;
5122
6
    Args.push_back(Entry);
5123
6
5124
6
    Entry.Ty = Type::getInt8Ty(*DAG.getContext());
5125
6
    Entry.Node = Val;
5126
6
    Args.push_back(Entry);
5127
6
5128
6
    Entry.Ty = MI.getLength()->getType();
5129
6
    Entry.Node = Length;
5130
6
    Args.push_back(Entry);
5131
6
5132
6
    uint64_t ElementSizeConstant = MI.getElementSizeInBytes();
5133
6
    RTLIB::Libcall LibraryCall =
5134
6
        RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant);
5135
6
    if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
5136
0
      report_fatal_error("Unsupported element size");
5137
6
5138
6
    TargetLowering::CallLoweringInfo CLI(DAG);
5139
6
    CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
5140
6
        TLI.getLibcallCallingConv(LibraryCall),
5141
6
        Type::getVoidTy(*DAG.getContext()),
5142
6
        DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall),
5143
6
                              TLI.getPointerTy(DAG.getDataLayout())),
5144
6
        std::move(Args));
5145
6
5146
6
    std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
5147
6
    DAG.setRoot(CallResult.second);
5148
6
    return nullptr;
5149
6
  }
5150
387
  case Intrinsic::dbg_addr:
5151
387
  case Intrinsic::dbg_declare: {
5152
387
    const DbgInfoIntrinsic &DI = cast<DbgInfoIntrinsic>(I);
5153
387
    DILocalVariable *Variable = DI.getVariable();
5154
387
    DIExpression *Expression = DI.getExpression();
5155
387
    assert(Variable && "Missing variable");
5156
387
5157
387
    // Check if address has undef value.
5158
387
    const Value *Address = DI.getVariableLocation();
5159
387
    if (
!Address || 387
isa<UndefValue>(Address)379
||
5160
387
        
(Address->use_empty() && 356
!isa<Argument>(Address)36
)) {
5161
60
      DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
5162
60
      return nullptr;
5163
60
    }
5164
327
5165
327
    
bool isParameter = Variable->isParameter() || 327
isa<Argument>(Address)124
;
5166
327
5167
327
    // Check if this variable can be described by a frame index, typically
5168
327
    // either as a static alloca or a byval parameter.
5169
327
    int FI = std::numeric_limits<int>::max();
5170
327
    if (const auto *AI =
5171
296
            dyn_cast<AllocaInst>(Address->stripInBoundsConstantOffsets())) {
5172
296
      if (
AI->isStaticAlloca()296
) {
5173
294
        auto I = FuncInfo.StaticAllocaMap.find(AI);
5174
294
        if (I != FuncInfo.StaticAllocaMap.end())
5175
294
          FI = I->second;
5176
294
      }
5177
327
    } else 
if (const auto *31
Arg31
= dyn_cast<Argument>(
5178
24
                   Address->stripInBoundsConstantOffsets())) {
5179
24
      FI = FuncInfo.getArgumentFrameIndex(Arg);
5180
24
    }
5181
327
5182
327
    // llvm.dbg.addr is control dependent and always generates indirect
5183
327
    // DBG_VALUE instructions. llvm.dbg.declare is handled as a frame index in
5184
327
    // the MachineFunction variable table.
5185
327
    if (
FI != std::numeric_limits<int>::max()327
) {
5186
307
      if (Intrinsic == Intrinsic::dbg_addr)
5187
3
        DAG.AddDbgValue(DAG.getFrameIndexDbgValue(Variable, Expression, FI, dl,
5188
3
                                                  SDNodeOrder),
5189
3
                        getRoot().getNode(), isParameter);
5190
307
      return nullptr;
5191
307
    }
5192
20
5193
20
    SDValue &N = NodeMap[Address];
5194
20
    if (
!N.getNode() && 20
isa<Argument>(Address)0
)
5195
20
      // Check unused arguments map.
5196
0
      N = UnusedArgNodeMap[Address];
5197
20
    SDDbgValue *SDV;
5198
20
    if (
N.getNode()20
) {
5199
20
      if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
5200
2
        Address = BCI->getOperand(0);
5201
20
      // Parameters are handled specially.
5202
20
      auto FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
5203
20
      if (
isParameter && 20
FINode13
) {
5204
0
        // Byval parameter. We have a frame index at this point.
5205
0
        SDV = DAG.getFrameIndexDbgValue(Variable, Expression,
5206
0
                                        FINode->getIndex(), dl, SDNodeOrder);
5207
20
      } else 
if (20
isa<Argument>(Address)20
) {
5208
11
        // Address is an argument, so try to emit its dbg value using
5209
11
        // virtual register info from the FuncInfo.ValueMap.
5210
11
        EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, N);
5211
11
        return nullptr;
5212
0
      } else {
5213
9
        SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
5214
9
                              true, dl, SDNodeOrder);
5215
9
      }
5216
9
      DAG.AddDbgValue(SDV, N.getNode(), isParameter);
5217
20
    } else {
5218
0
      // If Address is an argument then try to emit its dbg value using
5219
0
      // virtual register info from the FuncInfo.ValueMap.
5220
0
      if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true,
5221
0
                                    N)) {
5222
0
        DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
5223
0
      }
5224
0
    }
5225
9
    return nullptr;
5226
20
  }
5227
559
  case Intrinsic::dbg_value: {
5228
559
    const DbgValueInst &DI = cast<DbgValueInst>(I);
5229
559
    assert(DI.getVariable() && "Missing variable");
5230
559
5231
559
    DILocalVariable *Variable = DI.getVariable();
5232
559
    DIExpression *Expression = DI.getExpression();
5233
559
    const Value *V = DI.getValue();
5234
559
    if (!V)
5235
2
      return nullptr;
5236
557
5237
557
    SDDbgValue *SDV;
5238
557
    if (
isa<ConstantInt>(V) || 557
isa<ConstantFP>(V)475
||
isa<UndefValue>(V)468
) {
5239
95
      SDV = DAG.getConstantDbgValue(Variable, Expression, V, dl, SDNodeOrder);
5240
95
      DAG.AddDbgValue(SDV, nullptr, false);
5241
95
      return nullptr;
5242
95
    }
5243
462
5244
462
    // Do not use getValue() in here; we don't want to generate code at
5245
462
    // this point if it hasn't been done yet.
5246
462
    SDValue N = NodeMap[V];
5247
462
    if (
!N.getNode() && 462
isa<Argument>(V)85
) // Check unused arguments map.
5248
47
      N = UnusedArgNodeMap[V];
5249
462
    if (
N.getNode()462
) {
5250
420
      if (EmitFuncArgumentDbgValue(V, Variable, Expression, dl, false, N))
5251
185
        return nullptr;
5252
235
      SDV = getDbgValue(N, Variable, Expression, dl, SDNodeOrder);
5253
235
      DAG.AddDbgValue(SDV, N.getNode(), false);
5254
235
      return nullptr;
5255
235
    }
5256
42
5257
42
    
if (42
!V->use_empty()42
) {
5258
38
      // Do not call getValue(V) yet, as we don't want to generate code.
5259
38
      // Remember it for later.
5260
38
      DanglingDebugInfo DDI(&DI, dl, SDNodeOrder);
5261
38
      DanglingDebugInfoMap[V] = DDI;
5262
38
      return nullptr;
5263
38
    }
5264
4
5265
4
    
DEBUG4
(dbgs() << "Dropping debug location info for:\n " << DI << "\n");
5266
4
    DEBUG(dbgs() << "  Last seen at:\n    " << *V << "\n");
5267
4
    return nullptr;
5268
4
  }
5269
4
5270
237
  case Intrinsic::eh_typeid_for: {
5271
237
    // Find the type id for the given typeinfo.
5272
237
    GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0));
5273
237
    unsigned TypeID = DAG.getMachineFunction().getTypeIDFor(GV);
5274
237
    Res = DAG.getConstant(TypeID, sdl, MVT::i32);
5275
237
    setValue(&I, Res);
5276
237
    return nullptr;
5277
4
  }
5278
4
5279
29
  case Intrinsic::eh_return_i32:
5280
29
  case Intrinsic::eh_return_i64:
5281
29
    DAG.getMachineFunction().setCallsEHReturn(true);
5282
29
    DAG.setRoot(DAG.getNode(ISD::EH_RETURN, sdl,
5283
29
                            MVT::Other,
5284
29
                            getControlRoot(),
5285
29
                            getValue(I.getArgOperand(0)),
5286
29
                            getValue(I.getArgOperand(1))));
5287
29
    return nullptr;
5288
15
  case Intrinsic::eh_unwind_init:
5289
15
    DAG.getMachineFunction().setCallsUnwindInit(true);
5290
15
    return nullptr;
5291
19
  case Intrinsic::eh_dwarf_cfa:
5292
19
    setValue(&I, DAG.getNode(ISD::EH_DWARF_CFA, sdl,
5293
19
                             TLI.getPointerTy(DAG.getDataLayout()),
5294
19
                             getValue(I.getArgOperand(0))));
5295
19
    return nullptr;
5296
169
  case Intrinsic::eh_sjlj_callsite: {
5297
169
    MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
5298
169
    ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0));
5299
169
    assert(CI && "Non-constant call site value in eh.sjlj.callsite!");
5300
169
    assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");
5301
169
5302
169
    MMI.setCurrentCallSite(CI->getZExtValue());
5303
169
    return nullptr;
5304
29
  }
5305
33
  case Intrinsic::eh_sjlj_functioncontext: {
5306
33
    // Get and store the index of the function context.
5307
33
    MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
5308
33
    AllocaInst *FnCtx =
5309
33
      cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts());
5310
33
    int FI = FuncInfo.StaticAllocaMap[FnCtx];
5311
33
    MFI.setFunctionContextIndex(FI);
5312
33
    return nullptr;
5313
29
  }
5314
28
  case Intrinsic::eh_sjlj_setjmp: {
5315
28
    SDValue Ops[2];
5316
28
    Ops[0] = getRoot();
5317
28
    Ops[1] = getValue(I.getArgOperand(0));
5318
28
    SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, sdl,
5319
28
                             DAG.getVTList(MVT::i32, MVT::Other), Ops);
5320
28
    setValue(&I, Op.getValue(0));
5321
28
    DAG.setRoot(Op.getValue(1));
5322
28
    return nullptr;
5323
29
  }
5324
22
  case Intrinsic::eh_sjlj_longjmp:
5325
22
    DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other,
5326
22
                            getRoot(), getValue(I.getArgOperand(0))));
5327
22
    return nullptr;
5328
33
  case Intrinsic::eh_sjlj_setup_dispatch:
5329
33
    DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other,
5330
33
                            getRoot()));
5331
33
    return nullptr;
5332
175
  case Intrinsic::masked_gather:
5333
175
    visitMaskedGather(I);
5334
175
    return nullptr;
5335
211
  case Intrinsic::masked_load:
5336
211
    visitMaskedLoad(I);
5337
211
    return nullptr;
5338
61
  case Intrinsic::masked_scatter:
5339
61
    visitMaskedScatter(I);
5340
61
    return nullptr;
5341
100
  case Intrinsic::masked_store:
5342
100
    visitMaskedStore(I);
5343
100
    return nullptr;
5344
16
  case Intrinsic::masked_expandload:
5345
16
    visitMaskedLoad(I, true /* IsExpanding */);
5346
16
    return nullptr;
5347
20
  case Intrinsic::masked_compressstore:
5348
20
    visitMaskedStore(I, true /* IsCompressing */);
5349
20
    return nullptr;
5350
65
  case Intrinsic::x86_mmx_pslli_w:
5351
65
  case Intrinsic::x86_mmx_pslli_d:
5352
65
  case Intrinsic::x86_mmx_pslli_q:
5353
65
  case Intrinsic::x86_mmx_psrli_w:
5354
65
  case Intrinsic::x86_mmx_psrli_d:
5355
65
  case Intrinsic::x86_mmx_psrli_q:
5356
65
  case Intrinsic::x86_mmx_psrai_w:
5357
65
  case Intrinsic::x86_mmx_psrai_d: {
5358
65
    SDValue ShAmt = getValue(I.getArgOperand(1));
5359
65
    if (
isa<ConstantSDNode>(ShAmt)65
) {
5360
40
      visitTargetIntrinsic(I, Intrinsic);
5361
40
      return nullptr;
5362
40
    }
5363
25
    unsigned NewIntrinsic = 0;
5364
25
    EVT ShAmtVT = MVT::v2i32;
5365
25
    switch (Intrinsic) {
5366
2
    case Intrinsic::x86_mmx_pslli_w:
5367
2
      NewIntrinsic = Intrinsic::x86_mmx_psll_w;
5368
2
      break;
5369
2
    case Intrinsic::x86_mmx_pslli_d:
5370
2
      NewIntrinsic = Intrinsic::x86_mmx_psll_d;
5371
2
      break;
5372
9
    case Intrinsic::x86_mmx_pslli_q:
5373
9
      NewIntrinsic = Intrinsic::x86_mmx_psll_q;
5374
9
      break;
5375
2
    case Intrinsic::x86_mmx_psrli_w:
5376
2
      NewIntrinsic = Intrinsic::x86_mmx_psrl_w;
5377
2
      break;
5378
2
    case Intrinsic::x86_mmx_psrli_d:
5379
2
      NewIntrinsic = Intrinsic::x86_mmx_psrl_d;
5380
2
      break;
5381
4
    case Intrinsic::x86_mmx_psrli_q:
5382
4
      NewIntrinsic = Intrinsic::x86_mmx_psrl_q;
5383
4
      break;
5384
2
    case Intrinsic::x86_mmx_psrai_w:
5385
2
      NewIntrinsic = Intrinsic::x86_mmx_psra_w;
5386
2
      break;
5387
2
    case Intrinsic::x86_mmx_psrai_d:
5388
2
      NewIntrinsic = Intrinsic::x86_mmx_psra_d;
5389
2
      break;
5390
0
    
default: 0
llvm_unreachable0
("Impossible intrinsic"); // Can't reach here.
5391
25
    }
5392
25
5393
25
    // The vector shift intrinsics with scalars uses 32b shift amounts but
5394
25
    // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits
5395
25
    // to be zero.
5396
25
    // We must do this early because v2i32 is not a legal type.
5397
25
    SDValue ShOps[2];
5398
25
    ShOps[0] = ShAmt;
5399
25
    ShOps[1] = DAG.getConstant(0, sdl, MVT::i32);
5400
25
    ShAmt =  DAG.getBuildVector(ShAmtVT, sdl, ShOps);
5401
25
    EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
5402
25
    ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt);
5403
25
    Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT,
5404
25
                       DAG.getConstant(NewIntrinsic, sdl, MVT::i32),
5405
25
                       getValue(I.getArgOperand(0)), ShAmt);
5406
25
    setValue(&I, Res);
5407
25
    return nullptr;
5408
25
  }
5409
97
  case Intrinsic::powi:
5410
97
    setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)),
5411
97
                            getValue(I.getArgOperand(1)), DAG));
5412
97
    return nullptr;
5413
43
  case Intrinsic::log:
5414
43
    setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
5415
43
    return nullptr;
5416
67
  case Intrinsic::log2:
5417
67
    setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
5418
67
    return nullptr;
5419
48
  case Intrinsic::log10:
5420
48
    setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
5421
48
    return nullptr;
5422
75
  case Intrinsic::exp:
5423
75
    setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
5424
75
    return nullptr;
5425
105
  case Intrinsic::exp2:
5426
105
    setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
5427
105
    return nullptr;
5428
162
  case Intrinsic::pow:
5429
162
    setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)),
5430
162
                           getValue(I.getArgOperand(1)), DAG, TLI));
5431
162
    return nullptr;
5432
5.38k
  case Intrinsic::sqrt:
5433
5.38k
  case Intrinsic::fabs:
5434
5.38k
  case Intrinsic::sin:
5435
5.38k
  case Intrinsic::cos:
5436
5.38k
  case Intrinsic::floor:
5437
5.38k
  case Intrinsic::ceil:
5438
5.38k
  case Intrinsic::trunc:
5439
5.38k
  case Intrinsic::rint:
5440
5.38k
  case Intrinsic::nearbyint:
5441
5.38k
  case Intrinsic::round:
5442
5.38k
  case Intrinsic::canonicalize: {
5443
5.38k
    unsigned Opcode;
5444
5.38k
    switch (Intrinsic) {
5445
0
    
default: 0
llvm_unreachable0
("Impossible intrinsic"); // Can't reach here.
5446
499
    case Intrinsic::sqrt:      Opcode = ISD::FSQRT;      break;
5447
3.37k
    case Intrinsic::fabs:      Opcode = ISD::FABS;       break;
5448
95
    case Intrinsic::sin:       Opcode = ISD::FSIN;       break;
5449
88
    case Intrinsic::cos:       Opcode = ISD::FCOS;       break;
5450
437
    case Intrinsic::floor:     Opcode = ISD::FFLOOR;     break;
5451
148
    case Intrinsic::ceil:      Opcode = ISD::FCEIL;      break;
5452
131
    case Intrinsic::trunc:     Opcode = ISD::FTRUNC;     break;
5453
149
    case Intrinsic::rint:      Opcode = ISD::FRINT;      break;
5454
103
    case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
5455
82
    case Intrinsic::round:     Opcode = ISD::FROUND;     break;
5456
279
    case Intrinsic::canonicalize: Opcode = ISD::FCANONICALIZE; break;
5457
5.38k
    }
5458
5.38k
5459
5.38k
    setValue(&I, DAG.getNode(Opcode, sdl,
5460
5.38k
                             getValue(I.getArgOperand(0)).getValueType(),
5461
5.38k
                             getValue(I.getArgOperand(0))));
5462
5.38k
    return nullptr;
5463
5.38k
  }
5464
862
  case Intrinsic::minnum: {
5465
862
    auto VT = getValue(I.getArgOperand(0)).getValueType();
5466
862
    unsigned Opc =
5467
2
        I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT)
5468
1
            ? ISD::FMINNAN
5469
861
            : ISD::FMINNUM;
5470
862
    setValue(&I, DAG.getNode(Opc, sdl, VT,
5471
862
                             getValue(I.getArgOperand(0)),
5472
862
                             getValue(I.getArgOperand(1))));
5473
862
    return nullptr;
5474
5.38k
  }
5475
948
  case Intrinsic::maxnum: {
5476
948
    auto VT = getValue(I.getArgOperand(0)).getValueType();
5477
948
    unsigned Opc =
5478
2
        I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT)
5479
1
            ? ISD::FMAXNAN
5480
947
            : ISD::FMAXNUM;
5481
948
    setValue(&I, DAG.getNode(Opc, sdl, VT,
5482
948
                             getValue(I.getArgOperand(0)),
5483
948
                             getValue(I.getArgOperand(1))));
5484
948
    return nullptr;
5485
5.38k
  }
5486
1.57k
  case Intrinsic::copysign:
5487
1.57k
    setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl,
5488
1.57k
                             getValue(I.getArgOperand(0)).getValueType(),
5489
1.57k
                             getValue(I.getArgOperand(0)),
5490
1.57k
                             getValue(I.getArgOperand(1))));
5491
1.57k
    return nullptr;
5492
586
  case Intrinsic::fma:
5493
586
    setValue(&I, DAG.getNode(ISD::FMA, sdl,
5494
586
                             getValue(I.getArgOperand(0)).getValueType(),
5495
586
                             getValue(I.getArgOperand(0)),
5496
586
                             getValue(I.getArgOperand(1)),
5497
586
                             getValue(I.getArgOperand(2))));
5498
586
    return nullptr;
5499
40
  case Intrinsic::experimental_constrained_fadd:
5500
40
  case Intrinsic::experimental_constrained_fsub:
5501
40
  case Intrinsic::experimental_constrained_fmul:
5502
40
  case Intrinsic::experimental_constrained_fdiv:
5503
40
  case Intrinsic::experimental_constrained_frem:
5504
40
  case Intrinsic::experimental_constrained_fma:
5505
40
  case Intrinsic::experimental_constrained_sqrt:
5506
40
  case Intrinsic::experimental_constrained_pow:
5507
40
  case Intrinsic::experimental_constrained_powi:
5508
40
  case Intrinsic::experimental_constrained_sin:
5509
40
  case Intrinsic::experimental_constrained_cos:
5510
40
  case Intrinsic::experimental_constrained_exp:
5511
40
  case Intrinsic::experimental_constrained_exp2:
5512
40
  case Intrinsic::experimental_constrained_log:
5513
40
  case Intrinsic::experimental_constrained_log10:
5514
40
  case Intrinsic::experimental_constrained_log2:
5515
40
  case Intrinsic::experimental_constrained_rint:
5516
40
  case Intrinsic::experimental_constrained_nearbyint:
5517
40
    visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I));
5518
40
    return nullptr;
5519
729
  case Intrinsic::fmuladd: {
5520
729
    EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
5521
729
    if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
5522
729
        
TLI.isFMAFasterThanFMulAndFAdd(VT)729
) {
5523
105
      setValue(&I, DAG.getNode(ISD::FMA, sdl,
5524
105
                               getValue(I.getArgOperand(0)).getValueType(),
5525
105
                               getValue(I.getArgOperand(0)),
5526
105
                               getValue(I.getArgOperand(1)),
5527
105
                               getValue(I.getArgOperand(2))));
5528
729
    } else {
5529
624
      // TODO: Intrinsic calls should have fast-math-flags.
5530
624
      SDValue Mul = DAG.getNode(ISD::FMUL, sdl,
5531
624
                                getValue(I.getArgOperand(0)).getValueType(),
5532
624
                                getValue(I.getArgOperand(0)),
5533
624
                                getValue(I.getArgOperand(1)));
5534
624
      SDValue Add = DAG.getNode(ISD::FADD, sdl,
5535
624
                                getValue(I.getArgOperand(0)).getValueType(),
5536
624
                                Mul,
5537
624
                                getValue(I.getArgOperand(2)));
5538
624
      setValue(&I, Add);
5539
624
    }
5540
729
    return nullptr;
5541
40
  }
5542
229
  case Intrinsic::convert_to_fp16:
5543
229
    setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16,
5544
229
                             DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16,
5545
229
                                         getValue(I.getArgOperand(0)),
5546
229
                                         DAG.getTargetConstant(0, sdl,
5547
229
                                                               MVT::i32))));
5548
229
    return nullptr;
5549
275
  case Intrinsic::convert_from_fp16:
5550
275
    setValue(&I, DAG.getNode(ISD::FP_EXTEND, sdl,
5551
275
                             TLI.getValueType(DAG.getDataLayout(), I.getType()),
5552
275
                             DAG.getNode(ISD::BITCAST, sdl, MVT::f16,
5553
275
                                         getValue(I.getArgOperand(0)))));
5554
275
    return nullptr;
5555
0
  case Intrinsic::pcmarker: {
5556
0
    SDValue Tmp = getValue(I.getArgOperand(0));
5557
0
    DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp));
5558
0
    return nullptr;
5559
40
  }
5560
22
  case Intrinsic::readcyclecounter: {
5561
22
    SDValue Op = getRoot();
5562
22
    Res = DAG.getNode(ISD::READCYCLECOUNTER, sdl,
5563
22
                      DAG.getVTList(MVT::i64, MVT::Other), Op);
5564
22
    setValue(&I, Res);
5565
22
    DAG.setRoot(Res.getValue(1));
5566
22
    return nullptr;
5567
40
  }
5568
266
  case Intrinsic::bitreverse:
5569
266
    setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl,
5570
266
                             getValue(I.getArgOperand(0)).getValueType(),
5571
266
                             getValue(I.getArgOperand(0))));
5572
266
    return nullptr;
5573
875
  case Intrinsic::bswap:
5574
875
    setValue(&I, DAG.getNode(ISD::BSWAP, sdl,
5575
875
                             getValue(I.getArgOperand(0)).getValueType(),
5576
875
                             getValue(I.getArgOperand(0))));
5577
875
    return nullptr;
5578
996
  case Intrinsic::cttz: {
5579
996
    SDValue Arg = getValue(I.getArgOperand(0));
5580
996
    ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
5581
996
    EVT Ty = Arg.getValueType();
5582
996
    setValue(&I, DAG.getNode(CI->isZero() ? 
ISD::CTTZ440
:
ISD::CTTZ_ZERO_UNDEF556
,
5583
996
                             sdl, Ty, Arg));
5584
996
    return nullptr;
5585
40
  }
5586
4.67k
  case Intrinsic::ctlz: {
5587
4.67k
    SDValue Arg = getValue(I.getArgOperand(0));
5588
4.67k
    ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
5589
4.67k
    EVT Ty = Arg.getValueType();
5590
4.67k
    setValue(&I, DAG.getNode(CI->isZero() ? 
ISD::CTLZ986
:
ISD::CTLZ_ZERO_UNDEF3.69k
,
5591
4.67k
                             sdl, Ty, Arg));
5592
4.67k
    return nullptr;
5593
40
  }
5594
400
  case Intrinsic::ctpop: {
5595
400
    SDValue Arg = getValue(I.getArgOperand(0));
5596
400
    EVT Ty = Arg.getValueType();
5597
400
    setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg));
5598
400
    return nullptr;
5599
40
  }
5600
114
  case Intrinsic::stacksave: {
5601
114
    SDValue Op = getRoot();
5602
114
    Res = DAG.getNode(
5603
114
        ISD::STACKSAVE, sdl,
5604
114
        DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Op);
5605
114
    setValue(&I, Res);
5606
114
    DAG.setRoot(Res.getValue(1));
5607
114
    return nullptr;
5608
40
  }
5609
30
  case Intrinsic::stackrestore:
5610
30
    Res = getValue(I.getArgOperand(0));
5611
30
    DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res));
5612
30
    return nullptr;
5613
4
  case Intrinsic::get_dynamic_area_offset: {
5614
4
    SDValue Op = getRoot();
5615
4
    EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
5616
4
    EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
5617
4
    // Result type for @llvm.get.dynamic.area.offset should match PtrTy for
5618
4
    // target.
5619
4
    if (PtrTy != ResTy)
5620
0
      report_fatal_error("Wrong result type for @llvm.get.dynamic.area.offset"
5621
0
                         " intrinsic!");
5622
4
    Res = DAG.getNode(ISD::GET_DYNAMIC_AREA_OFFSET, sdl, DAG.getVTList(ResTy),
5623
4
                      Op);
5624
4
    DAG.setRoot(Op);
5625
4
    setValue(&I, Res);
5626
4
    return nullptr;
5627
4
  }
5628
3.38k
  case Intrinsic::stackguard: {
5629
3.38k
    EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
5630
3.38k
    MachineFunction &MF = DAG.getMachineFunction();
5631
3.38k
    const Module &M = *MF.getFunction()->getParent();
5632
3.38k
    SDValue Chain = getRoot();
5633
3.38k
    if (
TLI.useLoadStackGuardNode()3.38k
) {
5634
3.21k
      Res = getLoadStackGuard(DAG, sdl, Chain);
5635
3.38k
    } else {
5636
170
      const Value *Global = TLI.getSDagStackGuard(M);
5637
170
      unsigned Align = DL->getPrefTypeAlignment(Global->getType());
5638
170
      Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global),
5639
170
                        MachinePointerInfo(Global, 0), Align,
5640
170
                        MachineMemOperand::MOVolatile);
5641
170
    }
5642
3.38k
    DAG.setRoot(Chain);
5643
3.38k
    setValue(&I, Res);
5644
3.38k
    return nullptr;
5645
4
  }
5646
3.58k
  case Intrinsic::stackprotector: {
5647
3.58k
    // Emit code into the DAG to store the stack guard onto the stack.
5648
3.58k
    MachineFunction &MF = DAG.getMachineFunction();
5649
3.58k
    MachineFrameInfo &MFI = MF.getFrameInfo();
5650
3.58k
    EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
5651
3.58k
    SDValue Src, Chain = getRoot();
5652
3.58k
5653
3.58k
    if (TLI.useLoadStackGuardNode())
5654
3.19k
      Src = getLoadStackGuard(DAG, sdl, Chain);
5655
3.58k
    else
5656
388
      Src = getValue(I.getArgOperand(0));   // The guard's value.
5657
3.58k
5658
3.58k
    AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
5659
3.58k
5660
3.58k
    int FI = FuncInfo.StaticAllocaMap[Slot];
5661
3.58k
    MFI.setStackProtectorIndex(FI);
5662
3.58k
5663
3.58k
    SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
5664
3.58k
5665
3.58k
    // Store the stack protector onto the stack.
5666
3.58k
    Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack(
5667
3.58k
                                                 DAG.getMachineFunction(), FI),
5668
3.58k
                       /* Alignment = */ 0, MachineMemOperand::MOVolatile);
5669
3.58k
    setValue(&I, Res);
5670
3.58k
    DAG.setRoot(Res);
5671
3.58k
    return nullptr;
5672
4
  }
5673
0
  case Intrinsic::objectsize: {
5674
0
    // If we don't know by now, we're never going to know.
5675
0
    ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
5676
0
5677
0
    assert(CI && "Non-constant type in __builtin_object_size?");
5678
0
5679
0
    SDValue Arg = getValue(I.getCalledValue());
5680
0
    EVT Ty = Arg.getValueType();
5681
0
5682
0
    if (CI->isZero())
5683
0
      Res = DAG.getConstant(-1ULL, sdl, Ty);
5684
0
    else
5685
0
      Res = DAG.getConstant(0, sdl, Ty);
5686
0
5687
0
    setValue(&I, Res);
5688
0
    return nullptr;
5689
4
  }
5690
3
  case Intrinsic::annotation:
5691
3
  case Intrinsic::ptr_annotation:
5692
3
  case Intrinsic::invariant_group_barrier:
5693
3
    // Drop the intrinsic, but forward the value
5694
3
    setValue(&I, getValue(I.getOperand(0)));
5695
3
    return nullptr;
5696
23
  case Intrinsic::assume:
5697
23
  case Intrinsic::var_annotation:
5698
23
    // Discard annotate attributes and assumptions
5699
23
    return nullptr;
5700
23
5701
1
  case Intrinsic::codeview_annotation: {
5702
1
    // Emit a label associated with this metadata.
5703
1
    MachineFunction &MF = DAG.getMachineFunction();
5704
1
    MCSymbol *Label =
5705
1
        MF.getMMI().getContext().createTempSymbol("annotation", true);
5706
1
    Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(0))->getMetadata();
5707
1
    MF.addCodeViewAnnotation(Label, cast<MDNode>(MD));
5708
1
    Res = DAG.getLabelNode(ISD::ANNOTATION_LABEL, sdl, getRoot(), Label);
5709
1
    DAG.setRoot(Res);
5710
1
    return nullptr;
5711
23
  }
5712
23
5713
4
  case Intrinsic::init_trampoline: {
5714
4
    const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts());
5715
4
5716
4
    SDValue Ops[6];
5717
4
    Ops[0] = getRoot();
5718
4
    Ops[1] = getValue(I.getArgOperand(0));
5719
4
    Ops[2] = getValue(I.getArgOperand(1));
5720
4
    Ops[3] = getValue(I.getArgOperand(2));
5721
4
    Ops[4] = DAG.getSrcValue(I.getArgOperand(0));
5722
4
    Ops[5] = DAG.getSrcValue(F);
5723
4
5724
4
    Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops);
5725
4
5726
4
    DAG.setRoot(Res);
5727
4
    return nullptr;
5728
23
  }
5729
4
  case Intrinsic::adjust_trampoline:
5730
4
    setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl,
5731
4
                             TLI.getPointerTy(DAG.getDataLayout()),
5732
4
                             getValue(I.getArgOperand(0))));
5733
4
    return nullptr;
5734
2
  case Intrinsic::gcroot: {
5735
2
    MachineFunction &MF = DAG.getMachineFunction();
5736
2
    const Function *F = MF.getFunction();
5737
2
    (void)F;
5738
2
    assert(F->hasGC() &&
5739
2
           "only valid in functions with gc specified, enforced by Verifier");
5740
2
    assert(GFI && "implied by previous");
5741
2
    const Value *Alloca = I.getArgOperand(0)->stripPointerCasts();
5742
2
    const Constant *TypeMap = cast<Constant>(I.getArgOperand(1));
5743
2
5744
2
    FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
5745
2
    GFI->addStackRoot(FI->getIndex(), TypeMap);
5746
2
    return nullptr;
5747
23
  }
5748
0
  case Intrinsic::gcread:
5749
0
  case Intrinsic::gcwrite:
5750
0
    llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
5751
6
  case Intrinsic::flt_rounds:
5752
6
    setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32));
5753
6
    return nullptr;
5754
0
5755
10
  case Intrinsic::expect:
5756
10
    // Just replace __builtin_expect(exp, c) with EXP.
5757
10
    setValue(&I, getValue(I.getArgOperand(0)));
5758
10
    return nullptr;
5759
0
5760
306
  case Intrinsic::debugtrap:
5761
306
  case Intrinsic::trap: {
5762
306
    StringRef TrapFuncName =
5763
306
        I.getAttributes()
5764
306
            .getAttribute(AttributeList::FunctionIndex, "trap-func-name")
5765
306
            .getValueAsString();
5766
306
    if (
TrapFuncName.empty()306
) {
5767
297
      ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ?
5768
297
        
ISD::TRAP274
:
ISD::DEBUGTRAP23
;
5769
297
      DAG.setRoot(DAG.getNode(Op, sdl,MVT::Other, getRoot()));
5770
297
      return nullptr;
5771
297
    }
5772
9
    TargetLowering::ArgListTy Args;
5773
9
5774
9
    TargetLowering::CallLoweringInfo CLI(DAG);
5775
9
    CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
5776
9
        CallingConv::C, I.getType(),
5777
9
        DAG.getExternalSymbol(TrapFuncName.data(),
5778
9
                              TLI.getPointerTy(DAG.getDataLayout())),
5779
9
        std::move(Args));
5780
9
5781
9
    std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
5782
9
    DAG.setRoot(Result.second);
5783
9
    return nullptr;
5784
9
  }
5785
9
5786
1.70k
  case Intrinsic::uadd_with_overflow:
5787
1.70k
  case Intrinsic::sadd_with_overflow:
5788
1.70k
  case Intrinsic::usub_with_overflow:
5789
1.70k
  case Intrinsic::ssub_with_overflow:
5790
1.70k
  case Intrinsic::umul_with_overflow:
5791
1.70k
  case Intrinsic::smul_with_overflow: {
5792
1.70k
    ISD::NodeType Op;
5793
1.70k
    switch (Intrinsic) {
5794
0
    
default: 0
llvm_unreachable0
("Impossible intrinsic"); // Can't reach here.
5795
239
    case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break;
5796
95
    case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break;
5797
59
    case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break;
5798
48
    case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break;
5799
1.21k
    case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break;
5800
47
    case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break;
5801
1.70k
    }
5802
1.70k
    SDValue Op1 = getValue(I.getArgOperand(0));
5803
1.70k
    SDValue Op2 = getValue(I.getArgOperand(1));
5804
1.70k
5805
1.70k
    SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
5806
1.70k
    setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2));
5807
1.70k
    return nullptr;
5808
1.70k
  }
5809
162
  case Intrinsic::prefetch: {
5810
162
    SDValue Ops[5];
5811
162
    unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
5812
162
    Ops[0] = getRoot();
5813
162
    Ops[1] = getValue(I.getArgOperand(0));
5814
162
    Ops[2] = getValue(I.getArgOperand(1));
5815
162
    Ops[3] = getValue(I.getArgOperand(2));
5816
162
    Ops[4] = getValue(I.getArgOperand(3));
5817
162
    DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl,
5818
162
                                        DAG.getVTList(MVT::Other), Ops,
5819
162
                                        EVT::getIntegerVT(*Context, 8),
5820
162
                                        MachinePointerInfo(I.getArgOperand(0)),
5821
162
                                        0, /* align */
5822
162
                                        false, /* volatile */
5823
162
                                        rw==0, /* read */
5824
162
                                        rw==1)); /* write */
5825
162
    return nullptr;
5826
1.70k
  }
5827
159k
  case Intrinsic::lifetime_start:
5828
159k
  case Intrinsic::lifetime_end: {
5829
159k
    bool IsStart = (Intrinsic == Intrinsic::lifetime_start);
5830
159k
    // Stack coloring is not enabled in O0, discard region information.
5831
159k
    if (TM.getOptLevel() == CodeGenOpt::None)
5832
1
      return nullptr;
5833
159k
5834
159k
    SmallVector<Value *, 4> Allocas;
5835
159k
    GetUnderlyingObjects(I.getArgOperand(1), Allocas, *DL);
5836
159k
5837
159k
    for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(),
5838
318k
           E = Allocas.end(); 
Object != E318k
;
++Object159k
) {
5839
159k
      AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object);
5840
159k
5841
159k
      // Could not find an Alloca.
5842
159k
      if (!LifetimeObject)
5843
1
        continue;
5844
159k
5845
159k
      // First check that the Alloca is static, otherwise it won't have a
5846
159k
      // valid frame index.
5847
159k
      auto SI = FuncInfo.StaticAllocaMap.find(LifetimeObject);
5848
159k
      if (SI == FuncInfo.StaticAllocaMap.end())
5849
2
        return nullptr;
5850
159k
5851
159k
      int FI = SI->second;
5852
159k
5853
159k
      SDValue Ops[2];
5854
159k
      Ops[0] = getRoot();
5855
159k
      Ops[1] =
5856
159k
          DAG.getFrameIndex(FI, TLI.getFrameIndexTy(DAG.getDataLayout()), true);
5857
159k
      unsigned Opcode = (IsStart ? 
ISD::LIFETIME_START69.2k
:
ISD::LIFETIME_END89.9k
);
5858
159k
5859
159k
      Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops);
5860
159k
      DAG.setRoot(Res);
5861
159k
    }
5862
159k
    return nullptr;
5863
159k
  }
5864
2
  case Intrinsic::invariant_start:
5865
2
    // Discard region information.
5866
2
    setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout())));
5867
2
    return nullptr;
5868
0
  case Intrinsic::invariant_end:
5869
0
    // Discard region information.
5870
0
    return nullptr;
5871
3
  case Intrinsic::clear_cache:
5872
3
    return TLI.getClearCacheBuiltinName();
5873
1
  case Intrinsic::donothing:
5874
1
    // ignore
5875
1
    return nullptr;
5876
112
  case Intrinsic::experimental_stackmap:
5877
112
    visitStackmap(I);
5878
112
    return nullptr;
5879
115
  case Intrinsic::experimental_patchpoint_void:
5880
115
  case Intrinsic::experimental_patchpoint_i64:
5881
115
    visitPatchpoint(&I);
5882
115
    return nullptr;
5883
58
  case Intrinsic::experimental_gc_statepoint:
5884
58
    LowerStatepoint(ImmutableStatepoint(&I));
5885
58
    return nullptr;
5886
24
  case Intrinsic::experimental_gc_result:
5887
24
    visitGCResult(cast<GCResultInst>(I));
5888
24
    return nullptr;
5889
66
  case Intrinsic::experimental_gc_relocate:
5890
66
    visitGCRelocate(cast<GCRelocateInst>(I));
5891
66
    return nullptr;
5892
0
  case Intrinsic::instrprof_increment:
5893
0
    llvm_unreachable("instrprof failed to lower an increment");
5894
0
  case Intrinsic::instrprof_value_profile:
5895
0
    llvm_unreachable("instrprof failed to lower a value profiling call");
5896
10
  case Intrinsic::localescape: {
5897
10
    MachineFunction &MF = DAG.getMachineFunction();
5898
10
    const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
5899
10
5900
10
    // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
5901
10
    // is the same on all targets.
5902
26
    for (unsigned Idx = 0, E = I.getNumArgOperands(); 
Idx < E26
;
++Idx16
) {
5903
16
      Value *Arg = I.getArgOperand(Idx)->stripPointerCasts();
5904
16
      if (isa<ConstantPointerNull>(Arg))
5905
0
        continue; // Skip null pointers. They represent a hole in index space.
5906
16
      AllocaInst *Slot = cast<AllocaInst>(Arg);
5907
16
      assert(FuncInfo.StaticAllocaMap.count(Slot) &&
5908
16
             "can only escape static allocas");
5909
16
      int FI = FuncInfo.StaticAllocaMap[Slot];
5910
16
      MCSymbol *FrameAllocSym =
5911
16
          MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
5912
16
              GlobalValue::dropLLVMManglingEscape(MF.getName()), Idx);
5913
16
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl,
5914
16
              TII->get(TargetOpcode::LOCAL_ESCAPE))
5915
16
          .addSym(FrameAllocSym)
5916
16
          .addFrameIndex(FI);
5917
16
    }
5918
10
5919
10
    return nullptr;
5920
115
  }
5921
115
5922
12
  case Intrinsic::localrecover: {
5923
12
    // i8* @llvm.localrecover(i8* %fn, i8* %fp, i32 %idx)
5924
12
    MachineFunction &MF = DAG.getMachineFunction();
5925
12
    MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout(), 0);
5926
12
5927
12
    // Get the symbol that defines the frame offset.
5928
12
    auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts());
5929
12
    auto *Idx = cast<ConstantInt>(I.getArgOperand(2));
5930
12
    unsigned IdxVal =
5931
12
        unsigned(Idx->getLimitedValue(std::numeric_limits<int>::max()));
5932
12
    MCSymbol *FrameAllocSym =
5933
12
        MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
5934
12
            GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal);
5935
12
5936
12
    // Create a MCSymbol for the label to avoid any target lowering
5937
12
    // that would make this PC relative.
5938
12
    SDValue OffsetSym = DAG.getMCSymbol(FrameAllocSym, PtrVT);
5939
12
    SDValue OffsetVal =
5940
12
        DAG.getNode(ISD::LOCAL_RECOVER, sdl, PtrVT, OffsetSym);
5941
12
5942
12
    // Add the offset to the FP.
5943
12
    Value *FP = I.getArgOperand(1);
5944
12
    SDValue FPVal = getValue(FP);
5945
12
    SDValue Add = DAG.getNode(ISD::ADD, sdl, PtrVT, FPVal, OffsetVal);
5946
12
    setValue(&I, Add);
5947
12
5948
12
    return nullptr;
5949
115
  }
5950
115
5951
6
  case Intrinsic::eh_exceptionpointer:
5952
6
  case Intrinsic::eh_exceptioncode: {
5953
6
    // Get the exception pointer vreg, copy from it, and resize it to fit.
5954
6
    const auto *CPI = cast<CatchPadInst>(I.getArgOperand(0));
5955
6
    MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
5956
6
    const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT);
5957
6
    unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC);
5958
6
    SDValue N =
5959
6
        DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT);
5960
6
    if (Intrinsic == Intrinsic::eh_exceptioncode)
5961
3
      N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32);
5962
6
    setValue(&I, N);
5963
6
    return nullptr;
5964
6
  }
5965
2
  case Intrinsic::xray_customevent: {
5966
2
    // Here we want to make sure that the intrinsic behaves as if it has a
5967
2
    // specific calling convention, and only for x86_64.
5968
2
    // FIXME: Support other platforms later.
5969
2
    const auto &Triple = DAG.getTarget().getTargetTriple();
5970
2
    if (
Triple.getArch() != Triple::x86_64 || 2
!Triple.isOSLinux()2
)
5971
0
      return nullptr;
5972
2
5973
2
    SDLoc DL = getCurSDLoc();
5974
2
    SmallVector<SDValue, 8> Ops;
5975
2
5976
2
    // We want to say that we always want the arguments in registers.
5977
2
    SDValue LogEntryVal = getValue(I.getArgOperand(0));
5978
2
    SDValue StrSizeVal = getValue(I.getArgOperand(1));
5979
2
    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
5980
2
    SDValue Chain = getRoot();
5981
2
    Ops.push_back(LogEntryVal);
5982
2
    Ops.push_back(StrSizeVal);
5983
2
    Ops.push_back(Chain);
5984
2
5985
2
    // We need to enforce the calling convention for the callsite, so that
5986
2
    // argument ordering is enforced correctly, and that register allocation can
5987
2
    // see that some registers may be assumed clobbered and have to preserve
5988
2
    // them across calls to the intrinsic.
5989
2
    MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHABLE_EVENT_CALL,
5990
2
                                           DL, NodeTys, Ops);
5991
2
    SDValue patchableNode = SDValue(MN, 0);
5992
2
    DAG.setRoot(patchableNode);
5993
2
    setValue(&I, patchableNode);
5994
2
    return nullptr;
5995
2
  }
5996
0
  case Intrinsic::experimental_deoptimize:
5997
0
    LowerDeoptimizeCall(&I);
5998
0
    return nullptr;
5999
2
6000
913
  case Intrinsic::experimental_vector_reduce_fadd:
6001
913
  case Intrinsic::experimental_vector_reduce_fmul:
6002
913
  case Intrinsic::experimental_vector_reduce_add:
6003
913
  case Intrinsic::experimental_vector_reduce_mul:
6004
913
  case Intrinsic::experimental_vector_reduce_and:
6005
913
  case Intrinsic::experimental_vector_reduce_or:
6006
913
  case Intrinsic::experimental_vector_reduce_xor:
6007
913
  case Intrinsic::experimental_vector_reduce_smax:
6008
913
  case Intrinsic::experimental_vector_reduce_smin:
6009
913
  case Intrinsic::experimental_vector_reduce_umax:
6010
913
  case Intrinsic::experimental_vector_reduce_umin:
6011
913
  case Intrinsic::experimental_vector_reduce_fmax:
6012
913
  case Intrinsic::experimental_vector_reduce_fmin:
6013
913
    visitVectorReduce(I, Intrinsic);
6014
913
    return nullptr;
6015
0
  }
6016
0
}
6017
6018
void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
6019
40
    const ConstrainedFPIntrinsic &FPI) {
6020
40
  SDLoc sdl = getCurSDLoc();
6021
40
  unsigned Opcode;
6022
40
  switch (FPI.getIntrinsicID()) {
6023
0
  
default: 0
llvm_unreachable0
("Impossible intrinsic"); // Can't reach here.
6024
2
  case Intrinsic::experimental_constrained_fadd:
6025
2
    Opcode = ISD::STRICT_FADD;
6026
2
    break;
6027
6
  case Intrinsic::experimental_constrained_fsub:
6028
6
    Opcode = ISD::STRICT_FSUB;
6029
6
    break;
6030
2
  case Intrinsic::experimental_constrained_fmul:
6031
2
    Opcode = ISD::STRICT_FMUL;
6032
2
    break;
6033
2
  case Intrinsic::experimental_constrained_fdiv:
6034
2
    Opcode = ISD::STRICT_FDIV;
6035
2
    break;
6036
0
  case Intrinsic::experimental_constrained_frem:
6037
0
    Opcode = ISD::STRICT_FREM;
6038
0
    break;
6039
4
  case Intrinsic::experimental_constrained_fma:
6040
4
    Opcode = ISD::STRICT_FMA;
6041
4
    break;
6042
2
  case Intrinsic::experimental_constrained_sqrt:
6043
2
    Opcode = ISD::STRICT_FSQRT;
6044
2
    break;
6045
2
  case Intrinsic::experimental_constrained_pow:
6046
2
    Opcode = ISD::STRICT_FPOW;
6047
2
    break;
6048
2
  case Intrinsic::experimental_constrained_powi:
6049
2
    Opcode = ISD::STRICT_FPOWI;
6050
2
    break;
6051
2
  case Intrinsic::experimental_constrained_sin:
6052
2
    Opcode = ISD::STRICT_FSIN;
6053
2
    break;
6054
2
  case Intrinsic::experimental_constrained_cos:
6055
2
    Opcode = ISD::STRICT_FCOS;
6056
2
    break;
6057
2
  case Intrinsic::experimental_constrained_exp:
6058
2
    Opcode = ISD::STRICT_FEXP;
6059
2
    break;
6060
2
  case Intrinsic::experimental_constrained_exp2:
6061
2
    Opcode = ISD::STRICT_FEXP2;
6062
2
    break;
6063
2
  case Intrinsic::experimental_constrained_log:
6064
2
    Opcode = ISD::STRICT_FLOG;
6065
2
    break;
6066
2
  case Intrinsic::experimental_constrained_log10:
6067
2
    Opcode = ISD::STRICT_FLOG10;
6068
2
    break;
6069
2
  case Intrinsic::experimental_constrained_log2:
6070
2
    Opcode = ISD::STRICT_FLOG2;
6071
2
    break;
6072
2
  case Intrinsic::experimental_constrained_rint:
6073
2
    Opcode = ISD::STRICT_FRINT;
6074
2
    break;
6075
2
  case Intrinsic::experimental_constrained_nearbyint:
6076
2
    Opcode = ISD::STRICT_FNEARBYINT;
6077
2
    break;
6078
40
  }
6079
40
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6080
40
  SDValue Chain = getRoot();
6081
40
  SmallVector<EVT, 4> ValueVTs;
6082
40
  ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs);
6083
40
  ValueVTs.push_back(MVT::Other); // Out chain
6084
40
6085
40
  SDVTList VTs = DAG.getVTList(ValueVTs);
6086
40
  SDValue Result;
6087
40
  if (FPI.isUnaryOp())
6088
20
    Result = DAG.getNode(Opcode, sdl, VTs,
6089
20
                         { Chain, getValue(FPI.getArgOperand(0)) });
6090
20
  else 
if (20
FPI.isTernaryOp()20
)
6091
4
    Result = DAG.getNode(Opcode, sdl, VTs,
6092
4
                         { Chain, getValue(FPI.getArgOperand(0)),
6093
4
                                  getValue(FPI.getArgOperand(1)),
6094
4
                                  getValue(FPI.getArgOperand(2)) });
6095
20
  else
6096
16
    Result = DAG.getNode(Opcode, sdl, VTs,
6097
16
                         { Chain, getValue(FPI.getArgOperand(0)),
6098
16
                           getValue(FPI.getArgOperand(1))  });
6099
40
6100
40
  assert(Result.getNode()->getNumValues() == 2);
6101
40
  SDValue OutChain = Result.getValue(1);
6102
40
  DAG.setRoot(OutChain);
6103
40
  SDValue FPResult = Result.getValue(0);
6104
40
  setValue(&FPI, FPResult);
6105
40
}
6106
6107
std::pair<SDValue, SDValue>
6108
SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
6109
1.76M
                                    const BasicBlock *EHPadBB) {
6110
1.76M
  MachineFunction &MF = DAG.getMachineFunction();
6111
1.76M
  MachineModuleInfo &MMI = MF.getMMI();
6112
1.76M
  MCSymbol *BeginLabel = nullptr;
6113
1.76M
6114
1.76M
  if (
EHPadBB1.76M
) {
6115
18.5k
    // Insert a label before the invoke call to mark the try range.  This can be
6116
18.5k
    // used to detect deletion of the invoke via the MachineModuleInfo.
6117
18.5k
    BeginLabel = MMI.getContext().createTempSymbol();
6118
18.5k
6119
18.5k
    // For SjLj, keep track of which landing pads go with which invokes
6120
18.5k
    // so as to maintain the ordering of pads in the LSDA.
6121
18.5k
    unsigned CallSiteIndex = MMI.getCurrentCallSite();
6122
18.5k
    if (
CallSiteIndex18.5k
) {
6123
169
      MF.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
6124
169
      LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex);
6125
169
6126
169
      // Now that the call site is handled, stop tracking it.
6127
169
      MMI.setCurrentCallSite(0);
6128
169
    }
6129
18.5k
6130
18.5k
    // Both PendingLoads and PendingExports must be flushed here;
6131
18.5k
    // this call might not return.
6132
18.5k
    (void)getRoot();
6133
18.5k
    DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getControlRoot(), BeginLabel));
6134
18.5k
6135
18.5k
    CLI.setChain(getRoot());
6136
18.5k
  }
6137
1.76M
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6138
1.76M
  std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
6139
1.76M
6140
1.76M
  assert((CLI.IsTailCall || Result.second.getNode()) &&
6141
1.76M
         "Non-null chain expected with non-tail call!");
6142
1.76M
  assert((Result.second.getNode() || !Result.first.getNode()) &&
6143
1.76M
         "Null value expected with tail call!");
6144
1.76M
6145
1.76M
  if (
!Result.second.getNode()1.76M
) {
6146
231k
    // As a special case, a null chain means that a tail call has been emitted
6147
231k
    // and the DAG root is already updated.
6148
231k
    HasTailCall = true;
6149
231k
6150
231k
    // Since there's no actual continuation from this block, nothing can be
6151
231k
    // relying on us setting vregs for them.
6152
231k
    PendingExports.clear();
6153
1.76M
  } else {
6154
1.53M
    DAG.setRoot(Result.second);
6155
1.53M
  }
6156
1.76M
6157
1.76M
  if (
EHPadBB1.76M
) {
6158
18.5k
    // Insert a label at the end of the invoke call to mark the try range.  This
6159
18.5k
    // can be used to detect deletion of the invoke via the MachineModuleInfo.
6160
18.5k
    MCSymbol *EndLabel = MMI.getContext().createTempSymbol();
6161
18.5k
    DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel));
6162
18.5k
6163
18.5k
    // Inform MachineModuleInfo of range.
6164
18.5k
    if (
MF.hasEHFunclets()18.5k
) {
6165
141
      assert(CLI.CS);
6166
141
      WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo();
6167
141
      EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS.getInstruction()),
6168
141
                                BeginLabel, EndLabel);
6169
18.5k
    } else {
6170
18.4k
      MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
6171
18.4k
    }
6172
18.5k
  }
6173
1.76M
6174
1.76M
  return Result;
6175
1.76M
}
6176
6177
void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
6178
                                      bool isTailCall,
6179
1.76M
                                      const BasicBlock *EHPadBB) {
6180
1.76M
  auto &DL = DAG.getDataLayout();
6181
1.76M
  FunctionType *FTy = CS.getFunctionType();
6182
1.76M
  Type *RetTy = CS.getType();
6183
1.76M
6184
1.76M
  TargetLowering::ArgListTy Args;
6185
1.76M
  Args.reserve(CS.arg_size());
6186
1.76M
6187
1.76M
  const Value *SwiftErrorVal = nullptr;
6188
1.76M
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6189
1.76M
6190
1.76M
  // We can't tail call inside a function with a swifterror argument. Lowering
6191
1.76M
  // does not support this yet. It would have to move into the swifterror
6192
1.76M
  // register before the call.
6193
1.76M
  auto *Caller = CS.getInstruction()->getParent()->getParent();
6194
1.76M
  if (TLI.supportSwiftError() &&
6195
1.74M
      Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
6196
111
    isTailCall = false;
6197
1.76M
6198
1.76M
  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
6199
5.76M
       
i != e5.76M
;
++i4.00M
) {
6200
4.00M
    TargetLowering::ArgListEntry Entry;
6201
4.00M
    const Value *V = *i;
6202
4.00M
6203
4.00M
    // Skip empty types
6204
4.00M
    if (V->getType()->isEmptyTy())
6205
6
      continue;
6206
4.00M
6207
4.00M
    SDValue ArgNode = getValue(V);
6208
4.00M
    Entry.Node = ArgNode; Entry.Ty = V->getType();
6209
4.00M
6210
4.00M
    Entry.setAttributes(&CS, i - CS.arg_begin());
6211
4.00M
6212
4.00M
    // Use swifterror virtual register as input to the call.
6213
4.00M
    if (
Entry.IsSwiftError && 4.00M
TLI.supportSwiftError()129
) {
6214
110
      SwiftErrorVal = V;
6215
110
      // We find the virtual register for the actual swifterror argument.
6216
110
      // Instead of using the Value, we use the virtual register instead.
6217
110
      Entry.Node = DAG.getRegister(FuncInfo
6218
110
                                       .getOrCreateSwiftErrorVRegUseAt(
6219
110
                                           CS.getInstruction(), FuncInfo.MBB, V)
6220
110
                                       .first,
6221
110
                                   EVT(TLI.getPointerTy(DL)));
6222
110
    }
6223
4.00M
6224
4.00M
    Args.push_back(Entry);
6225
4.00M
6226
4.00M
    // If we have an explicit sret argument that is an Instruction, (i.e., it
6227
4.00M
    // might point to function-local memory), we can't meaningfully tail-call.
6228
4.00M
    if (
Entry.IsSRet && 4.00M
isa<Instruction>(V)3.58k
)
6229
3.52k
      isTailCall = false;
6230
4.00M
  }
6231
1.76M
6232
1.76M
  // Check if target-independent constraints permit a tail call here.
6233
1.76M
  // Target-dependent constraints are checked within TLI->LowerCallTo.
6234
1.76M
  if (
isTailCall && 1.76M
!isInTailCallPosition(CS, DAG.getTarget())1.32M
)
6235
1.08M
    isTailCall = false;
6236
1.76M
6237
1.76M
  // Disable tail calls if there is an swifterror argument. Targets have not
6238
1.76M
  // been updated to support tail calls.
6239
1.76M
  if (
TLI.supportSwiftError() && 1.76M
SwiftErrorVal1.74M
)
6240
110
    isTailCall = false;
6241
1.76M
6242
1.76M
  TargetLowering::CallLoweringInfo CLI(DAG);
6243
1.76M
  CLI.setDebugLoc(getCurSDLoc())
6244
1.76M
      .setChain(getRoot())
6245
1.76M
      .setCallee(RetTy, FTy, Callee, std::move(Args), CS)
6246
1.76M
      .setTailCall(isTailCall)
6247
1.76M
      .setConvergent(CS.isConvergent());
6248
1.76M
  std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
6249
1.76M
6250
1.76M
  if (
Result.first.getNode()1.76M
) {
6251
1.03M
    const Instruction *Inst = CS.getInstruction();
6252
1.03M
    Result.first = lowerRangeToAssertZExt(DAG, *Inst, Result.first);
6253
1.03M
    setValue(Inst, Result.first);
6254
1.03M
  }
6255
1.76M
6256
1.76M
  // The last element of CLI.InVals has the SDValue for swifterror return.
6257
1.76M
  // Here we copy it to a virtual register and update SwiftErrorMap for
6258
1.76M
  // book-keeping.
6259
1.76M
  if (
SwiftErrorVal && 1.76M
TLI.supportSwiftError()110
) {
6260
110
    // Get the last element of InVals.
6261
110
    SDValue Src = CLI.InVals.back();
6262
110
    unsigned VReg; bool CreatedVReg;
6263
110
    std::tie(VReg, CreatedVReg) =
6264
110
        FuncInfo.getOrCreateSwiftErrorVRegDefAt(CS.getInstruction());
6265
110
    SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src);
6266
110
    // We update the virtual register for the actual swifterror argument.
6267
110
    if (CreatedVReg)
6268
67
      FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg);
6269
110
    DAG.setRoot(CopyNode);
6270
110
  }
6271
1.76M
}
6272
6273
static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
6274
310
                             SelectionDAGBuilder &Builder) {
6275
310
  // Check to see if this load can be trivially constant folded, e.g. if the
6276
310
  // input is from a string literal.
6277
310
  if (const Constant *
LoadInput310
= dyn_cast<Constant>(PtrVal)) {
6278
117
    // Cast pointer to the type we really want to load.
6279
117
    Type *LoadTy =
6280
117
        Type::getIntNTy(PtrVal->getContext(), LoadVT.getScalarSizeInBits());
6281
117
    if (LoadVT.isVector())
6282
8
      LoadTy = VectorType::get(LoadTy, LoadVT.getVectorNumElements());
6283
117
6284
117
    LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
6285
117
                                         PointerType::getUnqual(LoadTy));
6286
117
6287
117
    if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr(
6288
117
            const_cast<Constant *>(LoadInput), LoadTy, *Builder.DL))
6289
93
      return Builder.getValue(LoadCst);
6290
217
  }
6291
217
6292
217
  // Otherwise, we have to emit the load.  If the pointer is to unfoldable but
6293
217
  // still constant memory, the input chain can be the entry node.
6294
217
  SDValue Root;
6295
217
  bool ConstantMemory = false;
6296
217
6297
217
  // Do not serialize (non-volatile) loads of constant memory with anything.
6298
217
  if (
Builder.AA && 217
Builder.AA->pointsToConstantMemory(PtrVal)217
) {
6299
0
    Root = Builder.DAG.getEntryNode();
6300
0
    ConstantMemory = true;
6301
217
  } else {
6302
217
    // Do not serialize non-volatile loads against each other.
6303
217
    Root = Builder.DAG.getRoot();
6304
217
  }
6305
217
6306
217
  SDValue Ptr = Builder.getValue(PtrVal);
6307
217
  SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root,
6308
217
                                        Ptr, MachinePointerInfo(PtrVal),
6309
217
                                        /* Alignment = */ 1);
6310
217
6311
217
  if (!ConstantMemory)
6312
217
    Builder.PendingLoads.push_back(LoadVal.getValue(1));
6313
310
  return LoadVal;
6314
310
}
6315
6316
/// Record the value for an instruction that produces an integer result,
6317
/// converting the type where necessary.
6318
void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
6319
                                                  SDValue Value,
6320
172
                                                  bool IsSigned) {
6321
172
  EVT VT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
6322
172
                                                    I.getType(), true);
6323
172
  if (IsSigned)
6324
15
    Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT);
6325
172
  else
6326
157
    Value = DAG.getZExtOrTrunc(Value, getCurSDLoc(), VT);
6327
172
  setValue(&I, Value);
6328
172
}
6329
6330
/// See if we can lower a memcmp call into an optimized form. If so, return
6331
/// true and lower it. Otherwise return false, and it will be lowered like a
6332
/// normal call.
6333
/// The caller already checked that \p I calls the appropriate LibFunc with a
6334
/// correct prototype.
6335
679
bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
6336
679
  const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
6337
679
  const Value *Size = I.getArgOperand(2);
6338
679
  const ConstantInt *CSize = dyn_cast<ConstantInt>(Size);
6339
679
  if (
CSize && 679
CSize->getZExtValue() == 0451
) {
6340
1
    EVT CallVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
6341
1
                                                          I.getType(), true);
6342
1
    setValue(&I, DAG.getConstant(0, getCurSDLoc(), CallVT));
6343
1
    return true;
6344
1
  }
6345
678
6346
678
  const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
6347
678
  std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForMemcmp(
6348
678
      DAG, getCurSDLoc(), DAG.getRoot(), getValue(LHS), getValue(RHS),
6349
678
      getValue(Size), MachinePointerInfo(LHS), MachinePointerInfo(RHS));
6350
678
  if (
Res.first.getNode()678
) {
6351
12
    processIntegerCallValue(I, Res.first, true);
6352
12
    PendingLoads.push_back(Res.second);
6353
12
    return true;
6354
12
  }
6355
666
6356
666
  // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS)  != 0
6357
666
  // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS)  != 0
6358
666
  
if (666
!CSize || 666
!isOnlyUsedInZeroEqualityComparison(&I)438
)
6359
306
    return false;
6360
360
6361
360
  // If the target has a fast compare for the given size, it will return a
6362
360
  // preferred load type for that size. Require that the load VT is legal and
6363
360
  // that the target supports unaligned loads of that type. Otherwise, return
6364
360
  // INVALID.
6365
360
  
auto hasFastLoadsAndCompare = [&](unsigned NumBits) 360
{
6366
120
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6367
120
    MVT LVT = TLI.hasFastEqualityCompare(NumBits);
6368
120
    if (
LVT != MVT::INVALID_SIMPLE_VALUE_TYPE120
) {
6369
54
      // TODO: Handle 5 byte compare as 4-byte + 1 byte.
6370
54
      // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
6371
54
      // TODO: Check alignment of src and dest ptrs.
6372
54
      unsigned DstAS = LHS->getType()->getPointerAddressSpace();
6373
54
      unsigned SrcAS = RHS->getType()->getPointerAddressSpace();
6374
54
      if (!TLI.isTypeLegal(LVT) ||
6375
54
          !TLI.allowsMisalignedMemoryAccesses(LVT, SrcAS) ||
6376
54
          !TLI.allowsMisalignedMemoryAccesses(LVT, DstAS))
6377
0
        LVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
6378
54
    }
6379
120
6380
120
    return LVT;
6381
120
  };
6382
360
6383
360
  // This turns into unaligned loads. We only do this if the target natively
6384
360
  // supports the MVT we'll be loading or if it is small enough (<= 4) that
6385
360
  // we'll only produce a small number of byte loads.
6386
360
  MVT LoadVT;
6387
360
  unsigned NumBitsToCompare = CSize->getZExtValue() * 8;
6388
360
  switch (NumBitsToCompare) {
6389
139
  default:
6390
139
    return false;
6391
26
  case 16:
6392
26
    LoadVT = MVT::i16;
6393
26
    break;
6394
75
  case 32:
6395
75
    LoadVT = MVT::i32;
6396
75
    break;
6397
120
  case 64:
6398
120
  case 128:
6399
120
  case 256:
6400
120
    LoadVT = hasFastLoadsAndCompare(NumBitsToCompare);
6401
120
    break;
6402
221
  }
6403
221
6404
221
  
if (221
LoadVT == MVT::INVALID_SIMPLE_VALUE_TYPE221
)
6405
66
    return false;
6406
155
6407
155
  SDValue LoadL = getMemCmpLoad(LHS, LoadVT, *this);
6408
155
  SDValue LoadR = getMemCmpLoad(RHS, LoadVT, *this);
6409
155
6410
155
  // Bitcast to a wide integer type if the loads are vectors.
6411
155
  if (
LoadVT.isVector()155
) {
6412
16
    EVT CmpVT = EVT::getIntegerVT(LHS->getContext(), LoadVT.getSizeInBits());
6413
16
    LoadL = DAG.getBitcast(CmpVT, LoadL);
6414
16
    LoadR = DAG.getBitcast(CmpVT, LoadR);
6415
16
  }
6416
679
6417
679
  SDValue Cmp = DAG.getSetCC(getCurSDLoc(), MVT::i1, LoadL, LoadR, ISD::SETNE);
6418
679
  processIntegerCallValue(I, Cmp, false);
6419
679
  return true;
6420
679
}
6421
6422
/// See if we can lower a memchr call into an optimized form. If so, return
6423
/// true and lower it. Otherwise return false, and it will be lowered like a
6424
/// normal call.
6425
/// The caller already checked that \p I calls the appropriate LibFunc with a
6426
/// correct prototype.
6427
82
bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
6428
82
  const Value *Src = I.getArgOperand(0);
6429
82
  const Value *Char = I.getArgOperand(1);
6430
82
  const Value *Length = I.getArgOperand(2);
6431
82
6432
82
  const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
6433
82
  std::pair<SDValue, SDValue> Res =
6434
82
    TSI.EmitTargetCodeForMemchr(DAG, getCurSDLoc(), DAG.getRoot(),
6435
82
                                getValue(Src), getValue(Char), getValue(Length),
6436
82
                                MachinePointerInfo(Src));
6437
82
  if (
Res.first.getNode()82
) {
6438
5
    setValue(&I, Res.first);
6439
5
    PendingLoads.push_back(Res.second);
6440
5
    return true;
6441
5
  }
6442
77
6443
77
  return false;
6444
77
}
6445
6446
/// See if we can lower a mempcpy call into an optimized form. If so, return
6447
/// true and lower it. Otherwise return false, and it will be lowered like a
6448
/// normal call.
6449
/// The caller already checked that \p I calls the appropriate LibFunc with a
6450
/// correct prototype.
6451
2
bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
6452
2
  SDValue Dst = getValue(I.getArgOperand(0));
6453
2
  SDValue Src = getValue(I.getArgOperand(1));
6454
2
  SDValue Size = getValue(I.getArgOperand(2));
6455
2
6456
2
  unsigned DstAlign = DAG.InferPtrAlignment(Dst);
6457
2
  unsigned SrcAlign = DAG.InferPtrAlignment(Src);
6458
2
  unsigned Align = std::min(DstAlign, SrcAlign);
6459
2
  if (Align == 0) // Alignment of one or both could not be inferred.
6460
2
    Align = 1; // 0 and 1 both specify no alignment, but 0 is reserved.
6461
2
6462
2
  bool isVol = false;
6463
2
  SDLoc sdl = getCurSDLoc();
6464
2
6465
2
  // In the mempcpy context we need to pass in a false value for isTailCall
6466
2
  // because the return pointer needs to be adjusted by the size of
6467
2
  // the copied memory.
6468
2
  SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Align, isVol,
6469
2
                             false, /*isTailCall=*/false,
6470
2
                             MachinePointerInfo(I.getArgOperand(0)),
6471
2
                             MachinePointerInfo(I.getArgOperand(1)));
6472
2
  assert(MC.getNode() != nullptr &&
6473
2
         "** memcpy should not be lowered as TailCall in mempcpy context **");
6474
2
  DAG.setRoot(MC);
6475
2
6476
2
  // Check if Size needs to be truncated or extended.
6477
2
  Size = DAG.getSExtOrTrunc(Size, sdl, Dst.getValueType());
6478
2
6479
2
  // Adjust return pointer to point just past the last dst byte.
6480
2
  SDValue DstPlusSize = DAG.getNode(ISD::ADD, sdl, Dst.getValueType(),
6481
2
                                    Dst, Size);
6482
2
  setValue(&I, DstPlusSize);
6483
2
  return true;
6484
2
}
6485
6486
/// See if we can lower a strcpy call into an optimized form.  If so, return
6487
/// true and lower it, otherwise return false and it will be lowered like a
6488
/// normal call.
6489
/// The caller already checked that \p I calls the appropriate LibFunc with a
6490
/// correct prototype.
6491
838
bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) {
6492
838
  const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
6493
838
6494
838
  const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
6495
838
  std::pair<SDValue, SDValue> Res =
6496
838
    TSI.EmitTargetCodeForStrcpy(DAG, getCurSDLoc(), getRoot(),
6497
838
                                getValue(Arg0), getValue(Arg1),
6498
838
                                MachinePointerInfo(Arg0),
6499
838
                                MachinePointerInfo(Arg1), isStpcpy);
6500
838
  if (
Res.first.getNode()838
) {
6501
3
    setValue(&I, Res.first);
6502
3
    DAG.setRoot(Res.second);
6503
3
    return true;
6504
3
  }
6505
835
6506
835
  return false;
6507
835
}
6508
6509
/// See if we can lower a strcmp call into an optimized form.  If so, return
6510
/// true and lower it, otherwise return false and it will be lowered like a
6511
/// normal call.
6512
/// The caller already checked that \p I calls the appropriate LibFunc with a
6513
/// correct prototype.
6514
11.9k
bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) {
6515
11.9k
  const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
6516
11.9k
6517
11.9k
  const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
6518
11.9k
  std::pair<SDValue, SDValue> Res =
6519
11.9k
    TSI.EmitTargetCodeForStrcmp(DAG, getCurSDLoc(), DAG.getRoot(),
6520
11.9k
                                getValue(Arg0), getValue(Arg1),
6521
11.9k
                                MachinePointerInfo(Arg0),
6522
11.9k
                                MachinePointerInfo(Arg1));
6523
11.9k
  if (
Res.first.getNode()11.9k
) {
6524
3
    processIntegerCallValue(I, Res.first, true);
6525
3
    PendingLoads.push_back(Res.second);
6526
3
    return true;
6527
3
  }
6528
11.9k
6529
11.9k
  return false;
6530
11.9k
}
6531
6532
/// See if we can lower a strlen call into an optimized form.  If so, return
6533
/// true and lower it, otherwise return false and it will be lowered like a
6534
/// normal call.
6535
/// The caller already checked that \p I calls the appropriate LibFunc with a
6536
/// correct prototype.
6537
3.31k
bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) {
6538
3.31k
  const Value *Arg0 = I.getArgOperand(0);
6539
3.31k
6540
3.31k
  const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
6541
3.31k
  std::pair<SDValue, SDValue> Res =
6542
3.31k
    TSI.EmitTargetCodeForStrlen(DAG, getCurSDLoc(), DAG.getRoot(),
6543
3.31k
                                getValue(Arg0), MachinePointerInfo(Arg0));
6544
3.31k
  if (
Res.first.getNode()3.31k
) {
6545
1
    processIntegerCallValue(I, Res.first, false);
6546
1
    PendingLoads.push_back(Res.second);
6547
1
    return true;
6548
1
  }
6549
3.31k
6550
3.31k
  return false;
6551
3.31k
}
6552
6553
/// See if we can lower a strnlen call into an optimized form.  If so, return
6554
/// true and lower it, otherwise return false and it will be lowered like a
6555
/// normal call.
6556
/// The caller already checked that \p I calls the appropriate LibFunc with a
6557
/// correct prototype.
6558
3
bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) {
6559
3
  const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
6560
3
6561
3
  const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
6562
3
  std::pair<SDValue, SDValue> Res =
6563
3
    TSI.EmitTargetCodeForStrnlen(DAG, getCurSDLoc(), DAG.getRoot(),
6564
3
                                 getValue(Arg0), getValue(Arg1),
6565
3
                                 MachinePointerInfo(Arg0));
6566
3
  if (
Res.first.getNode()3
) {
6567
1
    processIntegerCallValue(I, Res.first, false);
6568
1
    PendingLoads.push_back(Res.second);
6569
1
    return true;
6570
1
  }
6571
2
6572
2
  return false;
6573
2
}
6574
6575
/// See if we can lower a unary floating-point operation into an SDNode with
6576
/// the specified Opcode.  If so, return true and lower it, otherwise return
6577
/// false and it will be lowered like a normal call.
6578
/// The caller already checked that \p I calls the appropriate LibFunc with a
6579
/// correct prototype.
6580
bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
6581
1.87k
                                              unsigned Opcode) {
6582
1.87k
  // We already checked this call's prototype; verify it doesn't modify errno.
6583
1.87k
  if (!I.onlyReadsMemory())
6584
173
    return false;
6585
1.70k
6586
1.70k
  SDValue Tmp = getValue(I.getArgOperand(0));
6587
1.70k
  setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp));
6588
1.70k
  return true;
6589
1.70k
}
6590
6591
/// See if we can lower a binary floating-point operation into an SDNode with
6592
/// the specified Opcode. If so, return true and lower it. Otherwise return
6593
/// false, and it will be lowered like a normal call.
6594
/// The caller already checked that \p I calls the appropriate LibFunc with a
6595
/// correct prototype.
6596
bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I,
6597
38
                                               unsigned Opcode) {
6598
38
  // We already checked this call's prototype; verify it doesn't modify errno.
6599
38
  if (!I.onlyReadsMemory())
6600
0
    return false;
6601
38
6602
38
  SDValue Tmp0 = getValue(I.getArgOperand(0));
6603
38
  SDValue Tmp1 = getValue(I.getArgOperand(1));
6604
38
  EVT VT = Tmp0.getValueType();
6605
38
  setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1));
6606
38
  return true;
6607
38
}
6608
6609
2.16M
void SelectionDAGBuilder::visitCall(const CallInst &I) {
6610
2.16M
  // Handle inline assembly differently.
6611
2.16M
  if (
isa<InlineAsm>(I.getCalledValue())2.16M
) {
6612
11.4k
    visitInlineAsm(&I);
6613
11.4k
    return;
6614
11.4k
  }
6615
2.15M
6616
2.15M
  MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
6617
2.15M
  computeUsesVAFloatArgument(I, MMI);
6618
2.15M
6619
2.15M
  const char *RenameFn = nullptr;
6620
2.15M
  if (Function *
F2.15M
= I.getCalledFunction()) {
6621
2.08M
    if (
F->isDeclaration()2.08M
) {
6622
1.02M
      if (const TargetIntrinsicInfo *
II1.02M
= TM.getIntrinsicInfo()) {
6623
12.8k
        if (unsigned 
IID12.8k
= II->getIntrinsicID(F)) {
6624
925
          RenameFn = visitIntrinsicCall(I, IID);
6625
925
          if (!RenameFn)
6626
925
            return;
6627
1.02M
        }
6628
12.8k
      }
6629
1.02M
      
if (Intrinsic::ID 1.02M
IID1.02M
= F->getIntrinsicID()) {
6630
407k
        RenameFn = visitIntrinsicCall(I, IID);
6631
407k
        if (!RenameFn)
6632
407k
          return;
6633
1.67M
      }
6634
1.02M
    }
6635
1.67M
6636
1.67M
    // Check for well-known libc/libm calls.  If the function is internal, it
6637
1.67M
    // can't be a library call.  Don't do the check if marked as nobuiltin for
6638
1.67M
    // some reason or the call site requires strict floating point semantics.
6639
1.67M
    LibFunc Func;
6640
1.67M
    if (
!I.isNoBuiltin() && 1.67M
!I.isStrictFP()454k
&&
!F->hasLocalLinkage()454k
&&
6641
1.67M
        
F->hasName()419k
&&
LibInfo->getLibFunc(*F, Func)419k
&&
6642
1.67M
        
LibInfo->hasOptimizedCodeGen(Func)139k
) {
6643
18.9k
      switch (Func) {
6644
0
      default: break;
6645
129
      case LibFunc_copysign:
6646
129
      case LibFunc_copysignf:
6647
129
      case LibFunc_copysignl:
6648
129
        // We already checked this call's prototype; verify it doesn't modify
6649
129
        // errno.
6650
129
        if (
I.onlyReadsMemory()129
) {
6651
124
          SDValue LHS = getValue(I.getArgOperand(0));
6652
124
          SDValue RHS = getValue(I.getArgOperand(1));
6653
124
          setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurSDLoc(),
6654
124
                                   LHS.getValueType(), LHS, RHS));
6655
124
          return;
6656
124
        }
6657
5
        break;
6658
77
      case LibFunc_fabs:
6659
77
      case LibFunc_fabsf:
6660
77
      case LibFunc_fabsl:
6661
77
        if (visitUnaryFloatCall(I, ISD::FABS))
6662
75
          return;
6663
2
        break;
6664
18
      case LibFunc_fmin:
6665
18
      case LibFunc_fminf:
6666
18
      case LibFunc_fminl:
6667
18
        if (visitBinaryFloatCall(I, ISD::FMINNUM))
6668
18
          return;
6669
0
        break;
6670
20
      case LibFunc_fmax:
6671
20
      case LibFunc_fmaxf:
6672
20
      case LibFunc_fmaxl:
6673
20
        if (visitBinaryFloatCall(I, ISD::FMAXNUM))
6674
20
          return;
6675
0
        break;
6676
352
      case LibFunc_sin:
6677
352
      case LibFunc_sinf:
6678
352
      case LibFunc_sinl:
6679
352
        if (visitUnaryFloatCall(I, ISD::FSIN))
6680
295
          return;
6681
57
        break;
6682
418
      case LibFunc_cos:
6683
418
      case LibFunc_cosf:
6684
418
      case LibFunc_cosl:
6685
418
        if (visitUnaryFloatCall(I, ISD::FCOS))
6686
366
          return;
6687
52
        break;
6688
763
      case LibFunc_sqrt:
6689
763
      case LibFunc_sqrtf:
6690
763
      case LibFunc_sqrtl:
6691
763
      case LibFunc_sqrt_finite:
6692
763
      case LibFunc_sqrtf_finite:
6693
763
      case LibFunc_sqrtl_finite:
6694
763
        if (visitUnaryFloatCall(I, ISD::FSQRT))
6695
735
          return;
6696
28
        break;
6697
66
      case LibFunc_floor:
6698
66
      case LibFunc_floorf:
6699
66
      case LibFunc_floorl:
6700
66
        if (visitUnaryFloatCall(I, ISD::FFLOOR))
6701
59
          return;
6702
7
        break;
6703
23
      case LibFunc_nearbyint:
6704
23
      case LibFunc_nearbyintf:
6705
23
      case LibFunc_nearbyintl:
6706
23
        if (visitUnaryFloatCall(I, ISD::FNEARBYINT))
6707
23
          return;
6708
0
        break;
6709
54
      case LibFunc_ceil:
6710
54
      case LibFunc_ceilf:
6711
54
      case LibFunc_ceill:
6712
54
        if (visitUnaryFloatCall(I, ISD::FCEIL))
6713
47
          return;
6714
7
        break;
6715
23
      case LibFunc_rint:
6716
23
      case LibFunc_rintf:
6717
23
      case LibFunc_rintl:
6718
23
        if (visitUnaryFloatCall(I, ISD::FRINT))
6719
23
          return;
6720
0
        break;
6721
39
      case LibFunc_round:
6722
39
      case LibFunc_roundf:
6723
39
      case LibFunc_roundl:
6724
39
        if (visitUnaryFloatCall(I, ISD::FROUND))
6725
39
          return;
6726
0
        break;
6727
38
      case LibFunc_trunc:
6728
38
      case LibFunc_truncf:
6729
38
      case LibFunc_truncl:
6730
38
        if (visitUnaryFloatCall(I, ISD::FTRUNC))
6731
38
          return;
6732
0
        break;
6733
10
      case LibFunc_log2:
6734
10
      case LibFunc_log2f:
6735
10
      case LibFunc_log2l:
6736
10
        if (visitUnaryFloatCall(I, ISD::FLOG2))
6737
0
          return;
6738
10
        break;
6739
10
      case LibFunc_exp2:
6740
10
      case LibFunc_exp2f:
6741
10
      case LibFunc_exp2l:
6742
10
        if (visitUnaryFloatCall(I, ISD::FEXP2))
6743
0
          return;
6744
10
        break;
6745
679
      case LibFunc_memcmp:
6746
679
        if (visitMemCmpCall(I))
6747
168
          return;
6748
511
        break;
6749
2
      case LibFunc_mempcpy:
6750
2
        if (visitMemPCpyCall(I))
6751
2
          return;
6752
0
        break;
6753
82
      case LibFunc_memchr:
6754
82
        if (visitMemChrCall(I))
6755
5
          return;
6756
77
        break;
6757
837
      case LibFunc_strcpy:
6758
837
        if (visitStrCpyCall(I, false))
6759
2
          return;
6760
835
        break;
6761
1
      case LibFunc_stpcpy:
6762
1
        if (visitStrCpyCall(I, true))
6763
1
          return;
6764
0
        break;
6765
11.9k
      case LibFunc_strcmp:
6766
11.9k
        if (visitStrCmpCall(I))
6767
3
          return;
6768
11.9k
        break;
6769
3.31k
      case LibFunc_strlen:
6770
3.31k
        if (visitStrLenCall(I))
6771
1
          return;
6772
3.31k
        break;
6773
3
      case LibFunc_strnlen:
6774
3
        if (visitStrNLenCall(I))
6775
1
          return;
6776
2
        break;
6777
18.9k
      }
6778
18.9k
    }
6779
2.08M
  }
6780
1.74M
6781
1.74M
  SDValue Callee;
6782
1.74M
  if (!RenameFn)
6783
1.74M
    Callee = getValue(I.getCalledValue());
6784
1.74M
  else
6785
2
    Callee = DAG.getExternalSymbol(
6786
2
        RenameFn,
6787
2
        DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()));
6788
1.74M
6789
1.74M
  // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
6790
1.74M
  // have to do anything here to lower funclet bundles.
6791
1.74M
  assert(!I.hasOperandBundlesOtherThan(
6792
1.74M
             {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
6793
1.74M
         "Cannot lower calls with arbitrary operand bundles!");
6794
1.74M
6795
1.74M
  if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
6796
3
    LowerCallSiteWithDeoptBundle(&I, Callee, nullptr);
6797
1.74M
  else
6798
1.74M
    // Check if we can potentially perform a tail call. More detailed checking
6799
1.74M
    // is be done within LowerCallTo, after more information about the call is
6800
1.74M
    // known.
6801
1.74M
    LowerCallTo(&I, Callee, I.isTailCall());
6802
2.16M
}
6803
6804
namespace {
6805
6806
/// AsmOperandInfo - This contains information for each constraint that we are
6807
/// lowering.
6808
class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo {
6809
public:
6810
  /// CallOperand - If this is the result output operand or a clobber
6811
  /// this is null, otherwise it is the incoming operand to the CallInst.
6812
  /// This gets modified as the asm is processed.
6813
  SDValue CallOperand;
6814
6815
  /// AssignedRegs - If this is a register or register class operand, this
6816
  /// contains the set of register corresponding to the operand.
6817
  RegsForValue AssignedRegs;
6818
6819
  explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
6820
63.3k
    : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr, 0) {
6821
63.3k
  }
6822
6823
  /// Whether or not this operand accesses memory
6824
57.9k
  bool hasMemory(const TargetLowering &TLI) const {
6825
57.9k
    // Indirect operand accesses access memory.
6826
57.9k
    if (isIndirect)
6827
268
      return true;
6828
57.6k
6829
57.6k
    for (const auto &Code : Codes)
6830
102k
      
if (102k
TLI.getConstraintType(Code) == TargetLowering::C_Memory102k
)
6831
2.18k
        return true;
6832
55.4k
6833
55.4k
    return false;
6834
55.4k
  }
6835
6836
  /// getCallOperandValEVT - Return the EVT of the Value* that this operand
6837
  /// corresponds to.  If there is no Value* for this operand, it returns
6838
  /// MVT::Other.
6839
  EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI,
6840
3.28k
                           const DataLayout &DL) const {
6841
3.28k
    if (
!CallOperandVal3.28k
)
return MVT::Other0
;
6842
3.28k
6843
3.28k
    
if (3.28k
isa<BasicBlock>(CallOperandVal)3.28k
)
6844
2
      return TLI.getPointerTy(DL);
6845
3.27k
6846
3.27k
    llvm::Type *OpTy = CallOperandVal->getType();
6847
3.27k
6848
3.27k
    // FIXME: code duplicated from TargetLowering::ParseConstraints().
6849
3.27k
    // If this is an indirect operand, the operand is a pointer to the
6850
3.27k
    // accessed type.
6851
3.27k
    if (
isIndirect3.27k
) {
6852
358
      PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
6853
358
      if (!PtrTy)
6854
0
        report_fatal_error("Indirect operand for inline asm not a pointer!");
6855
358
      OpTy = PtrTy->getElementType();
6856
358
    }
6857
3.27k
6858
3.27k
    // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
6859
3.27k
    
if (StructType *3.27k
STy3.27k
= dyn_cast<StructType>(OpTy))
6860
12
      
if (12
STy->getNumElements() == 112
)
6861
8
        OpTy = STy->getElementType(0);
6862
3.27k
6863
3.27k
    // If OpTy is not a single value, it may be a struct/union that we
6864
3.27k
    // can tile with integers.
6865
3.27k
    if (
!OpTy->isSingleValueType() && 3.27k
OpTy->isSized()20
) {
6866
14
      unsigned BitSize = DL.getTypeSizeInBits(OpTy);
6867
14
      switch (BitSize) {
6868
7
      default: break;
6869
7
      case 1:
6870
7
      case 8:
6871
7
      case 16:
6872
7
      case 32:
6873
7
      case 64:
6874
7
      case 128:
6875
7
        OpTy = IntegerType::get(Context, BitSize);
6876
7
        break;
6877
3.27k
      }
6878
3.27k
    }
6879
3.27k
6880
3.27k
    return TLI.getValueType(DL, OpTy, true);
6881
3.27k
  }
6882
};
6883
6884
using SDISelAsmOperandInfoVector = SmallVector<SDISelAsmOperandInfo, 16>;
6885
6886
} // end anonymous namespace
6887
6888
/// Make sure that the output operand \p OpInfo and its corresponding input
6889
/// operand \p MatchingOpInfo have compatible constraint types (otherwise error
6890
/// out).
6891
static void patchMatchingInput(const SDISelAsmOperandInfo &OpInfo,
6892
                               SDISelAsmOperandInfo &MatchingOpInfo,
6893
197
                               SelectionDAG &DAG) {
6894
197
  if (OpInfo.ConstraintVT == MatchingOpInfo.ConstraintVT)
6895
184
    return;
6896
13
6897
13
  const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
6898
13
  const auto &TLI = DAG.getTargetLoweringInfo();
6899
13
6900
13
  std::pair<unsigned, const TargetRegisterClass *> MatchRC =
6901
13
      TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
6902
13
                                       OpInfo.ConstraintVT);
6903
13
  std::pair<unsigned, const TargetRegisterClass *> InputRC =
6904
13
      TLI.getRegForInlineAsmConstraint(TRI, MatchingOpInfo.ConstraintCode,
6905
13
                                       MatchingOpInfo.ConstraintVT);
6906
13
  if ((OpInfo.ConstraintVT.isInteger() !=
6907
13
       MatchingOpInfo.ConstraintVT.isInteger()) ||
6908
13
      
(MatchRC.second != InputRC.second)13
) {
6909
0
    // FIXME: error out in a more elegant fashion
6910
0
    report_fatal_error("Unsupported asm: input constraint"
6911
0
                       " with a matching output constraint of"
6912
0
                       " incompatible type!");
6913
0
  }
6914
13
  MatchingOpInfo.ConstraintVT = OpInfo.ConstraintVT;
6915
13
}
6916
6917
/// Get a direct memory input to behave well as an indirect operand.
6918
/// This may introduce stores, hence the need for a \p Chain.
6919
/// \return The (possibly updated) chain.
6920
static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
6921
                                        SDISelAsmOperandInfo &OpInfo,
6922
81
                                        SelectionDAG &DAG) {
6923
81
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6924
81
6925
81
  // If we don't have an indirect input, put it in the constpool if we can,
6926
81
  // otherwise spill it to a stack slot.
6927
81
  // TODO: This isn't quite right. We need to handle these according to
6928
81
  // the addressing mode that the constraint wants. Also, this may take
6929
81
  // an additional register for the computation and we don't want that
6930
81
  // either.
6931
81
6932
81
  // If the operand is a float, integer, or vector constant, spill to a
6933
81
  // constant pool entry to get its address.
6934
81
  const Value *OpVal = OpInfo.CallOperandVal;
6935
81
  if (
isa<ConstantFP>(OpVal) || 81
isa<ConstantInt>(OpVal)77
||
6936
81
      
isa<ConstantVector>(OpVal)71
||
isa<ConstantDataVector>(OpVal)71
) {
6937
10
    OpInfo.CallOperand = DAG.getConstantPool(
6938
10
        cast<Constant>(OpVal), TLI.getPointerTy(DAG.getDataLayout()));
6939
10
    return Chain;
6940
10
  }
6941
71
6942
71
  // Otherwise, create a stack slot and emit a store to it before the asm.
6943
71
  Type *Ty = OpVal->getType();
6944
71
  auto &DL = DAG.getDataLayout();
6945
71
  uint64_t TySize = DL.getTypeAllocSize(Ty);
6946
71
  unsigned Align = DL.getPrefTypeAlignment(Ty);
6947
71
  MachineFunction &MF = DAG.getMachineFunction();
6948
71
  int SSFI = MF.getFrameInfo().CreateStackObject(TySize, Align, false);
6949
71
  SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL));
6950
71
  Chain = DAG.getStore(Chain, Location, OpInfo.CallOperand, StackSlot,
6951
71
                       MachinePointerInfo::getFixedStack(MF, SSFI));
6952
71
  OpInfo.CallOperand = StackSlot;
6953
71
6954
71
  return Chain;
6955
71
}
6956
6957
/// GetRegistersForValue - Assign registers (virtual or physical) for the
6958
/// specified operand.  We prefer to assign virtual registers, to allow the
6959
/// register allocator to handle the assignment process.  However, if the asm
6960
/// uses features that we can't model on machineinstrs, we have SDISel do the
6961
/// allocation.  This produces generally horrible, but correct, code.
6962
///
6963
///   OpInfo describes the operand.
6964
static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
6965
                                 const SDLoc &DL,
6966
60.2k
                                 SDISelAsmOperandInfo &OpInfo) {
6967
60.2k
  LLVMContext &Context = *DAG.getContext();
6968
60.2k
6969
60.2k
  MachineFunction &MF = DAG.getMachineFunction();
6970
60.2k
  SmallVector<unsigned, 4> Regs;
6971
60.2k
  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
6972
60.2k
6973
60.2k
  // If this is a constraint for a single physreg, or a constraint for a
6974
60.2k
  // register class, find it.
6975
60.2k
  std::pair<unsigned, const TargetRegisterClass *> PhysReg =
6976
60.2k
      TLI.getRegForInlineAsmConstraint(&TRI, OpInfo.ConstraintCode,
6977
60.2k
                                       OpInfo.ConstraintVT);
6978
60.2k
6979
60.2k
  unsigned NumRegs = 1;
6980
60.2k
  if (
OpInfo.ConstraintVT != MVT::Other60.2k
) {
6981
5.98k
    // If this is a FP input in an integer register (or visa versa) insert a bit
6982
5.98k
    // cast of the input value.  More generally, handle any case where the input
6983
5.98k
    // value disagrees with the register class we plan to stick this in.
6984
5.98k
    if (
OpInfo.Type == InlineAsm::isInput && 5.98k
PhysReg.second2.29k
&&
6985
5.98k
        
!TRI.isTypeLegalForClass(*PhysReg.second, OpInfo.ConstraintVT)2.27k
) {
6986
185
      // Try to convert to the first EVT that the reg class contains.  If the
6987
185
      // types are identical size, use a bitcast to convert (e.g. two differing
6988
185
      // vector types).
6989
185
      MVT RegVT = *TRI.legalclasstypes_begin(*PhysReg.second);
6990
185
      if (
RegVT.getSizeInBits() == OpInfo.CallOperand.getValueSizeInBits()185
) {
6991
64
        OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
6992
64
                                         RegVT, OpInfo.CallOperand);
6993
64
        OpInfo.ConstraintVT = RegVT;
6994
185
      } else 
if (121
RegVT.isInteger() && 121
OpInfo.ConstraintVT.isFloatingPoint()112
) {
6995
1
        // If the input is a FP value and we want it in FP registers, do a
6996
1
        // bitcast to the corresponding integer type.  This turns an f64 value
6997
1
        // into i64, which can be passed with two i32 values on a 32-bit
6998
1
        // machine.
6999
1
        RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits());
7000
1
        OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
7001
1
                                         RegVT, OpInfo.CallOperand);
7002
1
        OpInfo.ConstraintVT = RegVT;
7003
1
      }
7004
185
    }
7005
5.98k
7006
5.98k
    NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
7007
5.98k
  }
7008
60.2k
7009
60.2k
  MVT RegVT;
7010
60.2k
  EVT ValueVT = OpInfo.ConstraintVT;
7011
60.2k
7012
60.2k
  // If this is a constraint for a specific physical register, like {r17},
7013
60.2k
  // assign it now.
7014
60.2k
  if (unsigned 
AssignedReg60.2k
= PhysReg.first) {
7015
49.4k
    const TargetRegisterClass *RC = PhysReg.second;
7016
49.4k
    if (OpInfo.ConstraintVT == MVT::Other)
7017
47.9k
      ValueVT = *TRI.legalclasstypes_begin(*RC);
7018
49.4k
7019
49.4k
    // Get the actual register value type.  This is important, because the user
7020
49.4k
    // may have asked for (e.g.) the AX register in i32 type.  We need to
7021
49.4k
    // remember that AX is actually i16 to get the right extension.
7022
49.4k
    RegVT = *TRI.legalclasstypes_begin(*RC);
7023
49.4k
7024
49.4k
    // This is a explicit reference to a physical register.
7025
49.4k
    Regs.push_back(AssignedReg);
7026
49.4k
7027
49.4k
    // If this is an expanded reference, add the rest of the regs to Regs.
7028
49.4k
    if (
NumRegs != 149.4k
) {
7029
8
      TargetRegisterClass::iterator I = RC->begin();
7030
30
      for (; 
*I != AssignedReg30
;
++I22
)
7031
8
        assert(I != RC->end() && "Didn't find reg!");
7032
8
7033
8
      // Already added the first reg.
7034
8
      --NumRegs; ++I;
7035
16
      for (; 
NumRegs16
;
--NumRegs, ++I8
) {
7036
8
        assert(I != RC->end() && "Ran out of registers to allocate!");
7037
8
        Regs.push_back(*I);
7038
8
      }
7039
8
    }
7040
49.4k
7041
49.4k
    OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
7042
49.4k
    return;
7043
49.4k
  }
7044
10.8k
7045
10.8k
  // Otherwise, if this was a reference to an LLVM register class, create vregs
7046
10.8k
  // for this reference.
7047
10.8k
  
if (const TargetRegisterClass *10.8k
RC10.8k
= PhysReg.second) {
7048
4.41k
    RegVT = *TRI.legalclasstypes_begin(*RC);
7049
4.41k
    if (OpInfo.ConstraintVT == MVT::Other)
7050
1
      ValueVT = RegVT;
7051
4.41k
7052
4.41k
    // Create the appropriate number of virtual registers.
7053
4.41k
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
7054
8.95k
    for (; 
NumRegs8.95k
;
--NumRegs4.54k
)
7055
4.54k
      Regs.push_back(RegInfo.createVirtualRegister(RC));
7056
4.41k
7057
4.41k
    OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
7058
4.41k
    return;
7059
4.41k
  }
7060
6.39k
7061
6.39k
  // Otherwise, we couldn't allocate enough registers for this.
7062
6.39k
}
7063
7064
static unsigned
7065
findMatchingInlineAsmOperand(unsigned OperandNo,
7066
194
                             const std::vector<SDValue> &AsmNodeOperands) {
7067
194
  // Scan until we find the definition we already emitted of this operand.
7068
194
  unsigned CurOp = InlineAsm::Op_FirstOperand;
7069
544
  for (; 
OperandNo544
;
--OperandNo350
) {
7070
350
    // Advance to the next operand.
7071
350
    unsigned OpFlag =
7072
350
        cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
7073
350
    assert((InlineAsm::isRegDefKind(OpFlag) ||
7074
350
            InlineAsm::isRegDefEarlyClobberKind(OpFlag) ||
7075
350
            InlineAsm::isMemKind(OpFlag)) &&
7076
350
           "Skipped past definitions?");
7077
350
    CurOp += InlineAsm::getNumOperandRegisters(OpFlag) + 1;
7078
350
  }
7079
194
  return CurOp;
7080
194
}
7081
7082
/// Fill \p Regs with \p NumRegs new virtual registers of type \p RegVT
7083
/// \return true if it has succeeded, false otherwise
7084
static bool createVirtualRegs(SmallVector<unsigned, 4> &Regs, unsigned NumRegs,
7085
193
                              MVT RegVT, SelectionDAG &DAG) {
7086
193
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7087
193
  MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
7088
400
  for (unsigned i = 0, e = NumRegs; 
i != e400
;
++i207
) {
7089
207
    if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT))
7090
207
      Regs.push_back(RegInfo.createVirtualRegister(RC));
7091
207
    else
7092
0
      return false;
7093
207
  }
7094
193
  return true;
7095
193
}
7096
7097
namespace {
7098
7099
class ExtraFlags {
7100
  unsigned Flags = 0;
7101
7102
public:
7103
11.4k
  explicit ExtraFlags(ImmutableCallSite CS) {
7104
11.4k
    const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
7105
11.4k
    if (IA->hasSideEffects())
7106
10.3k
      Flags |= InlineAsm::Extra_HasSideEffects;
7107
11.4k
    if (IA->isAlignStack())
7108
21
      Flags |= InlineAsm::Extra_IsAlignStack;
7109
11.4k
    if (CS.isConvergent())
7110
1
      Flags |= InlineAsm::Extra_IsConvergent;
7111
11.4k
    Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
7112
11.4k
  }
7113
7114
63.3k
  void update(const TargetLowering::AsmOperandInfo &OpInfo) {
7115
63.3k
    // Ideally, we would only check against memory constraints.  However, the
7116
63.3k
    // meaning of an Other constraint can be target-specific and we can't easily
7117
63.3k
    // reason about it.  Therefore, be conservative and set MayLoad/MayStore
7118
63.3k
    // for Other constraints as well.
7119
63.3k
    if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
7120
63.3k
        
OpInfo.ConstraintType == TargetLowering::C_Other60.7k
) {
7121
2.86k
      if (OpInfo.Type == InlineAsm::isInput)
7122
549
        Flags |= InlineAsm::Extra_MayLoad;
7123
2.31k
      else 
if (2.31k
OpInfo.Type == InlineAsm::isOutput2.31k
)
7124
165
        Flags |= InlineAsm::Extra_MayStore;
7125
2.15k
      else 
if (2.15k
OpInfo.Type == InlineAsm::isClobber2.15k
)
7126
2.15k
        Flags |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore);
7127
2.86k
    }
7128
63.3k
  }
7129
7130
11.4k
  unsigned get() const { return Flags; }
7131
};
7132
7133
} // end anonymous namespace
7134
7135
/// visitInlineAsm - Handle a call to an InlineAsm object.
7136
11.4k
void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
7137
11.4k
  const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
7138
11.4k
7139
11.4k
  /// ConstraintOperands - Information about all of the constraints.
7140
11.4k
  SDISelAsmOperandInfoVector ConstraintOperands;
7141
11.4k
7142
11.4k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7143
11.4k
  TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(
7144
11.4k
      DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), CS);
7145
11.4k
7146
11.4k
  bool hasMemory = false;
7147
11.4k
7148
11.4k
  // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore
7149
11.4k
  ExtraFlags ExtraInfo(CS);
7150
11.4k
7151
11.4k
  unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
7152
11.4k
  unsigned ResNo = 0;   // ResNo - The result number of the next output.
7153
74.8k
  for (unsigned i = 0, e = TargetConstraints.size(); 
i != e74.8k
;
++i63.3k
) {
7154
63.3k
    ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i]));
7155
63.3k
    SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
7156
63.3k
7157
63.3k
    MVT OpVT = MVT::Other;
7158
63.3k
7159
63.3k
    // Compute the value type for each operand.
7160
63.3k
    if (OpInfo.Type == InlineAsm::isInput ||
7161
63.3k
        
(OpInfo.Type == InlineAsm::isOutput && 60.2k
OpInfo.isIndirect3.85k
)) {
7162
3.28k
      OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
7163
3.28k
7164
3.28k
      // Process the call argument. BasicBlocks are labels, currently appearing
7165
3.28k
      // only in asm's.
7166
3.28k
      if (const BasicBlock *
BB3.28k
= dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
7167
2
        OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
7168
3.28k
      } else {
7169
3.27k
        OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
7170
3.27k
      }
7171
3.28k
7172
3.28k
      OpVT =
7173
3.28k
          OpInfo
7174
3.28k
              .getCallOperandValEVT(*DAG.getContext(), TLI, DAG.getDataLayout())
7175
3.28k
              .getSimpleVT();
7176
3.28k
    }
7177
63.3k
7178
63.3k
    if (
OpInfo.Type == InlineAsm::isOutput && 63.3k
!OpInfo.isIndirect3.85k
) {
7179
3.65k
      // The return value of the call is this value.  As such, there is no
7180
3.65k
      // corresponding argument.
7181
3.65k
      assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
7182
3.65k
      if (StructType *
STy3.65k
= dyn_cast<StructType>(CS.getType())) {
7183
559
        OpVT = TLI.getSimpleValueType(DAG.getDataLayout(),
7184
559
                                      STy->getElementType(ResNo));
7185
3.65k
      } else {
7186
3.09k
        assert(ResNo == 0 && "Asm only has one result!");
7187
3.09k
        OpVT = TLI.getSimpleValueType(DAG.getDataLayout(), CS.getType());
7188
3.09k
      }
7189
3.65k
      ++ResNo;
7190
3.65k
    }
7191
63.3k
7192
63.3k
    OpInfo.ConstraintVT = OpVT;
7193
63.3k
7194
63.3k
    if (!hasMemory)
7195
57.9k
      hasMemory = OpInfo.hasMemory(TLI);
7196
63.3k
7197
63.3k
    // Determine if this InlineAsm MayLoad or MayStore based on the constraints.
7198
63.3k
    // FIXME: Could we compute this on OpInfo rather than TargetConstraints[i]?
7199
63.3k
    auto TargetConstraint = TargetConstraints[i];
7200
63.3k
7201
63.3k
    // Compute the constraint code and ConstraintType to use.
7202
63.3k
    TLI.ComputeConstraintToUse(TargetConstraint, SDValue());
7203
63.3k
7204
63.3k
    ExtraInfo.update(TargetConstraint);
7205
63.3k
  }
7206
11.4k
7207
11.4k
  SDValue Chain, Flag;
7208
11.4k
7209
11.4k
  // We won't need to flush pending loads if this asm doesn't touch
7210
11.4k
  // memory and is nonvolatile.
7211
11.4k
  if (
hasMemory || 11.4k
IA->hasSideEffects()9.02k
)
7212
10.5k
    Chain = getRoot();
7213
11.4k
  else
7214
932
    Chain = DAG.getRoot();
7215
11.4k
7216
11.4k
  // Second pass over the constraints: compute which constraint option to use
7217
11.4k
  // and assign registers to constraints that want a specific physreg.
7218
74.8k
  for (unsigned i = 0, e = ConstraintOperands.size(); 
i != e74.8k
;
++i63.3k
) {
7219
63.3k
    SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
7220
63.3k
7221
63.3k
    // If this is an output operand with a matching input operand, look up the
7222
63.3k
    // matching input. If their types mismatch, e.g. one is an integer, the
7223
63.3k
    // other is floating point, or their sizes are different, flag it as an
7224
63.3k
    // error.
7225
63.3k
    if (
OpInfo.hasMatchingInput()63.3k
) {
7226
197
      SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
7227
197
      patchMatchingInput(OpInfo, Input, DAG);
7228
197
    }
7229
63.3k
7230
63.3k
    // Compute the constraint code and ConstraintType to use.
7231
63.3k
    TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
7232
63.3k
7233
63.3k
    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
7234
2.54k
        OpInfo.Type == InlineAsm::isClobber)
7235
2.15k
      continue;
7236
61.1k
7237
61.1k
    // If this is a memory input, and if the operand is not indirect, do what we
7238
61.1k
    // need to to provide an address for the memory input.
7239
61.1k
    
if (61.1k
OpInfo.ConstraintType == TargetLowering::C_Memory &&
7240
61.1k
        
!OpInfo.isIndirect397
) {
7241
81
      assert((OpInfo.isMultipleAlternative ||
7242
81
              (OpInfo.Type == InlineAsm::isInput)) &&
7243
81
             "Can only indirectify direct input operands!");
7244
81
7245
81
      // Memory operands really want the address of the value.
7246
81
      Chain = getAddressForMemoryInput(Chain, getCurSDLoc(), OpInfo, DAG);
7247
81
7248
81
      // There is no longer a Value* corresponding to this operand.
7249
81
      OpInfo.CallOperandVal = nullptr;
7250
81
7251
81
      // It is now an indirect operand.
7252
81
      OpInfo.isIndirect = true;
7253
81
    }
7254
61.1k
7255
61.1k
    // If this constraint is for a specific register, allocate it before
7256
61.1k
    // anything else.
7257
61.1k
    if (OpInfo.ConstraintType == TargetLowering::C_Register)
7258
55.7k
      GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo);
7259
63.3k
  }
7260
11.4k
7261
11.4k
  // Third pass - Loop over all of the operands, assigning virtual or physregs
7262
11.4k
  // to register class operands.
7263
74.8k
  for (unsigned i = 0, e = ConstraintOperands.size(); 
i != e74.8k
;
++i63.3k
) {
7264
63.3k
    SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
7265
63.3k
7266
63.3k
    // C_Register operands have already been allocated, Other/Memory don't need
7267
63.3k
    // to be.
7268
63.3k
    if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
7269
4.47k
      GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo);
7270
63.3k
  }
7271
11.4k
7272
11.4k
  // AsmNodeOperands - The operands for the ISD::INLINEASM node.
7273
11.4k
  std::vector<SDValue> AsmNodeOperands;
7274
11.4k
  AsmNodeOperands.push_back(SDValue());  // reserve space for input chain
7275
11.4k
  AsmNodeOperands.push_back(DAG.getTargetExternalSymbol(
7276
11.4k
      IA->getAsmString().c_str(), TLI.getPointerTy(DAG.getDataLayout())));
7277
11.4k
7278
11.4k
  // If we have a !srcloc metadata node associated with it, we want to attach
7279
11.4k
  // this to the ultimately generated inline asm machineinstr.  To do this, we
7280
11.4k
  // pass in the third operand as this (potentially null) inline asm MDNode.
7281
11.4k
  const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
7282
11.4k
  AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
7283
11.4k
7284
11.4k
  // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore
7285
11.4k
  // bits as operand 3.
7286
11.4k
  AsmNodeOperands.push_back(DAG.getTargetConstant(
7287
11.4k
      ExtraInfo.get(), getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
7288
11.4k
7289
11.4k
  // Loop over all of the inputs, copying the operand values into the
7290
11.4k
  // appropriate registers and processing the output regs.
7291
11.4k
  RegsForValue RetValRegs;
7292
11.4k
7293
11.4k
  // IndirectStoresToEmit - The set of stores to emit after the inline asm node.
7294
11.4k
  std::vector<std::pair<RegsForValue, Value *>> IndirectStoresToEmit;
7295
11.4k
7296
74.7k
  for (unsigned i = 0, e = ConstraintOperands.size(); 
i != e74.7k
;
++i63.2k
) {
7297
63.3k
    SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
7298
63.3k
7299
63.3k
    switch (OpInfo.Type) {
7300
3.85k
    case InlineAsm::isOutput:
7301
3.85k
      if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
7302
3.85k
          
OpInfo.ConstraintType != TargetLowering::C_Register1.01k
) {
7303
165
        // Memory output, or 'other' output (e.g. 'X' constraint).
7304
165
        assert(OpInfo.isIndirect && "Memory output must be indirect operand");
7305
165
7306
165
        unsigned ConstraintID =
7307
165
            TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
7308
165
        assert(ConstraintID != InlineAsm::Constraint_Unknown &&
7309
165
               "Failed to convert memory constraint code to constraint id.");
7310
165
7311
165
        // Add information to the INLINEASM node to know about this output.
7312
165
        unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
7313
165
        OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID);
7314
165
        AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(),
7315
165
                                                        MVT::i32));
7316
165
        AsmNodeOperands.push_back(OpInfo.CallOperand);
7317
165
        break;
7318
165
      }
7319
3.68k
7320
3.68k
      // Otherwise, this is a register or register class output.
7321
3.68k
7322
3.68k
      // Copy the output from the appropriate register.  Find a register that
7323
3.68k
      // we can use.
7324
3.68k
      
if (3.68k
OpInfo.AssignedRegs.Regs.empty()3.68k
) {
7325
28
        emitInlineAsmError(
7326
28
            CS, "couldn't allocate output register for constraint '" +
7327
28
                    Twine(OpInfo.ConstraintCode) + "'");
7328
28
        return;
7329
28
      }
7330
3.66k
7331
3.66k
      // If this is an indirect operand, store through the pointer after the
7332
3.66k
      // asm.
7333
3.66k
      
if (3.66k
OpInfo.isIndirect3.66k
) {
7334
35
        IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs,
7335
35
                                                      OpInfo.CallOperandVal));
7336
3.66k
      } else {
7337
3.62k
        // This is the result value of the call.
7338
3.62k
        assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
7339
3.62k
        // Concatenate this output onto the outputs list.
7340
3.62k
        RetValRegs.append(OpInfo.AssignedRegs);
7341
3.62k
      }
7342
3.66k
7343
3.66k
      // Add information to the INLINEASM node to know that this register is
7344
3.66k
      // set.
7345
3.66k
      OpInfo.AssignedRegs
7346
3.66k
          .AddInlineAsmOperands(OpInfo.isEarlyClobber
7347
170
                                    ? InlineAsm::Kind_RegDefEarlyClobber
7348
3.49k
                                    : InlineAsm::Kind_RegDef,
7349
3.66k
                                false, 0, getCurSDLoc(), DAG, AsmNodeOperands);
7350
3.66k
      break;
7351
3.66k
7352
3.07k
    case InlineAsm::isInput: {
7353
3.07k
      SDValue InOperandVal = OpInfo.CallOperand;
7354
3.07k
7355
3.07k
      if (
OpInfo.isMatchingInputConstraint()3.07k
) {
7356
194
        // If this is required to match an output register we have already set,
7357
194
        // just use its register.
7358
194
        auto CurOp = findMatchingInlineAsmOperand(OpInfo.getMatchedOperand(),
7359
194
                                                  AsmNodeOperands);
7360
194
        unsigned OpFlag =
7361
194
          cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
7362
194
        if (InlineAsm::isRegDefKind(OpFlag) ||
7363
194
            
InlineAsm::isRegDefEarlyClobberKind(OpFlag)19
) {
7364
194
          // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
7365
194
          if (
OpInfo.isIndirect194
) {
7366
1
            // This happens on gcc/testsuite/gcc.dg/pr8788-1.c
7367
1
            emitInlineAsmError(CS, "inline asm not supported yet:"
7368
1
                                   " don't know how to handle tied "
7369
1
                                   "indirect register inputs");
7370
1
            return;
7371
1
          }
7372
193
7373
193
          MVT RegVT = AsmNodeOperands[CurOp+1].getSimpleValueType();
7374
193
          SmallVector<unsigned, 4> Regs;
7375
193
7376
193
          if (!createVirtualRegs(Regs,
7377
193
                                 InlineAsm::getNumOperandRegisters(OpFlag),
7378
193
                                 RegVT, DAG)) {
7379
0
            emitInlineAsmError(CS, "inline asm error: This value type register "
7380
0
                                   "class is not natively supported!");
7381
0
            return;
7382
0
          }
7383
193
7384
193
          RegsForValue MatchedRegs(Regs, RegVT, InOperandVal.getValueType());
7385
193
7386
193
          SDLoc dl = getCurSDLoc();
7387
193
          // Use the produced MatchedRegs object to
7388
193
          MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag,
7389
193
                                    CS.getInstruction());
7390
193
          MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
7391
193
                                           true, OpInfo.getMatchedOperand(), dl,
7392
193
                                           DAG, AsmNodeOperands);
7393
193
          break;
7394
193
        }
7395
0
7396
0
        assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
7397
0
        assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
7398
0
               "Unexpected number of operands");
7399
0
        // Add information to the INLINEASM node to know about this input.
7400
0
        // See InlineAsm.h isUseOperandTiedToDef.
7401
0
        OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag);
7402
0
        OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
7403
0
                                                    OpInfo.getMatchedOperand());
7404
0
        AsmNodeOperands.push_back(DAG.getTargetConstant(
7405
0
            OpFlag, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
7406
0
        AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
7407
0
        break;
7408
0
      }
7409
2.87k
7410
2.87k
      // Treat indirect 'X' constraint as memory.
7411
2.87k
      
if (2.87k
OpInfo.ConstraintType == TargetLowering::C_Other &&
7412
349
          OpInfo.isIndirect)
7413
1
        OpInfo.ConstraintType = TargetLowering::C_Memory;
7414
2.87k
7415
2.87k
      if (
OpInfo.ConstraintType == TargetLowering::C_Other2.87k
) {
7416
348
        std::vector<SDValue> Ops;
7417
348
        TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
7418
348
                                          Ops, DAG);
7419
348
        if (
Ops.empty()348
) {
7420
19
          emitInlineAsmError(CS, "invalid operand for inline asm constraint '" +
7421
19
                                     Twine(OpInfo.ConstraintCode) + "'");
7422
19
          return;
7423
19
        }
7424
329
7425
329
        // Add information to the INLINEASM node to know about this input.
7426
329
        unsigned ResOpType =
7427
329
          InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
7428
329
        AsmNodeOperands.push_back(DAG.getTargetConstant(
7429
329
            ResOpType, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
7430
329
        AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
7431
329
        break;
7432
329
      }
7433
2.53k
7434
2.53k
      
if (2.53k
OpInfo.ConstraintType == TargetLowering::C_Memory2.53k
) {
7435
236
        assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
7436
236
        assert(InOperandVal.getValueType() ==
7437
236
                   TLI.getPointerTy(DAG.getDataLayout()) &&
7438
236
               "Memory operands expect pointer values");
7439
236
7440
236
        unsigned ConstraintID =
7441
236
            TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
7442
236
        assert(ConstraintID != InlineAsm::Constraint_Unknown &&
7443
236
               "Failed to convert memory constraint code to constraint id.");
7444
236
7445
236
        // Add information to the INLINEASM node to know about this input.
7446
236
        unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
7447
236
        ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID);
7448
236
        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
7449
236
                                                        getCurSDLoc(),
7450
236
                                                        MVT::i32));
7451
236
        AsmNodeOperands.push_back(InOperandVal);
7452
236
        break;
7453
236
      }
7454
2.29k
7455
2.53k
      assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
7456
2.29k
              OpInfo.ConstraintType == TargetLowering::C_Register) &&
7457
2.29k
             "Unknown constraint type!");
7458
2.29k
7459
2.29k
      // TODO: Support this.
7460
2.29k
      if (
OpInfo.isIndirect2.29k
) {
7461
1
        emitInlineAsmError(
7462
1
            CS, "Don't know how to handle indirect register inputs yet "
7463
1
                "for constraint '" +
7464
1
                    Twine(OpInfo.ConstraintCode) + "'");
7465
1
        return;
7466
1
      }
7467
2.29k
7468
2.29k
      // Copy the input into the appropriate registers.
7469
2.29k
      
if (2.29k
OpInfo.AssignedRegs.Regs.empty()2.29k
) {
7470
24
        emitInlineAsmError(CS, "couldn't allocate input reg for constraint '" +
7471
24
                                   Twine(OpInfo.ConstraintCode) + "'");
7472
24
        return;
7473
24
      }
7474
2.26k
7475
2.26k
      SDLoc dl = getCurSDLoc();
7476
2.26k
7477
2.26k
      OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl,
7478
2.26k
                                        Chain, &Flag, CS.getInstruction());
7479
2.26k
7480
2.26k
      OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
7481
2.26k
                                               dl, DAG, AsmNodeOperands);
7482
2.26k
      break;
7483
2.26k
    }
7484
56.3k
    case InlineAsm::isClobber:
7485
56.3k
      // Add the clobbered value to the operand list, so that the register
7486
56.3k
      // allocator is aware that the physreg got clobbered.
7487
56.3k
      if (!OpInfo.AssignedRegs.Regs.empty())
7488
47.9k
        OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber,
7489
47.9k
                                                 false, 0, getCurSDLoc(), DAG,
7490
47.9k
                                                 AsmNodeOperands);
7491
3.85k
      break;
7492
63.3k
    }
7493
63.3k
  }
7494
11.4k
7495
11.4k
  // Finish up input operands.  Set the input chain and add the flag last.
7496
11.4k
  AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
7497
11.4k
  if (
Flag.getNode()11.4k
)
AsmNodeOperands.push_back(Flag)1.67k
;
7498
11.4k
7499
11.4k
  Chain = DAG.getNode(ISD::INLINEASM, getCurSDLoc(),
7500
11.4k
                      DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
7501
11.4k
  Flag = Chain.getValue(1);
7502
11.4k
7503
11.4k
  // If this asm returns a register value, copy the result from that register
7504
11.4k
  // and set it as the value of the call.
7505
11.4k
  if (
!RetValRegs.Regs.empty()11.4k
) {
7506
3.21k
    SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(),
7507
3.21k
                                             Chain, &Flag, CS.getInstruction());
7508
3.21k
7509
3.21k
    // FIXME: Why don't we do this for inline asms with MRVs?
7510
3.21k
    if (
CS.getType()->isSingleValueType() && 3.21k
CS.getType()->isSized()3.05k
) {
7511
3.05k
      EVT ResultType = TLI.getValueType(DAG.getDataLayout(), CS.getType());
7512
3.05k
7513
3.05k
      // If any of the results of the inline asm is a vector, it may have the
7514
3.05k
      // wrong width/num elts.  This can happen for register classes that can
7515
3.05k
      // contain multiple different value types.  The preg or vreg allocated may
7516
3.05k
      // not have the same VT as was expected.  Convert it to the right type
7517
3.05k
      // with bit_convert.
7518
3.05k
      if (
ResultType != Val.getValueType() && 3.05k
Val.getValueType().isVector()0
) {
7519
0
        Val = DAG.getNode(ISD::BITCAST, getCurSDLoc(),
7520
0
                          ResultType, Val);
7521
0
7522
3.05k
      } else 
if (3.05k
ResultType != Val.getValueType() &&
7523
3.05k
                 
ResultType.isInteger()0
&&
Val.getValueType().isInteger()0
) {
7524
0
        // If a result value was tied to an input value, the computed result may
7525
0
        // have a wider width than the expected result.  Extract the relevant
7526
0
        // portion.
7527
0
        Val = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultType, Val);
7528
0
      }
7529
3.05k
7530
3.05k
      assert(ResultType == Val.getValueType() && "Asm result value mismatch!");
7531
3.05k
    }
7532
3.21k
7533
3.21k
    setValue(CS.getInstruction(), Val);
7534
3.21k
    // Don't need to use this as a chain in this case.
7535
3.21k
    if (
!IA->hasSideEffects() && 3.21k
!hasMemory844
&&
IndirectStoresToEmit.empty()758
)
7536
758
      return;
7537
10.6k
  }
7538
10.6k
7539
10.6k
  std::vector<std::pair<SDValue, const Value *>> StoresToEmit;
7540
10.6k
7541
10.6k
  // Process indirect outputs, first output all of the flagged copies out of
7542
10.6k
  // physregs.
7543
10.6k
  for (unsigned i = 0, e = IndirectStoresToEmit.size(); 
i != e10.6k
;
++i32
) {
7544
32
    RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
7545
32
    const Value *Ptr = IndirectStoresToEmit[i].second;
7546
32
    SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(),
7547
32
                                             Chain, &Flag, IA);
7548
32
    StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
7549
32
  }
7550
10.6k
7551
10.6k
  // Emit the non-flagged stores from the physregs.
7552
10.6k
  SmallVector<SDValue, 8> OutChains;
7553
10.6k
  for (unsigned i = 0, e = StoresToEmit.size(); 
i != e10.6k
;
++i32
) {
7554
32
    SDValue Val = DAG.getStore(Chain, getCurSDLoc(), StoresToEmit[i].first,
7555
32
                               getValue(StoresToEmit[i].second),
7556
32
                               MachinePointerInfo(StoresToEmit[i].second));
7557
32
    OutChains.push_back(Val);
7558
32
  }
7559
10.6k
7560
10.6k
  if (!OutChains.empty())
7561
29
    Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, OutChains);
7562
11.4k
7563
11.4k
  DAG.setRoot(Chain);
7564
11.4k
}
7565
7566
void SelectionDAGBuilder::emitInlineAsmError(ImmutableCallSite CS,
7567
73
                                             const Twine &Message) {
7568
73
  LLVMContext &Ctx = *DAG.getContext();
7569
73
  Ctx.emitError(CS.getInstruction(), Message);
7570
73
7571
73
  // Make sure we leave the DAG in a valid state
7572
73
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7573
73
  auto VT = TLI.getValueType(DAG.getDataLayout(), CS.getType());
7574
73
  setValue(CS.getInstruction(), DAG.getUNDEF(VT));
7575
73
}
7576
7577
369
void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
7578
369
  DAG.setRoot(DAG.getNode(ISD::VASTART, getCurSDLoc(),
7579
369
                          MVT::Other, getRoot(),
7580
369
                          getValue(I.getArgOperand(0)),
7581
369
                          DAG.getSrcValue(I.getArgOperand(0))));
7582
369
}
7583
7584
353
void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
7585
353
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7586
353
  const DataLayout &DL = DAG.getDataLayout();
7587
353
  SDValue V = DAG.getVAArg(TLI.getValueType(DAG.getDataLayout(), I.getType()),
7588
353
                           getCurSDLoc(), getRoot(), getValue(I.getOperand(0)),
7589
353
                           DAG.getSrcValue(I.getOperand(0)),
7590
353
                           DL.getABITypeAlignment(I.getType()));
7591
353
  setValue(&I, V);
7592
353
  DAG.setRoot(V.getValue(1));
7593
353
}
7594
7595
350
void SelectionDAGBuilder::visitVAEnd(const CallInst &I) {
7596
350
  DAG.setRoot(DAG.getNode(ISD::VAEND, getCurSDLoc(),
7597
350
                          MVT::Other, getRoot(),
7598
350
                          getValue(I.getArgOperand(0)),
7599
350
                          DAG.getSrcValue(I.getArgOperand(0))));
7600
350
}
7601
7602
66
void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
7603
66
  DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurSDLoc(),
7604
66
                          MVT::Other, getRoot(),
7605
66
                          getValue(I.getArgOperand(0)),
7606
66
                          getValue(I.getArgOperand(1)),
7607
66
                          DAG.getSrcValue(I.getArgOperand(0)),
7608
66
                          DAG.getSrcValue(I.getArgOperand(1))));
7609
66
}
7610
7611
SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
7612
                                                    const Instruction &I,
7613
1.14M
                                                    SDValue Op) {
7614
1.14M
  const MDNode *Range = I.getMetadata(LLVMContext::MD_range);
7615
1.14M
  if (!Range)
7616
1.14M
    return Op;
7617
2.97k
7618
2.97k
  ConstantRange CR = getConstantRangeFromMetadata(*Range);
7619
2.97k
  if (
CR.isFullSet() || 2.97k
CR.isEmptySet()2.97k
||
CR.isWrappedSet()2.97k
)
7620
0
    return Op;
7621
2.97k
7622
2.97k
  APInt Lo = CR.getUnsignedMin();
7623
2.97k
  if (!Lo.isMinValue())
7624
0
    return Op;
7625
2.97k
7626
2.97k
  APInt Hi = CR.getUnsignedMax();
7627
2.97k
  unsigned Bits = Hi.getActiveBits();
7628
2.97k
7629
2.97k
  EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits);
7630
2.97k
7631
2.97k
  SDLoc SL = getCurSDLoc();
7632
2.97k
7633
2.97k
  SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(), Op,
7634
2.97k
                             DAG.getValueType(SmallVT));
7635
2.97k
  unsigned NumVals = Op.getNode()->getNumValues();
7636
2.97k
  if (NumVals == 1)
7637
2.96k
    return ZExt;
7638
4
7639
4
  SmallVector<SDValue, 4> Ops;
7640
4
7641
4
  Ops.push_back(ZExt);
7642
12
  for (unsigned I = 1; 
I != NumVals12
;
++I8
)
7643
8
    Ops.push_back(Op.getValue(I));
7644
1.14M
7645
1.14M
  return DAG.getMergeValues(Ops, SL);
7646
1.14M
}
7647
7648
/// \brief Populate a CallLowerinInfo (into \p CLI) based on the properties of
7649
/// the call being lowered.
7650
///
7651
/// This is a helper for lowering intrinsics that follow a target calling
7652
/// convention or require stack pointer adjustment. Only a subset of the
7653
/// intrinsic's operands need to participate in the calling convention.
7654
void SelectionDAGBuilder::populateCallLoweringInfo(
7655
    TargetLowering::CallLoweringInfo &CLI, ImmutableCallSite CS,
7656
    unsigned ArgIdx, unsigned NumArgs, SDValue Callee, Type *ReturnTy,
7657
185
    bool IsPatchPoint) {
7658
185
  TargetLowering::ArgListTy Args;
7659
185
  Args.reserve(NumArgs);
7660
185
7661
185
  // Populate the argument list.
7662
185
  // Attributes for args start at offset 1, after the return attribute.
7663
185
  for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs;
7664
349
       
ArgI != ArgE349
;
++ArgI164
) {
7665
164
    const Value *V = CS->getOperand(ArgI);
7666
164
7667
164
    assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
7668
164
7669
164
    TargetLowering::ArgListEntry Entry;
7670
164
    Entry.Node = getValue(V);
7671
164
    Entry.Ty = V->getType();
7672
164
    Entry.setAttributes(&CS, ArgIdx);
7673
164
    Args.push_back(Entry);
7674
164
  }
7675
185
7676
185
  CLI.setDebugLoc(getCurSDLoc())
7677
185
      .setChain(getRoot())
7678
185
      .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args))
7679
185
      .setDiscardResult(CS->use_empty())
7680
185
      .setIsPatchPoint(IsPatchPoint);
7681
185
}
7682
7683
/// \brief Add a stack map intrinsic call's live variable operands to a stackmap
7684
/// or patchpoint target node's operand list.
7685
///
7686
/// Constants are converted to TargetConstants purely as an optimization to
7687
/// avoid constant materialization and register allocation.
7688
///
7689
/// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not
7690
/// generate addess computation nodes, and so ExpandISelPseudo can convert the
7691
/// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids
7692
/// address materialization and register allocation, but may also be required
7693
/// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an
7694
/// alloca in the entry block, then the runtime may assume that the alloca's
7695
/// StackMap location can be read immediately after compilation and that the
7696
/// location is valid at any point during execution (this is similar to the
7697
/// assumption made by the llvm.gcroot intrinsic). If the alloca's location were
7698
/// only available in a register, then the runtime would need to trap when
7699
/// execution reaches the StackMap in order to read the alloca's location.
7700
static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx,
7701
                                const SDLoc &DL, SmallVectorImpl<SDValue> &Ops,
7702
228
                                SelectionDAGBuilder &Builder) {
7703
534
  for (unsigned i = StartIdx, e = CS.arg_size(); 
i != e534
;
++i306
) {
7704
306
    SDValue OpVal = Builder.getValue(CS.getArgument(i));
7705
306
    if (ConstantSDNode *
C306
= dyn_cast<ConstantSDNode>(OpVal)) {
7706
44
      Ops.push_back(
7707
44
        Builder.DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64));
7708
44
      Ops.push_back(
7709
44
        Builder.DAG.getTargetConstant(C->getSExtValue(), DL, MVT::i64));
7710
306
    } else 
if (FrameIndexSDNode *262
FI262
= dyn_cast<FrameIndexSDNode>(OpVal)) {
7711
16
      const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo();
7712
16
      Ops.push_back(Builder.DAG.getTargetFrameIndex(
7713
16
          FI->getIndex(), TLI.getFrameIndexTy(Builder.DAG.getDataLayout())));
7714
16
    } else
7715
246
      Ops.push_back(OpVal);
7716
306
  }
7717
228
}
7718
7719
/// \brief Lower llvm.experimental.stackmap directly to its target opcode.
7720
112
void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
7721
112
  // void @llvm.experimental.stackmap(i32 <id>, i32 <numShadowBytes>,
7722
112
  //                                  [live variables...])
7723
112
7724
112
  assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value.");
7725
112
7726
112
  SDValue Chain, InFlag, Callee, NullPtr;
7727
112
  SmallVector<SDValue, 32> Ops;
7728
112
7729
112
  SDLoc DL = getCurSDLoc();
7730
112
  Callee = getValue(CI.getCalledValue());
7731
112
  NullPtr = DAG.getIntPtrConstant(0, DL, true);
7732
112
7733
112
  // The stackmap intrinsic only records the live variables (the arguemnts
7734
112
  // passed to it) and emits NOPS (if requested). Unlike the patchpoint
7735
112
  // intrinsic, this won't be lowered to a function call. This means we don't
7736
112
  // have to worry about calling conventions and target specific lowering code.
7737
112
  // Instead we perform the call lowering right here.
7738
112
  //
7739
112
  // chain, flag = CALLSEQ_START(chain, 0, 0)
7740
112
  // chain, flag = STACKMAP(id, nbytes, ..., chain, flag)
7741
112
  // chain, flag = CALLSEQ_END(chain, 0, 0, flag)
7742
112
  //
7743
112
  Chain = DAG.getCALLSEQ_START(getRoot(), 0, 0, DL);
7744
112
  InFlag = Chain.getValue(1);
7745
112
7746
112
  // Add the <id> and <numBytes> constants.
7747
112
  SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos));
7748
112
  Ops.push_back(DAG.getTargetConstant(
7749
112
                  cast<ConstantSDNode>(IDVal)->getZExtValue(), DL, MVT::i64));
7750
112
  SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos));
7751
112
  Ops.push_back(DAG.getTargetConstant(
7752
112
                  cast<ConstantSDNode>(NBytesVal)->getZExtValue(), DL,
7753
112
                  MVT::i32));
7754
112
7755
112
  // Push live variables for the stack map.
7756
112
  addStackMapLiveVars(&CI, 2, DL, Ops, *this);
7757
112
7758
112
  // We are not pushing any register mask info here on the operands list,
7759
112
  // because the stackmap doesn't clobber anything.
7760
112
7761
112
  // Push the chain and the glue flag.
7762
112
  Ops.push_back(Chain);
7763
112
  Ops.push_back(InFlag);
7764
112
7765
112
  // Create the STACKMAP node.
7766
112
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
7767
112
  SDNode *SM = DAG.getMachineNode(TargetOpcode::STACKMAP, DL, NodeTys, Ops);
7768
112
  Chain = SDValue(SM, 0);
7769
112
  InFlag = Chain.getValue(1);
7770
112
7771
112
  Chain = DAG.getCALLSEQ_END(Chain, NullPtr, NullPtr, InFlag, DL);
7772
112
7773
112
  // Stackmaps don't generate values, so nothing goes into the NodeMap.
7774
112
7775
112
  // Set the root to the target-lowered call chain.
7776
112
  DAG.setRoot(Chain);
7777
112
7778
112
  // Inform the Frame Information that we have a stackmap in this function.
7779
112
  FuncInfo.MF->getFrameInfo().setHasStackMap();
7780
112
}
7781
7782
/// \brief Lower llvm.experimental.patchpoint directly to its target opcode.
7783
void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
7784
116
                                          const BasicBlock *EHPadBB) {
7785
116
  // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
7786
116
  //                                                 i32 <numBytes>,
7787
116
  //                                                 i8* <target>,
7788
116
  //                                                 i32 <numArgs>,
7789
116
  //                                                 [Args...],
7790
116
  //                                                 [live variables...])
7791
116
7792
116
  CallingConv::ID CC = CS.getCallingConv();
7793
116
  bool IsAnyRegCC = CC == CallingConv::AnyReg;
7794
116
  bool HasDef = !CS->getType()->isVoidTy();
7795
116
  SDLoc dl = getCurSDLoc();
7796
116
  SDValue Callee = getValue(CS->getOperand(PatchPointOpers::TargetPos));
7797
116
7798
116
  // Handle immediate and symbolic callees.
7799
116
  if (auto* ConstCallee = dyn_cast<ConstantSDNode>(Callee))
7800
113
    Callee = DAG.getIntPtrConstant(ConstCallee->getZExtValue(), dl,
7801
113
                                   /*isTarget=*/true);
7802
3
  else 
if (auto* 3
SymbolicCallee3
= dyn_cast<GlobalAddressSDNode>(Callee))
7803
3
    Callee =  DAG.getTargetGlobalAddress(SymbolicCallee->getGlobal(),
7804
3
                                         SDLoc(SymbolicCallee),
7805
3
                                         SymbolicCallee->getValueType(0));
7806
116
7807
116
  // Get the real number of arguments participating in the call <numArgs>
7808
116
  SDValue NArgVal = getValue(CS.getArgument(PatchPointOpers::NArgPos));
7809
116
  unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue();
7810
116
7811
116
  // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
7812
116
  // Intrinsics include all meta-operands up to but not including CC.
7813
116
  unsigned NumMetaOpers = PatchPointOpers::CCPos;
7814
116
  assert(CS.arg_size() >= NumMetaOpers + NumArgs &&
7815
116
         "Not enough arguments provided to the patchpoint intrinsic");
7816
116
7817
116
  // For AnyRegCC the arguments are lowered later on manually.
7818
116
  unsigned NumCallArgs = IsAnyRegCC ? 
056
:
NumArgs60
;
7819
116
  Type *ReturnTy =
7820
116
    IsAnyRegCC ? 
Type::getVoidTy(*DAG.getContext())56
:
CS->getType()60
;
7821
116
7822
116
  TargetLowering::CallLoweringInfo CLI(DAG);
7823
116
  populateCallLoweringInfo(CLI, CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy,
7824
116
                           true);
7825
116
  std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
7826
116
7827
116
  SDNode *CallEnd = Result.second.getNode();
7828
116
  if (
HasDef && 116
(CallEnd->getOpcode() == ISD::CopyFromReg)66
)
7829
27
    CallEnd = CallEnd->getOperand(0).getNode();
7830
116
7831
116
  /// Get a call instruction from the call sequence chain.
7832
116
  /// Tail calls are not allowed.
7833
116
  assert(CallEnd->getOpcode() == ISD::CALLSEQ_END &&
7834
116
         "Expected a callseq node.");
7835
116
  SDNode *Call = CallEnd->getOperand(0).getNode();
7836
116
  bool HasGlue = Call->getGluedNode();
7837
116
7838
116
  // Replace the target specific call node with the patchable intrinsic.
7839
116
  SmallVector<SDValue, 8> Ops;
7840
116
7841
116
  // Add the <id> and <numBytes> constants.
7842
116
  SDValue IDVal = getValue(CS->getOperand(PatchPointOpers::IDPos));
7843
116
  Ops.push_back(DAG.getTargetConstant(
7844
116
                  cast<ConstantSDNode>(IDVal)->getZExtValue(), dl, MVT::i64));
7845
116
  SDValue NBytesVal = getValue(CS->getOperand(PatchPointOpers::NBytesPos));
7846
116
  Ops.push_back(DAG.getTargetConstant(
7847
116
                  cast<ConstantSDNode>(NBytesVal)->getZExtValue(), dl,
7848
116
                  MVT::i32));
7849
116
7850
116
  // Add the callee.
7851
116
  Ops.push_back(Callee);
7852
116
7853
116
  // Adjust <numArgs> to account for any arguments that have been passed on the
7854
116
  // stack instead.
7855
116
  // Call Node: Chain, Target, {Args}, RegMask, [Glue]
7856
116
  unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 
447
:
369
);
7857
116
  NumCallRegArgs = IsAnyRegCC ? 
NumArgs56
:
NumCallRegArgs60
;
7858
116
  Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, dl, MVT::i32));
7859
116
7860
116
  // Add the calling convention
7861
116
  Ops.push_back(DAG.getTargetConstant((unsigned)CC, dl, MVT::i32));
7862
116
7863
116
  // Add the arguments we omitted previously. The register allocator should
7864
116
  // place these in any free register.
7865
116
  if (IsAnyRegCC)
7866
288
    
for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; 56
i != e288
;
++i232
)
7867
232
      Ops.push_back(getValue(CS.getArgument(i)));
7868
116
7869
116
  // Push the arguments from the call instruction up to the register mask.
7870
116
  SDNode::op_iterator e = HasGlue ? 
Call->op_end()-247
:
Call->op_end()-169
;
7871
116
  Ops.append(Call->op_begin() + 2, e);
7872
116
7873
116
  // Push live variables for the stack map.
7874
116
  addStackMapLiveVars(CS, NumMetaOpers + NumArgs, dl, Ops, *this);
7875
116
7876
116
  // Push the register mask info.
7877
116
  if (HasGlue)
7878
47
    Ops.push_back(*(Call->op_end()-2));
7879
116
  else
7880
69
    Ops.push_back(*(Call->op_end()-1));
7881
116
7882
116
  // Push the chain (this is originally the first operand of the call, but
7883
116
  // becomes now the last or second to last operand).
7884
116
  Ops.push_back(*(Call->op_begin()));
7885
116
7886
116
  // Push the glue flag (last operand).
7887
116
  if (HasGlue)
7888
47
    Ops.push_back(*(Call->op_end()-1));
7889
116
7890
116
  SDVTList NodeTys;
7891
116
  if (
IsAnyRegCC && 116
HasDef56
) {
7892
39
    // Create the return types based on the intrinsic definition
7893
39
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7894
39
    SmallVector<EVT, 3> ValueVTs;
7895
39
    ComputeValueVTs(TLI, DAG.getDataLayout(), CS->getType(), ValueVTs);
7896
39
    assert(ValueVTs.size() == 1 && "Expected only one return value type.");
7897
39
7898
39
    // There is always a chain and a glue type at the end
7899
39
    ValueVTs.push_back(MVT::Other);
7900
39
    ValueVTs.push_back(MVT::Glue);
7901
39
    NodeTys = DAG.getVTList(ValueVTs);
7902
39
  } else
7903
77
    NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
7904
116
7905
116
  // Replace the target specific call node with a PATCHPOINT node.
7906
116
  MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT,
7907
116
                                         dl, NodeTys, Ops);
7908
116
7909
116
  // Update the NodeMap.
7910
116
  if (
HasDef116
) {
7911
66
    if (IsAnyRegCC)
7912
39
      setValue(CS.getInstruction(), SDValue(MN, 0));
7913
66
    else
7914
27
      setValue(CS.getInstruction(), Result.first);
7915
66
  }
7916
116
7917
116
  // Fixup the consumers of the intrinsic. The chain and glue may be used in the
7918
116
  // call sequence. Furthermore the location of the chain and glue can change
7919
116
  // when the AnyReg calling convention is used and the intrinsic returns a
7920
116
  // value.
7921
116
  if (
IsAnyRegCC && 116
HasDef56
) {
7922
39
    SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)};
7923
39
    SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)};
7924
39
    DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
7925
39
  } else
7926
77
    DAG.ReplaceAllUsesWith(Call, MN);
7927
116
  DAG.DeleteNode(Call);
7928
116
7929
116
  // Inform the Frame Information that we have a patchpoint in this function.
7930
116
  FuncInfo.MF->getFrameInfo().setHasPatchPoint();
7931
116
}
7932
7933
void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
7934
913
                                            unsigned Intrinsic) {
7935
913
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7936
913
  SDValue Op1 = getValue(I.getArgOperand(0));
7937
913
  SDValue Op2;
7938
913
  if (I.getNumArgOperands() > 1)
7939
0
    Op2 = getValue(I.getArgOperand(1));
7940
913
  SDLoc dl = getCurSDLoc();
7941
913
  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
7942
913
  SDValue Res;
7943
913
  FastMathFlags FMF;
7944
913
  if (isa<FPMathOperator>(I))
7945
2
    FMF = I.getFastMathFlags();
7946
913
  SDNodeFlags SDFlags;
7947
913
  SDFlags.setNoNaNs(FMF.noNaNs());
7948
913
7949
913
  switch (Intrinsic) {
7950
0
  case Intrinsic::experimental_vector_reduce_fadd:
7951
0
    if (FMF.unsafeAlgebra())
7952
0
      Res = DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2);
7953
0
    else
7954
0
      Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2);
7955
0
    break;
7956
0
  case Intrinsic::experimental_vector_reduce_fmul:
7957
0
    if (FMF.unsafeAlgebra())
7958
0
      Res = DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2);
7959
0
    else
7960
0
      Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2);
7961
0
    break;
7962
875
  case Intrinsic::experimental_vector_reduce_add:
7963
875
    Res = DAG.getNode(ISD::VECREDUCE_ADD, dl, VT, Op1);
7964
875
    break;
7965
0
  case Intrinsic::experimental_vector_reduce_mul:
7966
0
    Res = DAG.getNode(ISD::VECREDUCE_MUL, dl, VT, Op1);
7967
0
    break;
7968
0
  case Intrinsic::experimental_vector_reduce_and:
7969
0
    Res = DAG.getNode(ISD::VECREDUCE_AND, dl, VT, Op1);
7970
0
    break;
7971
0
  case Intrinsic::experimental_vector_reduce_or:
7972
0
    Res = DAG.getNode(ISD::VECREDUCE_OR, dl, VT, Op1);
7973
0
    break;
7974
0
  case Intrinsic::experimental_vector_reduce_xor:
7975
0
    Res = DAG.getNode(ISD::VECREDUCE_XOR, dl, VT, Op1);
7976
0
    break;
7977
15
  case Intrinsic::experimental_vector_reduce_smax:
7978
15
    Res = DAG.getNode(ISD::VECREDUCE_SMAX, dl, VT, Op1);
7979
15
    break;
7980
11
  case Intrinsic::experimental_vector_reduce_smin:
7981
11
    Res = DAG.getNode(ISD::VECREDUCE_SMIN, dl, VT, Op1);
7982
11
    break;
7983
5
  case Intrinsic::experimental_vector_reduce_umax:
7984
5
    Res = DAG.getNode(ISD::VECREDUCE_UMAX, dl, VT, Op1);
7985
5
    break;
7986
5
  case Intrinsic::experimental_vector_reduce_umin:
7987
5
    Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1);
7988
5
    break;
7989
1
  case Intrinsic::experimental_vector_reduce_fmax:
7990
1
    Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags);
7991
1
    break;
7992
1
  case Intrinsic::experimental_vector_reduce_fmin:
7993
1
    Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags);
7994
1
    break;
7995
0
  default:
7996
0
    llvm_unreachable("Unhandled vector reduce intrinsic");
7997
913
  }
7998
913
  setValue(&I, Res);
7999
913
}
8000
8001
/// Returns an AttributeList representing the attributes applied to the return
8002
/// value of the given call.
8003
1.79M
static AttributeList getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) {
8004
1.79M
  SmallVector<Attribute::AttrKind, 2> Attrs;
8005
1.79M
  if (CLI.RetSExt)
8006
1.48k
    Attrs.push_back(Attribute::SExt);
8007
1.79M
  if (CLI.RetZExt)
8008
46.1k
    Attrs.push_back(Attribute::ZExt);
8009
1.79M
  if (CLI.IsInReg)
8010
206
    Attrs.push_back(Attribute::InReg);
8011
1.79M
8012
1.79M
  return AttributeList::get(CLI.RetTy->getContext(), AttributeList::ReturnIndex,
8013
1.79M
                            Attrs);
8014
1.79M
}
8015
8016
/// TargetLowering::LowerCallTo - This is the default LowerCallTo
8017
/// implementation, which just calls LowerCall.
8018
/// FIXME: When all targets are
8019
/// migrated to using LowerCall, this hook should be integrated into SDISel.
8020
std::pair<SDValue, SDValue>
8021
1.79M
TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
8022
1.79M
  // Handle the incoming return values from the call.
8023
1.79M
  CLI.Ins.clear();
8024
1.79M
  Type *OrigRetTy = CLI.RetTy;
8025
1.79M
  SmallVector<EVT, 4> RetTys;
8026
1.79M
  SmallVector<uint64_t, 4> Offsets;
8027
1.79M
  auto &DL = CLI.DAG.getDataLayout();
8028
1.79M
  ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets);
8029
1.79M
8030
1.79M
  if (
CLI.IsPostTypeLegalization1.79M
) {
8031
2.53k
    // If we are lowering a libcall after legalization, split the return type.
8032
2.53k
    SmallVector<EVT, 4> OldRetTys = std::move(RetTys);
8033
2.53k
    SmallVector<uint64_t, 4> OldOffsets = std::move(Offsets);
8034
5.06k
    for (size_t i = 0, e = OldRetTys.size(); 
i != e5.06k
;
++i2.53k
) {
8035
2.53k
      EVT RetVT = OldRetTys[i];
8036
2.53k
      uint64_t Offset = OldOffsets[i];
8037
2.53k
      MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), RetVT);
8038
2.53k
      unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), RetVT);
8039
2.53k
      unsigned RegisterVTSize = RegisterVT.getSizeInBits();
8040
2.53k
      RetTys.append(NumRegs, RegisterVT);
8041
5.07k
      for (unsigned j = 0; 
j != NumRegs5.07k
;
++j2.53k
)
8042
2.53k
        Offsets.push_back(Offset + j * RegisterVTSize);
8043
2.53k
    }
8044
2.53k
  }
8045
1.79M
8046
1.79M
  SmallVector<ISD::OutputArg, 4> Outs;
8047
1.79M
  GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL);
8048
1.79M
8049
1.79M
  bool CanLowerReturn =
8050
1.79M
      this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(),
8051
1.79M
                           CLI.IsVarArg, Outs, CLI.RetTy->getContext());
8052
1.79M
8053
1.79M
  SDValue DemoteStackSlot;
8054
1.79M
  int DemoteStackIdx = -100;
8055
1.79M
  if (
!CanLowerReturn1.79M
) {
8056
264
    // FIXME: equivalent assert?
8057
264
    // assert(!CS.hasInAllocaArgument() &&
8058
264
    //        "sret demotion is incompatible with inalloca");
8059
264
    uint64_t TySize = DL.getTypeAllocSize(CLI.RetTy);
8060
264
    unsigned Align = DL.getPrefTypeAlignment(CLI.RetTy);
8061
264
    MachineFunction &MF = CLI.DAG.getMachineFunction();
8062
264
    DemoteStackIdx = MF.getFrameInfo().CreateStackObject(TySize, Align, false);
8063
264
    Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy);
8064
264
8065
264
    DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getFrameIndexTy(DL));
8066
264
    ArgListEntry Entry;
8067
264
    Entry.Node = DemoteStackSlot;
8068
264
    Entry.Ty = StackSlotPtrType;
8069
264
    Entry.IsSExt = false;
8070
264
    Entry.IsZExt = false;
8071
264
    Entry.IsInReg = false;
8072
264
    Entry.IsSRet = true;
8073
264
    Entry.IsNest = false;
8074
264
    Entry.IsByVal = false;
8075
264
    Entry.IsReturned = false;
8076
264
    Entry.IsSwiftSelf = false;
8077
264
    Entry.IsSwiftError = false;
8078
264
    Entry.Alignment = Align;
8079
264
    CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
8080
264
    CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());
8081
264
8082
264
    // sret demotion isn't compatible with tail-calls, since the sret argument
8083
264
    // points into the callers stack frame.
8084
264
    CLI.IsTailCall = false;
8085
1.79M
  } else {
8086
3.05M
    for (unsigned I = 0, E = RetTys.size(); 
I != E3.05M
;
++I1.25M
) {
8087
1.25M
      EVT VT = RetTys[I];
8088
1.25M
      MVT RegisterVT =
8089
1.25M
          getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT);
8090
1.25M
      unsigned NumRegs =
8091
1.25M
          getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT);
8092
2.51M
      for (unsigned i = 0; 
i != NumRegs2.51M
;
++i1.25M
) {
8093
1.25M
        ISD::InputArg MyFlags;
8094
1.25M
        MyFlags.VT = RegisterVT;
8095
1.25M
        MyFlags.ArgVT = VT;
8096
1.25M
        MyFlags.Used = CLI.IsReturnValueUsed;
8097
1.25M
        if (CLI.RetSExt)
8098
2.01k
          MyFlags.Flags.setSExt();
8099
1.25M
        if (CLI.RetZExt)
8100
44.3k
          MyFlags.Flags.setZExt();
8101
1.25M
        if (CLI.IsInReg)
8102
428
          MyFlags.Flags.setInReg();
8103
1.25M
        CLI.Ins.push_back(MyFlags);
8104
1.25M
      }
8105
1.25M
    }
8106
1.79M
  }
8107
1.79M
8108
1.79M
  // We push in swifterror return as the last element of CLI.Ins.
8109
1.79M
  ArgListTy &Args = CLI.getArgs();
8110
1.79M
  if (
supportSwiftError()1.79M
) {
8111
5.81M
    for (unsigned i = 0, e = Args.size(); 
i != e5.81M
;
++i4.03M
) {
8112
4.03M
      if (
Args[i].IsSwiftError4.03M
) {
8113
110
        ISD::InputArg MyFlags;
8114
110
        MyFlags.VT = getPointerTy(DL);
8115
110
        MyFlags.ArgVT = EVT(getPointerTy(DL));
8116
110
        MyFlags.Flags.setSwiftError();
8117
110
        CLI.Ins.push_back(MyFlags);
8118
110
      }
8119
4.03M
    }
8120
1.78M
  }
8121
1.79M
8122
1.79M
  // Handle all of the outgoing arguments.
8123
1.79M
  CLI.Outs.clear();
8124
1.79M
  CLI.OutVals.clear();
8125
5.86M
  for (unsigned i = 0, e = Args.size(); 
i != e5.86M
;
++i4.06M
) {
8126
4.06M
    SmallVector<EVT, 4> ValueVTs;
8127
4.06M
    ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs);
8128
4.06M
    // FIXME: Split arguments if CLI.IsPostTypeLegalization
8129
4.06M
    Type *FinalType = Args[i].Ty;
8130
4.06M
    if (Args[i].IsByVal)
8131
629
      FinalType = cast<PointerType>(Args[i].Ty)->getElementType();
8132
4.06M
    bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
8133
4.06M
        FinalType, CLI.CallConv, CLI.IsVarArg);
8134
8.13M
    for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
8135
4.07M
         
++Value4.07M
) {
8136
4.07M
      EVT VT = ValueVTs[Value];
8137
4.07M
      Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext());
8138
4.07M
      SDValue Op = SDValue(Args[i].Node.getNode(),
8139
4.07M
                           Args[i].Node.getResNo() + Value);
8140
4.07M
      ISD::ArgFlagsTy Flags;
8141
4.07M
8142
4.07M
      // Certain targets (such as MIPS), may have a different ABI alignment
8143
4.07M
      // for a type depending on the context. Give the target a chance to
8144
4.07M
      // specify the alignment it wants.
8145
4.07M
      unsigned OriginalAlignment = getABIAlignmentForCallingConv(ArgTy, DL);
8146
4.07M
8147
4.07M
      if (Args[i].IsZExt)
8148
20.2k
        Flags.setZExt();
8149
4.07M
      if (Args[i].IsSExt)
8150
3.42k
        Flags.setSExt();
8151
4.07M
      if (
Args[i].IsInReg4.07M
) {
8152
211
        // If we are using vectorcall calling convention, a structure that is
8153
211
        // passed InReg - is surely an HVA
8154
211
        if (CLI.CallConv == CallingConv::X86_VectorCall &&
8155
211
            
isa<StructType>(FinalType)14
) {
8156
8
          // The first value of a structure is marked
8157
8
          if (0 == Value)
8158
2
            Flags.setHvaStart();
8159
8
          Flags.setHva();
8160
8
        }
8161
211
        // Set InReg Flag
8162
211
        Flags.setInReg();
8163
211
      }
8164
4.07M
      if (Args[i].IsSRet)
8165
3.88k
        Flags.setSRet();
8166
4.07M
      if (Args[i].IsSwiftSelf)
8167
97
        Flags.setSwiftSelf();
8168
4.07M
      if (Args[i].IsSwiftError)
8169
129
        Flags.setSwiftError();
8170
4.07M
      if (Args[i].IsByVal)
8171
629
        Flags.setByVal();
8172
4.07M
      if (
Args[i].IsInAlloca4.07M
) {
8173
22
        Flags.setInAlloca();
8174
22
        // Set the byval flag for CCAssignFn callbacks that don't know about
8175
22
        // inalloca.  This way we can know how many bytes we should've allocated
8176
22
        // and how many bytes a callee cleanup function will pop.  If we port
8177
22
        // inalloca to more targets, we'll have to add custom inalloca handling
8178
22
        // in the various CC lowering callbacks.
8179
22
        Flags.setByVal();
8180
22
      }
8181
4.07M
      if (
Args[i].IsByVal || 4.07M
Args[i].IsInAlloca4.07M
) {
8182
651
        PointerType *Ty = cast<PointerType>(Args[i].Ty);
8183
651
        Type *ElementTy = Ty->getElementType();
8184
651
        Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
8185
651
        // For ByVal, alignment should come from FE.  BE will guess if this
8186
651
        // info is not there but there are cases it cannot get right.
8187
651
        unsigned FrameAlign;
8188
651
        if (Args[i].Alignment)
8189
414
          FrameAlign = Args[i].Alignment;
8190
651
        else
8191
237
          FrameAlign = getByValTypeAlignment(ElementTy, DL);
8192
651
        Flags.setByValAlign(FrameAlign);
8193
651
      }
8194
4.07M
      if (Args[i].IsNest)
8195
5
        Flags.setNest();
8196
4.07M
      if (NeedsRegBlock)
8197
2.11k
        Flags.setInConsecutiveRegs();
8198
4.07M
      Flags.setOrigAlign(OriginalAlignment);
8199
4.07M
8200
4.07M
      MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT);
8201
4.07M
      unsigned NumParts =
8202
4.07M
          getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT);
8203
4.07M
      SmallVector<SDValue, 4> Parts(NumParts);
8204
4.07M
      ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
8205
4.07M
8206
4.07M
      if (Args[i].IsSExt)
8207
3.42k
        ExtendKind = ISD::SIGN_EXTEND;
8208
4.06M
      else 
if (4.06M
Args[i].IsZExt4.06M
)
8209
20.2k
        ExtendKind = ISD::ZERO_EXTEND;
8210
4.07M
8211
4.07M
      // Conservatively only handle 'returned' on non-vectors for now
8212
4.07M
      if (
Args[i].IsReturned && 4.07M
!Op.getValueType().isVector()5.28k
) {
8213
5.28k
        assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues &&
8214
5.28k
               "unexpected use of 'returned'");
8215
5.28k
        // Before passing 'returned' to the target lowering code, ensure that
8216
5.28k
        // either the register MVT and the actual EVT are the same size or that
8217
5.28k
        // the return value and argument are extended in the same way; in these
8218
5.28k
        // cases it's safe to pass the argument register value unchanged as the
8219
5.28k
        // return register value (although it's at the target's option whether
8220
5.28k
        // to do so)
8221
5.28k
        // TODO: allow code generation to take advantage of partially preserved
8222
5.28k
        // registers rather than clobbering the entire register when the
8223
5.28k
        // parameter extension method is not compatible with the return
8224
5.28k
        // extension method
8225
5.28k
        if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) ||
8226
21
            
(ExtendKind != ISD::ANY_EXTEND && 21
CLI.RetSExt == Args[i].IsSExt12
&&
8227
12
             CLI.RetZExt == Args[i].IsZExt))
8228
5.26k
          Flags.setReturned();
8229
5.28k
      }
8230
4.07M
8231
4.07M
      getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT,
8232
4.07M
                     CLI.CS.getInstruction(), ExtendKind, true);
8233
4.07M
8234
8.15M
      for (unsigned j = 0; 
j != NumParts8.15M
;
++j4.08M
) {
8235
4.08M
        // if it isn't first piece, alignment must be 1
8236
4.08M
        ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT,
8237
4.08M
                               i < CLI.NumFixedArgs,
8238
4.08M
                               i, j*Parts[j].getValueType().getStoreSize());
8239
4.08M
        if (
NumParts > 1 && 4.08M
j == 024.7k
)
8240
11.5k
          MyFlags.Flags.setSplit();
8241
4.07M
        else 
if (4.07M
j != 04.07M
) {
8242
13.2k
          MyFlags.Flags.setOrigAlign(1);
8243
13.2k
          if (j == NumParts - 1)
8244
11.5k
            MyFlags.Flags.setSplitEnd();
8245
4.07M
        }
8246
4.08M
8247
4.08M
        CLI.Outs.push_back(MyFlags);
8248
4.08M
        CLI.OutVals.push_back(Parts[j]);
8249
4.08M
      }
8250
4.07M
8251
4.07M
      if (
NeedsRegBlock && 4.07M
Value == NumValues - 12.11k
)
8252
987
        CLI.Outs[CLI.Outs.size() - 1].Flags.setInConsecutiveRegsLast();
8253
4.07M
    }
8254
4.06M
  }
8255
1.79M
8256
1.79M
  SmallVector<SDValue, 4> InVals;
8257
1.79M
  CLI.Chain = LowerCall(CLI, InVals);
8258
1.79M
8259
1.79M
  // Update CLI.InVals to use outside of this function.
8260
1.79M
  CLI.InVals = InVals;
8261
1.79M
8262
1.79M
  // Verify that the target's LowerCall behaved as expected.
8263
1.79M
  assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other &&
8264
1.79M
         "LowerCall didn't return a valid chain!");
8265
1.79M
  assert((!CLI.IsTailCall || InVals.empty()) &&
8266
1.79M
         "LowerCall emitted a return value for a tail call!");
8267
1.79M
  assert((CLI.IsTailCall || InVals.size() == CLI.Ins.size()) &&
8268
1.79M
         "LowerCall didn't emit the correct number of values!");
8269
1.79M
8270
1.79M
  // For a tail call, the return value is merely live-out and there aren't
8271
1.79M
  // any nodes in the DAG representing it. Return a special value to
8272
1.79M
  // indicate that a tail call has been emitted and no more Instructions
8273
1.79M
  // should be processed in the current block.
8274
1.79M
  if (
CLI.IsTailCall1.79M
) {
8275
231k
    CLI.DAG.setRoot(CLI.Chain);
8276
231k
    return std::make_pair(SDValue(), SDValue());
8277
231k
  }
8278
1.56M
8279
#ifndef NDEBUG
8280
  for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) {
8281
    assert(InVals[i].getNode() && "LowerCall emitted a null value!");
8282
    assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() &&
8283
           "LowerCall emitted a value with the wrong type!");
8284
  }
8285
#endif
8286
8287
1.56M
  SmallVector<SDValue, 4> ReturnValues;
8288
1.56M
  if (
!CanLowerReturn1.56M
) {
8289
264
    // The instruction result is the result of loading from the
8290
264
    // hidden sret parameter.
8291
264
    SmallVector<EVT, 1> PVTs;
8292
264
    Type *PtrRetTy = PointerType::getUnqual(OrigRetTy);
8293
264
8294
264
    ComputeValueVTs(*this, DL, PtrRetTy, PVTs);
8295
264
    assert(PVTs.size() == 1 && "Pointers should fit in one register");
8296
264
    EVT PtrVT = PVTs[0];
8297
264
8298
264
    unsigned NumValues = RetTys.size();
8299
264
    ReturnValues.resize(NumValues);
8300
264
    SmallVector<SDValue, 4> Chains(NumValues);
8301
264
8302
264
    // An aggregate return value cannot wrap around the address space, so
8303
264
    // offsets to its parts don't wrap either.
8304
264
    SDNodeFlags Flags;
8305
264
    Flags.setNoUnsignedWrap(true);
8306
264
8307
678
    for (unsigned i = 0; 
i < NumValues678
;
++i414
) {
8308
414
      SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
8309
414
                                    CLI.DAG.getConstant(Offsets[i], CLI.DL,
8310
414
                                                        PtrVT), Flags);
8311
414
      SDValue L = CLI.DAG.getLoad(
8312
414
          RetTys[i], CLI.DL, CLI.Chain, Add,
8313
414
          MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),
8314
414
                                            DemoteStackIdx, Offsets[i]),
8315
414
          /* Alignment = */ 1);
8316
414
      ReturnValues[i] = L;
8317
414
      Chains[i] = L.getValue(1);
8318
414
    }
8319
264
8320
264
    CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains);
8321
1.56M
  } else {
8322
1.56M
    // Collect the legal value parts into potentially illegal values
8323
1.56M
    // that correspond to the original function's return values.
8324
1.56M
    Optional<ISD::NodeType> AssertOp;
8325
1.56M
    if (CLI.RetSExt)
8326
1.28k
      AssertOp = ISD::AssertSext;
8327
1.56M
    else 
if (1.56M
CLI.RetZExt1.56M
)
8328
45.7k
      AssertOp = ISD::AssertZext;
8329
1.56M
    unsigned CurReg = 0;
8330
2.62M
    for (unsigned I = 0, E = RetTys.size(); 
I != E2.62M
;
++I1.05M
) {
8331
1.05M
      EVT VT = RetTys[I];
8332
1.05M
      MVT RegisterVT =
8333
1.05M
          getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT);
8334
1.05M
      unsigned NumRegs =
8335
1.05M
          getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT);
8336
1.05M
8337
1.05M
      ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
8338
1.05M
                                              NumRegs, RegisterVT, VT, nullptr,
8339
1.05M
                                              AssertOp, true));
8340
1.05M
      CurReg += NumRegs;
8341
1.05M
    }
8342
1.56M
8343
1.56M
    // For a function returning void, there is no return value. We can't create
8344
1.56M
    // such a node, so we just return a null return value in that case. In
8345
1.56M
    // that case, nothing will actually look at the value.
8346
1.56M
    if (ReturnValues.empty())
8347
510k
      return std::make_pair(SDValue(), CLI.Chain);
8348
1.05M
  }
8349
1.05M
8350
1.05M
  SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL,
8351
1.05M
                                CLI.DAG.getVTList(RetTys), ReturnValues);
8352
1.05M
  return std::make_pair(Res, CLI.Chain);
8353
1.05M
}
8354
8355
void TargetLowering::LowerOperationWrapper(SDNode *N,
8356
                                           SmallVectorImpl<SDValue> &Results,
8357
958
                                           SelectionDAG &DAG) const {
8358
958
  if (SDValue Res = LowerOperation(SDValue(N, 0), DAG))
8359
867
    Results.push_back(Res);
8360
958
}
8361
8362
0
SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
8363
0
  llvm_unreachable("LowerOperation not implemented for this target!");
8364
0
}
8365
8366
void
8367
2.52M
SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
8368
2.52M
  SDValue Op = getNonRegisterValue(V);
8369
2.52M
  assert((Op.getOpcode() != ISD::CopyFromReg ||
8370
2.52M
          cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
8371
2.52M
         "Copy from a reg to the same reg!");
8372
2.52M
  assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
8373
2.52M
8374
2.52M
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8375
2.52M
  // If this is an InlineAsm we have to match the registers required, not the
8376
2.52M
  // notional registers required by the type.
8377
2.52M
8378
2.52M
  RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
8379
2.52M
                   V->getType(), isABIRegCopy(V));
8380
2.52M
  SDValue Chain = DAG.getEntryNode();
8381
2.52M
8382
2.52M
  ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) ==
8383
2.52M
                              FuncInfo.PreferredExtendType.end())
8384
605k
                                 ? ISD::ANY_EXTEND
8385
1.92M
                                 : FuncInfo.PreferredExtendType[V];
8386
2.52M
  RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType);
8387
2.52M
  PendingExports.push_back(Chain);
8388
2.52M
}
8389
8390
#include "llvm/CodeGen/SelectionDAGISel.h"
8391
8392
/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
8393
/// entry block, return true.  This includes arguments used by switches, since
8394
/// the switch may expand into multiple basic blocks.
8395
124k
static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
8396
124k
  // With FastISel active, we may be splitting blocks, so force creation
8397
124k
  // of virtual registers for all non-dead arguments.
8398
124k
  if (FastISel)
8399
11.9k
    return A->use_empty();
8400
112k
8401
112k
  const BasicBlock &Entry = A->getParent()->front();
8402
112k
  for (const User *U : A->users())
8403
124k
    
if (124k
cast<Instruction>(U)->getParent() != &Entry || 124k
isa<SwitchInst>(U)91.4k
)
8404
33.4k
      return false;  // Use not in entry block.
8405
78.7k
8406
78.7k
  return true;
8407
78.7k
}
8408
8409
using ArgCopyElisionMapTy =
8410
    DenseMap<const Argument *,
8411
             std::pair<const AllocaInst *, const StoreInst *>>;
8412
8413
/// Scan the entry block of the function in FuncInfo for arguments that look
8414
/// like copies into a local alloca. Record any copied arguments in
8415
/// ArgCopyElisionCandidates.
8416
static void
8417
findArgumentCopyElisionCandidates(const DataLayout &DL,
8418
                                  FunctionLoweringInfo *FuncInfo,
8419
433k
                                  ArgCopyElisionMapTy &ArgCopyElisionCandidates) {
8420
433k
  // Record the state of every static alloca used in the entry block. Argument
8421
433k
  // allocas are all used in the entry block, so we need approximately as many
8422
433k
  // entries as we have arguments.
8423
433k
  enum StaticAllocaInfo { Unknown, Clobbered, Elidable };
8424
433k
  SmallDenseMap<const AllocaInst *, StaticAllocaInfo, 8> StaticAllocas;
8425
433k
  unsigned NumArgs = FuncInfo->Fn->arg_size();
8426
433k
  StaticAllocas.reserve(NumArgs * 2);
8427
433k
8428
5.00M
  auto GetInfoIfStaticAlloca = [&](const Value *V) -> StaticAllocaInfo * {
8429
5.00M
    if (!V)
8430
0
      return nullptr;
8431
5.00M
    V = V->stripPointerCasts();
8432
5.00M
    const auto *AI = dyn_cast<AllocaInst>(V);
8433
5.00M
    if (
!AI || 5.00M
!AI->isStaticAlloca()113k
||
!FuncInfo->StaticAllocaMap.count(AI)112k
)
8434
4.89M
      return nullptr;
8435
112k
    auto Iter = StaticAllocas.insert({AI, Unknown});
8436
112k
    return &Iter.first->second;
8437
112k
  };
8438
433k
8439
433k
  // Look for stores of arguments to static allocas. Look through bitcasts and
8440
433k
  // GEPs to handle type coercions, as long as the alloca is fully initialized
8441
433k
  // by the store. Any non-store use of an alloca escapes it and any subsequent
8442
433k
  // unanalyzed store might write it.
8443
433k
  // FIXME: Handle structs initialized with multiple stores.
8444
2.59M
  for (const Instruction &I : FuncInfo->Fn->getEntryBlock()) {
8445
2.59M
    // Look for stores, and handle non-store uses conservatively.
8446
2.59M
    const auto *SI = dyn_cast<StoreInst>(&I);
8447
2.59M
    if (
!SI2.59M
) {
8448
2.43M
      // We will look through cast uses, so ignore them completely.
8449
2.43M
      if (I.isCast())
8450
300k
        continue;
8451
2.13M
      // Ignore debug info intrinsics, they don't escape or store to allocas.
8452
2.13M
      
if (2.13M
isa<DbgInfoIntrinsic>(I)2.13M
)
8453
656
        continue;
8454
2.13M
      // This is an unknown instruction. Assume it escapes or writes to all
8455
2.13M
      // static alloca operands.
8456
2.13M
      
for (const Use &U : I.operands()) 2.13M
{
8457
4.67M
        if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(U))
8458
97.8k
          *Info = StaticAllocaInfo::Clobbered;
8459
4.67M
      }
8460
2.43M
      continue;
8461
2.43M
    }
8462
162k
8463
162k
    // If the stored value is a static alloca, mark it as escaped.
8464
162k
    
if (StaticAllocaInfo *162k
Info162k
= GetInfoIfStaticAlloca(SI->getValueOperand()))
8465
242
      *Info = StaticAllocaInfo::Clobbered;
8466
162k
8467
162k
    // Check if the destination is a static alloca.
8468
162k
    const Value *Dst = SI->getPointerOperand()->stripPointerCasts();
8469
162k
    StaticAllocaInfo *Info = GetInfoIfStaticAlloca(Dst);
8470
162k
    if (!Info)
8471
147k
      continue;
8472
14.9k
    const AllocaInst *AI = cast<AllocaInst>(Dst);
8473
14.9k
8474
14.9k
    // Skip allocas that have been initialized or clobbered.
8475
14.9k
    if (*Info != StaticAllocaInfo::Unknown)
8476
8.28k
      continue;
8477
6.63k
8478
6.63k
    // Check if the stored value is an argument, and that this store fully
8479
6.63k
    // initializes the alloca. Don't elide copies from the same argument twice.
8480
6.63k
    const Value *Val = SI->getValueOperand()->stripPointerCasts();
8481
6.63k
    const auto *Arg = dyn_cast<Argument>(Val);
8482
6.63k
    if (
!Arg || 6.63k
Arg->hasInAllocaAttr()4.32k
||
Arg->hasByValAttr()4.32k
||
8483
4.31k
        Arg->getType()->isEmptyTy() ||
8484
4.31k
        DL.getTypeStoreSize(Arg->getType()) !=
8485
4.31k
            DL.getTypeAllocSize(AI->getAllocatedType()) ||
8486
6.63k
        
ArgCopyElisionCandidates.count(Arg)4.25k
) {
8487
2.38k
      *Info = StaticAllocaInfo::Clobbered;
8488
2.38k
      continue;
8489
2.38k
    }
8490
4.25k
8491
4.25k
    
DEBUG4.25k
(dbgs() << "Found argument copy elision candidate: " << *AI << '\n');
8492
4.25k
8493
4.25k
    // Mark this alloca and store for argument copy elision.
8494
4.25k
    *Info = StaticAllocaInfo::Elidable;
8495
4.25k
    ArgCopyElisionCandidates.insert({Arg, {AI, SI}});
8496
4.25k
8497
4.25k
    // Stop scanning if we've seen all arguments. This will happen early in -O0
8498
4.25k
    // builds, which is useful, because -O0 builds have large entry blocks and
8499
4.25k
    // many allocas.
8500
4.25k
    if (ArgCopyElisionCandidates.size() == NumArgs)
8501
868
      break;
8502
433k
  }
8503
433k
}
8504
8505
/// Try to elide argument copies from memory into a local alloca. Succeeds if
8506
/// ArgVal is a load from a suitable fixed stack object.
8507
static void tryToElideArgumentCopy(
8508
    FunctionLoweringInfo *FuncInfo, SmallVectorImpl<SDValue> &Chains,
8509
    DenseMap<int, int> &ArgCopyElisionFrameIndexMap,
8510
    SmallPtrSetImpl<const Instruction *> &ElidedArgCopyInstrs,
8511
    ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg,
8512
4.25k
    SDValue ArgVal, bool &ArgHasUses) {
8513
4.25k
  // Check if this is a load from a fixed stack object.
8514
4.25k
  auto *LNode = dyn_cast<LoadSDNode>(ArgVal);
8515
4.25k
  if (!LNode)
8516
3.97k
    return;
8517
280
  auto *FINode = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode());
8518
280
  if (!FINode)
8519
0
    return;
8520
280
8521
280
  // Check that the fixed stack object is the right size and alignment.
8522
280
  // Look at the alignment that the user wrote on the alloca instead of looking
8523
280
  // at the stack object.
8524
280
  auto ArgCopyIter = ArgCopyElisionCandidates.find(&Arg);
8525
280
  assert(ArgCopyIter != ArgCopyElisionCandidates.end());
8526
280
  const AllocaInst *AI = ArgCopyIter->second.first;
8527
280
  int FixedIndex = FINode->getIndex();
8528
280
  int &AllocaIndex = FuncInfo->StaticAllocaMap[AI];
8529
280
  int OldIndex = AllocaIndex;
8530
280
  MachineFrameInfo &MFI = FuncInfo->MF->getFrameInfo();
8531
280
  if (
MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)280
) {
8532
1
    DEBUG(dbgs() << "  argument copy elision failed due to bad fixed stack "
8533
1
                    "object size\n");
8534
1
    return;
8535
1
  }
8536
279
  unsigned RequiredAlignment = AI->getAlignment();
8537
279
  if (
!RequiredAlignment279
) {
8538
63
    RequiredAlignment = FuncInfo->MF->getDataLayout().getABITypeAlignment(
8539
63
        AI->getAllocatedType());
8540
63
  }
8541
279
  if (
MFI.getObjectAlignment(FixedIndex) < RequiredAlignment279
) {
8542
25
    DEBUG(dbgs() << "  argument copy elision failed: alignment of alloca "
8543
25
                    "greater than stack argument alignment ("
8544
25
                 << RequiredAlignment << " vs "
8545
25
                 << MFI.getObjectAlignment(FixedIndex) << ")\n");
8546
25
    return;
8547
25
  }
8548
254
8549
254
  // Perform the elision. Delete the old stack object and replace its only use
8550
254
  // in the variable info map. Mark the stack object as mutable.
8551
254
  
DEBUG254
({
8552
254
    dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n'
8553
254
           << "  Replacing frame index " << OldIndex << " with " << FixedIndex
8554
254
           << '\n';
8555
254
  });
8556
254
  MFI.RemoveStackObject(OldIndex);
8557
254
  MFI.setIsImmutableObjectIndex(FixedIndex, false);
8558
254
  AllocaIndex = FixedIndex;
8559
254
  ArgCopyElisionFrameIndexMap.insert({OldIndex, FixedIndex});
8560
254
  Chains.push_back(ArgVal.getValue(1));
8561
254
8562
254
  // Avoid emitting code for the store implementing the copy.
8563
254
  const StoreInst *SI = ArgCopyIter->second.second;
8564
254
  ElidedArgCopyInstrs.insert(SI);
8565
254
8566
254
  // Check for uses of the argument again so that we can avoid exporting ArgVal
8567
254
  // if it is't used by anything other than the store.
8568
254
  for (const Value *U : Arg.users()) {
8569
254
    if (
U != SI254
) {
8570
23
      ArgHasUses = true;
8571
23
      break;
8572
23
    }
8573
254
  }
8574
4.25k
}
8575
8576
433k
void SelectionDAGISel::LowerArguments(const Function &F) {
8577
433k
  SelectionDAG &DAG = SDB->DAG;
8578
433k
  SDLoc dl = SDB->getCurSDLoc();
8579
433k
  const DataLayout &DL = DAG.getDataLayout();
8580
433k
  SmallVector<ISD::InputArg, 16> Ins;
8581
433k
8582
433k
  if (
!FuncInfo->CanLowerReturn433k
) {
8583
516
    // Put in an sret pointer parameter before all the other parameters.
8584
516
    SmallVector<EVT, 1> ValueVTs;
8585
516
    ComputeValueVTs(*TLI, DAG.getDataLayout(),
8586
516
                    PointerType::getUnqual(F.getReturnType()), ValueVTs);
8587
516
8588
516
    // NOTE: Assuming that a pointer will never break down to more than one VT
8589
516
    // or one register.
8590
516
    ISD::ArgFlagsTy Flags;
8591
516
    Flags.setSRet();
8592
516
    MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]);
8593
516
    ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true,
8594
516
                         ISD::InputArg::NoArgIndex, 0);
8595
516
    Ins.push_back(RetArg);
8596
516
  }
8597
433k
8598
433k
  // Look for stores of arguments to static allocas. Mark such arguments with a
8599
433k
  // flag to ask the target to give us the memory location of that argument if
8600
433k
  // available.
8601
433k
  ArgCopyElisionMapTy ArgCopyElisionCandidates;
8602
433k
  findArgumentCopyElisionCandidates(DL, FuncInfo, ArgCopyElisionCandidates);
8603
433k
8604
433k
  // Set up the incoming argument description vector.
8605
958k
  for (const Argument &Arg : F.args()) {
8606
958k
    unsigned ArgNo = Arg.getArgNo();
8607
958k
    SmallVector<EVT, 4> ValueVTs;
8608
958k
    ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
8609
958k
    bool isArgValueUsed = !Arg.use_empty();
8610
958k
    unsigned PartBase = 0;
8611
958k
    Type *FinalType = Arg.getType();
8612
958k
    if (Arg.hasAttribute(Attribute::ByVal))
8613
545
      FinalType = cast<PointerType>(FinalType)->getElementType();
8614
958k
    bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
8615
958k
        FinalType, F.getCallingConv(), F.isVarArg());
8616
958k
    for (unsigned Value = 0, NumValues = ValueVTs.size();
8617
1.92M
         
Value != NumValues1.92M
;
++Value962k
) {
8618
962k
      EVT VT = ValueVTs[Value];
8619
962k
      Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
8620
962k
      ISD::ArgFlagsTy Flags;
8621
962k
8622
962k
      // Certain targets (such as MIPS), may have a different ABI alignment
8623
962k
      // for a type depending on the context. Give the target a chance to
8624
962k
      // specify the alignment it wants.
8625
962k
      unsigned OriginalAlignment =
8626
962k
          TLI->getABIAlignmentForCallingConv(ArgTy, DL);
8627
962k
8628
962k
      if (Arg.hasAttribute(Attribute::ZExt))
8629
8.31k
        Flags.setZExt();
8630
962k
      if (Arg.hasAttribute(Attribute::SExt))
8631
7.43k
        Flags.setSExt();
8632
962k
      if (
Arg.hasAttribute(Attribute::InReg)962k
) {
8633
1.17k
        // If we are using vectorcall calling convention, a structure that is
8634
1.17k
        // passed InReg - is surely an HVA
8635
1.17k
        if (F.getCallingConv() == CallingConv::X86_VectorCall &&
8636
1.17k
            
isa<StructType>(Arg.getType())72
) {
8637
50
          // The first value of a structure is marked
8638
50
          if (0 == Value)
8639
14
            Flags.setHvaStart();
8640
50
          Flags.setHva();
8641
50
        }
8642
1.17k
        // Set InReg Flag
8643
1.17k
        Flags.setInReg();
8644
1.17k
      }
8645
962k
      if (Arg.hasAttribute(Attribute::StructRet))
8646
1.86k
        Flags.setSRet();
8647
962k
      if (Arg.hasAttribute(Attribute::SwiftSelf))
8648
74
        Flags.setSwiftSelf();
8649
962k
      if (Arg.hasAttribute(Attribute::SwiftError))
8650
119
        Flags.setSwiftError();
8651
962k
      if (Arg.hasAttribute(Attribute::ByVal))
8652
545
        Flags.setByVal();
8653
962k
      if (
Arg.hasAttribute(Attribute::InAlloca)962k
) {
8654
18
        Flags.setInAlloca();
8655
18
        // Set the byval flag for CCAssignFn callbacks that don't know about
8656
18
        // inalloca.  This way we can know how many bytes we should've allocated
8657
18
        // and how many bytes a callee cleanup function will pop.  If we port
8658
18
        // inalloca to more targets, we'll have to add custom inalloca handling
8659
18
        // in the various CC lowering callbacks.
8660
18
        Flags.setByVal();
8661
18
      }
8662
962k
      if (
F.getCallingConv() == CallingConv::X86_INTR962k
) {
8663
34
        // IA Interrupt passes frame (1st parameter) by value in the stack.
8664
34
        if (ArgNo == 0)
8665
25
          Flags.setByVal();
8666
34
      }
8667
962k
      if (
Flags.isByVal() || 962k
Flags.isInAlloca()961k
) {
8668
588
        PointerType *Ty = cast<PointerType>(Arg.getType());
8669
588
        Type *ElementTy = Ty->getElementType();
8670
588
        Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
8671
588
        // For ByVal, alignment should be passed from FE.  BE will guess if
8672
588
        // this info is not there but there are cases it cannot get right.
8673
588
        unsigned FrameAlign;
8674
588
        if (Arg.getParamAlignment())
8675
92
          FrameAlign = Arg.getParamAlignment();
8676
588
        else
8677
496
          FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL);
8678
588
        Flags.setByValAlign(FrameAlign);
8679
588
      }
8680
962k
      if (Arg.hasAttribute(Attribute::Nest))
8681
35
        Flags.setNest();
8682
962k
      if (NeedsRegBlock)
8683
4.03k
        Flags.setInConsecutiveRegs();
8684
962k
      Flags.setOrigAlign(OriginalAlignment);
8685
962k
      if (ArgCopyElisionCandidates.count(&Arg))
8686
4.26k
        Flags.setCopyElisionCandidate();
8687
962k
8688
962k
      MVT RegisterVT =
8689
962k
          TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), VT);
8690
962k
      unsigned NumRegs =
8691
962k
          TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), VT);
8692
1.95M
      for (unsigned i = 0; 
i != NumRegs1.95M
;
++i988k
) {
8693
988k
        ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed,
8694
988k
                              ArgNo, PartBase+i*RegisterVT.getStoreSize());
8695
988k
        if (
NumRegs > 1 && 988k
i == 039.3k
)
8696
13.8k
          MyFlags.Flags.setSplit();
8697
988k
        // if it isn't first piece, alignment must be 1
8698
974k
        else 
if (974k
i > 0974k
) {
8699
25.5k
          MyFlags.Flags.setOrigAlign(1);
8700
25.5k
          if (i == NumRegs - 1)
8701
13.8k
            MyFlags.Flags.setSplitEnd();
8702
974k
        }
8703
988k
        Ins.push_back(MyFlags);
8704
988k
      }
8705
962k
      if (
NeedsRegBlock && 962k
Value == NumValues - 14.03k
)
8706
2.44k
        Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast();
8707
962k
      PartBase += VT.getStoreSize();
8708
962k
    }
8709
958k
  }
8710
433k
8711
433k
  // Call the target to set up the argument values.
8712
433k
  SmallVector<SDValue, 8> InVals;
8713
433k
  SDValue NewRoot = TLI->LowerFormalArguments(
8714
433k
      DAG.getRoot(), F.getCallingConv(), F.isVarArg(), Ins, dl, DAG, InVals);
8715
433k
8716
433k
  // Verify that the target's LowerFormalArguments behaved as expected.
8717
433k
  assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
8718
433k
         "LowerFormalArguments didn't return a valid chain!");
8719
433k
  assert(InVals.size() == Ins.size() &&
8720
433k
         "LowerFormalArguments didn't emit the correct number of values!");
8721
433k
  DEBUG({
8722
433k
      for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
8723
433k
        assert(InVals[i].getNode() &&
8724
433k
               "LowerFormalArguments emitted a null value!");
8725
433k
        assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
8726
433k
               "LowerFormalArguments emitted a value with the wrong type!");
8727
433k
      }
8728
433k
    });
8729
433k
8730
433k
  // Update the DAG with the new chain value resulting from argument lowering.
8731
433k
  DAG.setRoot(NewRoot);
8732
433k
8733
433k
  // Set up the argument values.
8734
433k
  unsigned i = 0;
8735
433k
  if (
!FuncInfo->CanLowerReturn433k
) {
8736
516
    // Create a virtual register for the sret pointer, and put in a copy
8737
516
    // from the sret argument into it.
8738
516
    SmallVector<EVT, 1> ValueVTs;
8739
516
    ComputeValueVTs(*TLI, DAG.getDataLayout(),
8740
516
                    PointerType::getUnqual(F.getReturnType()), ValueVTs);
8741
516
    MVT VT = ValueVTs[0].getSimpleVT();
8742
516
    MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
8743
516
    Optional<ISD::NodeType> AssertOp = None;
8744
516
    SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1,
8745
516
                                        RegVT, VT, nullptr, AssertOp);
8746
516
8747
516
    MachineFunction& MF = SDB->DAG.getMachineFunction();
8748
516
    MachineRegisterInfo& RegInfo = MF.getRegInfo();
8749
516
    unsigned SRetReg = RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT));
8750
516
    FuncInfo->DemoteRegister = SRetReg;
8751
516
    NewRoot =
8752
516
        SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue);
8753
516
    DAG.setRoot(NewRoot);
8754
516
8755
516
    // i indexes lowered arguments.  Bump it past the hidden sret argument.
8756
516
    ++i;
8757
516
  }
8758
433k
8759
433k
  SmallVector<SDValue, 4> Chains;
8760
433k
  DenseMap<int, int> ArgCopyElisionFrameIndexMap;
8761
958k
  for (const Argument &Arg : F.args()) {
8762
958k
    SmallVector<SDValue, 4> ArgValues;
8763
958k
    SmallVector<EVT, 4> ValueVTs;
8764
958k
    ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
8765
958k
    unsigned NumValues = ValueVTs.size();
8766
958k
    if (NumValues == 0)
8767
18
      continue;
8768
958k
8769
958k
    bool ArgHasUses = !Arg.use_empty();
8770
958k
8771
958k
    // Elide the copying store if the target loaded this argument from a
8772
958k
    // suitable fixed stack object.
8773
958k
    if (
Ins[i].Flags.isCopyElisionCandidate()958k
) {
8774
4.25k
      tryToElideArgumentCopy(FuncInfo, Chains, ArgCopyElisionFrameIndexMap,
8775
4.25k
                             ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg,
8776
4.25k
                             InVals[i], ArgHasUses);
8777
4.25k
    }
8778
958k
8779
958k
    // If this argument is unused then remember its value. It is used to generate
8780
958k
    // debugging information.
8781
958k
    bool isSwiftErrorArg =
8782
958k
        TLI->supportSwiftError() &&
8783
859k
        Arg.hasAttribute(Attribute::SwiftError);
8784
958k
    if (
!ArgHasUses && 958k
!isSwiftErrorArg50.5k
) {
8785
50.5k
      SDB->setUnusedArgValue(&Arg, InVals[i]);
8786
50.5k
8787
50.5k
      // Also remember any frame index for use in FastISel.
8788
50.5k
      if (FrameIndexSDNode *FI =
8789
50.5k
          dyn_cast<FrameIndexSDNode>(InVals[i].getNode()))
8790
64
        FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
8791
50.5k
    }
8792
958k
8793
1.92M
    for (unsigned Val = 0; 
Val != NumValues1.92M
;
++Val962k
) {
8794
962k
      EVT VT = ValueVTs[Val];
8795
962k
      MVT PartVT =
8796
962k
          TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), VT);
8797
962k
      unsigned NumParts =
8798
962k
          TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), VT);
8799
962k
8800
962k
      // Even an apparant 'unused' swifterror argument needs to be returned. So
8801
962k
      // we do generate a copy for it that can be used on return from the
8802
962k
      // function.
8803
962k
      if (
ArgHasUses || 962k
isSwiftErrorArg52.8k
) {
8804
909k
        Optional<ISD::NodeType> AssertOp;
8805
909k
        if (Arg.hasAttribute(Attribute::SExt))
8806
7.35k
          AssertOp = ISD::AssertSext;
8807
902k
        else 
if (902k
Arg.hasAttribute(Attribute::ZExt)902k
)
8808
8.13k
          AssertOp = ISD::AssertZext;
8809
909k
8810
909k
        ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
8811
909k
                                             PartVT, VT, nullptr, AssertOp,
8812
909k
                                             true));
8813
909k
      }
8814
962k
8815
962k
      i += NumParts;
8816
962k
    }
8817
958k
8818
958k
    // We don't need to do anything else for unused arguments.
8819
958k
    if (ArgValues.empty())
8820
50.5k
      continue;
8821
908k
8822
908k
    // Note down frame index.
8823
908k
    
if (FrameIndexSDNode *908k
FI908k
=
8824
908k
        dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
8825
236
      FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
8826
908k
8827
908k
    SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues),
8828
908k
                                     SDB->getCurSDLoc());
8829
908k
8830
908k
    SDB->setValue(&Arg, Res);
8831
908k
    if (
!TM.Options.EnableFastISel && 908k
Res.getOpcode() == ISD::BUILD_PAIR896k
) {
8832
4.56k
      // We want to associate the argument with the frame index, among
8833
4.56k
      // involved operands, that correspond to the lowest address. The
8834
4.56k
      // getCopyFromParts function, called earlier, is swapping the order of
8835
4.56k
      // the operands to BUILD_PAIR depending on endianness. The result of
8836
4.56k
      // that swapping is that the least significant bits of the argument will
8837
4.56k
      // be in the first operand of the BUILD_PAIR node, and the most
8838
4.56k
      // significant bits will be in the second operand.
8839
4.56k
      unsigned LowAddressOp = DAG.getDataLayout().isBigEndian() ? 
11.61k
:
02.95k
;
8840
4.56k
      if (LoadSDNode *LNode =
8841
4.56k
          dyn_cast<LoadSDNode>(Res.getOperand(LowAddressOp).getNode()))
8842
1.26k
        
if (FrameIndexSDNode *1.26k
FI1.26k
=
8843
1.26k
            dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
8844
1.00k
          FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
8845
4.56k
    }
8846
908k
8847
908k
    // Update the SwiftErrorVRegDefMap.
8848
908k
    if (
Res.getOpcode() == ISD::CopyFromReg && 908k
isSwiftErrorArg792k
) {
8849
101
      unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
8850
101
      if (TargetRegisterInfo::isVirtualRegister(Reg))
8851
101
        FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB,
8852
101
                                           FuncInfo->SwiftErrorArg, Reg);
8853
101
    }
8854
908k
8855
908k
    // If this argument is live outside of the entry block, insert a copy from
8856
908k
    // wherever we got it to the vreg that other BB's will reference it as.
8857
908k
    if (
!TM.Options.EnableFastISel && 908k
Res.getOpcode() == ISD::CopyFromReg896k
) {
8858
784k
      // If we can, though, try to skip creating an unnecessary vreg.
8859
784k
      // FIXME: This isn't very clean... it would be nice to make this more
8860
784k
      // general.  It's also subtly incompatible with the hacks FastISel
8861
784k
      // uses with vregs.
8862
784k
      unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
8863
784k
      if (
TargetRegisterInfo::isVirtualRegister(Reg)784k
) {
8864
784k
        FuncInfo->ValueMap[&Arg] = Reg;
8865
784k
        continue;
8866
784k
      }
8867
124k
    }
8868
124k
    
if (124k
!isOnlyUsedInEntryBlock(&Arg, TM.Options.EnableFastISel)124k
) {
8869
45.3k
      FuncInfo->InitializeRegForValue(&Arg);
8870
45.3k
      SDB->CopyToExportRegsIfNeeded(&Arg);
8871
45.3k
    }
8872
958k
  }
8873
433k
8874
433k
  if (
!Chains.empty()433k
) {
8875
158
    Chains.push_back(NewRoot);
8876
158
    NewRoot = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
8877
158
  }
8878
433k
8879
433k
  DAG.setRoot(NewRoot);
8880
433k
8881
433k
  assert(i == InVals.size() && "Argument register count mismatch!");
8882
433k
8883
433k
  // If any argument copy elisions occurred and we have debug info, update the
8884
433k
  // stale frame indices used in the dbg.declare variable info table.
8885
433k
  MachineFunction::VariableDbgInfoMapTy &DbgDeclareInfo = MF->getVariableDbgInfo();
8886
433k
  if (
!DbgDeclareInfo.empty() && 433k
!ArgCopyElisionFrameIndexMap.empty()0
) {
8887
0
    for (MachineFunction::VariableDbgInfo &VI : DbgDeclareInfo) {
8888
0
      auto I = ArgCopyElisionFrameIndexMap.find(VI.Slot);
8889
0
      if (I != ArgCopyElisionFrameIndexMap.end())
8890
0
        VI.Slot = I->second;
8891
0
    }
8892
0
  }
8893
433k
8894
433k
  // Finally, if the target has anything special to do, allow it to do so.
8895
433k
  EmitFunctionEntryCode();
8896
433k
}
8897
8898
/// Handle PHI nodes in successor blocks.  Emit code into the SelectionDAG to
8899
/// ensure constants are generated when needed.  Remember the virtual registers
8900
/// that need to be added to the Machine PHI nodes as input.  We cannot just
8901
/// directly add them, because expansion might result in multiple MBB's for one
8902
/// BB.  As such, the start of the BB might correspond to a different MBB than
8903
/// the end.
8904
void
8905
3.06M
SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
8906
3.06M
  const TerminatorInst *TI = LLVMBB->getTerminator();
8907
3.06M
8908
3.06M
  SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
8909
3.06M
8910
3.06M
  // Check PHI nodes in successors that expect a value to be available from this
8911
3.06M
  // block.
8912
7.48M
  for (unsigned succ = 0, e = TI->getNumSuccessors(); 
succ != e7.48M
;
++succ4.41M
) {
8913
4.41M
    const BasicBlock *SuccBB = TI->getSuccessor(succ);
8914
4.41M
    if (
!isa<PHINode>(SuccBB->begin())4.41M
)
continue2.88M
;
8915
1.53M
    MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
8916
1.53M
8917
1.53M
    // If this terminator has multiple identical successors (common for
8918
1.53M
    // switches), only handle each succ once.
8919
1.53M
    if (!SuccsHandled.insert(SuccMBB).second)
8920
1.16k
      continue;
8921
1.53M
8922
1.53M
    MachineBasicBlock::iterator MBBI = SuccMBB->begin();
8923
1.53M
8924
1.53M
    // At this point we know that there is a 1-1 correspondence between LLVM PHI
8925
1.53M
    // nodes and Machine PHI nodes, but the incoming operands have not been
8926
1.53M
    // emitted yet.
8927
1.53M
    for (BasicBlock::const_iterator I = SuccBB->begin();
8928
3.76M
         const PHINode *
PN3.76M
= dyn_cast<PHINode>(I);
++I2.23M
) {
8929
2.23M
      // Ignore dead phi's.
8930
2.23M
      if (
PN->use_empty()2.23M
)
continue19.3k
;
8931
2.21M
8932
2.21M
      // Skip empty types
8933
2.21M
      
if (2.21M
PN->getType()->isEmptyTy()2.21M
)
8934
4
        continue;
8935
2.21M
8936
2.21M
      unsigned Reg;
8937
2.21M
      const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
8938
2.21M
8939
2.21M
      if (const Constant *
C2.21M
= dyn_cast<Constant>(PHIOp)) {
8940
576k
        unsigned &RegOut = ConstantsOut[C];
8941
576k
        if (
RegOut == 0576k
) {
8942
559k
          RegOut = FuncInfo.CreateRegs(C->getType());
8943
559k
          CopyValueToVirtualRegister(C, RegOut);
8944
559k
        }
8945
576k
        Reg = RegOut;
8946
2.21M
      } else {
8947
1.63M
        DenseMap<const Value *, unsigned>::iterator I =
8948
1.63M
          FuncInfo.ValueMap.find(PHIOp);
8949
1.63M
        if (I != FuncInfo.ValueMap.end())
8950
1.63M
          Reg = I->second;
8951
730
        else {
8952
730
          assert(isa<AllocaInst>(PHIOp) &&
8953
730
                 FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
8954
730
                 "Didn't codegen value into a register!??");
8955
730
          Reg = FuncInfo.CreateRegs(PHIOp->getType());
8956
730
          CopyValueToVirtualRegister(PHIOp, Reg);
8957
730
        }
8958
1.63M
      }
8959
2.21M
8960
2.21M
      // Remember that this register needs to added to the machine PHI node as
8961
2.21M
      // the input for this MBB.
8962
2.21M
      SmallVector<EVT, 4> ValueVTs;
8963
2.21M
      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8964
2.21M
      ComputeValueVTs(TLI, DAG.getDataLayout(), PN->getType(), ValueVTs);
8965
4.43M
      for (unsigned vti = 0, vte = ValueVTs.size(); 
vti != vte4.43M
;
++vti2.22M
) {
8966
2.22M
        EVT VT = ValueVTs[vti];
8967
2.22M
        unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
8968
4.44M
        for (unsigned i = 0, e = NumRegisters; 
i != e4.44M
;
++i2.22M
)
8969
2.22M
          FuncInfo.PHINodesToUpdate.push_back(
8970
2.22M
              std::make_pair(&*MBBI++, Reg + i));
8971
2.22M
        Reg += NumRegisters;
8972
2.22M
      }
8973
2.23M
    }
8974
4.41M
  }
8975
3.06M
8976
3.06M
  ConstantsOut.clear();
8977
3.06M
}
8978
8979
/// Add a successor MBB to ParentMBB< creating a new MachineBB for BB if SuccMBB
8980
/// is 0.
8981
MachineBasicBlock *
8982
SelectionDAGBuilder::StackProtectorDescriptor::
8983
AddSuccessorMBB(const BasicBlock *BB,
8984
                MachineBasicBlock *ParentMBB,
8985
                bool IsLikely,
8986
6.68k
                MachineBasicBlock *SuccMBB) {
8987
6.68k
  // If SuccBB has not been created yet, create it.
8988
6.68k
  if (
!SuccMBB6.68k
) {
8989
6.60k
    MachineFunction *MF = ParentMBB->getParent();
8990
6.60k
    MachineFunction::iterator BBI(ParentMBB);
8991
6.60k
    SuccMBB = MF->CreateMachineBasicBlock(BB);
8992
6.60k
    MF->insert(++BBI, SuccMBB);
8993
6.60k
  }
8994
6.68k
  // Add it as a successor of ParentMBB.
8995
6.68k
  ParentMBB->addSuccessor(
8996
6.68k
      SuccMBB, BranchProbabilityInfo::getBranchProbStackProtector(IsLikely));
8997
6.68k
  return SuccMBB;
8998
6.68k
}
8999
9000
2.75M
MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) {
9001
2.75M
  MachineFunction::iterator I(MBB);
9002
2.75M
  if (++I == FuncInfo.MF->end())
9003
26.1k
    return nullptr;
9004
2.72M
  return &*I;
9005
2.72M
}
9006
9007
/// During lowering new call nodes can be created (such as memset, etc.).
9008
/// Those will become new roots of the current DAG, but complications arise
9009
/// when they are tail calls. In such cases, the call lowering will update
9010
/// the root, but the builder still needs to know that a tail call has been
9011
/// lowered in order to avoid generating an additional return.
9012
45.1k
void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) {
9013
45.1k
  // If the node is null, we do have a tail call.
9014
45.1k
  if (MaybeTC.getNode() != nullptr)
9015
45.1k
    DAG.setRoot(MaybeTC);
9016
45.1k
  else
9017
37
    HasTailCall = true;
9018
45.1k
}
9019
9020
uint64_t
9021
SelectionDAGBuilder::getJumpTableRange(const CaseClusterVector &Clusters,
9022
37.4k
                                       unsigned First, unsigned Last) const {
9023
37.4k
  assert(Last >= First);
9024
37.4k
  const APInt &LowCase = Clusters[First].Low->getValue();
9025
37.4k
  const APInt &HighCase = Clusters[Last].High->getValue();
9026
37.4k
  assert(LowCase.getBitWidth() == HighCase.getBitWidth());
9027
37.4k
9028
37.4k
  // FIXME: A range of consecutive cases has 100% density, but only requires one
9029
37.4k
  // comparison to lower. We should discriminate against such consecutive ranges
9030
37.4k
  // in jump tables.
9031
37.4k
9032
37.4k
  return (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100) + 1;
9033
37.4k
}
9034
9035
uint64_t SelectionDAGBuilder::getJumpTableNumCases(
9036
    const SmallVectorImpl<unsigned> &TotalCases, unsigned First,
9037
37.4k
    unsigned Last) const {
9038
37.4k
  assert(Last >= First);
9039
37.4k
  assert(TotalCases[Last] >= TotalCases[First]);
9040
37.4k
  uint64_t NumCases =
9041
37.4k
      TotalCases[Last] - (First == 0 ? 
010.6k
:
TotalCases[First - 1]26.8k
);
9042
37.4k
  return NumCases;
9043
37.4k
}
9044
9045
bool SelectionDAGBuilder::buildJumpTable(const CaseClusterVector &Clusters,
9046
                                         unsigned First, unsigned Last,
9047
                                         const SwitchInst *SI,
9048
                                         MachineBasicBlock *DefaultMBB,
9049
5.54k
                                         CaseCluster &JTCluster) {
9050
5.54k
  assert(First <= Last);
9051
5.54k
9052
5.54k
  auto Prob = BranchProbability::getZero();
9053
5.54k
  unsigned NumCmps = 0;
9054
5.54k
  std::vector<MachineBasicBlock*> Table;
9055
5.54k
  DenseMap<MachineBasicBlock*, BranchProbability> JTProbs;
9056
5.54k
9057
5.54k
  // Initialize probabilities in JTProbs.
9058
37.8k
  for (unsigned I = First; 
I <= Last37.8k
;
++I32.3k
)
9059
32.3k
    JTProbs[Clusters[I].MBB] = BranchProbability::getZero();
9060
5.54k
9061
37.8k
  for (unsigned I = First; 
I <= Last37.8k
;
++I32.3k
) {
9062
32.3k
    assert(Clusters[I].Kind == CC_Range);
9063
32.3k
    Prob += Clusters[I].Prob;
9064
32.3k
    const APInt &Low = Clusters[I].Low->getValue();
9065
32.3k
    const APInt &High = Clusters[I].High->getValue();
9066
32.3k
    NumCmps += (Low == High) ? 
130.2k
:
22.10k
;
9067
32.3k
    if (
I != First32.3k
) {
9068
26.7k
      // Fill the gap between this and the previous cluster.
9069
26.7k
      const APInt &PreviousHigh = Clusters[I - 1].High->getValue();
9070
26.7k
      assert(PreviousHigh.slt(Low));
9071
26.7k
      uint64_t Gap = (Low - PreviousHigh).getLimitedValue() - 1;
9072
74.1k
      for (uint64_t J = 0; 
J < Gap74.1k
;
J++47.3k
)
9073
47.3k
        Table.push_back(DefaultMBB);
9074
26.7k
    }
9075
32.3k
    uint64_t ClusterSize = (High - Low).getLimitedValue() + 1;
9076
69.3k
    for (uint64_t J = 0; 
J < ClusterSize69.3k
;
++J37.0k
)
9077
37.0k
      Table.push_back(Clusters[I].MBB);
9078
32.3k
    JTProbs[Clusters[I].MBB] += Clusters[I].Prob;
9079
32.3k
  }
9080
5.54k
9081
5.54k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9082
5.54k
  unsigned NumDests = JTProbs.size();
9083
5.54k
  if (TLI.isSuitableForBitTests(
9084
5.54k
          NumDests, NumCmps, Clusters[First].Low->getValue(),
9085
5.54k
          Clusters[Last].High->getValue(), DAG.getDataLayout())) {
9086
384
    // Clusters[First..Last] should be lowered as bit tests instead.
9087
384
    return false;
9088
384
  }
9089
5.16k
9090
5.16k
  // Create the MBB that will load from and jump through the table.
9091
5.16k
  // Note: We create it here, but it's not inserted into the function yet.
9092
5.16k
  MachineFunction *CurMF = FuncInfo.MF;
9093
5.16k
  MachineBasicBlock *JumpTableMBB =
9094
5.16k
      CurMF->CreateMachineBasicBlock(SI->getParent());
9095
5.16k
9096
5.16k
  // Add successors. Note: use table order for determinism.
9097
5.16k
  SmallPtrSet<MachineBasicBlock *, 8> Done;
9098
69.5k
  for (MachineBasicBlock *Succ : Table) {
9099
69.5k
    if (Done.count(Succ))
9100
40.0k
      continue;
9101
29.5k
    addSuccessorWithProb(JumpTableMBB, Succ, JTProbs[Succ]);
9102
29.5k
    Done.insert(Succ);
9103
29.5k
  }
9104
5.54k
  JumpTableMBB->normalizeSuccProbs();
9105
5.54k
9106
5.54k
  unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI.getJumpTableEncoding())
9107
5.54k
                     ->createJumpTableIndex(Table);
9108
5.54k
9109
5.54k
  // Set up the jump table info.
9110
5.54k
  JumpTable JT(-1U, JTI, JumpTableMBB, nullptr);
9111
5.54k
  JumpTableHeader JTH(Clusters[First].Low->getValue(),
9112
5.54k
                      Clusters[Last].High->getValue(), SI->getCondition(),
9113
5.54k
                      nullptr, false);
9114
5.54k
  JTCases.emplace_back(std::move(JTH), std::move(JT));
9115
5.54k
9116
5.54k
  JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High,
9117
5.54k
                                     JTCases.size() - 1, Prob);
9118
5.54k
  return true;
9119
5.54k
}
9120
9121
void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
9122
                                         const SwitchInst *SI,
9123
30.5k
                                         MachineBasicBlock *DefaultMBB) {
9124
#ifndef NDEBUG
9125
  // Clusters must be non-empty, sorted, and only contain Range clusters.
9126
  assert(!Clusters.empty());
9127
  for (CaseCluster &C : Clusters)
9128
    assert(C.Kind == CC_Range);
9129
  for (unsigned i = 1, e = Clusters.size(); i < e; ++i)
9130
    assert(Clusters[i - 1].High->getValue().slt(Clusters[i].Low->getValue()));
9131
#endif
9132
9133
30.5k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9134
30.5k
  if (!TLI.areJTsAllowed(SI->getParent()->getParent()))
9135
9
    return;
9136
30.5k
9137
30.5k
  const int64_t N = Clusters.size();
9138
30.5k
  const unsigned MinJumpTableEntries = TLI.getMinimumJumpTableEntries();
9139
30.5k
  const unsigned SmallNumberOfEntries = MinJumpTableEntries / 2;
9140
30.5k
9141
30.5k
  if (
N < 2 || 30.5k
N < MinJumpTableEntries30.5k
)
9142
24.8k
    return;
9143
5.68k
9144
5.68k
  // TotalCases[i]: Total nbr of cases in Clusters[0..i].
9145
5.68k
  SmallVector<unsigned, 8> TotalCases(N);
9146
39.2k
  for (unsigned i = 0; 
i < N39.2k
;
++i33.5k
) {
9147
33.5k
    const APInt &Hi = Clusters[i].High->getValue();
9148
33.5k
    const APInt &Lo = Clusters[i].Low->getValue();
9149
33.5k
    TotalCases[i] = (Hi - Lo).getLimitedValue() + 1;
9150
33.5k
    if (i != 0)
9151
27.8k
      TotalCases[i] += TotalCases[i - 1];
9152
33.5k
  }
9153
5.68k
9154
5.68k
  // Cheap case: the whole range may be suitable for jump table.
9155
5.68k
  uint64_t Range = getJumpTableRange(Clusters,0, N - 1);
9156
5.68k
  uint64_t NumCases = getJumpTableNumCases(TotalCases, 0, N - 1);
9157
5.68k
  assert(NumCases < UINT64_MAX / 100);
9158
5.68k
  assert(Range >= NumCases);
9159
5.68k
  if (
TLI.isSuitableForJumpTable(SI, NumCases, Range)5.68k
) {
9160
5.10k
    CaseCluster JTCluster;
9161
5.10k
    if (
buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)5.10k
) {
9162
4.92k
      Clusters[0] = JTCluster;
9163
4.92k
      Clusters.resize(1);
9164
4.92k
      return;
9165
4.92k
    }
9166
768
  }
9167
768
9168
768
  // The algorithm below is not suitable for -O0.
9169
768
  
if (768
TM.getOptLevel() == CodeGenOpt::None768
)
9170
16
    return;
9171
752
9172
752
  // Split Clusters into minimum number of dense partitions. The algorithm uses
9173
752
  // the same idea as Kannan & Proebsting "Correction to 'Producing Good Code
9174
752
  // for the Case Statement'" (1994), but builds the MinPartitions array in
9175
752
  // reverse order to make it easier to reconstruct the partitions in ascending
9176
752
  // order. In the choice between two optimal partitionings, it picks the one
9177
752
  // which yields more jump tables.
9178
752
9179
752
  // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1].
9180
752
  SmallVector<unsigned, 8> MinPartitions(N);
9181
752
  // LastElement[i] is the last element of the partition starting at i.
9182
752
  SmallVector<unsigned, 8> LastElement(N);
9183
752
  // PartitionsScore[i] is used to break ties when choosing between two
9184
752
  // partitionings resulting in the same number of partitions.
9185
752
  SmallVector<unsigned, 8> PartitionsScore(N);
9186
752
  // For PartitionsScore, a small number of comparisons is considered as good as
9187
752
  // a jump table and a single comparison is considered better than a jump
9188
752
  // table.
9189
752
  enum PartitionScores : unsigned {
9190
752
    NoTable = 0,
9191
752
    Table = 1,
9192
752
    FewCases = 1,
9193
752
    SingleCase = 2
9194
752
  };
9195
752
9196
752
  // Base case: There is only one way to partition Clusters[N-1].
9197
752
  MinPartitions[N - 1] = 1;
9198
752
  LastElement[N - 1] = N - 1;
9199
752
  PartitionsScore[N - 1] = PartitionScores::SingleCase;
9200
752
9201
752
  // Note: loop indexes are signed to avoid underflow.
9202
5.68k
  for (int64_t i = N - 2; 
i >= 05.68k
;
i--4.93k
) {
9203
4.93k
    // Find optimal partitioning of Clusters[i..N-1].
9204
4.93k
    // Baseline: Put Clusters[i] into a partition on its own.
9205
4.93k
    MinPartitions[i] = MinPartitions[i + 1] + 1;
9206
4.93k
    LastElement[i] = i;
9207
4.93k
    PartitionsScore[i] = PartitionsScore[i + 1] + PartitionScores::SingleCase;
9208
4.93k
9209
4.93k
    // Search for a solution that results in fewer partitions.
9210
36.7k
    for (int64_t j = N - 1; 
j > i36.7k
;
j--31.7k
) {
9211
31.7k
      // Try building a partition from Clusters[i..j].
9212
31.7k
      uint64_t Range = getJumpTableRange(Clusters, i, j);
9213
31.7k
      uint64_t NumCases = getJumpTableNumCases(TotalCases, i, j);
9214
31.7k
      assert(NumCases < UINT64_MAX / 100);
9215
31.7k
      assert(Range >= NumCases);
9216
31.7k
      if (
TLI.isSuitableForJumpTable(SI, NumCases, Range)31.7k
) {
9217
18.1k
        unsigned NumPartitions = 1 + (j == N - 1 ? 
01.60k
:
MinPartitions[j + 1]16.5k
);
9218
18.1k
        unsigned Score = j == N - 1 ? 
01.60k
:
PartitionsScore[j + 1]16.5k
;
9219
18.1k
        int64_t NumEntries = j - i + 1;
9220
18.1k
9221
18.1k
        if (NumEntries == 1)
9222
0
          Score += PartitionScores::SingleCase;
9223
18.1k
        else 
if (18.1k
NumEntries <= SmallNumberOfEntries18.1k
)
9224
3.56k
          Score += PartitionScores::FewCases;
9225
14.6k
        else 
if (14.6k
NumEntries >= MinJumpTableEntries14.6k
)
9226
12.0k
          Score += PartitionScores::Table;
9227
18.1k
9228
18.1k
        // If this leads to fewer partitions, or to the same number of
9229
18.1k
        // partitions with better score, it is a better partitioning.
9230
18.1k
        if (NumPartitions < MinPartitions[i] ||
9231
18.1k
            
(NumPartitions == MinPartitions[i] && 14.6k
Score > PartitionsScore[i]330
)) {
9232
3.52k
          MinPartitions[i] = NumPartitions;
9233
3.52k
          LastElement[i] = j;
9234
3.52k
          PartitionsScore[i] = Score;
9235
3.52k
        }
9236
18.1k
      }
9237
31.7k
    }
9238
4.93k
  }
9239
752
9240
752
  // Iterate over the partitions, replacing some with jump tables in-place.
9241
752
  unsigned DstIndex = 0;
9242
2.71k
  for (unsigned First = 0, Last; 
First < N2.71k
;
First = Last + 11.96k
) {
9243
1.96k
    Last = LastElement[First];
9244
1.96k
    assert(Last >= First);
9245
1.96k
    assert(DstIndex <= First);
9246
1.96k
    unsigned NumClusters = Last - First + 1;
9247
1.96k
9248
1.96k
    CaseCluster JTCluster;
9249
1.96k
    if (NumClusters >= MinJumpTableEntries &&
9250
1.96k
        
buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)444
) {
9251
243
      Clusters[DstIndex++] = JTCluster;
9252
1.96k
    } else {
9253
4.92k
      for (unsigned I = First; 
I <= Last4.92k
;
++I3.20k
)
9254
3.20k
        std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I]));
9255
1.72k
    }
9256
1.96k
  }
9257
30.5k
  Clusters.resize(DstIndex);
9258
30.5k
}
9259
9260
bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
9261
                                        unsigned First, unsigned Last,
9262
                                        const SwitchInst *SI,
9263
35.1k
                                        CaseCluster &BTCluster) {
9264
35.1k
  assert(First <= Last);
9265
35.1k
  if (First == Last)
9266
10.3k
    return false;
9267
24.7k
9268
24.7k
  BitVector Dests(FuncInfo.MF->getNumBlockIDs());
9269
24.7k
  unsigned NumCmps = 0;
9270
76.7k
  for (int64_t I = First; 
I <= Last76.7k
;
++I51.9k
) {
9271
51.9k
    assert(Clusters[I].Kind == CC_Range);
9272
51.9k
    Dests.set(Clusters[I].MBB->getNumber());
9273
51.9k
    NumCmps += (Clusters[I].Low == Clusters[I].High) ? 
150.7k
:
21.20k
;
9274
51.9k
  }
9275
24.7k
  unsigned NumDests = Dests.count();
9276
24.7k
9277
24.7k
  APInt Low = Clusters[First].Low->getValue();
9278
24.7k
  APInt High = Clusters[Last].High->getValue();
9279
24.7k
  assert(Low.slt(High));
9280
24.7k
9281
24.7k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9282
24.7k
  const DataLayout &DL = DAG.getDataLayout();
9283
24.7k
  if (!TLI.isSuitableForBitTests(NumDests, NumCmps, Low, High, DL))
9284
24.1k
    return false;
9285
621
9286
621
  APInt LowBound;
9287
621
  APInt CmpRange;
9288
621
9289
621
  const int BitWidth = TLI.getPointerTy(DL).getSizeInBits();
9290
621
  assert(TLI.rangeFitsInWord(Low, High, DL) &&
9291
621
         "Case range must fit in bit mask!");
9292
621
9293
621
  // Check if the clusters cover a contiguous range such that no value in the
9294
621
  // range will jump to the default statement.
9295
621
  bool ContiguousRange = true;
9296
717
  for (int64_t I = First + 1; 
I <= Last717
;
++I96
) {
9297
708
    if (
Clusters[I].Low->getValue() != Clusters[I - 1].High->getValue() + 1708
) {
9298
612
      ContiguousRange = false;
9299
612
      break;
9300
612
    }
9301
708
  }
9302
621
9303
621
  if (
Low.isStrictlyPositive() && 621
High.slt(BitWidth)401
) {
9304
240
    // Optimize the case where all the case values fit in a word without having
9305
240
    // to subtract minValue. In this case, we can optimize away the subtraction.
9306
240
    LowBound = APInt::getNullValue(Low.getBitWidth());
9307
240
    CmpRange = High;
9308
240
    ContiguousRange = false;
9309
621
  } else {
9310
381
    LowBound = Low;
9311
381
    CmpRange = High - Low;
9312
381
  }
9313
621
9314
621
  CaseBitsVector CBV;
9315
621
  auto TotalProb = BranchProbability::getZero();
9316
2.73k
  for (unsigned i = First; 
i <= Last2.73k
;
++i2.11k
) {
9317
2.11k
    // Find the CaseBits for this destination.
9318
2.11k
    unsigned j;
9319
2.52k
    for (j = 0; 
j < CBV.size()2.52k
;
++j410
)
9320
1.75k
      
if (1.75k
CBV[j].BB == Clusters[i].MBB1.75k
)
9321
1.34k
        break;
9322
2.11k
    if (j == CBV.size())
9323
775
      CBV.push_back(
9324
775
          CaseBits(0, Clusters[i].MBB, 0, BranchProbability::getZero()));
9325
2.11k
    CaseBits *CB = &CBV[j];
9326
2.11k
9327
2.11k
    // Update Mask, Bits and ExtraProb.
9328
2.11k
    uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue();
9329
2.11k
    uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue();
9330
2.11k
    assert(Hi >= Lo && Hi < 64 && "Invalid bit case!");
9331
2.11k
    CB->Mask |= (-1ULL >> (63 - (Hi - Lo))) << Lo;
9332
2.11k
    CB->Bits += Hi - Lo + 1;
9333
2.11k
    CB->ExtraProb += Clusters[i].Prob;
9334
2.11k
    TotalProb += Clusters[i].Prob;
9335
2.11k
  }
9336
621
9337
621
  BitTestInfo BTI;
9338
169
  std::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) {
9339
169
    // Sort by probability first, number of bits second.
9340
169
    if (a.ExtraProb != b.ExtraProb)
9341
140
      return a.ExtraProb > b.ExtraProb;
9342
29
    return a.Bits > b.Bits;
9343
29
  });
9344
621
9345
775
  for (auto &CB : CBV) {
9346
775
    MachineBasicBlock *BitTestBB =
9347
775
        FuncInfo.MF->CreateMachineBasicBlock(SI->getParent());
9348
775
    BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraProb));
9349
775
  }
9350
35.1k
  BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange),
9351
35.1k
                            SI->getCondition(), -1U, MVT::Other, false,
9352
35.1k
                            ContiguousRange, nullptr, nullptr, std::move(BTI),
9353
35.1k
                            TotalProb);
9354
35.1k
9355
35.1k
  BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High,
9356
35.1k
                                    BitTestCases.size() - 1, TotalProb);
9357
35.1k
  return true;
9358
35.1k
}
9359
9360
void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters,
9361
30.5k
                                              const SwitchInst *SI) {
9362
30.5k
// Partition Clusters into as few subsets as possible, where each subset has a
9363
30.5k
// range that fits in a machine word and has <= 3 unique destinations.
9364
30.5k
9365
#ifndef NDEBUG
9366
  // Clusters must be sorted and contain Range or JumpTable clusters.
9367
  assert(!Clusters.empty());
9368
  assert(Clusters[0].Kind == CC_Range || Clusters[0].Kind == CC_JumpTable);
9369
  for (const CaseCluster &C : Clusters)
9370
    assert(C.Kind == CC_Range || C.Kind == CC_JumpTable);
9371
  for (unsigned i = 1; i < Clusters.size(); ++i)
9372
    assert(Clusters[i-1].High->getValue().slt(Clusters[i].Low->getValue()));
9373
#endif
9374
9375
30.5k
  // The algorithm below is not suitable for -O0.
9376
30.5k
  if (TM.getOptLevel() == CodeGenOpt::None)
9377
36
    return;
9378
30.5k
9379
30.5k
  // If target does not have legal shift left, do not emit bit tests at all.
9380
30.5k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9381
30.5k
  const DataLayout &DL = DAG.getDataLayout();
9382
30.5k
9383
30.5k
  EVT PTy = TLI.getPointerTy(DL);
9384
30.5k
  if (!TLI.isOperationLegal(ISD::SHL, PTy))
9385
2
    return;
9386
30.5k
9387
30.5k
  int BitWidth = PTy.getSizeInBits();
9388
30.5k
  const int64_t N = Clusters.size();
9389
30.5k
9390
30.5k
  // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1].
9391
30.5k
  SmallVector<unsigned, 8> MinPartitions(N);
9392
30.5k
  // LastElement[i] is the last element of the partition starting at i.
9393
30.5k
  SmallVector<unsigned, 8> LastElement(N);
9394
30.5k
9395
30.5k
  // FIXME: This might not be the best algorithm for finding bit test clusters.
9396
30.5k
9397
30.5k
  // Base case: There is only one way to partition Clusters[N-1].
9398
30.5k
  MinPartitions[N - 1] = 1;
9399
30.5k
  LastElement[N - 1] = N - 1;
9400
30.5k
9401
30.5k
  // Note: loop indexes are signed to avoid underflow.
9402
62.3k
  for (int64_t i = N - 2; 
i >= 062.3k
;
--i31.8k
) {
9403
31.8k
    // Find optimal partitioning of Clusters[i..N-1].
9404
31.8k
    // Baseline: Put Clusters[i] into a partition on its own.
9405
31.8k
    MinPartitions[i] = MinPartitions[i + 1] + 1;
9406
31.8k
    LastElement[i] = i;
9407
31.8k
9408
31.8k
    // Search for a solution that results in fewer partitions.
9409
31.8k
    // Note: the search is limited by BitWidth, reducing time complexity.
9410
74.4k
    for (int64_t j = std::min(N - 1, i + BitWidth - 1); 
j > i74.4k
;
--j42.5k
) {
9411
42.6k
      // Try building a partition from Clusters[i..j].
9412
42.6k
9413
42.6k
      // Check the range.
9414
42.6k
      if (!TLI.rangeFitsInWord(Clusters[i].Low->getValue(),
9415
42.6k
                               Clusters[j].High->getValue(), DL))
9416
11.5k
        continue;
9417
31.0k
9418
31.0k
      // Check nbr of destinations and cluster types.
9419
31.0k
      // FIXME: This works, but doesn't seem very efficient.
9420
31.0k
      bool RangesOnly = true;
9421
31.0k
      BitVector Dests(FuncInfo.MF->getNumBlockIDs());
9422
99.1k
      for (int64_t k = i; 
k <= j99.1k
;
k++68.1k
) {
9423
68.1k
        if (
Clusters[k].Kind != CC_Range68.1k
) {
9424
18
          RangesOnly = false;
9425
18
          break;
9426
18
        }
9427
68.1k
        Dests.set(Clusters[k].MBB->getNumber());
9428
68.1k
      }
9429
31.0k
      if (
!RangesOnly || 31.0k
Dests.count() > 331.0k
)
9430
64
        break;
9431
31.0k
9432
31.0k
      // Check if it's a better partition.
9433
31.0k
      
unsigned NumPartitions = 1 + (j == N - 1 ? 31.0k
026.9k
:
MinPartitions[j + 1]4.05k
);
9434
31.0k
      if (
NumPartitions < MinPartitions[i]31.0k
) {
9435
27.2k
        // Found a better partition.
9436
27.2k
        MinPartitions[i] = NumPartitions;
9437
27.2k
        LastElement[i] = j;
9438
27.2k
      }
9439
42.6k
    }
9440
31.8k
  }
9441
30.5k
9442
30.5k
  // Iterate over the partitions, replacing with bit-test clusters in-place.
9443
30.5k
  unsigned DstIndex = 0;
9444
65.6k
  for (unsigned First = 0, Last; 
First < N65.6k
;
First = Last + 135.1k
) {
9445
35.1k
    Last = LastElement[First];
9446
35.1k
    assert(First <= Last);
9447
35.1k
    assert(DstIndex <= First);
9448
35.1k
9449
35.1k
    CaseCluster BitTestCluster;
9450
35.1k
    if (
buildBitTests(Clusters, First, Last, SI, BitTestCluster)35.1k
) {
9451
621
      Clusters[DstIndex++] = BitTestCluster;
9452
35.1k
    } else {
9453
34.5k
      size_t NumClusters = Last - First + 1;
9454
34.5k
      std::memmove(&Clusters[DstIndex], &Clusters[First],
9455
34.5k
                   sizeof(Clusters[0]) * NumClusters);
9456
34.5k
      DstIndex += NumClusters;
9457
34.5k
    }
9458
35.1k
  }
9459
30.5k
  Clusters.resize(DstIndex);
9460
30.5k
}
9461
9462
void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
9463
                                        MachineBasicBlock *SwitchMBB,
9464
31.0k
                                        MachineBasicBlock *DefaultMBB) {
9465
31.0k
  MachineFunction *CurMF = FuncInfo.MF;
9466
31.0k
  MachineBasicBlock *NextMBB = nullptr;
9467
31.0k
  MachineFunction::iterator BBI(W.MBB);
9468
31.0k
  if (++BBI != FuncInfo.MF->end())
9469
31.0k
    NextMBB = &*BBI;
9470
31.0k
9471
31.0k
  unsigned Size = W.LastCluster - W.FirstCluster + 1;
9472
31.0k
9473
31.0k
  BranchProbabilityInfo *BPI = FuncInfo.BPI;
9474
31.0k
9475
31.0k
  if (
Size == 2 && 31.0k
W.MBB == SwitchMBB20.9k
) {
9476
20.3k
    // If any two of the cases has the same destination, and if one value
9477
20.3k
    // is the same as the other, but has one bit unset that the other has set,
9478
20.3k
    // use bit manipulation to do two compares at once.  For example:
9479
20.3k
    // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
9480
20.3k
    // TODO: This could be extended to merge any 2 cases in switches with 3
9481
20.3k
    // cases.
9482
20.3k
    // TODO: Handle cases where W.CaseBB != SwitchBB.
9483
20.3k
    CaseCluster &Small = *W.FirstCluster;
9484
20.3k
    CaseCluster &Big = *W.LastCluster;
9485
20.3k
9486
20.3k
    if (
Small.Low == Small.High && 20.3k
Big.Low == Big.High20.0k
&&
9487
20.3k
        
Small.MBB == Big.MBB19.8k
) {
9488
9.90k
      const APInt &SmallValue = Small.Low->getValue();
9489
9.90k
      const APInt &BigValue = Big.Low->getValue();
9490
9.90k
9491
9.90k
      // Check that there is only one bit different.
9492
9.90k
      APInt CommonBit = BigValue ^ SmallValue;
9493
9.90k
      if (
CommonBit.isPowerOf2()9.90k
) {
9494
8.56k
        SDValue CondLHS = getValue(Cond);
9495
8.56k
        EVT VT = CondLHS.getValueType();
9496
8.56k
        SDLoc DL = getCurSDLoc();
9497
8.56k
9498
8.56k
        SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
9499
8.56k
                                 DAG.getConstant(CommonBit, DL, VT));
9500
8.56k
        SDValue Cond = DAG.getSetCC(
9501
8.56k
            DL, MVT::i1, Or, DAG.getConstant(BigValue | SmallValue, DL, VT),
9502
8.56k
            ISD::SETEQ);
9503
8.56k
9504
8.56k
        // Update successor info.
9505
8.56k
        // Both Small and Big will jump to Small.BB, so we sum up the
9506
8.56k
        // probabilities.
9507
8.56k
        addSuccessorWithProb(SwitchMBB, Small.MBB, Small.Prob + Big.Prob);
9508
8.56k
        if (BPI)
9509
8.56k
          addSuccessorWithProb(
9510
8.56k
              SwitchMBB, DefaultMBB,
9511
8.56k
              // The default destination is the first successor in IR.
9512
8.56k
              BPI->getEdgeProbability(SwitchMBB->getBasicBlock(), (unsigned)0));
9513
8.56k
        else
9514
0
          addSuccessorWithProb(SwitchMBB, DefaultMBB);
9515
8.56k
9516
8.56k
        // Insert the true branch.
9517
8.56k
        SDValue BrCond =
9518
8.56k
            DAG.getNode(ISD::BRCOND, DL, MVT::Other, getControlRoot(), Cond,
9519
8.56k
                        DAG.getBasicBlock(Small.MBB));
9520
8.56k
        // Insert the false branch.
9521
8.56k
        BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
9522
8.56k
                             DAG.getBasicBlock(DefaultMBB));
9523
8.56k
9524
8.56k
        DAG.setRoot(BrCond);
9525
8.56k
        return;
9526
8.56k
      }
9527
22.4k
    }
9528
20.3k
  }
9529
22.4k
9530
22.4k
  
if (22.4k
TM.getOptLevel() != CodeGenOpt::None22.4k
) {
9531
22.4k
    // Order cases by probability so the most likely case will be checked first.
9532
22.4k
    std::sort(W.FirstCluster, W.LastCluster + 1,
9533
21.5k
              [](const CaseCluster &a, const CaseCluster &b) {
9534
21.5k
      return a.Prob > b.Prob;
9535
21.5k
    });
9536
22.4k
9537
22.4k
    // Rearrange the case blocks so that the last one falls through if possible
9538
22.4k
    // without without changing the order of probabilities.
9539
32.4k
    for (CaseClusterIt I = W.LastCluster; 
I > W.FirstCluster32.4k
; ) {
9540
20.8k
      --I;
9541
20.8k
      if (I->Prob > W.LastCluster->Prob)
9542
2.04k
        break;
9543
18.8k
      
if (18.8k
I->Kind == CC_Range && 18.8k
I->MBB == NextMBB18.8k
) {
9544
8.79k
        std::swap(*I, *W.LastCluster);
9545
8.79k
        break;
9546
8.79k
      }
9547
20.8k
    }
9548
22.4k
  }
9549
22.4k
9550
22.4k
  // Compute total probability.
9551
22.4k
  BranchProbability DefaultProb = W.DefaultProb;
9552
22.4k
  BranchProbability UnhandledProbs = DefaultProb;
9553
66.3k
  for (CaseClusterIt I = W.FirstCluster; 
I <= W.LastCluster66.3k
;
++I43.9k
)
9554
43.9k
    UnhandledProbs += I->Prob;
9555
22.4k
9556
22.4k
  MachineBasicBlock *CurMBB = W.MBB;
9557
66.3k
  for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; 
I <= E66.3k
;
++I43.9k
) {
9558
43.9k
    MachineBasicBlock *Fallthrough;
9559
43.9k
    if (
I == W.LastCluster43.9k
) {
9560
22.4k
      // For the last cluster, fall through to the default destination.
9561
22.4k
      Fallthrough = DefaultMBB;
9562
43.9k
    } else {
9563
21.4k
      Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock());
9564
21.4k
      CurMF->insert(BBI, Fallthrough);
9565
21.4k
      // Put Cond in a virtual register to make it available from the new blocks.
9566
21.4k
      ExportFromCurrentBlock(Cond);
9567
21.4k
    }
9568
43.9k
    UnhandledProbs -= I->Prob;
9569
43.9k
9570
43.9k
    switch (I->Kind) {
9571
5.16k
      case CC_JumpTable: {
9572
5.16k
        // FIXME: Optimize away range check based on pivot comparisons.
9573
5.16k
        JumpTableHeader *JTH = &JTCases[I->JTCasesIndex].first;
9574
5.16k
        JumpTable *JT = &JTCases[I->JTCasesIndex].second;
9575
5.16k
9576
5.16k
        // The jump block hasn't been inserted yet; insert it here.
9577
5.16k
        MachineBasicBlock *JumpMBB = JT->MBB;
9578
5.16k
        CurMF->insert(BBI, JumpMBB);
9579
5.16k
9580
5.16k
        auto JumpProb = I->Prob;
9581
5.16k
        auto FallthroughProb = UnhandledProbs;
9582
5.16k
9583
5.16k
        // If the default statement is a target of the jump table, we evenly
9584
5.16k
        // distribute the default probability to successors of CurMBB. Also
9585
5.16k
        // update the probability on the edge from JumpMBB to Fallthrough.
9586
5.16k
        for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
9587
5.16k
                                              SE = JumpMBB->succ_end();
9588
25.7k
             
SI != SE25.7k
;
++SI20.5k
) {
9589
21.6k
          if (
*SI == DefaultMBB21.6k
) {
9590
1.05k
            JumpProb += DefaultProb / 2;
9591
1.05k
            FallthroughProb -= DefaultProb / 2;
9592
1.05k
            JumpMBB->setSuccProbability(SI, DefaultProb / 2);
9593
1.05k
            JumpMBB->normalizeSuccProbs();
9594
1.05k
            break;
9595
1.05k
          }
9596
21.6k
        }
9597
5.16k
9598
5.16k
        addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
9599
5.16k
        addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
9600
5.16k
        CurMBB->normalizeSuccProbs();
9601
5.16k
9602
5.16k
        // The jump table header will be inserted in our current block, do the
9603
5.16k
        // range check, and fall through to our fallthrough block.
9604
5.16k
        JTH->HeaderBB = CurMBB;
9605
5.16k
        JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader.
9606
5.16k
9607
5.16k
        // If we're in the right place, emit the jump table header right now.
9608
5.16k
        if (
CurMBB == SwitchMBB5.16k
) {
9609
5.11k
          visitJumpTableHeader(*JT, *JTH, SwitchMBB);
9610
5.11k
          JTH->Emitted = true;
9611
5.11k
        }
9612
5.16k
        break;
9613
43.9k
      }
9614
621
      case CC_BitTests: {
9615
621
        // FIXME: Optimize away range check based on pivot comparisons.
9616
621
        BitTestBlock *BTB = &BitTestCases[I->BTCasesIndex];
9617
621
9618
621
        // The bit test blocks haven't been inserted yet; insert them here.
9619
621
        for (BitTestCase &BTC : BTB->Cases)
9620
775
          CurMF->insert(BBI, BTC.ThisBB);
9621
621
9622
621
        // Fill in fields of the BitTestBlock.
9623
621
        BTB->Parent = CurMBB;
9624
621
        BTB->Default = Fallthrough;
9625
621
9626
621
        BTB->DefaultProb = UnhandledProbs;
9627
621
        // If the cases in bit test don't form a contiguous range, we evenly
9628
621
        // distribute the probability on the edge to Fallthrough to two
9629
621
        // successors of CurMBB.
9630
621
        if (
!BTB->ContiguousRange621
) {
9631
614
          BTB->Prob += DefaultProb / 2;
9632
614
          BTB->DefaultProb -= DefaultProb / 2;
9633
614
        }
9634
621
9635
621
        // If we're in the right place, emit the bit test header right now.
9636
621
        if (
CurMBB == SwitchMBB621
) {
9637
619
          visitBitTestHeader(*BTB, SwitchMBB);
9638
619
          BTB->Emitted = true;
9639
619
        }
9640
621
        break;
9641
43.9k
      }
9642
38.1k
      case CC_Range: {
9643
38.1k
        const Value *RHS, *LHS, *MHS;
9644
38.1k
        ISD::CondCode CC;
9645
38.1k
        if (
I->Low == I->High38.1k
) {
9646
37.5k
          // Check Cond == I->Low.
9647
37.5k
          CC = ISD::SETEQ;
9648
37.5k
          LHS = Cond;
9649
37.5k
          RHS=I->Low;
9650
37.5k
          MHS = nullptr;
9651
38.1k
        } else {
9652
549
          // Check I->Low <= Cond <= I->High.
9653
549
          CC = ISD::SETLE;
9654
549
          LHS = I->Low;
9655
549
          MHS = Cond;
9656
549
          RHS = I->High;
9657
549
        }
9658
38.1k
9659
38.1k
        // The false probability is the sum of all unhandled cases.
9660
38.1k
        CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB,
9661
38.1k
                     getCurSDLoc(), I->Prob, UnhandledProbs);
9662
38.1k
9663
38.1k
        if (CurMBB == SwitchMBB)
9664
15.9k
          visitSwitchCase(CB, SwitchMBB);
9665
38.1k
        else
9666
22.1k
          SwitchCases.push_back(CB);
9667
38.1k
9668
38.1k
        break;
9669
43.9k
      }
9670
43.9k
    }
9671
43.9k
    CurMBB = Fallthrough;
9672
43.9k
  }
9673
31.0k
}
9674
9675
unsigned SelectionDAGBuilder::caseClusterRank(const CaseCluster &CC,
9676
                                              CaseClusterIt First,
9677
68
                                              CaseClusterIt Last) {
9678
237
  return std::count_if(First, Last + 1, [&](const CaseCluster &X) {
9679
237
    if (X.Prob != CC.Prob)
9680
56
      return X.Prob > CC.Prob;
9681
181
9682
181
    // Ties are broken by comparing the case value.
9683
181
    return X.Low->getValue().slt(CC.Low->getValue());
9684
181
  });
9685
68
}
9686
9687
void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
9688
                                        const SwitchWorkListItem &W,
9689
                                        Value *Cond,
9690
462
                                        MachineBasicBlock *SwitchMBB) {
9691
462
  assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) &&
9692
462
         "Clusters not sorted?");
9693
462
9694
462
  assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!");
9695
462
9696
462
  // Balance the tree based on branch probabilities to create a near-optimal (in
9697
462
  // terms of search time given key frequency) binary search tree. See e.g. Kurt
9698
462
  // Mehlhorn "Nearly Optimal Binary Search Trees" (1975).
9699
462
  CaseClusterIt LastLeft = W.FirstCluster;
9700
462
  CaseClusterIt FirstRight = W.LastCluster;
9701
462
  auto LeftProb = LastLeft->Prob + W.DefaultProb / 2;
9702
462
  auto RightProb = FirstRight->Prob + W.DefaultProb / 2;
9703
462
9704
462
  // Move LastLeft and FirstRight towards each other from opposite directions to
9705
462
  // find a partitioning of the clusters which balances the probability on both
9706
462
  // sides. If LeftProb and RightProb are equal, alternate which side is
9707
462
  // taken to ensure 0-probability nodes are distributed evenly.
9708
462
  unsigned I = 0;
9709
2.02k
  while (
LastLeft + 1 < FirstRight2.02k
) {
9710
1.55k
    if (
LeftProb < RightProb || 1.55k
(LeftProb == RightProb && 846
(I & 1)676
))
9711
719
      LeftProb += (++LastLeft)->Prob;
9712
1.55k
    else
9713
839
      RightProb += (--FirstRight)->Prob;
9714
1.55k
    I++;
9715
1.55k
  }
9716
462
9717
480
  while (
true480
) {
9718
480
    // Our binary search tree differs from a typical BST in that ours can have up
9719
480
    // to three values in each leaf. The pivot selection above doesn't take that
9720
480
    // into account, which means the tree might require more nodes and be less
9721
480
    // efficient. We compensate for this here.
9722
480
9723
480
    unsigned NumLeft = LastLeft - W.FirstCluster + 1;
9724
480
    unsigned NumRight = W.LastCluster - FirstRight + 1;
9725
480
9726
480
    if (
std::min(NumLeft, NumRight) < 3 && 480
std::max(NumLeft, NumRight) > 3371
) {
9727
34
      // If one side has less than 3 clusters, and the other has more than 3,
9728
34
      // consider taking a cluster from the other side.
9729
34
9730
34
      if (
NumLeft < NumRight34
) {
9731
17
        // Consider moving the first cluster on the right to the left side.
9732
17
        CaseCluster &CC = *FirstRight;
9733
17
        unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster);
9734
17
        unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft);
9735
17
        if (
LeftSideRank <= RightSideRank17
) {
9736
2
          // Moving the cluster to the left does not demote it.
9737
2
          ++LastLeft;
9738
2
          ++FirstRight;
9739
2
          continue;
9740
2
        }
9741
17
      } else {
9742
17
        assert(NumRight < NumLeft);
9743
17
        // Consider moving the last element on the left to the right side.
9744
17
        CaseCluster &CC = *LastLeft;
9745
17
        unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft);
9746
17
        unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster);
9747
17
        if (
RightSideRank <= LeftSideRank17
) {
9748
16
          // Moving the cluster to the right does not demot it.
9749
16
          --LastLeft;
9750
16
          --FirstRight;
9751
16
          continue;
9752
16
        }
9753
462
      }
9754
34
    }
9755
462
    break;
9756
462
  }
9757
462
9758
462
  assert(LastLeft + 1 == FirstRight);
9759
462
  assert(LastLeft >= W.FirstCluster);
9760
462
  assert(FirstRight <= W.LastCluster);
9761
462
9762
462
  // Use the first element on the right as pivot since we will make less-than
9763
462
  // comparisons against it.
9764
462
  CaseClusterIt PivotCluster = FirstRight;
9765
462
  assert(PivotCluster > W.FirstCluster);
9766
462
  assert(PivotCluster <= W.LastCluster);
9767
462
9768
462
  CaseClusterIt FirstLeft = W.FirstCluster;
9769
462
  CaseClusterIt LastRight = W.LastCluster;
9770
462
9771
462
  const ConstantInt *Pivot = PivotCluster->Low;
9772
462
9773
462
  // New blocks will be inserted immediately after the current one.
9774
462
  MachineFunction::iterator BBI(W.MBB);
9775
462
  ++BBI;
9776
462
9777
462
  // We will branch to the LHS if Value < Pivot. If LHS is a single cluster,
9778
462
  // we can branch to its destination directly if it's squeezed exactly in
9779
462
  // between the known lower bound and Pivot - 1.
9780
462
  MachineBasicBlock *LeftMBB;
9781
462
  if (
FirstLeft == LastLeft && 462
FirstLeft->Kind == CC_Range21
&&
9782
12
      FirstLeft->Low == W.GE &&
9783
462
      
(FirstLeft->High->getValue() + 1LL) == Pivot->getValue()1
) {
9784
0
    LeftMBB = FirstLeft->MBB;
9785
462
  } else {
9786
462
    LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
9787
462
    FuncInfo.MF->insert(BBI, LeftMBB);
9788
462
    WorkList.push_back(
9789
462
        {LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultProb / 2});
9790
462
    // Put Cond in a virtual register to make it available from the new blocks.
9791
462
    ExportFromCurrentBlock(Cond);
9792
462
  }
9793
462
9794
462
  // Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a
9795
462
  // single cluster, RHS.Low == Pivot, and we can branch to its destination
9796
462
  // directly if RHS.High equals the current upper bound.
9797
462
  MachineBasicBlock *RightMBB;
9798
462
  if (
FirstRight == LastRight && 462
FirstRight->Kind == CC_Range19
&&
9799
462
      
W.LT10
&&
(FirstRight->High->getValue() + 1ULL) == W.LT->getValue()0
) {
9800
0
    RightMBB = FirstRight->MBB;
9801
462
  } else {
9802
462
    RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
9803
462
    FuncInfo.MF->insert(BBI, RightMBB);
9804
462
    WorkList.push_back(
9805
462
        {RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultProb / 2});
9806
462
    // Put Cond in a virtual register to make it available from the new blocks.
9807
462
    ExportFromCurrentBlock(Cond);
9808
462
  }
9809
462
9810
462
  // Create the CaseBlock record that will be used to lower the branch.
9811
462
  CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB,
9812
462
               getCurSDLoc(), LeftProb, RightProb);
9813
462
9814
462
  if (W.MBB == SwitchMBB)
9815
333
    visitSwitchCase(CB, SwitchMBB);
9816
462
  else
9817
129
    SwitchCases.push_back(CB);
9818
462
}
9819
9820
30.5k
void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
9821
30.5k
  // Extract cases from the switch.
9822
30.5k
  BranchProbabilityInfo *BPI = FuncInfo.BPI;
9823
30.5k
  CaseClusterVector Clusters;
9824
30.5k
  Clusters.reserve(SI.getNumCases());
9825
93.2k
  for (auto I : SI.cases()) {
9826
93.2k
    MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()];
9827
93.2k
    const ConstantInt *CaseVal = I.getCaseValue();
9828
93.2k
    BranchProbability Prob =
9829
92.8k
        BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex())
9830
375
            : BranchProbability(1, SI.getNumCases() + 1);
9831
93.2k
    Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob));
9832
93.2k
  }
9833
30.5k
9834
30.5k
  MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()];
9835
30.5k
9836
30.5k
  // Cluster adjacent cases with the same destination. We do this at all
9837
30.5k
  // optimization levels because it's cheap to do and will make codegen faster
9838
30.5k
  // if there are many clusters.
9839
30.5k
  sortAndRangeify(Clusters);
9840
30.5k
9841
30.5k
  if (
TM.getOptLevel() != CodeGenOpt::None30.5k
) {
9842
30.5k
    // Replace an unreachable default with the most popular destination.
9843
30.5k
    // FIXME: Exploit unreachable default more aggressively.
9844
30.5k
    bool UnreachableDefault =
9845
30.5k
        isa<UnreachableInst>(SI.getDefaultDest()->getFirstNonPHIOrDbg());
9846
30.5k
    if (
UnreachableDefault && 30.5k
!Clusters.empty()46
) {
9847
46
      DenseMap<const BasicBlock *, unsigned> Popularity;
9848
46
      unsigned MaxPop = 0;
9849
46
      const BasicBlock *MaxBB = nullptr;
9850
456
      for (auto I : SI.cases()) {
9851
456
        const BasicBlock *BB = I.getCaseSuccessor();
9852
456
        if (
++Popularity[BB] > MaxPop456
) {
9853
128
          MaxPop = Popularity[BB];
9854
128
          MaxBB = BB;
9855
128
        }
9856
456
      }
9857
46
      // Set new default.
9858
46
      assert(MaxPop > 0 && MaxBB);
9859
46
      DefaultMBB = FuncInfo.MBBMap[MaxBB];
9860
46
9861
46
      // Remove cases that were pointing to the destination that is now the
9862
46
      // default.
9863
46
      CaseClusterVector New;
9864
46
      New.reserve(Clusters.size());
9865
379
      for (CaseCluster &CC : Clusters) {
9866
379
        if (CC.MBB != DefaultMBB)
9867
319
          New.push_back(CC);
9868
379
      }
9869
46
      Clusters = std::move(New);
9870
46
    }
9871
30.5k
  }
9872
30.5k
9873
30.5k
  // If there is only the default destination, jump there directly.
9874
30.5k
  MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
9875
30.5k
  if (
Clusters.empty()30.5k
) {
9876
4
    SwitchMBB->addSuccessor(DefaultMBB);
9877
4
    if (
DefaultMBB != NextBlock(SwitchMBB)4
) {
9878
2
      DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
9879
2
                              getControlRoot(), DAG.getBasicBlock(DefaultMBB)));
9880
2
    }
9881
4
    return;
9882
4
  }
9883
30.5k
9884
30.5k
  findJumpTables(Clusters, &SI, DefaultMBB);
9885
30.5k
  findBitTestClusters(Clusters, &SI);
9886
30.5k
9887
30.5k
  DEBUG({
9888
30.5k
    dbgs() << "Case clusters: ";
9889
30.5k
    for (const CaseCluster &C : Clusters) {
9890
30.5k
      if (C.Kind == CC_JumpTable) dbgs() << "JT:";
9891
30.5k
      if (C.Kind == CC_BitTests) dbgs() << "BT:";
9892
30.5k
9893
30.5k
      C.Low->getValue().print(dbgs(), true);
9894
30.5k
      if (C.Low != C.High) {
9895
30.5k
        dbgs() << '-';
9896
30.5k
        C.High->getValue().print(dbgs(), true);
9897
30.5k
      }
9898
30.5k
      dbgs() << ' ';
9899
30.5k
    }
9900
30.5k
    dbgs() << '\n';
9901
30.5k
  });
9902
30.5k
9903
30.5k
  assert(!Clusters.empty());
9904
30.5k
  SwitchWorkList WorkList;
9905
30.5k
  CaseClusterIt First = Clusters.begin();
9906
30.5k
  CaseClusterIt Last = Clusters.end() - 1;
9907
30.5k
  auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB);
9908
30.5k
  WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
9909
30.5k
9910
62.0k
  while (
!WorkList.empty()62.0k
) {
9911
31.4k
    SwitchWorkListItem W = WorkList.back();
9912
31.4k
    WorkList.pop_back();
9913
31.4k
    unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
9914
31.4k
9915
31.4k
    if (
NumClusters > 3 && 31.4k
TM.getOptLevel() != CodeGenOpt::None480
&&
9916
31.4k
        
!DefaultMBB->getParent()->getFunction()->optForMinSize()463
) {
9917
462
      // For optimized builds, lower large range as a balanced binary tree.
9918
462
      splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB);
9919
462
      continue;
9920
462
    }
9921
31.0k
9922
31.0k
    lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB);
9923
31.0k
  }
9924
30.5k
}