Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This implements routines for translating from LLVM IR into SelectionDAG IR.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "SelectionDAGBuilder.h"
14
#include "SDNodeDbgValue.h"
15
#include "llvm/ADT/APFloat.h"
16
#include "llvm/ADT/APInt.h"
17
#include "llvm/ADT/ArrayRef.h"
18
#include "llvm/ADT/BitVector.h"
19
#include "llvm/ADT/DenseMap.h"
20
#include "llvm/ADT/None.h"
21
#include "llvm/ADT/Optional.h"
22
#include "llvm/ADT/STLExtras.h"
23
#include "llvm/ADT/SmallPtrSet.h"
24
#include "llvm/ADT/SmallSet.h"
25
#include "llvm/ADT/SmallVector.h"
26
#include "llvm/ADT/StringRef.h"
27
#include "llvm/ADT/Triple.h"
28
#include "llvm/ADT/Twine.h"
29
#include "llvm/Analysis/AliasAnalysis.h"
30
#include "llvm/Analysis/BranchProbabilityInfo.h"
31
#include "llvm/Analysis/ConstantFolding.h"
32
#include "llvm/Analysis/EHPersonalities.h"
33
#include "llvm/Analysis/Loads.h"
34
#include "llvm/Analysis/MemoryLocation.h"
35
#include "llvm/Analysis/TargetLibraryInfo.h"
36
#include "llvm/Analysis/ValueTracking.h"
37
#include "llvm/Analysis/VectorUtils.h"
38
#include "llvm/CodeGen/Analysis.h"
39
#include "llvm/CodeGen/FunctionLoweringInfo.h"
40
#include "llvm/CodeGen/GCMetadata.h"
41
#include "llvm/CodeGen/ISDOpcodes.h"
42
#include "llvm/CodeGen/MachineBasicBlock.h"
43
#include "llvm/CodeGen/MachineFrameInfo.h"
44
#include "llvm/CodeGen/MachineFunction.h"
45
#include "llvm/CodeGen/MachineInstr.h"
46
#include "llvm/CodeGen/MachineInstrBuilder.h"
47
#include "llvm/CodeGen/MachineJumpTableInfo.h"
48
#include "llvm/CodeGen/MachineMemOperand.h"
49
#include "llvm/CodeGen/MachineModuleInfo.h"
50
#include "llvm/CodeGen/MachineOperand.h"
51
#include "llvm/CodeGen/MachineRegisterInfo.h"
52
#include "llvm/CodeGen/RuntimeLibcalls.h"
53
#include "llvm/CodeGen/SelectionDAG.h"
54
#include "llvm/CodeGen/SelectionDAGNodes.h"
55
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
56
#include "llvm/CodeGen/StackMaps.h"
57
#include "llvm/CodeGen/SwiftErrorValueTracking.h"
58
#include "llvm/CodeGen/TargetFrameLowering.h"
59
#include "llvm/CodeGen/TargetInstrInfo.h"
60
#include "llvm/CodeGen/TargetLowering.h"
61
#include "llvm/CodeGen/TargetOpcodes.h"
62
#include "llvm/CodeGen/TargetRegisterInfo.h"
63
#include "llvm/CodeGen/TargetSubtargetInfo.h"
64
#include "llvm/CodeGen/ValueTypes.h"
65
#include "llvm/CodeGen/WinEHFuncInfo.h"
66
#include "llvm/IR/Argument.h"
67
#include "llvm/IR/Attributes.h"
68
#include "llvm/IR/BasicBlock.h"
69
#include "llvm/IR/CFG.h"
70
#include "llvm/IR/CallSite.h"
71
#include "llvm/IR/CallingConv.h"
72
#include "llvm/IR/Constant.h"
73
#include "llvm/IR/ConstantRange.h"
74
#include "llvm/IR/Constants.h"
75
#include "llvm/IR/DataLayout.h"
76
#include "llvm/IR/DebugInfoMetadata.h"
77
#include "llvm/IR/DebugLoc.h"
78
#include "llvm/IR/DerivedTypes.h"
79
#include "llvm/IR/Function.h"
80
#include "llvm/IR/GetElementPtrTypeIterator.h"
81
#include "llvm/IR/InlineAsm.h"
82
#include "llvm/IR/InstrTypes.h"
83
#include "llvm/IR/Instruction.h"
84
#include "llvm/IR/Instructions.h"
85
#include "llvm/IR/IntrinsicInst.h"
86
#include "llvm/IR/Intrinsics.h"
87
#include "llvm/IR/LLVMContext.h"
88
#include "llvm/IR/Metadata.h"
89
#include "llvm/IR/Module.h"
90
#include "llvm/IR/Operator.h"
91
#include "llvm/IR/PatternMatch.h"
92
#include "llvm/IR/Statepoint.h"
93
#include "llvm/IR/Type.h"
94
#include "llvm/IR/User.h"
95
#include "llvm/IR/Value.h"
96
#include "llvm/MC/MCContext.h"
97
#include "llvm/MC/MCSymbol.h"
98
#include "llvm/Support/AtomicOrdering.h"
99
#include "llvm/Support/BranchProbability.h"
100
#include "llvm/Support/Casting.h"
101
#include "llvm/Support/CodeGen.h"
102
#include "llvm/Support/CommandLine.h"
103
#include "llvm/Support/Compiler.h"
104
#include "llvm/Support/Debug.h"
105
#include "llvm/Support/ErrorHandling.h"
106
#include "llvm/Support/MachineValueType.h"
107
#include "llvm/Support/MathExtras.h"
108
#include "llvm/Support/raw_ostream.h"
109
#include "llvm/Target/TargetIntrinsicInfo.h"
110
#include "llvm/Target/TargetMachine.h"
111
#include "llvm/Target/TargetOptions.h"
112
#include "llvm/Transforms/Utils/Local.h"
113
#include <algorithm>
114
#include <cassert>
115
#include <cstddef>
116
#include <cstdint>
117
#include <cstring>
118
#include <iterator>
119
#include <limits>
120
#include <numeric>
121
#include <tuple>
122
#include <utility>
123
#include <vector>
124
125
using namespace llvm;
126
using namespace PatternMatch;
127
using namespace SwitchCG;
128
129
#define DEBUG_TYPE "isel"
130
131
/// LimitFloatPrecision - Generate low-precision inline sequences for
132
/// some float libcalls (6, 8 or 12 bits).
133
static unsigned LimitFloatPrecision;
134
135
static cl::opt<unsigned, true>
136
    LimitFPPrecision("limit-float-precision",
137
                     cl::desc("Generate low-precision inline sequences "
138
                              "for some float libcalls"),
139
                     cl::location(LimitFloatPrecision), cl::Hidden,
140
                     cl::init(0));
141
142
static cl::opt<unsigned> SwitchPeelThreshold(
143
    "switch-peel-threshold", cl::Hidden, cl::init(66),
144
    cl::desc("Set the case probability threshold for peeling the case from a "
145
             "switch statement. A value greater than 100 will void this "
146
             "optimization"));
147
148
// Limit the width of DAG chains. This is important in general to prevent
149
// DAG-based analysis from blowing up. For example, alias analysis and
150
// load clustering may not complete in reasonable time. It is difficult to
151
// recognize and avoid this situation within each individual analysis, and
152
// future analyses are likely to have the same behavior. Limiting DAG width is
153
// the safe approach and will be especially important with global DAGs.
154
//
155
// MaxParallelChains default is arbitrarily high to avoid affecting
156
// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
157
// sequence over this should have been converted to llvm.memcpy by the
158
// frontend. It is easy to induce this behavior with .ll code such as:
159
// %buffer = alloca [4096 x i8]
160
// %data = load [4096 x i8]* %argPtr
161
// store [4096 x i8] %data, [4096 x i8]* %buffer
162
static const unsigned MaxParallelChains = 64;
163
164
// Return the calling convention if the Value passed requires ABI mangling as it
165
// is a parameter to a function or a return value from a function which is not
166
// an intrinsic.
167
2.39k
static Optional<CallingConv::ID> getABIRegCopyCC(const Value *V) {
168
2.39k
  if (auto *R = dyn_cast<ReturnInst>(V))
169
0
    return R->getParent()->getParent()->getCallingConv();
170
2.39k
171
2.39k
  if (auto *CI = dyn_cast<CallInst>(V)) {
172
27
    const bool IsInlineAsm = CI->isInlineAsm();
173
27
    const bool IsIndirectFunctionCall =
174
27
        !IsInlineAsm && 
!CI->getCalledFunction()24
;
175
27
176
27
    // It is possible that the call instruction is an inline asm statement or an
177
27
    // indirect function call in which case the return value of
178
27
    // getCalledFunction() would be nullptr.
179
27
    const bool IsInstrinsicCall =
180
27
        !IsInlineAsm && 
!IsIndirectFunctionCall24
&&
181
27
        
CI->getCalledFunction()->getIntrinsicID() != Intrinsic::not_intrinsic23
;
182
27
183
27
    if (!IsInlineAsm && 
!IsInstrinsicCall24
)
184
14
      return CI->getCallingConv();
185
2.37k
  }
186
2.37k
187
2.37k
  return None;
188
2.37k
}
189
190
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
191
                                      const SDValue *Parts, unsigned NumParts,
192
                                      MVT PartVT, EVT ValueVT, const Value *V,
193
                                      Optional<CallingConv::ID> CC);
194
195
/// getCopyFromParts - Create a value that contains the specified legal parts
196
/// combined into the value they represent.  If the parts combine to a type
197
/// larger than ValueVT then AssertOp can be used to specify whether the extra
198
/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
199
/// (ISD::AssertSext).
200
static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
201
                                const SDValue *Parts, unsigned NumParts,
202
                                MVT PartVT, EVT ValueVT, const Value *V,
203
                                Optional<CallingConv::ID> CC = None,
204
2.57M
                                Optional<ISD::NodeType> AssertOp = None) {
205
2.57M
  if (ValueVT.isVector())
206
302k
    return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT, V,
207
302k
                                  CC);
208
2.27M
209
2.27M
  assert(NumParts > 0 && "No parts to assemble!");
210
2.27M
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
211
2.27M
  SDValue Val = Parts[0];
212
2.27M
213
2.27M
  if (NumParts > 1) {
214
33.8k
    // Assemble the value from multiple parts.
215
33.8k
    if (ValueVT.isInteger()) {
216
31.0k
      unsigned PartBits = PartVT.getSizeInBits();
217
31.0k
      unsigned ValueBits = ValueVT.getSizeInBits();
218
31.0k
219
31.0k
      // Assemble the power of 2 part.
220
31.0k
      unsigned RoundParts =
221
31.0k
          (NumParts & (NumParts - 1)) ? 
1 << Log2_32(NumParts)470
:
NumParts30.5k
;
222
31.0k
      unsigned RoundBits = PartBits * RoundParts;
223
31.0k
      EVT RoundVT = RoundBits == ValueBits ?
224
30.0k
        ValueVT : 
EVT::getIntegerVT(*DAG.getContext(), RoundBits)1.02k
;
225
31.0k
      SDValue Lo, Hi;
226
31.0k
227
31.0k
      EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
228
31.0k
229
31.0k
      if (RoundParts > 2) {
230
2.01k
        Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
231
2.01k
                              PartVT, HalfVT, V);
232
2.01k
        Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
233
2.01k
                              RoundParts / 2, PartVT, HalfVT, V);
234
29.0k
      } else {
235
29.0k
        Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
236
29.0k
        Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
237
29.0k
      }
238
31.0k
239
31.0k
      if (DAG.getDataLayout().isBigEndian())
240
3.95k
        std::swap(Lo, Hi);
241
31.0k
242
31.0k
      Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);
243
31.0k
244
31.0k
      if (RoundParts < NumParts) {
245
470
        // Assemble the trailing non-power-of-2 part.
246
470
        unsigned OddParts = NumParts - RoundParts;
247
470
        EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
248
470
        Hi = getCopyFromParts(DAG, DL, Parts + RoundParts, OddParts, PartVT,
249
470
                              OddVT, V, CC);
250
470
251
470
        // Combine the round and odd parts.
252
470
        Lo = Val;
253
470
        if (DAG.getDataLayout().isBigEndian())
254
5
          std::swap(Lo, Hi);
255
470
        EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
256
470
        Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
257
470
        Hi =
258
470
            DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
259
470
                        DAG.getConstant(Lo.getValueSizeInBits(), DL,
260
470
                                        TLI.getPointerTy(DAG.getDataLayout())));
261
470
        Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
262
470
        Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
263
470
      }
264
31.0k
    } else 
if (2.79k
PartVT.isFloatingPoint()2.79k
) {
265
176
      // FP split into multiple FP parts (for ppcf128)
266
176
      assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 &&
267
176
             "Unexpected split");
268
176
      SDValue Lo, Hi;
269
176
      Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
270
176
      Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
271
176
      if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
272
176
        std::swap(Lo, Hi);
273
176
      Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
274
2.61k
    } else {
275
2.61k
      // FP split into integer parts (soft fp)
276
2.61k
      assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
277
2.61k
             !PartVT.isVector() && "Unexpected split");
278
2.61k
      EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
279
2.61k
      Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V, CC);
280
2.61k
    }
281
33.8k
  }
282
2.27M
283
2.27M
  // There is now one part, held in Val.  Correct it to match ValueVT.
284
2.27M
  // PartEVT is the type of the register class that holds the value.
285
2.27M
  // ValueVT is the type of the inline asm operation.
286
2.27M
  EVT PartEVT = Val.getValueType();
287
2.27M
288
2.27M
  if (PartEVT == ValueVT)
289
2.19M
    return Val;
290
75.7k
291
75.7k
  if (PartEVT.isInteger() && 
ValueVT.isFloatingPoint()74.5k
&&
292
75.7k
      
ValueVT.bitsLT(PartEVT)4.77k
) {
293
40
    // For an FP value in an integer part, we need to truncate to the right
294
40
    // width first.
295
40
    PartEVT = EVT::getIntegerVT(*DAG.getContext(),  ValueVT.getSizeInBits());
296
40
    Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val);
297
40
  }
298
75.7k
299
75.7k
  // Handle types that have the same size.
300
75.7k
  if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
301
4.78k
    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
302
70.9k
303
70.9k
  // Handle types with different sizes.
304
70.9k
  if (PartEVT.isInteger() && 
ValueVT.isInteger()69.7k
) {
305
69.7k
    if (ValueVT.bitsLT(PartEVT)) {
306
69.7k
      // For a truncate, see if we have any information to
307
69.7k
      // indicate whether the truncated bits will always be
308
69.7k
      // zero or sign-extension.
309
69.7k
      if (AssertOp.hasValue())
310
24.9k
        Val = DAG.getNode(*AssertOp, DL, PartEVT, Val,
311
24.9k
                          DAG.getValueType(ValueVT));
312
69.7k
      return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
313
69.7k
    }
314
13
    return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
315
13
  }
316
1.14k
317
1.14k
  
if (1.14k
PartEVT.isFloatingPoint()1.14k
&& ValueVT.isFloatingPoint()) {
318
1.14k
    // FP_ROUND's are always exact here.
319
1.14k
    if (ValueVT.bitsLT(Val.getValueType()))
320
1.14k
      return DAG.getNode(
321
1.14k
          ISD::FP_ROUND, DL, ValueVT, Val,
322
1.14k
          DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())));
323
0
324
0
    return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
325
0
  }
326
18.4E
327
18.4E
  // Handle MMX to a narrower integer type by bitcasting MMX to integer and
328
18.4E
  // then truncating.
329
18.4E
  if (PartEVT == MVT::x86mmx && 
ValueVT.isInteger()1
&&
330
18.4E
      
ValueVT.bitsLT(PartEVT)1
) {
331
1
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Val);
332
1
    return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
333
1
  }
334
18.4E
335
18.4E
  report_fatal_error("Unknown mismatch in getCopyFromParts!");
336
18.4E
}
337
338
static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
339
12
                                              const Twine &ErrMsg) {
340
12
  const Instruction *I = dyn_cast_or_null<Instruction>(V);
341
12
  if (!V)
342
0
    return Ctx.emitError(ErrMsg);
343
12
344
12
  const char *AsmError = ", possible invalid constraint for vector type";
345
12
  if (const CallInst *CI = dyn_cast<CallInst>(I))
346
12
    if (isa<InlineAsm>(CI->getCalledValue()))
347
12
      return Ctx.emitError(I, ErrMsg + AsmError);
348
0
349
0
  return Ctx.emitError(I, ErrMsg);
350
0
}
351
352
/// getCopyFromPartsVector - Create a value that contains the specified legal
353
/// parts combined into the value they represent.  If the parts combine to a
354
/// type larger than ValueVT then AssertOp can be used to specify whether the
355
/// extra bits are known to be zero (ISD::AssertZext) or sign extended from
356
/// ValueVT (ISD::AssertSext).
357
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
358
                                      const SDValue *Parts, unsigned NumParts,
359
                                      MVT PartVT, EVT ValueVT, const Value *V,
360
302k
                                      Optional<CallingConv::ID> CallConv) {
361
302k
  assert(ValueVT.isVector() && "Not a vector value");
362
302k
  assert(NumParts > 0 && "No parts to assemble!");
363
302k
  const bool IsABIRegCopy = CallConv.hasValue();
364
302k
365
302k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
366
302k
  SDValue Val = Parts[0];
367
302k
368
302k
  // Handle a multi-element vector.
369
302k
  if (NumParts > 1) {
370
30.7k
    EVT IntermediateVT;
371
30.7k
    MVT RegisterVT;
372
30.7k
    unsigned NumIntermediates;
373
30.7k
    unsigned NumRegs;
374
30.7k
375
30.7k
    if (IsABIRegCopy) {
376
16.6k
      NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
377
16.6k
          *DAG.getContext(), CallConv.getValue(), ValueVT, IntermediateVT,
378
16.6k
          NumIntermediates, RegisterVT);
379
16.6k
    } else {
380
14.1k
      NumRegs =
381
14.1k
          TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
382
14.1k
                                     NumIntermediates, RegisterVT);
383
14.1k
    }
384
30.7k
385
30.7k
    assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
386
30.7k
    NumParts = NumRegs; // Silence a compiler warning.
387
30.7k
    assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
388
30.7k
    assert(RegisterVT.getSizeInBits() ==
389
30.7k
           Parts[0].getSimpleValueType().getSizeInBits() &&
390
30.7k
           "Part type sizes don't match!");
391
30.7k
392
30.7k
    // Assemble the parts into intermediate operands.
393
30.7k
    SmallVector<SDValue, 8> Ops(NumIntermediates);
394
30.7k
    if (NumIntermediates == NumParts) {
395
29.9k
      // If the register was not expanded, truncate or copy the value,
396
29.9k
      // as appropriate.
397
125k
      for (unsigned i = 0; i != NumParts; 
++i95.1k
)
398
95.1k
        Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
399
95.1k
                                  PartVT, IntermediateVT, V);
400
29.9k
    } else 
if (842
NumParts > 0842
) {
401
842
      // If the intermediate type was expanded, build the intermediate
402
842
      // operands from the parts.
403
842
      assert(NumParts % NumIntermediates == 0 &&
404
842
             "Must expand into a divisible number of parts!");
405
842
      unsigned Factor = NumParts / NumIntermediates;
406
2.70k
      for (unsigned i = 0; i != NumIntermediates; 
++i1.86k
)
407
1.86k
        Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
408
1.86k
                                  PartVT, IntermediateVT, V);
409
842
    }
410
30.7k
411
30.7k
    // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
412
30.7k
    // intermediate operands.
413
30.7k
    EVT BuiltVectorTy =
414
30.7k
        EVT::getVectorVT(*DAG.getContext(), IntermediateVT.getScalarType(),
415
30.7k
                         (IntermediateVT.isVector()
416
30.7k
                              ? 
IntermediateVT.getVectorNumElements() * NumParts23.5k
417
30.7k
                              : 
NumIntermediates7.19k
));
418
30.7k
    Val = DAG.getNode(IntermediateVT.isVector() ? 
ISD::CONCAT_VECTORS23.5k
419
30.7k
                                                : 
ISD::BUILD_VECTOR7.19k
,
420
30.7k
                      DL, BuiltVectorTy, Ops);
421
30.7k
  }
422
302k
423
302k
  // There is now one part, held in Val.  Correct it to match ValueVT.
424
302k
  EVT PartEVT = Val.getValueType();
425
302k
426
302k
  if (PartEVT == ValueVT)
427
291k
    return Val;
428
11.0k
429
11.0k
  if (PartEVT.isVector()) {
430
10.0k
    // If the element type of the source/dest vectors are the same, but the
431
10.0k
    // parts vector has more elements than the value vector, then we have a
432
10.0k
    // vector widening case (e.g. <2 x float> -> <4 x float>).  Extract the
433
10.0k
    // elements we want.
434
10.0k
    if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) {
435
3.37k
      assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
436
3.37k
             "Cannot narrow, it would be a lossy transformation");
437
3.37k
      return DAG.getNode(
438
3.37k
          ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
439
3.37k
          DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
440
3.37k
    }
441
6.69k
442
6.69k
    // Vector/Vector bitcast.
443
6.69k
    if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
444
2.20k
      return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
445
4.49k
446
4.49k
    assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
447
4.49k
      "Cannot handle this kind of promotion");
448
4.49k
    // Promoted vector extract
449
4.49k
    return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
450
4.49k
451
4.49k
  }
452
1.00k
453
1.00k
  // Trivial bitcast if the types are the same size and the destination
454
1.00k
  // vector type is legal.
455
1.00k
  if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() &&
456
1.00k
      
TLI.isTypeLegal(ValueVT)829
)
457
23
    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
458
979
459
979
  if (ValueVT.getVectorNumElements() != 1) {
460
194
     // Certain ABIs require that vectors are passed as integers. For vectors
461
194
     // are the same size, this is an obvious bitcast.
462
194
     if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) {
463
106
       return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
464
106
     } else 
if (88
ValueVT.getSizeInBits() < PartEVT.getSizeInBits()88
) {
465
80
       // Bitcast Val back the original type and extract the corresponding
466
80
       // vector we want.
467
80
       unsigned Elts = PartEVT.getSizeInBits() / ValueVT.getScalarSizeInBits();
468
80
       EVT WiderVecType = EVT::getVectorVT(*DAG.getContext(),
469
80
                                           ValueVT.getVectorElementType(), Elts);
470
80
       Val = DAG.getBitcast(WiderVecType, Val);
471
80
       return DAG.getNode(
472
80
           ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
473
80
           DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
474
80
     }
475
8
476
8
     diagnosePossiblyInvalidConstraint(
477
8
         *DAG.getContext(), V, "non-trivial scalar-to-vector conversion");
478
8
     return DAG.getUNDEF(ValueVT);
479
8
  }
480
785
481
785
  // Handle cases such as i8 -> <1 x i1>
482
785
  EVT ValueSVT = ValueVT.getVectorElementType();
483
785
  if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT)
484
85
    Val = ValueVT.isFloatingPoint() ? 
DAG.getFPExtendOrRound(Val, DL, ValueSVT)6
485
85
                                    : 
DAG.getAnyExtOrTrunc(Val, DL, ValueSVT)79
;
486
785
487
785
  return DAG.getBuildVector(ValueVT, DL, Val);
488
785
}
489
490
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
491
                                 SDValue Val, SDValue *Parts, unsigned NumParts,
492
                                 MVT PartVT, const Value *V,
493
                                 Optional<CallingConv::ID> CallConv);
494
495
/// getCopyToParts - Create a series of nodes that contain the specified value
496
/// split into legal parts.  If the parts contain more bits than Val, then, for
497
/// integers, ExtendKind can be used to specify how to generate the extra bits.
498
static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
499
                           SDValue *Parts, unsigned NumParts, MVT PartVT,
500
                           const Value *V,
501
                           Optional<CallingConv::ID> CallConv = None,
502
2.08M
                           ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
503
2.08M
  EVT ValueVT = Val.getValueType();
504
2.08M
505
2.08M
  // Handle the vector case separately.
506
2.08M
  if (ValueVT.isVector())
507
160k
    return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V,
508
160k
                                CallConv);
509
1.92M
510
1.92M
  unsigned PartBits = PartVT.getSizeInBits();
511
1.92M
  unsigned OrigNumParts = NumParts;
512
1.92M
  assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) &&
513
1.92M
         "Copying to an illegal type!");
514
1.92M
515
1.92M
  if (NumParts == 0)
516
0
    return;
517
1.92M
518
1.92M
  assert(!ValueVT.isVector() && "Vector case handled elsewhere");
519
1.92M
  EVT PartEVT = PartVT;
520
1.92M
  if (PartEVT == ValueVT) {
521
1.81M
    assert(NumParts == 1 && "No-op copy with multiple parts!");
522
1.81M
    Parts[0] = Val;
523
1.81M
    return;
524
1.81M
  }
525
109k
526
109k
  if (NumParts * PartBits > ValueVT.getSizeInBits()) {
527
76.5k
    // If the parts cover more bits than the value has, promote the value.
528
76.5k
    if (PartVT.isFloatingPoint() && 
ValueVT.isFloatingPoint()669
) {
529
669
      assert(NumParts == 1 && "Do not know what to promote to!");
530
669
      Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
531
75.9k
    } else {
532
75.9k
      if (ValueVT.isFloatingPoint()) {
533
53
        // FP values need to be bitcast, then extended if they are being put
534
53
        // into a larger container.
535
53
        ValueVT = EVT::getIntegerVT(*DAG.getContext(),  ValueVT.getSizeInBits());
536
53
        Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
537
53
      }
538
75.9k
      assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
539
75.9k
             ValueVT.isInteger() &&
540
75.9k
             "Unknown mismatch!");
541
75.9k
      ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
542
75.9k
      Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
543
75.9k
      if (PartVT == MVT::x86mmx)
544
1
        Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
545
75.9k
    }
546
76.5k
  } else 
if (32.8k
PartBits == ValueVT.getSizeInBits()32.8k
) {
547
1.69k
    // Different types of the same size.
548
1.69k
    assert(NumParts == 1 && PartEVT != ValueVT);
549
1.69k
    Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
550
31.1k
  } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
551
186
    // If the parts cover less bits than value has, truncate the value.
552
186
    assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
553
186
           ValueVT.isInteger() &&
554
186
           "Unknown mismatch!");
555
186
    ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
556
186
    Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
557
186
    if (PartVT == MVT::x86mmx)
558
0
      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
559
186
  }
560
109k
561
109k
  // The value may have changed - recompute ValueVT.
562
109k
  ValueVT = Val.getValueType();
563
109k
  assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
564
109k
         "Failed to tile the value with PartVT!");
565
109k
566
109k
  if (NumParts == 1) {
567
78.3k
    if (PartEVT != ValueVT) {
568
4
      diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
569
4
                                        "scalar-to-vector conversion failed");
570
4
      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
571
4
    }
572
78.3k
573
78.3k
    Parts[0] = Val;
574
78.3k
    return;
575
78.3k
  }
576
31.0k
577
31.0k
  // Expand the value into multiple parts.
578
31.0k
  if (NumParts & (NumParts - 1)) {
579
159
    // The number of parts is not a power of 2.  Split off and copy the tail.
580
159
    assert(PartVT.isInteger() && ValueVT.isInteger() &&
581
159
           "Do not know what to expand to!");
582
159
    unsigned RoundParts = 1 << Log2_32(NumParts);
583
159
    unsigned RoundBits = RoundParts * PartBits;
584
159
    unsigned OddParts = NumParts - RoundParts;
585
159
    SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
586
159
      DAG.getShiftAmountConstant(RoundBits, ValueVT, DL, /*LegalTypes*/false));
587
159
588
159
    getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V,
589
159
                   CallConv);
590
159
591
159
    if (DAG.getDataLayout().isBigEndian())
592
3
      // The odd parts were reversed by getCopyToParts - unreverse them.
593
3
      std::reverse(Parts + RoundParts, Parts + NumParts);
594
159
595
159
    NumParts = RoundParts;
596
159
    ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
597
159
    Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
598
159
  }
599
31.0k
600
31.0k
  // The number of parts is a power of 2.  Repeatedly bisect the value using
601
31.0k
  // EXTRACT_ELEMENT.
602
31.0k
  Parts[0] = DAG.getNode(ISD::BITCAST, DL,
603
31.0k
                         EVT::getIntegerVT(*DAG.getContext(),
604
31.0k
                                           ValueVT.getSizeInBits()),
605
31.0k
                         Val);
606
31.0k
607
63.2k
  for (unsigned StepSize = NumParts; StepSize > 1; 
StepSize /= 232.2k
) {
608
66.1k
    for (unsigned i = 0; i < NumParts; 
i += StepSize33.9k
) {
609
33.9k
      unsigned ThisBits = StepSize * PartBits / 2;
610
33.9k
      EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
611
33.9k
      SDValue &Part0 = Parts[i];
612
33.9k
      SDValue &Part1 = Parts[i+StepSize/2];
613
33.9k
614
33.9k
      Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
615
33.9k
                          ThisVT, Part0, DAG.getIntPtrConstant(1, DL));
616
33.9k
      Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
617
33.9k
                          ThisVT, Part0, DAG.getIntPtrConstant(0, DL));
618
33.9k
619
33.9k
      if (ThisBits == PartBits && 
ThisVT != PartVT32.5k
) {
620
181
        Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
621
181
        Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
622
181
      }
623
33.9k
    }
624
32.2k
  }
625
31.0k
626
31.0k
  if (DAG.getDataLayout().isBigEndian())
627
2.52k
    std::reverse(Parts, Parts + OrigNumParts);
628
31.0k
}
629
630
static SDValue widenVectorToPartType(SelectionDAG &DAG,
631
5.07k
                                     SDValue Val, const SDLoc &DL, EVT PartVT) {
632
5.07k
  if (!PartVT.isVector())
633
102
    return SDValue();
634
4.97k
635
4.97k
  EVT ValueVT = Val.getValueType();
636
4.97k
  unsigned PartNumElts = PartVT.getVectorNumElements();
637
4.97k
  unsigned ValueNumElts = ValueVT.getVectorNumElements();
638
4.97k
  if (PartNumElts > ValueNumElts &&
639
4.97k
      
PartVT.getVectorElementType() == ValueVT.getVectorElementType()2.21k
) {
640
2.13k
    EVT ElementVT = PartVT.getVectorElementType();
641
2.13k
    // Vector widening case, e.g. <2 x float> -> <4 x float>.  Shuffle in
642
2.13k
    // undef elements.
643
2.13k
    SmallVector<SDValue, 16> Ops;
644
2.13k
    DAG.ExtractVectorElements(Val, Ops);
645
2.13k
    SDValue EltUndef = DAG.getUNDEF(ElementVT);
646
13.2k
    for (unsigned i = ValueNumElts, e = PartNumElts; i != e; 
++i11.0k
)
647
11.0k
      Ops.push_back(EltUndef);
648
2.13k
649
2.13k
    // FIXME: Use CONCAT for 2x -> 4x.
650
2.13k
    return DAG.getBuildVector(PartVT, DL, Ops);
651
2.13k
  }
652
2.83k
653
2.83k
  return SDValue();
654
2.83k
}
655
656
/// getCopyToPartsVector - Create a series of nodes that contain the specified
/// value split into legal parts.
///
/// \param Val       The vector value to split (must have vector type).
/// \param Parts     Output array receiving exactly \p NumParts part values.
/// \param PartVT    The legal register type each part must have.
/// \param CallConv  When present, the split follows the ABI breakdown for
///                  that calling convention instead of the generic one.
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                 SDValue Val, SDValue *Parts, unsigned NumParts,
                                 MVT PartVT, const Value *V,
                                 Optional<CallingConv::ID> CallConv) {
  EVT ValueVT = Val.getValueType();
  assert(ValueVT.isVector() && "Not a vector");
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  // A present calling convention means this copy crosses an ABI boundary.
  const bool IsABIRegCopy = CallConv.hasValue();

  if (NumParts == 1) {
    // Single-part case: coerce Val to PartVT directly.
    EVT PartEVT = PartVT;
    if (PartEVT == ValueVT) {
      // Nothing to do.
    } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
      // Bitconvert vector->vector case.
      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
    } else if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, PartVT)) {
      Val = Widened;
    } else if (PartVT.isVector() &&
               PartEVT.getVectorElementType().bitsGE(
                 ValueVT.getVectorElementType()) &&
               PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {

      // Promoted vector extract
      Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
    } else {
      // Vector -> scalar conversion.
      if (ValueVT.getVectorNumElements() == 1) {
        // One-element vector: just extract the element.
        Val = DAG.getNode(
            ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
            DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
      } else {
        // Multi-element vector into a wider scalar: bitcast the whole vector
        // to an integer of equal width, then extend to PartVT.
        assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() &&
               "lossy conversion of vector to scalar type");
        EVT IntermediateType =
            EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
        Val = DAG.getBitcast(IntermediateType, Val);
        Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
      }
    }

    assert(Val.getValueType() == PartVT && "Unexpected vector part value type");
    Parts[0] = Val;
    return;
  }

  // Handle a multi-element vector.
  EVT IntermediateVT;
  MVT RegisterVT;
  unsigned NumIntermediates;
  unsigned NumRegs;
  if (IsABIRegCopy) {
    NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
        *DAG.getContext(), CallConv.getValue(), ValueVT, IntermediateVT,
        NumIntermediates, RegisterVT);
  } else {
    NumRegs =
        TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
                                   NumIntermediates, RegisterVT);
  }

  assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
  NumParts = NumRegs; // Silence a compiler warning.
  assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");

  // Intermediates may themselves be scalar (count as one element each).
  unsigned IntermediateNumElts = IntermediateVT.isVector() ?
    IntermediateVT.getVectorNumElements() : 1;

  // Convert the vector to the appropriate type if necessary.
  unsigned DestVectorNoElts = NumIntermediates * IntermediateNumElts;

  EVT BuiltVectorTy = EVT::getVectorVT(
      *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts);
  MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
  if (ValueVT != BuiltVectorTy) {
    // Widen first when possible so the bitcast below is size-preserving.
    if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy))
      Val = Widened;

    Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);
  }

  // Split the vector into intermediate operands.
  SmallVector<SDValue, 8> Ops(NumIntermediates);
  for (unsigned i = 0; i != NumIntermediates; ++i) {
    if (IntermediateVT.isVector()) {
      Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val,
                           DAG.getConstant(i * IntermediateNumElts, DL, IdxVT));
    } else {
      Ops[i] = DAG.getNode(
          ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val,
          DAG.getConstant(i, DL, IdxVT));
    }
  }

  // Split the intermediate operands into legal parts.
  if (NumParts == NumIntermediates) {
    // If the register was not expanded, promote or copy the value,
    // as appropriate.
    for (unsigned i = 0; i != NumParts; ++i)
      getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V, CallConv);
  } else if (NumParts > 0) {
    // If the intermediate type was expanded, split each value into
    // legal parts.
    assert(NumIntermediates != 0 && "division by zero");
    assert(NumParts % NumIntermediates == 0 &&
           "Must expand into a divisible number of parts!");
    unsigned Factor = NumParts / NumIntermediates;
    for (unsigned i = 0; i != NumIntermediates; ++i)
      getCopyToParts(DAG, DL, Ops[i], &Parts[i * Factor], Factor, PartVT, V,
                     CallConv);
  }
}
769
770
// Construct a RegsForValue covering a single value type held in the given
// pre-computed register list; RegCount records that all registers belong to
// that one value.
RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
                           EVT valuevt, Optional<CallingConv::ID> CC)
    : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
      RegCount(1, regs.size()), CallConv(CC) {}
774
775
// Construct a RegsForValue for IR type Ty, assigning consecutive virtual
// registers starting at Reg. When CC is present the ABI-specific register
// breakdown is used, otherwise the target's generic one.
RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
                           const DataLayout &DL, unsigned Reg, Type *Ty,
                           Optional<CallingConv::ID> CC) {
  ComputeValueVTs(TLI, DL, Ty, ValueVTs);

  CallConv = CC;

  for (EVT VT : ValueVTs) {
    // Query how many registers this value needs and of what type, honoring
    // the calling convention when this is an ABI-mangled copy.
    unsigned PartCount;
    MVT PartRegVT;
    if (isABIMangled()) {
      PartCount =
          TLI.getNumRegistersForCallingConv(Context, CC.getValue(), VT);
      PartRegVT =
          TLI.getRegisterTypeForCallingConv(Context, CC.getValue(), VT);
    } else {
      PartCount = TLI.getNumRegisters(Context, VT);
      PartRegVT = TLI.getRegisterType(Context, VT);
    }

    // Values occupy consecutive register numbers starting at Reg.
    for (unsigned Idx = 0; Idx != PartCount; ++Idx)
      Regs.push_back(Reg + Idx);
    RegVTs.push_back(PartRegVT);
    RegCount.push_back(PartCount);
    Reg += PartCount;
  }
}
798
799
// Emit CopyFromReg nodes for every register in this RegsForValue and assemble
// the parts back into the original value types. Chain (and Flag, if non-null)
// are threaded through each copy. Known sign/zero-bit information from
// FuncInfo is encoded as AssertSext/AssertZext nodes so later DAG combines
// can exploit it.
SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
                                      FunctionLoweringInfo &FuncInfo,
                                      const SDLoc &dl, SDValue &Chain,
                                      SDValue *Flag, const Value *V) const {
  // A Value with type {} or [0 x %t] needs no registers.
  if (ValueVTs.empty())
    return SDValue();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Assemble the legal parts into the final values.
  SmallVector<SDValue, 4> Values(ValueVTs.size());
  SmallVector<SDValue, 8> Parts;
  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
    // Copy the legal parts from the registers.
    EVT ValueVT = ValueVTs[Value];
    unsigned NumRegs = RegCount[Value];
    // ABI-mangled copies use the calling-convention-specific register type.
    MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv(
                                          *DAG.getContext(),
                                          CallConv.getValue(), RegVTs[Value])
                                    : RegVTs[Value];

    Parts.resize(NumRegs);
    for (unsigned i = 0; i != NumRegs; ++i) {
      SDValue P;
      if (!Flag) {
        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
      } else {
        // Glued copy: result 2 is the outgoing glue value.
        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
        *Flag = P.getValue(2);
      }

      Chain = P.getValue(1);
      Parts[i] = P;

      // If the source register was virtual and if we know something about it,
      // add an assert node.
      if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) ||
          !RegisterVT.isInteger())
        continue;

      const FunctionLoweringInfo::LiveOutInfo *LOI =
        FuncInfo.GetLiveOutRegInfo(Regs[Part+i]);
      if (!LOI)
        continue;

      unsigned RegSize = RegisterVT.getScalarSizeInBits();
      unsigned NumSignBits = LOI->NumSignBits;
      unsigned NumZeroBits = LOI->Known.countMinLeadingZeros();

      if (NumZeroBits == RegSize) {
        // The current value is a zero.
        // Explicitly express that as it would be easier for
        // optimizations to kick in.
        Parts[i] = DAG.getConstant(0, dl, RegisterVT);
        continue;
      }

      // FIXME: We capture more information than the dag can represent.  For
      // now, just use the tightest assertzext/assertsext possible.
      bool isSExt;
      EVT FromVT(MVT::Other);
      if (NumZeroBits) {
        FromVT = EVT::getIntegerVT(*DAG.getContext(), RegSize - NumZeroBits);
        isSExt = false;
      } else if (NumSignBits > 1) {
        FromVT =
            EVT::getIntegerVT(*DAG.getContext(), RegSize - NumSignBits + 1);
        isSExt = true;
      } else {
        // No useful known bits; leave the raw copy in place.
        continue;
      }
      // Add an assertion node.
      assert(FromVT != MVT::Other);
      Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
                             RegisterVT, P, DAG.getValueType(FromVT));
    }

    // Reassemble this value's parts into a value of ValueVT.
    Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), NumRegs,
                                     RegisterVT, ValueVT, V, CallConv);
    Part += NumRegs;
    Parts.clear();
  }

  return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values);
}
885
886
// Split Val into legal parts and emit a CopyToReg for each register in this
// RegsForValue. Chain (and Flag, if non-null) are threaded through the copies;
// the resulting chain is written back to Chain.
void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
                                 const SDLoc &dl, SDValue &Chain, SDValue *Flag,
                                 const Value *V,
                                 ISD::NodeType PreferredExtendType) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  ISD::NodeType ExtendKind = PreferredExtendType;

  // Get the list of the value's legal parts.
  unsigned NumRegs = Regs.size();
  SmallVector<SDValue, 8> Parts(NumRegs);
  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
    unsigned NumParts = RegCount[Value];

    // ABI-mangled copies use the calling-convention-specific register type.
    MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv(
                                          *DAG.getContext(),
                                          CallConv.getValue(), RegVTs[Value])
                                    : RegVTs[Value];

    // Prefer zext when the target says it's free for this value.
    if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
      ExtendKind = ISD::ZERO_EXTEND;

    getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part],
                   NumParts, RegisterVT, V, CallConv, ExtendKind);
    Part += NumParts;
  }

  // Copy the parts into the registers.
  SmallVector<SDValue, 8> Chains(NumRegs);
  for (unsigned i = 0; i != NumRegs; ++i) {
    SDValue Part;
    if (!Flag) {
      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
    } else {
      // Glued copy: result 1 is the outgoing glue value.
      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
      *Flag = Part.getValue(1);
    }

    Chains[i] = Part.getValue(0);
  }

  if (NumRegs == 1 || Flag)
    // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
    // flagged to it. That is the CopyToReg nodes and the user are considered
    // a single scheduling unit. If we create a TokenFactor and return it as
    // chain, then the TokenFactor is both a predecessor (operand) of the
    // user as well as a successor (the TF operands are flagged to the user).
    // c1, f1 = CopyToReg
    // c2, f2 = CopyToReg
    // c3     = TokenFactor c1, c2
    // ...
    //        = op c3, ..., f2
    Chain = Chains[NumRegs-1];
  else
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
}
941
942
// Append the inline-asm operand flag word and the register operands for this
// RegsForValue to Ops. Code is the InlineAsm::Kind_* of the operand; when
// HasMatching is set, MatchingIdx is encoded into the flag word.
void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
                                        unsigned MatchingIdx, const SDLoc &dl,
                                        SelectionDAG &DAG,
                                        std::vector<SDValue> &Ops) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
  if (HasMatching)
    Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
  else if (!Regs.empty() &&
           TargetRegisterInfo::isVirtualRegister(Regs.front())) {
    // Put the register class of the virtual registers in the flag word.  That
    // way, later passes can recompute register class constraints for inline
    // assembly as well as normal instructions.
    // Don't do this for tied operands that can use the regclass information
    // from the def.
    const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
    const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
    Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
  }

  // The flag word precedes the register operands it describes.
  SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32);
  Ops.push_back(Res);

  if (Code == InlineAsm::Kind_Clobber) {
    // Clobbers should always have a 1:1 mapping with registers, and may
    // reference registers that have illegal (e.g. vector) types. Hence, we
    // shouldn't try to apply any sort of splitting logic to them.
    assert(Regs.size() == RegVTs.size() && Regs.size() == ValueVTs.size() &&
           "No 1:1 mapping from clobbers to regs?");
    unsigned SP = TLI.getStackPointerRegisterToSaveRestore();
    (void)SP;
    for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) {
      Ops.push_back(DAG.getRegister(Regs[I], RegVTs[I]));
      assert(
          (Regs[I] != SP ||
           DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()) &&
          "If we clobbered the stack pointer, MFI should know about it.");
    }
    return;
  }

  // Non-clobber operands: one register operand per legal register of each
  // value type.
  for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
    unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
    MVT RegisterVT = RegVTs[Value];
    for (unsigned i = 0; i != NumRegs; ++i) {
      assert(Reg < Regs.size() && "Mismatch in # registers expected");
      unsigned TheReg = Regs[Reg++];
      Ops.push_back(DAG.getRegister(TheReg, RegisterVT));
    }
  }
}
994
995
/// Return one (register, size-in-bits) pair for every register held by this
/// RegsForValue, in register order.
SmallVector<std::pair<unsigned, unsigned>, 4>
RegsForValue::getRegsAndSizes() const {
  SmallVector<std::pair<unsigned, unsigned>, 4> Result;
  unsigned RegIdx = 0;
  for (auto CountAndVT : zip_first(RegCount, RegVTs)) {
    // Named to avoid shadowing the RegCount member.
    unsigned NumRegs = std::get<0>(CountAndVT);
    unsigned SizeInBits = std::get<1>(CountAndVT).getSizeInBits();
    // All registers of one value share the same register type/size.
    for (unsigned E = RegIdx + NumRegs; RegIdx != E; ++RegIdx)
      Result.emplace_back(Regs[RegIdx], SizeInBits);
  }
  return Result;
}
1008
1009
// Initialize per-function state (analyses and cached pointers) before
// building DAGs for a new function.
void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
                               const TargetLibraryInfo *li) {
  AA = aa;
  GFI = gfi;
  LibInfo = li;
  DL = &DAG.getDataLayout();
  Context = DAG.getContext();
  LPadToCallSiteMap.clear();
  SL->init(DAG.getTargetLoweringInfo(), TM, DAG.getDataLayout());
}
1019
1020
1.24M
// Reset all per-basic-block state so the builder can start on a new block.
void SelectionDAGBuilder::clear() {
  NodeMap.clear();
  UnusedArgNodeMap.clear();
  PendingLoads.clear();
  PendingExports.clear();
  CurInst = nullptr;
  HasTailCall = false;
  // Restart node ordering for the new block.
  SDNodeOrder = LowestSDNodeOrder;
  StatepointLowering.clear();
}
1030
1031
1.47M
// Discard all dangling debug info records without attempting to salvage them.
void SelectionDAGBuilder::clearDanglingDebugInfo() {
  DanglingDebugInfoMap.clear();
}
1034
1035
1.27M
/// Return the current virtual root of the DAG, flushing any PendingLoads into
/// it. A single pending load becomes the root directly; multiple loads are
/// merged through a TokenFactor first.
SDValue SelectionDAGBuilder::getRoot() {
  // Nothing pending: the DAG root is already current.
  if (PendingLoads.empty())
    return DAG.getRoot();

  SDValue NewRoot;
  if (PendingLoads.size() == 1) {
    NewRoot = PendingLoads[0];
  } else {
    // Otherwise, we have to make a token factor node.
    NewRoot = DAG.getTokenFactor(getCurSDLoc(), PendingLoads);
  }
  PendingLoads.clear();
  DAG.setRoot(NewRoot);
  return NewRoot;
}
1052
1053
2.14M
// Return the root of the DAG after merging all PendingExports (CopyToReg
// chains for values exported to other blocks) into it with a TokenFactor.
SDValue SelectionDAGBuilder::getControlRoot() {
  SDValue Root = DAG.getRoot();

  if (PendingExports.empty())
    return Root;

  // Turn all of the CopyToReg chains into one factored node.
  if (Root.getOpcode() != ISD::EntryToken) {
    unsigned i = 0, e = PendingExports.size();
    for (; i != e; ++i) {
      assert(PendingExports[i].getNode()->getNumOperands() > 1);
      if (PendingExports[i].getNode()->getOperand(0) == Root)
        break;  // Don't add the root if we already indirectly depend on it.
    }

    // Only append the root when no pending export already chains to it.
    if (i == e)
      PendingExports.push_back(Root);
  }

  Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
                     PendingExports);
  PendingExports.clear();
  DAG.setRoot(Root);
  return Root;
}
1078
1079
6.77M
// Lower one IR instruction into DAG nodes, handling PHI exports before
// terminators, fast-math-flag propagation, and cross-block value exports.
void SelectionDAGBuilder::visit(const Instruction &I) {
  // Set up outgoing PHI node register values before emitting the terminator.
  if (I.isTerminator()) {
    HandlePHINodesInSuccessorBlocks(I.getParent());
  }

  // Increase the SDNodeOrder if dealing with a non-debug instruction.
  if (!isa<DbgInfoIntrinsic>(I))
    ++SDNodeOrder;

  CurInst = &I;

  // Dispatch to the per-opcode visitor.
  visit(I.getOpcode(), I);

  if (auto *FPMO = dyn_cast<FPMathOperator>(&I)) {
    // Propagate the fast-math-flags of this IR instruction to the DAG node that
    // maps to this instruction.
    // TODO: We could handle all flags (nsw, etc) here.
    // TODO: If an IR instruction maps to >1 node, only the final node will have
    //       flags set.
    if (SDNode *Node = getNodeForIRValue(&I)) {
      SDNodeFlags IncomingFlags;
      IncomingFlags.copyFMF(*FPMO);
      // Intersect with any flags the node already carries to stay conservative.
      if (!Node->getFlags().isDefined())
        Node->setFlags(IncomingFlags);
      else
        Node->intersectFlagsWith(IncomingFlags);
    }
  }

  if (!I.isTerminator() && !HasTailCall &&
      !isStatepoint(&I)) // statepoints handle their exports internally
    CopyToExportRegsIfNeeded(&I);

  CurInst = nullptr;
}
1115
1116
0
// PHI nodes are lowered via HandlePHINodesInSuccessorBlocks, never visited
// directly.
void SelectionDAGBuilder::visitPHI(const PHINode &) {
  llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!");
}
1119
1120
7.07M
// Dispatch on an IR opcode to the corresponding visit##OPCODE method. Takes a
// User (not an Instruction) so ConstantExprs can be lowered through the same
// path.
void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
  // Note: this doesn't use InstVisitor, because it has to work with
  // ConstantExpr's in addition to instructions.
  switch (Opcode) {
  default: llvm_unreachable("Unknown instruction type encountered!");
    // Build the switch statement using the Instruction.def file.
#define HANDLE_INST(NUM, OPCODE, CLASS) \
    case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break;
#include "llvm/IR/Instruction.def"
  }
}
1131
1132
// Remove dangling debug-info records whose variable and fragment overlap the
// given (Variable, Expr) pair, first giving each a final chance to be
// salvaged.
void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable,
                                                const DIExpression *Expr) {
  // A record matches when it refers to the same variable and its fragment
  // overlaps the incoming expression's fragment.
  auto isMatchingDbgValue = [&](DanglingDebugInfo &DDI) {
    const DbgValueInst *DI = DDI.getDI();
    DIVariable *DanglingVariable = DI->getVariable();
    DIExpression *DanglingExpr = DI->getExpression();
    if (DanglingVariable == Variable && Expr->fragmentsOverlap(DanglingExpr)) {
      LLVM_DEBUG(dbgs() << "Dropping dangling debug info for " << *DI << "\n");
      return true;
    }
    return false;
  };

  for (auto &DDIMI : DanglingDebugInfoMap) {
    DanglingDebugInfoVector &DDIV = DDIMI.second;

    // If debug info is to be dropped, run it through final checks to see
    // whether it can be salvaged.
    for (auto &DDI : DDIV)
      if (isMatchingDbgValue(DDI))
        salvageUnresolvedDbgValue(DDI);

    DDIV.erase(remove_if(DDIV, isMatchingDbgValue), DDIV.end());
  }
}
1157
1158
// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
// generate the debug data structures now that we've seen its definition.
void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
                                                   SDValue Val) {
  auto DanglingDbgInfoIt = DanglingDebugInfoMap.find(V);
  if (DanglingDbgInfoIt == DanglingDebugInfoMap.end())
    return;

  DanglingDebugInfoVector &DDIV = DanglingDbgInfoIt->second;
  for (auto &DDI : DDIV) {
    const DbgValueInst *DI = DDI.getDI();
    assert(DI && "Ill-formed DanglingDebugInfo");
    DebugLoc dl = DDI.getdl();
    unsigned ValSDNodeOrder = Val.getNode()->getIROrder();
    unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
    DILocalVariable *Variable = DI->getVariable();
    DIExpression *Expr = DI->getExpression();
    assert(Variable->isValidLocationForIntrinsic(dl) &&
           "Expected inlined-at fields to agree");
    SDDbgValue *SDV;
    if (Val.getNode()) {
      // FIXME: I doubt that it is correct to resolve a dangling DbgValue as a
      // FuncArgumentDbgValue (it would be hoisted to the function entry, and if
      // we couldn't resolve it directly when examining the DbgValue intrinsic
      // in the first place we should not be more successful here). Unless we
      // have some test case that prove this to be correct we should avoid
      // calling EmitFuncArgumentDbgValue here.
      if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) {
        LLVM_DEBUG(dbgs() << "Resolve dangling debug info [order="
                          << DbgSDNodeOrder << "] for:\n  " << *DI << "\n");
        LLVM_DEBUG(dbgs() << "  By mapping to:\n    "; Val.dump());
        // Increase the SDNodeOrder for the DbgValue here to make sure it is
        // inserted after the definition of Val when emitting the instructions
        // after ISel. An alternative could be to teach
        // ScheduleDAGSDNodes::EmitSchedule to delay the insertion properly.
        LLVM_DEBUG(if (ValSDNodeOrder > DbgSDNodeOrder) dbgs()
                   << "changing SDNodeOrder from " << DbgSDNodeOrder << " to "
                   << ValSDNodeOrder << "\n");
        SDV = getDbgValue(Val, Variable, Expr, dl,
                          std::max(DbgSDNodeOrder, ValSDNodeOrder));
        DAG.AddDbgValue(SDV, Val.getNode(), false);
      } else
        LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " << *DI
                          << "in EmitFuncArgumentDbgValue\n");
    } else {
      // No SDNode for the value: emit an undef location so earlier locations
      // are terminated rather than silently extended.
      LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
      auto Undef =
          UndefValue::get(DDI.getDI()->getVariableLocation()->getType());
      auto SDV =
          DAG.getConstantDbgValue(Variable, Expr, Undef, dl, DbgSDNodeOrder);
      DAG.AddDbgValue(SDV, nullptr, false);
    }
  }
  DDIV.clear();
}
1213
1214
14
// Last-chance handling of a dangling dbg.value: try to encode it directly,
// then walk back through salvageable instructions, and finally emit an undef
// location if nothing works.
void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
  Value *V = DDI.getDI()->getValue();
  DILocalVariable *Var = DDI.getDI()->getVariable();
  DIExpression *Expr = DDI.getDI()->getExpression();
  DebugLoc DL = DDI.getdl();
  DebugLoc InstDL = DDI.getDI()->getDebugLoc();
  unsigned SDOrder = DDI.getSDNodeOrder();

  // Currently we consider only dbg.value intrinsics -- we tell the salvager
  // that DW_OP_stack_value is desired.
  assert(isa<DbgValueInst>(DDI.getDI()));
  bool StackValue = true;

  // Can this Value be encoded without any further work?
  if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder))
    return;

  // Attempt to salvage back through as many instructions as possible. Bail if
  // a non-instruction is seen, such as a constant expression or global
  // variable. FIXME: Further work could recover those too.
  while (isa<Instruction>(V)) {
    Instruction &VAsInst = *cast<Instruction>(V);
    DIExpression *NewExpr = salvageDebugInfoImpl(VAsInst, Expr, StackValue);

    // If we cannot salvage any further, and haven't yet found a suitable debug
    // expression, bail out.
    if (!NewExpr)
      break;

    // New value and expr now represent this debuginfo.
    V = VAsInst.getOperand(0);
    Expr = NewExpr;

    // Some kind of simplification occurred: check whether the operand of the
    // salvaged debug expression can be encoded in this DAG.
    if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder)) {
      LLVM_DEBUG(dbgs() << "Salvaged debug location info for:\n  "
                        << DDI.getDI() << "\nBy stripping back to:\n  " << V);
      return;
    }
  }

  // This was the final opportunity to salvage this debug information, and it
  // couldn't be done. Place an undef DBG_VALUE at this location to terminate
  // any earlier variable location.
  auto Undef = UndefValue::get(DDI.getDI()->getVariableLocation()->getType());
  auto SDV = DAG.getConstantDbgValue(Var, Expr, Undef, DL, SDNodeOrder);
  DAG.AddDbgValue(SDV, nullptr, false);

  LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n  " << DDI.getDI()
                    << "\n");
  LLVM_DEBUG(dbgs() << "  Last seen at:\n    " << *DDI.getDI()->getOperand(0)
                    << "\n");
}
1268
1269
// Try to attach a debug location for value V (variable Var, expression Expr)
// using whatever representation is currently available: constant, frame
// index, existing SDNode, or virtual register. Returns false if the value
// must dangle until it gets an SDNode.
bool SelectionDAGBuilder::handleDebugValue(const Value *V, DILocalVariable *Var,
                                           DIExpression *Expr, DebugLoc dl,
                                           DebugLoc InstDL, unsigned Order) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDDbgValue *SDV;
  // Constants can be described without any DAG node.
  if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V) ||
      isa<ConstantPointerNull>(V)) {
    SDV = DAG.getConstantDbgValue(Var, Expr, V, dl, SDNodeOrder);
    DAG.AddDbgValue(SDV, nullptr, false);
    return true;
  }

  // If the Value is a frame index, we can create a FrameIndex debug value
  // without relying on the DAG at all.
  if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
    auto SI = FuncInfo.StaticAllocaMap.find(AI);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      auto SDV =
          DAG.getFrameIndexDbgValue(Var, Expr, SI->second,
                                    /*IsIndirect*/ false, dl, SDNodeOrder);
      // Do not attach the SDNodeDbgValue to an SDNode: this variable location
      // is still available even if the SDNode gets optimized out.
      DAG.AddDbgValue(SDV, nullptr, false);
      return true;
    }
  }

  // Do not use getValue() in here; we don't want to generate code at
  // this point if it hasn't been done yet.
  SDValue N = NodeMap[V];
  if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map.
    N = UnusedArgNodeMap[V];
  if (N.getNode()) {
    if (EmitFuncArgumentDbgValue(V, Var, Expr, dl, false, N))
      return true;
    SDV = getDbgValue(N, Var, Expr, dl, SDNodeOrder);
    DAG.AddDbgValue(SDV, N.getNode(), false);
    return true;
  }

  // Special rules apply for the first dbg.values of parameter variables in a
  // function. Identify them by the fact they reference Argument Values, that
  // they're parameters, and they are parameters of the current function. We
  // need to let them dangle until they get an SDNode.
  bool IsParamOfFunc = isa<Argument>(V) && Var->isParameter() &&
                       !InstDL.getInlinedAt();
  if (!IsParamOfFunc) {
    // The value is not used in this block yet (or it would have an SDNode).
    // We still want the value to appear for the user if possible -- if it has
    // an associated VReg, we can refer to that instead.
    auto VMI = FuncInfo.ValueMap.find(V);
    if (VMI != FuncInfo.ValueMap.end()) {
      unsigned Reg = VMI->second;
      // If this is a PHI node, it may be split up into several MI PHI nodes
      // (in FunctionLoweringInfo::set).
      RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
                       V->getType(), None);
      if (RFV.occupiesMultipleRegs()) {
        // Multi-register values are described one register at a time via
        // fragment expressions.
        unsigned Offset = 0;
        unsigned BitsToDescribe = 0;
        if (auto VarSize = Var->getSizeInBits())
          BitsToDescribe = *VarSize;
        if (auto Fragment = Expr->getFragmentInfo())
          BitsToDescribe = Fragment->SizeInBits;
        for (auto RegAndSize : RFV.getRegsAndSizes()) {
          unsigned RegisterSize = RegAndSize.second;
          // Bail out if all bits are described already.
          if (Offset >= BitsToDescribe)
            break;
          // Clamp the last fragment to the remaining variable bits.
          unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe)
              ? BitsToDescribe - Offset
              : RegisterSize;
          auto FragmentExpr = DIExpression::createFragmentExpression(
              Expr, Offset, FragmentSize);
          if (!FragmentExpr)
              continue;
          SDV = DAG.getVRegDbgValue(Var, *FragmentExpr, RegAndSize.first,
                                    false, dl, SDNodeOrder);
          DAG.AddDbgValue(SDV, nullptr, false);
          Offset += RegisterSize;
        }
      } else {
        SDV = DAG.getVRegDbgValue(Var, Expr, Reg, false, dl, SDNodeOrder);
        DAG.AddDbgValue(SDV, nullptr, false);
      }
      return true;
    }
  }

  // No representation available yet; caller must keep this record dangling.
  return false;
}
1360
1361
1.19M
void SelectionDAGBuilder::resolveOrClearDbgInfo() {
1362
1.19M
  // Try to fixup any remaining dangling debug info -- and drop it if we can't.
1363
1.19M
  for (auto &Pair : DanglingDebugInfoMap)
1364
28
    for (auto &DDI : Pair.second)
1365
12
      salvageUnresolvedDbgValue(DDI);
1366
1.19M
  clearDanglingDebugInfo();
1367
1.19M
}
1368
1369
/// getCopyFromRegs - If there was virtual register allocated for the value V
/// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise.
///
/// \param V  The IR value to look up in FuncInfo.ValueMap.
/// \param Ty The IR type used to compute the register decomposition; usually
///           V's own type, but callers may pass a different view of V.
/// \return   The copied value, or an empty SDValue when V has no vreg.
SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
  DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
  SDValue Result;

  if (It != FuncInfo.ValueMap.end()) {
    unsigned InReg = It->second;

    RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
                     DAG.getDataLayout(), InReg, Ty,
                     None); // This is not an ABI copy.
    SDValue Chain = DAG.getEntryNode();
    Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,
                                 V);
    // Now that V has a concrete SDValue, emit any dbg.values that were
    // deferred waiting for it.
    resolveDanglingDebugInfo(V, Result);
  }

  return Result;
}
1389
1390
/// getValue - Return an SDValue for the given Value.
///
/// Lookup order: (1) an SDValue already cached in NodeMap, (2) a CopyFromReg
/// of an allocated virtual register, (3) a freshly built node via
/// getValueImpl, which is then cached.
SDValue SelectionDAGBuilder::getValue(const Value *V) {
  // If we already have an SDValue for this value, use it. It's important
  // to do this first, so that we don't create a CopyFromReg if we already
  // have a regular SDValue.
  SDValue &N = NodeMap[V];
  if (N.getNode()) return N;

  // If there's a virtual register allocated and initialized for this
  // value, use it.
  if (SDValue copyFromReg = getCopyFromRegs(V, V->getType()))
    return copyFromReg;

  // Otherwise create a new SDValue and remember it.
  SDValue Val = getValueImpl(V);
  // NOTE(review): re-index NodeMap rather than assigning through N --
  // getValueImpl can itself insert into NodeMap (e.g. for build_vectors),
  // which presumably may invalidate the reference N; confirm.
  NodeMap[V] = Val;
  resolveDanglingDebugInfo(V, Val);
  return Val;
}
1409
1410
// Return true if SDValue exists for the given Value
1411
463
bool SelectionDAGBuilder::findValue(const Value *V) const {
1412
463
  return (NodeMap.find(V) != NodeMap.end()) ||
1413
463
    
(FuncInfo.ValueMap.find(V) != FuncInfo.ValueMap.end())53
;
1414
463
}
1415
1416
/// getNonRegisterValue - Return an SDValue for the given Value, but
/// don't look in FuncInfo.ValueMap for a virtual register.
///
/// Used when the caller wants the raw node (e.g. when copying a value into a
/// vreg) instead of a CopyFromReg of an existing vreg.
SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
  // If we already have an SDValue for this value, use it.
  SDValue &N = NodeMap[V];
  if (N.getNode()) {
    if (isa<ConstantSDNode>(N) || isa<ConstantFPSDNode>(N)) {
      // Remove the debug location from the node as the node is about to be used
      // in a location which may differ from the original debug location.  This
      // is relevant to Constant and ConstantFP nodes because they can appear
      // as constant expressions inside PHI nodes.
      N->setDebugLoc(DebugLoc());
    }
    return N;
  }

  // Otherwise create a new SDValue and remember it.
  SDValue Val = getValueImpl(V);
  NodeMap[V] = Val;
  // Emit any dbg.values that were waiting for V to get a value.
  resolveDanglingDebugInfo(V, Val);
  return Val;
}
1438
1439
/// getValueImpl - Helper function for getValue and getNonRegisterValue.
/// Create an SDValue for the given value.
///
/// Handles, in order: all flavors of Constant (integers, globals, null
/// pointers, FP, undef, constant expressions, aggregates, data sequences,
/// zero/undef structs and arrays, block addresses, vectors), static allocas
/// (lowered to frame indices), and instructions deferred by fast-isel
/// (lowered to CopyFromReg of a newly initialized vreg).
SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  if (const Constant *C = dyn_cast<Constant>(V)) {
    EVT VT = TLI.getValueType(DAG.getDataLayout(), V->getType(), true);

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
      return DAG.getConstant(*CI, getCurSDLoc(), VT);

    if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
      return DAG.getGlobalAddress(GV, getCurSDLoc(), VT);

    if (isa<ConstantPointerNull>(C)) {
      // Null pointers become an all-zero constant of the pointer type for the
      // value's address space.
      unsigned AS = V->getType()->getPointerAddressSpace();
      return DAG.getConstant(0, getCurSDLoc(),
                             TLI.getPointerTy(DAG.getDataLayout(), AS));
    }

    if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
      return DAG.getConstantFP(*CFP, getCurSDLoc(), VT);

    // Aggregate undefs fall through to the struct/array handling below, which
    // produces one UNDEF per leaf value.
    if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
      return DAG.getUNDEF(VT);

    if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
      // Lower the constant expression by visiting it like the equivalent
      // instruction; the visitor stores the result in NodeMap.
      visit(CE->getOpcode(), *CE);
      SDValue N1 = NodeMap[V];
      assert(N1.getNode() && "visit didn't populate the NodeMap!");
      return N1;
    }

    if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
      SmallVector<SDValue, 4> Constants;
      for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
           OI != OE; ++OI) {
        SDNode *Val = getValue(*OI).getNode();
        // If the operand is an empty aggregate, there are no values.
        if (!Val) continue;
        // Add each leaf value from the operand to the Constants list
        // to form a flattened list of all the values.
        for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
          Constants.push_back(SDValue(Val, i));
      }

      return DAG.getMergeValues(Constants, getCurSDLoc());
    }

    if (const ConstantDataSequential *CDS =
          dyn_cast<ConstantDataSequential>(C)) {
      SmallVector<SDValue, 4> Ops;
      for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
        SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode();
        // Add each leaf value from the operand to the Constants list
        // to form a flattened list of all the values.
        for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
          Ops.push_back(SDValue(Val, i));
      }

      // Arrays become a merged value list; vectors become a BUILD_VECTOR
      // (cached in NodeMap on the way out).
      if (isa<ArrayType>(CDS->getType()))
        return DAG.getMergeValues(Ops, getCurSDLoc());
      return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
    }

    if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
      // Only zeroinitializer or undef aggregates can reach here; all other
      // aggregate forms were handled above.
      assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
             "Unknown struct or array constant!");

      SmallVector<EVT, 4> ValueVTs;
      ComputeValueVTs(TLI, DAG.getDataLayout(), C->getType(), ValueVTs);
      unsigned NumElts = ValueVTs.size();
      if (NumElts == 0)
        return SDValue(); // empty struct
      SmallVector<SDValue, 4> Constants(NumElts);
      for (unsigned i = 0; i != NumElts; ++i) {
        EVT EltVT = ValueVTs[i];
        if (isa<UndefValue>(C))
          Constants[i] = DAG.getUNDEF(EltVT);
        else if (EltVT.isFloatingPoint())
          Constants[i] = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
        else
          Constants[i] = DAG.getConstant(0, getCurSDLoc(), EltVT);
      }

      return DAG.getMergeValues(Constants, getCurSDLoc());
    }

    if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
      return DAG.getBlockAddress(BA, VT);

    // Only vector constants remain at this point.
    VectorType *VecTy = cast<VectorType>(V->getType());
    unsigned NumElements = VecTy->getNumElements();

    // Now that we know the number and type of the elements, get that number of
    // elements into the Ops array based on what kind of constant it is.
    SmallVector<SDValue, 16> Ops;
    if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
      for (unsigned i = 0; i != NumElements; ++i)
        Ops.push_back(getValue(CV->getOperand(i)));
    } else {
      assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
      EVT EltVT =
          TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType());

      // Splat a single zero of the element type across all lanes.
      SDValue Op;
      if (EltVT.isFloatingPoint())
        Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
      else
        Op = DAG.getConstant(0, getCurSDLoc(), EltVT);
      Ops.assign(NumElements, Op);
    }

    // Create a BUILD_VECTOR node.
    return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
  }

  // If this is a static alloca, generate it as the frameindex instead of
  // computation.
  if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
    DenseMap<const AllocaInst*, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(AI);
    if (SI != FuncInfo.StaticAllocaMap.end())
      return DAG.getFrameIndex(SI->second,
                               TLI.getFrameIndexTy(DAG.getDataLayout()));
  }

  // If this is an instruction which fast-isel has deferred, select it now.
  if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
    unsigned InReg = FuncInfo.InitializeRegForValue(Inst);

    RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
                     Inst->getType(), getABIRegCopyCC(V));
    SDValue Chain = DAG.getEntryNode();
    return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
  }

  llvm_unreachable("Can't get register for value!");
}
1578
1579
168
/// Lower a catchpad: mark the current MBB with the EH scope/funclet flags
/// required by the function's EH personality, and (except for wasm) emit a
/// CATCHPAD node chained onto the control root.
void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) {
  auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
  bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX;
  bool IsCoreCLR = Pers == EHPersonality::CoreCLR;
  bool IsSEH = isAsynchronousEHPersonality(Pers);
  bool IsWasmCXX = Pers == EHPersonality::Wasm_CXX;
  MachineBasicBlock *CatchPadMBB = FuncInfo.MBB;
  // SEH (asynchronous) personalities do not treat the pad as a scope entry.
  if (!IsSEH)
    CatchPadMBB->setIsEHScopeEntry();
  // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues.
  if (IsMSVCCXX || IsCoreCLR)
    CatchPadMBB->setIsEHFuncletEntry();
  // Wasm does not need catchpads anymore
  if (!IsWasmCXX)
    DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other,
                            getControlRoot()));
}
1596
1597
159
/// Lower a catchret: wire up the machine-CFG edge to the return target and
/// emit either a plain branch (SEH) or a CATCHRET node carrying the funclet
/// "color" needed by FuncletLayout.
void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
  // Update machine-CFG edge.
  MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()];
  FuncInfo.MBB->addSuccessor(TargetMBB);

  auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
  bool IsSEH = isAsynchronousEHPersonality(Pers);
  if (IsSEH) {
    // If this is not a fall-through branch or optimizations are switched off,
    // emit the branch.
    if (TargetMBB != NextBlock(FuncInfo.MBB) ||
        TM.getOptLevel() == CodeGenOpt::None)
      DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
                              getControlRoot(), DAG.getBasicBlock(TargetMBB)));
    return;
  }

  // Figure out the funclet membership for the catchret's successor.
  // This will be used by the FuncletLayout pass to determine how to order the
  // BB's.
  // A 'catchret' returns to the outer scope's color.
  Value *ParentPad = I.getCatchSwitchParentPad();
  const BasicBlock *SuccessorColor;
  if (isa<ConstantTokenNone>(ParentPad))
    // A 'none' token means the parent scope is the function itself, colored by
    // the entry block.
    SuccessorColor = &FuncInfo.Fn->getEntryBlock();
  else
    SuccessorColor = cast<Instruction>(ParentPad)->getParent();
  assert(SuccessorColor && "No parent funclet for catchret!");
  MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor];
  assert(SuccessorColorMBB && "No MBB for SuccessorColor!");

  // Create the terminator node.
  SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other,
                            getControlRoot(), DAG.getBasicBlock(TargetMBB),
                            DAG.getBasicBlock(SuccessorColorMBB));
  DAG.setRoot(Ret);
}
1634
1635
66
void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
1636
66
  // Don't emit any special code for the cleanuppad instruction. It just marks
1637
66
  // the start of an EH scope/funclet.
1638
66
  FuncInfo.MBB->setIsEHScopeEntry();
1639
66
  auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
1640
66
  if (Pers != EHPersonality::Wasm_CXX) {
1641
46
    FuncInfo.MBB->setIsEHFuncletEntry();
1642
46
    FuncInfo.MBB->setIsCleanupFuncletEntry();
1643
46
  }
1644
66
}
1645
1646
// For wasm, there's always a single catch pad attached to a catchswitch, and
1647
// the control flow always stops at the single catch pad, as it does for a
1648
// cleanup pad. In case the exception caught is not of the types the catch pad
1649
// catches, it will be rethrown by a rethrow.
1650
/// Collect the (at most one) unwind destination reachable from EHPadBB under
/// the wasm EH model: either a cleanup pad, or the handlers of a catchswitch
/// (control never continues past either for wasm).
static void findWasmUnwindDestinations(
    FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
    BranchProbability Prob,
    SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
        &UnwindDests) {
  while (EHPadBB) {
    const Instruction *Pad = EHPadBB->getFirstNonPHI();
    if (isa<CleanupPadInst>(Pad)) {
      // Stop on cleanup pads.
      UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
      UnwindDests.back().first->setIsEHScopeEntry();
      break;
    } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
      // Add the catchpad handlers to the possible destinations. We don't
      // continue to the unwind destination of the catchswitch for wasm.
      for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
        UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
        UnwindDests.back().first->setIsEHScopeEntry();
      }
      break;
    } else {
      // NOTE(review): EHPadBB is not advanced before this continue, so this
      // branch would retry the same block; presumably unreachable for
      // well-formed EH IR (the first non-PHI of an EH pad should be a
      // cleanuppad or catchswitch here) -- confirm.
      continue;
    }
  }
}
1675
1676
/// When an invoke or a cleanupret unwinds to the next EH pad, there are
/// many places it could ultimately go. In the IR, we have a single unwind
/// destination, but in the machine CFG, we enumerate all the possible blocks.
/// This function skips over imaginary basic blocks that hold catchswitch
/// instructions, and finds all the "real" machine
/// basic block destinations. As those destinations may not be successors of
/// EHPadBB, here we also calculate the edge probability to those destinations.
/// The passed-in Prob is the edge probability to EHPadBB.
static void findUnwindDestinations(
    FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
    BranchProbability Prob,
    SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
        &UnwindDests) {
  EHPersonality Personality =
    classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
  bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
  bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
  bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX;
  bool IsSEH = isAsynchronousEHPersonality(Personality);

  // Wasm EH uses its own, simpler destination search.
  if (IsWasmCXX) {
    findWasmUnwindDestinations(FuncInfo, EHPadBB, Prob, UnwindDests);
    assert(UnwindDests.size() <= 1 &&
           "There should be at most one unwind destination for wasm");
    return;
  }

  // Walk the chain of EH pads, recording destinations, until a terminating
  // pad (landingpad or cleanuppad) is reached.
  while (EHPadBB) {
    const Instruction *Pad = EHPadBB->getFirstNonPHI();
    BasicBlock *NewEHPadBB = nullptr;
    if (isa<LandingPadInst>(Pad)) {
      // Stop on landingpads. They are not funclets.
      UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
      break;
    } else if (isa<CleanupPadInst>(Pad)) {
      // Stop on cleanup pads. Cleanups are always funclet entries for all known
      // personalities.
      UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
      UnwindDests.back().first->setIsEHScopeEntry();
      UnwindDests.back().first->setIsEHFuncletEntry();
      break;
    } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
      // Add the catchpad handlers to the possible destinations.
      for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
        UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
        // For MSVC++ and the CLR, catchblocks are funclets and need prologues.
        if (IsMSVCCXX || IsCoreCLR)
          UnwindDests.back().first->setIsEHFuncletEntry();
        if (!IsSEH)
          UnwindDests.back().first->setIsEHScopeEntry();
      }
      NewEHPadBB = CatchSwitch->getUnwindDest();
    } else {
      // NOTE(review): EHPadBB is unchanged here, so this would re-test the
      // same block; presumably unreachable for well-formed EH IR -- confirm.
      continue;
    }

    // Scale the running probability by the edge into the next pad in the
    // chain before following it.
    BranchProbabilityInfo *BPI = FuncInfo.BPI;
    if (BPI && NewEHPadBB)
      Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB);
    EHPadBB = NewEHPadBB;
  }
}
1738
1739
53
/// Lower a cleanupret: add all real unwind destinations (with probabilities)
/// as machine-CFG successors, then emit a CLEANUPRET terminator node.
void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) {
  // Update successor info.
  SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
  auto UnwindDest = I.getUnwindDest();
  BranchProbabilityInfo *BPI = FuncInfo.BPI;
  // Use BPI's probability for the unwind edge when both BPI and an unwind
  // destination exist; otherwise treat the edge as never-taken.
  BranchProbability UnwindDestProb =
      (BPI && UnwindDest)
          ? BPI->getEdgeProbability(FuncInfo.MBB->getBasicBlock(), UnwindDest)
          : BranchProbability::getZero();
  findUnwindDestinations(FuncInfo, UnwindDest, UnwindDestProb, UnwindDests);
  for (auto &UnwindDest : UnwindDests) {
    UnwindDest.first->setIsEHPad();
    addSuccessorWithProb(FuncInfo.MBB, UnwindDest.first, UnwindDest.second);
  }
  FuncInfo.MBB->normalizeSuccProbs();

  // Create the terminator node.
  SDValue Ret =
      DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot());
  DAG.setRoot(Ret);
}
1760
1761
0
// Catchswitch lowering is not implemented in this visitor; reaching it aborts
// compilation with a fatal error.
void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) {
  report_fatal_error("visitCatchSwitch not yet implemented!");
}
1764
1765
259k
/// Lower a 'ret' instruction.
///
/// Three main paths: (1) deoptimizing returns are diverted to
/// LowerDeoptimizingReturn; (2) sret-demoted returns store the value through
/// the demote register's pointer instead of using return registers; (3) the
/// normal path splits the return value into legal register parts (Outs /
/// OutVals) per the calling convention and hands them to the target's
/// LowerReturn. A swifterror vreg, if present, is appended last.
void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  auto &DL = DAG.getDataLayout();
  SDValue Chain = getControlRoot();
  SmallVector<ISD::OutputArg, 8> Outs;
  SmallVector<SDValue, 8> OutVals;

  // Calls to @llvm.experimental.deoptimize don't generate a return value, so
  // lower
  //
  //   %val = call <ty> @llvm.experimental.deoptimize()
  //   ret <ty> %val
  //
  // differently.
  if (I.getParent()->getTerminatingDeoptimizeCall()) {
    LowerDeoptimizingReturn();
    return;
  }

  if (!FuncInfo.CanLowerReturn) {
    unsigned DemoteReg = FuncInfo.DemoteRegister;
    const Function *F = I.getParent()->getParent();

    // Emit a store of the return value through the virtual register.
    // Leave Outs empty so that LowerReturn won't try to load return
    // registers the usual way.
    SmallVector<EVT, 1> PtrValueVTs;
    ComputeValueVTs(TLI, DL,
                    F->getReturnType()->getPointerTo(
                        DAG.getDataLayout().getAllocaAddrSpace()),
                    PtrValueVTs);

    SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
                                        DemoteReg, PtrValueVTs[0]);
    SDValue RetOp = getValue(I.getOperand(0));

    SmallVector<EVT, 4> ValueVTs, MemVTs;
    SmallVector<uint64_t, 4> Offsets;
    ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &MemVTs,
                    &Offsets);
    unsigned NumValues = ValueVTs.size();

    // Store each leaf value of the return aggregate at its byte offset from
    // the demoted return pointer.
    SmallVector<SDValue, 4> Chains(NumValues);
    for (unsigned i = 0; i != NumValues; ++i) {
      // An aggregate return value cannot wrap around the address space, so
      // offsets to its parts don't wrap either.
      SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]);

      SDValue Val = RetOp.getValue(i);
      if (MemVTs[i] != ValueVTs[i])
        Val = DAG.getPtrExtOrTrunc(Val, getCurSDLoc(), MemVTs[i]);
      Chains[i] = DAG.getStore(Chain, getCurSDLoc(), Val,
          // FIXME: better loc info would be nice.
          Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
    }

    Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
                        MVT::Other, Chains);
  } else if (I.getNumOperands() != 0) {
    SmallVector<EVT, 4> ValueVTs;
    ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs);
    unsigned NumValues = ValueVTs.size();
    if (NumValues) {
      SDValue RetOp = getValue(I.getOperand(0));

      const Function *F = I.getParent()->getParent();

      bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
          I.getOperand(0)->getType(), F->getCallingConv(),
          /*IsVarArg*/ false);

      // Determine how small integers are widened, based on return attributes.
      ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
      if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
                                          Attribute::SExt))
        ExtendKind = ISD::SIGN_EXTEND;
      else if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
                                               Attribute::ZExt))
        ExtendKind = ISD::ZERO_EXTEND;

      LLVMContext &Context = F->getContext();
      bool RetInReg = F->getAttributes().hasAttribute(
          AttributeList::ReturnIndex, Attribute::InReg);

      // Split each leaf value into the register parts required by the calling
      // convention and record per-part flags.
      for (unsigned j = 0; j != NumValues; ++j) {
        EVT VT = ValueVTs[j];

        if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
          VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);

        CallingConv::ID CC = F->getCallingConv();

        unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, CC, VT);
        MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, CC, VT);
        SmallVector<SDValue, 4> Parts(NumParts);
        getCopyToParts(DAG, getCurSDLoc(),
                       SDValue(RetOp.getNode(), RetOp.getResNo() + j),
                       &Parts[0], NumParts, PartVT, &I, CC, ExtendKind);

        // 'inreg' on function refers to return value
        ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
        if (RetInReg)
          Flags.setInReg();

        if (I.getOperand(0)->getType()->isPointerTy()) {
          Flags.setPointer();
          Flags.setPointerAddrSpace(
              cast<PointerType>(I.getOperand(0)->getType())->getAddressSpace());
        }

        if (NeedsRegBlock) {
          Flags.setInConsecutiveRegs();
          if (j == NumValues - 1)
            Flags.setInConsecutiveRegsLast();
        }

        // Propagate extension type if any
        if (ExtendKind == ISD::SIGN_EXTEND)
          Flags.setSExt();
        else if (ExtendKind == ISD::ZERO_EXTEND)
          Flags.setZExt();

        for (unsigned i = 0; i < NumParts; ++i) {
          Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
                                        VT, /*isfixed=*/true, 0, 0));
          OutVals.push_back(Parts[i]);
        }
      }
    }
  }

  // Push in swifterror virtual register as the last element of Outs. This makes
  // sure swifterror virtual register will be returned in the swifterror
  // physical register.
  const Function *F = I.getParent()->getParent();
  if (TLI.supportSwiftError() &&
      F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) {
    assert(SwiftError.getFunctionArg() && "Need a swift error argument");
    ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
    Flags.setSwiftError();
    Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/,
                                  EVT(TLI.getPointerTy(DL)) /*argvt*/,
                                  true /*isfixed*/, 1 /*origidx*/,
                                  0 /*partOffs*/));
    // Create SDNode for the swifterror virtual register.
    OutVals.push_back(
        DAG.getRegister(SwiftError.getOrCreateVRegUseAt(
                            &I, FuncInfo.MBB, SwiftError.getFunctionArg()),
                        EVT(TLI.getPointerTy(DL))));
  }

  bool isVarArg = DAG.getMachineFunction().getFunction().isVarArg();
  CallingConv::ID CallConv =
    DAG.getMachineFunction().getFunction().getCallingConv();
  Chain = DAG.getTargetLoweringInfo().LowerReturn(
      Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG);

  // Verify that the target's LowerReturn behaved as expected.
  assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
         "LowerReturn didn't return a valid chain!");

  // Update the DAG with the new chain value resulting from return lowering.
  DAG.setRoot(Chain);
}
1928
1929
/// CopyToExportRegsIfNeeded - If the given value has virtual registers
1930
/// created for it, emit nodes to copy the value into the virtual
1931
/// registers.
1932
5.61M
void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
1933
5.61M
  // Skip empty types
1934
5.61M
  if (V->getType()->isEmptyTy())
1935
16
    return;
1936
5.61M
1937
5.61M
  DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
1938
5.61M
  if (VMI != FuncInfo.ValueMap.end()) {
1939
624k
    assert(!V->use_empty() && "Unused value assigned virtual registers!");
1940
624k
    CopyValueToVirtualRegister(V, VMI->second);
1941
624k
  }
1942
5.61M
}
1943
1944
/// ExportFromCurrentBlock - If this condition isn't known to be exported from
1945
/// the current basic block, add it to ValueMap now so that we'll get a
1946
/// CopyTo/FromReg.
1947
64.8k
void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) {
1948
64.8k
  // No need to export constants.
1949
64.8k
  if (!isa<Instruction>(V) && 
!isa<Argument>(V)17.7k
)
return14.6k
;
1950
50.1k
1951
50.1k
  // Already exported?
1952
50.1k
  if (FuncInfo.isExportedInst(V)) 
return21.3k
;
1953
28.8k
1954
28.8k
  unsigned Reg = FuncInfo.InitializeRegForValue(V);
1955
28.8k
  CopyValueToVirtualRegister(V, Reg);
1956
28.8k
}
1957
1958
bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
1959
67.5k
                                                     const BasicBlock *FromBB) {
1960
67.5k
  // The operands of the setcc have to be in this block.  We don't know
1961
67.5k
  // how to export them from some other block.
1962
67.5k
  if (const Instruction *VI = dyn_cast<Instruction>(V)) {
1963
46.5k
    // Can export from current BB.
1964
46.5k
    if (VI->getParent() == FromBB)
1965
39.8k
      return true;
1966
6.68k
1967
6.68k
    // Is already exported, noop.
1968
6.68k
    return FuncInfo.isExportedInst(V);
1969
6.68k
  }
1970
21.0k
1971
21.0k
  // If this is an argument, we can export it if the BB is the entry block or
1972
21.0k
  // if it is already exported.
1973
21.0k
  if (isa<Argument>(V)) {
1974
2.06k
    if (FromBB == &FromBB->getParent()->getEntryBlock())
1975
365
      return true;
1976
1.69k
1977
1.69k
    // Otherwise, can only export this if it is already exported.
1978
1.69k
    return FuncInfo.isExportedInst(V);
1979
1.69k
  }
1980
19.0k
1981
19.0k
  // Otherwise, constants can always be exported.
1982
19.0k
  return true;
1983
19.0k
}
1984
1985
/// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
1986
BranchProbability
1987
SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src,
1988
1.07M
                                        const MachineBasicBlock *Dst) const {
1989
1.07M
  BranchProbabilityInfo *BPI = FuncInfo.BPI;
1990
1.07M
  const BasicBlock *SrcBB = Src->getBasicBlock();
1991
1.07M
  const BasicBlock *DstBB = Dst->getBasicBlock();
1992
1.07M
  if (!BPI) {
1993
49
    // If BPI is not available, set the default probability as 1 / N, where N is
1994
49
    // the number of successors.
1995
49
    auto SuccSize = std::max<uint32_t>(succ_size(SrcBB), 1);
1996
49
    return BranchProbability(1, SuccSize);
1997
49
  }
1998
1.07M
  return BPI->getEdgeProbability(SrcBB, DstBB);
1999
1.07M
}
2000
2001
void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src,
2002
                                               MachineBasicBlock *Dst,
2003
1.15M
                                               BranchProbability Prob) {
2004
1.15M
  if (!FuncInfo.BPI)
2005
1.61k
    Src->addSuccessorWithoutProb(Dst);
2006
1.15M
  else {
2007
1.15M
    if (Prob.isUnknown())
2008
991k
      Prob = getEdgeProbability(Src, Dst);
2009
1.15M
    Src->addSuccessor(Dst, Prob);
2010
1.15M
  }
2011
1.15M
}
2012
2013
70.5k
static bool InBlock(const Value *V, const BasicBlock *BB) {
2014
70.5k
  if (const Instruction *I = dyn_cast<Instruction>(V))
2015
70.3k
    return I->getParent() == BB;
2016
249
  return true;
2017
249
}
2018
2019
/// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
/// This function emits a branch and is used at the leaves of an OR or an
/// AND operator tree.
///
/// \param Cond       the leaf condition value to branch on.
/// \param TBB        destination block when the condition is true.
/// \param FBB        destination block when the condition is false.
/// \param CurBB      block the compare/branch is emitted into.
/// \param SwitchBB   the first block of the whole and/or sequence.
/// \param TProb      branch probability towards TBB.
/// \param FProb      branch probability towards FBB.
/// \param InvertCond when true, branch on the logical negation of Cond.
void
SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
                                                  MachineBasicBlock *TBB,
                                                  MachineBasicBlock *FBB,
                                                  MachineBasicBlock *CurBB,
                                                  MachineBasicBlock *SwitchBB,
                                                  BranchProbability TProb,
                                                  BranchProbability FProb,
                                                  bool InvertCond) {
  const BasicBlock *BB = CurBB->getBasicBlock();

  // If the leaf of the tree is a comparison, merge the condition into
  // the caseblock.
  if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
    // The operands of the cmp have to be in this block.  We don't know
    // how to export them from some other block.  If this is the first block
    // of the sequence, no exporting is needed.
    if (CurBB == SwitchBB ||
        (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
         isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
      ISD::CondCode Condition;
      if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
        // InvertCond is folded into the predicate itself rather than by
        // swapping the destination blocks.
        ICmpInst::Predicate Pred =
            InvertCond ? IC->getInversePredicate() : IC->getPredicate();
        Condition = getICmpCondCode(Pred);
      } else {
        const FCmpInst *FC = cast<FCmpInst>(Cond);
        FCmpInst::Predicate Pred =
            InvertCond ? FC->getInversePredicate() : FC->getPredicate();
        Condition = getFCmpCondCode(Pred);
        // Under -enable-no-nans-fp-math the ordered/unordered distinction
        // can be dropped from the condition code.
        if (TM.Options.NoNaNsFPMath)
          Condition = getFCmpCodeWithoutNaN(Condition);
      }

      // Queue the comparison as a CaseBlock; actual DAG nodes are emitted
      // later by visitSwitchCase.
      CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
                   TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
      SL->SwitchCases.push_back(CB);
      return;
    }
  }

  // Create a CaseBlock record representing this branch.
  // Not a (usable) comparison: branch on "Cond == true" (or "!= true" when
  // inverted).
  ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ;
  CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()),
               nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
  SL->SwitchCases.push_back(CB);
}
2069
2070
/// FindMergedConditions - Recursively decompose an and/or tree of i1
/// conditions rooted at Cond into a sequence of conditional branches,
/// distributing the incoming TProb/FProb branch probabilities over the new
/// blocks. Leaves are emitted via EmitBranchForMergedCondition.
void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
                                               MachineBasicBlock *TBB,
                                               MachineBasicBlock *FBB,
                                               MachineBasicBlock *CurBB,
                                               MachineBasicBlock *SwitchBB,
                                               Instruction::BinaryOps Opc,
                                               BranchProbability TProb,
                                               BranchProbability FProb,
                                               bool InvertCond) {
  // Skip over not part of the tree and remember to invert op and operands at
  // next level.
  Value *NotCond;
  if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) &&
      InBlock(NotCond, CurBB->getBasicBlock())) {
    FindMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
                         !InvertCond);
    return;
  }

  const Instruction *BOp = dyn_cast<Instruction>(Cond);
  // Compute the effective opcode for Cond, taking into account whether it needs
  // to be inverted, e.g.
  //   and (not (or A, B)), C
  // gets lowered as
  //   and (and (not A, not B), C)
  unsigned BOpc = 0;
  if (BOp) {
    BOpc = BOp->getOpcode();
    if (InvertCond) {
      // De Morgan: inverting swaps And <-> Or.
      if (BOpc == Instruction::And)
        BOpc = Instruction::Or;
      else if (BOpc == Instruction::Or)
        BOpc = Instruction::And;
    }
  }

  // If this node is not part of the or/and tree, emit it as a branch.
  // (Wrong opcode, multiple uses, or operands defined in another block all
  // disqualify it from being merged into the tree.)
  if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
      BOpc != unsigned(Opc) || !BOp->hasOneUse() ||
      BOp->getParent() != CurBB->getBasicBlock() ||
      !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
      !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
    EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
                                 TProb, FProb, InvertCond);
    return;
  }

  //  Create TmpBB after CurBB.
  MachineFunction::iterator BBI(CurBB);
  MachineFunction &MF = DAG.getMachineFunction();
  MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
  CurBB->getParent()->insert(++BBI, TmpBB);

  if (Opc == Instruction::Or) {
    // Codegen X | Y as:
    // BB1:
    //   jmp_if_X TBB
    //   jmp TmpBB
    // TmpBB:
    //   jmp_if_Y TBB
    //   jmp FBB
    //

    // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
    // The requirement is that
    //   TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
    //     = TrueProb for original BB.
    // Assuming the original probabilities are A and B, one choice is to set
    // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
    // A/(1+B) and 2B/(1+B). This choice assumes that
    //   TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
    // Another choice is to assume TrueProb for BB1 equals to TrueProb for
    // TmpBB, but the math is more complicated.

    auto NewTrueProb = TProb / 2;
    auto NewFalseProb = TProb / 2 + FProb;
    // Emit the LHS condition.
    FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc,
                         NewTrueProb, NewFalseProb, InvertCond);

    // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
    SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
    BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
    // Emit the RHS condition into TmpBB.
    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
                         Probs[0], Probs[1], InvertCond);
  } else {
    assert(Opc == Instruction::And && "Unknown merge op!");
    // Codegen X & Y as:
    // BB1:
    //   jmp_if_X TmpBB
    //   jmp FBB
    // TmpBB:
    //   jmp_if_Y TBB
    //   jmp FBB
    //
    //  This requires creation of TmpBB after CurBB.

    // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
    // The requirement is that
    //   FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
    //     = FalseProb for original BB.
    // Assuming the original probabilities are A and B, one choice is to set
    // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
    // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
    // TrueProb for BB1 * FalseProb for TmpBB.

    auto NewTrueProb = TProb + FProb / 2;
    auto NewFalseProb = FProb / 2;
    // Emit the LHS condition.
    FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc,
                         NewTrueProb, NewFalseProb, InvertCond);

    // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
    SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
    BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
    // Emit the RHS condition into TmpBB.
    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
                         Probs[0], Probs[1], InvertCond);
  }
}
2191
2192
/// If the set of cases should be emitted as a series of branches, return true.
2193
/// If we should emit this as a bunch of and/or'd together conditions, return
2194
/// false.
2195
bool
2196
36.0k
SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases) {
2197
36.0k
  if (Cases.size() != 2) 
return true3.26k
;
2198
32.8k
2199
32.8k
  // If this is two comparisons of the same values or'd or and'd together, they
2200
32.8k
  // will get folded into a single comparison, so don't emit two blocks.
2201
32.8k
  if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
2202
32.8k
       
Cases[0].CmpRHS == Cases[1].CmpRHS1.40k
) ||
2203
32.8k
      
(32.7k
Cases[0].CmpRHS == Cases[1].CmpLHS32.7k
&&
2204
32.7k
       
Cases[0].CmpLHS == Cases[1].CmpRHS30
)) {
2205
9
    return false;
2206
9
  }
2207
32.7k
2208
32.7k
  // Handle: (X != null) | (Y != null) --> (X|Y) != 0
2209
32.7k
  // Handle: (X == null) & (Y == null) --> (X|Y) == 0
2210
32.7k
  if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
2211
32.7k
      
Cases[0].CC == Cases[1].CC9.88k
&&
2212
32.7k
      
isa<Constant>(Cases[0].CmpRHS)7.07k
&&
2213
32.7k
      
cast<Constant>(Cases[0].CmpRHS)->isNullValue()6.99k
) {
2214
6.32k
    if (Cases[0].CC == ISD::SETEQ && 
Cases[0].TrueBB == Cases[1].ThisBB5.87k
)
2215
5.56k
      return false;
2216
761
    if (Cases[0].CC == ISD::SETNE && 
Cases[0].FalseBB == Cases[1].ThisBB168
)
2217
15
      return false;
2218
27.2k
  }
2219
27.2k
2220
27.2k
  return true;
2221
27.2k
}
2222
2223
825k
/// Lower an IR BranchInst: either a plain unconditional branch, a merged
/// and/or condition tree (via FindMergedConditions), or a single conditional
/// branch emitted through visitSwitchCase.
void SelectionDAGBuilder::visitBr(const BranchInst &I) {
  MachineBasicBlock *BrMBB = FuncInfo.MBB;

  // Update machine-CFG edges.
  MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];

  if (I.isUnconditional()) {
    // Update machine-CFG edges.
    BrMBB->addSuccessor(Succ0MBB);

    // If this is not a fall-through branch or optimizations are switched off,
    // emit the branch.
    if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None)
      DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
                              MVT::Other, getControlRoot(),
                              DAG.getBasicBlock(Succ0MBB)));

    return;
  }

  // If this condition is one of the special cases we handle, do special stuff
  // now.
  const Value *CondVal = I.getCondition();
  MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];

  // If this is a series of conditions that are or'd or and'd together, emit
  // this as a sequence of branches instead of setcc's with and/or operations.
  // As long as jumps are not expensive, this should improve performance.
  // For example, instead of something like:
  //     cmp A, B
  //     C = seteq
  //     cmp D, E
  //     F = setle
  //     or C, F
  //     jnz foo
  // Emit:
  //     cmp A, B
  //     je foo
  //     cmp D, E
  //     jle foo
  if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
    Instruction::BinaryOps Opcode = BOp->getOpcode();
    // Only merge single-use And/Or trees, and never when the branch is
    // marked !unpredictable or the target says jumps are expensive.
    if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() &&
        !I.getMetadata(LLVMContext::MD_unpredictable) &&
        (Opcode == Instruction::And || Opcode == Instruction::Or)) {
      FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
                           Opcode,
                           getEdgeProbability(BrMBB, Succ0MBB),
                           getEdgeProbability(BrMBB, Succ1MBB),
                           /*InvertCond=*/false);
      // If the compares in later blocks need to use values not currently
      // exported from this block, export them now.  This block should always
      // be the first entry.
      assert(SL->SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");

      // Allow some cases to be rejected.
      if (ShouldEmitAsBranches(SL->SwitchCases)) {
        for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i) {
          ExportFromCurrentBlock(SL->SwitchCases[i].CmpLHS);
          ExportFromCurrentBlock(SL->SwitchCases[i].CmpRHS);
        }

        // Emit the branch for this block.
        visitSwitchCase(SL->SwitchCases[0], BrMBB);
        // The remaining cases stay queued in SL->SwitchCases and are emitted
        // later into the blocks FindMergedConditions created.
        SL->SwitchCases.erase(SL->SwitchCases.begin());
        return;
      }

      // Okay, we decided not to do this, remove any inserted MBB's and clear
      // SwitchCases.
      for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i)
        FuncInfo.MF->erase(SL->SwitchCases[i].ThisBB);

      SL->SwitchCases.clear();
    }
  }

  // Create a CaseBlock record representing this branch.
  CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
               nullptr, Succ0MBB, Succ1MBB, BrMBB, getCurSDLoc());

  // Use visitSwitchCase to actually insert the fast branch sequence for this
  // cond branch.
  visitSwitchCase(CB, BrMBB);
}
2308
2309
/// visitSwitchCase - Emits the necessary code to represent a single node in
/// the binary search tree resulting from lowering a switch instruction.
void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
                                          MachineBasicBlock *SwitchBB) {
  SDValue Cond;
  SDValue CondLHS = getValue(CB.CmpLHS);
  SDLoc dl = CB.DL;

  if (CB.CC == ISD::SETTRUE) {
    // Branch or fall through to TrueBB.
    addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
    SwitchBB->normalizeSuccProbs();
    if (CB.TrueBB != NextBlock(SwitchBB)) {
      DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, getControlRoot(),
                              DAG.getBasicBlock(CB.TrueBB)));
    }
    return;
  }

  auto &TLI = DAG.getTargetLoweringInfo();
  EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), CB.CmpLHS->getType());

  // Build the setcc now.
  if (!CB.CmpMHS) {
    // Fold "(X == true)" to X and "(X == false)" to !X to
    // handle common cases produced by branch lowering.
    if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
        CB.CC == ISD::SETEQ)
      Cond = CondLHS;
    else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
             CB.CC == ISD::SETEQ) {
      // !X is computed as X xor 1.
      SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType());
      Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
    } else {
      SDValue CondRHS = getValue(CB.CmpRHS);

      // If a pointer's DAG type is larger than its memory type then the DAG
      // values are zero-extended. This breaks signed comparisons so truncate
      // back to the underlying type before doing the compare.
      if (CondLHS.getValueType() != MemVT) {
        CondLHS = DAG.getPtrExtOrTrunc(CondLHS, getCurSDLoc(), MemVT);
        CondRHS = DAG.getPtrExtOrTrunc(CondRHS, getCurSDLoc(), MemVT);
      }
      Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, CondRHS, CB.CC);
    }
  } else {
    // Range case: CmpLHS <= CmpMHS <= CmpRHS, with constant bounds.
    assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");

    const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
    const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();

    SDValue CmpOp = getValue(CB.CmpMHS);
    EVT VT = CmpOp.getValueType();

    if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
      // Lower bound is the signed minimum: only the upper bound matters.
      Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, dl, VT),
                          ISD::SETLE);
    } else {
      // Rewrite Low <= X <= High as (X - Low) u<= (High - Low), one compare.
      SDValue SUB = DAG.getNode(ISD::SUB, dl,
                                VT, CmpOp, DAG.getConstant(Low, dl, VT));
      Cond = DAG.getSetCC(dl, MVT::i1, SUB,
                          DAG.getConstant(High-Low, dl, VT), ISD::SETULE);
    }
  }

  // Update successor info
  addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
  // TrueBB and FalseBB are always different unless the incoming IR is
  // degenerate. This only happens when running llc on weird IR.
  if (CB.TrueBB != CB.FalseBB)
    addSuccessorWithProb(SwitchBB, CB.FalseBB, CB.FalseProb);
  SwitchBB->normalizeSuccProbs();

  // If the lhs block is the next block, invert the condition so that we can
  // fall through to the lhs instead of the rhs block.
  if (CB.TrueBB == NextBlock(SwitchBB)) {
    // Note: CB is mutated in place here (TrueBB/FalseBB swapped).
    std::swap(CB.TrueBB, CB.FalseBB);
    SDValue True = DAG.getConstant(1, dl, Cond.getValueType());
    Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
  }

  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
                               MVT::Other, getControlRoot(), Cond,
                               DAG.getBasicBlock(CB.TrueBB));

  // Insert the false branch. Do this even if it's a fall through branch,
  // this makes it easier to do DAG optimizations which require inverting
  // the branch condition.
  BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
                       DAG.getBasicBlock(CB.FalseBB));

  DAG.setRoot(BrCond);
}
2402
2403
/// visitJumpTable - Emit JumpTable node in the current MBB
2404
2.26k
void SelectionDAGBuilder::visitJumpTable(SwitchCG::JumpTable &JT) {
2405
2.26k
  // Emit the code for the jump table
2406
2.26k
  assert(JT.Reg != -1U && "Should lower JT Header first!");
2407
2.26k
  EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2408
2.26k
  SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(),
2409
2.26k
                                     JT.Reg, PTy);
2410
2.26k
  SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
2411
2.26k
  SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(),
2412
2.26k
                                    MVT::Other, Index.getValue(1),
2413
2.26k
                                    Table, Index);
2414
2.26k
  DAG.setRoot(BrJumpTable);
2415
2.26k
}
2416
2417
/// visitJumpTableHeader - This function emits necessary code to produce index
/// in the JumpTable from switch case.
void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT,
                                               JumpTableHeader &JTH,
                                               MachineBasicBlock *SwitchBB) {
  SDLoc dl = getCurSDLoc();

  // Subtract the lowest switch case value from the value being switched on.
  SDValue SwitchOp = getValue(JTH.SValue);
  EVT VT = SwitchOp.getValueType();
  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
                            DAG.getConstant(JTH.First, dl, VT));

  // The SDNode we just created, which holds the value being switched on minus
  // the smallest case value, needs to be copied to a virtual register so it
  // can be used as an index into the jump table in a subsequent basic block.
  // This value may be smaller or larger than the target's pointer type, and
  // therefore require extension or truncating.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy(DAG.getDataLayout()));

  unsigned JumpTableReg =
      FuncInfo.CreateReg(TLI.getPointerTy(DAG.getDataLayout()));
  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl,
                                    JumpTableReg, SwitchOp);
  // Record the register so visitJumpTable can read the index back.
  JT.Reg = JumpTableReg;

  if (!JTH.OmitRangeCheck) {
    // Emit the range check for the jump table, and branch to the default block
    // for the switch statement if the value being switched on exceeds the
    // largest case in the switch.
    SDValue CMP = DAG.getSetCC(
        dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                   Sub.getValueType()),
        Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT);

    SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
                                 MVT::Other, CopyTo, CMP,
                                 DAG.getBasicBlock(JT.Default));

    // Avoid emitting unnecessary branches to the next block.
    if (JT.MBB != NextBlock(SwitchBB))
      BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
                           DAG.getBasicBlock(JT.MBB));

    DAG.setRoot(BrCond);
  } else {
    // Range check elided: the chain still has to flow through the CopyToReg.
    // Avoid emitting unnecessary branches to the next block.
    if (JT.MBB != NextBlock(SwitchBB))
      DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, CopyTo,
                              DAG.getBasicBlock(JT.MBB)));
    else
      DAG.setRoot(CopyTo);
  }
}
2472
2473
/// Create a LOAD_STACK_GUARD node, and let it carry the target specific global
2474
/// variable if there exists one.
2475
static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
2476
2.14k
                                 SDValue &Chain) {
2477
2.14k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2478
2.14k
  EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
2479
2.14k
  EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout());
2480
2.14k
  MachineFunction &MF = DAG.getMachineFunction();
2481
2.14k
  Value *Global = TLI.getSDagStackGuard(*MF.getFunction().getParent());
2482
2.14k
  MachineSDNode *Node =
2483
2.14k
      DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain);
2484
2.14k
  if (Global) {
2485
2.12k
    MachinePointerInfo MPInfo(Global);
2486
2.12k
    auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
2487
2.12k
                 MachineMemOperand::MODereferenceable;
2488
2.12k
    MachineMemOperand *MemRef = MF.getMachineMemOperand(
2489
2.12k
        MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlignment(PtrTy));
2490
2.12k
    DAG.setNodeMemRefs(Node, {MemRef});
2491
2.12k
  }
2492
2.14k
  if (PtrTy != PtrMemTy)
2493
0
    return DAG.getPtrExtOrTrunc(SDValue(Node, 0), DL, PtrMemTy);
2494
2.14k
  return SDValue(Node, 0);
2495
2.14k
}
2496
2497
/// Codegen a new tail for a stack protector check ParentMBB which has had its
/// tail spliced into a stack protector check success bb.
///
/// For a high level explanation of how this fits into the stack protector
/// generation see the comment on the declaration of class
/// StackProtectorDescriptor.
void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
                                                  MachineBasicBlock *ParentBB) {

  // First create the loads to the guard/stack slot for the comparison.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
  EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout());

  MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
  int FI = MFI.getStackProtectorIndex();

  SDValue Guard;
  SDLoc dl = getCurSDLoc();
  SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
  const Module &M = *ParentBB->getParent()->getFunction().getParent();
  unsigned Align = DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext()));

  // Generate code to load the content of the guard slot.
  // Volatile so the load cannot be reordered/removed.
  SDValue GuardVal = DAG.getLoad(
      PtrMemTy, dl, DAG.getEntryNode(), StackSlotPtr,
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align,
      MachineMemOperand::MOVolatile);

  if (TLI.useStackGuardXorFP())
    GuardVal = TLI.emitStackGuardXorFP(DAG, GuardVal, dl);

  // Retrieve guard check function, nullptr if instrumentation is inlined.
  if (const Function *GuardCheckFn = TLI.getSSPStackGuardCheck(M)) {
    // The target provides a guard check function to validate the guard value.
    // Generate a call to that function with the content of the guard slot as
    // argument.
    FunctionType *FnTy = GuardCheckFn->getFunctionType();
    assert(FnTy->getNumParams() == 1 && "Invalid function signature");

    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    Entry.Node = GuardVal;
    Entry.Ty = FnTy->getParamType(0);
    if (GuardCheckFn->hasAttribute(1, Attribute::AttrKind::InReg))
      Entry.IsInReg = true;
    Args.push_back(Entry);

    TargetLowering::CallLoweringInfo CLI(DAG);
    CLI.setDebugLoc(getCurSDLoc())
        .setChain(DAG.getEntryNode())
        .setCallee(GuardCheckFn->getCallingConv(), FnTy->getReturnType(),
                   getValue(GuardCheckFn), std::move(Args));

    std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
    DAG.setRoot(Result.second);
    return;
  }

  // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
  // Otherwise, emit a volatile load to retrieve the stack guard value.
  SDValue Chain = DAG.getEntryNode();
  if (TLI.useLoadStackGuardNode()) {
    Guard = getLoadStackGuard(DAG, dl, Chain);
  } else {
    const Value *IRGuard = TLI.getSDagStackGuard(M);
    SDValue GuardPtr = getValue(IRGuard);

    Guard = DAG.getLoad(PtrMemTy, dl, Chain, GuardPtr,
                        MachinePointerInfo(IRGuard, 0), Align,
                        MachineMemOperand::MOVolatile);
  }

  // Perform the comparison via a subtract/getsetcc.
  EVT VT = Guard.getValueType();
  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, GuardVal);

  SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(),
                                                        *DAG.getContext(),
                                                        Sub.getValueType()),
                             Sub, DAG.getConstant(0, dl, VT), ISD::SETNE);

  // If the sub is not 0, then we know the guard/stackslot do not equal, so
  // branch to failure MBB.
  // Note the chain used here is GuardVal.getOperand(0), the chain of the
  // guard-slot load emitted above.
  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
                               MVT::Other, GuardVal.getOperand(0),
                               Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
  // Otherwise branch to success MBB.
  SDValue Br = DAG.getNode(ISD::BR, dl,
                           MVT::Other, BrCond,
                           DAG.getBasicBlock(SPD.getSuccessMBB()));

  DAG.setRoot(Br);
}
2591
2592
/// Codegen the failure basic block for a stack protector check.
2593
///
2594
/// A failure stack protector machine basic block consists simply of a call to
2595
/// __stack_chk_fail().
2596
///
2597
/// For a high level explanation of how this fits into the stack protector
2598
/// generation see the comment on the declaration of class
2599
/// StackProtectorDescriptor.
2600
void
2601
392
SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
2602
392
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2603
392
  SDValue Chain =
2604
392
      TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
2605
392
                      None, false, getCurSDLoc(), false, false).second;
2606
392
  // On PS4, the "return address" must still be within the calling function,
2607
392
  // even if it's at the very end, so emit an explicit TRAP here.
2608
392
  // Passing 'true' for doesNotReturn above won't generate the trap for us.
2609
392
  if (TM.getTargetTriple().isPS4CPU())
2610
1
    Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain);
2611
392
2612
392
  DAG.setRoot(Chain);
2613
392
}
2614
2615
/// visitBitTestHeader - This function emits necessary code to produce value
/// suitable for "bit tests".
///
/// It rebases the switch value to B.First, emits an unsigned range check
/// against B.Range, copies the rebased value into a fresh virtual register
/// (read back later by visitBitTestCase), and branches either to the default
/// block (out of range) or into the first bit-test block.
void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
                                             MachineBasicBlock *SwitchBB) {
  SDLoc dl = getCurSDLoc();

  // Subtract the minimum value so all case masks are relative to zero.
  SDValue SwitchOp = getValue(B.SValue);
  EVT VT = SwitchOp.getValueType();
  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
                            DAG.getConstant(B.First, dl, VT));

  // Check range: an unsigned compare covers both below-First (wraps to a
  // huge value) and above-Last in one SETUGT.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue RangeCmp = DAG.getSetCC(
      dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                 Sub.getValueType()),
      Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT);

  // Determine the type of the test operands.
  bool UsePtrType = false;
  if (!TLI.isTypeLegal(VT))
    UsePtrType = true;
  else {
    // Even with a legal type, fall back to the pointer type if any case
    // mask does not fit in VT's bit width.
    for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
      if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) {
        // Switch table case range are encoded into series of masks.
        // Just use pointer type, it's guaranteed to fit.
        UsePtrType = true;
        break;
      }
  }
  if (UsePtrType) {
    VT = TLI.getPointerTy(DAG.getDataLayout());
    Sub = DAG.getZExtOrTrunc(Sub, dl, VT);
  }

  // Stash the rebased value in a virtual register so each bit-test block
  // can read it back with CopyFromReg.
  B.RegVT = VT.getSimpleVT();
  B.Reg = FuncInfo.CreateReg(B.RegVT);
  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, B.Reg, Sub);

  MachineBasicBlock* MBB = B.Cases[0].ThisBB;

  // Record both CFG successors (default + first test block) with their
  // probabilities, then normalize so they sum to one.
  addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
  addSuccessorWithProb(SwitchBB, MBB, B.Prob);
  SwitchBB->normalizeSuccProbs();

  SDValue BrRange = DAG.getNode(ISD::BRCOND, dl,
                                MVT::Other, CopyTo, RangeCmp,
                                DAG.getBasicBlock(B.Default));

  // Avoid emitting unnecessary branches to the next block.
  if (MBB != NextBlock(SwitchBB))
    BrRange = DAG.getNode(ISD::BR, dl, MVT::Other, BrRange,
                          DAG.getBasicBlock(MBB));

  DAG.setRoot(BrRange);
}
2673
2674
/// visitBitTestCase - this function produces one "bit test".
///
/// Reads back the rebased switch value stored by visitBitTestHeader (via
/// register \p Reg) and emits one of three equivalent tests against B.Mask,
/// then a conditional branch to B.TargetBB and a fallthrough/branch to
/// \p NextMBB.
void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
                                           MachineBasicBlock* NextMBB,
                                           BranchProbability BranchProbToNext,
                                           unsigned Reg,
                                           BitTestCase &B,
                                           MachineBasicBlock *SwitchBB) {
  SDLoc dl = getCurSDLoc();
  MVT VT = BB.RegVT;
  SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT);
  SDValue Cmp;
  unsigned PopCount = countPopulation(B.Mask);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (PopCount == 1) {
    // Testing for a single bit; just compare the shift count with what it
    // would need to be to shift a 1 bit in that position.
    Cmp = DAG.getSetCC(
        dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
        ShiftOp, DAG.getConstant(countTrailingZeros(B.Mask), dl, VT),
        ISD::SETEQ);
  } else if (PopCount == BB.Range) {
    // There is only one zero bit in the range, test for it directly.
    Cmp = DAG.getSetCC(
        dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
        ShiftOp, DAG.getConstant(countTrailingOnes(B.Mask), dl, VT),
        ISD::SETNE);
  } else {
    // General case: make desired shift (1 << value) ...
    SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT,
                                    DAG.getConstant(1, dl, VT), ShiftOp);

    // ... then emit bit tests and jumps: branch taken iff the shifted bit
    // intersects the mask.
    SDValue AndOp = DAG.getNode(ISD::AND, dl,
                                VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT));
    Cmp = DAG.getSetCC(
        dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
        AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
  addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
  // The branch probability from SwitchBB to NextMBB is BranchProbToNext.
  addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
  // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is
  // one as they are relative probabilities (and thus work more like weights),
  // and hence we need to normalize them to let the sum of them become one.
  SwitchBB->normalizeSuccProbs();

  SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl,
                              MVT::Other, getControlRoot(),
                              Cmp, DAG.getBasicBlock(B.TargetBB));

  // Avoid emitting unnecessary branches to the next block.
  if (NextMBB != NextBlock(SwitchBB))
    BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd,
                        DAG.getBasicBlock(NextMBB));

  DAG.setRoot(BrAnd);
}
2733
2734
6.36k
/// Lower an IR 'invoke' instruction: emit the call (or the few special
/// intrinsics that may be invoked), wire up the normal and unwind machine-CFG
/// successors, and fall through to the normal destination.
void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
  MachineBasicBlock *InvokeMBB = FuncInfo.MBB;

  // Retrieve successors. Look through artificial IR level blocks like
  // catchswitch for successors.
  MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
  const BasicBlock *EHPadBB = I.getSuccessor(1);

  // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
  // have to do anything here to lower funclet bundles.
  assert(!I.hasOperandBundlesOtherThan(
             {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
         "Cannot lower invokes with arbitrary operand bundles yet!");

  const Value *Callee(I.getCalledValue());
  const Function *Fn = dyn_cast<Function>(Callee);
  if (isa<InlineAsm>(Callee))
    visitInlineAsm(&I);
  else if (Fn && Fn->isIntrinsic()) {
    // Only a small, closed set of intrinsics may appear as invoke callees.
    switch (Fn->getIntrinsicID()) {
    default:
      llvm_unreachable("Cannot invoke this intrinsic");
    case Intrinsic::donothing:
      // Ignore invokes to @llvm.donothing: jump directly to the next BB.
      break;
    case Intrinsic::experimental_patchpoint_void:
    case Intrinsic::experimental_patchpoint_i64:
      visitPatchpoint(&I, EHPadBB);
      break;
    case Intrinsic::experimental_gc_statepoint:
      LowerStatepoint(ImmutableStatepoint(&I), EHPadBB);
      break;
    case Intrinsic::wasm_rethrow_in_catch: {
      // This is usually done in visitTargetIntrinsic, but this intrinsic is
      // special because it can be invoked, so we manually lower it to a DAG
      // node here.
      SmallVector<SDValue, 8> Ops;
      Ops.push_back(getRoot()); // inchain
      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
      Ops.push_back(
          DAG.getTargetConstant(Intrinsic::wasm_rethrow_in_catch, getCurSDLoc(),
                                TLI.getPointerTy(DAG.getDataLayout())));
      SDVTList VTs = DAG.getVTList(ArrayRef<EVT>({MVT::Other})); // outchain
      DAG.setRoot(DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops));
      break;
    }
    }
  } else if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) {
    // Currently we do not lower any intrinsic calls with deopt operand bundles.
    // Eventually we will support lowering the @llvm.experimental.deoptimize
    // intrinsic, and right now there are no plans to support other intrinsics
    // with deopt state.
    LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB);
  } else {
    // Ordinary invoked call.
    LowerCallTo(&I, getValue(Callee), false, EHPadBB);
  }

  // If the value of the invoke is used outside of its defining block, make it
  // available as a virtual register.
  // We already took care of the exported value for the statepoint instruction
  // during call to the LowerStatepoint.
  if (!isStatepoint(I)) {
    CopyToExportRegsIfNeeded(&I);
  }

  // Compute the unwind destination(s); findUnwindDestinations may expand the
  // single IR EH pad into several machine blocks (each marked as an EH pad).
  SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
  BranchProbabilityInfo *BPI = FuncInfo.BPI;
  BranchProbability EHPadBBProb =
      BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB)
          : BranchProbability::getZero();
  findUnwindDestinations(FuncInfo, EHPadBB, EHPadBBProb, UnwindDests);

  // Update successor info.
  addSuccessorWithProb(InvokeMBB, Return);
  for (auto &UnwindDest : UnwindDests) {
    UnwindDest.first->setIsEHPad();
    addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second);
  }
  InvokeMBB->normalizeSuccProbs();

  // Drop into normal successor.
  DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(),
                          DAG.getBasicBlock(Return)));
}
2818
2819
5
void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
2820
5
  MachineBasicBlock *CallBrMBB = FuncInfo.MBB;
2821
5
2822
5
  // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
2823
5
  // have to do anything here to lower funclet bundles.
2824
5
  assert(!I.hasOperandBundlesOtherThan(
2825
5
             {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
2826
5
         "Cannot lower callbrs with arbitrary operand bundles yet!");
2827
5
2828
5
  assert(isa<InlineAsm>(I.getCalledValue()) &&
2829
5
         "Only know how to handle inlineasm callbr");
2830
5
  visitInlineAsm(&I);
2831
5
2832
5
  // Retrieve successors.
2833
5
  MachineBasicBlock *Return = FuncInfo.MBBMap[I.getDefaultDest()];
2834
5
2835
5
  // Update successor info.
2836
5
  addSuccessorWithProb(CallBrMBB, Return);
2837
15
  for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; 
++i10
) {
2838
10
    MachineBasicBlock *Target = FuncInfo.MBBMap[I.getIndirectDest(i)];
2839
10
    addSuccessorWithProb(CallBrMBB, Target);
2840
10
  }
2841
5
  CallBrMBB->normalizeSuccProbs();
2842
5
2843
5
  // Drop into default successor.
2844
5
  DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
2845
5
                          MVT::Other, getControlRoot(),
2846
5
                          DAG.getBasicBlock(Return)));
2847
5
}
2848
2849
0
/// 'resume' instructions are never lowered by this builder; reaching this
/// visitor indicates a bug upstream (the unreachable below asserts that
/// earlier stages are expected to have eliminated them).
void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
  llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
}
2852
2853
3.55k
/// Lower an IR 'landingpad' instruction: read the exception pointer and
/// selector out of the live-in virtual registers (copied from physregs
/// earlier) and merge them into the two-element landingpad value.
void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
  assert(FuncInfo.MBB->isEHPad() &&
         "Call to landingpad not in landing pad!");

  // If there aren't registers to copy the values into (e.g., during SjLj
  // exceptions), then don't bother to create these DAG nodes.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn();
  if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
      TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
    return;

  // If landingpad's return type is token type, we don't create DAG nodes
  // for its exception pointer and selector value. The extraction of exception
  // pointer or selector value from token type landingpads is not currently
  // supported.
  if (LP.getType()->isTokenTy())
    return;

  SmallVector<EVT, 2> ValueVTs;
  SDLoc dl = getCurSDLoc();
  ComputeValueVTs(TLI, DAG.getDataLayout(), LP.getType(), ValueVTs);
  assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported");

  // Get the two live-in registers as SDValues. The physregs have already been
  // copied into virtual registers.
  SDValue Ops[2];
  if (FuncInfo.ExceptionPointerVirtReg) {
    Ops[0] = DAG.getZExtOrTrunc(
        DAG.getCopyFromReg(DAG.getEntryNode(), dl,
                           FuncInfo.ExceptionPointerVirtReg,
                           TLI.getPointerTy(DAG.getDataLayout())),
        dl, ValueVTs[0]);
  } else {
    // No pointer register was set up; substitute a null pointer value.
    Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout()));
  }
  Ops[1] = DAG.getZExtOrTrunc(
      DAG.getCopyFromReg(DAG.getEntryNode(), dl,
                         FuncInfo.ExceptionSelectorVirtReg,
                         TLI.getPointerTy(DAG.getDataLayout())),
      dl, ValueVTs[1]);

  // Merge into one.
  SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
                            DAG.getVTList(ValueVTs), Ops);
  setValue(&LP, Res);
}
2900
2901
void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
2902
0
                                           MachineBasicBlock *Last) {
2903
0
  // Update JTCases.
2904
0
  for (unsigned i = 0, e = SL->JTCases.size(); i != e; ++i)
2905
0
    if (SL->JTCases[i].first.HeaderBB == First)
2906
0
      SL->JTCases[i].first.HeaderBB = Last;
2907
0
2908
0
  // Update BitTestCases.
2909
0
  for (unsigned i = 0, e = SL->BitTestCases.size(); i != e; ++i)
2910
0
    if (SL->BitTestCases[i].Parent == First)
2911
0
      SL->BitTestCases[i].Parent = Last;
2912
0
}
2913
2914
100
void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
2915
100
  MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
2916
100
2917
100
  // Update machine-CFG edges with unique successors.
2918
100
  SmallSet<BasicBlock*, 32> Done;
2919
387
  for (unsigned i = 0, e = I.getNumSuccessors(); i != e; 
++i287
) {
2920
287
    BasicBlock *BB = I.getSuccessor(i);
2921
287
    bool Inserted = Done.insert(BB).second;
2922
287
    if (!Inserted)
2923
6
        continue;
2924
281
2925
281
    MachineBasicBlock *Succ = FuncInfo.MBBMap[BB];
2926
281
    addSuccessorWithProb(IndirectBrMBB, Succ);
2927
281
  }
2928
100
  IndirectBrMBB->normalizeSuccProbs();
2929
100
2930
100
  DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(),
2931
100
                          MVT::Other, getControlRoot(),
2932
100
                          getValue(I.getAddress())));
2933
100
}
2934
2935
26.1k
void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
2936
26.1k
  if (!DAG.getTarget().Options.TrapUnreachable)
2937
1.67k
    return;
2938
24.4k
2939
24.4k
  // We may be able to ignore unreachable behind a noreturn call.
2940
24.4k
  if (DAG.getTarget().Options.NoTrapAfterNoreturn) {
2941
24.3k
    const BasicBlock &BB = *I.getParent();
2942
24.3k
    if (&I != &BB.front()) {
2943
23.7k
      BasicBlock::const_iterator PredI =
2944
23.7k
        std::prev(BasicBlock::const_iterator(&I));
2945
23.7k
      if (const CallInst *Call = dyn_cast<CallInst>(&*PredI)) {
2946
23.6k
        if (Call->doesNotReturn())
2947
22.8k
          return;
2948
1.63k
      }
2949
23.7k
    }
2950
24.3k
  }
2951
1.63k
2952
1.63k
  DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
2953
1.63k
}
2954
2955
11.5k
void SelectionDAGBuilder::visitFSub(const User &I) {
2956
11.5k
  // -0.0 - X --> fneg
2957
11.5k
  Type *Ty = I.getType();
2958
11.5k
  if (isa<Constant>(I.getOperand(0)) &&
2959
11.5k
      
I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)6.45k
) {
2960
5.20k
    SDValue Op2 = getValue(I.getOperand(1));
2961
5.20k
    setValue(&I, DAG.getNode(ISD::FNEG, getCurSDLoc(),
2962
5.20k
                             Op2.getValueType(), Op2));
2963
5.20k
    return;
2964
5.20k
  }
2965
6.35k
2966
6.35k
  visitBinary(I, ISD::FSUB);
2967
6.35k
}
2968
2969
/// Checks if the given instruction performs a vector reduction, in which case
/// we have the freedom to alter the elements in the result as long as the
/// reduction of them stays unchanged.
///
/// \param I candidate instruction (expected to be a vector-typed binary op).
/// \returns true iff the def-use chain rooted at \p I matches the
/// shuffle-halving reduction pattern and ultimately extracts element 0.
static bool isVectorReductionOp(const User *I) {
  const Instruction *Inst = dyn_cast<Instruction>(I);
  if (!Inst || !Inst->getType()->isVectorTy())
    return false;

  // Only associative/commutative integer ops, or fast-math FAdd/FMul, can be
  // freely reassociated by a reduction.
  auto OpCode = Inst->getOpcode();
  switch (OpCode) {
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    break;
  case Instruction::FAdd:
  case Instruction::FMul:
    if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
      if (FPOp->getFastMathFlags().isFast())
        break;
    LLVM_FALLTHROUGH;
  default:
    return false;
  }

  unsigned ElemNum = Inst->getType()->getVectorNumElements();
  // Ensure the reduction size is a power of 2.
  if (!isPowerOf2_32(ElemNum))
    return false;

  // Number of elements that still need reducing; halved at each matched
  // shuffle step below.
  unsigned ElemNumToReduce = ElemNum;

  // Do DFS search on the def-use chain from the given instruction. We only
  // allow four kinds of operations during the search until we reach the
  // instruction that extracts the first element from the vector:
  //
  //   1. The reduction operation of the same opcode as the given instruction.
  //
  //   2. PHI node.
  //
  //   3. ShuffleVector instruction together with a reduction operation that
  //      does a partial reduction.
  //
  //   4. ExtractElement that extracts the first element from the vector, and we
  //      stop searching the def-use chain here.
  //
  // 3 & 4 above perform a reduction on all elements of the vector. We push defs
  // from 1-3 to the stack to continue the DFS. The given instruction is not
  // a reduction operation if we meet any other instructions other than those
  // listed above.

  SmallVector<const User *, 16> UsersToVisit{Inst};
  SmallPtrSet<const User *, 16> Visited;
  bool ReduxExtracted = false;

  while (!UsersToVisit.empty()) {
    auto User = UsersToVisit.back();
    UsersToVisit.pop_back();
    if (!Visited.insert(User).second)
      continue;

    for (const auto &U : User->users()) {
      // NOTE: this 'Inst' intentionally shadows the outer one; it refers to
      // the current user being classified.
      auto Inst = dyn_cast<Instruction>(U);
      if (!Inst)
        return false;

      if (Inst->getOpcode() == OpCode || isa<PHINode>(U)) {
        // Case 1/2: same-opcode reduction op or PHI — keep walking, but an
        // FP op must still be fast-math.
        if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
          if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().isFast())
            return false;
        UsersToVisit.push_back(U);
      } else if (const ShuffleVectorInst *ShufInst =
                     dyn_cast<ShuffleVectorInst>(U)) {
        // Detect the following pattern: A ShuffleVector instruction together
        // with a reduction that do partial reduction on the first and second
        // ElemNumToReduce / 2 elements, and store the result in
        // ElemNumToReduce / 2 elements in another vector.

        unsigned ResultElements = ShufInst->getType()->getVectorNumElements();
        if (ResultElements < ElemNum)
          return false;

        if (ElemNumToReduce == 1)
          return false;
        if (!isa<UndefValue>(U->getOperand(1)))
          return false;
        // First half of the mask must select the upper half of the remaining
        // elements ...
        for (unsigned i = 0; i < ElemNumToReduce / 2; ++i)
          if (ShufInst->getMaskValue(i) != int(i + ElemNumToReduce / 2))
            return false;
        // ... and the rest must be undef (-1).
        for (unsigned i = ElemNumToReduce / 2; i < ElemNum; ++i)
          if (ShufInst->getMaskValue(i) != -1)
            return false;

        // There is only one user of this ShuffleVector instruction, which
        // must be a reduction operation.
        if (!U->hasOneUse())
          return false;

        auto U2 = dyn_cast<Instruction>(*U->user_begin());
        if (!U2 || U2->getOpcode() != OpCode)
          return false;

        // Check operands of the reduction operation.
        if ((U2->getOperand(0) == U->getOperand(0) && U2->getOperand(1) == U) ||
            (U2->getOperand(1) == U->getOperand(0) && U2->getOperand(0) == U)) {
          UsersToVisit.push_back(U2);
          ElemNumToReduce /= 2;
        } else
          return false;
      } else if (isa<ExtractElementInst>(U)) {
        // At this moment we should have reduced all elements in the vector.
        if (ElemNumToReduce != 1)
          return false;

        // The final scalar must be extracted from lane 0.
        const ConstantInt *Val = dyn_cast<ConstantInt>(U->getOperand(1));
        if (!Val || !Val->isZero())
          return false;

        ReduxExtracted = true;
      } else
        return false;
    }
  }
  return ReduxExtracted;
}
3095
3096
66
void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) {
3097
66
  SDNodeFlags Flags;
3098
66
3099
66
  SDValue Op = getValue(I.getOperand(0));
3100
66
  SDValue UnNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op.getValueType(),
3101
66
                                    Op, Flags);
3102
66
  setValue(&I, UnNodeValue);
3103
66
}
3104
3105
609k
void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) {
3106
609k
  SDNodeFlags Flags;
3107
609k
  if (auto *OFBinOp = dyn_cast<OverflowingBinaryOperator>(&I)) {
3108
335k
    Flags.setNoSignedWrap(OFBinOp->hasNoSignedWrap());
3109
335k
    Flags.setNoUnsignedWrap(OFBinOp->hasNoUnsignedWrap());
3110
335k
  }
3111
609k
  if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I)) {
3112
2.76k
    Flags.setExact(ExactOp->isExact());
3113
2.76k
  }
3114
609k
  if (isVectorReductionOp(&I)) {
3115
991
    Flags.setVectorReduction(true);
3116
991
    LLVM_DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n");
3117
991
  }
3118
609k
3119
609k
  SDValue Op1 = getValue(I.getOperand(0));
3120
609k
  SDValue Op2 = getValue(I.getOperand(1));
3121
609k
  SDValue BinNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(),
3122
609k
                                     Op1, Op2, Flags);
3123
609k
  setValue(&I, BinNodeValue);
3124
609k
}
3125
3126
117k
/// Lower a shift (SHL/SRL/SRA): coerce the scalar shift amount to the
/// target's preferred shift-amount type where possible, and transfer the
/// IR-level nuw/nsw/exact flags.
void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
  SDValue Op1 = getValue(I.getOperand(0));
  SDValue Op2 = getValue(I.getOperand(1));

  EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy(
      Op1.getValueType(), DAG.getDataLayout());

  // Coerce the shift amount to the right type if we can.
  if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
    unsigned ShiftSize = ShiftTy.getSizeInBits();
    unsigned Op2Size = Op2.getValueSizeInBits();
    SDLoc DL = getCurSDLoc();

    // If the operand is smaller than the shift count type, promote it.
    if (ShiftSize > Op2Size)
      Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2);

    // If the operand is larger than the shift count type but the shift
    // count type has enough bits to represent any shift value, truncate
    // it now. This is a common case and it exposes the truncate to
    // optimization early.
    else if (ShiftSize >= Log2_32_Ceil(Op2.getValueSizeInBits()))
      Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
    // Otherwise we'll need to temporarily settle for some other convenient
    // type.  Type legalization will make adjustments once the shiftee is split.
    else
      Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
  }

  // Gather the IR flags to place on the resulting node.
  bool nuw = false;
  bool nsw = false;
  bool exact = false;

  if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) {

    if (const OverflowingBinaryOperator *OFBinOp =
            dyn_cast<const OverflowingBinaryOperator>(&I)) {
      nuw = OFBinOp->hasNoUnsignedWrap();
      nsw = OFBinOp->hasNoSignedWrap();
    }
    if (const PossiblyExactOperator *ExactOp =
            dyn_cast<const PossiblyExactOperator>(&I))
      exact = ExactOp->isExact();
  }
  SDNodeFlags Flags;
  Flags.setExact(exact);
  Flags.setNoSignedWrap(nsw);
  Flags.setNoUnsignedWrap(nuw);
  SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2,
                            Flags);
  setValue(&I, Res);
}
3178
3179
3.27k
void SelectionDAGBuilder::visitSDiv(const User &I) {
3180
3.27k
  SDValue Op1 = getValue(I.getOperand(0));
3181
3.27k
  SDValue Op2 = getValue(I.getOperand(1));
3182
3.27k
3183
3.27k
  SDNodeFlags Flags;
3184
3.27k
  Flags.setExact(isa<PossiblyExactOperator>(&I) &&
3185
3.27k
                 cast<PossiblyExactOperator>(&I)->isExact());
3186
3.27k
  setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1,
3187
3.27k
                           Op2, Flags));
3188
3.27k
}
3189
3190
608k
void SelectionDAGBuilder::visitICmp(const User &I) {
3191
608k
  ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
3192
608k
  if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
3193
608k
    predicate = IC->getPredicate();
3194
353
  else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
3195
353
    predicate = ICmpInst::Predicate(IC->getPredicate());
3196
608k
  SDValue Op1 = getValue(I.getOperand(0));
3197
608k
  SDValue Op2 = getValue(I.getOperand(1));
3198
608k
  ISD::CondCode Opcode = getICmpCondCode(predicate);
3199
608k
3200
608k
  auto &TLI = DAG.getTargetLoweringInfo();
3201
608k
  EVT MemVT =
3202
608k
      TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType());
3203
608k
3204
608k
  // If a pointer's DAG type is larger than its memory type then the DAG values
3205
608k
  // are zero-extended. This breaks signed comparisons so truncate back to the
3206
608k
  // underlying type before doing the compare.
3207
608k
  if (Op1.getValueType() != MemVT) {
3208
0
    Op1 = DAG.getPtrExtOrTrunc(Op1, getCurSDLoc(), MemVT);
3209
0
    Op2 = DAG.getPtrExtOrTrunc(Op2, getCurSDLoc(), MemVT);
3210
0
  }
3211
608k
3212
608k
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3213
608k
                                                        I.getType());
3214
608k
  setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
3215
608k
}
3216
3217
15.9k
void SelectionDAGBuilder::visitFCmp(const User &I) {
3218
15.9k
  FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
3219
15.9k
  if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
3220
15.9k
    predicate = FC->getPredicate();
3221
2
  else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
3222
2
    predicate = FCmpInst::Predicate(FC->getPredicate());
3223
15.9k
  SDValue Op1 = getValue(I.getOperand(0));
3224
15.9k
  SDValue Op2 = getValue(I.getOperand(1));
3225
15.9k
3226
15.9k
  ISD::CondCode Condition = getFCmpCondCode(predicate);
3227
15.9k
  auto *FPMO = dyn_cast<FPMathOperator>(&I);
3228
15.9k
  if ((FPMO && FPMO->hasNoNaNs()) || 
TM.Options.NoNaNsFPMath14.8k
)
3229
1.37k
    Condition = getFCmpCodeWithoutNaN(Condition);
3230
15.9k
3231
15.9k
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3232
15.9k
                                                        I.getType());
3233
15.9k
  setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition));
3234
15.9k
}
3235
3236
// Check if the condition of the select has one use or two users that are both
3237
// selects with the same condition.
3238
14.6k
static bool hasOnlySelectUsers(const Value *Cond) {
3239
16.7k
  return llvm::all_of(Cond->users(), [](const Value *V) {
3240
16.7k
    return isa<SelectInst>(V);
3241
16.7k
  });
3242
14.6k
}
3243
3244
70.5k
// visitSelect - Lower an IR 'select'. Besides the straightforward
// SELECT/VSELECT lowering, this tries to recognize min/max/abs idioms
// (via matchSelectPattern) and emit the corresponding native ISD node when
// the target supports it.
void SelectionDAGBuilder::visitSelect(const User &I) {
  SmallVector<EVT, 4> ValueVTs;
  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
                  ValueVTs);
  unsigned NumValues = ValueVTs.size();
  // A select producing an empty aggregate lowers to nothing.
  if (NumValues == 0) return;

  SmallVector<SDValue, 4> Values(NumValues);
  SDValue Cond     = getValue(I.getOperand(0));
  SDValue LHSVal   = getValue(I.getOperand(1));
  SDValue RHSVal   = getValue(I.getOperand(2));
  // BaseOps holds the condition operand; it is cleared below if the select is
  // rewritten as a min/max/abs node, which takes no condition.
  auto BaseOps = {Cond};
  // A vector condition selects per-lane (VSELECT); a scalar condition selects
  // the whole value (SELECT).
  ISD::NodeType OpCode = Cond.getValueType().isVector() ?
    ISD::VSELECT : ISD::SELECT;

  bool IsUnaryAbs = false;

  // Min/max matching is only viable if all output VTs are the same.
  if (is_splat(ValueVTs)) {
    EVT VT = ValueVTs[0];
    LLVMContext &Ctx = *DAG.getContext();
    auto &TLI = DAG.getTargetLoweringInfo();

    // We care about the legality of the operation after it has been type
    // legalized.
    while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal &&
           VT != TLI.getTypeToTransformTo(Ctx, VT))
      VT = TLI.getTypeToTransformTo(Ctx, VT);

    // If the vselect is legal, assume we want to leave this as a vector setcc +
    // vselect. Otherwise, if this is going to be scalarized, we want to see if
    // min/max is legal on the scalar type.
    bool UseScalarMinMax = VT.isVector() &&
      !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT);

    Value *LHS, *RHS;
    auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);
    // ISD::DELETED_NODE doubles as the "no pattern matched" sentinel.
    ISD::NodeType Opc = ISD::DELETED_NODE;
    switch (SPR.Flavor) {
    case SPF_UMAX: Opc = ISD::UMAX; break;
    case SPF_UMIN: Opc = ISD::UMIN; break;
    case SPF_SMAX: Opc = ISD::SMAX; break;
    case SPF_SMIN: Opc = ISD::SMIN; break;
    case SPF_FMINNUM:
      // For FP min, the NaN propagation behavior of the matched pattern
      // decides between FMINIMUM (propagates NaN) and FMINNUM (returns the
      // non-NaN operand); when either behavior is acceptable, prefer
      // whichever is legal for the target.
      switch (SPR.NaNBehavior) {
      case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
      case SPNB_RETURNS_NAN:   Opc = ISD::FMINIMUM; break;
      case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
      case SPNB_RETURNS_ANY: {
        if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT))
          Opc = ISD::FMINNUM;
        else if (TLI.isOperationLegalOrCustom(ISD::FMINIMUM, VT))
          Opc = ISD::FMINIMUM;
        else if (UseScalarMinMax)
          Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ?
            ISD::FMINNUM : ISD::FMINIMUM;
        break;
      }
      }
      break;
    case SPF_FMAXNUM:
      // Mirror of the FMINNUM case above for FP max.
      switch (SPR.NaNBehavior) {
      case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
      case SPNB_RETURNS_NAN:   Opc = ISD::FMAXIMUM; break;
      case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
      case SPNB_RETURNS_ANY:

        if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT))
          Opc = ISD::FMAXNUM;
        else if (TLI.isOperationLegalOrCustom(ISD::FMAXIMUM, VT))
          Opc = ISD::FMAXIMUM;
        else if (UseScalarMinMax)
          Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ?
            ISD::FMAXNUM : ISD::FMAXIMUM;
        break;
      }
      break;
    case SPF_ABS:
      IsUnaryAbs = true;
      Opc = ISD::ABS;
      break;
    case SPF_NABS:
      // TODO: we need to produce sub(0, abs(X)).
      // Falls through to the generic select lowering for now.
    default: break;
    }

    if (!IsUnaryAbs && Opc != ISD::DELETED_NODE &&
        (TLI.isOperationLegalOrCustom(Opc, VT) ||
         (UseScalarMinMax &&
          TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
        // If the underlying comparison instruction is used by any other
        // instruction, the consumed instructions won't be destroyed, so it is
        // not profitable to convert to a min/max.
        hasOnlySelectUsers(cast<SelectInst>(I).getCondition())) {
      OpCode = Opc;
      LHSVal = getValue(LHS);
      RHSVal = getValue(RHS);
      BaseOps = {};
    }

    if (IsUnaryAbs) {
      OpCode = Opc;
      LHSVal = getValue(LHS);
      BaseOps = {};
    }
  }

  if (IsUnaryAbs) {
    // ISD::ABS is unary: one operand per produced value, no condition.
    for (unsigned i = 0; i != NumValues; ++i) {
      Values[i] =
          DAG.getNode(OpCode, getCurSDLoc(),
                      LHSVal.getNode()->getValueType(LHSVal.getResNo() + i),
                      SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
    }
  } else {
    // Binary case: (optionally Cond,) LHS, RHS for each produced value.
    for (unsigned i = 0; i != NumValues; ++i) {
      SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());
      Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
      Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
      Values[i] = DAG.getNode(
          OpCode, getCurSDLoc(),
          LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), Ops);
    }
  }

  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
                           DAG.getVTList(ValueVTs), Values));
}
3372
3373
101k
void SelectionDAGBuilder::visitTrunc(const User &I) {
3374
101k
  // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
3375
101k
  SDValue N = getValue(I.getOperand(0));
3376
101k
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3377
101k
                                                        I.getType());
3378
101k
  setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N));
3379
101k
}
3380
3381
81.0k
void SelectionDAGBuilder::visitZExt(const User &I) {
3382
81.0k
  // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
3383
81.0k
  // ZExt also can't be a cast to bool for same reason. So, nothing much to do
3384
81.0k
  SDValue N = getValue(I.getOperand(0));
3385
81.0k
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3386
81.0k
                                                        I.getType());
3387
81.0k
  setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N));
3388
81.0k
}
3389
3390
61.9k
void SelectionDAGBuilder::visitSExt(const User &I) {
3391
61.9k
  // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
3392
61.9k
  // SExt also can't be a cast to bool for same reason. So, nothing much to do
3393
61.9k
  SDValue N = getValue(I.getOperand(0));
3394
61.9k
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3395
61.9k
                                                        I.getType());
3396
61.9k
  setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N));
3397
61.9k
}
3398
3399
1.55k
void SelectionDAGBuilder::visitFPTrunc(const User &I) {
3400
1.55k
  // FPTrunc is never a no-op cast, no need to check
3401
1.55k
  SDValue N = getValue(I.getOperand(0));
3402
1.55k
  SDLoc dl = getCurSDLoc();
3403
1.55k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3404
1.55k
  EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3405
1.55k
  setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N,
3406
1.55k
                           DAG.getTargetConstant(
3407
1.55k
                               0, dl, TLI.getPointerTy(DAG.getDataLayout()))));
3408
1.55k
}
3409
3410
4.69k
void SelectionDAGBuilder::visitFPExt(const User &I) {
3411
4.69k
  // FPExt is never a no-op cast, no need to check
3412
4.69k
  SDValue N = getValue(I.getOperand(0));
3413
4.69k
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3414
4.69k
                                                        I.getType());
3415
4.69k
  setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N));
3416
4.69k
}
3417
3418
2.67k
void SelectionDAGBuilder::visitFPToUI(const User &I) {
3419
2.67k
  // FPToUI is never a no-op cast, no need to check
3420
2.67k
  SDValue N = getValue(I.getOperand(0));
3421
2.67k
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3422
2.67k
                                                        I.getType());
3423
2.67k
  setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N));
3424
2.67k
}
3425
3426
3.61k
void SelectionDAGBuilder::visitFPToSI(const User &I) {
3427
3.61k
  // FPToSI is never a no-op cast, no need to check
3428
3.61k
  SDValue N = getValue(I.getOperand(0));
3429
3.61k
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3430
3.61k
                                                        I.getType());
3431
3.61k
  setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N));
3432
3.61k
}
3433
3434
6.31k
void SelectionDAGBuilder::visitUIToFP(const User &I) {
3435
6.31k
  // UIToFP is never a no-op cast, no need to check
3436
6.31k
  SDValue N = getValue(I.getOperand(0));
3437
6.31k
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3438
6.31k
                                                        I.getType());
3439
6.31k
  setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N));
3440
6.31k
}
3441
3442
26.1k
void SelectionDAGBuilder::visitSIToFP(const User &I) {
3443
26.1k
  // SIToFP is never a no-op cast, no need to check
3444
26.1k
  SDValue N = getValue(I.getOperand(0));
3445
26.1k
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3446
26.1k
                                                        I.getType());
3447
26.1k
  setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N));
3448
26.1k
}
3449
3450
61.1k
void SelectionDAGBuilder::visitPtrToInt(const User &I) {
3451
61.1k
  // What to do depends on the size of the integer and the size of the pointer.
3452
61.1k
  // We can either truncate, zero extend, or no-op, accordingly.
3453
61.1k
  SDValue N = getValue(I.getOperand(0));
3454
61.1k
  auto &TLI = DAG.getTargetLoweringInfo();
3455
61.1k
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3456
61.1k
                                                        I.getType());
3457
61.1k
  EVT PtrMemVT =
3458
61.1k
      TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType());
3459
61.1k
  N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), PtrMemVT);
3460
61.1k
  N = DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT);
3461
61.1k
  setValue(&I, N);
3462
61.1k
}
3463
3464
43.2k
void SelectionDAGBuilder::visitIntToPtr(const User &I) {
3465
43.2k
  // What to do depends on the size of the integer and the size of the pointer.
3466
43.2k
  // We can either truncate, zero extend, or no-op, accordingly.
3467
43.2k
  SDValue N = getValue(I.getOperand(0));
3468
43.2k
  auto &TLI = DAG.getTargetLoweringInfo();
3469
43.2k
  EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3470
43.2k
  EVT PtrMemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType());
3471
43.2k
  N = DAG.getZExtOrTrunc(N, getCurSDLoc(), PtrMemVT);
3472
43.2k
  N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), DestVT);
3473
43.2k
  setValue(&I, N);
3474
43.2k
}
3475
3476
705k
// visitBitCast - Lower an IR 'bitcast'. Since source and destination have
// the same size, this is an ISD::BITCAST, an opaque-constant rewrap, or a
// plain no-op.
void SelectionDAGBuilder::visitBitCast(const User &I) {
  SDValue N = getValue(I.getOperand(0));
  SDLoc dl = getCurSDLoc();
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
                                                        I.getType());

  // BitCast assures us that source and destination are the same size so this is
  // either a BITCAST or a no-op.
  if (DestVT != N.getValueType())
    setValue(&I, DAG.getNode(ISD::BITCAST, dl,
                             DestVT, N)); // convert types.
  // Check if the original LLVM IR Operand was a ConstantInt, because getValue()
  // might fold any kind of constant expression to an integer constant and that
  // is not what we are looking for. Only recognize a bitcast of a genuine
  // constant integer as an opaque constant.
  else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0)))
    setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false,
                                 /*isOpaque*/true));
  else
    setValue(&I, N);            // noop cast.
}
3497
3498
321
void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) {
3499
321
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3500
321
  const Value *SV = I.getOperand(0);
3501
321
  SDValue N = getValue(SV);
3502
321
  EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3503
321
3504
321
  unsigned SrcAS = SV->getType()->getPointerAddressSpace();
3505
321
  unsigned DestAS = I.getType()->getPointerAddressSpace();
3506
321
3507
321
  if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
3508
228
    N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS);
3509
321
3510
321
  setValue(&I, N);
3511
321
}
3512
3513
42.0k
void SelectionDAGBuilder::visitInsertElement(const User &I) {
3514
42.0k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3515
42.0k
  SDValue InVec = getValue(I.getOperand(0));
3516
42.0k
  SDValue InVal = getValue(I.getOperand(1));
3517
42.0k
  SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(),
3518
42.0k
                                     TLI.getVectorIdxTy(DAG.getDataLayout()));
3519
42.0k
  setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(),
3520
42.0k
                           TLI.getValueType(DAG.getDataLayout(), I.getType()),
3521
42.0k
                           InVec, InVal, InIdx));
3522
42.0k
}
3523
3524
73.1k
void SelectionDAGBuilder::visitExtractElement(const User &I) {
3525
73.1k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3526
73.1k
  SDValue InVec = getValue(I.getOperand(0));
3527
73.1k
  SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(),
3528
73.1k
                                     TLI.getVectorIdxTy(DAG.getDataLayout()));
3529
73.1k
  setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(),
3530
73.1k
                           TLI.getValueType(DAG.getDataLayout(), I.getType()),
3531
73.1k
                           InVec, InIdx));
3532
73.1k
}
3533
3534
81.2k
// visitShuffleVector - Lower an IR 'shufflevector'. When the mask length
// matches the source vector length this is a direct VECTOR_SHUFFLE;
// otherwise the sources are normalized (concatenated/padded or subvector-
// extracted) so the lengths match, falling back to per-element extract +
// build_vector as a last resort.
void SelectionDAGBuilder::visitShuffleVector(const User &I) {
  SDValue Src1 = getValue(I.getOperand(0));
  SDValue Src2 = getValue(I.getOperand(1));
  SDLoc DL = getCurSDLoc();

  SmallVector<int, 8> Mask;
  ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
  unsigned MaskNumElts = Mask.size();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
  EVT SrcVT = Src1.getValueType();
  unsigned SrcNumElts = SrcVT.getVectorNumElements();

  // Easy case: mask and sources already agree on length.
  if (SrcNumElts == MaskNumElts) {
    setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask));
    return;
  }

  // Normalize the shuffle vector since mask and vector length don't match.
  if (SrcNumElts < MaskNumElts) {
    // Mask is longer than the source vectors. We can use concatenate vector to
    // make the mask and vectors lengths match.

    if (MaskNumElts % SrcNumElts == 0) {
      // Mask length is a multiple of the source vector length.
      // Check if the shuffle is some kind of concatenation of the input
      // vectors.
      unsigned NumConcat = MaskNumElts / SrcNumElts;
      bool IsConcat = true;
      // ConcatSrcs[j] records which source (0 or 1) fills the j-th SrcVT-sized
      // piece of the result; -1 means the piece is entirely undef so far.
      SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
      for (unsigned i = 0; i != MaskNumElts; ++i) {
        int Idx = Mask[i];
        if (Idx < 0)
          continue;
        // Ensure the indices in each SrcVT sized piece are sequential and that
        // the same source is used for the whole piece.
        if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
            (ConcatSrcs[i / SrcNumElts] >= 0 &&
             ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) {
          IsConcat = false;
          break;
        }
        // Remember which source this index came from.
        ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
      }

      // The shuffle is concatenating multiple vectors together. Just emit
      // a CONCAT_VECTORS operation.
      if (IsConcat) {
        SmallVector<SDValue, 8> ConcatOps;
        for (auto Src : ConcatSrcs) {
          if (Src < 0)
            ConcatOps.push_back(DAG.getUNDEF(SrcVT));
          else if (Src == 0)
            ConcatOps.push_back(Src1);
          else
            ConcatOps.push_back(Src2);
        }
        setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps));
        return;
      }
    }

    // Not a clean concatenation: widen both sources to a multiple of the
    // source length that covers the mask, shuffle at the padded width, then
    // trim back down if padding was added.
    unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
    unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
    EVT PaddedVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
                                    PaddedMaskNumElts);

    // Pad both vectors with undefs to make them the same length as the mask.
    SDValue UndefVal = DAG.getUNDEF(SrcVT);

    SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
    SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
    MOps1[0] = Src1;
    MOps2[0] = Src2;

    Src1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1);
    Src2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2);

    // Readjust mask for new input vector length.
    SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
    for (unsigned i = 0; i != MaskNumElts; ++i) {
      int Idx = Mask[i];
      if (Idx >= (int)SrcNumElts)
        Idx -= SrcNumElts - PaddedMaskNumElts;
      MappedOps[i] = Idx;
    }

    SDValue Result = DAG.getVectorShuffle(PaddedVT, DL, Src1, Src2, MappedOps);

    // If the concatenated vector was padded, extract a subvector with the
    // correct number of elements.
    if (MaskNumElts != PaddedMaskNumElts)
      Result = DAG.getNode(
          ISD::EXTRACT_SUBVECTOR, DL, VT, Result,
          DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));

    setValue(&I, Result);
    return;
  }

  if (SrcNumElts > MaskNumElts) {
    // Analyze the access pattern of the vector to see if we can extract
    // two subvectors and do the shuffle.
    int StartIdx[2] = { -1, -1 };  // StartIdx to extract from
    bool CanExtract = true;
    for (int Idx : Mask) {
      unsigned Input = 0;
      if (Idx < 0)
        continue;

      if (Idx >= (int)SrcNumElts) {
        Input = 1;
        Idx -= SrcNumElts;
      }

      // If all the indices come from the same MaskNumElts sized portion of
      // the sources we can use extract. Also make sure the extract wouldn't
      // extract past the end of the source.
      int NewStartIdx = alignDown(Idx, MaskNumElts);
      if (NewStartIdx + MaskNumElts > SrcNumElts ||
          (StartIdx[Input] >= 0 && StartIdx[Input] != NewStartIdx))
        CanExtract = false;
      // Make sure we always update StartIdx as we use it to track if all
      // elements are undef.
      StartIdx[Input] = NewStartIdx;
    }

    if (StartIdx[0] < 0 && StartIdx[1] < 0) {
      setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
      return;
    }
    if (CanExtract) {
      // Extract appropriate subvector and generate a vector shuffle
      for (unsigned Input = 0; Input < 2; ++Input) {
        SDValue &Src = Input == 0 ? Src1 : Src2;
        if (StartIdx[Input] < 0)
          Src = DAG.getUNDEF(VT);
        else {
          Src = DAG.getNode(
              ISD::EXTRACT_SUBVECTOR, DL, VT, Src,
              DAG.getConstant(StartIdx[Input], DL,
                              TLI.getVectorIdxTy(DAG.getDataLayout())));
        }
      }

      // Calculate new mask.
      SmallVector<int, 8> MappedOps(Mask.begin(), Mask.end());
      for (int &Idx : MappedOps) {
        if (Idx >= (int)SrcNumElts)
          Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
        else if (Idx >= 0)
          Idx -= StartIdx[0];
      }

      setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps));
      return;
    }
  }

  // We can't use either concat vectors or extract subvectors so fall back to
  // replacing the shuffle with per-element extracts and a build vector.
  EVT EltVT = VT.getVectorElementType();
  EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
  SmallVector<SDValue,8> Ops;
  for (int Idx : Mask) {
    SDValue Res;

    if (Idx < 0) {
      Res = DAG.getUNDEF(EltVT);
    } else {
      SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
      if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;

      Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
                        EltVT, Src, DAG.getConstant(Idx, DL, IdxVT));
    }

    Ops.push_back(Res);
  }

  setValue(&I, DAG.getBuildVector(VT, DL, Ops));
}
3719
3720
3.61k
// visitInsertValue - Lower an IR 'insertvalue' (instruction or constant
// expression): splice the inserted value's legalized components into the
// aggregate's component list at the linearized index, then merge the result.
void SelectionDAGBuilder::visitInsertValue(const User &I) {
  ArrayRef<unsigned> Indices;
  if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(&I))
    Indices = IV->getIndices();
  else
    Indices = cast<ConstantExpr>(&I)->getIndices();

  const Value *Op0 = I.getOperand(0);
  const Value *Op1 = I.getOperand(1);
  Type *AggTy = I.getType();
  Type *ValTy = Op1->getType();
  bool IntoUndef = isa<UndefValue>(Op0);
  bool FromUndef = isa<UndefValue>(Op1);

  // Flatten the nested index path into a position in the legalized-value list.
  unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SmallVector<EVT, 4> AggValueVTs;
  ComputeValueVTs(TLI, DAG.getDataLayout(), AggTy, AggValueVTs);
  SmallVector<EVT, 4> ValValueVTs;
  ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);

  unsigned NumAggValues = AggValueVTs.size();
  unsigned NumValValues = ValValueVTs.size();
  SmallVector<SDValue, 4> Values(NumAggValues);

  // Ignore an insertvalue that produces an empty object
  if (!NumAggValues) {
    setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
    return;
  }

  SDValue Agg = getValue(Op0);
  unsigned i = 0;
  // Copy the beginning value(s) from the original aggregate.
  for (; i != LinearIndex; ++i)
    Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
                SDValue(Agg.getNode(), Agg.getResNo() + i);
  // Copy values from the inserted value(s).
  if (NumValValues) {
    SDValue Val = getValue(Op1);
    for (; i != LinearIndex + NumValValues; ++i)
      Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
                  SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
  }
  // Copy remaining value(s) from the original aggregate.
  for (; i != NumAggValues; ++i)
    Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
                SDValue(Agg.getNode(), Agg.getResNo() + i);

  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
                           DAG.getVTList(AggValueVTs), Values));
}
3773
3774
23.6k
// visitExtractValue - Lower an IR 'extractvalue' (instruction or constant
// expression): pick out the legalized components of the aggregate that
// correspond to the linearized index path and merge them into the result.
void SelectionDAGBuilder::visitExtractValue(const User &I) {
  ArrayRef<unsigned> Indices;
  if (const ExtractValueInst *EV = dyn_cast<ExtractValueInst>(&I))
    Indices = EV->getIndices();
  else
    Indices = cast<ConstantExpr>(&I)->getIndices();

  const Value *Op0 = I.getOperand(0);
  Type *AggTy = Op0->getType();
  Type *ValTy = I.getType();
  bool OutOfUndef = isa<UndefValue>(Op0);

  // Flatten the nested index path into a position in the legalized-value list.
  unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SmallVector<EVT, 4> ValValueVTs;
  ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);

  unsigned NumValValues = ValValueVTs.size();

  // Ignore an extractvalue that produces an empty object
  if (!NumValValues) {
    setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
    return;
  }

  SmallVector<SDValue, 4> Values(NumValValues);

  SDValue Agg = getValue(Op0);
  // Copy out the selected value(s).
  for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
    Values[i - LinearIndex] =
      OutOfUndef ?
        DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
        SDValue(Agg.getNode(), Agg.getResNo() + i);

  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
                           DAG.getVTList(ValValueVTs), Values));
}
3813
3814
1.22M
// visitGetElementPtr - Lower an IR 'getelementptr' by walking its type
// iterator and accumulating byte offsets into the base pointer: constant
// struct-field offsets and constant (or splat-constant) array indices fold
// to ADDs of constants; variable indices become (sext/trunc'd) index *
// element-size ADDs, using SHL for power-of-two element sizes.
void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
  Value *Op0 = I.getOperand(0);
  // Note that the pointer operand may be a vector of pointers. Take the scalar
  // element which holds a pointer.
  unsigned AS = Op0->getType()->getScalarType()->getPointerAddressSpace();
  SDValue N = getValue(Op0);
  SDLoc dl = getCurSDLoc();
  auto &TLI = DAG.getTargetLoweringInfo();
  MVT PtrTy = TLI.getPointerTy(DAG.getDataLayout(), AS);
  MVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout(), AS);

  // Normalize Vector GEP - all scalar operands should be converted to the
  // splat vector.
  unsigned VectorWidth = I.getType()->isVectorTy() ?
    cast<VectorType>(I.getType())->getVectorNumElements() : 0;

  if (VectorWidth && !N.getValueType().isVector()) {
    LLVMContext &Context = *DAG.getContext();
    EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorWidth);
    N = DAG.getSplatBuildVector(VT, dl, N);
  }

  for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I);
       GTI != E; ++GTI) {
    const Value *Idx = GTI.getOperand();
    if (StructType *StTy = GTI.getStructTypeOrNull()) {
      // Struct step: the index is a constant field number; add the field's
      // byte offset from the struct layout.
      unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
      if (Field) {
        // N = N + Offset
        uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field);

        // In an inbounds GEP with an offset that is nonnegative even when
        // interpreted as signed, assume there is no unsigned overflow.
        SDNodeFlags Flags;
        if (int64_t(Offset) >= 0 && cast<GEPOperator>(I).isInBounds())
          Flags.setNoUnsignedWrap(true);

        N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N,
                        DAG.getConstant(Offset, dl, N.getValueType()), Flags);
      }
    } else {
      // Array/pointer step: offset = index * element allocation size.
      unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS);
      MVT IdxTy = MVT::getIntegerVT(IdxSize);
      APInt ElementSize(IdxSize, DL->getTypeAllocSize(GTI.getIndexedType()));

      // If this is a scalar constant or a splat vector of constants,
      // handle it quickly.
      const auto *CI = dyn_cast<ConstantInt>(Idx);
      if (!CI && isa<ConstantDataVector>(Idx) &&
          cast<ConstantDataVector>(Idx)->getSplatValue())
        CI = cast<ConstantInt>(cast<ConstantDataVector>(Idx)->getSplatValue());

      if (CI) {
        if (CI->isZero())
          continue;
        APInt Offs = ElementSize * CI->getValue().sextOrTrunc(IdxSize);
        LLVMContext &Context = *DAG.getContext();
        SDValue OffsVal = VectorWidth ?
          DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorWidth)) :
          DAG.getConstant(Offs, dl, IdxTy);

        // In an inbounds GEP with an offset that is nonnegative even when
        // interpreted as signed, assume there is no unsigned overflow.
        SDNodeFlags Flags;
        if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds())
          Flags.setNoUnsignedWrap(true);

        OffsVal = DAG.getSExtOrTrunc(OffsVal, dl, N.getValueType());

        N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, Flags);
        continue;
      }

      // N = N + Idx * ElementSize;
      SDValue IdxN = getValue(Idx);

      // For a vector GEP, splat a scalar index across all lanes.
      if (!IdxN.getValueType().isVector() && VectorWidth) {
        EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorWidth);
        IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
      }

      // If the index is smaller or larger than intptr_t, truncate or extend
      // it.
      IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());

      // If this is a multiply by a power of two, turn it into a shl
      // immediately.  This is a very common case.
      if (ElementSize != 1) {
        if (ElementSize.isPowerOf2()) {
          unsigned Amt = ElementSize.logBase2();
          IdxN = DAG.getNode(ISD::SHL, dl,
                             N.getValueType(), IdxN,
                             DAG.getConstant(Amt, dl, IdxN.getValueType()));
        } else {
          SDValue Scale = DAG.getConstant(ElementSize.getZExtValue(), dl,
                                          IdxN.getValueType());
          IdxN = DAG.getNode(ISD::MUL, dl,
                             N.getValueType(), IdxN, Scale);
        }
      }

      N = DAG.getNode(ISD::ADD, dl,
                      N.getValueType(), N, IdxN);
    }
  }

  if (PtrMemTy != PtrTy && !cast<GEPOperator>(I).isInBounds())
    N = DAG.getPtrExtendInReg(N, dl, PtrMemTy);

  setValue(&I, N);
}
3925
3926
46.6k
// Lower an alloca instruction. Fixed-size allocas in the entry block were
// already given static frame slots (recorded in FuncInfo.StaticAllocaMap);
// everything else becomes a DYNAMIC_STACKALLOC node whose size is rounded up
// and masked to the stack alignment.
void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
  // If this is a fixed sized alloca in the entry block of the function,
  // allocate it statically on the stack.
  if (FuncInfo.StaticAllocaMap.count(&I))
    return;   // getValue will auto-populate this.

  SDLoc dl = getCurSDLoc();
  Type *Ty = I.getAllocatedType();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  auto &DL = DAG.getDataLayout();
  uint64_t TySize = DL.getTypeAllocSize(Ty);
  // Requested alignment is the max of the IR alignment and the type's
  // preferred alignment.
  unsigned Align =
      std::max((unsigned)DL.getPrefTypeAlignment(Ty), I.getAlignment());

  SDValue AllocSize = getValue(I.getArraySize());

  // The array-size operand may be any integer width; normalize it to the
  // pointer width of the alloca address space before multiplying.
  EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), DL.getAllocaAddrSpace());
  if (AllocSize.getValueType() != IntPtr)
    AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr);

  // Total bytes = element count * element size.
  AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr,
                          AllocSize,
                          DAG.getConstant(TySize, dl, IntPtr));

  // Handle alignment.  If the requested alignment is less than or equal to
  // the stack alignment, ignore it.  If the size is greater than or equal to
  // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
  unsigned StackAlign =
      DAG.getSubtarget().getFrameLowering()->getStackAlignment();
  if (Align <= StackAlign)
    Align = 0;

  // Round the size of the allocation up to the stack alignment size
  // by add SA-1 to the size. This doesn't overflow because we're computing
  // an address inside an alloca.
  SDNodeFlags Flags;
  Flags.setNoUnsignedWrap(true);
  AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
                          DAG.getConstant(StackAlign - 1, dl, IntPtr), Flags);

  // Mask out the low bits for alignment purposes.
  AllocSize =
      DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
                  DAG.getConstant(~(uint64_t)(StackAlign - 1), dl, IntPtr));

  // DYNAMIC_STACKALLOC: operands are chain, size, and the (possibly zeroed)
  // over-alignment; results are the allocated pointer and the output chain.
  SDValue Ops[] = {getRoot(), AllocSize, DAG.getConstant(Align, dl, IntPtr)};
  SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
  SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops);
  setValue(&I, DSA);
  DAG.setRoot(DSA.getValue(1));

  assert(FuncInfo.MF->getFrameInfo().hasVarSizedObjects());
}
3979
3980
621k
// Lower a (non-atomic) load instruction. Aggregate loads are split into one
// scalar load per first-class value computed by ComputeValueVTs, and the
// pieces are merged back with MERGE_VALUES. Chain handling distinguishes
// volatile loads (fully serialized), loads of constant memory (unchained),
// and ordinary loads (collected in PendingLoads).
void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
  if (I.isAtomic())
    return visitAtomicLoad(I);

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  const Value *SV = I.getOperand(0);
  if (TLI.supportSwiftError()) {
    // Swifterror values can come from either a function parameter with
    // swifterror attribute or an alloca with swifterror attribute.
    if (const Argument *Arg = dyn_cast<Argument>(SV)) {
      if (Arg->hasSwiftErrorAttr())
        return visitLoadFromSwiftError(I);
    }

    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
      if (Alloca->isSwiftError())
        return visitLoadFromSwiftError(I);
    }
  }

  SDValue Ptr = getValue(SV);

  Type *Ty = I.getType();

  // Gather IR-level properties that map to MachineMemOperand flags below.
  bool isVolatile = I.isVolatile();
  bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr;
  bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr;
  bool isDereferenceable =
      isDereferenceablePointer(SV, I.getType(), DAG.getDataLayout());
  unsigned Alignment = I.getAlignment();

  AAMDNodes AAInfo;
  I.getAAMetadata(AAInfo);
  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);

  SmallVector<EVT, 4> ValueVTs, MemVTs;
  SmallVector<uint64_t, 4> Offsets;
  ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets);
  unsigned NumValues = ValueVTs.size();
  if (NumValues == 0)
    return;

  SDValue Root;
  bool ConstantMemory = false;
  if (isVolatile || NumValues > MaxParallelChains)
    // Serialize volatile loads with other side effects.
    Root = getRoot();
  else if (AA &&
           AA->pointsToConstantMemory(MemoryLocation(
               SV,
               LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)),
               AAInfo))) {
    // Do not serialize (non-volatile) loads of constant memory with anything.
    Root = DAG.getEntryNode();
    ConstantMemory = true;
  } else {
    // Do not serialize non-volatile loads against each other.
    Root = DAG.getRoot();
  }

  SDLoc dl = getCurSDLoc();

  if (isVolatile)
    Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG);

  // An aggregate load cannot wrap around the address space, so offsets to its
  // parts don't wrap either.
  SDNodeFlags Flags;
  Flags.setNoUnsignedWrap(true);

  SmallVector<SDValue, 4> Values(NumValues);
  SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
  EVT PtrVT = Ptr.getValueType();
  unsigned ChainI = 0;
  for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
    // Serializing loads here may result in excessive register pressure, and
    // TokenFactor places arbitrary choke points on the scheduler. SD scheduling
    // could recover a bit by hoisting nodes upward in the chain by recognizing
    // they are side-effect free or do not alias. The optimizer should really
    // avoid this case by converting large object/array copies to llvm.memcpy
    // (MaxParallelChains should always remain as failsafe).
    if (ChainI == MaxParallelChains) {
      assert(PendingLoads.empty() && "PendingLoads must be serialized first");
      SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                                  makeArrayRef(Chains.data(), ChainI));
      Root = Chain;
      ChainI = 0;
    }
    // Address of the i-th piece: base pointer plus its byte offset.
    SDValue A = DAG.getNode(ISD::ADD, dl,
                            PtrVT, Ptr,
                            DAG.getConstant(Offsets[i], dl, PtrVT),
                            Flags);
    auto MMOFlags = MachineMemOperand::MONone;
    if (isVolatile)
      MMOFlags |= MachineMemOperand::MOVolatile;
    if (isNonTemporal)
      MMOFlags |= MachineMemOperand::MONonTemporal;
    if (isInvariant)
      MMOFlags |= MachineMemOperand::MOInvariant;
    if (isDereferenceable)
      MMOFlags |= MachineMemOperand::MODereferenceable;
    MMOFlags |= TLI.getMMOFlags(I);

    SDValue L = DAG.getLoad(MemVTs[i], dl, Root, A,
                            MachinePointerInfo(SV, Offsets[i]), Alignment,
                            MMOFlags, AAInfo, Ranges);
    Chains[ChainI] = L.getValue(1);

    // If the in-memory type differs from the in-register type, widen the
    // loaded value.
    if (MemVTs[i] != ValueVTs[i])
      L = DAG.getZExtOrTrunc(L, dl, ValueVTs[i]);

    Values[i] = L;
  }

  if (!ConstantMemory) {
    SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                                makeArrayRef(Chains.data(), ChainI));
    if (isVolatile)
      DAG.setRoot(Chain);
    else
      PendingLoads.push_back(Chain);
  }

  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, dl,
                           DAG.getVTList(ValueVTs), Values));
}
4106
4107
108
void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
4108
108
  assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
4109
108
         "call visitStoreToSwiftError when backend supports swifterror");
4110
108
4111
108
  SmallVector<EVT, 4> ValueVTs;
4112
108
  SmallVector<uint64_t, 4> Offsets;
4113
108
  const Value *SrcV = I.getOperand(0);
4114
108
  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
4115
108
                  SrcV->getType(), ValueVTs, &Offsets);
4116
108
  assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
4117
108
         "expect a single EVT for swifterror");
4118
108
4119
108
  SDValue Src = getValue(SrcV);
4120
108
  // Create a virtual register, then update the virtual register.
4121
108
  unsigned VReg =
4122
108
      SwiftError.getOrCreateVRegDefAt(&I, FuncInfo.MBB, I.getPointerOperand());
4123
108
  // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue
4124
108
  // Chain can be getRoot or getControlRoot.
4125
108
  SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg,
4126
108
                                      SDValue(Src.getNode(), Src.getResNo()));
4127
108
  DAG.setRoot(CopyNode);
4128
108
}
4129
4130
53
void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
4131
53
  assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
4132
53
         "call visitLoadFromSwiftError when backend supports swifterror");
4133
53
4134
53
  assert(!I.isVolatile() &&
4135
53
         I.getMetadata(LLVMContext::MD_nontemporal) == nullptr &&
4136
53
         I.getMetadata(LLVMContext::MD_invariant_load) == nullptr &&
4137
53
         "Support volatile, non temporal, invariant for load_from_swift_error");
4138
53
4139
53
  const Value *SV = I.getOperand(0);
4140
53
  Type *Ty = I.getType();
4141
53
  AAMDNodes AAInfo;
4142
53
  I.getAAMetadata(AAInfo);
4143
53
  assert(
4144
53
      (!AA ||
4145
53
       !AA->pointsToConstantMemory(MemoryLocation(
4146
53
           SV, LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)),
4147
53
           AAInfo))) &&
4148
53
      "load_from_swift_error should not be constant memory");
4149
53
4150
53
  SmallVector<EVT, 4> ValueVTs;
4151
53
  SmallVector<uint64_t, 4> Offsets;
4152
53
  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty,
4153
53
                  ValueVTs, &Offsets);
4154
53
  assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
4155
53
         "expect a single EVT for swifterror");
4156
53
4157
53
  // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT
4158
53
  SDValue L = DAG.getCopyFromReg(
4159
53
      getRoot(), getCurSDLoc(),
4160
53
      SwiftError.getOrCreateVRegUseAt(&I, FuncInfo.MBB, SV), ValueVTs[0]);
4161
53
4162
53
  setValue(&I, L);
4163
53
}
4164
4165
545k
// Lower a (non-atomic) store instruction. Like visitLoad, an aggregate store
// is split into one scalar store per first-class value; the resulting store
// chains are joined with a TokenFactor that becomes the new DAG root.
void SelectionDAGBuilder::visitStore(const StoreInst &I) {
  if (I.isAtomic())
    return visitAtomicStore(I);

  const Value *SrcV = I.getOperand(0);
  const Value *PtrV = I.getOperand(1);

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (TLI.supportSwiftError()) {
    // Swifterror values can come from either a function parameter with
    // swifterror attribute or an alloca with swifterror attribute.
    if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
      if (Arg->hasSwiftErrorAttr())
        return visitStoreToSwiftError(I);
    }

    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
      if (Alloca->isSwiftError())
        return visitStoreToSwiftError(I);
    }
  }

  SmallVector<EVT, 4> ValueVTs, MemVTs;
  SmallVector<uint64_t, 4> Offsets;
  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
                  SrcV->getType(), ValueVTs, &MemVTs, &Offsets);
  unsigned NumValues = ValueVTs.size();
  if (NumValues == 0)
    return;

  // Get the lowered operands. Note that we do this after
  // checking if NumResults is zero, because with zero results
  // the operands won't have values in the map.
  SDValue Src = getValue(SrcV);
  SDValue Ptr = getValue(PtrV);

  SDValue Root = getRoot();
  SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
  SDLoc dl = getCurSDLoc();
  EVT PtrVT = Ptr.getValueType();
  unsigned Alignment = I.getAlignment();
  AAMDNodes AAInfo;
  I.getAAMetadata(AAInfo);

  // Translate IR-level properties into MachineMemOperand flags once; they
  // apply to every per-piece store emitted below.
  auto MMOFlags = MachineMemOperand::MONone;
  if (I.isVolatile())
    MMOFlags |= MachineMemOperand::MOVolatile;
  if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
    MMOFlags |= MachineMemOperand::MONonTemporal;
  MMOFlags |= TLI.getMMOFlags(I);

  // An aggregate load cannot wrap around the address space, so offsets to its
  // parts don't wrap either.
  SDNodeFlags Flags;
  Flags.setNoUnsignedWrap(true);

  unsigned ChainI = 0;
  for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
    // See visitLoad comments.
    if (ChainI == MaxParallelChains) {
      SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                                  makeArrayRef(Chains.data(), ChainI));
      Root = Chain;
      ChainI = 0;
    }
    // Address of the i-th piece: base pointer plus its byte offset.
    SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
                              DAG.getConstant(Offsets[i], dl, PtrVT), Flags);
    // The i-th result of the lowered source value.
    SDValue Val = SDValue(Src.getNode(), Src.getResNo() + i);
    if (MemVTs[i] != ValueVTs[i])
      Val = DAG.getPtrExtOrTrunc(Val, dl, MemVTs[i]);
    SDValue St =
        DAG.getStore(Root, dl, Val, Add, MachinePointerInfo(PtrV, Offsets[i]),
                     Alignment, MMOFlags, AAInfo);
    Chains[ChainI] = St;
  }

  SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                                  makeArrayRef(Chains.data(), ChainI));
  DAG.setRoot(StoreNode);
}
4245
4246
// Lower llvm.masked.store.* (IsCompressing == false) or
// llvm.masked.compressstore.* (IsCompressing == true) to a masked-store DAG
// node. The two intrinsics have different argument orders, handled by the
// two extractor lambdas below.
void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
                                           bool IsCompressing) {
  SDLoc sdl = getCurSDLoc();

  auto getMaskedStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
                           unsigned& Alignment) {
    // llvm.masked.store.*(Src0, Ptr, alignment, Mask)
    Src0 = I.getArgOperand(0);
    Ptr = I.getArgOperand(1);
    Alignment = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
    Mask = I.getArgOperand(3);
  };
  auto getCompressingStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
                           unsigned& Alignment) {
    // llvm.masked.compressstore.*(Src0, Ptr, Mask)
    Src0 = I.getArgOperand(0);
    Ptr = I.getArgOperand(1);
    Mask = I.getArgOperand(2);
    // No alignment operand; 0 means "use the natural EVT alignment" below.
    Alignment = 0;
  };

  Value  *PtrOperand, *MaskOperand, *Src0Operand;
  unsigned Alignment;
  if (IsCompressing)
    getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
  else
    getMaskedStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);

  SDValue Ptr = getValue(PtrOperand);
  SDValue Src0 = getValue(Src0Operand);
  SDValue Mask = getValue(MaskOperand);

  EVT VT = Src0.getValueType();
  if (!Alignment)
    Alignment = DAG.getEVTAlignment(VT);

  AAMDNodes AAInfo;
  I.getAAMetadata(AAInfo);

  MachineMemOperand *MMO =
    DAG.getMachineFunction().
    getMachineMemOperand(MachinePointerInfo(PtrOperand),
                          MachineMemOperand::MOStore,  VT.getStoreSize(),
                          Alignment, AAInfo);
  SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT,
                                         MMO, false /* Truncating */,
                                         IsCompressing);
  // The masked store has side effects, so it becomes the new root chain.
  DAG.setRoot(StoreNode);
  setValue(&I, StoreNode);
}
4296
4297
// Get a uniform base for the Gather/Scatter intrinsic.
4298
// The first argument of the Gather/Scatter intrinsic is a vector of pointers.
4299
// We try to represent it as a base pointer + vector of indices.
4300
// Usually, the vector of pointers comes from a 'getelementptr' instruction.
4301
// The first operand of the GEP may be a single pointer or a vector of pointers
4302
// Example:
4303
//   %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind
4304
//  or
4305
//   %gep.ptr = getelementptr i32, i32* %ptr,        <8 x i32> %ind
4306
// %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, ..
4307
//
4308
// When the first GEP operand is a single pointer - it is the uniform base we
4309
// are looking for. If first operand of the GEP is a splat vector - we
4310
// extract the splat value and use it as a uniform base.
4311
// In all other cases the function returns 'false'.
4312
static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
4313
427
                           SDValue &Scale, SelectionDAGBuilder* SDB) {
4314
427
  SelectionDAG& DAG = SDB->DAG;
4315
427
  LLVMContext &Context = *DAG.getContext();
4316
427
4317
427
  assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type");
4318
427
  const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
4319
427
  if (!GEP)
4320
177
    return false;
4321
250
4322
250
  const Value *GEPPtr = GEP->getPointerOperand();
4323
250
  if (!GEPPtr->getType()->isVectorTy())
4324
152
    Ptr = GEPPtr;
4325
98
  else if (!(Ptr = getSplatValue(GEPPtr)))
4326
6
    return false;
4327
244
4328
244
  unsigned FinalIndex = GEP->getNumOperands() - 1;
4329
244
  Value *IndexVal = GEP->getOperand(FinalIndex);
4330
244
4331
244
  // Ensure all the other indices are 0.
4332
250
  for (unsigned i = 1; i < FinalIndex; 
++i6
) {
4333
18
    auto *C = dyn_cast<ConstantInt>(GEP->getOperand(i));
4334
18
    if (!C || 
!C->isZero()6
)
4335
12
      return false;
4336
18
  }
4337
244
4338
244
  // The operands of the GEP may be defined in another basic block.
4339
244
  // In this case we'll not find nodes for the operands.
4340
244
  
if (232
!SDB->findValue(Ptr)232
||
!SDB->findValue(IndexVal)231
)
4341
17
    return false;
4342
215
4343
215
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4344
215
  const DataLayout &DL = DAG.getDataLayout();
4345
215
  Scale = DAG.getTargetConstant(DL.getTypeAllocSize(GEP->getResultElementType()),
4346
215
                                SDB->getCurSDLoc(), TLI.getPointerTy(DL));
4347
215
  Base = SDB->getValue(Ptr);
4348
215
  Index = SDB->getValue(IndexVal);
4349
215
4350
215
  if (!Index.getValueType().isVector()) {
4351
6
    unsigned GEPWidth = GEP->getType()->getVectorNumElements();
4352
6
    EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth);
4353
6
    Index = DAG.getSplatBuildVector(VT, SDLoc(Index), Index);
4354
6
  }
4355
215
  return true;
4356
215
}
4357
4358
103
// Lower llvm.masked.scatter.* to a masked-scatter DAG node. If the vector of
// pointers can be decomposed by getUniformBase into base + index * scale,
// that form is used; otherwise the pointer vector itself is used as the index
// with a zero base and scale 1.
void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
  SDLoc sdl = getCurSDLoc();

  // llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask)
  const Value *Ptr = I.getArgOperand(1);
  SDValue Src0 = getValue(I.getArgOperand(0));
  SDValue Mask = getValue(I.getArgOperand(3));
  EVT VT = Src0.getValueType();
  unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue();
  if (!Alignment)
    Alignment = DAG.getEVTAlignment(VT);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  AAMDNodes AAInfo;
  I.getAAMetadata(AAInfo);

  SDValue Base;
  SDValue Index;
  SDValue Scale;
  const Value *BasePtr = Ptr;
  bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this);

  // With no uniform base the access location is unknown, so the memory
  // operand gets a null pointer value.
  const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr;
  MachineMemOperand *MMO = DAG.getMachineFunction().
    getMachineMemOperand(MachinePointerInfo(MemOpBasePtr),
                         MachineMemOperand::MOStore,  VT.getStoreSize(),
                         Alignment, AAInfo);
  if (!UniformBase) {
    // Fallback form: zero base, the pointer vector as index, scale 1.
    Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
    Index = getValue(Ptr);
    Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
  }
  SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index, Scale };
  SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,
                                         Ops, MMO);
  // The scatter writes memory, so it becomes the new root chain.
  DAG.setRoot(Scatter);
  setValue(&I, Scatter);
}
4396
4397
632
// Lower llvm.masked.load.* (IsExpanding == false) or
// llvm.masked.expandload.* (IsExpanding == true) to a masked-load DAG node.
// The two intrinsics have different argument orders, handled by the two
// extractor lambdas below. Loads of provably-constant memory are left
// unchained (entry node) so they can be freely reordered.
void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
  SDLoc sdl = getCurSDLoc();

  auto getMaskedLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
                           unsigned& Alignment) {
    // @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
    Ptr = I.getArgOperand(0);
    Alignment = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
    Mask = I.getArgOperand(2);
    Src0 = I.getArgOperand(3);
  };
  auto getExpandingLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
                           unsigned& Alignment) {
    // @llvm.masked.expandload.*(Ptr, Mask, Src0)
    Ptr = I.getArgOperand(0);
    // No alignment operand; 0 means "use the natural EVT alignment" below.
    Alignment = 0;
    Mask = I.getArgOperand(1);
    Src0 = I.getArgOperand(2);
  };

  Value  *PtrOperand, *MaskOperand, *Src0Operand;
  unsigned Alignment;
  if (IsExpanding)
    getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
  else
    getMaskedLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);

  SDValue Ptr = getValue(PtrOperand);
  SDValue Src0 = getValue(Src0Operand);
  SDValue Mask = getValue(MaskOperand);

  EVT VT = Src0.getValueType();
  if (!Alignment)
    Alignment = DAG.getEVTAlignment(VT);

  AAMDNodes AAInfo;
  I.getAAMetadata(AAInfo);
  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);

  // Do not serialize masked loads of constant memory with anything.
  bool AddToChain =
      !AA || !AA->pointsToConstantMemory(MemoryLocation(
                 PtrOperand,
                 LocationSize::precise(
                     DAG.getDataLayout().getTypeStoreSize(I.getType())),
                 AAInfo));
  SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();

  MachineMemOperand *MMO =
    DAG.getMachineFunction().
    getMachineMemOperand(MachinePointerInfo(PtrOperand),
                          MachineMemOperand::MOLoad,  VT.getStoreSize(),
                          Alignment, AAInfo, Ranges);

  SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO,
                                   ISD::NON_EXTLOAD, IsExpanding);
  if (AddToChain)
    PendingLoads.push_back(Load.getValue(1));
  setValue(&I, Load);
}
4457
4458
324
// Lower llvm.masked.gather.* to a masked-gather DAG node. If the vector of
// pointers can be decomposed by getUniformBase into base + index * scale,
// that form is used; otherwise the pointer vector itself is the index with a
// zero base and scale 1. Gathers from provably-constant memory are left
// unchained (entry node).
void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
  SDLoc sdl = getCurSDLoc();

  // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
  const Value *Ptr = I.getArgOperand(0);
  SDValue Src0 = getValue(I.getArgOperand(3));
  SDValue Mask = getValue(I.getArgOperand(2));

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
  unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue();
  if (!Alignment)
    Alignment = DAG.getEVTAlignment(VT);

  AAMDNodes AAInfo;
  I.getAAMetadata(AAInfo);
  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);

  SDValue Root = DAG.getRoot();
  SDValue Base;
  SDValue Index;
  SDValue Scale;
  const Value *BasePtr = Ptr;
  bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this);
  bool ConstantMemory = false;
  // Constant-memory detection requires a uniform base; a vector of unknown
  // pointers cannot be queried as a single memory location.
  if (UniformBase && AA &&
      AA->pointsToConstantMemory(
          MemoryLocation(BasePtr,
                         LocationSize::precise(
                             DAG.getDataLayout().getTypeStoreSize(I.getType())),
                         AAInfo))) {
    // Do not serialize (non-volatile) loads of constant memory with anything.
    Root = DAG.getEntryNode();
    ConstantMemory = true;
  }

  MachineMemOperand *MMO =
    DAG.getMachineFunction().
    getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : nullptr),
                         MachineMemOperand::MOLoad,  VT.getStoreSize(),
                         Alignment, AAInfo, Ranges);

  if (!UniformBase) {
    // Fallback form: zero base, the pointer vector as index, scale 1.
    Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
    Index = getValue(Ptr);
    Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
  }
  SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale };
  SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
                                       Ops, MMO);

  SDValue OutChain = Gather.getValue(1);
  if (!ConstantMemory)
    PendingLoads.push_back(OutChain);
  setValue(&I, Gather);
}
4514
4515
2.59k
// Lower a cmpxchg instruction to ATOMIC_CMP_SWAP_WITH_SUCCESS, which yields
// the loaded value, an i1 success flag, and the output chain.
void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
  SDLoc dl = getCurSDLoc();
  AtomicOrdering SuccessOrdering = I.getSuccessOrdering();
  AtomicOrdering FailureOrdering = I.getFailureOrdering();
  SyncScope::ID SSID = I.getSyncScopeID();

  SDValue InChain = getRoot();

  // The memory type is taken from the lowered compare operand.
  MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
  SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);

  auto Alignment = DAG.getEVTAlignment(MemVT);

  // cmpxchg both reads and writes the location.
  auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
  if (I.isVolatile())
    Flags |= MachineMemOperand::MOVolatile;
  Flags |= DAG.getTargetLoweringInfo().getMMOFlags(I);

  MachineFunction &MF = DAG.getMachineFunction();
  MachineMemOperand *MMO =
    MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
                            Flags, MemVT.getStoreSize(), Alignment,
                            AAMDNodes(), nullptr, SSID, SuccessOrdering,
                            FailureOrdering);

  SDValue L = DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS,
                                   dl, MemVT, VTs, InChain,
                                   getValue(I.getPointerOperand()),
                                   getValue(I.getCompareOperand()),
                                   getValue(I.getNewValOperand()), MMO);

  SDValue OutChain = L.getValue(2);

  setValue(&I, L);
  DAG.setRoot(OutChain);
}
4551
4552
7.29k
// Lower an atomicrmw instruction: map the IR operation to the corresponding
// ISD atomic opcode, then emit a single atomic node that yields the old value
// and the output chain.
void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
  SDLoc dl = getCurSDLoc();
  ISD::NodeType NT;
  switch (I.getOperation()) {
  default: llvm_unreachable("Unknown atomicrmw operation");
  case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
  case AtomicRMWInst::Add:  NT = ISD::ATOMIC_LOAD_ADD; break;
  case AtomicRMWInst::Sub:  NT = ISD::ATOMIC_LOAD_SUB; break;
  case AtomicRMWInst::And:  NT = ISD::ATOMIC_LOAD_AND; break;
  case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break;
  case AtomicRMWInst::Or:   NT = ISD::ATOMIC_LOAD_OR; break;
  case AtomicRMWInst::Xor:  NT = ISD::ATOMIC_LOAD_XOR; break;
  case AtomicRMWInst::Max:  NT = ISD::ATOMIC_LOAD_MAX; break;
  case AtomicRMWInst::Min:  NT = ISD::ATOMIC_LOAD_MIN; break;
  case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
  case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
  case AtomicRMWInst::FAdd: NT = ISD::ATOMIC_LOAD_FADD; break;
  case AtomicRMWInst::FSub: NT = ISD::ATOMIC_LOAD_FSUB; break;
  }
  AtomicOrdering Ordering = I.getOrdering();
  SyncScope::ID SSID = I.getSyncScopeID();

  SDValue InChain = getRoot();

  // The memory type is taken from the lowered value operand.
  auto MemVT = getValue(I.getValOperand()).getSimpleValueType();
  auto Alignment = DAG.getEVTAlignment(MemVT);

  // atomicrmw both reads and writes the location.
  auto Flags = MachineMemOperand::MOLoad |  MachineMemOperand::MOStore;
  if (I.isVolatile())
    Flags |= MachineMemOperand::MOVolatile;
  Flags |= DAG.getTargetLoweringInfo().getMMOFlags(I);

  MachineFunction &MF = DAG.getMachineFunction();
  MachineMemOperand *MMO =
    MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), Flags,
                            MemVT.getStoreSize(), Alignment, AAMDNodes(),
                            nullptr, SSID, Ordering);

  SDValue L =
    DAG.getAtomic(NT, dl, MemVT, InChain,
                  getValue(I.getPointerOperand()), getValue(I.getValOperand()),
                  MMO);

  SDValue OutChain = L.getValue(1);

  setValue(&I, L);
  DAG.setRoot(OutChain);
}
4600
4601
4.61k
void SelectionDAGBuilder::visitFence(const FenceInst &I) {
4602
4.61k
  SDLoc dl = getCurSDLoc();
4603
4.61k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4604
4.61k
  SDValue Ops[3];
4605
4.61k
  Ops[0] = getRoot();
4606
4.61k
  Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl,
4607
4.61k
                           TLI.getFenceOperandTy(DAG.getDataLayout()));
4608
4.61k
  Ops[2] = DAG.getConstant(I.getSyncScopeID(), dl,
4609
4.61k
                           TLI.getFenceOperandTy(DAG.getDataLayout()));
4610
4.61k
  DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops));
4611
4.61k
}
4612
4613
1.66k
// Lower an atomic IR load to an ISD::ATOMIC_LOAD node with a machine memory
// operand carrying the ordering, sync scope, alignment, and memory flags.
void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
  SDLoc dl = getCurSDLoc();
  AtomicOrdering Order = I.getOrdering();
  SyncScope::ID SSID = I.getSyncScopeID();

  SDValue InChain = getRoot();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  // VT is the in-register type; MemVT is the in-memory type. They can differ
  // (e.g. pointer types with a distinct memory representation).
  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
  EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType());

  // Atomic loads must be naturally aligned unless the target opts in to
  // unaligned atomics; there is no fallback lowering here.
  if (!TLI.supportsUnalignedAtomics() &&
      I.getAlignment() < MemVT.getSizeInBits() / 8)
    report_fatal_error("Cannot generate unaligned atomic load");

  // Translate IR-level attributes/metadata into MachineMemOperand flags.
  auto Flags = MachineMemOperand::MOLoad;
  if (I.isVolatile())
    Flags |= MachineMemOperand::MOVolatile;
  if (I.getMetadata(LLVMContext::MD_invariant_load) != nullptr)
    Flags |= MachineMemOperand::MOInvariant;
  if (isDereferenceablePointer(I.getPointerOperand(), I.getType(),
                               DAG.getDataLayout()))
    Flags |= MachineMemOperand::MODereferenceable;

  // Let the target add its own flags (e.g. target-specific MMO bits).
  Flags |= TLI.getMMOFlags(I);

  MachineMemOperand *MMO =
      DAG.getMachineFunction().
      getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
                           Flags, MemVT.getStoreSize(),
                           // Fall back to the type's ABI alignment when the IR
                           // load carries no explicit alignment.
                           I.getAlignment() ? I.getAlignment() :
                                              DAG.getEVTAlignment(MemVT),
                           AAMDNodes(), nullptr, SSID, Order);

  // Give the target a chance to insert pre-load sequencing (e.g. barriers).
  InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
  SDValue L =
      DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain,
                    getValue(I.getPointerOperand()), MMO);

  SDValue OutChain = L.getValue(1);
  // Convert from the memory representation back to the register type if the
  // two differ.
  if (MemVT != VT)
    L = DAG.getPtrExtOrTrunc(L, dl, VT);

  setValue(&I, L);
  DAG.setRoot(OutChain);
}
4659
4660
6.85k
// Lower an atomic IR store to an ISD::ATOMIC_STORE node with a machine memory
// operand carrying the ordering, sync scope, alignment, and memory flags.
void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
  SDLoc dl = getCurSDLoc();

  AtomicOrdering Ordering = I.getOrdering();
  SyncScope::ID SSID = I.getSyncScopeID();

  SDValue InChain = getRoot();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  // In-memory type of the stored value (may differ from its register type).
  EVT MemVT =
      TLI.getMemValueType(DAG.getDataLayout(), I.getValueOperand()->getType());

  // Note: unlike visitAtomicLoad, no TLI.supportsUnalignedAtomics() escape
  // hatch here — unaligned atomic stores are always fatal.
  if (I.getAlignment() < MemVT.getSizeInBits() / 8)
    report_fatal_error("Cannot generate unaligned atomic store");

  auto Flags = MachineMemOperand::MOStore;
  if (I.isVolatile())
    Flags |= MachineMemOperand::MOVolatile;
  // Let the target add its own flags.
  Flags |= TLI.getMMOFlags(I);

  MachineFunction &MF = DAG.getMachineFunction();
  MachineMemOperand *MMO =
    MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), Flags,
                            MemVT.getStoreSize(), I.getAlignment(), AAMDNodes(),
                            nullptr, SSID, Ordering);

  SDValue Val = getValue(I.getValueOperand());
  // Convert the value into its memory representation if it differs from the
  // register representation.
  if (Val.getValueType() != MemVT)
    Val = DAG.getPtrExtOrTrunc(Val, dl, MemVT);

  SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain,
                                   getValue(I.getPointerOperand()), Val, MMO);


  // A store produces no value; only the chain is threaded through.
  DAG.setRoot(OutChain);
}
4696
4697
/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
4698
/// node.
4699
void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
                                               unsigned Intrinsic) {
  // Ignore the callsite's attributes. A specific call site may be marked with
  // readnone, but the lowering code will expect the chain based on the
  // definition.
  const Function *F = I.getCalledFunction();
  bool HasChain = !F->doesNotAccessMemory();
  // OnlyLoad: has memory effects but only reads — loads need not be
  // serialized against other loads, so they use PendingLoads below.
  bool OnlyLoad = HasChain && F->onlyReadsMemory();

  // Build the operand list.
  SmallVector<SDValue, 8> Ops;
  if (HasChain) {  // If this intrinsic has side-effects, chainify it.
    if (OnlyLoad) {
      // We don't need to serialize loads against other loads.
      Ops.push_back(DAG.getRoot());
    } else {
      Ops.push_back(getRoot());
    }
  }

  // Info is set by getTgtMemInstrinsic
  TargetLowering::IntrinsicInfo Info;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I,
                                               DAG.getMachineFunction(),
                                               Intrinsic);

  // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
  // (INTRINSIC_VOID / INTRINSIC_W_CHAIN memory intrinsics still carry the ID;
  // other target memory intrinsics encode the operation in Info.opc itself.)
  if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
      Info.opc == ISD::INTRINSIC_W_CHAIN)
    Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(),
                                        TLI.getPointerTy(DAG.getDataLayout())));

  // Add all operands of the call to the operand list.
  for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
    SDValue Op = getValue(I.getArgOperand(i));
    Ops.push_back(Op);
  }

  // Compute the legal result types; a chained node additionally produces
  // MVT::Other as its last result.
  SmallVector<EVT, 4> ValueVTs;
  ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);

  if (HasChain)
    ValueVTs.push_back(MVT::Other);

  SDVTList VTs = DAG.getVTList(ValueVTs);

  // Create the node.
  SDValue Result;
  if (IsTgtIntrinsic) {
    // This is target intrinsic that touches memory
    AAMDNodes AAInfo;
    I.getAAMetadata(AAInfo);
    Result =
        DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT,
                                MachinePointerInfo(Info.ptrVal, Info.offset),
                                Info.align, Info.flags, Info.size, AAInfo);
  } else if (!HasChain) {
    Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
  } else if (!I.getType()->isVoidTy()) {
    Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
  } else {
    Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
  }

  if (HasChain) {
    // The chain is always the node's last result.
    SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
    if (OnlyLoad)
      PendingLoads.push_back(Chain);
    else
      DAG.setRoot(Chain);
  }

  if (!I.getType()->isVoidTy()) {
    if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
      // Bitcast vector results back to the IR-declared vector type.
      EVT VT = TLI.getValueType(DAG.getDataLayout(), PTy);
      Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result);
    } else
      // Attach range metadata (if any) as an AssertZExt.
      Result = lowerRangeToAssertZExt(DAG, I, Result);

    setValue(&I, Result);
  }
}
4782
4783
/// GetSignificand - Get the significand and build it into a floating-point
4784
/// number with exponent of 1:
4785
///
4786
///   Op = (Op & 0x007fffff) | 0x3f800000;
4787
///
4788
/// where Op is the hexadecimal representation of floating point value.
4789
9
static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, const SDLoc &dl) {
  // Keep only the 23 mantissa bits of the single-precision bit pattern.
  SDValue MantissaBits =
      DAG.getNode(ISD::AND, dl, MVT::i32, Op,
                  DAG.getConstant(0x007fffff, dl, MVT::i32));
  // OR in the bit pattern of exponent 1 (biased exponent field 0x7f << 23),
  // producing a value in [1.0, 2.0).
  SDValue WithUnitExponent =
      DAG.getNode(ISD::OR, dl, MVT::i32, MantissaBits,
                  DAG.getConstant(0x3f800000, dl, MVT::i32));
  // Reinterpret the assembled integer bits as an f32.
  return DAG.getNode(ISD::BITCAST, dl, MVT::f32, WithUnitExponent);
}
4796
4797
/// GetExponent - Get the exponent:
4798
///
4799
///   (float)(int)(((Op & 0x7f800000) >> 23) - 127);
4800
///
4801
/// where Op is the hexadecimal representation of floating point value.
4802
static SDValue GetExponent(SelectionDAG &DAG, SDValue Op,
                           const TargetLowering &TLI, const SDLoc &dl) {
  // Isolate the 8-bit biased exponent field of the f32 bit pattern.
  SDValue ExponentField =
      DAG.getNode(ISD::AND, dl, MVT::i32, Op,
                  DAG.getConstant(0x7f800000, dl, MVT::i32));
  // Shift it down to the low-order bits.
  SDValue BiasedExponent = DAG.getNode(
      ISD::SRL, dl, MVT::i32, ExponentField,
      DAG.getConstant(23, dl, TLI.getPointerTy(DAG.getDataLayout())));
  // Subtract the IEEE-754 single-precision bias of 127.
  SDValue UnbiasedExponent =
      DAG.getNode(ISD::SUB, dl, MVT::i32, BiasedExponent,
                  DAG.getConstant(127, dl, MVT::i32));
  // Convert the signed integer exponent to floating point.
  return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, UnbiasedExponent);
}
4813
4814
/// getF32Constant - Get 32-bit floating point constant.
4815
static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt,
                              const SDLoc &dl) {
  // `Flt` is the raw IEEE-754 single-precision bit pattern; reinterpret it as
  // an APFloat and materialize the f32 constant node.
  const APFloat FPVal(APFloat::IEEEsingle(), APInt(32, Flt));
  return DAG.getConstantFP(FPVal, dl, MVT::f32);
}
4820
4821
/// Lower 2^t0 for f32 in limited-precision mode. Splits t0 into integer and
/// fractional parts, approximates 2^frac over [0,1) with a minimax polynomial
/// selected by LimitFloatPrecision, then folds the integer part back in by
/// adding it directly to the exponent field in the integer domain.
static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl,
                                       SelectionDAG &DAG) {
  // TODO: What fast-math-flags should be set on the floating-point nodes?

  //   IntegerPartOfX = ((int32_t)(t0);
  SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);

  //   FractionalPartOfX = t0 - (float)IntegerPartOfX;
  SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::i32, IntegerPartOfX);
  SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);

  //   IntegerPartOfX <<= 23;
  // Shift into the f32 exponent-field position for the final integer add.
  IntegerPartOfX = DAG.getNode(
      ISD::SHL, dl, MVT::i32, IntegerPartOfX,
      DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy(
                                  DAG.getDataLayout())));

  SDValue TwoToFractionalPartOfX;
  if (LimitFloatPrecision <= 6) {
    // For floating-point precision of 6:
    //
    //   TwoToFractionalPartOfX =
    //     0.997535578f +
    //       (0.735607626f + 0.252464424f * x) * x;
    //
    // error 0.0144103317, which is 6 bits
    SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                             getF32Constant(DAG, 0x3e814304, dl));
    SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
                             getF32Constant(DAG, 0x3f3c50c8, dl));
    SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
    TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                                         getF32Constant(DAG, 0x3f7f5e7e, dl));
  } else if (LimitFloatPrecision <= 12) {
    // For floating-point precision of 12:
    //
    //   TwoToFractionalPartOfX =
    //     0.999892986f +
    //       (0.696457318f +
    //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
    //
    // error 0.000107046256, which is 13 to 14 bits
    SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                             getF32Constant(DAG, 0x3da235e3, dl));
    SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
                             getF32Constant(DAG, 0x3e65b8f3, dl));
    SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
    SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                             getF32Constant(DAG, 0x3f324b07, dl));
    SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
    TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
                                         getF32Constant(DAG, 0x3f7ff8fd, dl));
  } else { // LimitFloatPrecision <= 18
    // For floating-point precision of 18:
    //
    //   TwoToFractionalPartOfX =
    //     0.999999982f +
    //       (0.693148872f +
    //         (0.240227044f +
    //           (0.554906021e-1f +
    //             (0.961591928e-2f +
    //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
    // error 2.47208000*10^(-7), which is better than 18 bits
    SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                             getF32Constant(DAG, 0x3924b03e, dl));
    SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
                             getF32Constant(DAG, 0x3ab24b87, dl));
    SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
    SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                             getF32Constant(DAG, 0x3c1d8c17, dl));
    SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
    SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
                             getF32Constant(DAG, 0x3d634a1d, dl));
    SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
    SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
                             getF32Constant(DAG, 0x3e75fe14, dl));
    SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
    SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
                              getF32Constant(DAG, 0x3f317234, dl));
    SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
    TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
                                         getF32Constant(DAG, 0x3f800000, dl));
  }

  // Add the exponent into the result in integer domain.
  SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX);
  return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
                     DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX));
}
4910
4911
/// expandExp - Lower an exp intrinsic. Handles the special sequences for
4912
/// limited-precision mode.
4913
static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
                         const TargetLowering &TLI) {
  // The polynomial expansion only applies to f32 when the user requested
  // limited precision (1..18 bits); otherwise emit a plain FEXP node.
  if (Op.getValueType() == MVT::f32 &&
      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {

    // Put the exponent in the right bit position for later addition to the
    // final result:
    //
    //   #define LOG2OFe 1.4426950f
    //   t0 = Op * LOG2OFe
    // i.e. exp(x) == exp2(x * log2(e)).

    // TODO: What fast-math-flags should be set here?
    SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
                             getF32Constant(DAG, 0x3fb8aa3b, dl));
    return getLimitedPrecisionExp2(t0, dl, DAG);
  }

  // No special expansion.
  return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op);
}
4933
4934
/// expandLog - Lower a log intrinsic. Handles the special sequences for
4935
/// limited-precision mode.
4936
static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
                         const TargetLowering &TLI) {
  // TODO: What fast-math-flags should be set on the floating-point nodes?

  // log(x) = exponent(x)*log(2) + log(mantissa(x)), with the mantissa
  // normalized into [1,2) and its log approximated by a minimax polynomial
  // chosen by LimitFloatPrecision. Only done for f32 in limited-precision
  // mode; otherwise emit a plain FLOG node.
  if (Op.getValueType() == MVT::f32 &&
      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);

    // Scale the exponent by log(2) [0.69314718f].
    SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
    SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
                                        getF32Constant(DAG, 0x3f317218, dl));

    // Get the significand and build it into a floating-point number with
    // exponent of 1.
    SDValue X = GetSignificand(DAG, Op1, dl);

    SDValue LogOfMantissa;
    if (LimitFloatPrecision <= 6) {
      // For floating-point precision of 6:
      //
      //   LogofMantissa =
      //     -1.1609546f +
      //       (1.4034025f - 0.23903021f * x) * x;
      //
      // error 0.0034276066, which is better than 8 bits
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0xbe74c456, dl));
      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
                               getF32Constant(DAG, 0x3fb3a2b1, dl));
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
      LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
                                  getF32Constant(DAG, 0x3f949a29, dl));
    } else if (LimitFloatPrecision <= 12) {
      // For floating-point precision of 12:
      //
      //   LogOfMantissa =
      //     -1.7417939f +
      //       (2.8212026f +
      //         (-1.4699568f +
      //           (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
      //
      // error 0.000061011436, which is 14 bits
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0xbd67b6d6, dl));
      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
                               getF32Constant(DAG, 0x3ee4f4b8, dl));
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
                               getF32Constant(DAG, 0x3fbc278b, dl));
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                               getF32Constant(DAG, 0x40348e95, dl));
      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
      LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
                                  getF32Constant(DAG, 0x3fdef31a, dl));
    } else { // LimitFloatPrecision <= 18
      // For floating-point precision of 18:
      //
      //   LogOfMantissa =
      //     -2.1072184f +
      //       (4.2372794f +
      //         (-3.7029485f +
      //           (2.2781945f +
      //             (-0.87823314f +
      //               (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
      //
      // error 0.0000023660568, which is better than 18 bits
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0xbc91e5ac, dl));
      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
                               getF32Constant(DAG, 0x3e4350aa, dl));
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
                               getF32Constant(DAG, 0x3f60d3e3, dl));
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                               getF32Constant(DAG, 0x4011cdf0, dl));
      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
      SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
                               getF32Constant(DAG, 0x406cfd1c, dl));
      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
                               getF32Constant(DAG, 0x408797cb, dl));
      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
      LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
                                  getF32Constant(DAG, 0x4006dcab, dl));
    }

    return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa);
  }

  // No special expansion.
  return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op);
}
5031
5032
/// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
5033
/// limited-precision mode.
5034
static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
                          const TargetLowering &TLI) {
  // TODO: What fast-math-flags should be set on the floating-point nodes?

  // log2(x) = exponent(x) + log2(mantissa(x)); the exponent needs no scaling
  // here (unlike expandLog/expandLog10). Only done for f32 in
  // limited-precision mode; otherwise emit a plain FLOG2 node.
  if (Op.getValueType() == MVT::f32 &&
      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);

    // Get the exponent.
    SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);

    // Get the significand and build it into a floating-point number with
    // exponent of 1.
    SDValue X = GetSignificand(DAG, Op1, dl);

    // Different possible minimax approximations of significand in
    // floating-point for various degrees of accuracy over [1,2].
    SDValue Log2ofMantissa;
    if (LimitFloatPrecision <= 6) {
      // For floating-point precision of 6:
      //
      //   Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
      //
      // error 0.0049451742, which is more than 7 bits
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0xbeb08fe0, dl));
      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
                               getF32Constant(DAG, 0x40019463, dl));
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
      Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
                                   getF32Constant(DAG, 0x3fd6633d, dl));
    } else if (LimitFloatPrecision <= 12) {
      // For floating-point precision of 12:
      //
      //   Log2ofMantissa =
      //     -2.51285454f +
      //       (4.07009056f +
      //         (-2.12067489f +
      //           (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
      //
      // error 0.0000876136000, which is better than 13 bits
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0xbda7262e, dl));
      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
                               getF32Constant(DAG, 0x3f25280b, dl));
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
                               getF32Constant(DAG, 0x4007b923, dl));
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                               getF32Constant(DAG, 0x40823e2f, dl));
      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
      Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
                                   getF32Constant(DAG, 0x4020d29c, dl));
    } else { // LimitFloatPrecision <= 18
      // For floating-point precision of 18:
      //
      //   Log2ofMantissa =
      //     -3.0400495f +
      //       (6.1129976f +
      //         (-5.3420409f +
      //           (3.2865683f +
      //             (-1.2669343f +
      //               (0.27515199f -
      //                 0.25691327e-1f * x) * x) * x) * x) * x) * x;
      //
      // error 0.0000018516, which is better than 18 bits
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0xbcd2769e, dl));
      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
                               getF32Constant(DAG, 0x3e8ce0b9, dl));
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
                               getF32Constant(DAG, 0x3fa22ae7, dl));
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                               getF32Constant(DAG, 0x40525723, dl));
      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
      SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
                               getF32Constant(DAG, 0x40aaf200, dl));
      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
                               getF32Constant(DAG, 0x40c39dad, dl));
      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
      Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
                                   getF32Constant(DAG, 0x4042902c, dl));
    }

    return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa);
  }

  // No special expansion.
  return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op);
}
5128
5129
/// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
5130
/// limited-precision mode.
5131
static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
                           const TargetLowering &TLI) {
  // TODO: What fast-math-flags should be set on the floating-point nodes?

  // log10(x) = exponent(x)*log10(2) + log10(mantissa(x)). Only done for f32
  // in limited-precision mode; otherwise emit a plain FLOG10 node.
  if (Op.getValueType() == MVT::f32 &&
      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);

    // Scale the exponent by log10(2) [0.30102999f].
    SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
    SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
                                        getF32Constant(DAG, 0x3e9a209a, dl));

    // Get the significand and build it into a floating-point number with
    // exponent of 1.
    SDValue X = GetSignificand(DAG, Op1, dl);

    SDValue Log10ofMantissa;
    if (LimitFloatPrecision <= 6) {
      // For floating-point precision of 6:
      //
      //   Log10ofMantissa =
      //     -0.50419619f +
      //       (0.60948995f - 0.10380950f * x) * x;
      //
      // error 0.0014886165, which is 6 bits
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0xbdd49a13, dl));
      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
                               getF32Constant(DAG, 0x3f1c0789, dl));
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
      Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
                                    getF32Constant(DAG, 0x3f011300, dl));
    } else if (LimitFloatPrecision <= 12) {
      // For floating-point precision of 12:
      //
      //   Log10ofMantissa =
      //     -0.64831180f +
      //       (0.91751397f +
      //         (-0.31664806f + 0.47637168e-1f * x) * x) * x;
      //
      // error 0.00019228036, which is better than 12 bits
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0x3d431f31, dl));
      SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
                               getF32Constant(DAG, 0x3ea21fb2, dl));
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
                               getF32Constant(DAG, 0x3f6ae232, dl));
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
      Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
                                    getF32Constant(DAG, 0x3f25f7c3, dl));
    } else { // LimitFloatPrecision <= 18
      // For floating-point precision of 18:
      //
      //   Log10ofMantissa =
      //     -0.84299375f +
      //       (1.5327582f +
      //         (-1.0688956f +
      //           (0.49102474f +
      //             (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
      //
      // error 0.0000037995730, which is better than 18 bits
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0x3c5d51ce, dl));
      SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
                               getF32Constant(DAG, 0x3e00685a, dl));
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
                               getF32Constant(DAG, 0x3efb6798, dl));
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
      SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
                               getF32Constant(DAG, 0x3f88d192, dl));
      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
                               getF32Constant(DAG, 0x3fc4316c, dl));
      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
      Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
                                    getF32Constant(DAG, 0x3f57ce70, dl));
    }

    return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa);
  }

  // No special expansion.
  return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op);
}
5218
5219
/// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
5220
/// limited-precision mode.
5221
static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
                          const TargetLowering &TLI) {
  // The polynomial expansion applies only to f32 when the user asked for
  // limited precision (1..18 bits).
  const bool UseLimitedPrecision = Op.getValueType() == MVT::f32 &&
                                   LimitFloatPrecision > 0 &&
                                   LimitFloatPrecision <= 18;
  if (UseLimitedPrecision)
    return getLimitedPrecisionExp2(Op, dl, DAG);

  // No special expansion.
  return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op);
}
5230
5231
/// visitPow - Lower a pow intrinsic. Handles the special sequences for
5232
/// limited-precision mode with x == 10.0f.
5233
static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS,
5234
171
                         SelectionDAG &DAG, const TargetLowering &TLI) {
5235
171
  bool IsExp10 = false;
5236
171
  if (LHS.getValueType() == MVT::f32 && 
RHS.getValueType() == MVT::f3248
&&
5237
171
      
LimitFloatPrecision > 048
&&
LimitFloatPrecision <= 183
) {
5238
3
    if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) {
5239
3
      APFloat Ten(10.0f);
5240
3
      IsExp10 = LHSC->isExactlyValue(Ten);
5241
3
    }
5242
3
  }
5243
171
5244
171
  // TODO: What fast-math-flags should be set on the FMUL node?
5245
171
  if (IsExp10) {
5246
3
    // Put the exponent in the right bit position for later addition to the
5247
3
    // final result:
5248
3
    //
5249
3
    //   #define LOG2OF10 3.3219281f
5250
3
    //   t0 = Op * LOG2OF10;
5251
3
    SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS,
5252
3
                             getF32Constant(DAG, 0x40549a78, dl));
5253
3
    return getLimitedPrecisionExp2(t0, dl, DAG);
5254
3
  }
5255
168
5256
168
  // No special expansion.
5257
168
  return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS);
5258
168
}
5259
5260
/// ExpandPowI - Expand a llvm.powi intrinsic.
5261
static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
5262
137
                          SelectionDAG &DAG) {
5263
137
  // If RHS is a constant, we can expand this out to a multiplication tree,
5264
137
  // otherwise we end up lowering to a call to __powidf2 (for example).  When
5265
137
  // optimizing for size, we only want to do this if the expansion would produce
5266
137
  // a small number of multiplies, otherwise we do the full expansion.
5267
137
  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
5268
18
    // Get the exponent as a positive value.
5269
18
    unsigned Val = RHSC->getSExtValue();
5270
18
    if ((int)Val < 0) 
Val = -Val1
;
5271
18
5272
18
    // powi(x, 0) -> 1.0
5273
18
    if (Val == 0)
5274
0
      return DAG.getConstantFP(1.0, DL, LHS.getValueType());
5275
18
5276
18
    const Function &F = DAG.getMachineFunction().getFunction();
5277
18
    if (!F.hasOptSize() ||
5278
18
        // If optimizing for size, don't insert too many multiplies.
5279
18
        // This inserts up to 5 multiplies.
5280
18
        
countPopulation(Val) + Log2_32(Val) < 76
) {
5281
12
      // We use the simple binary decomposition method to generate the multiply
5282
12
      // sequence.  There are more optimal ways to do this (for example,
5283
12
      // powi(x,15) generates one more multiply than it should), but this has
5284
12
      // the benefit of being both really simple and much better than a libcall.
5285
12
      SDValue Res;  // Logically starts equal to 1.0
5286
12
      SDValue CurSquare = LHS;
5287
12
      // TODO: Intrinsics should have fast-math-flags that propagate to these
5288
12
      // nodes.
5289
47
      while (Val) {
5290
35
        if (Val & 1) {
5291
22
          if (Res.getNode())
5292
10
            Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare);
5293
12
          else
5294
12
            Res = CurSquare;  // 1.0*CurSquare.
5295
22
        }
5296
35
5297
35
        CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
5298
35
                                CurSquare, CurSquare);
5299
35
        Val >>= 1;
5300
35
      }
5301
12
5302
12
      // If the original was negative, invert the result, producing 1/(x*x*x).
5303
12
      if (RHSC->getSExtValue() < 0)
5304
1
        Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
5305
1
                          DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res);
5306
12
      return Res;
5307
12
    }
5308
125
  }
5309
125
5310
125
  // Otherwise, expand to a libcall.
5311
125
  return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
5312
125
}
5313
5314
// getUnderlyingArgRegs - Find underlying registers used for a truncated,
5315
// bitcasted, or split argument. Returns a list of <Register, size in bits>
5316
void getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, unsigned>> &Regs,
5317
405
                          const SDValue &N) {
5318
405
  switch (N.getOpcode()) {
5319
405
  case ISD::CopyFromReg: {
5320
291
    SDValue Op = N.getOperand(1);
5321
291
    Regs.emplace_back(cast<RegisterSDNode>(Op)->getReg(),
5322
291
                      Op.getValueType().getSizeInBits());
5323
291
    return;
5324
405
  }
5325
405
  case ISD::BITCAST:
5326
51
  case ISD::AssertZext:
5327
51
  case ISD::AssertSext:
5328
51
  case ISD::TRUNCATE:
5329
51
    getUnderlyingArgRegs(Regs, N.getOperand(0));
5330
51
    return;
5331
51
  case ISD::BUILD_PAIR:
5332
22
  case ISD::BUILD_VECTOR:
5333
22
  case ISD::CONCAT_VECTORS:
5334
22
    for (SDValue Op : N->op_values())
5335
50
      getUnderlyingArgRegs(Regs, Op);
5336
22
    return;
5337
41
  default:
5338
41
    return;
5339
405
  }
5340
405
}
5341
5342
/// If the DbgValueInst is a dbg_value of a function argument, create the
5343
/// corresponding DBG_VALUE machine instruction for it now.  At the end of
5344
/// instruction selection, they will be inserted to the entry BB.
5345
bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
5346
    const Value *V, DILocalVariable *Variable, DIExpression *Expr,
5347
658
    DILocation *DL, bool IsDbgDeclare, const SDValue &N) {
5348
658
  const Argument *Arg = dyn_cast<Argument>(V);
5349
658
  if (!Arg)
5350
336
    return false;
5351
322
5352
322
  if (!IsDbgDeclare) {
5353
309
    // ArgDbgValues are hoisted to the beginning of the entry block. So we
5354
309
    // should only emit as ArgDbgValue if the dbg.value intrinsic is found in
5355
309
    // the entry block.
5356
309
    bool IsInEntryBlock = FuncInfo.MBB == &FuncInfo.MF->front();
5357
309
    if (!IsInEntryBlock)
5358
1
      return false;
5359
308
5360
308
    // ArgDbgValues are hoisted to the beginning of the entry block.  So we
5361
308
    // should only emit as ArgDbgValue if the dbg.value intrinsic describes a
5362
308
    // variable that also is a param.
5363
308
    //
5364
308
    // Although, if we are at the top of the entry block already, we can still
5365
308
    // emit using ArgDbgValue. This might catch some situations when the
5366
308
    // dbg.value refers to an argument that isn't used in the entry block, so
5367
308
    // any CopyToReg node would be optimized out and the only way to express
5368
308
    // this DBG_VALUE is by using the physical reg (or FI) as done in this
5369
308
    // method.  ArgDbgValues are hoisted to the beginning of the entry block. So
5370
308
    // we should only emit as ArgDbgValue if the Variable is an argument to the
5371
308
    // current function, and the dbg.value intrinsic is found in the entry
5372
308
    // block.
5373
308
    bool VariableIsFunctionInputArg = Variable->isParameter() &&
5374
308
        
!DL->getInlinedAt()295
;
5375
308
    bool IsInPrologue = SDNodeOrder == LowestSDNodeOrder;
5376
308
    if (!IsInPrologue && 
!VariableIsFunctionInputArg70
)
5377
9
      return false;
5378
299
5379
299
    // Here we assume that a function argument on IR level only can be used to
5380
299
    // describe one input parameter on source level. If we for example have
5381
299
    // source code like this
5382
299
    //
5383
299
    //    struct A { long x, y; };
5384
299
    //    void foo(struct A a, long b) {
5385
299
    //      ...
5386
299
    //      b = a.x;
5387
299
    //      ...
5388
299
    //    }
5389
299
    //
5390
299
    // and IR like this
5391
299
    //
5392
299
    //  define void @foo(i32 %a1, i32 %a2, i32 %b)  {
5393
299
    //  entry:
5394
299
    //    call void @llvm.dbg.value(metadata i32 %a1, "a", DW_OP_LLVM_fragment
5395
299
    //    call void @llvm.dbg.value(metadata i32 %a2, "a", DW_OP_LLVM_fragment
5396
299
    //    call void @llvm.dbg.value(metadata i32 %b, "b",
5397
299
    //    ...
5398
299
    //    call void @llvm.dbg.value(metadata i32 %a1, "b"
5399
299
    //    ...
5400
299
    //
5401
299
    // then the last dbg.value is describing a parameter "b" using a value that
5402
299
    // is an argument. But since we already has used %a1 to describe a parameter
5403
299
    // we should not handle that last dbg.value here (that would result in an
5404
299
    // incorrect hoisting of the DBG_VALUE to the function entry).
5405
299
    // Notice that we allow one dbg.value per IR level argument, to accomodate
5406
299
    // for the situation with fragments above.
5407
299
    if (VariableIsFunctionInputArg) {
5408
289
      unsigned ArgNo = Arg->getArgNo();
5409
289
      if (ArgNo >= FuncInfo.DescribedArgs.size())
5410
275
        FuncInfo.DescribedArgs.resize(ArgNo + 1, false);
5411
14
      else if (!IsInPrologue && 
FuncInfo.DescribedArgs.test(ArgNo)10
)
5412
4
        return false;
5413
285
      FuncInfo.DescribedArgs.set(ArgNo);
5414
285
    }
5415
299
  }
5416
322
5417
322
  MachineFunction &MF = DAG.getMachineFunction();
5418
308
  const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
5419
308
5420
308
  bool IsIndirect = false;
5421
308
  Optional<MachineOperand> Op;
5422
308
  // Some arguments' frame index is recorded during argument lowering.
5423
308
  int FI = FuncInfo.getArgumentFrameIndex(Arg);
5424
308
  if (FI != std::numeric_limits<int>::max())
5425
4
    Op = MachineOperand::CreateFI(FI);
5426
308
5427
308
  SmallVector<std::pair<unsigned, unsigned>, 8> ArgRegsAndSizes;
5428
308
  if (!Op && 
N.getNode()304
) {
5429
304
    getUnderlyingArgRegs(ArgRegsAndSizes, N);
5430
304
    Register Reg;
5431
304
    if (ArgRegsAndSizes.size() == 1)
5432
249
      Reg = ArgRegsAndSizes.front().first;
5433
304
5434
304
    if (Reg && 
Reg.isVirtual()249
) {
5435
249
      MachineRegisterInfo &RegInfo = MF.getRegInfo();
5436
249
      Register PR = RegInfo.getLiveInPhysReg(Reg);
5437
249
      if (PR)
5438
248
        Reg = PR;
5439
249
    }
5440
304
    if (Reg) {
5441
249
      Op = MachineOperand::CreateReg(Reg, false);
5442
249
      IsIndirect = IsDbgDeclare;
5443
249
    }
5444
304
  }
5445
308
5446
308
  if (!Op && 
N.getNode()55
) {
5447
55
    // Check if frame index is available.
5448
55
    SDValue LCandidate = peekThroughBitcasts(N);
5449
55
    if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(LCandidate.getNode()))
5450
22
      if (FrameIndexSDNode *FINode =
5451
22
          dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
5452
22
        Op = MachineOperand::CreateFI(FINode->getIndex());
5453
55
  }
5454
308
5455
308
  if (!Op) {
5456
33
    // Create a DBG_VALUE for each decomposed value in ArgRegs to cover Reg
5457
33
    auto splitMultiRegDbgValue
5458
33
      = [&](ArrayRef<std::pair<unsigned, unsigned>> SplitRegs) {
5459
18
      unsigned Offset = 0;
5460
47
      for (auto RegAndSize : SplitRegs) {
5461
47
        auto FragmentExpr = DIExpression::createFragmentExpression(
5462
47
          Expr, Offset, RegAndSize.second);
5463
47
        if (!FragmentExpr)
5464
0
          continue;
5465
47
        FuncInfo.ArgDbgValues.push_back(
5466
47
          BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare,
5467
47
                  RegAndSize.first, Variable, *FragmentExpr));
5468
47
        Offset += RegAndSize.second;
5469
47
      }
5470
18
    };
5471
33
5472
33
    // Check if ValueMap has reg number.
5473
33
    DenseMap<const Value *, unsigned>::const_iterator
5474
33
      VMI = FuncInfo.ValueMap.find(V);
5475
33
    if (VMI != FuncInfo.ValueMap.end()) {
5476
7
      const auto &TLI = DAG.getTargetLoweringInfo();
5477
7
      RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second,
5478
7
                       V->getType(), getABIRegCopyCC(V));
5479
7
      if (RFV.occupiesMultipleRegs()) {
5480
3
        splitMultiRegDbgValue(RFV.getRegsAndSizes());
5481
3
        return true;
5482
3
      }
5483
4
5484
4
      Op = MachineOperand::CreateReg(VMI->second, false);
5485
4
      IsIndirect = IsDbgDeclare;
5486
26
    } else if (ArgRegsAndSizes.size() > 1) {
5487
15
      // This was split due to the calling convention, and no virtual register
5488
15
      // mapping exists for the value.
5489
15
      splitMultiRegDbgValue(ArgRegsAndSizes);
5490
15
      return true;
5491
15
    }
5492
290
  }
5493
290
5494
290
  if (!Op)
5495
11
    return false;
5496
279
5497
279
  assert(Variable->isValidLocationForIntrinsic(DL) &&
5498
279
         "Expected inlined-at fields to agree");
5499
279
  IsIndirect = (Op->isReg()) ? 
IsIndirect253
:
true26
;
5500
279
  FuncInfo.ArgDbgValues.push_back(
5501
279
      BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
5502
279
              *Op, Variable, Expr));
5503
279
5504
279
  return true;
5505
279
}
5506
5507
/// Return the appropriate SDDbgValue based on N.
5508
SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
5509
                                             DILocalVariable *Variable,
5510
                                             DIExpression *Expr,
5511
                                             const DebugLoc &dl,
5512
360
                                             unsigned DbgSDNodeOrder) {
5513
360
  if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
5514
0
    // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe
5515
0
    // stack slot locations.
5516
0
    //
5517
0
    // Consider "int x = 0; int *px = &x;". There are two kinds of interesting
5518
0
    // debug values here after optimization:
5519
0
    //
5520
0
    //   dbg.value(i32* %px, !"int *px", !DIExpression()), and
5521
0
    //   dbg.value(i32* %px, !"int x", !DIExpression(DW_OP_deref))
5522
0
    //
5523
0
    // Both describe the direct values of their associated variables.
5524
0
    return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(),
5525
0
                                     /*IsIndirect*/ false, dl, DbgSDNodeOrder);
5526
0
  }
5527
360
  return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(),
5528
360
                         /*IsIndirect*/ false, dl, DbgSDNodeOrder);
5529
360
}
5530
5531
// VisualStudio defines setjmp as _setjmp
5532
#if defined(_MSC_VER) && defined(setjmp) && \
5533
                         !defined(setjmp_undefined_for_msvc)
5534
#  pragma push_macro("setjmp")
5535
#  undef setjmp
5536
#  define setjmp_undefined_for_msvc
5537
#endif
5538
5539
43
static unsigned FixedPointIntrinsicToOpcode(unsigned Intrinsic) {
5540
43
  switch (Intrinsic) {
5541
43
  case Intrinsic::smul_fix:
5542
21
    return ISD::SMULFIX;
5543
43
  case Intrinsic::umul_fix:
5544
22
    return ISD::UMULFIX;
5545
43
  default:
5546
0
    llvm_unreachable("Unhandled fixed point intrinsic");
5547
43
  }
5548
43
}
5549
5550
void SelectionDAGBuilder::lowerCallToExternalSymbol(const CallInst &I,
5551
2
                                           const char *FunctionName) {
5552
2
  assert(FunctionName && "FunctionName must not be nullptr");
5553
2
  SDValue Callee = DAG.getExternalSymbol(
5554
2
      FunctionName,
5555
2
      DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()));
5556
2
  LowerCallTo(&I, Callee, I.isTailCall());
5557
2
}
5558
5559
/// Lower the call to the specified intrinsic function.
5560
void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
5561
286k
                                             unsigned Intrinsic) {
5562
286k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5563
286k
  SDLoc sdl = getCurSDLoc();
5564
286k
  DebugLoc dl = getCurDebugLoc();
5565
286k
  SDValue Res;
5566
286k
5567
286k
  switch (Intrinsic) {
5568
286k
  default:
5569
137k
    // By default, turn this into a target intrinsic node.
5570
137k
    visitTargetIntrinsic(I, Intrinsic);
5571
137k
    return;
5572
286k
  
case Intrinsic::vastart: visitVAStart(I); return532
;
5573
286k
  
case Intrinsic::vaend: visitVAEnd(I); return700
;
5574
286k
  
case Intrinsic::vacopy: visitVACopy(I); return263
;
5575
286k
  case Intrinsic::returnaddress:
5576
6.27k
    setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl,
5577
6.27k
                             TLI.getPointerTy(DAG.getDataLayout()),
5578
6.27k
                             getValue(I.getArgOperand(0))));
5579
6.27k
    return;
5580
286k
  case Intrinsic::addressofreturnaddress:
5581
8
    setValue(&I, DAG.getNode(ISD::ADDROFRETURNADDR, sdl,
5582
8
                             TLI.getPointerTy(DAG.getDataLayout())));
5583
8
    return;
5584
286k
  case Intrinsic::sponentry:
5585
10
    setValue(&I, DAG.getNode(ISD::SPONENTRY, sdl,
5586
10
                             TLI.getFrameIndexTy(DAG.getDataLayout())));
5587
10
    return;
5588
286k
  case Intrinsic::frameaddress:
5589
10.6k
    setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl,
5590
10.6k
                             TLI.getFrameIndexTy(DAG.getDataLayout()),
5591
10.6k
                             getValue(I.getArgOperand(0))));
5592
10.6k
    return;
5593
286k
  case Intrinsic::read_register: {
5594
201
    Value *Reg = I.getArgOperand(0);
5595
201
    SDValue Chain = getRoot();
5596
201
    SDValue RegName =
5597
201
        DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
5598
201
    EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
5599
201
    Res = DAG.getNode(ISD::READ_REGISTER, sdl,
5600
201
      DAG.getVTList(VT, MVT::Other), Chain, RegName);
5601
201
    setValue(&I, Res);
5602
201
    DAG.setRoot(Res.getValue(1));
5603
201
    return;
5604
286k
  }
5605
286k
  case Intrinsic::write_register: {
5606
189
    Value *Reg = I.getArgOperand(0);
5607
189
    Value *RegValue = I.getArgOperand(1);
5608
189
    SDValue Chain = getRoot();
5609
189
    SDValue RegName =
5610
189
        DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
5611
189
    DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain,
5612
189
                            RegName, getValue(RegValue)));
5613
189
    return;
5614
286k
  }
5615
286k
  case Intrinsic::setjmp:
5616
0
    lowerCallToExternalSymbol(I, &"_setjmp"[!TLI.usesUnderscoreSetJmp()]);
5617
0
    return;
5618
286k
  case Intrinsic::longjmp:
5619
0
    lowerCallToExternalSymbol(I, &"_longjmp"[!TLI.usesUnderscoreLongJmp()]);
5620
0
    return;
5621
286k
  case Intrinsic::memcpy: {
5622
6.15k
    const auto &MCI = cast<MemCpyInst>(I);
5623
6.15k
    SDValue Op1 = getValue(I.getArgOperand(0));
5624
6.15k
    SDValue Op2 = getValue(I.getArgOperand(1));
5625
6.15k
    SDValue Op3 = getValue(I.getArgOperand(2));
5626
6.15k
    // @llvm.memcpy defines 0 and 1 to both mean no alignment.
5627
6.15k
    unsigned DstAlign = std::max<unsigned>(MCI.getDestAlignment(), 1);
5628
6.15k
    unsigned SrcAlign = std::max<unsigned>(MCI.getSourceAlignment(), 1);
5629
6.15k
    unsigned Align = MinAlign(DstAlign, SrcAlign);
5630
6.15k
    bool isVol = MCI.isVolatile();
5631
6.15k
    bool isTC = I.isTailCall() && 
isInTailCallPosition(&I, DAG.getTarget())1.13k
;
5632
6.15k
    // FIXME: Support passing different dest/src alignments to the memcpy DAG
5633
6.15k
    // node.
5634
6.15k
    SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
5635
6.15k
                               false, isTC,
5636
6.15k
                               MachinePointerInfo(I.getArgOperand(0)),
5637
6.15k
                               MachinePointerInfo(I.getArgOperand(1)));
5638
6.15k
    updateDAGForMaybeTailCall(MC);
5639
6.15k
    return;
5640
286k
  }
5641
286k
  case Intrinsic::memset: {
5642
7.94k
    const auto &MSI = cast<MemSetInst>(I);
5643
7.94k
    SDValue Op1 = getValue(I.getArgOperand(0));
5644
7.94k
    SDValue Op2 = getValue(I.getArgOperand(1));
5645
7.94k
    SDValue Op3 = getValue(I.getArgOperand(2));
5646
7.94k
    // @llvm.memset defines 0 and 1 to both mean no alignment.
5647
7.94k
    unsigned Align = std::max<unsigned>(MSI.getDestAlignment(), 1);
5648
7.94k
    bool isVol = MSI.isVolatile();
5649
7.94k
    bool isTC = I.isTailCall() && 
isInTailCallPosition(&I, DAG.getTarget())690
;
5650
7.94k
    SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
5651
7.94k
                               isTC, MachinePointerInfo(I.getArgOperand(0)));
5652
7.94k
    updateDAGForMaybeTailCall(MS);
5653
7.94k
    return;
5654
286k
  }
5655
286k
  case Intrinsic::memmove: {
5656
185
    const auto &MMI = cast<MemMoveInst>(I);
5657
185
    SDValue Op1 = getValue(I.getArgOperand(0));
5658
185
    SDValue Op2 = getValue(I.getArgOperand(1));
5659
185
    SDValue Op3 = getValue(I.getArgOperand(2));
5660
185
    // @llvm.memmove defines 0 and 1 to both mean no alignment.
5661
185
    unsigned DstAlign = std::max<unsigned>(MMI.getDestAlignment(), 1);
5662
185
    unsigned SrcAlign = std::max<unsigned>(MMI.getSourceAlignment(), 1);
5663
185
    unsigned Align = MinAlign(DstAlign, SrcAlign);
5664
185
    bool isVol = MMI.isVolatile();
5665
185
    bool isTC = I.isTailCall() && 
isInTailCallPosition(&I, DAG.getTarget())13
;
5666
185
    // FIXME: Support passing different dest/src alignments to the memmove DAG
5667
185
    // node.
5668
185
    SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
5669
185
                                isTC, MachinePointerInfo(I.getArgOperand(0)),
5670
185
                                MachinePointerInfo(I.getArgOperand(1)));
5671
185
    updateDAGForMaybeTailCall(MM);
5672
185
    return;
5673
286k
  }
5674
286k
  case Intrinsic::memcpy_element_unordered_atomic: {
5675
15
    const AtomicMemCpyInst &MI = cast<AtomicMemCpyInst>(I);
5676
15
    SDValue Dst = getValue(MI.getRawDest());
5677
15
    SDValue Src = getValue(MI.getRawSource());
5678
15
    SDValue Length = getValue(MI.getLength());
5679
15
5680
15
    unsigned DstAlign = MI.getDestAlignment();
5681
15
    unsigned SrcAlign = MI.getSourceAlignment();
5682
15
    Type *LengthTy = MI.getLength()->getType();
5683
15
    unsigned ElemSz = MI.getElementSizeInBytes();
5684
15
    bool isTC = I.isTailCall() && 
isInTailCallPosition(&I, DAG.getTarget())0
;
5685
15
    SDValue MC = DAG.getAtomicMemcpy(getRoot(), sdl, Dst, DstAlign, Src,
5686
15
                                     SrcAlign, Length, LengthTy, ElemSz, isTC,
5687
15
                                     MachinePointerInfo(MI.getRawDest()),
5688
15
                                     MachinePointerInfo(MI.getRawSource()));
5689
15
    updateDAGForMaybeTailCall(MC);
5690
15
    return;
5691
286k
  }
5692
286k
  case Intrinsic::memmove_element_unordered_atomic: {
5693
13
    auto &MI = cast<AtomicMemMoveInst>(I);
5694
13
    SDValue Dst = getValue(MI.getRawDest());
5695
13
    SDValue Src = getValue(MI.getRawSource());
5696
13
    SDValue Length = getValue(MI.getLength());
5697
13
5698
13
    unsigned DstAlign = MI.getDestAlignment();
5699
13
    unsigned SrcAlign = MI.getSourceAlignment();
5700
13
    Type *LengthTy = MI.getLength()->getType();
5701
13
    unsigned ElemSz = MI.getElementSizeInBytes();
5702
13
    bool isTC = I.isTailCall() && 
isInTailCallPosition(&I, DAG.getTarget())0
;
5703
13
    SDValue MC = DAG.getAtomicMemmove(getRoot(), sdl, Dst, DstAlign, Src,
5704
13
                                      SrcAlign, Length, LengthTy, ElemSz, isTC,
5705
13
                                      MachinePointerInfo(MI.getRawDest()),
5706
13
                                      MachinePointerInfo(MI.getRawSource()));
5707
13
    updateDAGForMaybeTailCall(MC);
5708
13
    return;
5709
286k
  }
5710
286k
  case Intrinsic::memset_element_unordered_atomic: {
5711
23
    auto &MI = cast<AtomicMemSetInst>(I);
5712
23
    SDValue Dst = getValue(MI.getRawDest());
5713
23
    SDValue Val = getValue(MI.getValue());
5714
23
    SDValue Length = getValue(MI.getLength());
5715
23
5716
23
    unsigned DstAlign = MI.getDestAlignment();
5717
23
    Type *LengthTy = MI.getLength()->getType();
5718
23
    unsigned ElemSz = MI.getElementSizeInBytes();
5719
23
    bool isTC = I.isTailCall() && 
isInTailCallPosition(&I, DAG.getTarget())0
;
5720
23
    SDValue MC = DAG.getAtomicMemset(getRoot(), sdl, Dst, DstAlign, Val, Length,
5721
23
                                     LengthTy, ElemSz, isTC,
5722
23
                                     MachinePointerInfo(MI.getRawDest()));
5723
23
    updateDAGForMaybeTailCall(MC);
5724
23
    return;
5725
286k
  }
5726
286k
  case Intrinsic::dbg_addr:
5727
536
  case Intrinsic::dbg_declare: {
5728
536
    const auto &DI = cast<DbgVariableIntrinsic>(I);
5729
536
    DILocalVariable *Variable = DI.getVariable();
5730
536
    DIExpression *Expression = DI.getExpression();
5731
536
    dropDanglingDebugInfo(Variable, Expression);
5732
536
    assert(Variable && "Missing variable");
5733
536
5734
536
    // Check if address has undef value.
5735
536
    const Value *Address = DI.getVariableLocation();
5736
536
    if (!Address || 
isa<UndefValue>(Address)529
||
5737
536
        
(506
Address->use_empty()506
&&
!isa<Argument>(Address)74
)) {
5738
96
      LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
5739
96
      return;
5740
96
    }
5741
440
5742
440
    bool isParameter = Variable->isParameter() || 
isa<Argument>(Address)199
;
5743
440
5744
440
    // Check if this variable can be described by a frame index, typically
5745
440
    // either as a static alloca or a byval parameter.
5746
440
    int FI = std::numeric_limits<int>::max();
5747
440
    if (const auto *AI =
5748
409
            dyn_cast<AllocaInst>(Address->stripInBoundsConstantOffsets())) {
5749
409
      if (AI->isStaticAlloca()) {
5750
403
        auto I = FuncInfo.StaticAllocaMap.find(AI);
5751
403
        if (I != FuncInfo.StaticAllocaMap.end())
5752
403
          FI = I->second;
5753
403
      }
5754
409
    } else 
if (const auto *31
Arg31
= dyn_cast<Argument>(
5755
28
                   Address->stripInBoundsConstantOffsets())) {
5756
28
      FI = FuncInfo.getArgumentFrameIndex(Arg);
5757
28
    }
5758
440
5759
440
    // llvm.dbg.addr is control dependent and always generates indirect
5760
440
    // DBG_VALUE instructions. llvm.dbg.declare is handled as a frame index in
5761
440
    // the MachineFunction variable table.
5762
440
    if (FI != std::numeric_limits<int>::max()) {
5763
418
      if (Intrinsic == Intrinsic::dbg_addr) {
5764
3
        SDDbgValue *SDV = DAG.getFrameIndexDbgValue(
5765
3
            Variable, Expression, FI, /*IsIndirect*/ true, dl, SDNodeOrder);
5766
3
        DAG.AddDbgValue(SDV, getRoot().getNode(), isParameter);
5767
3
      }
5768
418
      return;
5769
418
    }
5770
22
5771
22
    SDValue &N = NodeMap[Address];
5772
22
    if (!N.getNode() && 
isa<Argument>(Address)0
)
5773
0
      // Check unused arguments map.
5774
0
      N = UnusedArgNodeMap[Address];
5775
22
    SDDbgValue *SDV;
5776
22
    if (N.getNode()) {
5777
22
      if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
5778
2
        Address = BCI->getOperand(0);
5779
22
      // Parameters are handled specially.
5780
22
      auto FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
5781
22
      if (isParameter && 
FINode12
) {
5782
0
        // Byval parameter. We have a frame index at this point.
5783
0
        SDV =
5784
0
            DAG.getFrameIndexDbgValue(Variable, Expression, FINode->getIndex(),
5785
0
                                      /*IsIndirect*/ true, dl, SDNodeOrder);
5786
22
      } else if (isa<Argument>(Address)) {
5787
13
        // Address is an argument, so try to emit its dbg value using
5788
13
        // virtual register info from the FuncInfo.ValueMap.
5789
13
        EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, N);
5790
13
        return;
5791
13
      } else {
5792
9
        SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
5793
9
                              true, dl, SDNodeOrder);
5794
9
      }
5795
22
      DAG.AddDbgValue(SDV, N.getNode(), isParameter);
5796
9
    } else {
5797
0
      // If Address is an argument then try to emit its dbg value using
5798
0
      // virtual register info from the FuncInfo.ValueMap.
5799
0
      if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true,
5800
0
                                    N)) {
5801
0
        LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
5802
0
      }
5803
0
    }
5804
22
    
return9
;
5805
22
  }
5806
22
  case Intrinsic::dbg_label: {
5807
4
    const DbgLabelInst &DI = cast<DbgLabelInst>(I);
5808
4
    DILabel *Label = DI.getLabel();
5809
4
    assert(Label && "Missing label");
5810
4
5811
4
    SDDbgLabel *SDV;
5812
4
    SDV = DAG.getDbgLabel(Label, dl, SDNodeOrder);
5813
4
    DAG.AddDbgLabel(SDV);
5814
4
    return;
5815
22
  }
5816
4.93k
  case Intrinsic::dbg_value: {
5817
4.93k
    const DbgValueInst &DI = cast<DbgValueInst>(I);
5818
4.93k
    assert(DI.getVariable() && "Missing variable");
5819
4.93k
5820
4.93k
    DILocalVariable *Variable = DI.getVariable();
5821
4.93k
    DIExpression *Expression = DI.getExpression();
5822
4.93k
    dropDanglingDebugInfo(Variable, Expression);
5823
4.93k
    const Value *V = DI.getValue();
5824
4.93k
    if (!V)
5825
2
      return;
5826
4.93k
5827
4.93k
    if (handleDebugValue(V, Variable, Expression, dl, DI.getDebugLoc(),
5828
4.93k
        SDNodeOrder))
5829
4.90k
      return;
5830
32
5831
32
    // TODO: Dangling debug info will eventually either be resolved or produce
5832
32
    // an Undef DBG_VALUE. However in the resolution case, a gap may appear
5833
32
    // between the original dbg.value location and its resolved DBG_VALUE, which
5834
32
    // we should ideally fill with an extra Undef DBG_VALUE.
5835
32
5836
32
    DanglingDebugInfoMap[V].emplace_back(&DI, dl, SDNodeOrder);
5837
32
    return;
5838
32
  }
5839
32
5840
81
  case Intrinsic::eh_typeid_for: {
5841
81
    // Find the type id for the given typeinfo.
5842
81
    GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0));
5843
81
    unsigned TypeID = DAG.getMachineFunction().getTypeIDFor(GV);
5844
81
    Res = DAG.getConstant(TypeID, sdl, MVT::i32);
5845
81
    setValue(&I, Res);
5846
81
    return;
5847
32
  }
5848
32
5849
32
  case Intrinsic::eh_return_i32:
5850
29
  case Intrinsic::eh_return_i64:
5851
29
    DAG.getMachineFunction().setCallsEHReturn(true);
5852
29
    DAG.setRoot(DAG.getNode(ISD::EH_RETURN, sdl,
5853
29
                            MVT::Other,
5854
29
                            getControlRoot(),
5855
29
                            getValue(I.getArgOperand(0)),
5856
29
                            getValue(I.getArgOperand(1))));
5857
29
    return;
5858
29
  case Intrinsic::eh_unwind_init:
5859
15
    DAG.getMachineFunction().setCallsUnwindInit(true);
5860
15
    return;
5861
29
  case Intrinsic::eh_dwarf_cfa:
5862
19
    setValue(&I, DAG.getNode(ISD::EH_DWARF_CFA, sdl,
5863
19
                             TLI.getPointerTy(DAG.getDataLayout()),
5864
19
                             getValue(I.getArgOperand(0))));
5865
19
    return;
5866
175
  case Intrinsic::eh_sjlj_callsite: {
5867
175
    MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
5868
175
    ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0));
5869
175
    assert(CI && "Non-constant call site value in eh.sjlj.callsite!");
5870
175
    assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");
5871
175
5872
175
    MMI.setCurrentCallSite(CI->getZExtValue());
5873
175
    return;
5874
29
  }
5875
36
  case Intrinsic::eh_sjlj_functioncontext: {
5876
36
    // Get and store the index of the function context.
5877
36
    MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
5878
36
    AllocaInst *FnCtx =
5879
36
      cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts());
5880
36
    int FI = FuncInfo.StaticAllocaMap[FnCtx];
5881
36
    MFI.setFunctionContextIndex(FI);
5882
36
    return;
5883
29
  }
5884
31
  case Intrinsic::eh_sjlj_setjmp: {
5885
31
    SDValue Ops[2];
5886
31
    Ops[0] = getRoot();
5887
31
    Ops[1] = getValue(I.getArgOperand(0));
5888
31
    SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, sdl,
5889
31
                             DAG.getVTList(MVT::i32, MVT::Other), Ops);
5890
31
    setValue(&I, Op.getValue(0));
5891
31
    DAG.setRoot(Op.getValue(1));
5892
31
    return;
5893
29
  }
5894
29
  case Intrinsic::eh_sjlj_longjmp:
5895
20
    DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other,
5896
20
                            getRoot(), getValue(I.getArgOperand(0))));
5897
20
    return;
5898
36
  case Intrinsic::eh_sjlj_setup_dispatch:
5899
36
    DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other,
5900
36
                            getRoot()));
5901
36
    return;
5902
324
  case Intrinsic::masked_gather:
5903
324
    visitMaskedGather(I);
5904
324
    return;
5905
381
  case Intrinsic::masked_load:
5906
381
    visitMaskedLoad(I);
5907
381
    return;
5908
103
  case Intrinsic::masked_scatter:
5909
103
    visitMaskedScatter(I);
5910
103
    return;
5911
394
  case Intrinsic::masked_store:
5912
394
    visitMaskedStore(I);
5913
394
    return;
5914
251
  case Intrinsic::masked_expandload:
5915
251
    visitMaskedLoad(I, true /* IsExpanding */);
5916
251
    return;
5917
165
  case Intrinsic::masked_compressstore:
5918
165
    visitMaskedStore(I, true /* IsCompressing */);
5919
165
    return;
5920
66
  case Intrinsic::x86_mmx_pslli_w:
5921
66
  case Intrinsic::x86_mmx_pslli_d:
5922
66
  case Intrinsic::x86_mmx_pslli_q:
5923
66
  case Intrinsic::x86_mmx_psrli_w:
5924
66
  case Intrinsic::x86_mmx_psrli_d:
5925
66
  case Intrinsic::x86_mmx_psrli_q:
5926
66
  case Intrinsic::x86_mmx_psrai_w:
5927
66
  case Intrinsic::x86_mmx_psrai_d: {
5928
66
    SDValue ShAmt = getValue(I.getArgOperand(1));
5929
66
    if (isa<ConstantSDNode>(ShAmt)) {
5930
41
      visitTargetIntrinsic(I, Intrinsic);
5931
41
      return;
5932
41
    }
5933
25
    unsigned NewIntrinsic = 0;
5934
25
    EVT ShAmtVT = MVT::v2i32;
5935
25
    switch (Intrinsic) {
5936
25
    case Intrinsic::x86_mmx_pslli_w:
5937
2
      NewIntrinsic = Intrinsic::x86_mmx_psll_w;
5938
2
      break;
5939
25
    case Intrinsic::x86_mmx_pslli_d:
5940
2
      NewIntrinsic = Intrinsic::x86_mmx_psll_d;
5941
2
      break;
5942
25
    case Intrinsic::x86_mmx_pslli_q:
5943
9
      NewIntrinsic = Intrinsic::x86_mmx_psll_q;
5944
9
      break;
5945
25
    case Intrinsic::x86_mmx_psrli_w:
5946
2
      NewIntrinsic = Intrinsic::x86_mmx_psrl_w;
5947
2
      break;
5948
25
    case Intrinsic::x86_mmx_psrli_d:
5949
2
      NewIntrinsic = Intrinsic::x86_mmx_psrl_d;
5950
2
      break;
5951
25
    case Intrinsic::x86_mmx_psrli_q:
5952
4
      NewIntrinsic = Intrinsic::x86_mmx_psrl_q;
5953
4
      break;
5954
25
    case Intrinsic::x86_mmx_psrai_w:
5955
2
      NewIntrinsic = Intrinsic::x86_mmx_psra_w;
5956
2
      break;
5957
25
    case Intrinsic::x86_mmx_psrai_d:
5958
2
      NewIntrinsic = Intrinsic::x86_mmx_psra_d;
5959
2
      break;
5960
25
    
default: 0
llvm_unreachable0
("Impossible intrinsic"); // Can't reach here.
5961
25
    }
5962
25
5963
25
    // The vector shift intrinsics with scalars uses 32b shift amounts but
5964
25
    // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits
5965
25
    // to be zero.
5966
25
    // We must do this early because v2i32 is not a legal type.
5967
25
    SDValue ShOps[2];
5968
25
    ShOps[0] = ShAmt;
5969
25
    ShOps[1] = DAG.getConstant(0, sdl, MVT::i32);
5970
25
    ShAmt =  DAG.getBuildVector(ShAmtVT, sdl, ShOps);
5971
25
    EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
5972
25
    ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt);
5973
25
    Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT,
5974
25
                       DAG.getConstant(NewIntrinsic, sdl, MVT::i32),
5975
25
                       getValue(I.getArgOperand(0)), ShAmt);
5976
25
    setValue(&I, Res);
5977
25
    return;
5978
25
  }
5979
137
  case Intrinsic::powi:
5980
137
    setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)),
5981
137
                            getValue(I.getArgOperand(1)), DAG));
5982
137
    return;
5983
107
  case Intrinsic::log:
5984
107
    setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
5985
107
    return;
5986
104
  case Intrinsic::log2:
5987
104
    setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
5988
104
    return;
5989
134
  case Intrinsic::log10:
5990
134
    setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
5991
134
    return;
5992
165
  case Intrinsic::exp:
5993
165
    setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
5994
165
    return;
5995
126
  case Intrinsic::exp2:
5996
126
    setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
5997
126
    return;
5998
171
  case Intrinsic::pow:
5999
171
    setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)),
6000
171
                           getValue(I.getArgOperand(1)), DAG, TLI));
6001
171
    return;
6002
5.74k
  case Intrinsic::sqrt:
6003
5.74k
  case Intrinsic::fabs:
6004
5.74k
  case Intrinsic::sin:
6005
5.74k
  case Intrinsic::cos:
6006
5.74k
  case Intrinsic::floor:
6007
5.74k
  case Intrinsic::ceil:
6008
5.74k
  case Intrinsic::trunc:
6009
5.74k
  case Intrinsic::rint:
6010
5.74k
  case Intrinsic::nearbyint:
6011
5.74k
  case Intrinsic::round:
6012
5.74k
  case Intrinsic::canonicalize: {
6013
5.74k
    unsigned Opcode;
6014
5.74k
    switch (Intrinsic) {
6015
5.74k
    
default: 0
llvm_unreachable0
("Impossible intrinsic"); // Can't reach here.
6016
5.74k
    
case Intrinsic::sqrt: Opcode = ISD::FSQRT; break878
;
6017
5.74k
    
case Intrinsic::fabs: Opcode = ISD::FABS; break2.27k
;
6018
5.74k
    
case Intrinsic::sin: Opcode = ISD::FSIN; break192
;
6019
5.74k
    
case Intrinsic::cos: Opcode = ISD::FCOS; break142
;
6020
5.74k
    
case Intrinsic::floor: Opcode = ISD::FFLOOR; break498
;
6021
5.74k
    
case Intrinsic::ceil: Opcode = ISD::FCEIL; break370
;
6022
5.74k
    
case Intrinsic::trunc: Opcode = ISD::FTRUNC; break315
;
6023
5.74k
    
case Intrinsic::rint: Opcode = ISD::FRINT; break220
;
6024
5.74k
    
case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break210
;
6025
5.74k
    
case Intrinsic::round: Opcode = ISD::FROUND; break114
;
6026
5.74k
    
case Intrinsic::canonicalize: Opcode = ISD::FCANONICALIZE; break533
;
6027
5.74k
    }
6028
5.74k
6029
5.74k
    setValue(&I, DAG.getNode(Opcode, sdl,
6030
5.74k
                             getValue(I.getArgOperand(0)).getValueType(),
6031
5.74k
                             getValue(I.getArgOperand(0))));
6032
5.74k
    return;
6033
5.74k
  }
6034
5.74k
  case Intrinsic::lround:
6035
157
  case Intrinsic::llround:
6036
157
  case Intrinsic::lrint:
6037
157
  case Intrinsic::llrint: {
6038
157
    unsigned Opcode;
6039
157
    switch (Intrinsic) {
6040
157
    
default: 0
llvm_unreachable0
("Impossible intrinsic"); // Can't reach here.
6041
157
    
case Intrinsic::lround: Opcode = ISD::LROUND; break43
;
6042
157
    
case Intrinsic::llround: Opcode = ISD::LLROUND; break37
;
6043
157
    
case Intrinsic::lrint: Opcode = ISD::LRINT; break42
;
6044
157
    
case Intrinsic::llrint: Opcode = ISD::LLRINT; break35
;
6045
157
    }
6046
157
6047
157
    EVT RetVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
6048
157
    setValue(&I, DAG.getNode(Opcode, sdl, RetVT,
6049
157
                             getValue(I.getArgOperand(0))));
6050
157
    return;
6051
157
  }
6052
896
  case Intrinsic::minnum:
6053
896
    setValue(&I, DAG.getNode(ISD::FMINNUM, sdl,
6054
896
                             getValue(I.getArgOperand(0)).getValueType(),
6055
896
                             getValue(I.getArgOperand(0)),
6056
896
                             getValue(I.getArgOperand(1))));
6057
896
    return;
6058
918
  case Intrinsic::maxnum:
6059
918
    setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl,
6060
918
                             getValue(I.getArgOperand(0)).getValueType(),
6061
918
                             getValue(I.getArgOperand(0)),
6062
918
                             getValue(I.getArgOperand(1))));
6063
918
    return;
6064
157
  case Intrinsic::minimum:
6065
34
    setValue(&I, DAG.getNode(ISD::FMINIMUM, sdl,
6066
34
                             getValue(I.getArgOperand(0)).getValueType(),
6067
34
                             getValue(I.getArgOperand(0)),
6068
34
                             getValue(I.getArgOperand(1))));
6069
34
    return;
6070
157
  case Intrinsic::maximum:
6071
34
    setValue(&I, DAG.getNode(ISD::FMAXIMUM, sdl,
6072
34
                             getValue(I.getArgOperand(0)).getValueType(),
6073
34
                             getValue(I.getArgOperand(0)),
6074
34
                             getValue(I.getArgOperand(1))));
6075
34
    return;
6076
1.49k
  case Intrinsic::copysign:
6077
1.49k
    setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl,
6078
1.49k
                             getValue(I.getArgOperand(0)).getValueType(),
6079
1.49k
                             getValue(I.getArgOperand(0)),
6080
1.49k
                             getValue(I.getArgOperand(1))));
6081
1.49k
    return;
6082
3.24k
  case Intrinsic::fma:
6083
3.24k
    setValue(&I, DAG.getNode(ISD::FMA, sdl,
6084
3.24k
                             getValue(I.getArgOperand(0)).getValueType(),
6085
3.24k
                             getValue(I.getArgOperand(0)),
6086
3.24k
                             getValue(I.getArgOperand(1)),
6087
3.24k
                             getValue(I.getArgOperand(2))));
6088
3.24k
    return;
6089
1.38k
  case Intrinsic::experimental_constrained_fadd:
6090
1.38k
  case Intrinsic::experimental_constrained_fsub:
6091
1.38k
  case Intrinsic::experimental_constrained_fmul:
6092
1.38k
  case Intrinsic::experimental_constrained_fdiv:
6093
1.38k
  case Intrinsic::experimental_constrained_frem:
6094
1.38k
  case Intrinsic::experimental_constrained_fma:
6095
1.38k
  case Intrinsic::experimental_constrained_fptrunc:
6096
1.38k
  case Intrinsic::experimental_constrained_fpext:
6097
1.38k
  case Intrinsic::experimental_constrained_sqrt:
6098
1.38k
  case Intrinsic::experimental_constrained_pow:
6099
1.38k
  case Intrinsic::experimental_constrained_powi:
6100
1.38k
  case Intrinsic::experimental_constrained_sin:
6101
1.38k
  case Intrinsic::experimental_constrained_cos:
6102
1.38k
  case Intrinsic::experimental_constrained_exp:
6103
1.38k
  case Intrinsic::experimental_constrained_exp2:
6104
1.38k
  case Intrinsic::experimental_constrained_log:
6105
1.38k
  case Intrinsic::experimental_constrained_log10:
6106
1.38k
  case Intrinsic::experimental_constrained_log2:
6107
1.38k
  case Intrinsic::experimental_constrained_rint:
6108
1.38k
  case Intrinsic::experimental_constrained_nearbyint:
6109
1.38k
  case Intrinsic::experimental_constrained_maxnum:
6110
1.38k
  case Intrinsic::experimental_constrained_minnum:
6111
1.38k
  case Intrinsic::experimental_constrained_ceil:
6112
1.38k
  case Intrinsic::experimental_constrained_floor:
6113
1.38k
  case Intrinsic::experimental_constrained_round:
6114
1.38k
  case Intrinsic::experimental_constrained_trunc:
6115
1.38k
    visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I));
6116
1.38k
    return;
6117
1.38k
  case Intrinsic::fmuladd: {
6118
1.08k
    EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
6119
1.08k
    if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
6120
1.08k
        TLI.isFMAFasterThanFMulAndFAdd(VT)) {
6121
174
      setValue(&I, DAG.getNode(ISD::FMA, sdl,
6122
174
                               getValue(I.getArgOperand(0)).getValueType(),
6123
174
                               getValue(I.getArgOperand(0)),
6124
174
                               getValue(I.getArgOperand(1)),
6125
174
                               getValue(I.getArgOperand(2))));
6126
907
    } else {
6127
907
      // TODO: Intrinsic calls should have fast-math-flags.
6128
907
      SDValue Mul = DAG.getNode(ISD::FMUL, sdl,
6129
907
                                getValue(I.getArgOperand(0)).getValueType(),
6130
907
                                getValue(I.getArgOperand(0)),
6131
907
                                getValue(I.getArgOperand(1)));
6132
907
      SDValue Add = DAG.getNode(ISD::FADD, sdl,
6133
907
                                getValue(I.getArgOperand(0)).getValueType(),
6134
907
                                Mul,
6135
907
                                getValue(I.getArgOperand(2)));
6136
907
      setValue(&I, Add);
6137
907
    }
6138
1.08k
    return;
6139
1.38k
  }
6140
1.38k
  case Intrinsic::convert_to_fp16:
6141
231
    setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16,
6142
231
                             DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16,
6143
231
                                         getValue(I.getArgOperand(0)),
6144
231
                                         DAG.getTargetConstant(0, sdl,
6145
231
                                                               MVT::i32))));
6146
231
    return;
6147
1.38k
  case Intrinsic::convert_from_fp16:
6148
277
    setValue(&I, DAG.getNode(ISD::FP_EXTEND, sdl,
6149
277
                             TLI.getValueType(DAG.getDataLayout(), I.getType()),
6150
277
                             DAG.getNode(ISD::BITCAST, sdl, MVT::f16,
6151
277
                                         getValue(I.getArgOperand(0)))));
6152
277
    return;
6153
1.38k
  case Intrinsic::pcmarker: {
6154
0
    SDValue Tmp = getValue(I.getArgOperand(0));
6155
0
    DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp));
6156
0
    return;
6157
1.38k
  }
6158
1.38k
  case Intrinsic::readcyclecounter: {
6159
28
    SDValue Op = getRoot();
6160
28
    Res = DAG.getNode(ISD::READCYCLECOUNTER, sdl,
6161
28
                      DAG.getVTList(MVT::i64, MVT::Other), Op);
6162
28
    setValue(&I, Res);
6163
28
    DAG.setRoot(Res.getValue(1));
6164
28
    return;
6165
1.38k
  }
6166
1.38k
  case Intrinsic::bitreverse:
6167
296
    setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl,
6168
296
                             getValue(I.getArgOperand(0)).getValueType(),
6169
296
                             getValue(I.getArgOperand(0))));
6170
296
    return;
6171
1.38k
  case Intrinsic::bswap:
6172
1.00k
    setValue(&I, DAG.getNode(ISD::BSWAP, sdl,
6173
1.00k
                             getValue(I.getArgOperand(0)).getValueType(),
6174
1.00k
                             getValue(I.getArgOperand(0))));
6175
1.00k
    return;
6176
1.38k
  case Intrinsic::cttz: {
6177
1.37k
    SDValue Arg = getValue(I.getArgOperand(0));
6178
1.37k
    ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
6179
1.37k
    EVT Ty = Arg.getValueType();
6180
1.37k
    setValue(&I, DAG.getNode(CI->isZero() ? 
ISD::CTTZ564
:
ISD::CTTZ_ZERO_UNDEF814
,
6181
1.37k
                             sdl, Ty, Arg));
6182
1.37k
    return;
6183
1.38k
  }
6184
3.68k
  case Intrinsic::ctlz: {
6185
3.68k
    SDValue Arg = getValue(I.getArgOperand(0));
6186
3.68k
    ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
6187
3.68k
    EVT Ty = Arg.getValueType();
6188
3.68k
    setValue(&I, DAG.getNode(CI->isZero() ? 
ISD::CTLZ1.15k
:
ISD::CTLZ_ZERO_UNDEF2.52k
,
6189
3.68k
                             sdl, Ty, Arg));
6190
3.68k
    return;
6191
1.38k
  }
6192
2.82k
  case Intrinsic::ctpop: {
6193
2.82k
    SDValue Arg = getValue(I.getArgOperand(0));
6194
2.82k
    EVT Ty = Arg.getValueType();
6195
2.82k
    setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg));
6196
2.82k
    return;
6197
1.38k
  }
6198
3.23k
  case Intrinsic::fshl:
6199
3.23k
  case Intrinsic::fshr: {
6200
3.23k
    bool IsFSHL = Intrinsic == Intrinsic::fshl;
6201
3.23k
    SDValue X = getValue(I.getArgOperand(0));
6202
3.23k
    SDValue Y = getValue(I.getArgOperand(1));
6203
3.23k
    SDValue Z = getValue(I.getArgOperand(2));
6204
3.23k
    EVT VT = X.getValueType();
6205
3.23k
    SDValue BitWidthC = DAG.getConstant(VT.getScalarSizeInBits(), sdl, VT);
6206
3.23k
    SDValue Zero = DAG.getConstant(0, sdl, VT);
6207
3.23k
    SDValue ShAmt = DAG.getNode(ISD::UREM, sdl, VT, Z, BitWidthC);
6208
3.23k
6209
3.23k
    auto FunnelOpcode = IsFSHL ? 
ISD::FSHL1.63k
:
ISD::FSHR1.59k
;
6210
3.23k
    if (TLI.isOperationLegalOrCustom(FunnelOpcode, VT)) {
6211
994
      setValue(&I, DAG.getNode(FunnelOpcode, sdl, VT, X, Y, Z));
6212
994
      return;
6213
994
    }
6214
2.23k
6215
2.23k
    // When X == Y, this is rotate. If the data type has a power-of-2 size, we
6216
2.23k
    // avoid the select that is necessary in the general case to filter out
6217
2.23k
    // the 0-shift possibility that leads to UB.
6218
2.23k
    if (X == Y && 
isPowerOf2_32(VT.getScalarSizeInBits())1.33k
) {
6219
1.32k
      auto RotateOpcode = IsFSHL ? 
ISD::ROTL682
:
ISD::ROTR646
;
6220
1.32k
      if (TLI.isOperationLegalOrCustom(RotateOpcode, VT)) {
6221
929
        setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z));
6222
929
        return;
6223
929
      }
6224
399
6225
399
      // Some targets only rotate one way. Try the opposite direction.
6226
399
      RotateOpcode = IsFSHL ? 
ISD::ROTR103
:
ISD::ROTL296
;
6227
399
      if (TLI.isOperationLegalOrCustom(RotateOpcode, VT)) {
6228
201
        // Negate the shift amount because it is safe to ignore the high bits.
6229
201
        SDValue NegShAmt = DAG.getNode(ISD::SUB, sdl, VT, Zero, Z);
6230
201
        setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, NegShAmt));
6231
201
        return;
6232
201
      }
6233
198
6234
198
      // fshl (rotl): (X << (Z % BW)) | (X >> ((0 - Z) % BW))
6235
198
      // fshr (rotr): (X << ((0 - Z) % BW)) | (X >> (Z % BW))
6236
198
      SDValue NegZ = DAG.getNode(ISD::SUB, sdl, VT, Zero, Z);
6237
198
      SDValue NShAmt = DAG.getNode(ISD::UREM, sdl, VT, NegZ, BitWidthC);
6238
198
      SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? 
ShAmt99
:
NShAmt99
);
6239
198
      SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, X, IsFSHL ? 
NShAmt99
:
ShAmt99
);
6240
198
      setValue(&I, DAG.getNode(ISD::OR, sdl, VT, ShX, ShY));
6241
198
      return;
6242
198
    }
6243
910
6244
910
    // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
6245
910
    // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
6246
910
    SDValue InvShAmt = DAG.getNode(ISD::SUB, sdl, VT, BitWidthC, ShAmt);
6247
910
    SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? 
ShAmt455
:
InvShAmt455
);
6248
910
    SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, Y, IsFSHL ? 
InvShAmt455
:
ShAmt455
);
6249
910
    SDValue Or = DAG.getNode(ISD::OR, sdl, VT, ShX, ShY);
6250
910
6251
910
    // If (Z % BW == 0), then the opposite direction shift is shift-by-bitwidth,
6252
910
    // and that is undefined. We must compare and select to avoid UB.
6253
910
    EVT CCVT = MVT::i1;
6254
910
    if (VT.isVector())
6255
838
      CCVT = EVT::getVectorVT(*Context, CCVT, VT.getVectorNumElements());
6256
910
6257
910
    // For fshl, 0-shift returns the 1st arg (X).
6258
910
    // For fshr, 0-shift returns the 2nd arg (Y).
6259
910
    SDValue IsZeroShift = DAG.getSetCC(sdl, CCVT, ShAmt, Zero, ISD::SETEQ);
6260
910
    setValue(&I, DAG.getSelect(sdl, VT, IsZeroShift, IsFSHL ? 
X455
:
Y455
, Or));
6261
910
    return;
6262
910
  }
6263
910
  case Intrinsic::sadd_sat: {
6264
489
    SDValue Op1 = getValue(I.getArgOperand(0));
6265
489
    SDValue Op2 = getValue(I.getArgOperand(1));
6266
489
    setValue(&I, DAG.getNode(ISD::SADDSAT, sdl, Op1.getValueType(), Op1, Op2));
6267
489
    return;
6268
910
  }
6269
910
  case Intrinsic::uadd_sat: {
6270
491
    SDValue Op1 = getValue(I.getArgOperand(0));
6271
491
    SDValue Op2 = getValue(I.getArgOperand(1));
6272
491
    setValue(&I, DAG.getNode(ISD::UADDSAT, sdl, Op1.getValueType(), Op1, Op2));
6273
491
    return;
6274
910
  }
6275
910
  case Intrinsic::ssub_sat: {
6276
454
    SDValue Op1 = getValue(I.getArgOperand(0));
6277
454
    SDValue Op2 = getValue(I.getArgOperand(1));
6278
454
    setValue(&I, DAG.getNode(ISD::SSUBSAT, sdl, Op1.getValueType(), Op1, Op2));
6279
454
    return;
6280
910
  }
6281
910
  case Intrinsic::usub_sat: {
6282
482
    SDValue Op1 = getValue(I.getArgOperand(0));
6283
482
    SDValue Op2 = getValue(I.getArgOperand(1));
6284
482
    setValue(&I, DAG.getNode(ISD::USUBSAT, sdl, Op1.getValueType(), Op1, Op2));
6285
482
    return;
6286
910
  }
6287
910
  case Intrinsic::smul_fix:
6288
43
  case Intrinsic::umul_fix: {
6289
43
    SDValue Op1 = getValue(I.getArgOperand(0));
6290
43
    SDValue Op2 = getValue(I.getArgOperand(1));
6291
43
    SDValue Op3 = getValue(I.getArgOperand(2));
6292
43
    setValue(&I, DAG.getNode(FixedPointIntrinsicToOpcode(Intrinsic), sdl,
6293
43
                             Op1.getValueType(), Op1, Op2, Op3));
6294
43
    return;
6295
43
  }
6296
43
  case Intrinsic::smul_fix_sat: {
6297
25
    SDValue Op1 = getValue(I.getArgOperand(0));
6298
25
    SDValue Op2 = getValue(I.getArgOperand(1));
6299
25
    SDValue Op3 = getValue(I.getArgOperand(2));
6300
25
    setValue(&I, DAG.getNode(ISD::SMULFIXSAT, sdl, Op1.getValueType(), Op1, Op2,
6301
25
                             Op3));
6302
25
    return;
6303
43
  }
6304
224
  case Intrinsic::stacksave: {
6305
224
    SDValue Op = getRoot();
6306
224
    Res = DAG.getNode(
6307
224
        ISD::STACKSAVE, sdl,
6308
224
        DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Op);
6309
224
    setValue(&I, Res);
6310
224
    DAG.setRoot(Res.getValue(1));
6311
224
    return;
6312
43
  }
6313
85
  case Intrinsic::stackrestore:
6314
85
    Res = getValue(I.getArgOperand(0));
6315
85
    DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res));
6316
85
    return;
6317
43
  case Intrinsic::get_dynamic_area_offset: {
6318
4
    SDValue Op = getRoot();
6319
4
    EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
6320
4
    EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
6321
4
    // Result type for @llvm.get.dynamic.area.offset should match PtrTy for
6322
4
    // target.
6323
4
    if (PtrTy.getSizeInBits() < ResTy.getSizeInBits())
6324
0
      report_fatal_error("Wrong result type for @llvm.get.dynamic.area.offset"
6325
0
                         " intrinsic!");
6326
4
    Res = DAG.getNode(ISD::GET_DYNAMIC_AREA_OFFSET, sdl, DAG.getVTList(ResTy),
6327
4
                      Op);
6328
4
    DAG.setRoot(Op);
6329
4
    setValue(&I, Res);
6330
4
    return;
6331
4
  }
6332
1.41k
  case Intrinsic::stackguard: {
6333
1.41k
    EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
6334
1.41k
    MachineFunction &MF = DAG.getMachineFunction();
6335
1.41k
    const Module &M = *MF.getFunction().getParent();
6336
1.41k
    SDValue Chain = getRoot();
6337
1.41k
    if (TLI.useLoadStackGuardNode()) {
6338
1.16k
      Res = getLoadStackGuard(DAG, sdl, Chain);
6339
1.16k
    } else {
6340
248
      const Value *Global = TLI.getSDagStackGuard(M);
6341
248
      unsigned Align = DL->getPrefTypeAlignment(Global->getType());
6342
248
      Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global),
6343
248
                        MachinePointerInfo(Global, 0), Align,
6344
248
                        MachineMemOperand::MOVolatile);
6345
248
    }
6346
1.41k
    if (TLI.useStackGuardXorFP())
6347
137
      Res = TLI.emitStackGuardXorFP(DAG, Res, sdl);
6348
1.41k
    DAG.setRoot(Chain);
6349
1.41k
    setValue(&I, Res);
6350
1.41k
    return;
6351
4
  }
6352
1.22k
  case Intrinsic::stackprotector: {
6353
1.22k
    // Emit code into the DAG to store the stack guard onto the stack.
6354
1.22k
    MachineFunction &MF = DAG.getMachineFunction();
6355
1.22k
    MachineFrameInfo &MFI = MF.getFrameInfo();
6356
1.22k
    EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
6357
1.22k
    SDValue Src, Chain = getRoot();
6358
1.22k
6359
1.22k
    if (TLI.useLoadStackGuardNode())
6360
709
      Src = getLoadStackGuard(DAG, sdl, Chain);
6361
516
    else
6362
516
      Src = getValue(I.getArgOperand(0));   // The guard's value.
6363
1.22k
6364
1.22k
    AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
6365
1.22k
6366
1.22k
    int FI = FuncInfo.StaticAllocaMap[Slot];
6367
1.22k
    MFI.setStackProtectorIndex(FI);
6368
1.22k
6369
1.22k
    SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
6370
1.22k
6371
1.22k
    // Store the stack protector onto the stack.
6372
1.22k
    Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack(
6373
1.22k
                                                 DAG.getMachineFunction(), FI),
6374
1.22k
                       /* Alignment = */ 0, MachineMemOperand::MOVolatile);
6375
1.22k
    setValue(&I, Res);
6376
1.22k
    DAG.setRoot(Res);
6377
1.22k
    return;
6378
4
  }
6379
4
  case Intrinsic::objectsize: {
6380
2
    // If we don't know by now, we're never going to know.
6381
2
    ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
6382
2
6383
2
    assert(CI && "Non-constant type in __builtin_object_size?");
6384
2
6385
2
    SDValue Arg = getValue(I.getCalledValue());
6386
2
    EVT Ty = Arg.getValueType();
6387
2
6388
2
    if (CI->isZero())
6389
2
      Res = DAG.getConstant(-1ULL, sdl, Ty);
6390
0
    else
6391
0
      Res = DAG.getConstant(0, sdl, Ty);
6392
2
6393
2
    setValue(&I, Res);
6394
2
    return;
6395
4
  }
6396
4
6397
19
  case Intrinsic::is_constant:
6398
19
    // If this wasn't constant-folded away by now, then it's not a
6399
19
    // constant.
6400
19
    setValue(&I, DAG.getConstant(0, sdl, MVT::i1));
6401
19
    return;
6402
4
6403
4
  case Intrinsic::annotation:
6404
4
  case Intrinsic::ptr_annotation:
6405
4
  case Intrinsic::launder_invariant_group:
6406
4
  case Intrinsic::strip_invariant_group:
6407
4
    // Drop the intrinsic, but forward the value
6408
4
    setValue(&I, getValue(I.getOperand(0)));
6409
4
    return;
6410
57
  case Intrinsic::assume:
6411
57
  case Intrinsic::var_annotation:
6412
57
  case Intrinsic::sideeffect:
6413
57
    // Discard annotate attributes, assumptions, and artificial side-effects.
6414
57
    return;
6415
57
6416
57
  case Intrinsic::codeview_annotation: {
6417
8
    // Emit a label associated with this metadata.
6418
8
    MachineFunction &MF = DAG.getMachineFunction();
6419
8
    MCSymbol *Label =
6420
8
        MF.getMMI().getContext().createTempSymbol("annotation", true);
6421
8
    Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(0))->getMetadata();
6422
8
    MF.addCodeViewAnnotation(Label, cast<MDNode>(MD));
6423
8
    Res = DAG.getLabelNode(ISD::ANNOTATION_LABEL, sdl, getRoot(), Label);
6424
8
    DAG.setRoot(Res);
6425
8
    return;
6426
57
  }
6427
57
6428
57
  case Intrinsic::init_trampoline: {
6429
4
    const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts());
6430
4
6431
4
    SDValue Ops[6];
6432
4
    Ops[0] = getRoot();
6433
4
    Ops[1] = getValue(I.getArgOperand(0));
6434
4
    Ops[2] = getValue(I.getArgOperand(1));
6435
4
    Ops[3] = getValue(I.getArgOperand(2));
6436
4
    Ops[4] = DAG.getSrcValue(I.getArgOperand(0));
6437
4
    Ops[5] = DAG.getSrcValue(F);
6438
4
6439
4
    Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops);
6440
4
6441
4
    DAG.setRoot(Res);
6442
4
    return;
6443
57
  }
6444
57
  case Intrinsic::adjust_trampoline:
6445
4
    setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl,
6446
4
                             TLI.getPointerTy(DAG.getDataLayout()),
6447
4
                             getValue(I.getArgOperand(0))));
6448
4
    return;
6449
57
  case Intrinsic::gcroot: {
6450
2
    assert(DAG.getMachineFunction().getFunction().hasGC() &&
6451
2
           "only valid in functions with gc specified, enforced by Verifier");
6452
2
    assert(GFI && "implied by previous");
6453
2
    const Value *Alloca = I.getArgOperand(0)->stripPointerCasts();
6454
2
    const Constant *TypeMap = cast<Constant>(I.getArgOperand(1));
6455
2
6456
2
    FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
6457
2
    GFI->addStackRoot(FI->getIndex(), TypeMap);
6458
2
    return;
6459
57
  }
6460
57
  case Intrinsic::gcread:
6461
0
  case Intrinsic::gcwrite:
6462
0
    llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
6463
9
  case Intrinsic::flt_rounds:
6464
9
    setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32));
6465
9
    return;
6466
0
6467
11
  case Intrinsic::expect:
6468
11
    // Just replace __builtin_expect(exp, c) with EXP.
6469
11
    setValue(&I, getValue(I.getArgOperand(0)));
6470
11
    return;
6471
0
6472
296
  case Intrinsic::debugtrap:
6473
296
  case Intrinsic::trap: {
6474
296
    StringRef TrapFuncName =
6475
296
        I.getAttributes()
6476
296
            .getAttribute(AttributeList::FunctionIndex, "trap-func-name")
6477
296
            .getValueAsString();
6478
296
    if (TrapFuncName.empty()) {
6479
287
      ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ?
6480
252
        ISD::TRAP : 
ISD::DEBUGTRAP35
;
6481
287
      DAG.setRoot(DAG.getNode(Op, sdl,MVT::Other, getRoot()));
6482
287
      return;
6483
287
    }
6484
9
    TargetLowering::ArgListTy Args;
6485
9
6486
9
    TargetLowering::CallLoweringInfo CLI(DAG);
6487
9
    CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
6488
9
        CallingConv::C, I.getType(),
6489
9
        DAG.getExternalSymbol(TrapFuncName.data(),
6490
9
                              TLI.getPointerTy(DAG.getDataLayout())),
6491
9
        std::move(Args));
6492
9
6493
9
    std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
6494
9
    DAG.setRoot(Result.second);
6495
9
    return;
6496
9
  }
6497
9
6498
4.61k
  case Intrinsic::uadd_with_overflow:
6499
4.61k
  case Intrinsic::sadd_with_overflow:
6500
4.61k
  case Intrinsic::usub_with_overflow:
6501
4.61k
  case Intrinsic::ssub_with_overflow:
6502
4.61k
  case Intrinsic::umul_with_overflow:
6503
4.61k
  case Intrinsic::smul_with_overflow: {
6504
4.61k
    ISD::NodeType Op;
6505
4.61k
    switch (Intrinsic) {
6506
4.61k
    
default: 0
llvm_unreachable0
("Impossible intrinsic"); // Can't reach here.
6507
4.61k
    
case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break2.29k
;
6508
4.61k
    
case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break442
;
6509
4.61k
    
case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break864
;
6510
4.61k
    
case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break382
;
6511
4.61k
    
case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break466
;
6512
4.61k
    
case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break168
;
6513
4.61k
    }
6514
4.61k
    SDValue Op1 = getValue(I.getArgOperand(0));
6515
4.61k
    SDValue Op2 = getValue(I.getArgOperand(1));
6516
4.61k
6517
4.61k
    EVT ResultVT = Op1.getValueType();
6518
4.61k
    EVT OverflowVT = MVT::i1;
6519
4.61k
    if (ResultVT.isVector())
6520
540
      OverflowVT = EVT::getVectorVT(
6521
540
          *Context, OverflowVT, ResultVT.getVectorNumElements());
6522
4.61k
6523
4.61k
    SDVTList VTs = DAG.getVTList(ResultVT, OverflowVT);
6524
4.61k
    setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2));
6525
4.61k
    return;
6526
4.61k
  }
6527
4.61k
  case Intrinsic::prefetch: {
6528
370
    SDValue Ops[5];
6529
370
    unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
6530
370
    auto Flags = rw == 0 ? 
MachineMemOperand::MOLoad303
:
MachineMemOperand::MOStore67
;
6531
370
    Ops[0] = DAG.getRoot();
6532
370
    Ops[1] = getValue(I.getArgOperand(0));
6533
370
    Ops[2] = getValue(I.getArgOperand(1));
6534
370
    Ops[3] = getValue(I.getArgOperand(2));
6535
370
    Ops[4] = getValue(I.getArgOperand(3));
6536
370
    SDValue Result = DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl,
6537
370
                                             DAG.getVTList(MVT::Other), Ops,
6538
370
                                             EVT::getIntegerVT(*Context, 8),
6539
370
                                             MachinePointerInfo(I.getArgOperand(0)),
6540
370
                                             0, /* align */
6541
370
                                             Flags);
6542
370
6543
370
    // Chain the prefetch in parallel with any pending loads, to stay out of
6544
370
    // the way of later optimizations.
6545
370
    PendingLoads.push_back(Result);
6546
370
    Result = getRoot();
6547
370
    DAG.setRoot(Result);
6548
370
    return;
6549
4.61k
  }
6550
67.9k
  case Intrinsic::lifetime_start:
6551
67.9k
  case Intrinsic::lifetime_end: {
6552
67.9k
    bool IsStart = (Intrinsic == Intrinsic::lifetime_start);
6553
67.9k
    // Stack coloring is not enabled in O0, discard region information.
6554
67.9k
    if (TM.getOptLevel() == CodeGenOpt::None)
6555
19
      return;
6556
67.9k
6557
67.9k
    const int64_t ObjectSize =
6558
67.9k
        cast<ConstantInt>(I.getArgOperand(0))->getSExtValue();
6559
67.9k
    Value *const ObjectPtr = I.getArgOperand(1);
6560
67.9k
    SmallVector<const Value *, 4> Allocas;
6561
67.9k
    GetUnderlyingObjects(ObjectPtr, Allocas, *DL);
6562
67.9k
6563
67.9k
    for (SmallVectorImpl<const Value*>::iterator Object = Allocas.begin(),
6564
136k
           E = Allocas.end(); Object != E; 
++Object68.1k
) {
6565
68.1k
      const AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object);
6566
68.1k
6567
68.1k
      // Could not find an Alloca.
6568
68.1k
      if (!LifetimeObject)
6569
3
        continue;
6570
68.1k
6571
68.1k
      // First check that the Alloca is static, otherwise it won't have a
6572
68.1k
      // valid frame index.
6573
68.1k
      auto SI = FuncInfo.StaticAllocaMap.find(LifetimeObject);
6574
68.1k
      if (SI == FuncInfo.StaticAllocaMap.end())
6575
2
        return;
6576
68.1k
6577
68.1k
      const int FrameIndex = SI->second;
6578
68.1k
      int64_t Offset;
6579
68.1k
      if (GetPointerBaseWithConstantOffset(
6580
68.1k
              ObjectPtr, Offset, DAG.getDataLayout()) != LifetimeObject)
6581
454
        Offset = -1; // Cannot determine offset from alloca to lifetime object.
6582
68.1k
      Res = DAG.getLifetimeNode(IsStart, sdl, getRoot(), FrameIndex, ObjectSize,
6583
68.1k
                                Offset);
6584
68.1k
      DAG.setRoot(Res);
6585
68.1k
    }
6586
67.9k
    
return67.9k
;
6587
67.9k
  }
6588
67.9k
  case Intrinsic::invariant_start:
6589
45
    // Discard region information.
6590
45
    setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout())));
6591
45
    return;
6592
67.9k
  case Intrinsic::invariant_end:
6593
0
    // Discard region information.
6594
0
    return;
6595
67.9k
  case Intrinsic::clear_cache:
6596
4
    /// FunctionName may be null.
6597
4
    if (const char *FunctionName = TLI.getClearCacheBuiltinName())
6598
2
      lowerCallToExternalSymbol(I, FunctionName);
6599
4
    return;
6600
67.9k
  case Intrinsic::donothing:
6601
1
    // ignore
6602
1
    return;
6603
67.9k
  case Intrinsic::experimental_stackmap:
6604
140
    visitStackmap(I);
6605
140
    return;
6606
67.9k
  case Intrinsic::experimental_patchpoint_void:
6607
144
  case Intrinsic::experimental_patchpoint_i64:
6608
144
    visitPatchpoint(&I);
6609
144
    return;
6610
144
  case Intrinsic::experimental_gc_statepoint:
6611
75
    LowerStatepoint(ImmutableStatepoint(&I));
6612
75
    return;
6613
144
  case Intrinsic::experimental_gc_result:
6614
24
    visitGCResult(cast<GCResultInst>(I));
6615
24
    return;
6616
144
  case Intrinsic::experimental_gc_relocate:
6617
69
    visitGCRelocate(cast<GCRelocateInst>(I));
6618
69
    return;
6619
144
  case Intrinsic::instrprof_increment:
6620
0
    llvm_unreachable("instrprof failed to lower an increment");
6621
144
  case Intrinsic::instrprof_value_profile:
6622
0
    llvm_unreachable("instrprof failed to lower a value profiling call");
6623
144
  case Intrinsic::localescape: {
6624
18
    MachineFunction &MF = DAG.getMachineFunction();
6625
18
    const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
6626
18
6627
18
    // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
6628
18
    // is the same on all targets.
6629
42
    for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; 
++Idx24
) {
6630
24
      Value *Arg = I.getArgOperand(Idx)->stripPointerCasts();
6631
24
      if (isa<ConstantPointerNull>(Arg))
6632
0
        continue; // Skip null pointers. They represent a hole in index space.
6633
24
      AllocaInst *Slot = cast<AllocaInst>(Arg);
6634
24
      assert(FuncInfo.StaticAllocaMap.count(Slot) &&
6635
24
             "can only escape static allocas");
6636
24
      int FI = FuncInfo.StaticAllocaMap[Slot];
6637
24
      MCSymbol *FrameAllocSym =
6638
24
          MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
6639
24
              GlobalValue::dropLLVMManglingEscape(MF.getName()), Idx);
6640
24
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl,
6641
24
              TII->get(TargetOpcode::LOCAL_ESCAPE))
6642
24
          .addSym(FrameAllocSym)
6643
24
          .addFrameIndex(FI);
6644
24
    }
6645
18
6646
18
    return;
6647
144
  }
6648
144
6649
144
  case Intrinsic::localrecover: {
6650
19
    // i8* @llvm.localrecover(i8* %fn, i8* %fp, i32 %idx)
6651
19
    MachineFunction &MF = DAG.getMachineFunction();
6652
19
    MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout(), 0);
6653
19
6654
19
    // Get the symbol that defines the frame offset.
6655
19
    auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts());
6656
19
    auto *Idx = cast<ConstantInt>(I.getArgOperand(2));
6657
19
    unsigned IdxVal =
6658
19
        unsigned(Idx->getLimitedValue(std::numeric_limits<int>::max()));
6659
19
    MCSymbol *FrameAllocSym =
6660
19
        MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
6661
19
            GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal);
6662
19
6663
19
    // Create a MCSymbol for the label to avoid any target lowering
6664
19
    // that would make this PC relative.
6665
19
    SDValue OffsetSym = DAG.getMCSymbol(FrameAllocSym, PtrVT);
6666
19
    SDValue OffsetVal =
6667
19
        DAG.getNode(ISD::LOCAL_RECOVER, sdl, PtrVT, OffsetSym);
6668
19
6669
19
    // Add the offset to the FP.
6670
19
    Value *FP = I.getArgOperand(1);
6671
19
    SDValue FPVal = getValue(FP);
6672
19
    SDValue Add = DAG.getNode(ISD::ADD, sdl, PtrVT, FPVal, OffsetVal);
6673
19
    setValue(&I, Add);
6674
19
6675
19
    return;
6676
144
  }
6677
144
6678
144
  case Intrinsic::eh_exceptionpointer:
6679
7
  case Intrinsic::eh_exceptioncode: {
6680
7
    // Get the exception pointer vreg, copy from it, and resize it to fit.
6681
7
    const auto *CPI = cast<CatchPadInst>(I.getArgOperand(0));
6682
7
    MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
6683
7
    const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT);
6684
7
    unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC);
6685
7
    SDValue N =
6686
7
        DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT);
6687
7
    if (Intrinsic == Intrinsic::eh_exceptioncode)
6688
4
      N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32);
6689
7
    setValue(&I, N);
6690
7
    return;
6691
7
  }
6692
7
  case Intrinsic::xray_customevent: {
6693
2
    // Here we want to make sure that the intrinsic behaves as if it has a
6694
2
    // specific calling convention, and only for x86_64.
6695
2
    // FIXME: Support other platforms later.
6696
2
    const auto &Triple = DAG.getTarget().getTargetTriple();
6697
2
    if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
6698
0
      return;
6699
2
6700
2
    SDLoc DL = getCurSDLoc();
6701
2
    SmallVector<SDValue, 8> Ops;
6702
2
6703
2
    // We want to say that we always want the arguments in registers.
6704
2
    SDValue LogEntryVal = getValue(I.getArgOperand(0));
6705
2
    SDValue StrSizeVal = getValue(I.getArgOperand(1));
6706
2
    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
6707
2
    SDValue Chain = getRoot();
6708
2
    Ops.push_back(LogEntryVal);
6709
2
    Ops.push_back(StrSizeVal);
6710
2
    Ops.push_back(Chain);
6711
2
6712
2
    // We need to enforce the calling convention for the callsite, so that
6713
2
    // argument ordering is enforced correctly, and that register allocation can
6714
2
    // see that some registers may be assumed clobbered and have to preserve
6715
2
    // them across calls to the intrinsic.
6716
2
    MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHABLE_EVENT_CALL,
6717
2
                                           DL, NodeTys, Ops);
6718
2
    SDValue patchableNode = SDValue(MN, 0);
6719
2
    DAG.setRoot(patchableNode);
6720
2
    setValue(&I, patchableNode);
6721
2
    return;
6722
2
  }
6723
2
  case Intrinsic::xray_typedevent: {
6724
2
    // Here we want to make sure that the intrinsic behaves as if it has a
6725
2
    // specific calling convention, and only for x86_64.
6726
2
    // FIXME: Support other platforms later.
6727
2
    const auto &Triple = DAG.getTarget().getTargetTriple();
6728
2
    if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
6729
0
      return;
6730
2
6731
2
    SDLoc DL = getCurSDLoc();
6732
2
    SmallVector<SDValue, 8> Ops;
6733
2
6734
2
    // We want to say that we always want the arguments in registers.
6735
2
    // It's unclear to me how manipulating the selection DAG here forces callers
6736
2
    // to provide arguments in registers instead of on the stack.
6737
2
    SDValue LogTypeId = getValue(I.getArgOperand(0));
6738
2
    SDValue LogEntryVal = getValue(I.getArgOperand(1));
6739
2
    SDValue StrSizeVal = getValue(I.getArgOperand(2));
6740
2
    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
6741
2
    SDValue Chain = getRoot();
6742
2
    Ops.push_back(LogTypeId);
6743
2
    Ops.push_back(LogEntryVal);
6744
2
    Ops.push_back(StrSizeVal);
6745
2
    Ops.push_back(Chain);
6746
2
6747
2
    // We need to enforce the calling convention for the callsite, so that
6748
2
    // argument ordering is enforced correctly, and that register allocation can
6749
2
    // see that some registers may be assumed clobbered and have to preserve
6750
2
    // them across calls to the intrinsic.
6751
2
    MachineSDNode *MN = DAG.getMachineNode(
6752
2
        TargetOpcode::PATCHABLE_TYPED_EVENT_CALL, DL, NodeTys, Ops);
6753
2
    SDValue patchableNode = SDValue(MN, 0);
6754
2
    DAG.setRoot(patchableNode);
6755
2
    setValue(&I, patchableNode);
6756
2
    return;
6757
2
  }
6758
2
  case Intrinsic::experimental_deoptimize:
6759
0
    LowerDeoptimizeCall(&I);
6760
0
    return;
6761
2
6762
857
  case Intrinsic::experimental_vector_reduce_v2_fadd:
6763
857
  case Intrinsic::experimental_vector_reduce_v2_fmul:
6764
857
  case Intrinsic::experimental_vector_reduce_add:
6765
857
  case Intrinsic::experimental_vector_reduce_mul:
6766
857
  case Intrinsic::experimental_vector_reduce_and:
6767
857
  case Intrinsic::experimental_vector_reduce_or:
6768
857
  case Intrinsic::experimental_vector_reduce_xor:
6769
857
  case Intrinsic::experimental_vector_reduce_smax:
6770
857
  case Intrinsic::experimental_vector_reduce_smin:
6771
857
  case Intrinsic::experimental_vector_reduce_umax:
6772
857
  case Intrinsic::experimental_vector_reduce_umin:
6773
857
  case Intrinsic::experimental_vector_reduce_fmax:
6774
857
  case Intrinsic::experimental_vector_reduce_fmin:
6775
857
    visitVectorReduce(I, Intrinsic);
6776
857
    return;
6777
857
6778
857
  case Intrinsic::icall_branch_funnel: {
6779
13
    SmallVector<SDValue, 16> Ops;
6780
13
    Ops.push_back(getValue(I.getArgOperand(0)));
6781
13
6782
13
    int64_t Offset;
6783
13
    auto *Base = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
6784
13
        I.getArgOperand(1), Offset, DAG.getDataLayout()));
6785
13
    if (!Base)
6786
0
      report_fatal_error(
6787
0
          "llvm.icall.branch.funnel operand must be a GlobalValue");
6788
13
    Ops.push_back(DAG.getTargetGlobalAddress(Base, getCurSDLoc(), MVT::i64, 0));
6789
13
6790
13
    struct BranchFunnelTarget {
6791
13
      int64_t Offset;
6792
13
      SDValue Target;
6793
13
    };
6794
13
    SmallVector<BranchFunnelTarget, 8> Targets;
6795
13
6796
53
    for (unsigned Op = 1, N = I.getNumArgOperands(); Op != N; 
Op += 240
) {
6797
40
      auto *ElemBase = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
6798
40
          I.getArgOperand(Op), Offset, DAG.getDataLayout()));
6799
40
      if (ElemBase != Base)
6800
0
        report_fatal_error("all llvm.icall.branch.funnel operands must refer "
6801
0
                           "to the same GlobalValue");
6802
40
6803
40
      SDValue Val = getValue(I.getArgOperand(Op + 1));
6804
40
      auto *GA = dyn_cast<GlobalAddressSDNode>(Val);
6805
40
      if (!GA)
6806
0
        report_fatal_error(
6807
0
            "llvm.icall.branch.funnel operand must be a GlobalValue");
6808
40
      Targets.push_back({Offset, DAG.getTargetGlobalAddress(
6809
40
                                     GA->getGlobal(), getCurSDLoc(),
6810
40
                                     Val.getValueType(), GA->getOffset())});
6811
40
    }
6812
13
    llvm::sort(Targets,
6813
28
               [](const BranchFunnelTarget &T1, const BranchFunnelTarget &T2) {
6814
28
                 return T1.Offset < T2.Offset;
6815
28
               });
6816
13
6817
40
    for (auto &T : Targets) {
6818
40
      Ops.push_back(DAG.getTargetConstant(T.Offset, getCurSDLoc(), MVT::i32));
6819
40
      Ops.push_back(T.Target);
6820
40
    }
6821
13
6822
13
    Ops.push_back(DAG.getRoot()); // Chain
6823
13
    SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL,
6824
13
                                 getCurSDLoc(), MVT::Other, Ops),
6825
13
              0);
6826
13
    DAG.setRoot(N);
6827
13
    setValue(&I, N);
6828
13
    HasTailCall = true;
6829
13
    return;
6830
13
  }
6831
13
6832
13
  case Intrinsic::wasm_landingpad_index:
6833
13
    // Information this intrinsic contained has been transferred to
6834
13
    // MachineFunction in SelectionDAGISel::PrepareEHLandingPad. We can safely
6835
13
    // delete it now.
6836
13
    return;
6837
13
6838
13
  case Intrinsic::aarch64_settag:
6839
12
  case Intrinsic::aarch64_settag_zero: {
6840
12
    const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
6841
12
    bool ZeroMemory = Intrinsic == Intrinsic::aarch64_settag_zero;
6842
12
    SDValue Val = TSI.EmitTargetCodeForSetTag(
6843
12
        DAG, getCurSDLoc(), getRoot(), getValue(I.getArgOperand(0)),
6844
12
        getValue(I.getArgOperand(1)), MachinePointerInfo(I.getArgOperand(0)),
6845
12
        ZeroMemory);
6846
12
    DAG.setRoot(Val);
6847
12
    setValue(&I, Val);
6848
12
    return;
6849
12
  }
6850
286k
  }
6851
286k
}
6852
6853
/// Lower a call to one of the llvm.experimental.constrained.* floating-point
/// intrinsics into the corresponding STRICT_* SelectionDAG node.
///
/// Strict FP nodes may read and write the floating-point environment, so the
/// node produced here carries a chain: it takes the current root as an input
/// and its extra MVT::Other result becomes the new DAG root, serializing it
/// against other side-effecting nodes.
void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
    const ConstrainedFPIntrinsic &FPI) {
  SDLoc sdl = getCurSDLoc();
  unsigned Opcode;
  // Map the intrinsic ID 1:1 onto its strict ISD opcode.
  switch (FPI.getIntrinsicID()) {
  default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
  case Intrinsic::experimental_constrained_fadd:
    Opcode = ISD::STRICT_FADD;
    break;
  case Intrinsic::experimental_constrained_fsub:
    Opcode = ISD::STRICT_FSUB;
    break;
  case Intrinsic::experimental_constrained_fmul:
    Opcode = ISD::STRICT_FMUL;
    break;
  case Intrinsic::experimental_constrained_fdiv:
    Opcode = ISD::STRICT_FDIV;
    break;
  case Intrinsic::experimental_constrained_frem:
    Opcode = ISD::STRICT_FREM;
    break;
  case Intrinsic::experimental_constrained_fma:
    Opcode = ISD::STRICT_FMA;
    break;
  case Intrinsic::experimental_constrained_fptrunc:
    Opcode = ISD::STRICT_FP_ROUND;
    break;
  case Intrinsic::experimental_constrained_fpext:
    Opcode = ISD::STRICT_FP_EXTEND;
    break;
  case Intrinsic::experimental_constrained_sqrt:
    Opcode = ISD::STRICT_FSQRT;
    break;
  case Intrinsic::experimental_constrained_pow:
    Opcode = ISD::STRICT_FPOW;
    break;
  case Intrinsic::experimental_constrained_powi:
    Opcode = ISD::STRICT_FPOWI;
    break;
  case Intrinsic::experimental_constrained_sin:
    Opcode = ISD::STRICT_FSIN;
    break;
  case Intrinsic::experimental_constrained_cos:
    Opcode = ISD::STRICT_FCOS;
    break;
  case Intrinsic::experimental_constrained_exp:
    Opcode = ISD::STRICT_FEXP;
    break;
  case Intrinsic::experimental_constrained_exp2:
    Opcode = ISD::STRICT_FEXP2;
    break;
  case Intrinsic::experimental_constrained_log:
    Opcode = ISD::STRICT_FLOG;
    break;
  case Intrinsic::experimental_constrained_log10:
    Opcode = ISD::STRICT_FLOG10;
    break;
  case Intrinsic::experimental_constrained_log2:
    Opcode = ISD::STRICT_FLOG2;
    break;
  case Intrinsic::experimental_constrained_rint:
    Opcode = ISD::STRICT_FRINT;
    break;
  case Intrinsic::experimental_constrained_nearbyint:
    Opcode = ISD::STRICT_FNEARBYINT;
    break;
  case Intrinsic::experimental_constrained_maxnum:
    Opcode = ISD::STRICT_FMAXNUM;
    break;
  case Intrinsic::experimental_constrained_minnum:
    Opcode = ISD::STRICT_FMINNUM;
    break;
  case Intrinsic::experimental_constrained_ceil:
    Opcode = ISD::STRICT_FCEIL;
    break;
  case Intrinsic::experimental_constrained_floor:
    Opcode = ISD::STRICT_FFLOOR;
    break;
  case Intrinsic::experimental_constrained_round:
    Opcode = ISD::STRICT_FROUND;
    break;
  case Intrinsic::experimental_constrained_trunc:
    Opcode = ISD::STRICT_FTRUNC;
    break;
  }
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue Chain = getRoot();
  // Result types: the FP value's legalized VT(s), then the out-chain.
  SmallVector<EVT, 4> ValueVTs;
  ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs);
  ValueVTs.push_back(MVT::Other); // Out chain

  SDVTList VTs = DAG.getVTList(ValueVTs);
  SDValue Result;
  if (Opcode == ISD::STRICT_FP_ROUND)
    // STRICT_FP_ROUND takes an extra operand: 0 means the value is not
    // already known to be exactly representable in the narrower type.
    Result = DAG.getNode(Opcode, sdl, VTs,
                          { Chain, getValue(FPI.getArgOperand(0)),
                               DAG.getTargetConstant(0, sdl,
                               TLI.getPointerTy(DAG.getDataLayout())) });
  else if (FPI.isUnaryOp())
    Result = DAG.getNode(Opcode, sdl, VTs,
                         { Chain, getValue(FPI.getArgOperand(0)) });
  else if (FPI.isTernaryOp())
    Result = DAG.getNode(Opcode, sdl, VTs,
                         { Chain, getValue(FPI.getArgOperand(0)),
                                  getValue(FPI.getArgOperand(1)),
                                  getValue(FPI.getArgOperand(2)) });
  else
    Result = DAG.getNode(Opcode, sdl, VTs,
                         { Chain, getValue(FPI.getArgOperand(0)),
                           getValue(FPI.getArgOperand(1))  });

  // Unless the intrinsic says FP exceptions may be ignored, flag the node so
  // later passes do not optimize away its exception side effects.
  if (FPI.getExceptionBehavior() !=
      ConstrainedFPIntrinsic::ExceptionBehavior::ebIgnore) {
    SDNodeFlags Flags;
    Flags.setFPExcept(true);
    Result->setFlags(Flags);
  }

  assert(Result.getNode()->getNumValues() == 2);
  // Thread the out-chain onto the root and record the FP value.
  SDValue OutChain = Result.getValue(1);
  DAG.setRoot(OutChain);
  SDValue FPResult = Result.getValue(0);
  setValue(&FPI, FPResult);
}
6977
6978
/// Lower a call that may also be an invoke (i.e. may unwind to \p EHPadBB).
///
/// For potentially-unwinding calls this brackets the call with begin/end EH
/// labels so exception tables can describe the try-range, and registers the
/// range with MachineModuleInfo according to the EH personality in use.
/// Returns the (result value, chain) pair from TargetLowering::LowerCallTo;
/// a null chain indicates a tail call was emitted.
std::pair<SDValue, SDValue>
SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
                                    const BasicBlock *EHPadBB) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineModuleInfo &MMI = MF.getMMI();
  MCSymbol *BeginLabel = nullptr;

  if (EHPadBB) {
    // Insert a label before the invoke call to mark the try range.  This can be
    // used to detect deletion of the invoke via the MachineModuleInfo.
    BeginLabel = MMI.getContext().createTempSymbol();

    // For SjLj, keep track of which landing pads go with which invokes
    // so as to maintain the ordering of pads in the LSDA.
    unsigned CallSiteIndex = MMI.getCurrentCallSite();
    if (CallSiteIndex) {
      MF.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
      LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex);

      // Now that the call site is handled, stop tracking it.
      MMI.setCurrentCallSite(0);
    }

    // Both PendingLoads and PendingExports must be flushed here;
    // this call might not return.
    (void)getRoot();
    DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getControlRoot(), BeginLabel));

    CLI.setChain(getRoot());
  }
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);

  assert((CLI.IsTailCall || Result.second.getNode()) &&
         "Non-null chain expected with non-tail call!");
  assert((Result.second.getNode() || !Result.first.getNode()) &&
         "Null value expected with tail call!");

  if (!Result.second.getNode()) {
    // As a special case, a null chain means that a tail call has been emitted
    // and the DAG root is already updated.
    HasTailCall = true;

    // Since there's no actual continuation from this block, nothing can be
    // relying on us setting vregs for them.
    PendingExports.clear();
  } else {
    DAG.setRoot(Result.second);
  }

  if (EHPadBB) {
    // Insert a label at the end of the invoke call to mark the try range.  This
    // can be used to detect deletion of the invoke via the MachineModuleInfo.
    MCSymbol *EndLabel = MMI.getContext().createTempSymbol();
    DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel));

    // Inform MachineModuleInfo of range.
    auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
    // There is a platform (e.g. wasm) that uses funclet style IR but does not
    // actually use outlined funclets and their LSDA info style.
    if (MF.hasEHFunclets() && isFuncletEHPersonality(Pers)) {
      // Funclet EH (e.g. Windows): record the IP-to-state range instead of a
      // classic invoke range.
      assert(CLI.CS);
      WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo();
      EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS.getInstruction()),
                                BeginLabel, EndLabel);
    } else if (!isScopedEHPersonality(Pers)) {
      // Classic (Itanium-style) EH: register the invoke range with its pad.
      MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
    }
  }

  return Result;
}
7050
7051
/// Lower an ordinary call (or invoke, when \p EHPadBB is non-null) to
/// \p Callee with the arguments of call site \p CS.
///
/// Builds the argument list, applies the various conditions under which a
/// requested tail call must be refused (swifterror in the caller or the
/// arguments, local sret memory, not in tail position), and hands off to
/// lowerInvokable.  Handles swifterror argument/result plumbing via virtual
/// registers.
void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
                                      bool isTailCall,
                                      const BasicBlock *EHPadBB) {
  auto &DL = DAG.getDataLayout();
  FunctionType *FTy = CS.getFunctionType();
  Type *RetTy = CS.getType();

  TargetLowering::ArgListTy Args;
  Args.reserve(CS.arg_size());

  const Value *SwiftErrorVal = nullptr;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // We can't tail call inside a function with a swifterror argument. Lowering
  // does not support this yet. It would have to move into the swifterror
  // register before the call.
  auto *Caller = CS.getInstruction()->getParent()->getParent();
  if (TLI.supportSwiftError() &&
      Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
    isTailCall = false;

  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
       i != e; ++i) {
    TargetLowering::ArgListEntry Entry;
    const Value *V = *i;

    // Skip empty types
    if (V->getType()->isEmptyTy())
      continue;

    SDValue ArgNode = getValue(V);
    Entry.Node = ArgNode; Entry.Ty = V->getType();

    Entry.setAttributes(&CS, i - CS.arg_begin());

    // Use swifterror virtual register as input to the call.
    if (Entry.IsSwiftError && TLI.supportSwiftError()) {
      SwiftErrorVal = V;
      // We find the virtual register for the actual swifterror argument.
      // Instead of using the Value, we use the virtual register instead.
      Entry.Node = DAG.getRegister(
          SwiftError.getOrCreateVRegUseAt(CS.getInstruction(), FuncInfo.MBB, V),
          EVT(TLI.getPointerTy(DL)));
    }

    Args.push_back(Entry);

    // If we have an explicit sret argument that is an Instruction, (i.e., it
    // might point to function-local memory), we can't meaningfully tail-call.
    if (Entry.IsSRet && isa<Instruction>(V))
      isTailCall = false;
  }

  // Check if target-independent constraints permit a tail call here.
  // Target-dependent constraints are checked within TLI->LowerCallTo.
  if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget()))
    isTailCall = false;

  // Disable tail calls if there is an swifterror argument. Targets have not
  // been updated to support tail calls.
  if (TLI.supportSwiftError() && SwiftErrorVal)
    isTailCall = false;

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(getCurSDLoc())
      .setChain(getRoot())
      .setCallee(RetTy, FTy, Callee, std::move(Args), CS)
      .setTailCall(isTailCall)
      .setConvergent(CS.isConvergent());
  std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);

  // A null first value means a tail call consumed the result; otherwise
  // record the call's value (tightened by any !range metadata).
  if (Result.first.getNode()) {
    const Instruction *Inst = CS.getInstruction();
    Result.first = lowerRangeToAssertZExt(DAG, *Inst, Result.first);
    setValue(Inst, Result.first);
  }

  // The last element of CLI.InVals has the SDValue for swifterror return.
  // Here we copy it to a virtual register and update SwiftErrorMap for
  // book-keeping.
  if (SwiftErrorVal && TLI.supportSwiftError()) {
    // Get the last element of InVals.
    SDValue Src = CLI.InVals.back();
    unsigned VReg = SwiftError.getOrCreateVRegDefAt(
        CS.getInstruction(), FuncInfo.MBB, SwiftErrorVal);
    SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src);
    DAG.setRoot(CopyNode);
  }
}
7140
7141
/// Produce an SDValue holding the LoadVT-sized memory at \p PtrVal, for use
/// when expanding a memcmp call into loads and compares.
///
/// Tries constant folding first (e.g. loads from string literals); otherwise
/// emits an unaligned load.  Non-constant loads are appended to PendingLoads
/// so they can execute in parallel with other pending loads.
static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
                             SelectionDAGBuilder &Builder) {
  // Check to see if this load can be trivially constant folded, e.g. if the
  // input is from a string literal.
  if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
    // Cast pointer to the type we really want to load.
    Type *LoadTy =
        Type::getIntNTy(PtrVal->getContext(), LoadVT.getScalarSizeInBits());
    if (LoadVT.isVector())
      LoadTy = VectorType::get(LoadTy, LoadVT.getVectorNumElements());

    LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
                                         PointerType::getUnqual(LoadTy));

    if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr(
            const_cast<Constant *>(LoadInput), LoadTy, *Builder.DL))
      return Builder.getValue(LoadCst);
  }

  // Otherwise, we have to emit the load.  If the pointer is to unfoldable but
  // still constant memory, the input chain can be the entry node.
  SDValue Root;
  bool ConstantMemory = false;

  // Do not serialize (non-volatile) loads of constant memory with anything.
  if (Builder.AA && Builder.AA->pointsToConstantMemory(PtrVal)) {
    Root = Builder.DAG.getEntryNode();
    ConstantMemory = true;
  } else {
    // Do not serialize non-volatile loads against each other.
    Root = Builder.DAG.getRoot();
  }

  SDValue Ptr = Builder.getValue(PtrVal);
  SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root,
                                        Ptr, MachinePointerInfo(PtrVal),
                                        /* Alignment = */ 1);

  // Track the load's out-chain unless it reads constant memory (in which
  // case it needs no ordering against other memory operations).
  if (!ConstantMemory)
    Builder.PendingLoads.push_back(LoadVal.getValue(1));
  return LoadVal;
}
7183
7184
/// Record the value for an instruction that produces an integer result,
7185
/// converting the type where necessary.
7186
void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
7187
                                                  SDValue Value,
7188
46
                                                  bool IsSigned) {
7189
46
  EVT VT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
7190
46
                                                    I.getType(), true);
7191
46
  if (IsSigned)
7192
16
    Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT);
7193
30
  else
7194
30
    Value = DAG.getZExtOrTrunc(Value, getCurSDLoc(), VT);
7195
46
  setValue(&I, Value);
7196
46
}
7197
7198
/// See if we can lower a memcmp call into an optimized form. If so, return
/// true and lower it. Otherwise return false, and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
  const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
  const Value *Size = I.getArgOperand(2);
  const ConstantInt *CSize = dyn_cast<ConstantInt>(Size);
  // memcmp(p, q, 0) is trivially 0: fold to a constant without touching
  // either pointer.
  if (CSize && CSize->getZExtValue() == 0) {
    EVT CallVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
                                                          I.getType(), true);
    setValue(&I, DAG.getConstant(0, getCurSDLoc(), CallVT));
    return true;
  }

  // Give the target a chance to emit its own custom memcmp sequence first.
  const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
  std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForMemcmp(
      DAG, getCurSDLoc(), DAG.getRoot(), getValue(LHS), getValue(RHS),
      getValue(Size), MachinePointerInfo(LHS), MachinePointerInfo(RHS));
  if (Res.first.getNode()) {
    // memcmp results are signed; Res.second is the output chain to serialize
    // the target-emitted loads against later memory operations.
    processIntegerCallValue(I, Res.first, true);
    PendingLoads.push_back(Res.second);
    return true;
  }

  // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS)  != 0
  // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS)  != 0
  // This transform is only valid when the result is solely compared against
  // zero (equality), since the wide-load compare loses the ordering result.
  if (!CSize || !isOnlyUsedInZeroEqualityComparison(&I))
    return false;

  // If the target has a fast compare for the given size, it will return a
  // preferred load type for that size. Require that the load VT is legal and
  // that the target supports unaligned loads of that type. Otherwise, return
  // INVALID.
  auto hasFastLoadsAndCompare = [&](unsigned NumBits) {
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    MVT LVT = TLI.hasFastEqualityCompare(NumBits);
    if (LVT != MVT::INVALID_SIMPLE_VALUE_TYPE) {
      // TODO: Handle 5 byte compare as 4-byte + 1 byte.
      // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
      // TODO: Check alignment of src and dest ptrs.
      unsigned DstAS = LHS->getType()->getPointerAddressSpace();
      unsigned SrcAS = RHS->getType()->getPointerAddressSpace();
      if (!TLI.isTypeLegal(LVT) ||
          !TLI.allowsMisalignedMemoryAccesses(LVT, SrcAS) ||
          !TLI.allowsMisalignedMemoryAccesses(LVT, DstAS))
        LVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
    }

    return LVT;
  };

  // This turns into unaligned loads. We only do this if the target natively
  // supports the MVT we'll be loading or if it is small enough (<= 4) that
  // we'll only produce a small number of byte loads.
  MVT LoadVT;
  unsigned NumBitsToCompare = CSize->getZExtValue() * 8;
  switch (NumBitsToCompare) {
  default:
    return false;
  case 16:
    LoadVT = MVT::i16;
    break;
  case 32:
    LoadVT = MVT::i32;
    break;
  case 64:
  case 128:
  case 256:
    // Larger sizes are only worthwhile if the target declares a fast
    // equality compare of that width (may be a vector type).
    LoadVT = hasFastLoadsAndCompare(NumBitsToCompare);
    break;
  }

  if (LoadVT == MVT::INVALID_SIMPLE_VALUE_TYPE)
    return false;

  SDValue LoadL = getMemCmpLoad(LHS, LoadVT, *this);
  SDValue LoadR = getMemCmpLoad(RHS, LoadVT, *this);

  // Bitcast to a wide integer type if the loads are vectors.
  if (LoadVT.isVector()) {
    EVT CmpVT = EVT::getIntegerVT(LHS->getContext(), LoadVT.getSizeInBits());
    LoadL = DAG.getBitcast(CmpVT, LoadL);
    LoadR = DAG.getBitcast(CmpVT, LoadR);
  }

  // Equality-only use: a SETNE of the wide loads gives 0/1, zero-extended to
  // the call's integer result type.
  SDValue Cmp = DAG.getSetCC(getCurSDLoc(), MVT::i1, LoadL, LoadR, ISD::SETNE);
  processIntegerCallValue(I, Cmp, false);
  return true;
}
7289
7290
/// See if we can lower a memchr call into an optimized form. If so, return
7291
/// true and lower it. Otherwise return false, and it will be lowered like a
7292
/// normal call.
7293
/// The caller already checked that \p I calls the appropriate LibFunc with a
7294
/// correct prototype.
7295
25
bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
7296
25
  const Value *Src = I.getArgOperand(0);
7297
25
  const Value *Char = I.getArgOperand(1);
7298
25
  const Value *Length = I.getArgOperand(2);
7299
25
7300
25
  const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
7301
25
  std::pair<SDValue, SDValue> Res =
7302
25
    TSI.EmitTargetCodeForMemchr(DAG, getCurSDLoc(), DAG.getRoot(),
7303
25
                                getValue(Src), getValue(Char), getValue(Length),
7304
25
                                MachinePointerInfo(Src));
7305
25
  if (Res.first.getNode()) {
7306
5
    setValue(&I, Res.first);
7307
5
    PendingLoads.push_back(Res.second);
7308
5
    return true;
7309
5
  }
7310
20
7311
20
  return false;
7312
20
}
7313
7314
/// See if we can lower a mempcpy call into an optimized form. If so, return
/// true and lower it. Otherwise return false, and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
  SDValue Dst = getValue(I.getArgOperand(0));
  SDValue Src = getValue(I.getArgOperand(1));
  SDValue Size = getValue(I.getArgOperand(2));

  // Use the smaller of the two inferred alignments; the memcpy node must be
  // conservative about both operands.
  unsigned DstAlign = DAG.InferPtrAlignment(Dst);
  unsigned SrcAlign = DAG.InferPtrAlignment(Src);
  unsigned Align = std::min(DstAlign, SrcAlign);
  if (Align == 0) // Alignment of one or both could not be inferred.
    Align = 1; // 0 and 1 both specify no alignment, but 0 is reserved.

  bool isVol = false;
  SDLoc sdl = getCurSDLoc();

  // In the mempcpy context we need to pass in a false value for isTailCall
  // because the return pointer needs to be adjusted by the size of
  // the copied memory.
  SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Align, isVol,
                             false, /*isTailCall=*/false,
                             MachinePointerInfo(I.getArgOperand(0)),
                             MachinePointerInfo(I.getArgOperand(1)));
  assert(MC.getNode() != nullptr &&
         "** memcpy should not be lowered as TailCall in mempcpy context **");
  DAG.setRoot(MC);

  // Check if Size needs to be truncated or extended.
  Size = DAG.getSExtOrTrunc(Size, sdl, Dst.getValueType());

  // Adjust return pointer to point just past the last dst byte.
  SDValue DstPlusSize = DAG.getNode(ISD::ADD, sdl, Dst.getValueType(),
                                    Dst, Size);
  setValue(&I, DstPlusSize);
  return true;
}
7353
7354
/// See if we can lower a strcpy call into an optimized form.  If so, return
7355
/// true and lower it, otherwise return false and it will be lowered like a
7356
/// normal call.
7357
/// The caller already checked that \p I calls the appropriate LibFunc with a
7358
/// correct prototype.
7359
209
bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) {
7360
209
  const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
7361
209
7362
209
  const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
7363
209
  std::pair<SDValue, SDValue> Res =
7364
209
    TSI.EmitTargetCodeForStrcpy(DAG, getCurSDLoc(), getRoot(),
7365
209
                                getValue(Arg0), getValue(Arg1),
7366
209
                                MachinePointerInfo(Arg0),
7367
209
                                MachinePointerInfo(Arg1), isStpcpy);
7368
209
  if (Res.first.getNode()) {
7369
3
    setValue(&I, Res.first);
7370
3
    DAG.setRoot(Res.second);
7371
3
    return true;
7372
3
  }
7373
206
7374
206
  return false;
7375
206
}
7376
7377
/// See if we can lower a strcmp call into an optimized form.  If so, return
7378
/// true and lower it, otherwise return false and it will be lowered like a
7379
/// normal call.
7380
/// The caller already checked that \p I calls the appropriate LibFunc with a
7381
/// correct prototype.
7382
5.09k
bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) {
7383
5.09k
  const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
7384
5.09k
7385
5.09k
  const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
7386
5.09k
  std::pair<SDValue, SDValue> Res =
7387
5.09k
    TSI.EmitTargetCodeForStrcmp(DAG, getCurSDLoc(), DAG.getRoot(),
7388
5.09k
                                getValue(Arg0), getValue(Arg1),
7389
5.09k
                                MachinePointerInfo(Arg0),
7390
5.09k
                                MachinePointerInfo(Arg1));
7391
5.09k
  if (Res.first.getNode()) {
7392
4
    processIntegerCallValue(I, Res.first, true);
7393
4
    PendingLoads.push_back(Res.second);
7394
4
    return true;
7395
4
  }
7396
5.08k
7397
5.08k
  return false;
7398
5.08k
}
7399
7400
/// See if we can lower a strlen call into an optimized form.  If so, return
7401
/// true and lower it, otherwise return false and it will be lowered like a
7402
/// normal call.
7403
/// The caller already checked that \p I calls the appropriate LibFunc with a
7404
/// correct prototype.
7405
436
bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) {
7406
436
  const Value *Arg0 = I.getArgOperand(0);
7407
436
7408
436
  const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
7409
436
  std::pair<SDValue, SDValue> Res =
7410
436
    TSI.EmitTargetCodeForStrlen(DAG, getCurSDLoc(), DAG.getRoot(),
7411
436
                                getValue(Arg0), MachinePointerInfo(Arg0));
7412
436
  if (Res.first.getNode()) {
7413
1
    processIntegerCallValue(I, Res.first, false);
7414
1
    PendingLoads.push_back(Res.second);
7415
1
    return true;
7416
1
  }
7417
435
7418
435
  return false;
7419
435
}
7420
7421
/// See if we can lower a strnlen call into an optimized form.  If so, return
7422
/// true and lower it, otherwise return false and it will be lowered like a
7423
/// normal call.
7424
/// The caller already checked that \p I calls the appropriate LibFunc with a
7425
/// correct prototype.
7426
2
bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) {
7427
2
  const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
7428
2
7429
2
  const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
7430
2
  std::pair<SDValue, SDValue> Res =
7431
2
    TSI.EmitTargetCodeForStrnlen(DAG, getCurSDLoc(), DAG.getRoot(),
7432
2
                                 getValue(Arg0), getValue(Arg1),
7433
2
                                 MachinePointerInfo(Arg0));
7434
2
  if (Res.first.getNode()) {
7435
1
    processIntegerCallValue(I, Res.first, false);
7436
1
    PendingLoads.push_back(Res.second);
7437
1
    return true;
7438
1
  }
7439
1
7440
1
  return false;
7441
1
}
7442
7443
/// See if we can lower a unary floating-point operation into an SDNode with
7444
/// the specified Opcode.  If so, return true and lower it, otherwise return
7445
/// false and it will be lowered like a normal call.
7446
/// The caller already checked that \p I calls the appropriate LibFunc with a
7447
/// correct prototype.
7448
bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
7449
802
                                              unsigned Opcode) {
7450
802
  // We already checked this call's prototype; verify it doesn't modify errno.
7451
802
  if (!I.onlyReadsMemory())
7452
201
    return false;
7453
601
7454
601
  SDValue Tmp = getValue(I.getArgOperand(0));
7455
601
  setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp));
7456
601
  return true;
7457
601
}
7458
7459
/// See if we can lower a binary floating-point operation into an SDNode with
7460
/// the specified Opcode. If so, return true and lower it. Otherwise return
7461
/// false, and it will be lowered like a normal call.
7462
/// The caller already checked that \p I calls the appropriate LibFunc with a
7463
/// correct prototype.
7464
bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I,
7465
48
                                               unsigned Opcode) {
7466
48
  // We already checked this call's prototype; verify it doesn't modify errno.
7467
48
  if (!I.onlyReadsMemory())
7468
0
    return false;
7469
48
7470
48
  SDValue Tmp0 = getValue(I.getArgOperand(0));
7471
48
  SDValue Tmp1 = getValue(I.getArgOperand(1));
7472
48
  EVT VT = Tmp0.getValueType();
7473
48
  setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1));
7474
48
  return true;
7475
48
}
7476
7477
742k
/// Lower a CallInst: dispatch to inline-asm handling, intrinsic lowering,
/// recognized libcall fast paths, or a generic call lowering.
void SelectionDAGBuilder::visitCall(const CallInst &I) {
  // Handle inline assembly differently.
  if (isa<InlineAsm>(I.getCalledValue())) {
    visitInlineAsm(&I);
    return;
  }

  if (Function *F = I.getCalledFunction()) {
    if (F->isDeclaration()) {
      // Is this an LLVM intrinsic or a target-specific intrinsic?
      unsigned IID = F->getIntrinsicID();
      if (!IID)
        if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo())
          IID = II->getIntrinsicID(F);

      if (IID) {
        visitIntrinsicCall(I, IID);
        return;
      }
    }

    // Check for well-known libc/libm calls.  If the function is internal, it
    // can't be a library call.  Don't do the check if marked as nobuiltin for
    // some reason or the call site requires strict floating point semantics.
    LibFunc Func;
    if (!I.isNoBuiltin() && !I.isStrictFP() && !F->hasLocalLinkage() &&
        F->hasName() && LibInfo->getLibFunc(*F, Func) &&
        LibInfo->hasOptimizedCodeGen(Func)) {
      // Each visit* helper returns false if it could not emit an optimized
      // form; in that case we fall through to the generic lowering below.
      switch (Func) {
      default: break;
      case LibFunc_copysign:
      case LibFunc_copysignf:
      case LibFunc_copysignl:
        // We already checked this call's prototype; verify it doesn't modify
        // errno.
        if (I.onlyReadsMemory()) {
          SDValue LHS = getValue(I.getArgOperand(0));
          SDValue RHS = getValue(I.getArgOperand(1));
          setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurSDLoc(),
                                   LHS.getValueType(), LHS, RHS));
          return;
        }
        break;
      case LibFunc_fabs:
      case LibFunc_fabsf:
      case LibFunc_fabsl:
        if (visitUnaryFloatCall(I, ISD::FABS))
          return;
        break;
      case LibFunc_fmin:
      case LibFunc_fminf:
      case LibFunc_fminl:
        if (visitBinaryFloatCall(I, ISD::FMINNUM))
          return;
        break;
      case LibFunc_fmax:
      case LibFunc_fmaxf:
      case LibFunc_fmaxl:
        if (visitBinaryFloatCall(I, ISD::FMAXNUM))
          return;
        break;
      case LibFunc_sin:
      case LibFunc_sinf:
      case LibFunc_sinl:
        if (visitUnaryFloatCall(I, ISD::FSIN))
          return;
        break;
      case LibFunc_cos:
      case LibFunc_cosf:
      case LibFunc_cosl:
        if (visitUnaryFloatCall(I, ISD::FCOS))
          return;
        break;
      case LibFunc_sqrt:
      case LibFunc_sqrtf:
      case LibFunc_sqrtl:
      case LibFunc_sqrt_finite:
      case LibFunc_sqrtf_finite:
      case LibFunc_sqrtl_finite:
        if (visitUnaryFloatCall(I, ISD::FSQRT))
          return;
        break;
      case LibFunc_floor:
      case LibFunc_floorf:
      case LibFunc_floorl:
        if (visitUnaryFloatCall(I, ISD::FFLOOR))
          return;
        break;
      case LibFunc_nearbyint:
      case LibFunc_nearbyintf:
      case LibFunc_nearbyintl:
        if (visitUnaryFloatCall(I, ISD::FNEARBYINT))
          return;
        break;
      case LibFunc_ceil:
      case LibFunc_ceilf:
      case LibFunc_ceill:
        if (visitUnaryFloatCall(I, ISD::FCEIL))
          return;
        break;
      case LibFunc_rint:
      case LibFunc_rintf:
      case LibFunc_rintl:
        if (visitUnaryFloatCall(I, ISD::FRINT))
          return;
        break;
      case LibFunc_round:
      case LibFunc_roundf:
      case LibFunc_roundl:
        if (visitUnaryFloatCall(I, ISD::FROUND))
          return;
        break;
      case LibFunc_trunc:
      case LibFunc_truncf:
      case LibFunc_truncl:
        if (visitUnaryFloatCall(I, ISD::FTRUNC))
          return;
        break;
      case LibFunc_log2:
      case LibFunc_log2f:
      case LibFunc_log2l:
        if (visitUnaryFloatCall(I, ISD::FLOG2))
          return;
        break;
      case LibFunc_exp2:
      case LibFunc_exp2f:
      case LibFunc_exp2l:
        if (visitUnaryFloatCall(I, ISD::FEXP2))
          return;
        break;
      case LibFunc_memcmp:
        if (visitMemCmpCall(I))
          return;
        break;
      case LibFunc_mempcpy:
        if (visitMemPCpyCall(I))
          return;
        break;
      case LibFunc_memchr:
        if (visitMemChrCall(I))
          return;
        break;
      case LibFunc_strcpy:
        if (visitStrCpyCall(I, false))
          return;
        break;
      case LibFunc_stpcpy:
        if (visitStrCpyCall(I, true))
          return;
        break;
      case LibFunc_strcmp:
        if (visitStrCmpCall(I))
          return;
        break;
      case LibFunc_strlen:
        if (visitStrLenCall(I))
          return;
        break;
      case LibFunc_strnlen:
        if (visitStrNLenCall(I))
          return;
        break;
      }
    }
  }

  // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
  // have to do anything here to lower funclet bundles.
  assert(!I.hasOperandBundlesOtherThan(
             {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
         "Cannot lower calls with arbitrary operand bundles!");

  SDValue Callee = getValue(I.getCalledValue());

  if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
    LowerCallSiteWithDeoptBundle(&I, Callee, nullptr);
  else
    // Check if we can potentially perform a tail call. More detailed checking
    // is be done within LowerCallTo, after more information about the call is
    // known.
    LowerCallTo(&I, Callee, I.isTailCall());
}
7659
7660
namespace {

/// AsmOperandInfo - This contains information for each constraint that we are
/// lowering.
class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo {
public:
  /// CallOperand - If this is the result output operand or a clobber
  /// this is null, otherwise it is the incoming operand to the CallInst.
  /// This gets modified as the asm is processed.
  SDValue CallOperand;

  /// AssignedRegs - If this is a register or register class operand, this
  /// contains the set of register corresponding to the operand.
  RegsForValue AssignedRegs;

  explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
    : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr, 0) {
  }

  /// Whether or not this operand accesses memory
  bool hasMemory(const TargetLowering &TLI) const {
    // Indirect operand accesses access memory.
    if (isIndirect)
      return true;

    // Otherwise the operand touches memory iff any of its constraint codes
    // is a memory constraint.
    for (const auto &Code : Codes)
      if (TLI.getConstraintType(Code) == TargetLowering::C_Memory)
        return true;

    return false;
  }

  /// getCallOperandValEVT - Return the EVT of the Value* that this operand
  /// corresponds to.  If there is no Value* for this operand, it returns
  /// MVT::Other.
  EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI,
                           const DataLayout &DL) const {
    if (!CallOperandVal) return MVT::Other;

    // Basic-block operands (asm goto style labels) are treated as pointers.
    if (isa<BasicBlock>(CallOperandVal))
      return TLI.getPointerTy(DL);

    llvm::Type *OpTy = CallOperandVal->getType();

    // FIXME: code duplicated from TargetLowering::ParseConstraints().
    // If this is an indirect operand, the operand is a pointer to the
    // accessed type.
    if (isIndirect) {
      PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
      if (!PtrTy)
        report_fatal_error("Indirect operand for inline asm not a pointer!");
      OpTy = PtrTy->getElementType();
    }

    // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
    if (StructType *STy = dyn_cast<StructType>(OpTy))
      if (STy->getNumElements() == 1)
        OpTy = STy->getElementType(0);

    // If OpTy is not a single value, it may be a struct/union that we
    // can tile with integers.
    if (!OpTy->isSingleValueType() && OpTy->isSized()) {
      unsigned BitSize = DL.getTypeSizeInBits(OpTy);
      switch (BitSize) {
      default: break;
      case 1:
      case 8:
      case 16:
      case 32:
      case 64:
      case 128:
        // Replace the aggregate type with a same-sized integer.
        OpTy = IntegerType::get(Context, BitSize);
        break;
      }
    }

    return TLI.getValueType(DL, OpTy, true);
  }
};

using SDISelAsmOperandInfoVector = SmallVector<SDISelAsmOperandInfo, 16>;

} // end anonymous namespace
7743
7744
/// Make sure that the output operand \p OpInfo and its corresponding input
/// operand \p MatchingOpInfo have compatible constraint types (otherwise error
/// out).
static void patchMatchingInput(const SDISelAsmOperandInfo &OpInfo,
                               SDISelAsmOperandInfo &MatchingOpInfo,
                               SelectionDAG &DAG) {
  // Nothing to do if the two constraints already agree on the value type.
  if (OpInfo.ConstraintVT == MatchingOpInfo.ConstraintVT)
    return;

  const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
  const auto &TLI = DAG.getTargetLoweringInfo();

  // Resolve both constraints to (register, register class) pairs so we can
  // check whether they can actually share a register.
  std::pair<unsigned, const TargetRegisterClass *> MatchRC =
      TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
                                       OpInfo.ConstraintVT);
  std::pair<unsigned, const TargetRegisterClass *> InputRC =
      TLI.getRegForInlineAsmConstraint(TRI, MatchingOpInfo.ConstraintCode,
                                       MatchingOpInfo.ConstraintVT);
  // Incompatible if one side is integer and the other isn't, or if they
  // resolve to different register classes.
  if ((OpInfo.ConstraintVT.isInteger() !=
       MatchingOpInfo.ConstraintVT.isInteger()) ||
      (MatchRC.second != InputRC.second)) {
    // FIXME: error out in a more elegant fashion
    report_fatal_error("Unsupported asm: input constraint"
                       " with a matching output constraint of"
                       " incompatible type!");
  }
  // Force the input to use the output's value type.
  MatchingOpInfo.ConstraintVT = OpInfo.ConstraintVT;
}
7772
7773
/// Get a direct memory input to behave well as an indirect operand.
/// This may introduce stores, hence the need for a \p Chain.
/// \return The (possibly updated) chain.
static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
                                        SDISelAsmOperandInfo &OpInfo,
                                        SelectionDAG &DAG) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // If we don't have an indirect input, put it in the constpool if we can,
  // otherwise spill it to a stack slot.
  // TODO: This isn't quite right. We need to handle these according to
  // the addressing mode that the constraint wants. Also, this may take
  // an additional register for the computation and we don't want that
  // either.

  // If the operand is a float, integer, or vector constant, spill to a
  // constant pool entry to get its address.
  const Value *OpVal = OpInfo.CallOperandVal;
  if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
      isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) {
    OpInfo.CallOperand = DAG.getConstantPool(
        cast<Constant>(OpVal), TLI.getPointerTy(DAG.getDataLayout()));
    // No store emitted; the chain is unchanged.
    return Chain;
  }

  // Otherwise, create a stack slot and emit a store to it before the asm.
  Type *Ty = OpVal->getType();
  auto &DL = DAG.getDataLayout();
  uint64_t TySize = DL.getTypeAllocSize(Ty);
  unsigned Align = DL.getPrefTypeAlignment(Ty);
  MachineFunction &MF = DAG.getMachineFunction();
  int SSFI = MF.getFrameInfo().CreateStackObject(TySize, Align, false);
  SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL));
  // The store is threaded onto the chain so it is ordered before the asm.
  Chain = DAG.getTruncStore(Chain, Location, OpInfo.CallOperand, StackSlot,
                            MachinePointerInfo::getFixedStack(MF, SSFI),
                            TLI.getMemValueType(DL, Ty));
  // The operand the asm sees is now the address of the spilled value.
  OpInfo.CallOperand = StackSlot;

  return Chain;
}
7813
7814
/// GetRegistersForValue - Assign registers (virtual or physical) for the
/// specified operand.  We prefer to assign virtual registers, to allow the
/// register allocator to handle the assignment process.  However, if the asm
/// uses features that we can't model on machineinstrs, we have SDISel do the
/// allocation.  This produces generally horrible, but correct, code.
///
///   OpInfo describes the operand
///   RefOpInfo describes the matching operand if any, the operand otherwise
static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
                                 SDISelAsmOperandInfo &OpInfo,
                                 SDISelAsmOperandInfo &RefOpInfo) {
  LLVMContext &Context = *DAG.getContext();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  MachineFunction &MF = DAG.getMachineFunction();
  SmallVector<unsigned, 4> Regs;
  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();

  // No work to do for memory operations.
  if (OpInfo.ConstraintType == TargetLowering::C_Memory)
    return;

  // If this is a constraint for a single physreg, or a constraint for a
  // register class, find it.  Note: the constraint is resolved using
  // RefOpInfo (the matching operand, if any) rather than OpInfo itself.
  unsigned AssignedReg;
  const TargetRegisterClass *RC;
  std::tie(AssignedReg, RC) = TLI.getRegForInlineAsmConstraint(
      &TRI, RefOpInfo.ConstraintCode, RefOpInfo.ConstraintVT);
  // RC is unset only on failure. Return immediately.
  if (!RC)
    return;

  // Get the actual register value type.  This is important, because the user
  // may have asked for (e.g.) the AX register in i32 type.  We need to
  // remember that AX is actually i16 to get the right extension.
  const MVT RegVT = *TRI.legalclasstypes_begin(*RC);

  if (OpInfo.ConstraintVT != MVT::Other) {
    // If this is an FP operand in an integer register (or visa versa), or more
    // generally if the operand value disagrees with the register class we plan
    // to stick it in, fix the operand type.
    //
    // If this is an input value, the bitcast to the new type is done now.
    // Bitcast for output value is done at the end of visitInlineAsm().
    if ((OpInfo.Type == InlineAsm::isOutput ||
         OpInfo.Type == InlineAsm::isInput) &&
        !TRI.isTypeLegalForClass(*RC, OpInfo.ConstraintVT)) {
      // Try to convert to the first EVT that the reg class contains.  If the
      // types are identical size, use a bitcast to convert (e.g. two differing
      // vector types).  Note: output bitcast is done at the end of
      // visitInlineAsm().
      if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
        // Exclude indirect inputs while they are unsupported because the code
        // to perform the load is missing and thus OpInfo.CallOperand still
        // refers to the input address rather than the pointed-to value.
        if (OpInfo.Type == InlineAsm::isInput && !OpInfo.isIndirect)
          OpInfo.CallOperand =
              DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand);
        OpInfo.ConstraintVT = RegVT;
        // If the operand is an FP value and we want it in integer registers,
        // use the corresponding integer type. This turns an f64 value into
        // i64, which can be passed with two i32 values on a 32-bit machine.
      } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
        MVT VT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits());
        if (OpInfo.Type == InlineAsm::isInput)
          OpInfo.CallOperand =
              DAG.getNode(ISD::BITCAST, DL, VT, OpInfo.CallOperand);
        OpInfo.ConstraintVT = VT;
      }
    }
  }

  // No need to allocate a matching input constraint since the constraint it's
  // matching to has already been allocated.
  if (OpInfo.isMatchingInputConstraint())
    return;

  EVT ValueVT = OpInfo.ConstraintVT;
  if (OpInfo.ConstraintVT == MVT::Other)
    ValueVT = RegVT;

  // Initialize NumRegs.
  unsigned NumRegs = 1;
  if (OpInfo.ConstraintVT != MVT::Other)
    NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);

  // If this is a constraint for a specific physical register, like {r17},
  // assign it now.

  // If this associated to a specific register, initialize iterator to correct
  // place. If virtual, make sure we have enough registers

  // Initialize iterator if necessary
  TargetRegisterClass::iterator I = RC->begin();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  // Do not check for single registers.
  if (AssignedReg) {
    // Advance the class iterator to the requested physical register.
    for (; *I != AssignedReg; ++I)
      assert(I != RC->end() && "AssignedReg should be member of RC");
  }

  // Collect NumRegs registers: consecutive physregs starting at AssignedReg,
  // or freshly created virtual registers otherwise.
  for (; NumRegs; --NumRegs, ++I) {
    assert(I != RC->end() && "Ran out of registers to allocate!");
    Register R = AssignedReg ? Register(*I) : RegInfo.createVirtualRegister(RC);
    Regs.push_back(R);
  }

  OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
}
7924
7925
static unsigned
7926
findMatchingInlineAsmOperand(unsigned OperandNo,
7927
436
                             const std::vector<SDValue> &AsmNodeOperands) {
7928
436
  // Scan until we find the definition we already emitted of this operand.
7929
436
  unsigned CurOp = InlineAsm::Op_FirstOperand;
7930
3.17k
  for (; OperandNo; 
--OperandNo2.73k
) {
7931
2.73k
    // Advance to the next operand.
7932
2.73k
    unsigned OpFlag =
7933
2.73k
        cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
7934
2.73k
    assert((InlineAsm::isRegDefKind(OpFlag) ||
7935
2.73k
            InlineAsm::isRegDefEarlyClobberKind(OpFlag) ||
7936
2.73k
            InlineAsm::isMemKind(OpFlag)) &&
7937
2.73k
           "Skipped past definitions?");
7938
2.73k
    CurOp += InlineAsm::getNumOperandRegisters(OpFlag) + 1;
7939
2.73k
  }
7940
436
  return CurOp;
7941
436
}
7942
7943
namespace {
7944
7945
class ExtraFlags {
7946
  unsigned Flags = 0;
7947
7948
public:
7949
20.5k
  explicit ExtraFlags(ImmutableCallSite CS) {
7950
20.5k
    const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
7951
20.5k
    if (IA->hasSideEffects())
7952
19.2k
      Flags |= InlineAsm::Extra_HasSideEffects;
7953
20.5k
    if (IA->isAlignStack())
7954
21
      Flags |= InlineAsm::Extra_IsAlignStack;
7955
20.5k
    if (CS.isConvergent())
7956
1
      Flags |= InlineAsm::Extra_IsConvergent;
7957
20.5k
    Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
7958
20.5k
  }
7959
7960
102k
  void update(const TargetLowering::AsmOperandInfo &OpInfo) {
7961
102k
    // Ideally, we would only check against memory constraints.  However, the
7962
102k
    // meaning of an Other constraint can be target-specific and we can't easily
7963
102k
    // reason about it.  Therefore, be conservative and set MayLoad/MayStore
7964
102k
    // for Other constraints as well.
7965
102k
    if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
7966
102k
        
OpInfo.ConstraintType == TargetLowering::C_Other94.1k
) {
7967
9.05k
      if (OpInfo.Type == InlineAsm::isInput)
7968
714
        Flags |= InlineAsm::Extra_MayLoad;
7969
8.34k
      else if (OpInfo.Type == InlineAsm::isOutput)
7970
287
        Flags |= InlineAsm::Extra_MayStore;
7971
8.05k
      else if (OpInfo.Type == InlineAsm::isClobber)
7972
8.05k
        Flags |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore);
7973
9.05k
    }
7974
102k
  }
7975
7976
20.5k
  unsigned get() const { return Flags; }
7977
};
7978
7979
} // end anonymous namespace
7980
7981
/// visitInlineAsm - Handle a call to an InlineAsm object.
///
/// Lowers the call site to a single ISD::INLINEASM (or ISD::INLINEASM_BR for
/// callbr) node in several passes over the parsed constraints:
///   1. Compute each operand's value type and accumulate the extra-info flags.
///   2. Resolve each constraint to a concrete constraint type, turning direct
///      memory inputs into indirect ones.
///   3. Build the INLINEASM node's operand list, assigning registers for
///      inputs and outputs (emitting an error and bailing out if allocation
///      fails).
///   4. Copy results out of the assigned registers: indirect outputs become
///      stores, direct outputs are merged into the call's result value.
void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
  const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());

  /// ConstraintOperands - Information about all of the constraints.
  SDISelAsmOperandInfoVector ConstraintOperands;

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(
      DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), CS);

  // First Pass: Calculate HasSideEffects and ExtraFlags (AlignStack,
  // AsmDialect, MayLoad, MayStore).
  bool HasSideEffect = IA->hasSideEffects();
  ExtraFlags ExtraInfo(CS);

  unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
  unsigned ResNo = 0;   // ResNo - The result number of the next output.
  for (auto &T : TargetConstraints) {
    ConstraintOperands.push_back(SDISelAsmOperandInfo(T));
    SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Compute the value type for each operand. Inputs and indirect outputs
    // consume an actual call argument; direct outputs are the call's result.
    if (OpInfo.Type == InlineAsm::isInput ||
        (OpInfo.Type == InlineAsm::isOutput && OpInfo.isIndirect)) {
      OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));

      // Process the call argument. BasicBlocks are labels, currently appearing
      // only in asm's.
      const Instruction *I = CS.getInstruction();
      if (isa<CallBrInst>(I) &&
          (ArgNo - 1) >= (cast<CallBrInst>(I)->getNumArgOperands() -
                          cast<CallBrInst>(I)->getNumIndirectDests())) {
        // Trailing callbr arguments are the indirect-destination block
        // addresses.
        const auto *BA = cast<BlockAddress>(OpInfo.CallOperandVal);
        EVT VT = TLI.getValueType(DAG.getDataLayout(), BA->getType(), true);
        OpInfo.CallOperand = DAG.getTargetBlockAddress(BA, VT);
      } else if (const auto *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
        OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
      } else {
        OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
      }

      OpInfo.ConstraintVT =
          OpInfo
              .getCallOperandValEVT(*DAG.getContext(), TLI, DAG.getDataLayout())
              .getSimpleVT();
    } else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) {
      // The return value of the call is this value.  As such, there is no
      // corresponding argument.
      assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
      if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
        OpInfo.ConstraintVT = TLI.getSimpleValueType(
            DAG.getDataLayout(), STy->getElementType(ResNo));
      } else {
        assert(ResNo == 0 && "Asm only has one result!");
        OpInfo.ConstraintVT =
            TLI.getSimpleValueType(DAG.getDataLayout(), CS.getType());
      }
      ++ResNo;
    } else {
      // Clobbers carry no value.
      OpInfo.ConstraintVT = MVT::Other;
    }

    if (!HasSideEffect)
      HasSideEffect = OpInfo.hasMemory(TLI);

    // Determine if this InlineAsm MayLoad or MayStore based on the constraints.
    // FIXME: Could we compute this on OpInfo rather than T?

    // Compute the constraint code and ConstraintType to use.
    TLI.ComputeConstraintToUse(T, SDValue());

    ExtraInfo.update(T);
  }

  // We won't need to flush pending loads if this asm doesn't touch
  // memory and is nonvolatile.
  SDValue Flag, Chain = (HasSideEffect) ? getRoot() : DAG.getRoot();

  bool IsCallBr = isa<CallBrInst>(CS.getInstruction());
  if (IsCallBr) {
    // If this is a callbr we need to flush pending exports since inlineasm_br
    // is a terminator. We need to do this before nodes are glued to
    // the inlineasm_br node.
    Chain = getControlRoot();
  }

  // Second pass over the constraints: compute which constraint option to use.
  for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
    // If this is an output operand with a matching input operand, look up the
    // matching input. If their types mismatch, e.g. one is an integer, the
    // other is floating point, or their sizes are different, flag it as an
    // error.
    if (OpInfo.hasMatchingInput()) {
      SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
      patchMatchingInput(OpInfo, Input, DAG);
    }

    // Compute the constraint code and ConstraintType to use.
    TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);

    // Memory clobbers need no further processing here.
    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
        OpInfo.Type == InlineAsm::isClobber)
      continue;

    // If this is a memory input, and if the operand is not indirect, do what we
    // need to provide an address for the memory input.
    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
        !OpInfo.isIndirect) {
      assert((OpInfo.isMultipleAlternative ||
              (OpInfo.Type == InlineAsm::isInput)) &&
             "Can only indirectify direct input operands!");

      // Memory operands really want the address of the value.
      Chain = getAddressForMemoryInput(Chain, getCurSDLoc(), OpInfo, DAG);

      // There is no longer a Value* corresponding to this operand.
      OpInfo.CallOperandVal = nullptr;

      // It is now an indirect operand.
      OpInfo.isIndirect = true;
    }

  }

  // AsmNodeOperands - The operands for the ISD::INLINEASM node.
  std::vector<SDValue> AsmNodeOperands;
  AsmNodeOperands.push_back(SDValue());  // reserve space for input chain
  AsmNodeOperands.push_back(DAG.getTargetExternalSymbol(
      IA->getAsmString().c_str(), TLI.getPointerTy(DAG.getDataLayout())));

  // If we have a !srcloc metadata node associated with it, we want to attach
  // this to the ultimately generated inline asm machineinstr.  To do this, we
  // pass in the third operand as this (potentially null) inline asm MDNode.
  const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
  AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));

  // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore
  // bits as operand 3.
  AsmNodeOperands.push_back(DAG.getTargetConstant(
      ExtraInfo.get(), getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));

  // Third pass: Loop over operands to prepare DAG-level operands. As part of
  // this, assign virtual and physical registers for inputs and outputs.
  for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
    // Assign Registers. A matching input uses its matched output's registers.
    SDISelAsmOperandInfo &RefOpInfo =
        OpInfo.isMatchingInputConstraint()
            ? ConstraintOperands[OpInfo.getMatchedOperand()]
            : OpInfo;
    GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);

    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
          (OpInfo.ConstraintType == TargetLowering::C_Other &&
           OpInfo.isIndirect)) {
        unsigned ConstraintID =
            TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
        assert(ConstraintID != InlineAsm::Constraint_Unknown &&
               "Failed to convert memory constraint code to constraint id.");

        // Add information to the INLINEASM node to know about this output.
        unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
        OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID);
        AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(),
                                                        MVT::i32));
        AsmNodeOperands.push_back(OpInfo.CallOperand);
        break;
      } else if ((OpInfo.ConstraintType == TargetLowering::C_Other &&
                  !OpInfo.isIndirect) ||
                 OpInfo.ConstraintType == TargetLowering::C_Register ||
                 OpInfo.ConstraintType == TargetLowering::C_RegisterClass) {
        // Otherwise, this outputs to a register (directly for C_Register /
        // C_RegisterClass, and a target-defined fashion for C_Other). Find a
        // register that we can use.
        if (OpInfo.AssignedRegs.Regs.empty()) {
          emitInlineAsmError(
              CS, "couldn't allocate output register for constraint '" +
                      Twine(OpInfo.ConstraintCode) + "'");
          return;
        }

        // Add information to the INLINEASM node to know that this register is
        // set.
        OpInfo.AssignedRegs.AddInlineAsmOperands(
            OpInfo.isEarlyClobber ? InlineAsm::Kind_RegDefEarlyClobber
                                  : InlineAsm::Kind_RegDef,
            false, 0, getCurSDLoc(), DAG, AsmNodeOperands);
      }
      break;

    case InlineAsm::isInput: {
      SDValue InOperandVal = OpInfo.CallOperand;

      if (OpInfo.isMatchingInputConstraint()) {
        // If this is required to match an output register we have already set,
        // just use its register.
        auto CurOp = findMatchingInlineAsmOperand(OpInfo.getMatchedOperand(),
                                                  AsmNodeOperands);
        unsigned OpFlag =
          cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
        if (InlineAsm::isRegDefKind(OpFlag) ||
            InlineAsm::isRegDefEarlyClobberKind(OpFlag)) {
          // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
          if (OpInfo.isIndirect) {
            // This happens on gcc/testsuite/gcc.dg/pr8788-1.c
            emitInlineAsmError(CS, "inline asm not supported yet:"
                                   " don't know how to handle tied "
                                   "indirect register inputs");
            return;
          }

          MVT RegVT = AsmNodeOperands[CurOp+1].getSimpleValueType();
          SmallVector<unsigned, 4> Regs;

          if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) {
            unsigned NumRegs = InlineAsm::getNumOperandRegisters(OpFlag);
            MachineRegisterInfo &RegInfo =
                DAG.getMachineFunction().getRegInfo();
            for (unsigned i = 0; i != NumRegs; ++i)
              Regs.push_back(RegInfo.createVirtualRegister(RC));
          } else {
            emitInlineAsmError(CS, "inline asm error: This value type register "
                                   "class is not natively supported!");
            return;
          }

          RegsForValue MatchedRegs(Regs, RegVT, InOperandVal.getValueType());

          SDLoc dl = getCurSDLoc();
          // Use the produced MatchedRegs object to copy the input value into
          // the newly created virtual registers.
          MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag,
                                    CS.getInstruction());
          MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
                                           true, OpInfo.getMatchedOperand(), dl,
                                           DAG, AsmNodeOperands);
          break;
        }

        assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
        assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
               "Unexpected number of operands");
        // Add information to the INLINEASM node to know about this input.
        // See InlineAsm.h isUseOperandTiedToDef.
        OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag);
        OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
                                                    OpInfo.getMatchedOperand());
        AsmNodeOperands.push_back(DAG.getTargetConstant(
            OpFlag, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
        AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
        break;
      }

      // Treat indirect 'X' constraint as memory.
      if (OpInfo.ConstraintType == TargetLowering::C_Other &&
          OpInfo.isIndirect)
        OpInfo.ConstraintType = TargetLowering::C_Memory;

      if (OpInfo.ConstraintType == TargetLowering::C_Other) {
        std::vector<SDValue> Ops;
        TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
                                          Ops, DAG);
        if (Ops.empty()) {
          emitInlineAsmError(CS, "invalid operand for inline asm constraint '" +
                                     Twine(OpInfo.ConstraintCode) + "'");
          return;
        }

        // Add information to the INLINEASM node to know about this input.
        unsigned ResOpType =
          InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
        AsmNodeOperands.push_back(DAG.getTargetConstant(
            ResOpType, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
        AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
        break;
      }

      if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
        assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
        assert(InOperandVal.getValueType() ==
                   TLI.getPointerTy(DAG.getDataLayout()) &&
               "Memory operands expect pointer values");

        unsigned ConstraintID =
            TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
        assert(ConstraintID != InlineAsm::Constraint_Unknown &&
               "Failed to convert memory constraint code to constraint id.");

        // Add information to the INLINEASM node to know about this input.
        unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
        ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID);
        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
                                                        getCurSDLoc(),
                                                        MVT::i32));
        AsmNodeOperands.push_back(InOperandVal);
        break;
      }

      assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
              OpInfo.ConstraintType == TargetLowering::C_Register) &&
             "Unknown constraint type!");

      // TODO: Support this.
      if (OpInfo.isIndirect) {
        emitInlineAsmError(
            CS, "Don't know how to handle indirect register inputs yet "
                "for constraint '" +
                    Twine(OpInfo.ConstraintCode) + "'");
        return;
      }

      // Copy the input into the appropriate registers.
      if (OpInfo.AssignedRegs.Regs.empty()) {
        emitInlineAsmError(CS, "couldn't allocate input reg for constraint '" +
                                   Twine(OpInfo.ConstraintCode) + "'");
        return;
      }

      SDLoc dl = getCurSDLoc();

      OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl,
                                        Chain, &Flag, CS.getInstruction());

      OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
                                               dl, DAG, AsmNodeOperands);
      break;
    }
    case InlineAsm::isClobber:
      // Add the clobbered value to the operand list, so that the register
      // allocator is aware that the physreg got clobbered.
      if (!OpInfo.AssignedRegs.Regs.empty())
        OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber,
                                                 false, 0, getCurSDLoc(), DAG,
                                                 AsmNodeOperands);
      break;
    }
  }

  // Finish up input operands.  Set the input chain and add the flag last.
  AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
  if (Flag.getNode()) AsmNodeOperands.push_back(Flag);

  unsigned ISDOpc = IsCallBr ? ISD::INLINEASM_BR : ISD::INLINEASM;
  Chain = DAG.getNode(ISDOpc, getCurSDLoc(),
                      DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
  Flag = Chain.getValue(1);

  // Do additional work to generate outputs.

  SmallVector<EVT, 1> ResultVTs;
  SmallVector<SDValue, 1> ResultValues;
  SmallVector<SDValue, 8> OutChains;

  // Collect the IR-level result types: struct results contribute one type per
  // element, a non-void scalar result contributes one, void contributes none.
  llvm::Type *CSResultType = CS.getType();
  ArrayRef<Type *> ResultTypes;
  if (StructType *StructResult = dyn_cast<StructType>(CSResultType))
    ResultTypes = StructResult->elements();
  else if (!CSResultType->isVoidTy())
    ResultTypes = makeArrayRef(CSResultType);

  auto CurResultType = ResultTypes.begin();
  auto handleRegAssign = [&](SDValue V) {
    assert(CurResultType != ResultTypes.end() && "Unexpected value");
    assert((*CurResultType)->isSized() && "Unexpected unsized type");
    EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), *CurResultType);
    ++CurResultType;
    // If the type of the inline asm call site return value is different but has
    // same size as the type of the asm output bitcast it.  One example of this
    // is for vectors with different width / number of elements.  This can
    // happen for register classes that can contain multiple different value
    // types.  The preg or vreg allocated may not have the same VT as was
    // expected.
    //
    // This can also happen for a return value that disagrees with the register
    // class it is put in, eg. a double in a general-purpose register on a
    // 32-bit machine.
    if (ResultVT != V.getValueType() &&
        ResultVT.getSizeInBits() == V.getValueSizeInBits())
      V = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultVT, V);
    else if (ResultVT != V.getValueType() && ResultVT.isInteger() &&
             V.getValueType().isInteger()) {
      // If a result value was tied to an input value, the computed result
      // may have a wider width than the expected result.  Extract the
      // relevant portion.
      V = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultVT, V);
    }
    assert(ResultVT == V.getValueType() && "Asm result value mismatch!");
    ResultVTs.push_back(ResultVT);
    ResultValues.push_back(V);
  };

  // Deal with output operands.
  for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
    if (OpInfo.Type == InlineAsm::isOutput) {
      SDValue Val;
      // Skip trivial output operands.
      if (OpInfo.AssignedRegs.Regs.empty())
        continue;

      switch (OpInfo.ConstraintType) {
      case TargetLowering::C_Register:
      case TargetLowering::C_RegisterClass:
        Val = OpInfo.AssignedRegs.getCopyFromRegs(
            DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, CS.getInstruction());
        break;
      case TargetLowering::C_Other:
        Val = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(),
                                              OpInfo, DAG);
        break;
      case TargetLowering::C_Memory:
        break; // Already handled.
      case TargetLowering::C_Unknown:
        assert(false && "Unexpected unknown constraint");
      }

      // Indirect output manifest as stores. Record output chains.
      if (OpInfo.isIndirect) {
        const Value *Ptr = OpInfo.CallOperandVal;
        assert(Ptr && "Expected value CallOperandVal for indirect asm operand");
        SDValue Store = DAG.getStore(Chain, getCurSDLoc(), Val, getValue(Ptr),
                                     MachinePointerInfo(Ptr));
        OutChains.push_back(Store);
      } else {
        // generate CopyFromRegs to associated registers.
        assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
        if (Val.getOpcode() == ISD::MERGE_VALUES) {
          for (const SDValue &V : Val->op_values())
            handleRegAssign(V);
        } else
          handleRegAssign(Val);
      }
    }
  }

  // Set results.
  if (!ResultValues.empty()) {
    assert(CurResultType == ResultTypes.end() &&
           "Mismatch in number of ResultTypes");
    assert(ResultValues.size() == ResultTypes.size() &&
           "Mismatch in number of output operands in asm result");

    SDValue V = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
                            DAG.getVTList(ResultVTs), ResultValues);
    setValue(CS.getInstruction(), V);
  }

  // Collect store chains.
  if (!OutChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, OutChains);

  // Only Update Root if inline assembly has a memory effect.
  if (ResultValues.empty() || HasSideEffect || !OutChains.empty() || IsCallBr)
    DAG.setRoot(Chain);
}
8437
8438
void SelectionDAGBuilder::emitInlineAsmError(ImmutableCallSite CS,
8439
94
                                             const Twine &Message) {
8440
94
  LLVMContext &Ctx = *DAG.getContext();
8441
94
  Ctx.emitError(CS.getInstruction(), Message);
8442
94
8443
94
  // Make sure we leave the DAG in a valid state
8444
94
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8445
94
  SmallVector<EVT, 1> ValueVTs;
8446
94
  ComputeValueVTs(TLI, DAG.getDataLayout(), CS->getType(), ValueVTs);
8447
94
8448
94
  if (ValueVTs.empty())
8449
41
    return;
8450
53
8451
53
  SmallVector<SDValue, 1> Ops;
8452
108
  for (unsigned i = 0, e = ValueVTs.size(); i != e; 
++i55
)
8453
55
    Ops.push_back(DAG.getUNDEF(ValueVTs[i]));
8454
53
8455
53
  setValue(CS.getInstruction(), DAG.getMergeValues(Ops, getCurSDLoc()));
8456
53
}
8457
8458
532
void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
8459
532
  DAG.setRoot(DAG.getNode(ISD::VASTART, getCurSDLoc(),
8460
532
                          MVT::Other, getRoot(),
8461
532
                          getValue(I.getArgOperand(0)),
8462
532
                          DAG.getSrcValue(I.getArgOperand(0))));
8463
532
}
8464
8465
334
void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
8466
334
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8467
334
  const DataLayout &DL = DAG.getDataLayout();
8468
334
  SDValue V = DAG.getVAArg(
8469
334
      TLI.getMemValueType(DAG.getDataLayout(), I.getType()), getCurSDLoc(),
8470
334
      getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)),
8471
334
      DL.getABITypeAlignment(I.getType()));
8472
334
  DAG.setRoot(V.getValue(1));
8473
334
8474
334
  if (I.getType()->isPointerTy())
8475
22
    V = DAG.getPtrExtOrTrunc(
8476
22
        V, getCurSDLoc(), TLI.getValueType(DAG.getDataLayout(), I.getType()));
8477
334
  setValue(&I, V);
8478
334
}
8479
8480
700
void SelectionDAGBuilder::visitVAEnd(const CallInst &I) {
8481
700
  DAG.setRoot(DAG.getNode(ISD::VAEND, getCurSDLoc(),
8482
700
                          MVT::Other, getRoot(),
8483
700
                          getValue(I.getArgOperand(0)),
8484
700
                          DAG.getSrcValue(I.getArgOperand(0))));
8485
700
}
8486
8487
263
void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
8488
263
  DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurSDLoc(),
8489
263
                          MVT::Other, getRoot(),
8490
263
                          getValue(I.getArgOperand(0)),
8491
263
                          getValue(I.getArgOperand(1)),
8492
263
                          DAG.getSrcValue(I.getArgOperand(0)),
8493
263
                          DAG.getSrcValue(I.getArgOperand(1))));
8494
263
}
8495
8496
/// If \p I carries !range metadata describing a zero-based range, wrap \p Op
/// in an AssertZext of the narrowest integer type that holds the range;
/// otherwise return \p Op unchanged.
SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
                                                    const Instruction &I,
                                                    SDValue Op) {
  const MDNode *Range = I.getMetadata(LLVMContext::MD_range);
  if (!Range)
    return Op;

  // Full, empty, or wrapped ranges give us nothing to assert.
  ConstantRange CR = getConstantRangeFromMetadata(*Range);
  if (CR.isFullSet() || CR.isEmptySet() || CR.isUpperWrapped())
    return Op;

  // AssertZext can only express ranges that start at zero.
  if (!CR.getUnsignedMin().isMinValue())
    return Op;

  // Width needed for the largest value, clamped to the minimum legal integer
  // width.
  unsigned Bits = std::max(CR.getUnsignedMax().getActiveBits(),
                           static_cast<unsigned>(IntegerType::MIN_INT_BITS));
  EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits);

  SDLoc SL = getCurSDLoc();
  SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(), Op,
                             DAG.getValueType(SmallVT));

  // If Op's node produces only one value, the assert replaces it directly.
  unsigned NumVals = Op.getNode()->getNumValues();
  if (NumVals == 1)
    return ZExt;

  // Otherwise forward the remaining values (e.g. a chain) unchanged.
  SmallVector<SDValue, 4> MergedOps;
  MergedOps.push_back(ZExt);
  for (unsigned Idx = 1; Idx != NumVals; ++Idx)
    MergedOps.push_back(Op.getValue(Idx));

  return DAG.getMergeValues(MergedOps, SL);
}
8533
8534
/// Populate a CallLowerinInfo (into \p CLI) based on the properties of
8535
/// the call being lowered.
8536
///
8537
/// This is a helper for lowering intrinsics that follow a target calling
8538
/// convention or require stack pointer adjustment. Only a subset of the
8539
/// intrinsic's operands need to participate in the calling convention.
8540
void SelectionDAGBuilder::populateCallLoweringInfo(
8541
    TargetLowering::CallLoweringInfo &CLI, const CallBase *Call,
8542
    unsigned ArgIdx, unsigned NumArgs, SDValue Callee, Type *ReturnTy,
8543
234
    bool IsPatchPoint) {
8544
234
  TargetLowering::ArgListTy Args;
8545
234
  Args.reserve(NumArgs);
8546
234
8547
234
  // Populate the argument list.
8548
234
  // Attributes for args start at offset 1, after the return attribute.
8549
234
  for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs;
8550
449
       ArgI != ArgE; 
++ArgI215
) {
8551
215
    const Value *V = Call->getOperand(ArgI);
8552
215
8553
215
    assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
8554
215
8555
215
    TargetLowering::ArgListEntry Entry;
8556
215
    Entry.Node = getValue(V);
8557
215
    Entry.Ty = V->getType();
8558
215
    Entry.setAttributes(Call, ArgI);
8559
215
    Args.push_back(Entry);
8560
215
  }
8561
234
8562
234
  CLI.setDebugLoc(getCurSDLoc())
8563
234
      .setChain(getRoot())
8564
234
      .setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args))
8565
234
      .setDiscardResult(Call->use_empty())
8566
234
      .setIsPatchPoint(IsPatchPoint);
8567
234
}
8568
8569
/// Add a stack map intrinsic call's live variable operands to a stackmap
8570
/// or patchpoint target node's operand list.
8571
///
8572
/// Constants are converted to TargetConstants purely as an optimization to
8573
/// avoid constant materialization and register allocation.
8574
///
8575
/// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not
8576
/// generate address computation nodes, and so FinalizeISel can convert the
8577
/// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids
8578
/// address materialization and register allocation, but may also be required
8579
/// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an
8580
/// alloca in the entry block, then the runtime may assume that the alloca's
8581
/// StackMap location can be read immediately after compilation and that the
8582
/// location is valid at any point during execution (this is similar to the
8583
/// assumption made by the llvm.gcroot intrinsic). If the alloca's location were
8584
/// only available in a register, then the runtime would need to trap when
8585
/// execution reaches the StackMap in order to read the alloca's location.
8586
static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx,
8587
                                const SDLoc &DL, SmallVectorImpl<SDValue> &Ops,
8588
286
                                SelectionDAGBuilder &Builder) {
8589
671
  for (unsigned i = StartIdx, e = CS.arg_size(); i != e; 
++i385
) {
8590
385
    SDValue OpVal = Builder.getValue(CS.getArgument(i));
8591
385
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) {
8592
58
      Ops.push_back(
8593
58
        Builder.DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64));
8594
58
      Ops.push_back(
8595
58
        Builder.DAG.getTargetConstant(C->getSExtValue(), DL, MVT::i64));
8596
327
    } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) {
8597
21
      const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo();
8598
21
      Ops.push_back(Builder.DAG.getTargetFrameIndex(
8599
21
          FI->getIndex(), TLI.getFrameIndexTy(Builder.DAG.getDataLayout())));
8600
21
    } else
8601
306
      Ops.push_back(OpVal);
8602
385
  }
8603
286
}
8604
8605
/// Lower llvm.experimental.stackmap directly to its target opcode.
8606
140
void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
8607
140
  // void @llvm.experimental.stackmap(i32 <id>, i32 <numShadowBytes>,
8608
140
  //                                  [live variables...])
8609
140
8610
140
  assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value.");
8611
140
8612
140
  SDValue Chain, InFlag, Callee, NullPtr;
8613
140
  SmallVector<SDValue, 32> Ops;
8614
140
8615
140
  SDLoc DL = getCurSDLoc();
8616
140
  Callee = getValue(CI.getCalledValue());
8617
140
  NullPtr = DAG.getIntPtrConstant(0, DL, true);
8618
140
8619
140
  // The stackmap intrinsic only records the live variables (the arguemnts
8620
140
  // passed to it) and emits NOPS (if requested). Unlike the patchpoint
8621
140
  // intrinsic, this won't be lowered to a function call. This means we don't
8622
140
  // have to worry about calling conventions and target specific lowering code.
8623
140
  // Instead we perform the call lowering right here.
8624
140
  //
8625
140
  // chain, flag = CALLSEQ_START(chain, 0, 0)
8626
140
  // chain, flag = STACKMAP(id, nbytes, ..., chain, flag)
8627
140
  // chain, flag = CALLSEQ_END(chain, 0, 0, flag)
8628
140
  //
8629
140
  Chain = DAG.getCALLSEQ_START(getRoot(), 0, 0, DL);
8630
140
  InFlag = Chain.getValue(1);
8631
140
8632
140
  // Add the <id> and <numBytes> constants.
8633
140
  SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos));
8634
140
  Ops.push_back(DAG.getTargetConstant(
8635
140
                  cast<ConstantSDNode>(IDVal)->getZExtValue(), DL, MVT::i64));
8636
140
  SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos));
8637
140
  Ops.push_back(DAG.getTargetConstant(
8638
140
                  cast<ConstantSDNode>(NBytesVal)->getZExtValue(), DL,
8639
140
                  MVT::i32));
8640
140
8641
140
  // Push live variables for the stack map.
8642
140
  addStackMapLiveVars(&CI, 2, DL, Ops, *this);
8643
140
8644
140
  // We are not pushing any register mask info here on the operands list,
8645
140
  // because the stackmap doesn't clobber anything.
8646
140
8647
140
  // Push the chain and the glue flag.
8648
140
  Ops.push_back(Chain);
8649
140
  Ops.push_back(InFlag);
8650
140
8651
140
  // Create the STACKMAP node.
8652
140
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
8653
140
  SDNode *SM = DAG.getMachineNode(TargetOpcode::STACKMAP, DL, NodeTys, Ops);
8654
140
  Chain = SDValue(SM, 0);
8655
140
  InFlag = Chain.getValue(1);
8656
140
8657
140
  Chain = DAG.getCALLSEQ_END(Chain, NullPtr, NullPtr, InFlag, DL);
8658
140
8659
140
  // Stackmaps don't generate values, so nothing goes into the NodeMap.
8660
140
8661
140
  // Set the root to the target-lowered call chain.
8662
140
  DAG.setRoot(Chain);
8663
140
8664
140
  // Inform the Frame Information that we have a stackmap in this function.
8665
140
  FuncInfo.MF->getFrameInfo().setHasStackMap();
8666
140
}
8667
8668
/// Lower llvm.experimental.patchpoint directly to its target opcode.
8669
void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
8670
146
                                          const BasicBlock *EHPadBB) {
8671
146
  // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
8672
146
  //                                                 i32 <numBytes>,
8673
146
  //                                                 i8* <target>,
8674
146
  //                                                 i32 <numArgs>,
8675
146
  //                                                 [Args...],
8676
146
  //                                                 [live variables...])
8677
146
8678
146
  CallingConv::ID CC = CS.getCallingConv();
8679
146
  bool IsAnyRegCC = CC == CallingConv::AnyReg;
8680
146
  bool HasDef = !CS->getType()->isVoidTy();
8681
146
  SDLoc dl = getCurSDLoc();
8682
146
  SDValue Callee = getValue(CS->getOperand(PatchPointOpers::TargetPos));
8683
146
8684
146
  // Handle immediate and symbolic callees.
8685
146
  if (auto* ConstCallee = dyn_cast<ConstantSDNode>(Callee))
8686
141
    Callee = DAG.getIntPtrConstant(ConstCallee->getZExtValue(), dl,
8687
141
                                   /*isTarget=*/true);
8688
5
  else if (auto* SymbolicCallee = dyn_cast<GlobalAddressSDNode>(Callee))
8689
5
    Callee =  DAG.getTargetGlobalAddress(SymbolicCallee->getGlobal(),
8690
5
                                         SDLoc(SymbolicCallee),
8691
5
                                         SymbolicCallee->getValueType(0));
8692
146
8693
146
  // Get the real number of arguments participating in the call <numArgs>
8694
146
  SDValue NArgVal = getValue(CS.getArgument(PatchPointOpers::NArgPos));
8695
146
  unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue();
8696
146
8697
146
  // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
8698
146
  // Intrinsics include all meta-operands up to but not including CC.
8699
146
  unsigned NumMetaOpers = PatchPointOpers::CCPos;
8700
146
  assert(CS.arg_size() >= NumMetaOpers + NumArgs &&
8701
146
         "Not enough arguments provided to the patchpoint intrinsic");
8702
146
8703
146
  // For AnyRegCC the arguments are lowered later on manually.
8704
146
  unsigned NumCallArgs = IsAnyRegCC ? 
066
:
NumArgs80
;
8705
146
  Type *ReturnTy =
8706
146
    IsAnyRegCC ? 
Type::getVoidTy(*DAG.getContext())66
:
CS->getType()80
;
8707
146
8708
146
  TargetLowering::CallLoweringInfo CLI(DAG);
8709
146
  populateCallLoweringInfo(CLI, cast<CallBase>(CS.getInstruction()),
8710
146
                           NumMetaOpers, NumCallArgs, Callee, ReturnTy, true);
8711
146
  std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
8712
146
8713
146
  SDNode *CallEnd = Result.second.getNode();
8714
146
  if (HasDef && 
(CallEnd->getOpcode() == ISD::CopyFromReg)81
)
8715
34
    CallEnd = CallEnd->getOperand(0).getNode();
8716
146
8717
146
  /// Get a call instruction from the call sequence chain.
8718
146
  /// Tail calls are not allowed.
8719
146
  assert(CallEnd->getOpcode() == ISD::CALLSEQ_END &&
8720
146
         "Expected a callseq node.");
8721
146
  SDNode *Call = CallEnd->getOperand(0).getNode();
8722
146
  bool HasGlue = Call->getGluedNode();
8723
146
8724
146
  // Replace the target specific call node with the patchable intrinsic.
8725
146
  SmallVector<SDValue, 8> Ops;
8726
146
8727
146
  // Add the <id> and <numBytes> constants.
8728
146
  SDValue IDVal = getValue(CS->getOperand(PatchPointOpers::IDPos));
8729
146
  Ops.push_back(DAG.getTargetConstant(
8730
146
                  cast<ConstantSDNode>(IDVal)->getZExtValue(), dl, MVT::i64));
8731
146
  SDValue NBytesVal = getValue(CS->getOperand(PatchPointOpers::NBytesPos));
8732
146
  Ops.push_back(DAG.getTargetConstant(
8733
146
                  cast<ConstantSDNode>(NBytesVal)->getZExtValue(), dl,
8734
146
                  MVT::i32));
8735
146
8736
146
  // Add the callee.
8737
146
  Ops.push_back(Callee);
8738
146
8739
146
  // Adjust <numArgs> to account for any arguments that have been passed on the
8740
146
  // stack instead.
8741
146
  // Call Node: Chain, Target, {Args}, RegMask, [Glue]
8742
146
  unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 
458
:
388
);
8743
146
  NumCallRegArgs = IsAnyRegCC ? 
NumArgs66
:
NumCallRegArgs80
;
8744
146
  Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, dl, MVT::i32));
8745
146
8746
146
  // Add the calling convention
8747
146
  Ops.push_back(DAG.getTargetConstant((unsigned)CC, dl, MVT::i32));
8748
146
8749
146
  // Add the arguments we omitted previously. The register allocator should
8750
146
  // place these in any free register.
8751
146
  if (IsAnyRegCC)
8752
329
    
for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; 66
i != e;
++i263
)
8753
263
      Ops.push_back(getValue(CS.getArgument(i)));
8754
146
8755
146
  // Push the arguments from the call instruction up to the register mask.
8756
146
  SDNode::op_iterator e = HasGlue ? 
Call->op_end()-258
:
Call->op_end()-188
;
8757
146
  Ops.append(Call->op_begin() + 2, e);
8758
146
8759
146
  // Push live variables for the stack map.
8760
146
  addStackMapLiveVars(CS, NumMetaOpers + NumArgs, dl, Ops, *this);
8761
146
8762
146
  // Push the register mask info.
8763
146
  if (HasGlue)
8764
58
    Ops.push_back(*(Call->op_end()-2));
8765
88
  else
8766
88
    Ops.push_back(*(Call->op_end()-1));
8767
146
8768
146
  // Push the chain (this is originally the first operand of the call, but
8769
146
  // becomes now the last or second to last operand).
8770
146
  Ops.push_back(*(Call->op_begin()));
8771
146
8772
146
  // Push the glue flag (last operand).
8773
146
  if (HasGlue)
8774
58
    Ops.push_back(*(Call->op_end()-1));
8775
146
8776
146
  SDVTList NodeTys;
8777
146
  if (IsAnyRegCC && 
HasDef66
) {
8778
47
    // Create the return types based on the intrinsic definition
8779
47
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8780
47
    SmallVector<EVT, 3> ValueVTs;
8781
47
    ComputeValueVTs(TLI, DAG.getDataLayout(), CS->getType(), ValueVTs);
8782
47
    assert(ValueVTs.size() == 1 && "Expected only one return value type.");
8783
47
8784
47
    // There is always a chain and a glue type at the end
8785
47
    ValueVTs.push_back(MVT::Other);
8786
47
    ValueVTs.push_back(MVT::Glue);
8787
47
    NodeTys = DAG.getVTList(ValueVTs);
8788
47
  } else
8789
99
    NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
8790
146
8791
146
  // Replace the target specific call node with a PATCHPOINT node.
8792
146
  MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT,
8793
146
                                         dl, NodeTys, Ops);
8794
146
8795
146
  // Update the NodeMap.
8796
146
  if (HasDef) {
8797
81
    if (IsAnyRegCC)
8798
47
      setValue(CS.getInstruction(), SDValue(MN, 0));
8799
34
    else
8800
34
      setValue(CS.getInstruction(), Result.first);
8801
81
  }
8802
146
8803
146
  // Fixup the consumers of the intrinsic. The chain and glue may be used in the
8804
146
  // call sequence. Furthermore the location of the chain and glue can change
8805
146
  // when the AnyReg calling convention is used and the intrinsic returns a
8806
146
  // value.
8807
146
  if (IsAnyRegCC && 
HasDef66
) {
8808
47
    SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)};
8809
47
    SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)};
8810
47
    DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
8811
47
  } else
8812
99
    DAG.ReplaceAllUsesWith(Call, MN);
8813
146
  DAG.DeleteNode(Call);
8814
146
8815
146
  // Inform the Frame Information that we have a patchpoint in this function.
8816
146
  FuncInfo.MF->getFrameInfo().setHasPatchPoint();
8817
146
}
8818
8819
void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
8820
857
                                            unsigned Intrinsic) {
8821
857
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8822
857
  SDValue Op1 = getValue(I.getArgOperand(0));
8823
857
  SDValue Op2;
8824
857
  if (I.getNumArgOperands() > 1)
8825
23
    Op2 = getValue(I.getArgOperand(1));
8826
857
  SDLoc dl = getCurSDLoc();
8827
857
  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
8828
857
  SDValue Res;
8829
857
  FastMathFlags FMF;
8830
857
  if (isa<FPMathOperator>(I))
8831
34
    FMF = I.getFastMathFlags();
8832
857
8833
857
  switch (Intrinsic) {
8834
857
  case Intrinsic::experimental_vector_reduce_v2_fadd:
8835
23
    if (FMF.allowReassoc())
8836
23
      Res = DAG.getNode(ISD::FADD, dl, VT, Op1,
8837
23
                        DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2));
8838
0
    else
8839
0
      Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2);
8840
23
    break;
8841
857
  case Intrinsic::experimental_vector_reduce_v2_fmul:
8842
0
    if (FMF.allowReassoc())
8843
0
      Res = DAG.getNode(ISD::FMUL, dl, VT, Op1,
8844
0
                        DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2));
8845
0
    else
8846
0
      Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2);
8847
0
    break;
8848
857
  case Intrinsic::experimental_vector_reduce_add:
8849
730
    Res = DAG.getNode(ISD::VECREDUCE_ADD, dl, VT, Op1);
8850
730
    break;
8851
857
  case Intrinsic::experimental_vector_reduce_mul:
8852
0
    Res = DAG.getNode(ISD::VECREDUCE_MUL, dl, VT, Op1);
8853
0
    break;
8854
857
  case Intrinsic::experimental_vector_reduce_and:
8855
20
    Res = DAG.getNode(ISD::VECREDUCE_AND, dl, VT, Op1);
8856
20
    break;
8857
857
  case Intrinsic::experimental_vector_reduce_or:
8858
6
    Res = DAG.getNode(ISD::VECREDUCE_OR, dl, VT, Op1);
8859
6
    break;
8860
857
  case Intrinsic::experimental_vector_reduce_xor:
8861
0
    Res = DAG.getNode(ISD::VECREDUCE_XOR, dl, VT, Op1);
8862
0
    break;
8863
857
  case Intrinsic::experimental_vector_reduce_smax:
8864
18
    Res = DAG.getNode(ISD::VECREDUCE_SMAX, dl, VT, Op1);
8865
18
    break;
8866
857
  case Intrinsic::experimental_vector_reduce_smin:
8867
15
    Res = DAG.getNode(ISD::VECREDUCE_SMIN, dl, VT, Op1);
8868
15
    break;
8869
857
  case Intrinsic::experimental_vector_reduce_umax:
8870
24
    Res = DAG.getNode(ISD::VECREDUCE_UMAX, dl, VT, Op1);
8871
24
    break;
8872
857
  case Intrinsic::experimental_vector_reduce_umin:
8873
10
    Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1);
8874
10
    break;
8875
857
  case Intrinsic::experimental_vector_reduce_fmax:
8876
9
    Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1);
8877
9
    break;
8878
857
  case Intrinsic::experimental_vector_reduce_fmin:
8879
2
    Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1);
8880
2
    break;
8881
857
  default:
8882
0
    llvm_unreachable("Unhandled vector reduce intrinsic");
8883
857
  }
8884
857
  setValue(&I, Res);
8885
857
}
8886
8887
/// Returns an AttributeList representing the attributes applied to the return
8888
/// value of the given call.
8889
459k
static AttributeList getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) {
  // Translate the CLI's return-value flags back into the corresponding IR
  // attribute kinds.
  SmallVector<Attribute::AttrKind, 2> Attrs;
  if (CLI.RetSExt)
    Attrs.push_back(Attribute::SExt);
  if (CLI.RetZExt)
    Attrs.push_back(Attribute::ZExt);
  if (CLI.IsInReg)
    Attrs.push_back(Attribute::InReg);

  return AttributeList::get(CLI.RetTy->getContext(), AttributeList::ReturnIndex,
                            Attrs);
}
8901
8902
/// TargetLowering::LowerCallTo - This is the default LowerCallTo
8903
/// implementation, which just calls LowerCall.
8904
/// FIXME: When all targets are
8905
/// migrated to using LowerCall, this hook should be integrated into SDISel.
8906
std::pair<SDValue, SDValue>
8907
459k
TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
8908
459k
  // Handle the incoming return values from the call.
8909
459k
  CLI.Ins.clear();
8910
459k
  Type *OrigRetTy = CLI.RetTy;
8911
459k
  SmallVector<EVT, 4> RetTys;
8912
459k
  SmallVector<uint64_t, 4> Offsets;
8913
459k
  auto &DL = CLI.DAG.getDataLayout();
8914
459k
  ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets);
8915
459k
8916
459k
  if (CLI.IsPostTypeLegalization) {
8917
5.66k
    // If we are lowering a libcall after legalization, split the return type.
8918
5.66k
    SmallVector<EVT, 4> OldRetTys;
8919
5.66k
    SmallVector<uint64_t, 4> OldOffsets;
8920
5.66k
    RetTys.swap(OldRetTys);
8921
5.66k
    Offsets.swap(OldOffsets);
8922
5.66k
8923
11.3k
    for (size_t i = 0, e = OldRetTys.size(); i != e; 
++i5.66k
) {
8924
5.66k
      EVT RetVT = OldRetTys[i];
8925
5.66k
      uint64_t Offset = OldOffsets[i];
8926
5.66k
      MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), RetVT);
8927
5.66k
      unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), RetVT);
8928
5.66k
      unsigned RegisterVTByteSZ = RegisterVT.getSizeInBits() / 8;
8929
5.66k
      RetTys.append(NumRegs, RegisterVT);
8930
11.3k
      for (unsigned j = 0; j != NumRegs; 
++j5.67k
)
8931
5.67k
        Offsets.push_back(Offset + j * RegisterVTByteSZ);
8932
5.66k
    }
8933
5.66k
  }
8934
459k
8935
459k
  SmallVector<ISD::OutputArg, 4> Outs;
8936
459k
  GetReturnInfo(CLI.CallConv, CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL);
8937
459k
8938
459k
  bool CanLowerReturn =
8939
459k
      this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(),
8940
459k
                           CLI.IsVarArg, Outs, CLI.RetTy->getContext());
8941
459k
8942
459k
  SDValue DemoteStackSlot;
8943
459k
  int DemoteStackIdx = -100;
8944
459k
  if (!CanLowerReturn) {
8945
272
    // FIXME: equivalent assert?
8946
272
    // assert(!CS.hasInAllocaArgument() &&
8947
272
    //        "sret demotion is incompatible with inalloca");
8948
272
    uint64_t TySize = DL.getTypeAllocSize(CLI.RetTy);
8949
272
    unsigned Align = DL.getPrefTypeAlignment(CLI.RetTy);
8950
272
    MachineFunction &MF = CLI.DAG.getMachineFunction();
8951
272
    DemoteStackIdx = MF.getFrameInfo().CreateStackObject(TySize, Align, false);
8952
272
    Type *StackSlotPtrType = PointerType::get(CLI.RetTy,
8953
272
                                              DL.getAllocaAddrSpace());
8954
272
8955
272
    DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getFrameIndexTy(DL));
8956
272
    ArgListEntry Entry;
8957
272
    Entry.Node = DemoteStackSlot;
8958
272
    Entry.Ty = StackSlotPtrType;
8959
272
    Entry.IsSExt = false;
8960
272
    Entry.IsZExt = false;
8961
272
    Entry.IsInReg = false;
8962
272
    Entry.IsSRet = true;
8963
272
    Entry.IsNest = false;
8964
272
    Entry.IsByVal = false;
8965
272
    Entry.IsReturned = false;
8966
272
    Entry.IsSwiftSelf = false;
8967
272
    Entry.IsSwiftError = false;
8968
272
    Entry.Alignment = Align;
8969
272
    CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
8970
272
    CLI.NumFixedArgs += 1;
8971
272
    CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());
8972
272
8973
272
    // sret demotion isn't compatible with tail-calls, since the sret argument
8974
272
    // points into the callers stack frame.
8975
272
    CLI.IsTailCall = false;
8976
459k
  } else {
8977
459k
    bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
8978
459k
        CLI.RetTy, CLI.CallConv, CLI.IsVarArg);
8979
726k
    for (unsigned I = 0, E = RetTys.size(); I != E; 
++I267k
) {
8980
267k
      ISD::ArgFlagsTy Flags;
8981
267k
      if (NeedsRegBlock) {
8982
818
        Flags.setInConsecutiveRegs();
8983
818
        if (I == RetTys.size() - 1)
8984
686
          Flags.setInConsecutiveRegsLast();
8985
818
      }
8986
267k
      EVT VT = RetTys[I];
8987
267k
      MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
8988
267k
                                                     CLI.CallConv, VT);
8989
267k
      unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
8990
267k
                                                       CLI.CallConv, VT);
8991
538k
      for (unsigned i = 0; i != NumRegs; 
++i271k
) {
8992
271k
        ISD::InputArg MyFlags;
8993
271k
        MyFlags.Flags = Flags;
8994
271k
        MyFlags.VT = RegisterVT;
8995
271k
        MyFlags.ArgVT = VT;
8996
271k
        MyFlags.Used = CLI.IsReturnValueUsed;
8997
271k
        if (CLI.RetTy->isPointerTy()) {
8998
89.8k
          MyFlags.Flags.setPointer();
8999
89.8k
          MyFlags.Flags.setPointerAddrSpace(
9000
89.8k
              cast<PointerType>(CLI.RetTy)->getAddressSpace());
9001
89.8k
        }
9002
271k
        if (CLI.RetSExt)
9003
2.27k
          MyFlags.Flags.setSExt();
9004
271k
        if (CLI.RetZExt)
9005
28.8k
          MyFlags.Flags.setZExt();
9006
271k
        if (CLI.IsInReg)
9007
476
          MyFlags.Flags.setInReg();
9008
271k
        CLI.Ins.push_back(MyFlags);
9009
271k
      }
9010
267k
    }
9011
459k
  }
9012
459k
9013
459k
  // We push in swifterror return as the last element of CLI.Ins.
9014
459k
  ArgListTy &Args = CLI.getArgs();
9015
459k
  if (supportSwiftError()) {
9016
1.40M
    for (unsigned i = 0, e = Args.size(); i != e; 
++i972k
) {
9017
972k
      if (Args[i].IsSwiftError) {
9018
110
        ISD::InputArg MyFlags;
9019
110
        MyFlags.VT = getPointerTy(DL);
9020
110
        MyFlags.ArgVT = EVT(getPointerTy(DL));
9021
110
        MyFlags.Flags.setSwiftError();
9022
110
        CLI.Ins.push_back(MyFlags);
9023
110
      }
9024
972k
    }
9025
430k
  }
9026
459k
9027
459k
  // Handle all of the outgoing arguments.
9028
459k
  CLI.Outs.clear();
9029
459k
  CLI.OutVals.clear();
9030
1.49M
  for (unsigned i = 0, e = Args.size(); i != e; 
++i1.03M
) {
9031
1.03M
    SmallVector<EVT, 4> ValueVTs;
9032
1.03M
    ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs);
9033
1.03M
    // FIXME: Split arguments if CLI.IsPostTypeLegalization
9034
1.03M
    Type *FinalType = Args[i].Ty;
9035
1.03M
    if (Args[i].IsByVal)
9036
1.10k
      FinalType = cast<PointerType>(Args[i].Ty)->getElementType();
9037
1.03M
    bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
9038
1.03M
        FinalType, CLI.CallConv, CLI.IsVarArg);
9039
2.07M
    for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
9040
1.03M
         ++Value) {
9041
1.03M
      EVT VT = ValueVTs[Value];
9042
1.03M
      Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext());
9043
1.03M
      SDValue Op = SDValue(Args[i].Node.getNode(),
9044
1.03M
                           Args[i].Node.getResNo() + Value);
9045
1.03M
      ISD::ArgFlagsTy Flags;
9046
1.03M
9047
1.03M
      // Certain targets (such as MIPS), may have a different ABI alignment
9048
1.03M
      // for a type depending on the context. Give the target a chance to
9049
1.03M
      // specify the alignment it wants.
9050
1.03M
      unsigned OriginalAlignment = getABIAlignmentForCallingConv(ArgTy, DL);
9051
1.03M
9052
1.03M
      if (Args[i].Ty->isPointerTy()) {
9053
601k
        Flags.setPointer();
9054
601k
        Flags.setPointerAddrSpace(
9055
601k
            cast<PointerType>(Args[i].Ty)->getAddressSpace());
9056
601k
      }
9057
1.03M
      if (Args[i].IsZExt)
9058
43.4k
        Flags.setZExt();
9059
1.03M
      if (Args[i].IsSExt)
9060
2.88k
        Flags.setSExt();
9061
1.03M
      if (Args[i].IsInReg) {
9062
240
        // If we are using vectorcall calling convention, a structure that is
9063
240
        // passed InReg - is surely an HVA
9064
240
        if (CLI.CallConv == CallingConv::X86_VectorCall &&
9065
240
            
isa<StructType>(FinalType)14
) {
9066
8
          // The first value of a structure is marked
9067
8
          if (0 == Value)
9068
2
            Flags.setHvaStart();
9069
8
          Flags.setHva();
9070
8
        }
9071
240
        // Set InReg Flag
9072
240
        Flags.setInReg();
9073
240
      }
9074
1.03M
      if (Args[i].IsSRet)
9075
1.37k
        Flags.setSRet();
9076
1.03M
      if (Args[i].IsSwiftSelf)
9077
97
        Flags.setSwiftSelf();
9078
1.03M
      if (Args[i].IsSwiftError)
9079
129
        Flags.setSwiftError();
9080
1.03M
      if (Args[i].IsByVal)
9081
1.10k
        Flags.setByVal();
9082
1.03M
      if (Args[i].IsInAlloca) {
9083
22
        Flags.setInAlloca();
9084
22
        // Set the byval flag for CCAssignFn callbacks that don't know about
9085
22
        // inalloca.  This way we can know how many bytes we should've allocated
9086
22
        // and how many bytes a callee cleanup function will pop.  If we port
9087
22
        // inalloca to more targets, we'll have to add custom inalloca handling
9088
22
        // in the various CC lowering callbacks.
9089
22
        Flags.setByVal();
9090
22
      }
9091
1.03M
      if (Args[i].IsByVal || 
Args[i].IsInAlloca1.03M
) {
9092
1.13k
        PointerType *Ty = cast<PointerType>(Args[i].Ty);
9093
1.13k
        Type *ElementTy = Ty->getElementType();
9094
1.13k
9095
1.13k
        unsigned FrameSize = DL.getTypeAllocSize(
9096
1.13k
            Args[i].ByValType ? 
Args[i].ByValType1.10k
:
ElementTy22
);
9097
1.13k
        Flags.setByValSize(FrameSize);
9098
1.13k
9099
1.13k
        // info is not there but there are cases it cannot get right.
9100
1.13k
        unsigned FrameAlign;
9101
1.13k
        if (Args[i].Alignment)
9102
872
          FrameAlign = Args[i].Alignment;
9103
259
        else
9104
259
          FrameAlign = getByValTypeAlignment(ElementTy, DL);
9105
1.13k
        Flags.setByValAlign(FrameAlign);
9106
1.13k
      }
9107
1.03M
      if (Args[i].IsNest)
9108
7
        Flags.setNest();
9109
1.03M
      if (NeedsRegBlock)
9110
2.90k
        Flags.setInConsecutiveRegs();
9111
1.03M
      Flags.setOrigAlign(OriginalAlignment);
9112
1.03M
9113
1.03M
      MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
9114
1.03M
                                                 CLI.CallConv, VT);
9115
1.03M
      unsigned NumParts = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
9116
1.03M
                                                        CLI.CallConv, VT);
9117
1.03M
      SmallVector<SDValue, 4> Parts(NumParts);
9118
1.03M
      ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
9119
1.03M
9120
1.03M
      if (Args[i].IsSExt)
9121
2.88k
        ExtendKind = ISD::SIGN_EXTEND;
9122
1.03M
      else if (Args[i].IsZExt)
9123
43.4k
        ExtendKind = ISD::ZERO_EXTEND;
9124
1.03M
9125
1.03M
      // Conservatively only handle 'returned' on non-vectors that can be lowered,
9126
1.03M
      // for now.
9127
1.03M
      if (Args[i].IsReturned && 
!Op.getValueType().isVector()4.76k
&&
9128
1.03M
          
CanLowerReturn4.76k
) {
9129
4.76k
        assert((CLI.RetTy == Args[i].Ty ||
9130
4.76k
                (CLI.RetTy->isPointerTy() && Args[i].Ty->isPointerTy() &&
9131
4.76k
                 CLI.RetTy->getPointerAddressSpace() ==
9132
4.76k
                     Args[i].Ty->getPointerAddressSpace())) &&
9133
4.76k
               RetTys.size() == NumValues && "unexpected use of 'returned'");
9134
4.76k
        // Before passing 'returned' to the target lowering code, ensure that
9135
4.76k
        // either the register MVT and the actual EVT are the same size or that
9136
4.76k
        // the return value and argument are extended in the same way; in these
9137
4.76k
        // cases it's safe to pass the argument register value unchanged as the
9138
4.76k
        // return register value (although it's at the target's option whether
9139
4.76k
        // to do so)
9140
4.76k
        // TODO: allow code generation to take advantage of partially preserved
9141
4.76k
        // registers rather than clobbering the entire register when the
9142
4.76k
        // parameter extension method is not compatible with the return
9143
4.76k
        // extension method
9144
4.76k
        if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) ||
9145
4.76k
            
(23
ExtendKind != ISD::ANY_EXTEND23
&&
CLI.RetSExt == Args[i].IsSExt14
&&
9146
23
             
CLI.RetZExt == Args[i].IsZExt14
))
9147
4.74k
          Flags.setReturned();
9148
4.76k
      }
9149
1.03M
9150
1.03M
      getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT,
9151
1.03M
                     CLI.CS.getInstruction(), CLI.CallConv, ExtendKind);
9152
1.03M
9153
2.09M
      for (unsigned j = 0; j != NumParts; 
++j1.05M
) {
9154
1.05M
        // if it isn't first piece, alignment must be 1
9155
1.05M
        ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT,
9156
1.05M
                               i < CLI.NumFixedArgs,
9157
1.05M
                               i, j*Parts[j].getValueType().getStoreSize());
9158
1.05M
        if (NumParts > 1 && 
j == 040.0k
)
9159
18.7k
          MyFlags.Flags.setSplit();
9160
1.03M
        else if (j != 0) {
9161
21.2k
          MyFlags.Flags.setOrigAlign(1);
9162
21.2k
          if (j == NumParts - 1)
9163
18.7k
            MyFlags.Flags.setSplitEnd();
9164
21.2k
        }
9165
1.05M
9166
1.05M
        CLI.Outs.push_back(MyFlags);
9167
1.05M
        CLI.OutVals.push_back(Parts[j]);
9168
1.05M
      }
9169
1.03M
9170
1.03M
      if (NeedsRegBlock && 
Value == NumValues - 12.90k
)
9171
1.64k
        CLI.Outs[CLI.Outs.size() - 1].Flags.setInConsecutiveRegsLast();
9172
1.03M
    }
9173
1.03M
  }
9174
459k
9175
459k
  SmallVector<SDValue, 4> InVals;
9176
459k
  CLI.Chain = LowerCall(CLI, InVals);
9177
459k
9178
459k
  // Update CLI.InVals to use outside of this function.
9179
459k
  CLI.InVals = InVals;
9180
459k
9181
459k
  // Verify that the target's LowerCall behaved as expected.
9182
459k
  assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other &&
9183
459k
         "LowerCall didn't return a valid chain!");
9184
459k
  assert((!CLI.IsTailCall || InVals.empty()) &&
9185
459k
         "LowerCall emitted a return value for a tail call!");
9186
459k
  assert((CLI.IsTailCall || InVals.size() == CLI.Ins.size()) &&
9187
459k
         "LowerCall didn't emit the correct number of values!");
9188
459k
9189
459k
  // For a tail call, the return value is merely live-out and there aren't
9190
459k
  // any nodes in the DAG representing it. Return a special value to
9191
459k
  // indicate that a tail call has been emitted and no more Instructions
9192
459k
  // should be processed in the current block.
9193
459k
  if (CLI.IsTailCall) {
9194
64.3k
    CLI.DAG.setRoot(CLI.Chain);
9195
64.3k
    return std::make_pair(SDValue(), SDValue());
9196
64.3k
  }
9197
395k
9198
#ifndef NDEBUG
9199
  for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) {
9200
    assert(InVals[i].getNode() && "LowerCall emitted a null value!");
9201
    assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() &&
9202
           "LowerCall emitted a value with the wrong type!");
9203
  }
9204
#endif
9205
9206
395k
  SmallVector<SDValue, 4> ReturnValues;
9207
395k
  if (!CanLowerReturn) {
9208
272
    // The instruction result is the result of loading from the
9209
272
    // hidden sret parameter.
9210
272
    SmallVector<EVT, 1> PVTs;
9211
272
    Type *PtrRetTy = OrigRetTy->getPointerTo(DL.getAllocaAddrSpace());
9212
272
9213
272
    ComputeValueVTs(*this, DL, PtrRetTy, PVTs);
9214
272
    assert(PVTs.size() == 1 && "Pointers should fit in one register");
9215
272
    EVT PtrVT = PVTs[0];
9216
272
9217
272
    unsigned NumValues = RetTys.size();
9218
272
    ReturnValues.resize(NumValues);
9219
272
    SmallVector<SDValue, 4> Chains(NumValues);
9220
272
9221
272
    // An aggregate return value cannot wrap around the address space, so
9222
272
    // offsets to its parts don't wrap either.
9223
272
    SDNodeFlags Flags;
9224
272
    Flags.setNoUnsignedWrap(true);
9225
272
9226
704
    for (unsigned i = 0; i < NumValues; 
++i432
) {
9227
432
      SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
9228
432
                                    CLI.DAG.getConstant(Offsets[i], CLI.DL,
9229
432
                                                        PtrVT), Flags);
9230
432
      SDValue L = CLI.DAG.getLoad(
9231
432
          RetTys[i], CLI.DL, CLI.Chain, Add,
9232
432
          MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),
9233
432
                                            DemoteStackIdx, Offsets[i]),
9234
432
          /* Alignment = */ 1);
9235
432
      ReturnValues[i] = L;
9236
432
      Chains[i] = L.getValue(1);
9237
432
    }
9238
272
9239
272
    CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains);
9240
395k
  } else {
9241
395k
    // Collect the legal value parts into potentially illegal values
9242
395k
    // that correspond to the original function's return values.
9243
395k
    Optional<ISD::NodeType> AssertOp;
9244
395k
    if (CLI.RetSExt)
9245
1.40k
      AssertOp = ISD::AssertSext;
9246
393k
    else if (CLI.RetZExt)
9247
26.5k
      AssertOp = ISD::AssertZext;
9248
395k
    unsigned CurReg = 0;
9249
606k
    for (unsigned I = 0, E = RetTys.size(); I != E; 
++I211k
) {
9250
211k
      EVT VT = RetTys[I];
9251
211k
      MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
9252
211k
                                                     CLI.CallConv, VT);
9253
211k
      unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
9254
211k
                                                       CLI.CallConv, VT);
9255
211k
9256
211k
      ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
9257
211k
                                              NumRegs, RegisterVT, VT, nullptr,
9258
211k
                                              CLI.CallConv, AssertOp));
9259
211k
      CurReg += NumRegs;
9260
211k
    }
9261
395k
9262
395k
    // For a function returning void, there is no return value. We can't create
9263
395k
    // such a node, so we just return a null return value in that case. In
9264
395k
    // that case, nothing will actually look at the value.
9265
395k
    if (ReturnValues.empty())
9266
184k
      return std::make_pair(SDValue(), CLI.Chain);
9267
210k
  }
9268
210k
9269
210k
  SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL,
9270
210k
                                CLI.DAG.getVTList(RetTys), ReturnValues);
9271
210k
  return std::make_pair(Res, CLI.Chain);
9272
210k
}
9273
9274
void TargetLowering::LowerOperationWrapper(SDNode *N,
9275
                                           SmallVectorImpl<SDValue> &Results,
9276
4.72k
                                           SelectionDAG &DAG) const {
9277
4.72k
  if (SDValue Res = LowerOperation(SDValue(N, 0), DAG))
9278
4.57k
    Results.push_back(Res);
9279
4.72k
}
9280
9281
0
// Fallback implementation of the custom-lowering hook. Any target that marks
// an operation as Custom must override this; reaching the default indicates a
// target bug, hence the unconditional llvm_unreachable.
SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  llvm_unreachable("LowerOperation not implemented for this target!");
}
9284
9285
void
9286
815k
SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
9287
815k
  SDValue Op = getNonRegisterValue(V);
9288
815k
  assert((Op.getOpcode() != ISD::CopyFromReg ||
9289
815k
          cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
9290
815k
         "Copy from a reg to the same reg!");
9291
815k
  assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
9292
815k
9293
815k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9294
815k
  // If this is an InlineAsm we have to match the registers required, not the
9295
815k
  // notional registers required by the type.
9296
815k
9297
815k
  RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, V->getType(),
9298
815k
                   None); // This is not an ABI copy.
9299
815k
  SDValue Chain = DAG.getEntryNode();
9300
815k
9301
815k
  ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) ==
9302
815k
                              FuncInfo.PreferredExtendType.end())
9303
815k
                                 ? 
ISD::ANY_EXTEND183k
9304
815k
                                 : 
FuncInfo.PreferredExtendType[V]631k
;
9305
815k
  RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType);
9306
815k
  PendingExports.push_back(Chain);
9307
815k
}
9308
9309
#include "llvm/CodeGen/SelectionDAGISel.h"
9310
9311
/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
9312
/// entry block, return true.  This includes arguments used by switches, since
9313
/// the switch may expand into multiple basic blocks.
9314
117k
static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
9315
117k
  // With FastISel active, we may be splitting blocks, so force creation
9316
117k
  // of virtual registers for all non-dead arguments.
9317
117k
  if (FastISel)
9318
6.00k
    return A->use_empty();
9319
111k
9320
111k
  const BasicBlock &Entry = A->getParent()->front();
9321
111k
  for (const User *U : A->users())
9322
131k
    if (cast<Instruction>(U)->getParent() != &Entry || 
isa<SwitchInst>(U)115k
)
9323
15.8k
      return false;  // Use not in entry block.
9324
111k
9325
111k
  
return true95.2k
;
9326
111k
}
9327
9328
using ArgCopyElisionMapTy =
9329
    DenseMap<const Argument *,
9330
             std::pair<const AllocaInst *, const StoreInst *>>;
9331
9332
/// Scan the entry block of the function in FuncInfo for arguments that look
9333
/// like copies into a local alloca. Record any copied arguments in
9334
/// ArgCopyElisionCandidates.
9335
static void
9336
findArgumentCopyElisionCandidates(const DataLayout &DL,
9337
                                  FunctionLoweringInfo *FuncInfo,
9338
272k
                                  ArgCopyElisionMapTy &ArgCopyElisionCandidates) {
9339
272k
  // Record the state of every static alloca used in the entry block. Argument
9340
272k
  // allocas are all used in the entry block, so we need approximately as many
9341
272k
  // entries as we have arguments.
9342
272k
  enum StaticAllocaInfo { Unknown, Clobbered, Elidable };
9343
272k
  SmallDenseMap<const AllocaInst *, StaticAllocaInfo, 8> StaticAllocas;
9344
272k
  unsigned NumArgs = FuncInfo->Fn->arg_size();
9345
272k
  StaticAllocas.reserve(NumArgs * 2);
9346
272k
9347
2.48M
  auto GetInfoIfStaticAlloca = [&](const Value *V) -> StaticAllocaInfo * {
9348
2.48M
    if (!V)
9349
0
      return nullptr;
9350
2.48M
    V = V->stripPointerCasts();
9351
2.48M
    const auto *AI = dyn_cast<AllocaInst>(V);
9352
2.48M
    if (!AI || 
!AI->isStaticAlloca()47.5k
||
!FuncInfo->StaticAllocaMap.count(AI)47.1k
)
9353
2.44M
      return nullptr;
9354
47.1k
    auto Iter = StaticAllocas.insert({AI, Unknown});
9355
47.1k
    return &Iter.first->second;
9356
47.1k
  };
9357
272k
9358
272k
  // Look for stores of arguments to static allocas. Look through bitcasts and
9359
272k
  // GEPs to handle type coercions, as long as the alloca is fully initialized
9360
272k
  // by the store. Any non-store use of an alloca escapes it and any subsequent
9361
272k
  // unanalyzed store might write it.
9362
272k
  // FIXME: Handle structs initialized with multiple stores.
9363
1.55M
  for (const Instruction &I : FuncInfo->Fn->getEntryBlock()) {
9364
1.55M
    // Look for stores, and handle non-store uses conservatively.
9365
1.55M
    const auto *SI = dyn_cast<StoreInst>(&I);
9366
1.55M
    if (!SI) {
9367
1.45M
      // We will look through cast uses, so ignore them completely.
9368
1.45M
      if (I.isCast())
9369
244k
        continue;
9370
1.21M
      // Ignore debug info intrinsics, they don't escape or store to allocas.
9371
1.21M
      if (isa<DbgInfoIntrinsic>(I))
9372
5.12k
        continue;
9373
1.20M
      // This is an unknown instruction. Assume it escapes or writes to all
9374
1.20M
      // static alloca operands.
9375
2.29M
      
for (const Use &U : I.operands())1.20M
{
9376
2.29M
        if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(U))
9377
37.9k
          *Info = StaticAllocaInfo::Clobbered;
9378
2.29M
      }
9379
1.20M
      continue;
9380
1.20M
    }
9381
95.2k
9382
95.2k
    // If the stored value is a static alloca, mark it as escaped.
9383
95.2k
    if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(SI->getValueOperand()))
9384
292
      *Info = StaticAllocaInfo::Clobbered;
9385
95.2k
9386
95.2k
    // Check if the destination is a static alloca.
9387
95.2k
    const Value *Dst = SI->getPointerOperand()->stripPointerCasts();
9388
95.2k
    StaticAllocaInfo *Info = GetInfoIfStaticAlloca(Dst);
9389
95.2k
    if (!Info)
9390
86.3k
      continue;
9391
8.90k
    const AllocaInst *AI = cast<AllocaInst>(Dst);
9392
8.90k
9393
8.90k
    // Skip allocas that have been initialized or clobbered.
9394
8.90k
    if (*Info != StaticAllocaInfo::Unknown)
9395
3.47k
      continue;
9396
5.42k
9397
5.42k
    // Check if the stored value is an argument, and that this store fully
9398
5.42k
    // initializes the alloca. Don't elide copies from the same argument twice.
9399
5.42k
    const Value *Val = SI->getValueOperand()->stripPointerCasts();
9400
5.42k
    const auto *Arg = dyn_cast<Argument>(Val);
9401
5.42k
    if (!Arg || 
Arg->hasInAllocaAttr()2.11k
||
Arg->hasByValAttr()2.11k
||
9402
5.42k
        
Arg->getType()->isEmptyTy()2.11k
||
9403
5.42k
        DL.getTypeStoreSize(Arg->getType()) !=
9404
2.11k
            DL.getTypeAllocSize(AI->getAllocatedType()) ||
9405
5.42k
        
ArgCopyElisionCandidates.count(Arg)2.05k
) {
9406
3.36k
      *Info = StaticAllocaInfo::Clobbered;
9407
3.36k
      continue;
9408
3.36k
    }
9409
2.05k
9410
2.05k
    LLVM_DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI
9411
2.05k
                      << '\n');
9412
2.05k
9413
2.05k
    // Mark this alloca and store for argument copy elision.
9414
2.05k
    *Info = StaticAllocaInfo::Elidable;
9415
2.05k
    ArgCopyElisionCandidates.insert({Arg, {AI, SI}});
9416
2.05k
9417
2.05k
    // Stop scanning if we've seen all arguments. This will happen early in -O0
9418
2.05k
    // builds, which is useful, because -O0 builds have large entry blocks and
9419
2.05k
    // many allocas.
9420
2.05k
    if (ArgCopyElisionCandidates.size() == NumArgs)
9421
1.16k
      break;
9422
2.05k
  }
9423
272k
}
9424
9425
/// Try to elide argument copies from memory into a local alloca. Succeeds if
9426
/// ArgVal is a load from a suitable fixed stack object.
9427
static void tryToElideArgumentCopy(
9428
    FunctionLoweringInfo *FuncInfo, SmallVectorImpl<SDValue> &Chains,
9429
    DenseMap<int, int> &ArgCopyElisionFrameIndexMap,
9430
    SmallPtrSetImpl<const Instruction *> &ElidedArgCopyInstrs,
9431
    ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg,
9432
2.05k
    SDValue ArgVal, bool &ArgHasUses) {
9433
2.05k
  // Check if this is a load from a fixed stack object.
9434
2.05k
  auto *LNode = dyn_cast<LoadSDNode>(ArgVal);
9435
2.05k
  if (!LNode)
9436
1.70k
    return;
9437
349
  auto *FINode = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode());
9438
349
  if (!FINode)
9439
2
    return;
9440
347
9441
347
  // Check that the fixed stack object is the right size and alignment.
9442
347
  // Look at the alignment that the user wrote on the alloca instead of looking
9443
347
  // at the stack object.
9444
347
  auto ArgCopyIter = ArgCopyElisionCandidates.find(&Arg);
9445
347
  assert(ArgCopyIter != ArgCopyElisionCandidates.end());
9446
347
  const AllocaInst *AI = ArgCopyIter->second.first;
9447
347
  int FixedIndex = FINode->getIndex();
9448
347
  int &AllocaIndex = FuncInfo->StaticAllocaMap[AI];
9449
347
  int OldIndex = AllocaIndex;
9450
347
  MachineFrameInfo &MFI = FuncInfo->MF->getFrameInfo();
9451
347
  if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) {
9452
1
    LLVM_DEBUG(
9453
1
        dbgs() << "  argument copy elision failed due to bad fixed stack "
9454
1
                  "object size\n");
9455
1
    return;
9456
1
  }
9457
346
  unsigned RequiredAlignment = AI->getAlignment();
9458
346
  if (!RequiredAlignment) {
9459
64
    RequiredAlignment = FuncInfo->MF->getDataLayout().getABITypeAlignment(
9460
64
        AI->getAllocatedType());
9461
64
  }
9462
346
  if (MFI.getObjectAlignment(FixedIndex) < RequiredAlignment) {
9463
25
    LLVM_DEBUG(dbgs() << "  argument copy elision failed: alignment of alloca "
9464
25
                         "greater than stack argument alignment ("
9465
25
                      << RequiredAlignment << " vs "
9466
25
                      << MFI.getObjectAlignment(FixedIndex) << ")\n");
9467
25
    return;
9468
25
  }
9469
321
9470
321
  // Perform the elision. Delete the old stack object and replace its only use
9471
321
  // in the variable info map. Mark the stack object as mutable.
9472
321
  LLVM_DEBUG({
9473
321
    dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n'
9474
321
           << "  Replacing frame index " << OldIndex << " with " << FixedIndex
9475
321
           << '\n';
9476
321
  });
9477
321
  MFI.RemoveStackObject(OldIndex);
9478
321
  MFI.setIsImmutableObjectIndex(FixedIndex, false);
9479
321
  AllocaIndex = FixedIndex;
9480
321
  ArgCopyElisionFrameIndexMap.insert({OldIndex, FixedIndex});
9481
321
  Chains.push_back(ArgVal.getValue(1));
9482
321
9483
321
  // Avoid emitting code for the store implementing the copy.
9484
321
  const StoreInst *SI = ArgCopyIter->second.second;
9485
321
  ElidedArgCopyInstrs.insert(SI);
9486
321
9487
321
  // Check for uses of the argument again so that we can avoid exporting ArgVal
9488
321
  // if it is't used by anything other than the store.
9489
321
  for (const Value *U : Arg.users()) {
9490
321
    if (U != SI) {
9491
34
      ArgHasUses = true;
9492
34
      break;
9493
34
    }
9494
321
  }
9495
321
}
9496
9497
272k
/// Lower the incoming (formal) arguments of function \p F into the initial
/// SelectionDAG: build the ISD::InputArg descriptors, invoke the target's
/// LowerFormalArguments hook, then record the resulting SDValues, virtual
/// registers, and frame indices for each IR argument.
void SelectionDAGISel::LowerArguments(const Function &F) {
  SelectionDAG &DAG = SDB->DAG;
  SDLoc dl = SDB->getCurSDLoc();
  const DataLayout &DL = DAG.getDataLayout();
  SmallVector<ISD::InputArg, 16> Ins;

  if (!FuncInfo->CanLowerReturn) {
    // Put in an sret pointer parameter before all the other parameters.
    SmallVector<EVT, 1> ValueVTs;
    ComputeValueVTs(*TLI, DAG.getDataLayout(),
                    F.getReturnType()->getPointerTo(
                        DAG.getDataLayout().getAllocaAddrSpace()),
                    ValueVTs);

    // NOTE: Assuming that a pointer will never break down to more than one VT
    // or one register.
    ISD::ArgFlagsTy Flags;
    Flags.setSRet();
    MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]);
    ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true,
                         ISD::InputArg::NoArgIndex, 0);
    Ins.push_back(RetArg);
  }

  // Look for stores of arguments to static allocas. Mark such arguments with a
  // flag to ask the target to give us the memory location of that argument if
  // available.
  ArgCopyElisionMapTy ArgCopyElisionCandidates;
  findArgumentCopyElisionCandidates(DL, FuncInfo, ArgCopyElisionCandidates);

  // Set up the incoming argument description vector.
  for (const Argument &Arg : F.args()) {
    unsigned ArgNo = Arg.getArgNo();
    SmallVector<EVT, 4> ValueVTs;
    ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
    bool isArgValueUsed = !Arg.use_empty();
    unsigned PartBase = 0;
    Type *FinalType = Arg.getType();
    if (Arg.hasAttribute(Attribute::ByVal))
      FinalType = Arg.getParamByValType();
    bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
        FinalType, F.getCallingConv(), F.isVarArg());
    // One ISD::InputArg is emitted per legal register part of each value.
    for (unsigned Value = 0, NumValues = ValueVTs.size();
         Value != NumValues; ++Value) {
      EVT VT = ValueVTs[Value];
      Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
      ISD::ArgFlagsTy Flags;

      // Certain targets (such as MIPS), may have a different ABI alignment
      // for a type depending on the context. Give the target a chance to
      // specify the alignment it wants.
      unsigned OriginalAlignment =
          TLI->getABIAlignmentForCallingConv(ArgTy, DL);

      if (Arg.getType()->isPointerTy()) {
        Flags.setPointer();
        Flags.setPointerAddrSpace(
            cast<PointerType>(Arg.getType())->getAddressSpace());
      }
      if (Arg.hasAttribute(Attribute::ZExt))
        Flags.setZExt();
      if (Arg.hasAttribute(Attribute::SExt))
        Flags.setSExt();
      if (Arg.hasAttribute(Attribute::InReg)) {
        // If we are using vectorcall calling convention, a structure that is
        // passed InReg - is surely an HVA
        if (F.getCallingConv() == CallingConv::X86_VectorCall &&
            isa<StructType>(Arg.getType())) {
          // The first value of a structure is marked
          if (0 == Value)
            Flags.setHvaStart();
          Flags.setHva();
        }
        // Set InReg Flag
        Flags.setInReg();
      }
      if (Arg.hasAttribute(Attribute::StructRet))
        Flags.setSRet();
      if (Arg.hasAttribute(Attribute::SwiftSelf))
        Flags.setSwiftSelf();
      if (Arg.hasAttribute(Attribute::SwiftError))
        Flags.setSwiftError();
      if (Arg.hasAttribute(Attribute::ByVal))
        Flags.setByVal();
      if (Arg.hasAttribute(Attribute::InAlloca)) {
        Flags.setInAlloca();
        // Set the byval flag for CCAssignFn callbacks that don't know about
        // inalloca.  This way we can know how many bytes we should've allocated
        // and how many bytes a callee cleanup function will pop.  If we port
        // inalloca to more targets, we'll have to add custom inalloca handling
        // in the various CC lowering callbacks.
        Flags.setByVal();
      }
      if (F.getCallingConv() == CallingConv::X86_INTR) {
        // IA Interrupt passes frame (1st parameter) by value in the stack.
        if (ArgNo == 0)
          Flags.setByVal();
      }
      if (Flags.isByVal() || Flags.isInAlloca()) {
        Type *ElementTy = Arg.getParamByValType();

        // For ByVal, size and alignment should be passed from FE.  BE will
        // guess if this info is not there but there are cases it cannot get
        // right.
        unsigned FrameSize = DL.getTypeAllocSize(Arg.getParamByValType());
        Flags.setByValSize(FrameSize);

        unsigned FrameAlign;
        if (Arg.getParamAlignment())
          FrameAlign = Arg.getParamAlignment();
        else
          FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL);
        Flags.setByValAlign(FrameAlign);
      }
      if (Arg.hasAttribute(Attribute::Nest))
        Flags.setNest();
      if (NeedsRegBlock)
        Flags.setInConsecutiveRegs();
      Flags.setOrigAlign(OriginalAlignment);
      if (ArgCopyElisionCandidates.count(&Arg))
        Flags.setCopyElisionCandidate();
      if (Arg.hasAttribute(Attribute::Returned))
        Flags.setReturned();

      MVT RegisterVT = TLI->getRegisterTypeForCallingConv(
          *CurDAG->getContext(), F.getCallingConv(), VT);
      unsigned NumRegs = TLI->getNumRegistersForCallingConv(
          *CurDAG->getContext(), F.getCallingConv(), VT);
      for (unsigned i = 0; i != NumRegs; ++i) {
        ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed,
                              ArgNo, PartBase+i*RegisterVT.getStoreSize());
        if (NumRegs > 1 && i == 0)
          MyFlags.Flags.setSplit();
        // if it isn't first piece, alignment must be 1
        else if (i > 0) {
          MyFlags.Flags.setOrigAlign(1);
          if (i == NumRegs - 1)
            MyFlags.Flags.setSplitEnd();
        }
        Ins.push_back(MyFlags);
      }
      if (NeedsRegBlock && Value == NumValues - 1)
        Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast();
      PartBase += VT.getStoreSize();
    }
  }

  // Call the target to set up the argument values.
  SmallVector<SDValue, 8> InVals;
  SDValue NewRoot = TLI->LowerFormalArguments(
      DAG.getRoot(), F.getCallingConv(), F.isVarArg(), Ins, dl, DAG, InVals);

  // Verify that the target's LowerFormalArguments behaved as expected.
  assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
         "LowerFormalArguments didn't return a valid chain!");
  assert(InVals.size() == Ins.size() &&
         "LowerFormalArguments didn't emit the correct number of values!");
  LLVM_DEBUG({
    for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
      assert(InVals[i].getNode() &&
             "LowerFormalArguments emitted a null value!");
      assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
             "LowerFormalArguments emitted a value with the wrong type!");
    }
  });

  // Update the DAG with the new chain value resulting from argument lowering.
  DAG.setRoot(NewRoot);

  // Set up the argument values.
  // NOTE: `i` indexes the flat InVals/Ins arrays and must advance in
  // lock-step with the per-part loop below.
  unsigned i = 0;
  if (!FuncInfo->CanLowerReturn) {
    // Create a virtual register for the sret pointer, and put in a copy
    // from the sret argument into it.
    SmallVector<EVT, 1> ValueVTs;
    ComputeValueVTs(*TLI, DAG.getDataLayout(),
                    F.getReturnType()->getPointerTo(
                        DAG.getDataLayout().getAllocaAddrSpace()),
                    ValueVTs);
    MVT VT = ValueVTs[0].getSimpleVT();
    MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
    Optional<ISD::NodeType> AssertOp = None;
    SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT,
                                        nullptr, F.getCallingConv(), AssertOp);

    MachineFunction& MF = SDB->DAG.getMachineFunction();
    MachineRegisterInfo& RegInfo = MF.getRegInfo();
    unsigned SRetReg = RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT));
    FuncInfo->DemoteRegister = SRetReg;
    NewRoot =
        SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue);
    DAG.setRoot(NewRoot);

    // i indexes lowered arguments.  Bump it past the hidden sret argument.
    ++i;
  }

  SmallVector<SDValue, 4> Chains;
  DenseMap<int, int> ArgCopyElisionFrameIndexMap;
  for (const Argument &Arg : F.args()) {
    SmallVector<SDValue, 4> ArgValues;
    SmallVector<EVT, 4> ValueVTs;
    ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
    unsigned NumValues = ValueVTs.size();
    if (NumValues == 0)
      continue;

    bool ArgHasUses = !Arg.use_empty();

    // Elide the copying store if the target loaded this argument from a
    // suitable fixed stack object.
    if (Ins[i].Flags.isCopyElisionCandidate()) {
      tryToElideArgumentCopy(FuncInfo, Chains, ArgCopyElisionFrameIndexMap,
                             ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg,
                             InVals[i], ArgHasUses);
    }

    // If this argument is unused then remember its value. It is used to generate
    // debugging information.
    bool isSwiftErrorArg =
        TLI->supportSwiftError() &&
        Arg.hasAttribute(Attribute::SwiftError);
    if (!ArgHasUses && !isSwiftErrorArg) {
      SDB->setUnusedArgValue(&Arg, InVals[i]);

      // Also remember any frame index for use in FastISel.
      if (FrameIndexSDNode *FI =
          dyn_cast<FrameIndexSDNode>(InVals[i].getNode()))
        FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
    }

    for (unsigned Val = 0; Val != NumValues; ++Val) {
      EVT VT = ValueVTs[Val];
      MVT PartVT = TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(),
                                                      F.getCallingConv(), VT);
      unsigned NumParts = TLI->getNumRegistersForCallingConv(
          *CurDAG->getContext(), F.getCallingConv(), VT);

      // Even an apparent 'unused' swifterror argument needs to be returned. So
      // we do generate a copy for it that can be used on return from the
      // function.
      if (ArgHasUses || isSwiftErrorArg) {
        Optional<ISD::NodeType> AssertOp;
        if (Arg.hasAttribute(Attribute::SExt))
          AssertOp = ISD::AssertSext;
        else if (Arg.hasAttribute(Attribute::ZExt))
          AssertOp = ISD::AssertZext;

        ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
                                             PartVT, VT, nullptr,
                                             F.getCallingConv(), AssertOp));
      }

      // Advance past this value's register parts even when no copy was
      // generated, to keep `i` aligned with InVals.
      i += NumParts;
    }

    // We don't need to do anything else for unused arguments.
    if (ArgValues.empty())
      continue;

    // Note down frame index.
    if (FrameIndexSDNode *FI =
        dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
      FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());

    SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues),
                                     SDB->getCurSDLoc());

    SDB->setValue(&Arg, Res);
    if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
      // We want to associate the argument with the frame index, among
      // involved operands, that correspond to the lowest address. The
      // getCopyFromParts function, called earlier, is swapping the order of
      // the operands to BUILD_PAIR depending on endianness. The result of
      // that swapping is that the least significant bits of the argument will
      // be in the first operand of the BUILD_PAIR node, and the most
      // significant bits will be in the second operand.
      unsigned LowAddressOp = DAG.getDataLayout().isBigEndian() ? 1 : 0;
      if (LoadSDNode *LNode =
          dyn_cast<LoadSDNode>(Res.getOperand(LowAddressOp).getNode()))
        if (FrameIndexSDNode *FI =
            dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
          FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
    }

    // Update the SwiftErrorVRegDefMap.
    if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) {
      unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg))
        SwiftError->setCurrentVReg(FuncInfo->MBB, SwiftError->getFunctionArg(),
                                   Reg);
    }

    // If this argument is live outside of the entry block, insert a copy from
    // wherever we got it to the vreg that other BB's will reference it as.
    if (Res.getOpcode() == ISD::CopyFromReg) {
      // If we can, though, try to skip creating an unnecessary vreg.
      // FIXME: This isn't very clean... it would be nice to make this more
      // general.
      unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        FuncInfo->ValueMap[&Arg] = Reg;
        continue;
      }
    }
    if (!isOnlyUsedInEntryBlock(&Arg, TM.Options.EnableFastISel)) {
      FuncInfo->InitializeRegForValue(&Arg);
      SDB->CopyToExportRegsIfNeeded(&Arg);
    }
  }

  // Chain together any loads recorded by argument copy elision.
  if (!Chains.empty()) {
    Chains.push_back(NewRoot);
    NewRoot = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
  }

  DAG.setRoot(NewRoot);

  assert(i == InVals.size() && "Argument register count mismatch!");

  // If any argument copy elisions occurred and we have debug info, update the
  // stale frame indices used in the dbg.declare variable info table.
  MachineFunction::VariableDbgInfoMapTy &DbgDeclareInfo = MF->getVariableDbgInfo();
  if (!DbgDeclareInfo.empty() && !ArgCopyElisionFrameIndexMap.empty()) {
    for (MachineFunction::VariableDbgInfo &VI : DbgDeclareInfo) {
      auto I = ArgCopyElisionFrameIndexMap.find(VI.Slot);
      if (I != ArgCopyElisionFrameIndexMap.end())
        VI.Slot = I->second;
    }
  }

  // Finally, if the target has anything special to do, allow it to do so.
  EmitFunctionEntryCode();
}
9831
9832
/// Handle PHI nodes in successor blocks.  Emit code into the SelectionDAG to
9833
/// ensure constants are generated when needed.  Remember the virtual registers
9834
/// that need to be added to the Machine PHI nodes as input.  We cannot just
9835
/// directly add them, because expansion might result in multiple MBB's for one
9836
/// BB.  As such, the start of the BB might correspond to a different MBB than
9837
/// the end.
9838
void
9839
1.12M
SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
9840
1.12M
  const Instruction *TI = LLVMBB->getTerminator();
9841
1.12M
9842
1.12M
  SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
9843
1.12M
9844
1.12M
  // Check PHI nodes in successors that expect a value to be available from this
9845
1.12M
  // block.
9846
2.52M
  for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; 
++succ1.39M
) {
9847
1.39M
    const BasicBlock *SuccBB = TI->getSuccessor(succ);
9848
1.39M
    if (!isa<PHINode>(SuccBB->begin())) 
continue985k
;
9849
412k
    MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
9850
412k
9851
412k
    // If this terminator has multiple identical successors (common for
9852
412k
    // switches), only handle each succ once.
9853
412k
    if (!SuccsHandled.insert(SuccMBB).second)
9854
325
      continue;
9855
412k
9856
412k
    MachineBasicBlock::iterator MBBI = SuccMBB->begin();
9857
412k
9858
412k
    // At this point we know that there is a 1-1 correspondence between LLVM PHI
9859
412k
    // nodes and Machine PHI nodes, but the incoming operands have not been
9860
412k
    // emitted yet.
9861
609k
    for (const PHINode &PN : SuccBB->phis()) {
9862
609k
      // Ignore dead phi's.
9863
609k
      if (PN.use_empty())
9864
9.53k
        continue;
9865
600k
9866
600k
      // Skip empty types
9867
600k
      if (PN.getType()->isEmptyTy())
9868
4
        continue;
9869
600k
9870
600k
      unsigned Reg;
9871
600k
      const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB);
9872
600k
9873
600k
      if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
9874
167k
        unsigned &RegOut = ConstantsOut[C];
9875
167k
        if (RegOut == 0) {
9876
161k
          RegOut = FuncInfo.CreateRegs(C);
9877
161k
          CopyValueToVirtualRegister(C, RegOut);
9878
161k
        }
9879
167k
        Reg = RegOut;
9880
432k
      } else {
9881
432k
        DenseMap<const Value *, unsigned>::iterator I =
9882
432k
          FuncInfo.ValueMap.find(PHIOp);
9883
432k
        if (I != FuncInfo.ValueMap.end())
9884
431k
          Reg = I->second;
9885
479
        else {
9886
479
          assert(isa<AllocaInst>(PHIOp) &&
9887
479
                 FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
9888
479
                 "Didn't codegen value into a register!??");
9889
479
          Reg = FuncInfo.CreateRegs(PHIOp);
9890
479
          CopyValueToVirtualRegister(PHIOp, Reg);
9891
479
        }
9892
432k
      }
9893
600k
9894
600k
      // Remember that this register needs to added to the machine PHI node as
9895
600k
      // the input for this MBB.
9896
600k
      SmallVector<EVT, 4> ValueVTs;
9897
600k
      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9898
600k
      ComputeValueVTs(TLI, DAG.getDataLayout(), PN.getType(), ValueVTs);
9899
1.20M
      for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; 
++vti601k
) {
9900
601k
        EVT VT = ValueVTs[vti];
9901
601k
        unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
9902
1.21M
        for (unsigned i = 0, e = NumRegisters; i != e; 
++i616k
)
9903
616k
          FuncInfo.PHINodesToUpdate.push_back(
9904
616k
              std::make_pair(&*MBBI++, Reg + i));
9905
601k
        Reg += NumRegisters;
9906
601k
      }
9907
600k
    }
9908
412k
  }
9909
1.12M
9910
1.12M
  ConstantsOut.clear();
9911
1.12M
}
9912
9913
/// Add a successor MBB to ParentMBB< creating a new MachineBB for BB if SuccMBB
9914
/// is 0.
9915
MachineBasicBlock *
9916
SelectionDAGBuilder::StackProtectorDescriptor::
9917
AddSuccessorMBB(const BasicBlock *BB,
9918
                MachineBasicBlock *ParentMBB,
9919
                bool IsLikely,
9920
804
                MachineBasicBlock *SuccMBB) {
9921
804
  // If SuccBB has not been created yet, create it.
9922
804
  if (!SuccMBB) {
9923
794
    MachineFunction *MF = ParentMBB->getParent();
9924
794
    MachineFunction::iterator BBI(ParentMBB);
9925
794
    SuccMBB = MF->CreateMachineBasicBlock(BB);
9926
794
    MF->insert(++BBI, SuccMBB);
9927
794
  }
9928
804
  // Add it as a successor of ParentMBB.
9929
804
  ParentMBB->addSuccessor(
9930
804
      SuccMBB, BranchProbabilityInfo::getBranchProbStackProtector(IsLikely));
9931
804
  return SuccMBB;
9932
804
}
9933
9934
869k
MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) {
9935
869k
  MachineFunction::iterator I(MBB);
9936
869k
  if (++I == FuncInfo.MF->end())
9937
6.94k
    return nullptr;
9938
862k
  return &*I;
9939
862k
}
9940
9941
/// During lowering new call nodes can be created (such as memset, etc.).
9942
/// Those will become new roots of the current DAG, but complications arise
9943
/// when they are tail calls. In such cases, the call lowering will update
9944
/// the root, but the builder still needs to know that a tail call has been
9945
/// lowered in order to avoid generating an additional return.
9946
14.3k
void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) {
9947
14.3k
  // If the node is null, we do have a tail call.
9948
14.3k
  if (MaybeTC.getNode() != nullptr)
9949
14.3k
    DAG.setRoot(MaybeTC);
9950
36
  else
9951
36
    HasTailCall = true;
9952
14.3k
}
9953
9954
void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
9955
                                        MachineBasicBlock *SwitchMBB,
9956
8.30k
                                        MachineBasicBlock *DefaultMBB) {
9957
8.30k
  MachineFunction *CurMF = FuncInfo.MF;
9958
8.30k
  MachineBasicBlock *NextMBB = nullptr;
9959
8.30k
  MachineFunction::iterator BBI(W.MBB);
9960
8.30k
  if (++BBI != FuncInfo.MF->end())
9961
8.29k
    NextMBB = &*BBI;
9962
8.30k
9963
8.30k
  unsigned Size = W.LastCluster - W.FirstCluster + 1;
9964
8.30k
9965
8.30k
  BranchProbabilityInfo *BPI = FuncInfo.BPI;
9966
8.30k
9967
8.30k
  if (Size == 2 && 
W.MBB == SwitchMBB4.66k
) {
9968
4.38k
    // If any two of the cases has the same destination, and if one value
9969
4.38k
    // is the same as the other, but has one bit unset that the other has set,
9970
4.38k
    // use bit manipulation to do two compares at once.  For example:
9971
4.38k
    // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
9972
4.38k
    // TODO: This could be extended to merge any 2 cases in switches with 3
9973
4.38k
    // cases.
9974
4.38k
    // TODO: Handle cases where W.CaseBB != SwitchBB.
9975
4.38k
    CaseCluster &Small = *W.FirstCluster;
9976
4.38k
    CaseCluster &Big = *W.LastCluster;
9977
4.38k
9978
4.38k
    if (Small.Low == Small.High && 
Big.Low == Big.High4.36k
&&
9979
4.38k
        
Small.MBB == Big.MBB4.29k
) {
9980
301
      const APInt &SmallValue = Small.Low->getValue();
9981
301
      const APInt &BigValue = Big.Low->getValue();
9982
301
9983
301
      // Check that there is only one bit different.
9984
301
      APInt CommonBit = BigValue ^ SmallValue;
9985
301
      if (CommonBit.isPowerOf2()) {
9986
36
        SDValue CondLHS = getValue(Cond);
9987
36
        EVT VT = CondLHS.getValueType();
9988
36
        SDLoc DL = getCurSDLoc();
9989
36
9990
36
        SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
9991
36
                                 DAG.getConstant(CommonBit, DL, VT));
9992
36
        SDValue Cond = DAG.getSetCC(
9993
36
            DL, MVT::i1, Or, DAG.getConstant(BigValue | SmallValue, DL, VT),
9994
36
            ISD::SETEQ);
9995
36
9996
36
        // Update successor info.
9997
36
        // Both Small and Big will jump to Small.BB, so we sum up the
9998
36
        // probabilities.
9999
36
        addSuccessorWithProb(SwitchMBB, Small.MBB, Small.Prob + Big.Prob);
10000
36
        if (BPI)
10001
36
          addSuccessorWithProb(
10002
36
              SwitchMBB, DefaultMBB,
10003
36
              // The default destination is the first successor in IR.
10004
36
              BPI->getEdgeProbability(SwitchMBB->getBasicBlock(), (unsigned)0));
10005
0
        else
10006
0
          addSuccessorWithProb(SwitchMBB, DefaultMBB);
10007
36
10008
36
        // Insert the true branch.
10009
36
        SDValue BrCond =
10010
36
            DAG.getNode(ISD::BRCOND, DL, MVT::Other, getControlRoot(), Cond,
10011
36
                        DAG.getBasicBlock(Small.MBB));
10012
36
        // Insert the false branch.
10013
36
        BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
10014
36
                             DAG.getBasicBlock(DefaultMBB));
10015
36
10016
36
        DAG.setRoot(BrCond);
10017
36
        return;
10018
36
      }
10019
8.26k
    }
10020
4.38k
  }
10021
8.26k
10022
8.26k
  if (TM.getOptLevel() != CodeGenOpt::None) {
10023
8.22k
    // Here, we order cases by probability so the most likely case will be
10024
8.22k
    // checked first. However, two clusters can have the same probability in
10025
8.22k
    // which case their relative ordering is non-deterministic. So we use Low
10026
8.22k
    // as a tie-breaker as clusters are guaranteed to never overlap.
10027
8.22k
    llvm::sort(W.FirstCluster, W.LastCluster + 1,
10028
8.22k
               [](const CaseCluster &a, const CaseCluster &b) {
10029
5.41k
      return a.Prob != b.Prob ?
10030
517
             a.Prob > b.Prob :
10031
5.41k
             
a.Low->getValue().slt(b.Low->getValue())4.89k
;
10032
5.41k
    });
10033
8.22k
10034
8.22k
    // Rearrange the case blocks so that the last one falls through if possible
10035
8.22k
    // without changing the order of probabilities.
10036
9.41k
    for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) {
10037
5.22k
      --I;
10038
5.22k
      if (I->Prob > W.LastCluster->Prob)
10039
405
        break;
10040
4.82k
      if (I->Kind == CC_Range && 
I->MBB == NextMBB4.81k
) {
10041
3.63k
        std::swap(*I, *W.LastCluster);
10042
3.63k
        break;
10043
3.63k
      }
10044
4.82k
    }
10045
8.22k
  }
10046
8.26k
10047
8.26k
  // Compute total probability.
10048
8.26k
  BranchProbability DefaultProb = W.DefaultProb;
10049
8.26k
  BranchProbability UnhandledProbs = DefaultProb;
10050
22.0k
  for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; 
++I13.7k
)
10051
13.7k
    UnhandledProbs += I->Prob;
10052
8.26k
10053
8.26k
  MachineBasicBlock *CurMBB = W.MBB;
10054
22.0k
  for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; 
++I13.7k
) {
10055
13.7k
    bool FallthroughUnreachable = false;
10056
13.7k
    MachineBasicBlock *Fallthrough;
10057
13.7k
    if (I == W.LastCluster) {
10058
8.26k
      // For the last cluster, fall through to the default destination.
10059
8.26k
      Fallthrough = DefaultMBB;
10060
8.26k
      FallthroughUnreachable = isa<UnreachableInst>(
10061
8.26k
          DefaultMBB->getBasicBlock()->getFirstNonPHIOrDbg());
10062
8.26k
    } else {
10063
5.47k
      Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock());
10064
5.47k
      CurMF->insert(BBI, Fallthrough);
10065
5.47k
      // Put Cond in a virtual register to make it available from the new blocks.
10066
5.47k
      ExportFromCurrentBlock(Cond);
10067
5.47k
    }
10068
13.7k
    UnhandledProbs -= I->Prob;
10069
13.7k
10070
13.7k
    switch (I->Kind) {
10071
13.7k
      case CC_JumpTable: {
10072
2.26k
        // FIXME: Optimize away range check based on pivot comparisons.
10073
2.26k
        JumpTableHeader *JTH = &SL->JTCases[I->JTCasesIndex].first;
10074
2.26k
        SwitchCG::JumpTable *JT = &SL->JTCases[I->JTCasesIndex].second;
10075
2.26k
10076
2.26k
        // The jump block hasn't been inserted yet; insert it here.
10077
2.26k
        MachineBasicBlock *JumpMBB = JT->MBB;
10078
2.26k
        CurMF->insert(BBI, JumpMBB);
10079
2.26k
10080
2.26k
        auto JumpProb = I->Prob;
10081
2.26k
        auto FallthroughProb = UnhandledProbs;
10082
2.26k
10083
2.26k
        // If the default statement is a target of the jump table, we evenly
10084
2.26k
        // distribute the default probability to successors of CurMBB. Also
10085
2.26k
        // update the probability on the edge from JumpMBB to Fallthrough.
10086
2.26k
        for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
10087
2.26k
                                              SE = JumpMBB->succ_end();
10088
12.6k
             SI != SE; 
++SI10.3k
) {
10089
10.6k
          if (*SI == DefaultMBB) {
10090
319
            JumpProb += DefaultProb / 2;
10091
319
            FallthroughProb -= DefaultProb / 2;
10092
319
            JumpMBB->setSuccProbability(SI, DefaultProb / 2);
10093
319
            JumpMBB->normalizeSuccProbs();
10094
319
            break;
10095
319
          }
10096
10.6k
        }
10097
2.26k
10098
2.26k
        if (FallthroughUnreachable) {
10099
48
          // Skip the range check if the fallthrough block is unreachable.
10100
48
          JTH->OmitRangeCheck = true;
10101
48
        }
10102
2.26k
10103
2.26k
        if (!JTH->OmitRangeCheck)
10104
2.21k
          addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
10105
2.26k
        addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
10106
2.26k
        CurMBB->normalizeSuccProbs();
10107
2.26k
10108
2.26k
        // The jump table header will be inserted in our current block, do the
10109
2.26k
        // range check, and fall through to our fallthrough block.
10110
2.26k
        JTH->HeaderBB = CurMBB;
10111
2.26k
        JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader.
10112
2.26k
10113
2.26k
        // If we're in the right place, emit the jump table header right now.
10114
2.26k
        if (CurMBB == SwitchMBB) {
10115
2.23k
          visitJumpTableHeader(*JT, *JTH, SwitchMBB);
10116
2.23k
          JTH->Emitted = true;
10117
2.23k
        }
10118
2.26k
        break;
10119
13.7k
      }
10120
13.7k
      case CC_BitTests: {
10121
286
        // FIXME: If Fallthrough is unreachable, skip the range check.
10122
286
10123
286
        // FIXME: Optimize away range check based on pivot comparisons.
10124
286
        BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex];
10125
286
10126
286
        // The bit test blocks haven't been inserted yet; insert them here.
10127
286
        for (BitTestCase &BTC : BTB->Cases)
10128
447
          CurMF->insert(BBI, BTC.ThisBB);
10129
286
10130
286
        // Fill in fields of the BitTestBlock.
10131
286
        BTB->Parent = CurMBB;
10132
286
        BTB->Default = Fallthrough;
10133
286
10134
286
        BTB->DefaultProb = UnhandledProbs;
10135
286
        // If the cases in bit test don't form a contiguous range, we evenly
10136
286
        // distribute the probability on the edge to Fallthrough to two
10137
286
        // successors of CurMBB.
10138
286
        if (!BTB->ContiguousRange) {
10139
249
          BTB->Prob += DefaultProb / 2;
10140
249
          BTB->DefaultProb -= DefaultProb / 2;
10141
249
        }
10142
286
10143
286
        // If we're in the right place, emit the bit test header right now.
10144
286
        if (CurMBB == SwitchMBB) {
10145
284
          visitBitTestHeader(*BTB, SwitchMBB);
10146
284
          BTB->Emitted = true;
10147
284
        }
10148
286
        break;
10149
13.7k
      }
10150
13.7k
      case CC_Range: {
10151
11.1k
        const Value *RHS, *LHS, *MHS;
10152
11.1k
        ISD::CondCode CC;
10153
11.1k
        if (I->Low == I->High) {
10154
10.8k
          // Check Cond == I->Low.
10155
10.8k
          CC = ISD::SETEQ;
10156
10.8k
          LHS = Cond;
10157
10.8k
          RHS=I->Low;
10158
10.8k
          MHS = nullptr;
10159
10.8k
        } else {
10160
340
          // Check I->Low <= Cond <= I->High.
10161
340
          CC = ISD::SETLE;
10162
340
          LHS = I->Low;
10163
340
          MHS = Cond;
10164
340
          RHS = I->High;
10165
340
        }
10166
11.1k
10167
11.1k
        // If Fallthrough is unreachable, fold away the comparison.
10168
11.1k
        if (FallthroughUnreachable)
10169
17
          CC = ISD::SETTRUE;
10170
11.1k
10171
11.1k
        // The false probability is the sum of all unhandled cases.
10172
11.1k
        CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB,
10173
11.1k
                     getCurSDLoc(), I->Prob, UnhandledProbs);
10174
11.1k
10175
11.1k
        if (CurMBB == SwitchMBB)
10176
5.02k
          visitSwitchCase(CB, SwitchMBB);
10177
6.16k
        else
10178
6.16k
          SL->SwitchCases.push_back(CB);
10179
11.1k
10180
11.1k
        break;
10181
13.7k
      }
10182
13.7k
    }
10183
13.7k
    CurMBB = Fallthrough;
10184
13.7k
  }
10185
8.26k
}
10186
10187
unsigned SelectionDAGBuilder::caseClusterRank(const CaseCluster &CC,
10188
                                              CaseClusterIt First,
10189
34
                                              CaseClusterIt Last) {
10190
115
  return std::count_if(First, Last + 1, [&](const CaseCluster &X) {
10191
115
    if (X.Prob != CC.Prob)
10192
42
      return X.Prob > CC.Prob;
10193
73
10194
73
    // Ties are broken by comparing the case value.
10195
73
    return X.Low->getValue().slt(CC.Low->getValue());
10196
73
  });
10197
34
}
10198
10199
void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
10200
                                        const SwitchWorkListItem &W,
10201
                                        Value *Cond,
10202
189
                                        MachineBasicBlock *SwitchMBB) {
10203
189
  assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) &&
10204
189
         "Clusters not sorted?");
10205
189
10206
189
  assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!");
10207
189
10208
189
  // Balance the tree based on branch probabilities to create a near-optimal (in
10209
189
  // terms of search time given key frequency) binary search tree. See e.g. Kurt
10210
189
  // Mehlhorn "Nearly Optimal Binary Search Trees" (1975).
10211
189
  CaseClusterIt LastLeft = W.FirstCluster;
10212
189
  CaseClusterIt FirstRight = W.LastCluster;
10213
189
  auto LeftProb = LastLeft->Prob + W.DefaultProb / 2;
10214
189
  auto RightProb = FirstRight->Prob + W.DefaultProb / 2;
10215
189
10216
189
  // Move LastLeft and FirstRight towards each other from opposite directions to
10217
189
  // find a partitioning of the clusters which balances the probability on both
10218
189
  // sides. If LeftProb and RightProb are equal, alternate which side is
10219
189
  // taken to ensure 0-probability nodes are distributed evenly.
10220
189
  unsigned I = 0;
10221
815
  while (LastLeft + 1 < FirstRight) {
10222
626
    if (LeftProb < RightProb || 
(338
LeftProb == RightProb338
&&
(I & 1)282
))
10223
293
      LeftProb += (++LastLeft)->Prob;
10224
333
    else
10225
333
      RightProb += (--FirstRight)->Prob;
10226
626
    I++;
10227
626
  }
10228
189
10229
199
  while (true) {
10230
199
    // Our binary search tree differs from a typical BST in that ours can have up
10231
199
    // to three values in each leaf. The pivot selection above doesn't take that
10232
199
    // into account, which means the tree might require more nodes and be less
10233
199
    // efficient. We compensate for this here.
10234
199
10235
199
    unsigned NumLeft = LastLeft - W.FirstCluster + 1;
10236
199
    unsigned NumRight = W.LastCluster - FirstRight + 1;
10237
199
10238
199
    if (std::min(NumLeft, NumRight) < 3 && 
std::max(NumLeft, NumRight) > 3157
) {
10239
17
      // If one side has less than 3 clusters, and the other has more than 3,
10240
17
      // consider taking a cluster from the other side.
10241
17
10242
17
      if (NumLeft < NumRight) {
10243
8
        // Consider moving the first cluster on the right to the left side.
10244
8
        CaseCluster &CC = *FirstRight;
10245
8
        unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster);
10246
8
        unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft);
10247
8
        if (LeftSideRank <= RightSideRank) {
10248
2
          // Moving the cluster to the left does not demote it.
10249
2
          ++LastLeft;
10250
2
          ++FirstRight;
10251
2
          continue;
10252
2
        }
10253
9
      } else {
10254
9
        assert(NumRight < NumLeft);
10255
9
        // Consider moving the last element on the left to the right side.
10256
9
        CaseCluster &CC = *LastLeft;
10257
9
        unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft);
10258
9
        unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster);
10259
9
        if (RightSideRank <= LeftSideRank) {
10260
8
          // Moving the cluster to the right does not demot it.
10261
8
          --LastLeft;
10262
8
          --FirstRight;
10263
8
          continue;
10264
8
        }
10265
189
      }
10266
17
    }
10267
189
    break;
10268
189
  }
10269
189
10270
189
  assert(LastLeft + 1 == FirstRight);
10271
189
  assert(LastLeft >= W.FirstCluster);
10272
189
  assert(FirstRight <= W.LastCluster);
10273
189
10274
189
  // Use the first element on the right as pivot since we will make less-than
10275
189
  // comparisons against it.
10276
189
  CaseClusterIt PivotCluster = FirstRight;
10277
189
  assert(PivotCluster > W.FirstCluster);
10278
189
  assert(PivotCluster <= W.LastCluster);
10279
189
10280
189
  CaseClusterIt FirstLeft = W.FirstCluster;
10281
189
  CaseClusterIt LastRight = W.LastCluster;
10282
189
10283
189
  const ConstantInt *Pivot = PivotCluster->Low;
10284
189
10285
189
  // New blocks will be inserted immediately after the current one.
10286
189
  MachineFunction::iterator BBI(W.MBB);
10287
189
  ++BBI;
10288
189
10289
189
  // We will branch to the LHS if Value < Pivot. If LHS is a single cluster,
10290
189
  // we can branch to its destination directly if it's squeezed exactly in
10291
189
  // between the known lower bound and Pivot - 1.
10292
189
  MachineBasicBlock *LeftMBB;
10293
189
  if (FirstLeft == LastLeft && 
FirstLeft->Kind == CC_Range8
&&
10294
189
      
FirstLeft->Low == W.GE1
&&
10295
189
      
(FirstLeft->High->getValue() + 1LL) == Pivot->getValue()0
) {
10296
0
    LeftMBB = FirstLeft->MBB;
10297
189
  } else {
10298
189
    LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
10299
189
    FuncInfo.MF->insert(BBI, LeftMBB);
10300
189
    WorkList.push_back(
10301
189
        {LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultProb / 2});
10302
189
    // Put Cond in a virtual register to make it available from the new blocks.
10303
189
    ExportFromCurrentBlock(Cond);
10304
189
  }
10305
189
10306
189
  // Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a
10307
189
  // single cluster, RHS.Low == Pivot, and we can branch to its destination
10308
189
  // directly if RHS.High equals the current upper bound.
10309
189
  MachineBasicBlock *RightMBB;
10310
189
  if (FirstRight == LastRight && 
FirstRight->Kind == CC_Range2
&&
10311
189
      
W.LT2
&&
(FirstRight->High->getValue() + 1ULL) == W.LT->getValue()0
) {
10312
0
    RightMBB = FirstRight->MBB;
10313
189
  } else {
10314
189
    RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
10315
189
    FuncInfo.MF->insert(BBI, RightMBB);
10316
189
    WorkList.push_back(
10317
189
        {RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultProb / 2});
10318
189
    // Put Cond in a virtual register to make it available from the new blocks.
10319
189
    ExportFromCurrentBlock(Cond);
10320
189
  }
10321
189
10322
189
  // Create the CaseBlock record that will be used to lower the branch.
10323
189
  CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB,
10324
189
               getCurSDLoc(), LeftProb, RightProb);
10325
189
10326
189
  if (W.MBB == SwitchMBB)
10327
125
    visitSwitchCase(CB, SwitchMBB);
10328
64
  else
10329
64
    SL->SwitchCases.push_back(CB);
10330
189
}
10331
10332
// Scale CaseProb after peeling a case with the probablity of PeeledCaseProb
10333
// from the swith statement.
10334
static BranchProbability scaleCaseProbality(BranchProbability CaseProb,
10335
903
                                            BranchProbability PeeledCaseProb) {
10336
903
  if (PeeledCaseProb == BranchProbability::getOne())
10337
0
    return BranchProbability::getZero();
10338
903
  BranchProbability SwitchProb = PeeledCaseProb.getCompl();
10339
903
10340
903
  uint32_t Numerator = CaseProb.getNumerator();
10341
903
  uint32_t Denominator = SwitchProb.scale(CaseProb.getDenominator());
10342
903
  return BranchProbability(Numerator, std::max(Numerator, Denominator));
10343
903
}
10344
10345
// Try to peel the top probability case if it exceeds the threshold.
10346
// Return current MachineBasicBlock for the switch statement if the peeling
10347
// does not occur.
10348
// If the peeling is performed, return the newly created MachineBasicBlock
10349
// for the peeled switch statement. Also update Clusters to remove the peeled
10350
// case. PeeledCaseProb is the BranchProbability for the peeled case.
10351
MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster(
10352
    const SwitchInst &SI, CaseClusterVector &Clusters,
10353
7.70k
    BranchProbability &PeeledCaseProb) {
10354
7.70k
  MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
10355
7.70k
  // Don't perform if there is only one cluster or optimizing for size.
10356
7.70k
  if (SwitchPeelThreshold > 100 || 
!FuncInfo.BPI7.67k
||
Clusters.size() < 27.63k
||
10357
7.70k
      
TM.getOptLevel() == CodeGenOpt::None7.60k
||
10358
7.70k
      
SwitchMBB->getParent()->getFunction().hasMinSize()7.60k
)
10359
202
    return SwitchMBB;
10360
7.50k
10361
7.50k
  BranchProbability TopCaseProb = BranchProbability(SwitchPeelThreshold, 100);
10362
7.50k
  unsigned PeeledCaseIndex = 0;
10363
7.50k
  bool SwitchPeeled = false;
10364
32.7k
  for (unsigned Index = 0; Index < Clusters.size(); 
++Index25.2k
) {
10365
25.2k
    CaseCluster &CC = Clusters[Index];
10366
25.2k
    if (CC.Prob < TopCaseProb)
10367
24.7k
      continue;
10368
412
    TopCaseProb = CC.Prob;
10369
412
    PeeledCaseIndex = Index;
10370
412
    SwitchPeeled = true;
10371
412
  }
10372
7.50k
  if (!SwitchPeeled)
10373
7.09k
    return SwitchMBB;
10374
412
10375
412
  LLVM_DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: "
10376
412
                    << TopCaseProb << "\n");
10377
412
10378
412
  // Record the MBB for the peeled switch statement.
10379
412
  MachineFunction::iterator BBI(SwitchMBB);
10380
412
  ++BBI;
10381
412
  MachineBasicBlock *PeeledSwitchMBB =
10382
412
      FuncInfo.MF->CreateMachineBasicBlock(SwitchMBB->getBasicBlock());
10383
412
  FuncInfo.MF->insert(BBI, PeeledSwitchMBB);
10384
412
10385
412
  ExportFromCurrentBlock(SI.getCondition());
10386
412
  auto PeeledCaseIt = Clusters.begin() + PeeledCaseIndex;
10387
412
  SwitchWorkListItem W = {SwitchMBB, PeeledCaseIt, PeeledCaseIt,
10388
412
                          nullptr,   nullptr,      TopCaseProb.getCompl()};
10389
412
  lowerWorkItem(W, SI.getCondition(), SwitchMBB, PeeledSwitchMBB);
10390
412
10391
412
  Clusters.erase(PeeledCaseIt);
10392
491
  for (CaseCluster &CC : Clusters) {
10393
491
    LLVM_DEBUG(
10394
491
        dbgs() << "Scale the probablity for one cluster, before scaling: "
10395
491
               << CC.Prob << "\n");
10396
491
    CC.Prob = scaleCaseProbality(CC.Prob, TopCaseProb);
10397
491
    LLVM_DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n");
10398
491
  }
10399
412
  PeeledCaseProb = TopCaseProb;
10400
412
  return PeeledSwitchMBB;
10401
412
}
10402
10403
7.70k
/// Lower an IR 'switch' instruction into SelectionDAG form.
///
/// Strategy, in order:
///   1. Build one single-value CaseCluster per IR case, attaching the edge
///      probability from BPI when available (uniform 1/(NumCases+1) otherwise).
///   2. sortAndRangeify() merges adjacent cases with identical destinations
///      into ranges (cheap, always profitable).
///   3. peelDominantCaseCluster() may split out one overwhelmingly-likely
///      case into its own compare-and-branch ahead of the main lowering.
///   4. SwitchLowering (SL) partitions the remaining clusters into jump
///      tables and bit-test clusters.
///   5. A worklist walk lowers each cluster span; large spans (>3 clusters)
///      are split into a balanced binary search tree at -O1 and above,
///      unless the function is optimized for minimum size.
void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
  // Extract cases from the switch.
  BranchProbabilityInfo *BPI = FuncInfo.BPI;
  CaseClusterVector Clusters;
  Clusters.reserve(SI.getNumCases());
  for (auto I : SI.cases()) {
    MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()];
    const ConstantInt *CaseVal = I.getCaseValue();
    // Without BPI, assume all NumCases+1 outgoing edges (cases + default)
    // are equally likely.
    BranchProbability Prob =
        BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex())
            : BranchProbability(1, SI.getNumCases() + 1);
    Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob));
  }

  MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()];

  // Cluster adjacent cases with the same destination. We do this at all
  // optimization levels because it's cheap to do and will make codegen faster
  // if there are many clusters.
  sortAndRangeify(Clusters);

  // The branch probability of the peeled case.
  BranchProbability PeeledCaseProb = BranchProbability::getZero();
  // If peeling happens, PeeledSwitchMBB is the new block holding the rest of
  // the switch; otherwise it is SwitchMBB itself.
  MachineBasicBlock *PeeledSwitchMBB =
      peelDominantCaseCluster(SI, Clusters, PeeledCaseProb);

  // If there is only the default destination, jump there directly.
  MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
  if (Clusters.empty()) {
    assert(PeeledSwitchMBB == SwitchMBB);
    SwitchMBB->addSuccessor(DefaultMBB);
    // Emit an explicit branch only if the default block doesn't fall
    // through from the switch block.
    if (DefaultMBB != NextBlock(SwitchMBB)) {
      DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
                              getControlRoot(), DAG.getBasicBlock(DefaultMBB)));
    }
    return;
  }

  // Partition clusters into jump tables and bit tests where profitable.
  SL->findJumpTables(Clusters, &SI, DefaultMBB);
  SL->findBitTestClusters(Clusters, &SI);

  LLVM_DEBUG({
    dbgs() << "Case clusters: ";
    for (const CaseCluster &C : Clusters) {
      if (C.Kind == CC_JumpTable)
        dbgs() << "JT:";
      if (C.Kind == CC_BitTests)
        dbgs() << "BT:";

      C.Low->getValue().print(dbgs(), true);
      if (C.Low != C.High) {
        dbgs() << '-';
        C.High->getValue().print(dbgs(), true);
      }
      dbgs() << ' ';
    }
    dbgs() << '\n';
  });

  assert(!Clusters.empty());
  SwitchWorkList WorkList;
  CaseClusterIt First = Clusters.begin();
  CaseClusterIt Last = Clusters.end() - 1;
  auto DefaultProb = getEdgeProbability(PeeledSwitchMBB, DefaultMBB);
  // Scale the branch probability for DefaultMBB if the peel occurs and
  // DefaultMBB is not replaced.
  if (PeeledCaseProb != BranchProbability::getZero() &&
      DefaultMBB == FuncInfo.MBBMap[SI.getDefaultDest()])
    DefaultProb = scaleCaseProbality(DefaultProb, PeeledCaseProb);
  // Seed the worklist with the full (possibly post-peel) cluster range.
  WorkList.push_back(
      {PeeledSwitchMBB, First, Last, nullptr, nullptr, DefaultProb});

  while (!WorkList.empty()) {
    SwitchWorkListItem W = WorkList.back();
    WorkList.pop_back();
    unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;

    // Split only when optimizing, and never when optimizing for minimum
    // size (tree lowering costs extra blocks/compares).
    if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None &&
        !DefaultMBB->getParent()->getFunction().hasMinSize()) {
      // For optimized builds, lower large range as a balanced binary tree.
      splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB);
      continue;
    }

    lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB);
  }
}