/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This implements routines for translating from LLVM IR into SelectionDAG IR. |
10 | | // |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #include "SelectionDAGBuilder.h" |
14 | | #include "SDNodeDbgValue.h" |
15 | | #include "llvm/ADT/APFloat.h" |
16 | | #include "llvm/ADT/APInt.h" |
17 | | #include "llvm/ADT/ArrayRef.h" |
18 | | #include "llvm/ADT/BitVector.h" |
19 | | #include "llvm/ADT/DenseMap.h" |
20 | | #include "llvm/ADT/None.h" |
21 | | #include "llvm/ADT/Optional.h" |
22 | | #include "llvm/ADT/STLExtras.h" |
23 | | #include "llvm/ADT/SmallPtrSet.h" |
24 | | #include "llvm/ADT/SmallSet.h" |
25 | | #include "llvm/ADT/SmallVector.h" |
26 | | #include "llvm/ADT/StringRef.h" |
27 | | #include "llvm/ADT/Triple.h" |
28 | | #include "llvm/ADT/Twine.h" |
29 | | #include "llvm/Analysis/AliasAnalysis.h" |
30 | | #include "llvm/Analysis/BranchProbabilityInfo.h" |
31 | | #include "llvm/Analysis/ConstantFolding.h" |
32 | | #include "llvm/Analysis/EHPersonalities.h" |
33 | | #include "llvm/Analysis/Loads.h" |
34 | | #include "llvm/Analysis/MemoryLocation.h" |
35 | | #include "llvm/Analysis/TargetLibraryInfo.h" |
36 | | #include "llvm/Analysis/ValueTracking.h" |
37 | | #include "llvm/Analysis/VectorUtils.h" |
38 | | #include "llvm/CodeGen/Analysis.h" |
39 | | #include "llvm/CodeGen/FunctionLoweringInfo.h" |
40 | | #include "llvm/CodeGen/GCMetadata.h" |
41 | | #include "llvm/CodeGen/ISDOpcodes.h" |
42 | | #include "llvm/CodeGen/MachineBasicBlock.h" |
43 | | #include "llvm/CodeGen/MachineFrameInfo.h" |
44 | | #include "llvm/CodeGen/MachineFunction.h" |
45 | | #include "llvm/CodeGen/MachineInstr.h" |
46 | | #include "llvm/CodeGen/MachineInstrBuilder.h" |
47 | | #include "llvm/CodeGen/MachineJumpTableInfo.h" |
48 | | #include "llvm/CodeGen/MachineMemOperand.h" |
49 | | #include "llvm/CodeGen/MachineModuleInfo.h" |
50 | | #include "llvm/CodeGen/MachineOperand.h" |
51 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
52 | | #include "llvm/CodeGen/RuntimeLibcalls.h" |
53 | | #include "llvm/CodeGen/SelectionDAG.h" |
54 | | #include "llvm/CodeGen/SelectionDAGNodes.h" |
55 | | #include "llvm/CodeGen/SelectionDAGTargetInfo.h" |
56 | | #include "llvm/CodeGen/StackMaps.h" |
57 | | #include "llvm/CodeGen/SwiftErrorValueTracking.h" |
58 | | #include "llvm/CodeGen/TargetFrameLowering.h" |
59 | | #include "llvm/CodeGen/TargetInstrInfo.h" |
60 | | #include "llvm/CodeGen/TargetLowering.h" |
61 | | #include "llvm/CodeGen/TargetOpcodes.h" |
62 | | #include "llvm/CodeGen/TargetRegisterInfo.h" |
63 | | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
64 | | #include "llvm/CodeGen/ValueTypes.h" |
65 | | #include "llvm/CodeGen/WinEHFuncInfo.h" |
66 | | #include "llvm/IR/Argument.h" |
67 | | #include "llvm/IR/Attributes.h" |
68 | | #include "llvm/IR/BasicBlock.h" |
69 | | #include "llvm/IR/CFG.h" |
70 | | #include "llvm/IR/CallSite.h" |
71 | | #include "llvm/IR/CallingConv.h" |
72 | | #include "llvm/IR/Constant.h" |
73 | | #include "llvm/IR/ConstantRange.h" |
74 | | #include "llvm/IR/Constants.h" |
75 | | #include "llvm/IR/DataLayout.h" |
76 | | #include "llvm/IR/DebugInfoMetadata.h" |
77 | | #include "llvm/IR/DebugLoc.h" |
78 | | #include "llvm/IR/DerivedTypes.h" |
79 | | #include "llvm/IR/Function.h" |
80 | | #include "llvm/IR/GetElementPtrTypeIterator.h" |
81 | | #include "llvm/IR/InlineAsm.h" |
82 | | #include "llvm/IR/InstrTypes.h" |
83 | | #include "llvm/IR/Instruction.h" |
84 | | #include "llvm/IR/Instructions.h" |
85 | | #include "llvm/IR/IntrinsicInst.h" |
86 | | #include "llvm/IR/Intrinsics.h" |
87 | | #include "llvm/IR/LLVMContext.h" |
88 | | #include "llvm/IR/Metadata.h" |
89 | | #include "llvm/IR/Module.h" |
90 | | #include "llvm/IR/Operator.h" |
91 | | #include "llvm/IR/PatternMatch.h" |
92 | | #include "llvm/IR/Statepoint.h" |
93 | | #include "llvm/IR/Type.h" |
94 | | #include "llvm/IR/User.h" |
95 | | #include "llvm/IR/Value.h" |
96 | | #include "llvm/MC/MCContext.h" |
97 | | #include "llvm/MC/MCSymbol.h" |
98 | | #include "llvm/Support/AtomicOrdering.h" |
99 | | #include "llvm/Support/BranchProbability.h" |
100 | | #include "llvm/Support/Casting.h" |
101 | | #include "llvm/Support/CodeGen.h" |
102 | | #include "llvm/Support/CommandLine.h" |
103 | | #include "llvm/Support/Compiler.h" |
104 | | #include "llvm/Support/Debug.h" |
105 | | #include "llvm/Support/ErrorHandling.h" |
106 | | #include "llvm/Support/MachineValueType.h" |
107 | | #include "llvm/Support/MathExtras.h" |
108 | | #include "llvm/Support/raw_ostream.h" |
109 | | #include "llvm/Target/TargetIntrinsicInfo.h" |
110 | | #include "llvm/Target/TargetMachine.h" |
111 | | #include "llvm/Target/TargetOptions.h" |
112 | | #include "llvm/Transforms/Utils/Local.h" |
113 | | #include <algorithm> |
114 | | #include <cassert> |
115 | | #include <cstddef> |
116 | | #include <cstdint> |
117 | | #include <cstring> |
118 | | #include <iterator> |
119 | | #include <limits> |
120 | | #include <numeric> |
121 | | #include <tuple> |
122 | | #include <utility> |
123 | | #include <vector> |
124 | | |
125 | | using namespace llvm; |
126 | | using namespace PatternMatch; |
127 | | using namespace SwitchCG; |
128 | | |
129 | | #define DEBUG_TYPE "isel" |
130 | | |
/// LimitFloatPrecision - Generate low-precision inline sequences for
/// some float libcalls (6, 8 or 12 bits).
static unsigned LimitFloatPrecision;

// Command-line knob backing LimitFloatPrecision; cl::location ties the option
// directly to the variable above. Hidden from the standard -help listing.
static cl::opt<unsigned, true>
    LimitFPPrecision("limit-float-precision",
                     cl::desc("Generate low-precision inline sequences "
                              "for some float libcalls"),
                     cl::location(LimitFloatPrecision), cl::Hidden,
                     cl::init(0));

// Probability threshold (in percent) above which a hot switch case is peeled
// off ahead of lowering the remaining cases; a value above 100 can never be
// met, which disables the optimization.
static cl::opt<unsigned> SwitchPeelThreshold(
    "switch-peel-threshold", cl::Hidden, cl::init(66),
    cl::desc("Set the case probability threshold for peeling the case from a "
             "switch statement. A value greater than 100 will void this "
             "optimization"));

// Limit the width of DAG chains. This is important in general to prevent
// DAG-based analysis from blowing up. For example, alias analysis and
// load clustering may not complete in reasonable time. It is difficult to
// recognize and avoid this situation within each individual analysis, and
// future analyses are likely to have the same behavior. Limiting DAG width is
// the safe approach and will be especially important with global DAGs.
//
// MaxParallelChains default is arbitrarily high to avoid affecting
// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
// sequence over this should have been converted to llvm.memcpy by the
// frontend. It is easy to induce this behavior with .ll code such as:
//   %buffer = alloca [4096 x i8]
//   %data = load [4096 x i8]* %argPtr
//   store [4096 x i8] %data, [4096 x i8]* %buffer
static const unsigned MaxParallelChains = 64;
163 | | |
164 | | // Return the calling convention if the Value passed requires ABI mangling as it |
165 | | // is a parameter to a function or a return value from a function which is not |
166 | | // an intrinsic. |
167 | 2.39k | static Optional<CallingConv::ID> getABIRegCopyCC(const Value *V) { |
168 | 2.39k | if (auto *R = dyn_cast<ReturnInst>(V)) |
169 | 0 | return R->getParent()->getParent()->getCallingConv(); |
170 | 2.39k | |
171 | 2.39k | if (auto *CI = dyn_cast<CallInst>(V)) { |
172 | 27 | const bool IsInlineAsm = CI->isInlineAsm(); |
173 | 27 | const bool IsIndirectFunctionCall = |
174 | 27 | !IsInlineAsm && !CI->getCalledFunction()24 ; |
175 | 27 | |
176 | 27 | // It is possible that the call instruction is an inline asm statement or an |
177 | 27 | // indirect function call in which case the return value of |
178 | 27 | // getCalledFunction() would be nullptr. |
179 | 27 | const bool IsInstrinsicCall = |
180 | 27 | !IsInlineAsm && !IsIndirectFunctionCall24 && |
181 | 27 | CI->getCalledFunction()->getIntrinsicID() != Intrinsic::not_intrinsic23 ; |
182 | 27 | |
183 | 27 | if (!IsInlineAsm && !IsInstrinsicCall24 ) |
184 | 14 | return CI->getCallingConv(); |
185 | 2.37k | } |
186 | 2.37k | |
187 | 2.37k | return None; |
188 | 2.37k | } |
189 | | |
190 | | static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, |
191 | | const SDValue *Parts, unsigned NumParts, |
192 | | MVT PartVT, EVT ValueVT, const Value *V, |
193 | | Optional<CallingConv::ID> CC); |
194 | | |
195 | | /// getCopyFromParts - Create a value that contains the specified legal parts |
196 | | /// combined into the value they represent. If the parts combine to a type |
197 | | /// larger than ValueVT then AssertOp can be used to specify whether the extra |
198 | | /// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT |
199 | | /// (ISD::AssertSext). |
static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
                                const SDValue *Parts, unsigned NumParts,
                                MVT PartVT, EVT ValueVT, const Value *V,
                                Optional<CallingConv::ID> CC = None,
                                Optional<ISD::NodeType> AssertOp = None) {
  // Vector values are assembled by the dedicated vector path.
  if (ValueVT.isVector())
    return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT, V,
                                  CC);

  assert(NumParts > 0 && "No parts to assemble!");
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue Val = Parts[0];

  if (NumParts > 1) {
    // Assemble the value from multiple parts.
    if (ValueVT.isInteger()) {
      unsigned PartBits = PartVT.getSizeInBits();
      unsigned ValueBits = ValueVT.getSizeInBits();

      // Assemble the power of 2 part. If NumParts is not a power of two,
      // RoundParts is the largest power of two below it; the remainder is
      // handled as the "odd" tail below.
      unsigned RoundParts =
          (NumParts & (NumParts - 1)) ? 1 << Log2_32(NumParts) : NumParts;
      unsigned RoundBits = PartBits * RoundParts;
      EVT RoundVT = RoundBits == ValueBits ?
        ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
      SDValue Lo, Hi;

      EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);

      // Recursively combine each half of the power-of-two span, or bitcast
      // the two parts directly when there are exactly two of them.
      if (RoundParts > 2) {
        Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
                              PartVT, HalfVT, V);
        Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
                              RoundParts / 2, PartVT, HalfVT, V);
      } else {
        Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
        Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
      }

      if (DAG.getDataLayout().isBigEndian())
        std::swap(Lo, Hi);

      Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);

      if (RoundParts < NumParts) {
        // Assemble the trailing non-power-of-2 part.
        unsigned OddParts = NumParts - RoundParts;
        EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
        Hi = getCopyFromParts(DAG, DL, Parts + RoundParts, OddParts, PartVT,
                              OddVT, V, CC);

        // Combine the round and odd parts: zero-extend the round part,
        // shift the odd part above it, and OR them together.
        Lo = Val;
        if (DAG.getDataLayout().isBigEndian())
          std::swap(Lo, Hi);
        EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
        Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
        Hi =
            DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
                        DAG.getConstant(Lo.getValueSizeInBits(), DL,
                                        TLI.getPointerTy(DAG.getDataLayout())));
        Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
        Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
      }
    } else if (PartVT.isFloatingPoint()) {
      // FP split into multiple FP parts (for ppcf128)
      assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 &&
             "Unexpected split");
      SDValue Lo, Hi;
      Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
      Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
      if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
        std::swap(Lo, Hi);
      Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
    } else {
      // FP split into integer parts (soft fp): assemble as an integer of the
      // same width, then fall through to the bitcast below.
      assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
             !PartVT.isVector() && "Unexpected split");
      EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
      Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V, CC);
    }
  }

  // There is now one part, held in Val.  Correct it to match ValueVT.
  // PartEVT is the type of the register class that holds the value.
  // ValueVT is the type of the inline asm operation.
  EVT PartEVT = Val.getValueType();

  if (PartEVT == ValueVT)
    return Val;

  if (PartEVT.isInteger() && ValueVT.isFloatingPoint() &&
      ValueVT.bitsLT(PartEVT)) {
    // For an FP value in an integer part, we need to truncate to the right
    // width first.
    PartEVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
    Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val);
  }

  // Handle types that have the same size.
  if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);

  // Handle types with different sizes.
  if (PartEVT.isInteger() && ValueVT.isInteger()) {
    if (ValueVT.bitsLT(PartEVT)) {
      // For a truncate, see if we have any information to
      // indicate whether the truncated bits will always be
      // zero or sign-extension.
      if (AssertOp.hasValue())
        Val = DAG.getNode(*AssertOp, DL, PartEVT, Val,
                          DAG.getValueType(ValueVT));
      return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
    }
    return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
  }

  if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
    // FP_ROUND's are always exact here.
    if (ValueVT.bitsLT(Val.getValueType()))
      return DAG.getNode(
          ISD::FP_ROUND, DL, ValueVT, Val,
          DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())));

    return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
  }

  // Handle MMX to a narrower integer type by bitcasting MMX to integer and
  // then truncating.
  if (PartEVT == MVT::x86mmx && ValueVT.isInteger() &&
      ValueVT.bitsLT(PartEVT)) {
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Val);
    return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
  }

  report_fatal_error("Unknown mismatch in getCopyFromParts!");
}
337 | | |
338 | | static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V, |
339 | 12 | const Twine &ErrMsg) { |
340 | 12 | const Instruction *I = dyn_cast_or_null<Instruction>(V); |
341 | 12 | if (!V) |
342 | 0 | return Ctx.emitError(ErrMsg); |
343 | 12 | |
344 | 12 | const char *AsmError = ", possible invalid constraint for vector type"; |
345 | 12 | if (const CallInst *CI = dyn_cast<CallInst>(I)) |
346 | 12 | if (isa<InlineAsm>(CI->getCalledValue())) |
347 | 12 | return Ctx.emitError(I, ErrMsg + AsmError); |
348 | 0 | |
349 | 0 | return Ctx.emitError(I, ErrMsg); |
350 | 0 | } |
351 | | |
352 | | /// getCopyFromPartsVector - Create a value that contains the specified legal |
353 | | /// parts combined into the value they represent. If the parts combine to a |
354 | | /// type larger than ValueVT then AssertOp can be used to specify whether the |
355 | | /// extra bits are known to be zero (ISD::AssertZext) or sign extended from |
356 | | /// ValueVT (ISD::AssertSext). |
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                      const SDValue *Parts, unsigned NumParts,
                                      MVT PartVT, EVT ValueVT, const Value *V,
                                      Optional<CallingConv::ID> CallConv) {
  assert(ValueVT.isVector() && "Not a vector value");
  assert(NumParts > 0 && "No parts to assemble!");
  // A calling convention implies the parts follow the ABI register layout.
  const bool IsABIRegCopy = CallConv.hasValue();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue Val = Parts[0];

  // Handle a multi-element vector.
  if (NumParts > 1) {
    EVT IntermediateVT;
    MVT RegisterVT;
    unsigned NumIntermediates;
    unsigned NumRegs;

    // Ask the target how the value type decomposes into registers, using the
    // calling-convention-specific breakdown when this is an ABI copy.
    if (IsABIRegCopy) {
      NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
          *DAG.getContext(), CallConv.getValue(), ValueVT, IntermediateVT,
          NumIntermediates, RegisterVT);
    } else {
      NumRegs =
          TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
                                     NumIntermediates, RegisterVT);
    }

    assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
    NumParts = NumRegs; // Silence a compiler warning.
    assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
    assert(RegisterVT.getSizeInBits() ==
           Parts[0].getSimpleValueType().getSizeInBits() &&
           "Part type sizes don't match!");

    // Assemble the parts into intermediate operands.
    SmallVector<SDValue, 8> Ops(NumIntermediates);
    if (NumIntermediates == NumParts) {
      // If the register was not expanded, truncate or copy the value,
      // as appropriate.
      for (unsigned i = 0; i != NumParts; ++i)
        Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
                                  PartVT, IntermediateVT, V);
    } else if (NumParts > 0) {
      // If the intermediate type was expanded, build the intermediate
      // operands from the parts.
      assert(NumParts % NumIntermediates == 0 &&
             "Must expand into a divisible number of parts!");
      unsigned Factor = NumParts / NumIntermediates;
      for (unsigned i = 0; i != NumIntermediates; ++i)
        Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
                                  PartVT, IntermediateVT, V);
    }

    // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
    // intermediate operands.
    EVT BuiltVectorTy =
        EVT::getVectorVT(*DAG.getContext(), IntermediateVT.getScalarType(),
                         (IntermediateVT.isVector()
                              ? IntermediateVT.getVectorNumElements() * NumParts
                              : NumIntermediates));
    Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS
                                                : ISD::BUILD_VECTOR,
                      DL, BuiltVectorTy, Ops);
  }

  // There is now one part, held in Val.  Correct it to match ValueVT.
  EVT PartEVT = Val.getValueType();

  if (PartEVT == ValueVT)
    return Val;

  if (PartEVT.isVector()) {
    // If the element type of the source/dest vectors are the same, but the
    // parts vector has more elements than the value vector, then we have a
    // vector widening case (e.g. <2 x float> -> <4 x float>).  Extract the
    // elements we want.
    if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) {
      assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
             "Cannot narrow, it would be a lossy transformation");
      return DAG.getNode(
          ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
          DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
    }

    // Vector/Vector bitcast.
    if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
      return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);

    assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
      "Cannot handle this kind of promotion");
    // Promoted vector extract
    return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);

  }

  // Trivial bitcast if the types are the same size and the destination
  // vector type is legal.
  if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() &&
      TLI.isTypeLegal(ValueVT))
    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);

  if (ValueVT.getVectorNumElements() != 1) {
     // Certain ABIs require that vectors are passed as integers. When the
     // vector and the part are the same size, this is an obvious bitcast.
     if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) {
       return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
     } else if (ValueVT.getSizeInBits() < PartEVT.getSizeInBits()) {
       // Bitcast Val back to a wider vector of the original element type and
       // extract the subvector we want.
       unsigned Elts = PartEVT.getSizeInBits() / ValueVT.getScalarSizeInBits();
       EVT WiderVecType = EVT::getVectorVT(*DAG.getContext(),
                                           ValueVT.getVectorElementType(), Elts);
       Val = DAG.getBitcast(WiderVecType, Val);
       return DAG.getNode(
           ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
           DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
     }

     // Part is smaller than the value: no lossless conversion exists.
     diagnosePossiblyInvalidConstraint(
         *DAG.getContext(), V, "non-trivial scalar-to-vector conversion");
     return DAG.getUNDEF(ValueVT);
  }

  // Handle cases such as i8 -> <1 x i1>
  EVT ValueSVT = ValueVT.getVectorElementType();
  if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT)
    Val = ValueVT.isFloatingPoint() ? DAG.getFPExtendOrRound(Val, DL, ValueSVT)
                                    : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT);

  return DAG.getBuildVector(ValueVT, DL, Val);
}
489 | | |
490 | | static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl, |
491 | | SDValue Val, SDValue *Parts, unsigned NumParts, |
492 | | MVT PartVT, const Value *V, |
493 | | Optional<CallingConv::ID> CallConv); |
494 | | |
495 | | /// getCopyToParts - Create a series of nodes that contain the specified value |
496 | | /// split into legal parts. If the parts contain more bits than Val, then, for |
497 | | /// integers, ExtendKind can be used to specify how to generate the extra bits. |
static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
                           SDValue *Parts, unsigned NumParts, MVT PartVT,
                           const Value *V,
                           Optional<CallingConv::ID> CallConv = None,
                           ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
  EVT ValueVT = Val.getValueType();

  // Handle the vector case separately.
  if (ValueVT.isVector())
    return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V,
                                CallConv);

  unsigned PartBits = PartVT.getSizeInBits();
  unsigned OrigNumParts = NumParts;
  assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) &&
         "Copying to an illegal type!");

  if (NumParts == 0)
    return;

  assert(!ValueVT.isVector() && "Vector case handled elsewhere");
  EVT PartEVT = PartVT;
  // Fast path: the value already has the part type.
  if (PartEVT == ValueVT) {
    assert(NumParts == 1 && "No-op copy with multiple parts!");
    Parts[0] = Val;
    return;
  }

  if (NumParts * PartBits > ValueVT.getSizeInBits()) {
    // If the parts cover more bits than the value has, promote the value.
    if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
      assert(NumParts == 1 && "Do not know what to promote to!");
      Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
    } else {
      if (ValueVT.isFloatingPoint()) {
        // FP values need to be bitcast, then extended if they are being put
        // into a larger container.
        ValueVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
        Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
      }
      assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
             ValueVT.isInteger() &&
             "Unknown mismatch!");
      ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
      Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
      if (PartVT == MVT::x86mmx)
        Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
    }
  } else if (PartBits == ValueVT.getSizeInBits()) {
    // Different types of the same size.
    assert(NumParts == 1 && PartEVT != ValueVT);
    Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
  } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
    // If the parts cover less bits than value has, truncate the value.
    assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
           ValueVT.isInteger() &&
           "Unknown mismatch!");
    ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
    Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
    if (PartVT == MVT::x86mmx)
      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
  }

  // The value may have changed - recompute ValueVT.
  ValueVT = Val.getValueType();
  assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
         "Failed to tile the value with PartVT!");

  if (NumParts == 1) {
    if (PartEVT != ValueVT) {
      diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
                                        "scalar-to-vector conversion failed");
      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
    }

    Parts[0] = Val;
    return;
  }

  // Expand the value into multiple parts.
  if (NumParts & (NumParts - 1)) {
    // The number of parts is not a power of 2.  Split off and copy the tail.
    assert(PartVT.isInteger() && ValueVT.isInteger() &&
           "Do not know what to expand to!");
    unsigned RoundParts = 1 << Log2_32(NumParts);
    unsigned RoundBits = RoundParts * PartBits;
    unsigned OddParts = NumParts - RoundParts;
    // Shift the tail bits down so they can be copied out recursively.
    SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
      DAG.getShiftAmountConstant(RoundBits, ValueVT, DL, /*LegalTypes*/false));

    getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V,
                   CallConv);

    if (DAG.getDataLayout().isBigEndian())
      // The odd parts were reversed by getCopyToParts - unreverse them.
      std::reverse(Parts + RoundParts, Parts + NumParts);

    // Continue below with the power-of-two prefix only.
    NumParts = RoundParts;
    ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
    Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
  }

  // The number of parts is a power of 2.  Repeatedly bisect the value using
  // EXTRACT_ELEMENT.
  Parts[0] = DAG.getNode(ISD::BITCAST, DL,
                         EVT::getIntegerVT(*DAG.getContext(),
                                           ValueVT.getSizeInBits()),
                         Val);

  for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
    for (unsigned i = 0; i < NumParts; i += StepSize) {
      unsigned ThisBits = StepSize * PartBits / 2;
      EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
      SDValue &Part0 = Parts[i];
      SDValue &Part1 = Parts[i+StepSize/2];

      // Split Part0 into its low (index 0) and high (index 1) halves.
      Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
                          ThisVT, Part0, DAG.getIntPtrConstant(1, DL));
      Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
                          ThisVT, Part0, DAG.getIntPtrConstant(0, DL));

      if (ThisBits == PartBits && ThisVT != PartVT) {
        Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
        Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
      }
    }
  }

  if (DAG.getDataLayout().isBigEndian())
    std::reverse(Parts, Parts + OrigNumParts);
}
629 | | |
630 | | static SDValue widenVectorToPartType(SelectionDAG &DAG, |
631 | 5.07k | SDValue Val, const SDLoc &DL, EVT PartVT) { |
632 | 5.07k | if (!PartVT.isVector()) |
633 | 102 | return SDValue(); |
634 | 4.97k | |
635 | 4.97k | EVT ValueVT = Val.getValueType(); |
636 | 4.97k | unsigned PartNumElts = PartVT.getVectorNumElements(); |
637 | 4.97k | unsigned ValueNumElts = ValueVT.getVectorNumElements(); |
638 | 4.97k | if (PartNumElts > ValueNumElts && |
639 | 4.97k | PartVT.getVectorElementType() == ValueVT.getVectorElementType()2.21k ) { |
640 | 2.13k | EVT ElementVT = PartVT.getVectorElementType(); |
641 | 2.13k | // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in |
642 | 2.13k | // undef elements. |
643 | 2.13k | SmallVector<SDValue, 16> Ops; |
644 | 2.13k | DAG.ExtractVectorElements(Val, Ops); |
645 | 2.13k | SDValue EltUndef = DAG.getUNDEF(ElementVT); |
646 | 13.2k | for (unsigned i = ValueNumElts, e = PartNumElts; i != e; ++i11.0k ) |
647 | 11.0k | Ops.push_back(EltUndef); |
648 | 2.13k | |
649 | 2.13k | // FIXME: Use CONCAT for 2x -> 4x. |
650 | 2.13k | return DAG.getBuildVector(PartVT, DL, Ops); |
651 | 2.13k | } |
652 | 2.83k | |
653 | 2.83k | return SDValue(); |
654 | 2.83k | } |
655 | | |
656 | | /// getCopyToPartsVector - Create a series of nodes that contain the specified |
657 | | /// value split into legal parts. |
658 | | static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, |
659 | | SDValue Val, SDValue *Parts, unsigned NumParts, |
660 | | MVT PartVT, const Value *V, |
661 | 160k | Optional<CallingConv::ID> CallConv) { |
662 | 160k | EVT ValueVT = Val.getValueType(); |
663 | 160k | assert(ValueVT.isVector() && "Not a vector"); |
664 | 160k | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
665 | 160k | const bool IsABIRegCopy = CallConv.hasValue(); |
666 | 160k | |
667 | 160k | if (NumParts == 1) { |
668 | 143k | EVT PartEVT = PartVT; |
669 | 143k | if (PartEVT == ValueVT) { |
670 | 138k | // Nothing to do. |
671 | 138k | } else if (5.51k PartVT.getSizeInBits() == ValueVT.getSizeInBits()5.51k ) { |
672 | 712 | // Bitconvert vector->vector case. |
673 | 712 | Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); |
674 | 4.80k | } else if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, PartVT)) { |
675 | 2.10k | Val = Widened; |
676 | 2.69k | } else if (PartVT.isVector() && |
677 | 2.69k | PartEVT.getVectorElementType().bitsGE( |
678 | 2.59k | ValueVT.getVectorElementType()) && |
679 | 2.69k | PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()2.59k ) { |
680 | 2.59k | |
681 | 2.59k | // Promoted vector extract |
682 | 2.59k | Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); |
683 | 2.59k | } else { |
684 | 102 | if (ValueVT.getVectorNumElements() == 1) { |
685 | 54 | Val = DAG.getNode( |
686 | 54 | ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, |
687 | 54 | DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); |
688 | 54 | } else { |
689 | 48 | assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() && |
690 | 48 | "lossy conversion of vector to scalar type"); |
691 | 48 | EVT IntermediateType = |
692 | 48 | EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); |
693 | 48 | Val = DAG.getBitcast(IntermediateType, Val); |
694 | 48 | Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); |
695 | 48 | } |
696 | 102 | } |
697 | 143k | |
698 | 143k | assert(Val.getValueType() == PartVT && "Unexpected vector part value type"); |
699 | 143k | Parts[0] = Val; |
700 | 143k | return; |
701 | 143k | } |
702 | 16.1k | |
703 | 16.1k | // Handle a multi-element vector. |
704 | 16.1k | EVT IntermediateVT; |
705 | 16.1k | MVT RegisterVT; |
706 | 16.1k | unsigned NumIntermediates; |
707 | 16.1k | unsigned NumRegs; |
708 | 16.1k | if (IsABIRegCopy) { |
709 | 7.82k | NumRegs = TLI.getVectorTypeBreakdownForCallingConv( |
710 | 7.82k | *DAG.getContext(), CallConv.getValue(), ValueVT, IntermediateVT, |
711 | 7.82k | NumIntermediates, RegisterVT); |
712 | 8.31k | } else { |
713 | 8.31k | NumRegs = |
714 | 8.31k | TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, |
715 | 8.31k | NumIntermediates, RegisterVT); |
716 | 8.31k | } |
717 | 16.1k | |
718 | 16.1k | assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); |
719 | 16.1k | NumParts = NumRegs; // Silence a compiler warning. |
720 | 16.1k | assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); |
721 | 16.1k | |
722 | 16.1k | unsigned IntermediateNumElts = IntermediateVT.isVector() ? |
723 | 13.0k | IntermediateVT.getVectorNumElements() : 13.08k ; |
724 | 16.1k | |
725 | 16.1k | // Convert the vector to the appropiate type if necessary. |
726 | 16.1k | unsigned DestVectorNoElts = NumIntermediates * IntermediateNumElts; |
727 | 16.1k | |
728 | 16.1k | EVT BuiltVectorTy = EVT::getVectorVT( |
729 | 16.1k | *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts); |
730 | 16.1k | MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); |
731 | 16.1k | if (ValueVT != BuiltVectorTy) { |
732 | 275 | if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) |
733 | 34 | Val = Widened; |
734 | 275 | |
735 | 275 | Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val); |
736 | 275 | } |
737 | 16.1k | |
738 | 16.1k | // Split the vector into intermediate operands. |
739 | 16.1k | SmallVector<SDValue, 8> Ops(NumIntermediates); |
740 | 58.2k | for (unsigned i = 0; i != NumIntermediates; ++i42.0k ) { |
741 | 42.0k | if (IntermediateVT.isVector()) { |
742 | 29.6k | Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val, |
743 | 29.6k | DAG.getConstant(i * IntermediateNumElts, DL, IdxVT)); |
744 | 29.6k | } else { |
745 | 12.4k | Ops[i] = DAG.getNode( |
746 | 12.4k | ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val, |
747 | 12.4k | DAG.getConstant(i, DL, IdxVT)); |
748 | 12.4k | } |
749 | 42.0k | } |
750 | 16.1k | |
751 | 16.1k | // Split the intermediate operands into legal parts. |
752 | 16.1k | if (NumParts == NumIntermediates) { |
753 | 15.9k | // If the register was not expanded, promote or copy the value, |
754 | 15.9k | // as appropriate. |
755 | 57.5k | for (unsigned i = 0; i != NumParts; ++i41.6k ) |
756 | 41.6k | getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V, CallConv); |
757 | 15.9k | } else if (142 NumParts > 0142 ) { |
758 | 142 | // If the intermediate type was expanded, split each the value into |
759 | 142 | // legal parts. |
760 | 142 | assert(NumIntermediates != 0 && "division by zero"); |
761 | 142 | assert(NumParts % NumIntermediates == 0 && |
762 | 142 | "Must expand into a divisible number of parts!"); |
763 | 142 | unsigned Factor = NumParts / NumIntermediates; |
764 | 615 | for (unsigned i = 0; i != NumIntermediates; ++i473 ) |
765 | 473 | getCopyToParts(DAG, DL, Ops[i], &Parts[i * Factor], Factor, PartVT, V, |
766 | 473 | CallConv); |
767 | 142 | } |
768 | 16.1k | } |
769 | | |
770 | | RegsForValue::RegsForValue(const SmallVector<unsigned, 4> ®s, MVT regvt, |
771 | | EVT valuevt, Optional<CallingConv::ID> CC) |
772 | | : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs), |
773 | 92.7k | RegCount(1, regs.size()), CallConv(CC) {} |
774 | | |
775 | | RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI, |
776 | | const DataLayout &DL, unsigned Reg, Type *Ty, |
777 | 2.61M | Optional<CallingConv::ID> CC) { |
778 | 2.61M | ComputeValueVTs(TLI, DL, Ty, ValueVTs); |
779 | 2.61M | |
780 | 2.61M | CallConv = CC; |
781 | 2.61M | |
782 | 2.61M | for (EVT ValueVT : ValueVTs) { |
783 | 2.61M | unsigned NumRegs = |
784 | 2.61M | isABIMangled() |
785 | 2.61M | ? TLI.getNumRegistersForCallingConv(Context, CC.getValue(), ValueVT)16 |
786 | 2.61M | : TLI.getNumRegisters(Context, ValueVT)2.61M ; |
787 | 2.61M | MVT RegisterVT = |
788 | 2.61M | isABIMangled() |
789 | 2.61M | ? TLI.getRegisterTypeForCallingConv(Context, CC.getValue(), ValueVT)16 |
790 | 2.61M | : TLI.getRegisterType(Context, ValueVT)2.61M ; |
791 | 5.29M | for (unsigned i = 0; i != NumRegs; ++i2.67M ) |
792 | 2.67M | Regs.push_back(Reg + i); |
793 | 2.61M | RegVTs.push_back(RegisterVT); |
794 | 2.61M | RegCount.push_back(NumRegs); |
795 | 2.61M | Reg += NumRegs; |
796 | 2.61M | } |
797 | 2.61M | } |
798 | | |
799 | | SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, |
800 | | FunctionLoweringInfo &FuncInfo, |
801 | | const SDLoc &dl, SDValue &Chain, |
802 | 1.80M | SDValue *Flag, const Value *V) const { |
803 | 1.80M | // A Value with type {} or [0 x %t] needs no registers. |
804 | 1.80M | if (ValueVTs.empty()) |
805 | 0 | return SDValue(); |
806 | 1.80M | |
807 | 1.80M | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
808 | 1.80M | |
809 | 1.80M | // Assemble the legal parts into the final values. |
810 | 1.80M | SmallVector<SDValue, 4> Values(ValueVTs.size()); |
811 | 1.80M | SmallVector<SDValue, 8> Parts; |
812 | 3.61M | for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value1.80M ) { |
813 | 1.80M | // Copy the legal parts from the registers. |
814 | 1.80M | EVT ValueVT = ValueVTs[Value]; |
815 | 1.80M | unsigned NumRegs = RegCount[Value]; |
816 | 1.80M | MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv( |
817 | 14 | *DAG.getContext(), |
818 | 14 | CallConv.getValue(), RegVTs[Value]) |
819 | 1.80M | : RegVTs[Value]1.80M ; |
820 | 1.80M | |
821 | 1.80M | Parts.resize(NumRegs); |
822 | 3.64M | for (unsigned i = 0; i != NumRegs; ++i1.84M ) { |
823 | 1.84M | SDValue P; |
824 | 1.84M | if (!Flag) { |
825 | 1.83M | P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT); |
826 | 1.83M | } else { |
827 | 4.58k | P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag); |
828 | 4.58k | *Flag = P.getValue(2); |
829 | 4.58k | } |
830 | 1.84M | |
831 | 1.84M | Chain = P.getValue(1); |
832 | 1.84M | Parts[i] = P; |
833 | 1.84M | |
834 | 1.84M | // If the source register was virtual and if we know something about it, |
835 | 1.84M | // add an assert node. |
836 | 1.84M | if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) || |
837 | 1.84M | !RegisterVT.isInteger()1.84M ) |
838 | 70.6k | continue; |
839 | 1.77M | |
840 | 1.77M | const FunctionLoweringInfo::LiveOutInfo *LOI = |
841 | 1.77M | FuncInfo.GetLiveOutRegInfo(Regs[Part+i]); |
842 | 1.77M | if (!LOI) |
843 | 738k | continue; |
844 | 1.03M | |
845 | 1.03M | unsigned RegSize = RegisterVT.getScalarSizeInBits(); |
846 | 1.03M | unsigned NumSignBits = LOI->NumSignBits; |
847 | 1.03M | unsigned NumZeroBits = LOI->Known.countMinLeadingZeros(); |
848 | 1.03M | |
849 | 1.03M | if (NumZeroBits == RegSize) { |
850 | 3.27k | // The current value is a zero. |
851 | 3.27k | // Explicitly express that as it would be easier for |
852 | 3.27k | // optimizations to kick in. |
853 | 3.27k | Parts[i] = DAG.getConstant(0, dl, RegisterVT); |
854 | 3.27k | continue; |
855 | 3.27k | } |
856 | 1.02M | |
857 | 1.02M | // FIXME: We capture more information than the dag can represent. For |
858 | 1.02M | // now, just use the tightest assertzext/assertsext possible. |
859 | 1.02M | bool isSExt; |
860 | 1.02M | EVT FromVT(MVT::Other); |
861 | 1.02M | if (NumZeroBits) { |
862 | 125k | FromVT = EVT::getIntegerVT(*DAG.getContext(), RegSize - NumZeroBits); |
863 | 125k | isSExt = false; |
864 | 903k | } else if (NumSignBits > 1) { |
865 | 97.4k | FromVT = |
866 | 97.4k | EVT::getIntegerVT(*DAG.getContext(), RegSize - NumSignBits + 1); |
867 | 97.4k | isSExt = true; |
868 | 806k | } else { |
869 | 806k | continue; |
870 | 806k | } |
871 | 222k | // Add an assertion node. |
872 | 222k | assert(FromVT != MVT::Other); |
873 | 222k | Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext97.4k : ISD::AssertZext125k , dl, |
874 | 222k | RegisterVT, P, DAG.getValueType(FromVT)); |
875 | 222k | } |
876 | 1.80M | |
877 | 1.80M | Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), NumRegs, |
878 | 1.80M | RegisterVT, ValueVT, V, CallConv); |
879 | 1.80M | Part += NumRegs; |
880 | 1.80M | Parts.clear(); |
881 | 1.80M | } |
882 | 1.80M | |
883 | 1.80M | return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values); |
884 | 1.80M | } |
885 | | |
886 | | void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, |
887 | | const SDLoc &dl, SDValue &Chain, SDValue *Flag, |
888 | | const Value *V, |
889 | 819k | ISD::NodeType PreferredExtendType) const { |
890 | 819k | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
891 | 819k | ISD::NodeType ExtendKind = PreferredExtendType; |
892 | 819k | |
893 | 819k | // Get the list of the values's legal parts. |
894 | 819k | unsigned NumRegs = Regs.size(); |
895 | 819k | SmallVector<SDValue, 8> Parts(NumRegs); |
896 | 1.64M | for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value822k ) { |
897 | 822k | unsigned NumParts = RegCount[Value]; |
898 | 822k | |
899 | 822k | MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv( |
900 | 2 | *DAG.getContext(), |
901 | 2 | CallConv.getValue(), RegVTs[Value]) |
902 | 822k | : RegVTs[Value]822k ; |
903 | 822k | |
904 | 822k | if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT)775k ) |
905 | 44.4k | ExtendKind = ISD::ZERO_EXTEND; |
906 | 822k | |
907 | 822k | getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part], |
908 | 822k | NumParts, RegisterVT, V, CallConv, ExtendKind); |
909 | 822k | Part += NumParts; |
910 | 822k | } |
911 | 819k | |
912 | 819k | // Copy the parts into the registers. |
913 | 819k | SmallVector<SDValue, 8> Chains(NumRegs); |
914 | 1.66M | for (unsigned i = 0; i != NumRegs; ++i842k ) { |
915 | 842k | SDValue Part; |
916 | 842k | if (!Flag) { |
917 | 838k | Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]); |
918 | 838k | } else { |
919 | 4.79k | Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag); |
920 | 4.79k | *Flag = Part.getValue(1); |
921 | 4.79k | } |
922 | 842k | |
923 | 842k | Chains[i] = Part.getValue(0); |
924 | 842k | } |
925 | 819k | |
926 | 819k | if (NumRegs == 1 || Flag17.7k ) |
927 | 802k | // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is |
928 | 802k | // flagged to it. That is the CopyToReg nodes and the user are considered |
929 | 802k | // a single scheduling unit. If we create a TokenFactor and return it as |
930 | 802k | // chain, then the TokenFactor is both a predecessor (operand) of the |
931 | 802k | // user as well as a successor (the TF operands are flagged to the user). |
932 | 802k | // c1, f1 = CopyToReg |
933 | 802k | // c2, f2 = CopyToReg |
934 | 802k | // c3 = TokenFactor c1, c2 |
935 | 802k | // ... |
936 | 802k | // = op c3, ..., f2 |
937 | 802k | Chain = Chains[NumRegs-1]; |
938 | 17.6k | else |
939 | 17.6k | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); |
940 | 819k | } |
941 | | |
/// Append the inline-asm operand-flag word and this object's registers to
/// \p Ops.  \p Code is the InlineAsm kind; \p HasMatching/\p MatchingIdx
/// describe a tied ("matching") operand.
void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
                                        unsigned MatchingIdx, const SDLoc &dl,
                                        SelectionDAG &DAG,
                                        std::vector<SDValue> &Ops) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
  if (HasMatching)
    Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
  else if (!Regs.empty() &&
           TargetRegisterInfo::isVirtualRegister(Regs.front())) {
    // Put the register class of the virtual registers in the flag word. That
    // way, later passes can recompute register class constraints for inline
    // assembly as well as normal instructions.
    // Don't do this for tied operands that can use the regclass information
    // from the def.
    const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
    const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
    Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
  }

  SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32);
  Ops.push_back(Res);

  if (Code == InlineAsm::Kind_Clobber) {
    // Clobbers should always have a 1:1 mapping with registers, and may
    // reference registers that have illegal (e.g. vector) types. Hence, we
    // shouldn't try to apply any sort of splitting logic to them.
    assert(Regs.size() == RegVTs.size() && Regs.size() == ValueVTs.size() &&
           "No 1:1 mapping from clobbers to regs?");
    unsigned SP = TLI.getStackPointerRegisterToSaveRestore();
    (void)SP;
    for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) {
      Ops.push_back(DAG.getRegister(Regs[I], RegVTs[I]));
      assert(
          (Regs[I] != SP ||
           DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()) &&
          "If we clobbered the stack pointer, MFI should know about it.");
    }
    return;
  }

  // Otherwise emit one register operand per legal register of each value.
  for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
    unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
    MVT RegisterVT = RegVTs[Value];
    for (unsigned i = 0; i != NumRegs; ++i) {
      assert(Reg < Regs.size() && "Mismatch in # registers expected");
      unsigned TheReg = Regs[Reg++];
      Ops.push_back(DAG.getRegister(TheReg, RegisterVT));
    }
  }
}
994 | | |
/// Return one (register, size-in-bits) pair per physical/virtual register
/// held by this object, in register order.
SmallVector<std::pair<unsigned, unsigned>, 4>
RegsForValue::getRegsAndSizes() const {
  SmallVector<std::pair<unsigned, unsigned>, 4> RegSizePairs;
  unsigned RegIdx = 0;
  // Walk the per-value (count, register type) pairs; each value owns the next
  // `Count` entries of Regs.
  for (auto CountAndVT : zip_first(RegCount, RegVTs)) {
    unsigned Count = std::get<0>(CountAndVT);
    unsigned SizeInBits = std::get<1>(CountAndVT).getSizeInBits();
    for (unsigned E = RegIdx + Count; RegIdx != E; ++RegIdx)
      RegSizePairs.emplace_back(Regs[RegIdx], SizeInBits);
  }
  return RegSizePairs;
}
1008 | | |
1009 | | void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa, |
1010 | 278k | const TargetLibraryInfo *li) { |
1011 | 278k | AA = aa; |
1012 | 278k | GFI = gfi; |
1013 | 278k | LibInfo = li; |
1014 | 278k | DL = &DAG.getDataLayout(); |
1015 | 278k | Context = DAG.getContext(); |
1016 | 278k | LPadToCallSiteMap.clear(); |
1017 | 278k | SL->init(DAG.getTargetLoweringInfo(), TM, DAG.getDataLayout()); |
1018 | 278k | } |
1019 | | |
1020 | 1.24M | void SelectionDAGBuilder::clear() { |
1021 | 1.24M | NodeMap.clear(); |
1022 | 1.24M | UnusedArgNodeMap.clear(); |
1023 | 1.24M | PendingLoads.clear(); |
1024 | 1.24M | PendingExports.clear(); |
1025 | 1.24M | CurInst = nullptr; |
1026 | 1.24M | HasTailCall = false; |
1027 | 1.24M | SDNodeOrder = LowestSDNodeOrder; |
1028 | 1.24M | StatepointLowering.clear(); |
1029 | 1.24M | } |
1030 | | |
1031 | 1.47M | void SelectionDAGBuilder::clearDanglingDebugInfo() { |
1032 | 1.47M | DanglingDebugInfoMap.clear(); |
1033 | 1.47M | } |
1034 | | |
1035 | 1.27M | SDValue SelectionDAGBuilder::getRoot() { |
1036 | 1.27M | if (PendingLoads.empty()) |
1037 | 1.05M | return DAG.getRoot(); |
1038 | 212k | |
1039 | 212k | if (PendingLoads.size() == 1) { |
1040 | 163k | SDValue Root = PendingLoads[0]; |
1041 | 163k | DAG.setRoot(Root); |
1042 | 163k | PendingLoads.clear(); |
1043 | 163k | return Root; |
1044 | 163k | } |
1045 | 48.6k | |
1046 | 48.6k | // Otherwise, we have to make a token factor node. |
1047 | 48.6k | SDValue Root = DAG.getTokenFactor(getCurSDLoc(), PendingLoads); |
1048 | 48.6k | PendingLoads.clear(); |
1049 | 48.6k | DAG.setRoot(Root); |
1050 | 48.6k | return Root; |
1051 | 48.6k | } |
1052 | | |
1053 | 2.14M | SDValue SelectionDAGBuilder::getControlRoot() { |
1054 | 2.14M | SDValue Root = DAG.getRoot(); |
1055 | 2.14M | |
1056 | 2.14M | if (PendingExports.empty()) |
1057 | 1.65M | return Root; |
1058 | 483k | |
1059 | 483k | // Turn all of the CopyToReg chains into one factored node. |
1060 | 483k | if (Root.getOpcode() != ISD::EntryToken) { |
1061 | 229k | unsigned i = 0, e = PendingExports.size(); |
1062 | 572k | for (; i != e; ++i342k ) { |
1063 | 342k | assert(PendingExports[i].getNode()->getNumOperands() > 1); |
1064 | 342k | if (PendingExports[i].getNode()->getOperand(0) == Root) |
1065 | 0 | break; // Don't add the root if we already indirectly depend on it. |
1066 | 342k | } |
1067 | 229k | |
1068 | 229k | if (i == e) |
1069 | 229k | PendingExports.push_back(Root); |
1070 | 229k | } |
1071 | 483k | |
1072 | 483k | Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, |
1073 | 483k | PendingExports); |
1074 | 483k | PendingExports.clear(); |
1075 | 483k | DAG.setRoot(Root); |
1076 | 483k | return Root; |
1077 | 483k | } |
1078 | | |
1079 | 6.77M | void SelectionDAGBuilder::visit(const Instruction &I) { |
1080 | 6.77M | // Set up outgoing PHI node register values before emitting the terminator. |
1081 | 6.77M | if (I.isTerminator()) { |
1082 | 1.12M | HandlePHINodesInSuccessorBlocks(I.getParent()); |
1083 | 1.12M | } |
1084 | 6.77M | |
1085 | 6.77M | // Increase the SDNodeOrder if dealing with a non-debug instruction. |
1086 | 6.77M | if (!isa<DbgInfoIntrinsic>(I)) |
1087 | 6.77M | ++SDNodeOrder; |
1088 | 6.77M | |
1089 | 6.77M | CurInst = &I; |
1090 | 6.77M | |
1091 | 6.77M | visit(I.getOpcode(), I); |
1092 | 6.77M | |
1093 | 6.77M | if (auto *FPMO = dyn_cast<FPMathOperator>(&I)) { |
1094 | 238k | // Propagate the fast-math-flags of this IR instruction to the DAG node that |
1095 | 238k | // maps to this instruction. |
1096 | 238k | // TODO: We could handle all flags (nsw, etc) here. |
1097 | 238k | // TODO: If an IR instruction maps to >1 node, only the final node will have |
1098 | 238k | // flags set. |
1099 | 238k | if (SDNode *Node = getNodeForIRValue(&I)) { |
1100 | 237k | SDNodeFlags IncomingFlags; |
1101 | 237k | IncomingFlags.copyFMF(*FPMO); |
1102 | 237k | if (!Node->getFlags().isDefined()) |
1103 | 234k | Node->setFlags(IncomingFlags); |
1104 | 3.23k | else |
1105 | 3.23k | Node->intersectFlagsWith(IncomingFlags); |
1106 | 237k | } |
1107 | 238k | } |
1108 | 6.77M | |
1109 | 6.77M | if (!I.isTerminator() && !HasTailCall5.65M && |
1110 | 6.77M | !isStatepoint(&I)5.58M ) // statepoints handle their exports internally |
1111 | 5.58M | CopyToExportRegsIfNeeded(&I); |
1112 | 6.77M | |
1113 | 6.77M | CurInst = nullptr; |
1114 | 6.77M | } |
1115 | | |
1116 | 0 | void SelectionDAGBuilder::visitPHI(const PHINode &) { |
1117 | 0 | llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!"); |
1118 | 0 | } |
1119 | | |
1120 | 7.07M | void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { |
1121 | 7.07M | // Note: this doesn't use InstVisitor, because it has to work with |
1122 | 7.07M | // ConstantExpr's in addition to instructions. |
1123 | 7.07M | switch (Opcode) { |
1124 | 7.07M | default: 0 llvm_unreachable0 ("Unknown instruction type encountered!"); |
1125 | 7.07M | // Build the switch statement using the Instruction.def file. |
1126 | 7.07M | #define HANDLE_INST(NUM, OPCODE, CLASS) \ |
1127 | 7.07M | case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break7.07M ; |
1128 | 7.07M | #include "llvm/IR/Instruction.def"259k |
1129 | 7.07M | } |
1130 | 7.07M | } |
1131 | | |
1132 | | void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable, |
1133 | 5.47k | const DIExpression *Expr) { |
1134 | 5.47k | auto isMatchingDbgValue = [&](DanglingDebugInfo &DDI) { |
1135 | 40 | const DbgValueInst *DI = DDI.getDI(); |
1136 | 40 | DIVariable *DanglingVariable = DI->getVariable(); |
1137 | 40 | DIExpression *DanglingExpr = DI->getExpression(); |
1138 | 40 | if (DanglingVariable == Variable && Expr->fragmentsOverlap(DanglingExpr)4 ) { |
1139 | 4 | LLVM_DEBUG(dbgs() << "Dropping dangling debug info for " << *DI << "\n"); |
1140 | 4 | return true; |
1141 | 4 | } |
1142 | 36 | return false; |
1143 | 36 | }; |
1144 | 5.47k | |
1145 | 5.47k | for (auto &DDIMI : DanglingDebugInfoMap) { |
1146 | 26 | DanglingDebugInfoVector &DDIV = DDIMI.second; |
1147 | 26 | |
1148 | 26 | // If debug info is to be dropped, run it through final checks to see |
1149 | 26 | // whether it can be salvaged. |
1150 | 26 | for (auto &DDI : DDIV) |
1151 | 20 | if (isMatchingDbgValue(DDI)) |
1152 | 2 | salvageUnresolvedDbgValue(DDI); |
1153 | 26 | |
1154 | 26 | DDIV.erase(remove_if(DDIV, isMatchingDbgValue), DDIV.end()); |
1155 | 26 | } |
1156 | 5.47k | } |
1157 | | |
// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
// generate the debug data structures now that we've seen its definition.
void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
                                                   SDValue Val) {
  auto DanglingDbgInfoIt = DanglingDebugInfoMap.find(V);
  if (DanglingDbgInfoIt == DanglingDebugInfoMap.end())
    return;

  DanglingDebugInfoVector &DDIV = DanglingDbgInfoIt->second;
  for (auto &DDI : DDIV) {
    const DbgValueInst *DI = DDI.getDI();
    assert(DI && "Ill-formed DanglingDebugInfo");
    DebugLoc dl = DDI.getdl();
    unsigned ValSDNodeOrder = Val.getNode()->getIROrder();
    unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
    DILocalVariable *Variable = DI->getVariable();
    DIExpression *Expr = DI->getExpression();
    assert(Variable->isValidLocationForIntrinsic(dl) &&
           "Expected inlined-at fields to agree");
    SDDbgValue *SDV;
    if (Val.getNode()) {
      // FIXME: I doubt that it is correct to resolve a dangling DbgValue as a
      // FuncArgumentDbgValue (it would be hoisted to the function entry, and
      // if we couldn't resolve it directly when examining the DbgValue
      // intrinsic in the first place we should not be more successful here).
      // Unless we have some test case that prove this to be correct we should
      // avoid calling EmitFuncArgumentDbgValue here.
      if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) {
        LLVM_DEBUG(dbgs() << "Resolve dangling debug info [order="
                          << DbgSDNodeOrder << "] for:\n  " << *DI << "\n");
        LLVM_DEBUG(dbgs() << "  By mapping to:\n    "; Val.dump());
        // Increase the SDNodeOrder for the DbgValue here to make sure it is
        // inserted after the definition of Val when emitting the instructions
        // after ISel. An alternative could be to teach
        // ScheduleDAGSDNodes::EmitSchedule to delay the insertion properly.
        LLVM_DEBUG(if (ValSDNodeOrder > DbgSDNodeOrder) dbgs()
                   << "changing SDNodeOrder from " << DbgSDNodeOrder << " to "
                   << ValSDNodeOrder << "\n");
        SDV = getDbgValue(Val, Variable, Expr, dl,
                          std::max(DbgSDNodeOrder, ValSDNodeOrder));
        DAG.AddDbgValue(SDV, Val.getNode(), false);
      } else
        LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " << *DI
                          << "in EmitFuncArgumentDbgValue\n");
    } else {
      LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
      auto Undef =
          UndefValue::get(DDI.getDI()->getVariableLocation()->getType());
      auto SDV =
          DAG.getConstantDbgValue(Variable, Expr, Undef, dl, DbgSDNodeOrder);
      DAG.AddDbgValue(SDV, nullptr, false);
    }
  }
  DDIV.clear();
}
1213 | | |
1214 | 14 | void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) { |
1215 | 14 | Value *V = DDI.getDI()->getValue(); |
1216 | 14 | DILocalVariable *Var = DDI.getDI()->getVariable(); |
1217 | 14 | DIExpression *Expr = DDI.getDI()->getExpression(); |
1218 | 14 | DebugLoc DL = DDI.getdl(); |
1219 | 14 | DebugLoc InstDL = DDI.getDI()->getDebugLoc(); |
1220 | 14 | unsigned SDOrder = DDI.getSDNodeOrder(); |
1221 | 14 | |
1222 | 14 | // Currently we consider only dbg.value intrinsics -- we tell the salvager |
1223 | 14 | // that DW_OP_stack_value is desired. |
1224 | 14 | assert(isa<DbgValueInst>(DDI.getDI())); |
1225 | 14 | bool StackValue = true; |
1226 | 14 | |
1227 | 14 | // Can this Value can be encoded without any further work? |
1228 | 14 | if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder)) |
1229 | 0 | return; |
1230 | 14 | |
1231 | 14 | // Attempt to salvage back through as many instructions as possible. Bail if |
1232 | 14 | // a non-instruction is seen, such as a constant expression or global |
1233 | 14 | // variable. FIXME: Further work could recover those too. |
1234 | 15 | while (14 isa<Instruction>(V)) { |
1235 | 2 | Instruction &VAsInst = *cast<Instruction>(V); |
1236 | 2 | DIExpression *NewExpr = salvageDebugInfoImpl(VAsInst, Expr, StackValue); |
1237 | 2 | |
1238 | 2 | // If we cannot salvage any further, and haven't yet found a suitable debug |
1239 | 2 | // expression, bail out. |
1240 | 2 | if (!NewExpr) |
1241 | 0 | break; |
1242 | 2 | |
1243 | 2 | // New value and expr now represent this debuginfo. |
1244 | 2 | V = VAsInst.getOperand(0); |
1245 | 2 | Expr = NewExpr; |
1246 | 2 | |
1247 | 2 | // Some kind of simplification occurred: check whether the operand of the |
1248 | 2 | // salvaged debug expression can be encoded in this DAG. |
1249 | 2 | if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder)) { |
1250 | 1 | LLVM_DEBUG(dbgs() << "Salvaged debug location info for:\n " |
1251 | 1 | << DDI.getDI() << "\nBy stripping back to:\n " << V); |
1252 | 1 | return; |
1253 | 1 | } |
1254 | 2 | } |
1255 | 14 | |
1256 | 14 | // This was the final opportunity to salvage this debug information, and it |
1257 | 14 | // couldn't be done. Place an undef DBG_VALUE at this location to terminate |
1258 | 14 | // any earlier variable location. |
1259 | 14 | auto Undef = UndefValue::get(DDI.getDI()->getVariableLocation()->getType()); |
1260 | 13 | auto SDV = DAG.getConstantDbgValue(Var, Expr, Undef, DL, SDNodeOrder); |
1261 | 13 | DAG.AddDbgValue(SDV, nullptr, false); |
1262 | 13 | |
1263 | 13 | LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " << DDI.getDI() |
1264 | 13 | << "\n"); |
1265 | 13 | LLVM_DEBUG(dbgs() << " Last seen at:\n " << *DDI.getDI()->getOperand(0) |
1266 | 13 | << "\n"); |
1267 | 13 | } |
1268 | | |
/// Emit an SDDbgValue for \p V, described by variable \p Var and expression
/// \p Expr at source location \p dl. Returns true if a debug value could be
/// produced now; returns false when V has no SDNode/vreg yet, in which case
/// the caller must keep the dbg.value dangling until V is lowered.
bool SelectionDAGBuilder::handleDebugValue(const Value *V, DILocalVariable *Var,
                                           DIExpression *Expr, DebugLoc dl,
                                           DebugLoc InstDL, unsigned Order) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDDbgValue *SDV;
  // Constants need no SDNode at all: emit a constant debug value directly.
  if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V) ||
      isa<ConstantPointerNull>(V)) {
    SDV = DAG.getConstantDbgValue(Var, Expr, V, dl, SDNodeOrder);
    DAG.AddDbgValue(SDV, nullptr, false);
    return true;
  }

  // If the Value is a frame index, we can create a FrameIndex debug value
  // without relying on the DAG at all.
  if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
    auto SI = FuncInfo.StaticAllocaMap.find(AI);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      auto SDV =
          DAG.getFrameIndexDbgValue(Var, Expr, SI->second,
                                    /*IsIndirect*/ false, dl, SDNodeOrder);
      // Do not attach the SDNodeDbgValue to an SDNode: this variable location
      // is still available even if the SDNode gets optimized out.
      DAG.AddDbgValue(SDV, nullptr, false);
      return true;
    }
  }

  // Do not use getValue() in here; we don't want to generate code at
  // this point if it hasn't been done yet.
  SDValue N = NodeMap[V];
  if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map.
    N = UnusedArgNodeMap[V];
  if (N.getNode()) {
    // Function arguments may need special handling (e.g. a physical-register
    // location); otherwise attach the debug value to the node computing V.
    if (EmitFuncArgumentDbgValue(V, Var, Expr, dl, false, N))
      return true;
    SDV = getDbgValue(N, Var, Expr, dl, SDNodeOrder);
    DAG.AddDbgValue(SDV, N.getNode(), false);
    return true;
  }

  // Special rules apply for the first dbg.values of parameter variables in a
  // function. Identify them by the fact they reference Argument Values, that
  // they're parameters, and they are parameters of the current function. We
  // need to let them dangle until they get an SDNode.
  bool IsParamOfFunc = isa<Argument>(V) && Var->isParameter() &&
                       !InstDL.getInlinedAt();
  if (!IsParamOfFunc) {
    // The value is not used in this block yet (or it would have an SDNode).
    // We still want the value to appear for the user if possible -- if it has
    // an associated VReg, we can refer to that instead.
    auto VMI = FuncInfo.ValueMap.find(V);
    if (VMI != FuncInfo.ValueMap.end()) {
      unsigned Reg = VMI->second;
      // If this is a PHI node, it may be split up into several MI PHI nodes
      // (in FunctionLoweringInfo::set).
      RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
                       V->getType(), None);
      if (RFV.occupiesMultipleRegs()) {
        // The value is spread over several registers: emit one fragment debug
        // value per register, clipped to the variable (or existing fragment)
        // size so we never describe bits past the variable's end.
        unsigned Offset = 0;
        unsigned BitsToDescribe = 0;
        if (auto VarSize = Var->getSizeInBits())
          BitsToDescribe = *VarSize;
        if (auto Fragment = Expr->getFragmentInfo())
          BitsToDescribe = Fragment->SizeInBits;
        for (auto RegAndSize : RFV.getRegsAndSizes()) {
          unsigned RegisterSize = RegAndSize.second;
          // Bail out if all bits are described already.
          if (Offset >= BitsToDescribe)
            break;
          // The last register may cover only part of its width.
          unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe)
              ? BitsToDescribe - Offset
              : RegisterSize;
          auto FragmentExpr = DIExpression::createFragmentExpression(
              Expr, Offset, FragmentSize);
          // createFragmentExpression can fail (returns None); skip that reg.
          if (!FragmentExpr)
            continue;
          SDV = DAG.getVRegDbgValue(Var, *FragmentExpr, RegAndSize.first,
                                    false, dl, SDNodeOrder);
          DAG.AddDbgValue(SDV, nullptr, false);
          Offset += RegisterSize;
        }
      } else {
        // Single register: one debug value describes the whole variable.
        SDV = DAG.getVRegDbgValue(Var, Expr, Reg, false, dl, SDNodeOrder);
        DAG.AddDbgValue(SDV, nullptr, false);
      }
      return true;
    }
  }

  return false;
}
1360 | | |
1361 | 1.19M | void SelectionDAGBuilder::resolveOrClearDbgInfo() { |
1362 | 1.19M | // Try to fixup any remaining dangling debug info -- and drop it if we can't. |
1363 | 1.19M | for (auto &Pair : DanglingDebugInfoMap) |
1364 | 28 | for (auto &DDI : Pair.second) |
1365 | 12 | salvageUnresolvedDbgValue(DDI); |
1366 | 1.19M | clearDanglingDebugInfo(); |
1367 | 1.19M | } |
1368 | | |
1369 | | /// getCopyFromRegs - If there was virtual register allocated for the value V |
1370 | | /// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise. |
1371 | 4.62M | SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) { |
1372 | 4.62M | DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V); |
1373 | 4.62M | SDValue Result; |
1374 | 4.62M | |
1375 | 4.62M | if (It != FuncInfo.ValueMap.end()) { |
1376 | 1.79M | unsigned InReg = It->second; |
1377 | 1.79M | |
1378 | 1.79M | RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), |
1379 | 1.79M | DAG.getDataLayout(), InReg, Ty, |
1380 | 1.79M | None); // This is not an ABI copy. |
1381 | 1.79M | SDValue Chain = DAG.getEntryNode(); |
1382 | 1.79M | Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, |
1383 | 1.79M | V); |
1384 | 1.79M | resolveDanglingDebugInfo(V, Result); |
1385 | 1.79M | } |
1386 | 4.62M | |
1387 | 4.62M | return Result; |
1388 | 4.62M | } |
1389 | | |
1390 | | /// getValue - Return an SDValue for the given Value. |
1391 | 10.8M | SDValue SelectionDAGBuilder::getValue(const Value *V) { |
1392 | 10.8M | // If we already have an SDValue for this value, use it. It's important |
1393 | 10.8M | // to do this first, so that we don't create a CopyFromReg if we already |
1394 | 10.8M | // have a regular SDValue. |
1395 | 10.8M | SDValue &N = NodeMap[V]; |
1396 | 10.8M | if (N.getNode()) return N6.18M ; |
1397 | 4.62M | |
1398 | 4.62M | // If there's a virtual register allocated and initialized for this |
1399 | 4.62M | // value, use it. |
1400 | 4.62M | if (SDValue copyFromReg = getCopyFromRegs(V, V->getType())) |
1401 | 1.79M | return copyFromReg; |
1402 | 2.82M | |
1403 | 2.82M | // Otherwise create a new SDValue and remember it. |
1404 | 2.82M | SDValue Val = getValueImpl(V); |
1405 | 2.82M | NodeMap[V] = Val; |
1406 | 2.82M | resolveDanglingDebugInfo(V, Val); |
1407 | 2.82M | return Val; |
1408 | 2.82M | } |
1409 | | |
1410 | | // Return true if SDValue exists for the given Value |
1411 | 463 | bool SelectionDAGBuilder::findValue(const Value *V) const { |
1412 | 463 | return (NodeMap.find(V) != NodeMap.end()) || |
1413 | 463 | (FuncInfo.ValueMap.find(V) != FuncInfo.ValueMap.end())53 ; |
1414 | 463 | } |
1415 | | |
1416 | | /// getNonRegisterValue - Return an SDValue for the given Value, but |
1417 | | /// don't look in FuncInfo.ValueMap for a virtual register. |
1418 | 815k | SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { |
1419 | 815k | // If we already have an SDValue for this value, use it. |
1420 | 815k | SDValue &N = NodeMap[V]; |
1421 | 815k | if (N.getNode()) { |
1422 | 666k | if (isa<ConstantSDNode>(N) || isa<ConstantFPSDNode>(N)647k ) { |
1423 | 19.0k | // Remove the debug location from the node as the node is about to be used |
1424 | 19.0k | // in a location which may differ from the original debug location. This |
1425 | 19.0k | // is relevant to Constant and ConstantFP nodes because they can appear |
1426 | 19.0k | // as constant expressions inside PHI nodes. |
1427 | 19.0k | N->setDebugLoc(DebugLoc()); |
1428 | 19.0k | } |
1429 | 666k | return N; |
1430 | 666k | } |
1431 | 148k | |
1432 | 148k | // Otherwise create a new SDValue and remember it. |
1433 | 148k | SDValue Val = getValueImpl(V); |
1434 | 148k | NodeMap[V] = Val; |
1435 | 148k | resolveDanglingDebugInfo(V, Val); |
1436 | 148k | return Val; |
1437 | 148k | } |
1438 | | |
/// getValueImpl - Helper function for getValue and getNonRegisterValue.
/// Create an SDValue for the given value.
SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  if (const Constant *C = dyn_cast<Constant>(V)) {
    EVT VT = TLI.getValueType(DAG.getDataLayout(), V->getType(), true);

    // Scalar constant kinds each map to a dedicated DAG node.
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
      return DAG.getConstant(*CI, getCurSDLoc(), VT);

    if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
      return DAG.getGlobalAddress(GV, getCurSDLoc(), VT);

    if (isa<ConstantPointerNull>(C)) {
      // Null pointer width depends on the address space.
      unsigned AS = V->getType()->getPointerAddressSpace();
      return DAG.getConstant(0, getCurSDLoc(),
                             TLI.getPointerTy(DAG.getDataLayout(), AS));
    }

    if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
      return DAG.getConstantFP(*CFP, getCurSDLoc(), VT);

    // Undef aggregates are handled by the struct/array path further down.
    if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
      return DAG.getUNDEF(VT);

    if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
      // Lower the constant expression as if it were an instruction; visit()
      // stores the result into NodeMap.
      visit(CE->getOpcode(), *CE);
      SDValue N1 = NodeMap[V];
      assert(N1.getNode() && "visit didn't populate the NodeMap!");
      return N1;
    }

    if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
      SmallVector<SDValue, 4> Constants;
      for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
           OI != OE; ++OI) {
        SDNode *Val = getValue(*OI).getNode();
        // If the operand is an empty aggregate, there are no values.
        if (!Val) continue;
        // Add each leaf value from the operand to the Constants list
        // to form a flattened list of all the values.
        for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
          Constants.push_back(SDValue(Val, i));
      }

      return DAG.getMergeValues(Constants, getCurSDLoc());
    }

    if (const ConstantDataSequential *CDS =
          dyn_cast<ConstantDataSequential>(C)) {
      SmallVector<SDValue, 4> Ops;
      for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
        SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode();
        // Add each leaf value from the operand to the Constants list
        // to form a flattened list of all the values.
        // (Note: the inner `i` intentionally shadows the element index.)
        for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
          Ops.push_back(SDValue(Val, i));
      }

      // Arrays become a merged value list; vectors become a BUILD_VECTOR.
      if (isa<ArrayType>(CDS->getType()))
        return DAG.getMergeValues(Ops, getCurSDLoc());
      return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
    }

    if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
      assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
             "Unknown struct or array constant!");

      // Zero/undef aggregates: synthesize one constant per leaf value type.
      SmallVector<EVT, 4> ValueVTs;
      ComputeValueVTs(TLI, DAG.getDataLayout(), C->getType(), ValueVTs);
      unsigned NumElts = ValueVTs.size();
      if (NumElts == 0)
        return SDValue(); // empty struct
      SmallVector<SDValue, 4> Constants(NumElts);
      for (unsigned i = 0; i != NumElts; ++i) {
        EVT EltVT = ValueVTs[i];
        if (isa<UndefValue>(C))
          Constants[i] = DAG.getUNDEF(EltVT);
        else if (EltVT.isFloatingPoint())
          Constants[i] = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
        else
          Constants[i] = DAG.getConstant(0, getCurSDLoc(), EltVT);
      }

      return DAG.getMergeValues(Constants, getCurSDLoc());
    }

    if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
      return DAG.getBlockAddress(BA, VT);

    // Only vector constants remain at this point.
    VectorType *VecTy = cast<VectorType>(V->getType());
    unsigned NumElements = VecTy->getNumElements();

    // Now that we know the number and type of the elements, get that number of
    // elements into the Ops array based on what kind of constant it is.
    SmallVector<SDValue, 16> Ops;
    if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
      for (unsigned i = 0; i != NumElements; ++i)
        Ops.push_back(getValue(CV->getOperand(i)));
    } else {
      assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
      EVT EltVT =
          TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType());

      // Splat a single zero of the element type across all lanes.
      SDValue Op;
      if (EltVT.isFloatingPoint())
        Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
      else
        Op = DAG.getConstant(0, getCurSDLoc(), EltVT);
      Ops.assign(NumElements, Op);
    }

    // Create a BUILD_VECTOR node.
    return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
  }

  // If this is a static alloca, generate it as the frameindex instead of
  // computation.
  if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
    DenseMap<const AllocaInst*, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(AI);
    if (SI != FuncInfo.StaticAllocaMap.end())
      return DAG.getFrameIndex(SI->second,
                               TLI.getFrameIndexTy(DAG.getDataLayout()));
  }

  // If this is an instruction which fast-isel has deferred, select it now.
  if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
    unsigned InReg = FuncInfo.InitializeRegForValue(Inst);

    RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
                     Inst->getType(), getABIRegCopyCC(V));
    SDValue Chain = DAG.getEntryNode();
    return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
  }

  llvm_unreachable("Can't get register for value!");
}
1578 | | |
1579 | 168 | void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) { |
1580 | 168 | auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); |
1581 | 168 | bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX; |
1582 | 168 | bool IsCoreCLR = Pers == EHPersonality::CoreCLR; |
1583 | 168 | bool IsSEH = isAsynchronousEHPersonality(Pers); |
1584 | 168 | bool IsWasmCXX = Pers == EHPersonality::Wasm_CXX; |
1585 | 168 | MachineBasicBlock *CatchPadMBB = FuncInfo.MBB; |
1586 | 168 | if (!IsSEH) |
1587 | 135 | CatchPadMBB->setIsEHScopeEntry(); |
1588 | 168 | // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues. |
1589 | 168 | if (IsMSVCCXX || IsCoreCLR94 ) |
1590 | 85 | CatchPadMBB->setIsEHFuncletEntry(); |
1591 | 168 | // Wasm does not need catchpads anymore |
1592 | 168 | if (!IsWasmCXX) |
1593 | 118 | DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other, |
1594 | 118 | getControlRoot())); |
1595 | 168 | } |
1596 | | |
1597 | 159 | void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) { |
1598 | 159 | // Update machine-CFG edge. |
1599 | 159 | MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()]; |
1600 | 159 | FuncInfo.MBB->addSuccessor(TargetMBB); |
1601 | 159 | |
1602 | 159 | auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); |
1603 | 159 | bool IsSEH = isAsynchronousEHPersonality(Pers); |
1604 | 159 | if (IsSEH) { |
1605 | 31 | // If this is not a fall-through branch or optimizations are switched off, |
1606 | 31 | // emit the branch. |
1607 | 31 | if (TargetMBB != NextBlock(FuncInfo.MBB) || |
1608 | 31 | TM.getOptLevel() == CodeGenOpt::None22 ) |
1609 | 11 | DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, |
1610 | 11 | getControlRoot(), DAG.getBasicBlock(TargetMBB))); |
1611 | 31 | return; |
1612 | 31 | } |
1613 | 128 | |
1614 | 128 | // Figure out the funclet membership for the catchret's successor. |
1615 | 128 | // This will be used by the FuncletLayout pass to determine how to order the |
1616 | 128 | // BB's. |
1617 | 128 | // A 'catchret' returns to the outer scope's color. |
1618 | 128 | Value *ParentPad = I.getCatchSwitchParentPad(); |
1619 | 128 | const BasicBlock *SuccessorColor; |
1620 | 128 | if (isa<ConstantTokenNone>(ParentPad)) |
1621 | 119 | SuccessorColor = &FuncInfo.Fn->getEntryBlock(); |
1622 | 9 | else |
1623 | 9 | SuccessorColor = cast<Instruction>(ParentPad)->getParent(); |
1624 | 128 | assert(SuccessorColor && "No parent funclet for catchret!"); |
1625 | 128 | MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor]; |
1626 | 128 | assert(SuccessorColorMBB && "No MBB for SuccessorColor!"); |
1627 | 128 | |
1628 | 128 | // Create the terminator node. |
1629 | 128 | SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other, |
1630 | 128 | getControlRoot(), DAG.getBasicBlock(TargetMBB), |
1631 | 128 | DAG.getBasicBlock(SuccessorColorMBB)); |
1632 | 128 | DAG.setRoot(Ret); |
1633 | 128 | } |
1634 | | |
1635 | 66 | void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) { |
1636 | 66 | // Don't emit any special code for the cleanuppad instruction. It just marks |
1637 | 66 | // the start of an EH scope/funclet. |
1638 | 66 | FuncInfo.MBB->setIsEHScopeEntry(); |
1639 | 66 | auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); |
1640 | 66 | if (Pers != EHPersonality::Wasm_CXX) { |
1641 | 46 | FuncInfo.MBB->setIsEHFuncletEntry(); |
1642 | 46 | FuncInfo.MBB->setIsCleanupFuncletEntry(); |
1643 | 46 | } |
1644 | 66 | } |
1645 | | |
// For wasm, there's always a single catch pad attached to a catchswitch, and
// the control flow always stops at the single catch pad, as it does for a
// cleanup pad. In case the exception caught is not of the types the catch pad
// catches, it will be rethrown by a rethrow.
static void findWasmUnwindDestinations(
    FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
    BranchProbability Prob,
    SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
        &UnwindDests) {
  while (EHPadBB) {
    const Instruction *Pad = EHPadBB->getFirstNonPHI();
    if (isa<CleanupPadInst>(Pad)) {
      // Stop on cleanup pads.
      UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
      UnwindDests.back().first->setIsEHScopeEntry();
      break;
    } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
      // Add the catchpad handlers to the possible destinations. We don't
      // continue to the unwind destination of the catchswitch for wasm.
      for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
        UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
        UnwindDests.back().first->setIsEHScopeEntry();
      }
      break;
    } else {
      // NOTE(review): this `continue` re-tests the same EHPadBB and would
      // loop forever if ever taken; presumably a wasm EH pad is always a
      // cleanuppad or catchswitch, making this branch unreachable -- confirm.
      continue;
    }
  }
}
1675 | | |
/// When an invoke or a cleanupret unwinds to the next EH pad, there are
/// many places it could ultimately go. In the IR, we have a single unwind
/// destination, but in the machine CFG, we enumerate all the possible blocks.
/// This function skips over imaginary basic blocks that hold catchswitch
/// instructions, and finds all the "real" machine
/// basic block destinations. As those destinations may not be successors of
/// EHPadBB, here we also calculate the edge probability to those destinations.
/// The passed-in Prob is the edge probability to EHPadBB.
static void findUnwindDestinations(
    FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
    BranchProbability Prob,
    SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
        &UnwindDests) {
  EHPersonality Personality =
    classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
  bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
  bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
  bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX;
  bool IsSEH = isAsynchronousEHPersonality(Personality);

  // Wasm has its own simpler rules (at most one destination).
  if (IsWasmCXX) {
    findWasmUnwindDestinations(FuncInfo, EHPadBB, Prob, UnwindDests);
    assert(UnwindDests.size() <= 1 &&
           "There should be at most one unwind destination for wasm");
    return;
  }

  // Walk the chain of EH pads, collecting real destinations until we hit a
  // pad that stops the unwind (landingpad or cleanuppad).
  while (EHPadBB) {
    const Instruction *Pad = EHPadBB->getFirstNonPHI();
    BasicBlock *NewEHPadBB = nullptr;
    if (isa<LandingPadInst>(Pad)) {
      // Stop on landingpads. They are not funclets.
      UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
      break;
    } else if (isa<CleanupPadInst>(Pad)) {
      // Stop on cleanup pads. Cleanups are always funclet entries for all known
      // personalities.
      UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
      UnwindDests.back().first->setIsEHScopeEntry();
      UnwindDests.back().first->setIsEHFuncletEntry();
      break;
    } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
      // Add the catchpad handlers to the possible destinations.
      for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
        UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
        // For MSVC++ and the CLR, catchblocks are funclets and need prologues.
        if (IsMSVCCXX || IsCoreCLR)
          UnwindDests.back().first->setIsEHFuncletEntry();
        if (!IsSEH)
          UnwindDests.back().first->setIsEHScopeEntry();
      }
      // Keep walking: the exception may match none of the handlers and then
      // unwinds to the catchswitch's own unwind destination (may be null).
      NewEHPadBB = CatchSwitch->getUnwindDest();
    } else {
      continue;
    }

    // Scale the running probability by the edge into the next pad, if known.
    BranchProbabilityInfo *BPI = FuncInfo.BPI;
    if (BPI && NewEHPadBB)
      Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB);
    EHPadBB = NewEHPadBB;
  }
}
1738 | | |
1739 | 53 | void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) { |
1740 | 53 | // Update successor info. |
1741 | 53 | SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests; |
1742 | 53 | auto UnwindDest = I.getUnwindDest(); |
1743 | 53 | BranchProbabilityInfo *BPI = FuncInfo.BPI; |
1744 | 53 | BranchProbability UnwindDestProb = |
1745 | 53 | (BPI && UnwindDest45 ) |
1746 | 53 | ? BPI->getEdgeProbability(FuncInfo.MBB->getBasicBlock(), UnwindDest)12 |
1747 | 53 | : BranchProbability::getZero()41 ; |
1748 | 53 | findUnwindDestinations(FuncInfo, UnwindDest, UnwindDestProb, UnwindDests); |
1749 | 53 | for (auto &UnwindDest : UnwindDests) { |
1750 | 15 | UnwindDest.first->setIsEHPad(); |
1751 | 15 | addSuccessorWithProb(FuncInfo.MBB, UnwindDest.first, UnwindDest.second); |
1752 | 15 | } |
1753 | 53 | FuncInfo.MBB->normalizeSuccProbs(); |
1754 | 53 | |
1755 | 53 | // Create the terminator node. |
1756 | 53 | SDValue Ret = |
1757 | 53 | DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot()); |
1758 | 53 | DAG.setRoot(Ret); |
1759 | 53 | } |
1760 | | |
void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) {
  // Lowering a catchswitch directly is unsupported; abort loudly rather than
  // miscompile if one ever reaches this visitor.
  report_fatal_error("visitCatchSwitch not yet implemented!");
}
1764 | | |
void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  auto &DL = DAG.getDataLayout();
  SDValue Chain = getControlRoot();
  SmallVector<ISD::OutputArg, 8> Outs;
  SmallVector<SDValue, 8> OutVals;

  // Calls to @llvm.experimental.deoptimize don't generate a return value, so
  // lower
  //
  //   %val = call <ty> @llvm.experimental.deoptimize()
  //   ret <ty> %val
  //
  // differently.
  if (I.getParent()->getTerminatingDeoptimizeCall()) {
    LowerDeoptimizingReturn();
    return;
  }

  if (!FuncInfo.CanLowerReturn) {
    // sret demotion: the return value is passed back through memory via the
    // demote register, which holds a pointer to the caller's buffer.
    unsigned DemoteReg = FuncInfo.DemoteRegister;
    const Function *F = I.getParent()->getParent();

    // Emit a store of the return value through the virtual register.
    // Leave Outs empty so that LowerReturn won't try to load return
    // registers the usual way.
    SmallVector<EVT, 1> PtrValueVTs;
    ComputeValueVTs(TLI, DL,
                    F->getReturnType()->getPointerTo(
                        DAG.getDataLayout().getAllocaAddrSpace()),
                    PtrValueVTs);

    SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
                                        DemoteReg, PtrValueVTs[0]);
    SDValue RetOp = getValue(I.getOperand(0));

    SmallVector<EVT, 4> ValueVTs, MemVTs;
    SmallVector<uint64_t, 4> Offsets;
    ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &MemVTs,
                    &Offsets);
    unsigned NumValues = ValueVTs.size();

    // Store each leaf value of the return value at its byte offset.
    SmallVector<SDValue, 4> Chains(NumValues);
    for (unsigned i = 0; i != NumValues; ++i) {
      // An aggregate return value cannot wrap around the address space, so
      // offsets to its parts don't wrap either.
      SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]);

      SDValue Val = RetOp.getValue(i);
      // Pointers may use a different in-memory type than their value type.
      if (MemVTs[i] != ValueVTs[i])
        Val = DAG.getPtrExtOrTrunc(Val, getCurSDLoc(), MemVTs[i]);
      Chains[i] = DAG.getStore(Chain, getCurSDLoc(), Val,
          // FIXME: better loc info would be nice.
          Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
    }

    // Join all the stores into a single chain.
    Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
                        MVT::Other, Chains);
  } else if (I.getNumOperands() != 0) {
    // Normal path: split the return value into legal register-sized parts
    // and describe each part in Outs/OutVals for LowerReturn.
    SmallVector<EVT, 4> ValueVTs;
    ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs);
    unsigned NumValues = ValueVTs.size();
    if (NumValues) {
      SDValue RetOp = getValue(I.getOperand(0));

      const Function *F = I.getParent()->getParent();

      bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
          I.getOperand(0)->getType(), F->getCallingConv(),
          /*IsVarArg*/ false);

      // The function's sext/zext return attributes control how small integer
      // results are widened to a legal register type.
      ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
      if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
                                          Attribute::SExt))
        ExtendKind = ISD::SIGN_EXTEND;
      else if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
                                               Attribute::ZExt))
        ExtendKind = ISD::ZERO_EXTEND;

      LLVMContext &Context = F->getContext();
      bool RetInReg = F->getAttributes().hasAttribute(
          AttributeList::ReturnIndex, Attribute::InReg);

      for (unsigned j = 0; j != NumValues; ++j) {
        EVT VT = ValueVTs[j];

        if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
          VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);

        CallingConv::ID CC = F->getCallingConv();

        // Split this leaf value into the parts the calling convention wants.
        unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, CC, VT);
        MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, CC, VT);
        SmallVector<SDValue, 4> Parts(NumParts);
        getCopyToParts(DAG, getCurSDLoc(),
                       SDValue(RetOp.getNode(), RetOp.getResNo() + j),
                       &Parts[0], NumParts, PartVT, &I, CC, ExtendKind);

        // 'inreg' on function refers to return value
        ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
        if (RetInReg)
          Flags.setInReg();

        if (I.getOperand(0)->getType()->isPointerTy()) {
          Flags.setPointer();
          Flags.setPointerAddrSpace(
              cast<PointerType>(I.getOperand(0)->getType())->getAddressSpace());
        }

        if (NeedsRegBlock) {
          Flags.setInConsecutiveRegs();
          if (j == NumValues - 1)
            Flags.setInConsecutiveRegsLast();
        }

        // Propagate extension type if any
        if (ExtendKind == ISD::SIGN_EXTEND)
          Flags.setSExt();
        else if (ExtendKind == ISD::ZERO_EXTEND)
          Flags.setZExt();

        for (unsigned i = 0; i < NumParts; ++i) {
          Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
                                        VT, /*isfixed=*/true, 0, 0));
          OutVals.push_back(Parts[i]);
        }
      }
    }
  }

  // Push in swifterror virtual register as the last element of Outs. This makes
  // sure swifterror virtual register will be returned in the swifterror
  // physical register.
  const Function *F = I.getParent()->getParent();
  if (TLI.supportSwiftError() &&
      F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) {
    assert(SwiftError.getFunctionArg() && "Need a swift error argument");
    ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
    Flags.setSwiftError();
    Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/,
                                  EVT(TLI.getPointerTy(DL)) /*argvt*/,
                                  true /*isfixed*/, 1 /*origidx*/,
                                  0 /*partOffs*/));
    // Create SDNode for the swifterror virtual register.
    OutVals.push_back(
        DAG.getRegister(SwiftError.getOrCreateVRegUseAt(
                            &I, FuncInfo.MBB, SwiftError.getFunctionArg()),
                        EVT(TLI.getPointerTy(DL))));
  }

  bool isVarArg = DAG.getMachineFunction().getFunction().isVarArg();
  CallingConv::ID CallConv =
      DAG.getMachineFunction().getFunction().getCallingConv();
  Chain = DAG.getTargetLoweringInfo().LowerReturn(
      Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG);

  // Verify that the target's LowerReturn behaved as expected.
  assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
         "LowerReturn didn't return a valid chain!");

  // Update the DAG with the new chain value resulting from return lowering.
  DAG.setRoot(Chain);
}
1928 | | |
1929 | | /// CopyToExportRegsIfNeeded - If the given value has virtual registers |
1930 | | /// created for it, emit nodes to copy the value into the virtual |
1931 | | /// registers. |
1932 | 5.61M | void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) { |
1933 | 5.61M | // Skip empty types |
1934 | 5.61M | if (V->getType()->isEmptyTy()) |
1935 | 16 | return; |
1936 | 5.61M | |
1937 | 5.61M | DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); |
1938 | 5.61M | if (VMI != FuncInfo.ValueMap.end()) { |
1939 | 624k | assert(!V->use_empty() && "Unused value assigned virtual registers!"); |
1940 | 624k | CopyValueToVirtualRegister(V, VMI->second); |
1941 | 624k | } |
1942 | 5.61M | } |
1943 | | |
1944 | | /// ExportFromCurrentBlock - If this condition isn't known to be exported from |
1945 | | /// the current basic block, add it to ValueMap now so that we'll get a |
1946 | | /// CopyTo/FromReg. |
1947 | 64.8k | void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) { |
1948 | 64.8k | // No need to export constants. |
1949 | 64.8k | if (!isa<Instruction>(V) && !isa<Argument>(V)17.7k ) return14.6k ; |
1950 | 50.1k | |
1951 | 50.1k | // Already exported? |
1952 | 50.1k | if (FuncInfo.isExportedInst(V)) return21.3k ; |
1953 | 28.8k | |
1954 | 28.8k | unsigned Reg = FuncInfo.InitializeRegForValue(V); |
1955 | 28.8k | CopyValueToVirtualRegister(V, Reg); |
1956 | 28.8k | } |
1957 | | |
/// Return true if \p V may be used by a comparison that is lowered outside of
/// \p FromBB: it is defined in \p FromBB itself, already exported to a
/// virtual register, an argument visible from the entry block, or a constant.
bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
                                                const BasicBlock *FromBB) {
  // The operands of the setcc have to be in this block.  We don't know
  // how to export them from some other block.
  if (const Instruction *VI = dyn_cast<Instruction>(V)) {
    // Can export from current BB.
    if (VI->getParent() == FromBB)
      return true;

    // Is already exported, noop.
    return FuncInfo.isExportedInst(V);
  }

  // If this is an argument, we can export it if the BB is the entry block or
  // if it is already exported.
  if (isa<Argument>(V)) {
    if (FromBB == &FromBB->getParent()->getEntryBlock())
      return true;

    // Otherwise, can only export this if it is already exported.
    return FuncInfo.isExportedInst(V);
  }

  // Otherwise, constants can always be exported.
  return true;
}
1984 | | |
1985 | | /// Return branch probability calculated by BranchProbabilityInfo for IR blocks. |
1986 | | BranchProbability |
1987 | | SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src, |
1988 | 1.07M | const MachineBasicBlock *Dst) const { |
1989 | 1.07M | BranchProbabilityInfo *BPI = FuncInfo.BPI; |
1990 | 1.07M | const BasicBlock *SrcBB = Src->getBasicBlock(); |
1991 | 1.07M | const BasicBlock *DstBB = Dst->getBasicBlock(); |
1992 | 1.07M | if (!BPI) { |
1993 | 49 | // If BPI is not available, set the default probability as 1 / N, where N is |
1994 | 49 | // the number of successors. |
1995 | 49 | auto SuccSize = std::max<uint32_t>(succ_size(SrcBB), 1); |
1996 | 49 | return BranchProbability(1, SuccSize); |
1997 | 49 | } |
1998 | 1.07M | return BPI->getEdgeProbability(SrcBB, DstBB); |
1999 | 1.07M | } |
2000 | | |
2001 | | void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src, |
2002 | | MachineBasicBlock *Dst, |
2003 | 1.15M | BranchProbability Prob) { |
2004 | 1.15M | if (!FuncInfo.BPI) |
2005 | 1.61k | Src->addSuccessorWithoutProb(Dst); |
2006 | 1.15M | else { |
2007 | 1.15M | if (Prob.isUnknown()) |
2008 | 991k | Prob = getEdgeProbability(Src, Dst); |
2009 | 1.15M | Src->addSuccessor(Dst, Prob); |
2010 | 1.15M | } |
2011 | 1.15M | } |
2012 | | |
2013 | 70.5k | static bool InBlock(const Value *V, const BasicBlock *BB) { |
2014 | 70.5k | if (const Instruction *I = dyn_cast<Instruction>(V)) |
2015 | 70.3k | return I->getParent() == BB; |
2016 | 249 | return true; |
2017 | 249 | } |
2018 | | |
/// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
/// This function emits a branch and is used at the leaves of an OR or an
/// AND operator tree.
///
/// The branch is recorded as a CaseBlock appended to SL->SwitchCases rather
/// than emitted directly; InvertCond flips the comparison predicate so that
/// `not` nodes skipped higher in the tree are honored.
void
SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
                                                  MachineBasicBlock *TBB,
                                                  MachineBasicBlock *FBB,
                                                  MachineBasicBlock *CurBB,
                                                  MachineBasicBlock *SwitchBB,
                                                  BranchProbability TProb,
                                                  BranchProbability FProb,
                                                  bool InvertCond) {
  const BasicBlock *BB = CurBB->getBasicBlock();

  // If the leaf of the tree is a comparison, merge the condition into
  // the caseblock.
  if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
    // The operands of the cmp have to be in this block.  We don't know
    // how to export them from some other block.  If this is the first block
    // of the sequence, no exporting is needed.
    if (CurBB == SwitchBB ||
        (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
         isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
      ISD::CondCode Condition;
      if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
        // Apply the pending inversion by picking the inverse predicate.
        ICmpInst::Predicate Pred =
            InvertCond ? IC->getInversePredicate() : IC->getPredicate();
        Condition = getICmpCondCode(Pred);
      } else {
        const FCmpInst *FC = cast<FCmpInst>(Cond);
        FCmpInst::Predicate Pred =
            InvertCond ? FC->getInversePredicate() : FC->getPredicate();
        Condition = getFCmpCondCode(Pred);
        if (TM.Options.NoNaNsFPMath)
          Condition = getFCmpCodeWithoutNaN(Condition);
      }

      CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
                   TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
      SL->SwitchCases.push_back(CB);
      return;
    }
  }

  // Create a CaseBlock record representing this branch: test the condition
  // value itself against `true` (SETNE when the inversion is still pending).
  ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ;
  CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()),
               nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
  SL->SwitchCases.push_back(CB);
}
2069 | | |
/// Recursively decompose a condition built from one-use and/or nodes into a
/// sequence of conditional branches (recorded via
/// EmitBranchForMergedCondition), splitting branch probabilities across the
/// newly created blocks.  InvertCond tracks a pending logical negation picked
/// up from skipped `not` nodes.
void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
                                               MachineBasicBlock *TBB,
                                               MachineBasicBlock *FBB,
                                               MachineBasicBlock *CurBB,
                                               MachineBasicBlock *SwitchBB,
                                               Instruction::BinaryOps Opc,
                                               BranchProbability TProb,
                                               BranchProbability FProb,
                                               bool InvertCond) {
  // Skip over not part of the tree and remember to invert op and operands at
  // next level.
  Value *NotCond;
  if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) &&
      InBlock(NotCond, CurBB->getBasicBlock())) {
    FindMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
                         !InvertCond);
    return;
  }

  const Instruction *BOp = dyn_cast<Instruction>(Cond);
  // Compute the effective opcode for Cond, taking into account whether it needs
  // to be inverted, e.g.
  //   and (not (or A, B)), C
  // gets lowered as
  //   and (and (not A, not B), C)
  unsigned BOpc = 0;
  if (BOp) {
    BOpc = BOp->getOpcode();
    if (InvertCond) {
      if (BOpc == Instruction::And)
        BOpc = Instruction::Or;
      else if (BOpc == Instruction::Or)
        BOpc = Instruction::And;
    }
  }

  // If this node is not part of the or/and tree, emit it as a branch.
  // (It must be a one-use and/or of the expected opcode, defined in CurBB,
  // with both operands available in CurBB.)
  if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
      BOpc != unsigned(Opc) || !BOp->hasOneUse() ||
      BOp->getParent() != CurBB->getBasicBlock() ||
      !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
      !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
    EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
                                 TProb, FProb, InvertCond);
    return;
  }

  // Create TmpBB after CurBB.
  MachineFunction::iterator BBI(CurBB);
  MachineFunction &MF = DAG.getMachineFunction();
  MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
  CurBB->getParent()->insert(++BBI, TmpBB);

  if (Opc == Instruction::Or) {
    // Codegen X | Y as:
    // BB1:
    //   jmp_if_X TBB
    //   jmp TmpBB
    // TmpBB:
    //   jmp_if_Y TBB
    //   jmp FBB
    //

    // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
    // The requirement is that
    //   TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
    //     = TrueProb for original BB.
    // Assuming the original probabilities are A and B, one choice is to set
    // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
    // A/(1+B) and 2B/(1+B). This choice assumes that
    //   TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
    // Another choice is to assume TrueProb for BB1 equals to TrueProb for
    // TmpBB, but the math is more complicated.

    auto NewTrueProb = TProb / 2;
    auto NewFalseProb = TProb / 2 + FProb;
    // Emit the LHS condition.
    FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc,
                         NewTrueProb, NewFalseProb, InvertCond);

    // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
    SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
    BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
    // Emit the RHS condition into TmpBB.
    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
                         Probs[0], Probs[1], InvertCond);
  } else {
    assert(Opc == Instruction::And && "Unknown merge op!");
    // Codegen X & Y as:
    // BB1:
    //   jmp_if_X TmpBB
    //   jmp FBB
    // TmpBB:
    //   jmp_if_Y TBB
    //   jmp FBB
    //
    // This requires creation of TmpBB after CurBB.

    // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
    // The requirement is that
    //   FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
    //     = FalseProb for original BB.
    // Assuming the original probabilities are A and B, one choice is to set
    // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
    // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
    // TrueProb for BB1 * FalseProb for TmpBB.

    auto NewTrueProb = TProb + FProb / 2;
    auto NewFalseProb = FProb / 2;
    // Emit the LHS condition.
    FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc,
                         NewTrueProb, NewFalseProb, InvertCond);

    // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
    SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
    BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
    // Emit the RHS condition into TmpBB.
    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
                         Probs[0], Probs[1], InvertCond);
  }
}
2191 | | |
2192 | | /// If the set of cases should be emitted as a series of branches, return true. |
2193 | | /// If we should emit this as a bunch of and/or'd together conditions, return |
2194 | | /// false. |
2195 | | bool |
2196 | 36.0k | SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases) { |
2197 | 36.0k | if (Cases.size() != 2) return true3.26k ; |
2198 | 32.8k | |
2199 | 32.8k | // If this is two comparisons of the same values or'd or and'd together, they |
2200 | 32.8k | // will get folded into a single comparison, so don't emit two blocks. |
2201 | 32.8k | if ((Cases[0].CmpLHS == Cases[1].CmpLHS && |
2202 | 32.8k | Cases[0].CmpRHS == Cases[1].CmpRHS1.40k ) || |
2203 | 32.8k | (32.7k Cases[0].CmpRHS == Cases[1].CmpLHS32.7k && |
2204 | 32.7k | Cases[0].CmpLHS == Cases[1].CmpRHS30 )) { |
2205 | 9 | return false; |
2206 | 9 | } |
2207 | 32.7k | |
2208 | 32.7k | // Handle: (X != null) | (Y != null) --> (X|Y) != 0 |
2209 | 32.7k | // Handle: (X == null) & (Y == null) --> (X|Y) == 0 |
2210 | 32.7k | if (Cases[0].CmpRHS == Cases[1].CmpRHS && |
2211 | 32.7k | Cases[0].CC == Cases[1].CC9.88k && |
2212 | 32.7k | isa<Constant>(Cases[0].CmpRHS)7.07k && |
2213 | 32.7k | cast<Constant>(Cases[0].CmpRHS)->isNullValue()6.99k ) { |
2214 | 6.32k | if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB5.87k ) |
2215 | 5.56k | return false; |
2216 | 761 | if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB168 ) |
2217 | 15 | return false; |
2218 | 27.2k | } |
2219 | 27.2k | |
2220 | 27.2k | return true; |
2221 | 27.2k | } |
2222 | | |
/// Lower an IR `br` instruction: record machine-CFG edges and either emit an
/// unconditional BR, expand a one-use and/or condition tree into a cascade of
/// conditional branches, or fall back to a single CaseBlock compare-and-branch.
void SelectionDAGBuilder::visitBr(const BranchInst &I) {
  MachineBasicBlock *BrMBB = FuncInfo.MBB;

  // Update machine-CFG edges.
  MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];

  if (I.isUnconditional()) {
    // Update machine-CFG edges.
    BrMBB->addSuccessor(Succ0MBB);

    // If this is not a fall-through branch or optimizations are switched off,
    // emit the branch.
    if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None)
      DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
                              MVT::Other, getControlRoot(),
                              DAG.getBasicBlock(Succ0MBB)));

    return;
  }

  // If this condition is one of the special cases we handle, do special stuff
  // now.
  const Value *CondVal = I.getCondition();
  MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];

  // If this is a series of conditions that are or'd or and'd together, emit
  // this as a sequence of branches instead of setcc's with and/or operations.
  // As long as jumps are not expensive, this should improve performance.
  // For example, instead of something like:
  //     cmp A, B
  //     C = seteq
  //     cmp D, E
  //     F = setle
  //     or C, F
  //     jnz foo
  // Emit:
  //     cmp A, B
  //     je foo
  //     cmp D, E
  //     jle foo
  if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
    Instruction::BinaryOps Opcode = BOp->getOpcode();
    // Only merge one-use and/or conditions, and respect `unpredictable`
    // metadata which asks us not to split the branch.
    if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() &&
        !I.getMetadata(LLVMContext::MD_unpredictable) &&
        (Opcode == Instruction::And || Opcode == Instruction::Or)) {
      FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
                           Opcode,
                           getEdgeProbability(BrMBB, Succ0MBB),
                           getEdgeProbability(BrMBB, Succ1MBB),
                           /*InvertCond=*/false);
      // If the compares in later blocks need to use values not currently
      // exported from this block, export them now.  This block should always
      // be the first entry.
      assert(SL->SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");

      // Allow some cases to be rejected.
      if (ShouldEmitAsBranches(SL->SwitchCases)) {
        for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i) {
          ExportFromCurrentBlock(SL->SwitchCases[i].CmpLHS);
          ExportFromCurrentBlock(SL->SwitchCases[i].CmpRHS);
        }

        // Emit the branch for this block.
        visitSwitchCase(SL->SwitchCases[0], BrMBB);
        SL->SwitchCases.erase(SL->SwitchCases.begin());
        return;
      }

      // Okay, we decided not to do this, remove any inserted MBB's and clear
      // SwitchCases.
      for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i)
        FuncInfo.MF->erase(SL->SwitchCases[i].ThisBB);

      SL->SwitchCases.clear();
    }
  }

  // Create a CaseBlock record representing this branch.
  CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
               nullptr, Succ0MBB, Succ1MBB, BrMBB, getCurSDLoc());

  // Use visitSwitchCase to actually insert the fast branch sequence for this
  // cond branch.
  visitSwitchCase(CB, BrMBB);
}
2308 | | |
/// visitSwitchCase - Emits the necessary code to represent a single node in
/// the binary search tree resulting from lowering a switch instruction.
///
/// Builds the setcc (or range check for CmpMHS ranges), records successor
/// probabilities, and emits a BRCOND to TrueBB followed by an explicit BR to
/// FalseBB.
void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
                                          MachineBasicBlock *SwitchBB) {
  SDValue Cond;
  SDValue CondLHS = getValue(CB.CmpLHS);
  SDLoc dl = CB.DL;

  if (CB.CC == ISD::SETTRUE) {
    // Branch or fall through to TrueBB.
    addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
    SwitchBB->normalizeSuccProbs();
    if (CB.TrueBB != NextBlock(SwitchBB)) {
      DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, getControlRoot(),
                              DAG.getBasicBlock(CB.TrueBB)));
    }
    return;
  }

  auto &TLI = DAG.getTargetLoweringInfo();
  EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), CB.CmpLHS->getType());

  // Build the setcc now.
  if (!CB.CmpMHS) {
    // Fold "(X == true)" to X and "(X == false)" to !X to
    // handle common cases produced by branch lowering.
    if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
        CB.CC == ISD::SETEQ)
      Cond = CondLHS;
    else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
             CB.CC == ISD::SETEQ) {
      // !X is lowered as X xor 1.
      SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType());
      Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
    } else {
      SDValue CondRHS = getValue(CB.CmpRHS);

      // If a pointer's DAG type is larger than its memory type then the DAG
      // values are zero-extended. This breaks signed comparisons so truncate
      // back to the underlying type before doing the compare.
      if (CondLHS.getValueType() != MemVT) {
        CondLHS = DAG.getPtrExtOrTrunc(CondLHS, getCurSDLoc(), MemVT);
        CondRHS = DAG.getPtrExtOrTrunc(CondRHS, getCurSDLoc(), MemVT);
      }
      Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, CondRHS, CB.CC);
    }
  } else {
    assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");

    const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
    const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();

    SDValue CmpOp = getValue(CB.CmpMHS);
    EVT VT = CmpOp.getValueType();

    if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
      // Low is the signed minimum, so only the upper bound needs checking.
      Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, dl, VT),
                          ISD::SETLE);
    } else {
      // Rewrite Low <= X <= High as (X - Low) <=u (High - Low).
      SDValue SUB = DAG.getNode(ISD::SUB, dl,
                                VT, CmpOp, DAG.getConstant(Low, dl, VT));
      Cond = DAG.getSetCC(dl, MVT::i1, SUB,
                          DAG.getConstant(High-Low, dl, VT), ISD::SETULE);
    }
  }

  // Update successor info
  addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
  // TrueBB and FalseBB are always different unless the incoming IR is
  // degenerate. This only happens when running llc on weird IR.
  if (CB.TrueBB != CB.FalseBB)
    addSuccessorWithProb(SwitchBB, CB.FalseBB, CB.FalseProb);
  SwitchBB->normalizeSuccProbs();

  // If the lhs block is the next block, invert the condition so that we can
  // fall through to the lhs instead of the rhs block.
  if (CB.TrueBB == NextBlock(SwitchBB)) {
    std::swap(CB.TrueBB, CB.FalseBB);
    SDValue True = DAG.getConstant(1, dl, Cond.getValueType());
    Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
  }

  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
                               MVT::Other, getControlRoot(), Cond,
                               DAG.getBasicBlock(CB.TrueBB));

  // Insert the false branch. Do this even if it's a fall through branch,
  // this makes it easier to do DAG optimizations which require inverting
  // the branch condition.
  BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
                       DAG.getBasicBlock(CB.FalseBB));

  DAG.setRoot(BrCond);
}
2402 | | |
2403 | | /// visitJumpTable - Emit JumpTable node in the current MBB |
2404 | 2.26k | void SelectionDAGBuilder::visitJumpTable(SwitchCG::JumpTable &JT) { |
2405 | 2.26k | // Emit the code for the jump table |
2406 | 2.26k | assert(JT.Reg != -1U && "Should lower JT Header first!"); |
2407 | 2.26k | EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); |
2408 | 2.26k | SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(), |
2409 | 2.26k | JT.Reg, PTy); |
2410 | 2.26k | SDValue Table = DAG.getJumpTable(JT.JTI, PTy); |
2411 | 2.26k | SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(), |
2412 | 2.26k | MVT::Other, Index.getValue(1), |
2413 | 2.26k | Table, Index); |
2414 | 2.26k | DAG.setRoot(BrJumpTable); |
2415 | 2.26k | } |
2416 | | |
/// visitJumpTableHeader - This function emits necessary code to produce index
/// in the JumpTable from switch case.
///
/// The switched-on value is rebased by the lowest case (JTH.First), copied to
/// a virtual register for use in the jump-table block, and — unless
/// JTH.OmitRangeCheck says the range is already known to be in bounds — an
/// unsigned range check branching to JT.Default is emitted.
void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT,
                                               JumpTableHeader &JTH,
                                               MachineBasicBlock *SwitchBB) {
  SDLoc dl = getCurSDLoc();

  // Subtract the lowest switch case value from the value being switched on.
  SDValue SwitchOp = getValue(JTH.SValue);
  EVT VT = SwitchOp.getValueType();
  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
                            DAG.getConstant(JTH.First, dl, VT));

  // The SDNode we just created, which holds the value being switched on minus
  // the smallest case value, needs to be copied to a virtual register so it
  // can be used as an index into the jump table in a subsequent basic block.
  // This value may be smaller or larger than the target's pointer type, and
  // therefore require extension or truncating.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy(DAG.getDataLayout()));

  unsigned JumpTableReg =
      FuncInfo.CreateReg(TLI.getPointerTy(DAG.getDataLayout()));
  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl,
                                    JumpTableReg, SwitchOp);
  JT.Reg = JumpTableReg;

  if (!JTH.OmitRangeCheck) {
    // Emit the range check for the jump table, and branch to the default block
    // for the switch statement if the value being switched on exceeds the
    // largest case in the switch.
    SDValue CMP = DAG.getSetCC(
        dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                   Sub.getValueType()),
        Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT);

    SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
                                 MVT::Other, CopyTo, CMP,
                                 DAG.getBasicBlock(JT.Default));

    // Avoid emitting unnecessary branches to the next block.
    if (JT.MBB != NextBlock(SwitchBB))
      BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
                           DAG.getBasicBlock(JT.MBB));

    DAG.setRoot(BrCond);
  } else {
    // Avoid emitting unnecessary branches to the next block.
    if (JT.MBB != NextBlock(SwitchBB))
      DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, CopyTo,
                              DAG.getBasicBlock(JT.MBB)));
    else
      DAG.setRoot(CopyTo);
  }
}
2472 | | |
/// Create a LOAD_STACK_GUARD node, and let it carry the target specific global
/// variable if there exists one.
///
/// When the module declares a stack-guard global, a memory operand describing
/// that (invariant, dereferenceable) load is attached so later passes can
/// reason about it.  The result is extended/truncated if the pointer's DAG
/// type differs from its memory type.
static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
                                 SDValue &Chain) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
  EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout());
  MachineFunction &MF = DAG.getMachineFunction();
  Value *Global = TLI.getSDagStackGuard(*MF.getFunction().getParent());
  MachineSDNode *Node =
      DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain);
  if (Global) {
    MachinePointerInfo MPInfo(Global);
    auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
                 MachineMemOperand::MODereferenceable;
    MachineMemOperand *MemRef = MF.getMachineMemOperand(
        MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlignment(PtrTy));
    DAG.setNodeMemRefs(Node, {MemRef});
  }
  if (PtrTy != PtrMemTy)
    return DAG.getPtrExtOrTrunc(SDValue(Node, 0), DL, PtrMemTy);
  return SDValue(Node, 0);
}
2496 | | |
/// Codegen a new tail for a stack protector check ParentMBB which has had its
/// tail spliced into a stack protector check success bb.
///
/// For a high level explanation of how this fits into the stack protector
/// generation see the comment on the declaration of class
/// StackProtectorDescriptor.
void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
                                                  MachineBasicBlock *ParentBB) {

  // First create the loads to the guard/stack slot for the comparison.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
  EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout());

  MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
  int FI = MFI.getStackProtectorIndex();

  SDValue Guard;
  SDLoc dl = getCurSDLoc();
  SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
  const Module &M = *ParentBB->getParent()->getFunction().getParent();
  unsigned Align = DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext()));

  // Generate code to load the content of the guard slot.
  SDValue GuardVal = DAG.getLoad(
      PtrMemTy, dl, DAG.getEntryNode(), StackSlotPtr,
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align,
      MachineMemOperand::MOVolatile);

  if (TLI.useStackGuardXorFP())
    GuardVal = TLI.emitStackGuardXorFP(DAG, GuardVal, dl);

  // Retrieve guard check function, nullptr if instrumentation is inlined.
  if (const Function *GuardCheckFn = TLI.getSSPStackGuardCheck(M)) {
    // The target provides a guard check function to validate the guard value.
    // Generate a call to that function with the content of the guard slot as
    // argument.
    FunctionType *FnTy = GuardCheckFn->getFunctionType();
    assert(FnTy->getNumParams() == 1 && "Invalid function signature");

    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    Entry.Node = GuardVal;
    Entry.Ty = FnTy->getParamType(0);
    if (GuardCheckFn->hasAttribute(1, Attribute::AttrKind::InReg))
      Entry.IsInReg = true;
    Args.push_back(Entry);

    TargetLowering::CallLoweringInfo CLI(DAG);
    CLI.setDebugLoc(getCurSDLoc())
        .setChain(DAG.getEntryNode())
        .setCallee(GuardCheckFn->getCallingConv(), FnTy->getReturnType(),
                   getValue(GuardCheckFn), std::move(Args));

    std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
    DAG.setRoot(Result.second);
    return;
  }

  // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
  // Otherwise, emit a volatile load to retrieve the stack guard value.
  SDValue Chain = DAG.getEntryNode();
  if (TLI.useLoadStackGuardNode()) {
    Guard = getLoadStackGuard(DAG, dl, Chain);
  } else {
    const Value *IRGuard = TLI.getSDagStackGuard(M);
    SDValue GuardPtr = getValue(IRGuard);

    Guard = DAG.getLoad(PtrMemTy, dl, Chain, GuardPtr,
                        MachinePointerInfo(IRGuard, 0), Align,
                        MachineMemOperand::MOVolatile);
  }

  // Perform the comparison via a subtract/getsetcc.
  EVT VT = Guard.getValueType();
  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, GuardVal);

  SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(),
                                                        *DAG.getContext(),
                                                        Sub.getValueType()),
                             Sub, DAG.getConstant(0, dl, VT), ISD::SETNE);

  // If the sub is not 0, then we know the guard/stackslot do not equal, so
  // branch to failure MBB.
  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
                               MVT::Other, GuardVal.getOperand(0),
                               Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
  // Otherwise branch to success MBB.
  SDValue Br = DAG.getNode(ISD::BR, dl,
                           MVT::Other, BrCond,
                           DAG.getBasicBlock(SPD.getSuccessMBB()));

  DAG.setRoot(Br);
}
2591 | | |
2592 | | /// Codegen the failure basic block for a stack protector check. |
2593 | | /// |
2594 | | /// A failure stack protector machine basic block consists simply of a call to |
2595 | | /// __stack_chk_fail(). |
2596 | | /// |
2597 | | /// For a high level explanation of how this fits into the stack protector |
2598 | | /// generation see the comment on the declaration of class |
2599 | | /// StackProtectorDescriptor. |
void
SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  // Emit the stack-protector failure libcall (__stack_chk_fail); only the
  // output chain matters, the call produces no meaningful value.
  SDValue Chain =
      TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
                      None, false, getCurSDLoc(), false, false).second;
  // On PS4, the "return address" must still be within the calling function,
  // even if it's at the very end, so emit an explicit TRAP here.
  // Passing 'true' for doesNotReturn above won't generate the trap for us.
  if (TM.getTargetTriple().isPS4CPU())
    Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain);

  DAG.setRoot(Chain);
}
2614 | | |
2615 | | /// visitBitTestHeader - This function emits necessary code to produce value |
2616 | | /// suitable for "bit tests" |
void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
                                             MachineBasicBlock *SwitchBB) {
  SDLoc dl = getCurSDLoc();

  // Subtract the minimum value so the bit index is zero-based.
  SDValue SwitchOp = getValue(B.SValue);
  EVT VT = SwitchOp.getValueType();
  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
                            DAG.getConstant(B.First, dl, VT));

  // Check range: values above B.Range fall through to the default block.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue RangeCmp = DAG.getSetCC(
      dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                 Sub.getValueType()),
      Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT);

  // Determine the type of the test operands: keep VT when it is legal and
  // every case mask fits in it, otherwise fall back to the pointer type.
  bool UsePtrType = false;
  if (!TLI.isTypeLegal(VT))
    UsePtrType = true;
  else {
    for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
      if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) {
        // Switch table case range are encoded into series of masks.
        // Just use pointer type, it's guaranteed to fit.
        UsePtrType = true;
        break;
      }
  }
  if (UsePtrType) {
    VT = TLI.getPointerTy(DAG.getDataLayout());
    Sub = DAG.getZExtOrTrunc(Sub, dl, VT);
  }

  // Stash the (rebased) switch value in a fresh virtual register so the
  // per-case blocks (visitBitTestCase) can reload it.
  B.RegVT = VT.getSimpleVT();
  B.Reg = FuncInfo.CreateReg(B.RegVT);
  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, B.Reg, Sub);

  MachineBasicBlock* MBB = B.Cases[0].ThisBB;

  // Record CFG successors (default + first case block) with probabilities.
  addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
  addSuccessorWithProb(SwitchBB, MBB, B.Prob);
  SwitchBB->normalizeSuccProbs();

  SDValue BrRange = DAG.getNode(ISD::BRCOND, dl,
                                MVT::Other, CopyTo, RangeCmp,
                                DAG.getBasicBlock(B.Default));

  // Avoid emitting unnecessary branches to the next block.
  if (MBB != NextBlock(SwitchBB))
    BrRange = DAG.getNode(ISD::BR, dl, MVT::Other, BrRange,
                          DAG.getBasicBlock(MBB));

  DAG.setRoot(BrRange);
}
2673 | | |
2674 | | /// visitBitTestCase - this function produces one "bit test" |
void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
                                           MachineBasicBlock* NextMBB,
                                           BranchProbability BranchProbToNext,
                                           unsigned Reg,
                                           BitTestCase &B,
                                           MachineBasicBlock *SwitchBB) {
  SDLoc dl = getCurSDLoc();
  MVT VT = BB.RegVT;
  // Reload the rebased switch value stashed by visitBitTestHeader.
  SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT);
  SDValue Cmp;
  unsigned PopCount = countPopulation(B.Mask);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (PopCount == 1) {
    // Testing for a single bit; just compare the shift count with what it
    // would need to be to shift a 1 bit in that position.
    Cmp = DAG.getSetCC(
        dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
        ShiftOp, DAG.getConstant(countTrailingZeros(B.Mask), dl, VT),
        ISD::SETEQ);
  } else if (PopCount == BB.Range) {
    // There is only one zero bit in the range, test for it directly.
    Cmp = DAG.getSetCC(
        dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
        ShiftOp, DAG.getConstant(countTrailingOnes(B.Mask), dl, VT),
        ISD::SETNE);
  } else {
    // General case: shift 1 into position and AND against the case mask.
    // Make desired shift.
    SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT,
                                    DAG.getConstant(1, dl, VT), ShiftOp);

    // Emit bit tests and jumps.
    SDValue AndOp = DAG.getNode(ISD::AND, dl,
                                VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT));
    Cmp = DAG.getSetCC(
        dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
        AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
  addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
  // The branch probability from SwitchBB to NextMBB is BranchProbToNext.
  addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
  // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is
  // one as they are relative probabilities (and thus work more like weights),
  // and hence we need to normalize them to let the sum of them become one.
  SwitchBB->normalizeSuccProbs();

  SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl,
                              MVT::Other, getControlRoot(),
                              Cmp, DAG.getBasicBlock(B.TargetBB));

  // Avoid emitting unnecessary branches to the next block.
  if (NextMBB != NextBlock(SwitchBB))
    BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd,
                        DAG.getBasicBlock(NextMBB));

  DAG.setRoot(BrAnd);
}
2733 | | |
void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
  MachineBasicBlock *InvokeMBB = FuncInfo.MBB;

  // Retrieve successors. Look through artificial IR level blocks like
  // catchswitch for successors.
  MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
  const BasicBlock *EHPadBB = I.getSuccessor(1);

  // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
  // have to do anything here to lower funclet bundles.
  assert(!I.hasOperandBundlesOtherThan(
             {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
         "Cannot lower invokes with arbitrary operand bundles yet!");

  // Dispatch on the callee kind: inline asm, a handful of invokable
  // intrinsics, deopt-bundle calls, or an ordinary call.
  const Value *Callee(I.getCalledValue());
  const Function *Fn = dyn_cast<Function>(Callee);
  if (isa<InlineAsm>(Callee))
    visitInlineAsm(&I);
  else if (Fn && Fn->isIntrinsic()) {
    switch (Fn->getIntrinsicID()) {
    default:
      llvm_unreachable("Cannot invoke this intrinsic");
    case Intrinsic::donothing:
      // Ignore invokes to @llvm.donothing: jump directly to the next BB.
      break;
    case Intrinsic::experimental_patchpoint_void:
    case Intrinsic::experimental_patchpoint_i64:
      visitPatchpoint(&I, EHPadBB);
      break;
    case Intrinsic::experimental_gc_statepoint:
      LowerStatepoint(ImmutableStatepoint(&I), EHPadBB);
      break;
    case Intrinsic::wasm_rethrow_in_catch: {
      // This is usually done in visitTargetIntrinsic, but this intrinsic is
      // special because it can be invoked, so we manually lower it to a DAG
      // node here.
      SmallVector<SDValue, 8> Ops;
      Ops.push_back(getRoot()); // inchain
      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
      Ops.push_back(
          DAG.getTargetConstant(Intrinsic::wasm_rethrow_in_catch, getCurSDLoc(),
                                TLI.getPointerTy(DAG.getDataLayout())));
      SDVTList VTs = DAG.getVTList(ArrayRef<EVT>({MVT::Other})); // outchain
      DAG.setRoot(DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops));
      break;
    }
    }
  } else if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) {
    // Currently we do not lower any intrinsic calls with deopt operand bundles.
    // Eventually we will support lowering the @llvm.experimental.deoptimize
    // intrinsic, and right now there are no plans to support other intrinsics
    // with deopt state.
    LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB);
  } else {
    // Ordinary invoke: lower as a call that may unwind to EHPadBB.
    LowerCallTo(&I, getValue(Callee), false, EHPadBB);
  }

  // If the value of the invoke is used outside of its defining block, make it
  // available as a virtual register.
  // We already took care of the exported value for the statepoint instruction
  // during call to the LowerStatepoint.
  if (!isStatepoint(I)) {
    CopyToExportRegsIfNeeded(&I);
  }

  // Compute the probability of taking the unwind edge, then resolve the
  // (possibly multiple) machine-level unwind destinations.
  SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
  BranchProbabilityInfo *BPI = FuncInfo.BPI;
  BranchProbability EHPadBBProb =
      BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB)
          : BranchProbability::getZero();
  findUnwindDestinations(FuncInfo, EHPadBB, EHPadBBProb, UnwindDests);

  // Update successor info.
  addSuccessorWithProb(InvokeMBB, Return);
  for (auto &UnwindDest : UnwindDests) {
    UnwindDest.first->setIsEHPad();
    addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second);
  }
  InvokeMBB->normalizeSuccProbs();

  // Drop into normal successor.
  DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(),
                          DAG.getBasicBlock(Return)));
}
2818 | | |
2819 | 5 | void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) { |
2820 | 5 | MachineBasicBlock *CallBrMBB = FuncInfo.MBB; |
2821 | 5 | |
2822 | 5 | // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't |
2823 | 5 | // have to do anything here to lower funclet bundles. |
2824 | 5 | assert(!I.hasOperandBundlesOtherThan( |
2825 | 5 | {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) && |
2826 | 5 | "Cannot lower callbrs with arbitrary operand bundles yet!"); |
2827 | 5 | |
2828 | 5 | assert(isa<InlineAsm>(I.getCalledValue()) && |
2829 | 5 | "Only know how to handle inlineasm callbr"); |
2830 | 5 | visitInlineAsm(&I); |
2831 | 5 | |
2832 | 5 | // Retrieve successors. |
2833 | 5 | MachineBasicBlock *Return = FuncInfo.MBBMap[I.getDefaultDest()]; |
2834 | 5 | |
2835 | 5 | // Update successor info. |
2836 | 5 | addSuccessorWithProb(CallBrMBB, Return); |
2837 | 15 | for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i10 ) { |
2838 | 10 | MachineBasicBlock *Target = FuncInfo.MBBMap[I.getIndirectDest(i)]; |
2839 | 10 | addSuccessorWithProb(CallBrMBB, Target); |
2840 | 10 | } |
2841 | 5 | CallBrMBB->normalizeSuccProbs(); |
2842 | 5 | |
2843 | 5 | // Drop into default successor. |
2844 | 5 | DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), |
2845 | 5 | MVT::Other, getControlRoot(), |
2846 | 5 | DAG.getBasicBlock(Return))); |
2847 | 5 | } |
2848 | | |
void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
  // 'resume' should never reach instruction selection; it is expected to be
  // rewritten by an earlier EH-preparation pass (NOTE(review): confirm which
  // pass handles it for this configuration), so this is a pass-ordering bug.
  llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
}
2852 | | |
void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
  assert(FuncInfo.MBB->isEHPad() &&
         "Call to landingpad not in landing pad!");

  // If there aren't registers to copy the values into (e.g., during SjLj
  // exceptions), then don't bother to create these DAG nodes.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn();
  if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
      TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
    return;

  // If landingpad's return type is token type, we don't create DAG nodes
  // for its exception pointer and selector value. The extraction of exception
  // pointer or selector value from token type landingpads is not currently
  // supported.
  if (LP.getType()->isTokenTy())
    return;

  SmallVector<EVT, 2> ValueVTs;
  SDLoc dl = getCurSDLoc();
  ComputeValueVTs(TLI, DAG.getDataLayout(), LP.getType(), ValueVTs);
  assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported");

  // Get the two live-in registers as SDValues. The physregs have already been
  // copied into virtual registers.
  SDValue Ops[2];
  if (FuncInfo.ExceptionPointerVirtReg) {
    // Exception pointer: copy from the virtual register, then adjust to the
    // landingpad's first result type.
    Ops[0] = DAG.getZExtOrTrunc(
        DAG.getCopyFromReg(DAG.getEntryNode(), dl,
                           FuncInfo.ExceptionPointerVirtReg,
                           TLI.getPointerTy(DAG.getDataLayout())),
        dl, ValueVTs[0]);
  } else {
    // No live-in exception pointer register; use a null pointer constant.
    Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout()));
  }
  // Exception selector: always read from its virtual register.
  Ops[1] = DAG.getZExtOrTrunc(
      DAG.getCopyFromReg(DAG.getEntryNode(), dl,
                         FuncInfo.ExceptionSelectorVirtReg,
                         TLI.getPointerTy(DAG.getDataLayout())),
      dl, ValueVTs[1]);

  // Merge into one.
  SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
                            DAG.getVTList(ValueVTs), Ops);
  setValue(&LP, Res);
}
2900 | | |
2901 | | void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First, |
2902 | 0 | MachineBasicBlock *Last) { |
2903 | 0 | // Update JTCases. |
2904 | 0 | for (unsigned i = 0, e = SL->JTCases.size(); i != e; ++i) |
2905 | 0 | if (SL->JTCases[i].first.HeaderBB == First) |
2906 | 0 | SL->JTCases[i].first.HeaderBB = Last; |
2907 | 0 |
|
2908 | 0 | // Update BitTestCases. |
2909 | 0 | for (unsigned i = 0, e = SL->BitTestCases.size(); i != e; ++i) |
2910 | 0 | if (SL->BitTestCases[i].Parent == First) |
2911 | 0 | SL->BitTestCases[i].Parent = Last; |
2912 | 0 | } |
2913 | | |
2914 | 100 | void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { |
2915 | 100 | MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB; |
2916 | 100 | |
2917 | 100 | // Update machine-CFG edges with unique successors. |
2918 | 100 | SmallSet<BasicBlock*, 32> Done; |
2919 | 387 | for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i287 ) { |
2920 | 287 | BasicBlock *BB = I.getSuccessor(i); |
2921 | 287 | bool Inserted = Done.insert(BB).second; |
2922 | 287 | if (!Inserted) |
2923 | 6 | continue; |
2924 | 281 | |
2925 | 281 | MachineBasicBlock *Succ = FuncInfo.MBBMap[BB]; |
2926 | 281 | addSuccessorWithProb(IndirectBrMBB, Succ); |
2927 | 281 | } |
2928 | 100 | IndirectBrMBB->normalizeSuccProbs(); |
2929 | 100 | |
2930 | 100 | DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(), |
2931 | 100 | MVT::Other, getControlRoot(), |
2932 | 100 | getValue(I.getAddress()))); |
2933 | 100 | } |
2934 | | |
2935 | 26.1k | void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) { |
2936 | 26.1k | if (!DAG.getTarget().Options.TrapUnreachable) |
2937 | 1.67k | return; |
2938 | 24.4k | |
2939 | 24.4k | // We may be able to ignore unreachable behind a noreturn call. |
2940 | 24.4k | if (DAG.getTarget().Options.NoTrapAfterNoreturn) { |
2941 | 24.3k | const BasicBlock &BB = *I.getParent(); |
2942 | 24.3k | if (&I != &BB.front()) { |
2943 | 23.7k | BasicBlock::const_iterator PredI = |
2944 | 23.7k | std::prev(BasicBlock::const_iterator(&I)); |
2945 | 23.7k | if (const CallInst *Call = dyn_cast<CallInst>(&*PredI)) { |
2946 | 23.6k | if (Call->doesNotReturn()) |
2947 | 22.8k | return; |
2948 | 1.63k | } |
2949 | 23.7k | } |
2950 | 24.3k | } |
2951 | 1.63k | |
2952 | 1.63k | DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); |
2953 | 1.63k | } |
2954 | | |
2955 | 11.5k | void SelectionDAGBuilder::visitFSub(const User &I) { |
2956 | 11.5k | // -0.0 - X --> fneg |
2957 | 11.5k | Type *Ty = I.getType(); |
2958 | 11.5k | if (isa<Constant>(I.getOperand(0)) && |
2959 | 11.5k | I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)6.45k ) { |
2960 | 5.20k | SDValue Op2 = getValue(I.getOperand(1)); |
2961 | 5.20k | setValue(&I, DAG.getNode(ISD::FNEG, getCurSDLoc(), |
2962 | 5.20k | Op2.getValueType(), Op2)); |
2963 | 5.20k | return; |
2964 | 5.20k | } |
2965 | 6.35k | |
2966 | 6.35k | visitBinary(I, ISD::FSUB); |
2967 | 6.35k | } |
2968 | | |
2969 | | /// Checks if the given instruction performs a vector reduction, in which case |
2970 | | /// we have the freedom to alter the elements in the result as long as the |
2971 | | /// reduction of them stays unchanged. |
static bool isVectorReductionOp(const User *I) {
  const Instruction *Inst = dyn_cast<Instruction>(I);
  if (!Inst || !Inst->getType()->isVectorTy())
    return false;

  // Only commutative/associative integer ops, or fast-math FAdd/FMul, can be
  // freely reassociated, which is what reduction detection relies on.
  auto OpCode = Inst->getOpcode();
  switch (OpCode) {
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    break;
  case Instruction::FAdd:
  case Instruction::FMul:
    if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
      if (FPOp->getFastMathFlags().isFast())
        break;
    LLVM_FALLTHROUGH;
  default:
    return false;
  }

  unsigned ElemNum = Inst->getType()->getVectorNumElements();
  // Ensure the reduction size is a power of 2.
  if (!isPowerOf2_32(ElemNum))
    return false;

  // Number of elements still left to reduce; halved at each shuffle step.
  unsigned ElemNumToReduce = ElemNum;

  // Do DFS search on the def-use chain from the given instruction. We only
  // allow four kinds of operations during the search until we reach the
  // instruction that extracts the first element from the vector:
  //
  // 1. The reduction operation of the same opcode as the given instruction.
  //
  // 2. PHI node.
  //
  // 3. ShuffleVector instruction together with a reduction operation that
  //    does a partial reduction.
  //
  // 4. ExtractElement that extracts the first element from the vector, and we
  //    stop searching the def-use chain here.
  //
  // 3 & 4 above perform a reduction on all elements of the vector. We push defs
  // from 1-3 to the stack to continue the DFS. The given instruction is not
  // a reduction operation if we meet any other instructions other than those
  // listed above.

  SmallVector<const User *, 16> UsersToVisit{Inst};
  SmallPtrSet<const User *, 16> Visited;
  bool ReduxExtracted = false;

  while (!UsersToVisit.empty()) {
    auto User = UsersToVisit.back();
    UsersToVisit.pop_back();
    if (!Visited.insert(User).second)
      continue;

    for (const auto &U : User->users()) {
      auto Inst = dyn_cast<Instruction>(U);
      if (!Inst)
        return false;

      if (Inst->getOpcode() == OpCode || isa<PHINode>(U)) {
        // Same-opcode reduction step or PHI: FP steps must still be fast-math.
        if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
          if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().isFast())
            return false;
        UsersToVisit.push_back(U);
      } else if (const ShuffleVectorInst *ShufInst =
                     dyn_cast<ShuffleVectorInst>(U)) {
        // Detect the following pattern: A ShuffleVector instruction together
        // with a reduction that do partial reduction on the first and second
        // ElemNumToReduce / 2 elements, and store the result in
        // ElemNumToReduce / 2 elements in another vector.

        unsigned ResultElements = ShufInst->getType()->getVectorNumElements();
        if (ResultElements < ElemNum)
          return false;

        if (ElemNumToReduce == 1)
          return false;
        if (!isa<UndefValue>(U->getOperand(1)))
          return false;
        // The mask must move the upper live half down over the lower half...
        for (unsigned i = 0; i < ElemNumToReduce / 2; ++i)
          if (ShufInst->getMaskValue(i) != int(i + ElemNumToReduce / 2))
            return false;
        // ...and leave the remaining lanes undefined.
        for (unsigned i = ElemNumToReduce / 2; i < ElemNum; ++i)
          if (ShufInst->getMaskValue(i) != -1)
            return false;

        // There is only one user of this ShuffleVector instruction, which
        // must be a reduction operation.
        if (!U->hasOneUse())
          return false;

        auto U2 = dyn_cast<Instruction>(*U->user_begin());
        if (!U2 || U2->getOpcode() != OpCode)
          return false;

        // Check operands of the reduction operation.
        if ((U2->getOperand(0) == U->getOperand(0) && U2->getOperand(1) == U) ||
            (U2->getOperand(1) == U->getOperand(0) && U2->getOperand(0) == U)) {
          UsersToVisit.push_back(U2);
          ElemNumToReduce /= 2;
        } else
          return false;
      } else if (isa<ExtractElementInst>(U)) {
        // At this moment we should have reduced all elements in the vector.
        if (ElemNumToReduce != 1)
          return false;

        // Only extraction of lane 0 terminates a valid reduction chain.
        const ConstantInt *Val = dyn_cast<ConstantInt>(U->getOperand(1));
        if (!Val || !Val->isZero())
          return false;

        ReduxExtracted = true;
      } else
        return false;
    }
  }
  return ReduxExtracted;
}
3095 | | |
3096 | 66 | void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) { |
3097 | 66 | SDNodeFlags Flags; |
3098 | 66 | |
3099 | 66 | SDValue Op = getValue(I.getOperand(0)); |
3100 | 66 | SDValue UnNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op.getValueType(), |
3101 | 66 | Op, Flags); |
3102 | 66 | setValue(&I, UnNodeValue); |
3103 | 66 | } |
3104 | | |
3105 | 609k | void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) { |
3106 | 609k | SDNodeFlags Flags; |
3107 | 609k | if (auto *OFBinOp = dyn_cast<OverflowingBinaryOperator>(&I)) { |
3108 | 335k | Flags.setNoSignedWrap(OFBinOp->hasNoSignedWrap()); |
3109 | 335k | Flags.setNoUnsignedWrap(OFBinOp->hasNoUnsignedWrap()); |
3110 | 335k | } |
3111 | 609k | if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I)) { |
3112 | 2.76k | Flags.setExact(ExactOp->isExact()); |
3113 | 2.76k | } |
3114 | 609k | if (isVectorReductionOp(&I)) { |
3115 | 991 | Flags.setVectorReduction(true); |
3116 | 991 | LLVM_DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n"); |
3117 | 991 | } |
3118 | 609k | |
3119 | 609k | SDValue Op1 = getValue(I.getOperand(0)); |
3120 | 609k | SDValue Op2 = getValue(I.getOperand(1)); |
3121 | 609k | SDValue BinNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), |
3122 | 609k | Op1, Op2, Flags); |
3123 | 609k | setValue(&I, BinNodeValue); |
3124 | 609k | } |
3125 | | |
/// Lower a shift instruction to the given ISD opcode (SHL/SRL/SRA),
/// coercing the shift amount to the target's shift-amount type and
/// transferring nuw/nsw/exact flags.
void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
  SDValue Op1 = getValue(I.getOperand(0));
  SDValue Op2 = getValue(I.getOperand(1));

  EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy(
      Op1.getValueType(), DAG.getDataLayout());

  // Coerce the shift amount to the right type if we can.
  if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
    unsigned ShiftSize = ShiftTy.getSizeInBits();
    unsigned Op2Size = Op2.getValueSizeInBits();
    SDLoc DL = getCurSDLoc();

    // If the operand is smaller than the shift count type, promote it.
    if (ShiftSize > Op2Size)
      Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2);

    // If the operand is larger than the shift count type but the shift
    // count type has enough bits to represent any shift value, truncate
    // it now. This is a common case and it exposes the truncate to
    // optimization early.
    else if (ShiftSize >= Log2_32_Ceil(Op2.getValueSizeInBits()))
      Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
    // Otherwise we'll need to temporarily settle for some other convenient
    // type.  Type legalization will make adjustments once the shiftee is split.
    else
      Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
  }

  // Gather IR-level flags to transfer onto the DAG node.
  bool nuw = false;
  bool nsw = false;
  bool exact = false;

  if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) {

    if (const OverflowingBinaryOperator *OFBinOp =
            dyn_cast<const OverflowingBinaryOperator>(&I)) {
      nuw = OFBinOp->hasNoUnsignedWrap();
      nsw = OFBinOp->hasNoSignedWrap();
    }
    if (const PossiblyExactOperator *ExactOp =
            dyn_cast<const PossiblyExactOperator>(&I))
      exact = ExactOp->isExact();
  }
  SDNodeFlags Flags;
  Flags.setExact(exact);
  Flags.setNoSignedWrap(nsw);
  Flags.setNoUnsignedWrap(nuw);
  SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2,
                            Flags);
  setValue(&I, Res);
}
3178 | | |
3179 | 3.27k | void SelectionDAGBuilder::visitSDiv(const User &I) { |
3180 | 3.27k | SDValue Op1 = getValue(I.getOperand(0)); |
3181 | 3.27k | SDValue Op2 = getValue(I.getOperand(1)); |
3182 | 3.27k | |
3183 | 3.27k | SDNodeFlags Flags; |
3184 | 3.27k | Flags.setExact(isa<PossiblyExactOperator>(&I) && |
3185 | 3.27k | cast<PossiblyExactOperator>(&I)->isExact()); |
3186 | 3.27k | setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1, |
3187 | 3.27k | Op2, Flags)); |
3188 | 3.27k | } |
3189 | | |
3190 | 608k | void SelectionDAGBuilder::visitICmp(const User &I) { |
3191 | 608k | ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; |
3192 | 608k | if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I)) |
3193 | 608k | predicate = IC->getPredicate(); |
3194 | 353 | else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I)) |
3195 | 353 | predicate = ICmpInst::Predicate(IC->getPredicate()); |
3196 | 608k | SDValue Op1 = getValue(I.getOperand(0)); |
3197 | 608k | SDValue Op2 = getValue(I.getOperand(1)); |
3198 | 608k | ISD::CondCode Opcode = getICmpCondCode(predicate); |
3199 | 608k | |
3200 | 608k | auto &TLI = DAG.getTargetLoweringInfo(); |
3201 | 608k | EVT MemVT = |
3202 | 608k | TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType()); |
3203 | 608k | |
3204 | 608k | // If a pointer's DAG type is larger than its memory type then the DAG values |
3205 | 608k | // are zero-extended. This breaks signed comparisons so truncate back to the |
3206 | 608k | // underlying type before doing the compare. |
3207 | 608k | if (Op1.getValueType() != MemVT) { |
3208 | 0 | Op1 = DAG.getPtrExtOrTrunc(Op1, getCurSDLoc(), MemVT); |
3209 | 0 | Op2 = DAG.getPtrExtOrTrunc(Op2, getCurSDLoc(), MemVT); |
3210 | 0 | } |
3211 | 608k | |
3212 | 608k | EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), |
3213 | 608k | I.getType()); |
3214 | 608k | setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode)); |
3215 | 608k | } |
3216 | | |
3217 | 15.9k | void SelectionDAGBuilder::visitFCmp(const User &I) { |
3218 | 15.9k | FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE; |
3219 | 15.9k | if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I)) |
3220 | 15.9k | predicate = FC->getPredicate(); |
3221 | 2 | else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I)) |
3222 | 2 | predicate = FCmpInst::Predicate(FC->getPredicate()); |
3223 | 15.9k | SDValue Op1 = getValue(I.getOperand(0)); |
3224 | 15.9k | SDValue Op2 = getValue(I.getOperand(1)); |
3225 | 15.9k | |
3226 | 15.9k | ISD::CondCode Condition = getFCmpCondCode(predicate); |
3227 | 15.9k | auto *FPMO = dyn_cast<FPMathOperator>(&I); |
3228 | 15.9k | if ((FPMO && FPMO->hasNoNaNs()) || TM.Options.NoNaNsFPMath14.8k ) |
3229 | 1.37k | Condition = getFCmpCodeWithoutNaN(Condition); |
3230 | 15.9k | |
3231 | 15.9k | EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), |
3232 | 15.9k | I.getType()); |
3233 | 15.9k | setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition)); |
3234 | 15.9k | } |
3235 | | |
3236 | | // Check if the condition of the select has one use or two users that are both |
3237 | | // selects with the same condition. |
3238 | 14.6k | static bool hasOnlySelectUsers(const Value *Cond) { |
3239 | 16.7k | return llvm::all_of(Cond->users(), [](const Value *V) { |
3240 | 16.7k | return isa<SelectInst>(V); |
3241 | 16.7k | }); |
3242 | 14.6k | } |
3243 | | |
3244 | 70.5k | void SelectionDAGBuilder::visitSelect(const User &I) { |
3245 | 70.5k | SmallVector<EVT, 4> ValueVTs; |
3246 | 70.5k | ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(), |
3247 | 70.5k | ValueVTs); |
3248 | 70.5k | unsigned NumValues = ValueVTs.size(); |
3249 | 70.5k | if (NumValues == 0) return7 ; |
3250 | 70.5k | |
3251 | 70.5k | SmallVector<SDValue, 4> Values(NumValues); |
3252 | 70.5k | SDValue Cond = getValue(I.getOperand(0)); |
3253 | 70.5k | SDValue LHSVal = getValue(I.getOperand(1)); |
3254 | 70.5k | SDValue RHSVal = getValue(I.getOperand(2)); |
3255 | 70.5k | auto BaseOps = {Cond}; |
3256 | 70.5k | ISD::NodeType OpCode = Cond.getValueType().isVector() ? |
3257 | 42.9k | ISD::VSELECT27.6k : ISD::SELECT; |
3258 | 70.5k | |
3259 | 70.5k | bool IsUnaryAbs = false; |
3260 | 70.5k | |
3261 | 70.5k | // Min/max matching is only viable if all output VTs are the same. |
3262 | 70.5k | if (is_splat(ValueVTs)) { |
3263 | 70.5k | EVT VT = ValueVTs[0]; |
3264 | 70.5k | LLVMContext &Ctx = *DAG.getContext(); |
3265 | 70.5k | auto &TLI = DAG.getTargetLoweringInfo(); |
3266 | 70.5k | |
3267 | 70.5k | // We care about the legality of the operation after it has been type |
3268 | 70.5k | // legalized. |
3269 | 85.1k | while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal && |
3270 | 85.1k | VT != TLI.getTypeToTransformTo(Ctx, VT)14.6k ) |
3271 | 14.6k | VT = TLI.getTypeToTransformTo(Ctx, VT); |
3272 | 70.5k | |
3273 | 70.5k | // If the vselect is legal, assume we want to leave this as a vector setcc + |
3274 | 70.5k | // vselect. Otherwise, if this is going to be scalarized, we want to see if |
3275 | 70.5k | // min/max is legal on the scalar type. |
3276 | 70.5k | bool UseScalarMinMax = VT.isVector() && |
3277 | 70.5k | !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT)27.8k ; |
3278 | 70.5k | |
3279 | 70.5k | Value *LHS, *RHS; |
3280 | 70.5k | auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS); |
3281 | 70.5k | ISD::NodeType Opc = ISD::DELETED_NODE; |
3282 | 70.5k | switch (SPR.Flavor) { |
3283 | 70.5k | case SPF_UMAX: Opc = ISD::UMAX; break5.69k ; |
3284 | 70.5k | case SPF_UMIN: Opc = ISD::UMIN; break5.62k ; |
3285 | 70.5k | case SPF_SMAX: Opc = ISD::SMAX; break5.44k ; |
3286 | 70.5k | case SPF_SMIN: Opc = ISD::SMIN; break5.05k ; |
3287 | 70.5k | case SPF_FMINNUM: |
3288 | 546 | switch (SPR.NaNBehavior) { |
3289 | 546 | case SPNB_NA: 0 llvm_unreachable0 ("No NaN behavior for FP op?"); |
3290 | 546 | case SPNB_RETURNS_NAN: Opc = ISD::FMINIMUM; break99 ; |
3291 | 546 | case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break88 ; |
3292 | 546 | case SPNB_RETURNS_ANY: { |
3293 | 359 | if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT)) |
3294 | 109 | Opc = ISD::FMINNUM; |
3295 | 250 | else if (TLI.isOperationLegalOrCustom(ISD::FMINIMUM, VT)) |
3296 | 0 | Opc = ISD::FMINIMUM; |
3297 | 250 | else if (UseScalarMinMax) |
3298 | 3 | Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ? |
3299 | 3 | ISD::FMINNUM : ISD::FMINIMUM0 ; |
3300 | 359 | break; |
3301 | 546 | } |
3302 | 546 | } |
3303 | 546 | break; |
3304 | 600 | case SPF_FMAXNUM: |
3305 | 600 | switch (SPR.NaNBehavior) { |
3306 | 600 | case SPNB_NA: 0 llvm_unreachable0 ("No NaN behavior for FP op?"); |
3307 | 600 | case SPNB_RETURNS_NAN: Opc = ISD::FMAXIMUM; break147 ; |
3308 | 600 | case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break71 ; |
3309 | 600 | case SPNB_RETURNS_ANY: |
3310 | 382 | |
3311 | 382 | if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT)) |
3312 | 131 | Opc = ISD::FMAXNUM; |
3313 | 251 | else if (TLI.isOperationLegalOrCustom(ISD::FMAXIMUM, VT)) |
3314 | 0 | Opc = ISD::FMAXIMUM; |
3315 | 251 | else if (UseScalarMinMax) |
3316 | 5 | Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ? |
3317 | 3 | ISD::FMAXNUM : ISD::FMAXIMUM2 ; |
3318 | 382 | break; |
3319 | 600 | } |
3320 | 600 | break; |
3321 | 1.04k | case SPF_ABS: |
3322 | 1.04k | IsUnaryAbs = true; |
3323 | 1.04k | Opc = ISD::ABS; |
3324 | 1.04k | break; |
3325 | 600 | case SPF_NABS: |
3326 | 11 | // TODO: we need to produce sub(0, abs(X)). |
3327 | 46.5k | default: break; |
3328 | 70.5k | } |
3329 | 70.5k | |
3330 | 70.5k | if (!IsUnaryAbs && Opc != ISD::DELETED_NODE69.5k && |
3331 | 70.5k | (22.4k TLI.isOperationLegalOrCustom(Opc, VT)22.4k || |
3332 | 22.4k | (7.96k UseScalarMinMax7.96k && |
3333 | 7.96k | TLI.isOperationLegalOrCustom(Opc, VT.getScalarType())298 )) && |
3334 | 70.5k | // If the underlying comparison instruction is used by any other |
3335 | 70.5k | // instruction, the consumed instructions won't be destroyed, so it is |
3336 | 70.5k | // not profitable to convert to a min/max. |
3337 | 70.5k | hasOnlySelectUsers(cast<SelectInst>(I).getCondition())14.6k ) { |
3338 | 14.5k | OpCode = Opc; |
3339 | 14.5k | LHSVal = getValue(LHS); |
3340 | 14.5k | RHSVal = getValue(RHS); |
3341 | 14.5k | BaseOps = {}; |
3342 | 14.5k | } |
3343 | 70.5k | |
3344 | 70.5k | if (IsUnaryAbs) { |
3345 | 1.04k | OpCode = Opc; |
3346 | 1.04k | LHSVal = getValue(LHS); |
3347 | 1.04k | BaseOps = {}; |
3348 | 1.04k | } |
3349 | 70.5k | } |
3350 | 70.5k | |
3351 | 70.5k | if (IsUnaryAbs) { |
3352 | 2.09k | for (unsigned i = 0; i != NumValues; ++i1.04k ) { |
3353 | 1.04k | Values[i] = |
3354 | 1.04k | DAG.getNode(OpCode, getCurSDLoc(), |
3355 | 1.04k | LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), |
3356 | 1.04k | SDValue(LHSVal.getNode(), LHSVal.getResNo() + i)); |
3357 | 1.04k | } |
3358 | 69.5k | } else { |
3359 | 139k | for (unsigned i = 0; i != NumValues; ++i69.5k ) { |
3360 | 69.5k | SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end()); |
3361 | 69.5k | Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i)); |
3362 | 69.5k | Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i)); |
3363 | 69.5k | Values[i] = DAG.getNode( |
3364 | 69.5k | OpCode, getCurSDLoc(), |
3365 | 69.5k | LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), Ops); |
3366 | 69.5k | } |
3367 | 69.5k | } |
3368 | 70.5k | |
3369 | 70.5k | setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), |
3370 | 70.5k | DAG.getVTList(ValueVTs), Values)); |
3371 | 70.5k | } |
3372 | | |
3373 | 101k | void SelectionDAGBuilder::visitTrunc(const User &I) { |
3374 | 101k | // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). |
3375 | 101k | SDValue N = getValue(I.getOperand(0)); |
3376 | 101k | EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), |
3377 | 101k | I.getType()); |
3378 | 101k | setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N)); |
3379 | 101k | } |
3380 | | |
3381 | 81.0k | void SelectionDAGBuilder::visitZExt(const User &I) { |
3382 | 81.0k | // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). |
3383 | 81.0k | // ZExt also can't be a cast to bool for same reason. So, nothing much to do |
3384 | 81.0k | SDValue N = getValue(I.getOperand(0)); |
3385 | 81.0k | EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), |
3386 | 81.0k | I.getType()); |
3387 | 81.0k | setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N)); |
3388 | 81.0k | } |
3389 | | |
3390 | 61.9k | void SelectionDAGBuilder::visitSExt(const User &I) { |
3391 | 61.9k | // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). |
3392 | 61.9k | // SExt also can't be a cast to bool for same reason. So, nothing much to do |
3393 | 61.9k | SDValue N = getValue(I.getOperand(0)); |
3394 | 61.9k | EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), |
3395 | 61.9k | I.getType()); |
3396 | 61.9k | setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N)); |
3397 | 61.9k | } |
3398 | | |
3399 | 1.55k | void SelectionDAGBuilder::visitFPTrunc(const User &I) { |
3400 | 1.55k | // FPTrunc is never a no-op cast, no need to check |
3401 | 1.55k | SDValue N = getValue(I.getOperand(0)); |
3402 | 1.55k | SDLoc dl = getCurSDLoc(); |
3403 | 1.55k | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
3404 | 1.55k | EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); |
3405 | 1.55k | setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N, |
3406 | 1.55k | DAG.getTargetConstant( |
3407 | 1.55k | 0, dl, TLI.getPointerTy(DAG.getDataLayout())))); |
3408 | 1.55k | } |
3409 | | |
3410 | 4.69k | void SelectionDAGBuilder::visitFPExt(const User &I) { |
3411 | 4.69k | // FPExt is never a no-op cast, no need to check |
3412 | 4.69k | SDValue N = getValue(I.getOperand(0)); |
3413 | 4.69k | EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), |
3414 | 4.69k | I.getType()); |
3415 | 4.69k | setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N)); |
3416 | 4.69k | } |
3417 | | |
3418 | 2.67k | void SelectionDAGBuilder::visitFPToUI(const User &I) { |
3419 | 2.67k | // FPToUI is never a no-op cast, no need to check |
3420 | 2.67k | SDValue N = getValue(I.getOperand(0)); |
3421 | 2.67k | EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), |
3422 | 2.67k | I.getType()); |
3423 | 2.67k | setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N)); |
3424 | 2.67k | } |
3425 | | |
3426 | 3.61k | void SelectionDAGBuilder::visitFPToSI(const User &I) { |
3427 | 3.61k | // FPToSI is never a no-op cast, no need to check |
3428 | 3.61k | SDValue N = getValue(I.getOperand(0)); |
3429 | 3.61k | EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), |
3430 | 3.61k | I.getType()); |
3431 | 3.61k | setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N)); |
3432 | 3.61k | } |
3433 | | |
3434 | 6.31k | void SelectionDAGBuilder::visitUIToFP(const User &I) { |
3435 | 6.31k | // UIToFP is never a no-op cast, no need to check |
3436 | 6.31k | SDValue N = getValue(I.getOperand(0)); |
3437 | 6.31k | EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), |
3438 | 6.31k | I.getType()); |
3439 | 6.31k | setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N)); |
3440 | 6.31k | } |
3441 | | |
3442 | 26.1k | void SelectionDAGBuilder::visitSIToFP(const User &I) { |
3443 | 26.1k | // SIToFP is never a no-op cast, no need to check |
3444 | 26.1k | SDValue N = getValue(I.getOperand(0)); |
3445 | 26.1k | EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), |
3446 | 26.1k | I.getType()); |
3447 | 26.1k | setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N)); |
3448 | 26.1k | } |
3449 | | |
3450 | 61.1k | void SelectionDAGBuilder::visitPtrToInt(const User &I) { |
3451 | 61.1k | // What to do depends on the size of the integer and the size of the pointer. |
3452 | 61.1k | // We can either truncate, zero extend, or no-op, accordingly. |
3453 | 61.1k | SDValue N = getValue(I.getOperand(0)); |
3454 | 61.1k | auto &TLI = DAG.getTargetLoweringInfo(); |
3455 | 61.1k | EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), |
3456 | 61.1k | I.getType()); |
3457 | 61.1k | EVT PtrMemVT = |
3458 | 61.1k | TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType()); |
3459 | 61.1k | N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), PtrMemVT); |
3460 | 61.1k | N = DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT); |
3461 | 61.1k | setValue(&I, N); |
3462 | 61.1k | } |
3463 | | |
3464 | 43.2k | void SelectionDAGBuilder::visitIntToPtr(const User &I) { |
3465 | 43.2k | // What to do depends on the size of the integer and the size of the pointer. |
3466 | 43.2k | // We can either truncate, zero extend, or no-op, accordingly. |
3467 | 43.2k | SDValue N = getValue(I.getOperand(0)); |
3468 | 43.2k | auto &TLI = DAG.getTargetLoweringInfo(); |
3469 | 43.2k | EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); |
3470 | 43.2k | EVT PtrMemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType()); |
3471 | 43.2k | N = DAG.getZExtOrTrunc(N, getCurSDLoc(), PtrMemVT); |
3472 | 43.2k | N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), DestVT); |
3473 | 43.2k | setValue(&I, N); |
3474 | 43.2k | } |
3475 | | |
3476 | 705k | void SelectionDAGBuilder::visitBitCast(const User &I) { |
3477 | 705k | SDValue N = getValue(I.getOperand(0)); |
3478 | 705k | SDLoc dl = getCurSDLoc(); |
3479 | 705k | EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), |
3480 | 705k | I.getType()); |
3481 | 705k | |
3482 | 705k | // BitCast assures us that source and destination are the same size so this is |
3483 | 705k | // either a BITCAST or a no-op. |
3484 | 705k | if (DestVT != N.getValueType()) |
3485 | 44.1k | setValue(&I, DAG.getNode(ISD::BITCAST, dl, |
3486 | 44.1k | DestVT, N)); // convert types. |
3487 | 661k | // Check if the original LLVM IR Operand was a ConstantInt, because getValue() |
3488 | 661k | // might fold any kind of constant expression to an integer constant and that |
3489 | 661k | // is not what we are looking for. Only recognize a bitcast of a genuine |
3490 | 661k | // constant integer as an opaque constant. |
3491 | 661k | else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0))) |
3492 | 11.0k | setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false, |
3493 | 11.0k | /*isOpaque*/true)); |
3494 | 649k | else |
3495 | 649k | setValue(&I, N); // noop cast. |
3496 | 705k | } |
3497 | | |
3498 | 321 | void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) { |
3499 | 321 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
3500 | 321 | const Value *SV = I.getOperand(0); |
3501 | 321 | SDValue N = getValue(SV); |
3502 | 321 | EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); |
3503 | 321 | |
3504 | 321 | unsigned SrcAS = SV->getType()->getPointerAddressSpace(); |
3505 | 321 | unsigned DestAS = I.getType()->getPointerAddressSpace(); |
3506 | 321 | |
3507 | 321 | if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS)) |
3508 | 228 | N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS); |
3509 | 321 | |
3510 | 321 | setValue(&I, N); |
3511 | 321 | } |
3512 | | |
3513 | 42.0k | void SelectionDAGBuilder::visitInsertElement(const User &I) { |
3514 | 42.0k | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
3515 | 42.0k | SDValue InVec = getValue(I.getOperand(0)); |
3516 | 42.0k | SDValue InVal = getValue(I.getOperand(1)); |
3517 | 42.0k | SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(), |
3518 | 42.0k | TLI.getVectorIdxTy(DAG.getDataLayout())); |
3519 | 42.0k | setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(), |
3520 | 42.0k | TLI.getValueType(DAG.getDataLayout(), I.getType()), |
3521 | 42.0k | InVec, InVal, InIdx)); |
3522 | 42.0k | } |
3523 | | |
3524 | 73.1k | void SelectionDAGBuilder::visitExtractElement(const User &I) { |
3525 | 73.1k | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
3526 | 73.1k | SDValue InVec = getValue(I.getOperand(0)); |
3527 | 73.1k | SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(), |
3528 | 73.1k | TLI.getVectorIdxTy(DAG.getDataLayout())); |
3529 | 73.1k | setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), |
3530 | 73.1k | TLI.getValueType(DAG.getDataLayout(), I.getType()), |
3531 | 73.1k | InVec, InIdx)); |
3532 | 73.1k | } |
3533 | | |
3534 | 81.2k | void SelectionDAGBuilder::visitShuffleVector(const User &I) { |
3535 | 81.2k | SDValue Src1 = getValue(I.getOperand(0)); |
3536 | 81.2k | SDValue Src2 = getValue(I.getOperand(1)); |
3537 | 81.2k | SDLoc DL = getCurSDLoc(); |
3538 | 81.2k | |
3539 | 81.2k | SmallVector<int, 8> Mask; |
3540 | 81.2k | ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask); |
3541 | 81.2k | unsigned MaskNumElts = Mask.size(); |
3542 | 81.2k | |
3543 | 81.2k | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
3544 | 81.2k | EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); |
3545 | 81.2k | EVT SrcVT = Src1.getValueType(); |
3546 | 81.2k | unsigned SrcNumElts = SrcVT.getVectorNumElements(); |
3547 | 81.2k | |
3548 | 81.2k | if (SrcNumElts == MaskNumElts) { |
3549 | 49.1k | setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask)); |
3550 | 49.1k | return; |
3551 | 49.1k | } |
3552 | 32.1k | |
3553 | 32.1k | // Normalize the shuffle vector since mask and vector length don't match. |
3554 | 32.1k | if (SrcNumElts < MaskNumElts) { |
3555 | 9.03k | // Mask is longer than the source vectors. We can use concatenate vector to |
3556 | 9.03k | // make the mask and vectors lengths match. |
3557 | 9.03k | |
3558 | 9.03k | if (MaskNumElts % SrcNumElts == 0) { |
3559 | 8.40k | // Mask length is a multiple of the source vector length. |
3560 | 8.40k | // Check if the shuffle is some kind of concatenation of the input |
3561 | 8.40k | // vectors. |
3562 | 8.40k | unsigned NumConcat = MaskNumElts / SrcNumElts; |
3563 | 8.40k | bool IsConcat = true; |
3564 | 8.40k | SmallVector<int, 8> ConcatSrcs(NumConcat, -1); |
3565 | 141k | for (unsigned i = 0; i != MaskNumElts; ++i133k ) { |
3566 | 134k | int Idx = Mask[i]; |
3567 | 134k | if (Idx < 0) |
3568 | 5.87k | continue; |
3569 | 128k | // Ensure the indices in each SrcVT sized piece are sequential and that |
3570 | 128k | // the same source is used for the whole piece. |
3571 | 128k | if ((Idx % SrcNumElts != (i % SrcNumElts)) || |
3572 | 128k | (127k ConcatSrcs[i / SrcNumElts] >= 0127k && |
3573 | 127k | ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)104k )) { |
3574 | 1.52k | IsConcat = false; |
3575 | 1.52k | break; |
3576 | 1.52k | } |
3577 | 127k | // Remember which source this index came from. |
3578 | 127k | ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts; |
3579 | 127k | } |
3580 | 8.40k | |
3581 | 8.40k | // The shuffle is concatenating multiple vectors together. Just emit |
3582 | 8.40k | // a CONCAT_VECTORS operation. |
3583 | 8.40k | if (IsConcat) { |
3584 | 6.87k | SmallVector<SDValue, 8> ConcatOps; |
3585 | 22.3k | for (auto Src : ConcatSrcs) { |
3586 | 22.3k | if (Src < 0) |
3587 | 1.23k | ConcatOps.push_back(DAG.getUNDEF(SrcVT)); |
3588 | 21.0k | else if (Src == 0) |
3589 | 8.80k | ConcatOps.push_back(Src1); |
3590 | 12.2k | else |
3591 | 12.2k | ConcatOps.push_back(Src2); |
3592 | 22.3k | } |
3593 | 6.87k | setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps)); |
3594 | 6.87k | return; |
3595 | 6.87k | } |
3596 | 2.16k | } |
3597 | 2.16k | |
3598 | 2.16k | unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts); |
3599 | 2.16k | unsigned NumConcat = PaddedMaskNumElts / SrcNumElts; |
3600 | 2.16k | EVT PaddedVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), |
3601 | 2.16k | PaddedMaskNumElts); |
3602 | 2.16k | |
3603 | 2.16k | // Pad both vectors with undefs to make them the same length as the mask. |
3604 | 2.16k | SDValue UndefVal = DAG.getUNDEF(SrcVT); |
3605 | 2.16k | |
3606 | 2.16k | SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal); |
3607 | 2.16k | SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal); |
3608 | 2.16k | MOps1[0] = Src1; |
3609 | 2.16k | MOps2[0] = Src2; |
3610 | 2.16k | |
3611 | 2.16k | Src1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1); |
3612 | 2.16k | Src2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2); |
3613 | 2.16k | |
3614 | 2.16k | // Readjust mask for new input vector length. |
3615 | 2.16k | SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1); |
3616 | 35.5k | for (unsigned i = 0; i != MaskNumElts; ++i33.3k ) { |
3617 | 33.3k | int Idx = Mask[i]; |
3618 | 33.3k | if (Idx >= (int)SrcNumElts) |
3619 | 8.29k | Idx -= SrcNumElts - PaddedMaskNumElts; |
3620 | 33.3k | MappedOps[i] = Idx; |
3621 | 33.3k | } |
3622 | 2.16k | |
3623 | 2.16k | SDValue Result = DAG.getVectorShuffle(PaddedVT, DL, Src1, Src2, MappedOps); |
3624 | 2.16k | |
3625 | 2.16k | // If the concatenated vector was padded, extract a subvector with the |
3626 | 2.16k | // correct number of elements. |
3627 | 2.16k | if (MaskNumElts != PaddedMaskNumElts) |
3628 | 636 | Result = DAG.getNode( |
3629 | 636 | ISD::EXTRACT_SUBVECTOR, DL, VT, Result, |
3630 | 636 | DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); |
3631 | 2.16k | |
3632 | 2.16k | setValue(&I, Result); |
3633 | 2.16k | return; |
3634 | 2.16k | } |
3635 | 23.0k | |
3636 | 23.0k | if (SrcNumElts > MaskNumElts) { |
3637 | 23.0k | // Analyze the access pattern of the vector to see if we can extract |
3638 | 23.0k | // two subvectors and do the shuffle. |
3639 | 23.0k | int StartIdx[2] = { -1, -1 }; // StartIdx to extract from |
3640 | 23.0k | bool CanExtract = true; |
3641 | 76.0k | for (int Idx : Mask) { |
3642 | 76.0k | unsigned Input = 0; |
3643 | 76.0k | if (Idx < 0) |
3644 | 20 | continue; |
3645 | 76.0k | |
3646 | 76.0k | if (Idx >= (int)SrcNumElts) { |
3647 | 2.58k | Input = 1; |
3648 | 2.58k | Idx -= SrcNumElts; |
3649 | 2.58k | } |
3650 | 76.0k | |
3651 | 76.0k | // If all the indices come from the same MaskNumElts sized portion of |
3652 | 76.0k | // the sources we can use extract. Also make sure the extract wouldn't |
3653 | 76.0k | // extract past the end of the source. |
3654 | 76.0k | int NewStartIdx = alignDown(Idx, MaskNumElts); |
3655 | 76.0k | if (NewStartIdx + MaskNumElts > SrcNumElts || |
3656 | 76.0k | (75.9k StartIdx[Input] >= 075.9k && StartIdx[Input] != NewStartIdx52.8k )) |
3657 | 3.67k | CanExtract = false; |
3658 | 76.0k | // Make sure we always update StartIdx as we use it to track if all |
3659 | 76.0k | // elements are undef. |
3660 | 76.0k | StartIdx[Input] = NewStartIdx; |
3661 | 76.0k | } |
3662 | 23.0k | |
3663 | 23.0k | if (StartIdx[0] < 0 && StartIdx[1] < 0156 ) { |
3664 | 0 | setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used. |
3665 | 0 | return; |
3666 | 0 | } |
3667 | 23.0k | if (CanExtract) { |
3668 | 21.0k | // Extract appropriate subvector and generate a vector shuffle |
3669 | 63.2k | for (unsigned Input = 0; Input < 2; ++Input42.1k ) { |
3670 | 42.1k | SDValue &Src = Input == 0 ? Src121.0k : Src221.0k ; |
3671 | 42.1k | if (StartIdx[Input] < 0) |
3672 | 20.9k | Src = DAG.getUNDEF(VT); |
3673 | 21.1k | else { |
3674 | 21.1k | Src = DAG.getNode( |
3675 | 21.1k | ISD::EXTRACT_SUBVECTOR, DL, VT, Src, |
3676 | 21.1k | DAG.getConstant(StartIdx[Input], DL, |
3677 | 21.1k | TLI.getVectorIdxTy(DAG.getDataLayout()))); |
3678 | 21.1k | } |
3679 | 42.1k | } |
3680 | 21.0k | |
3681 | 21.0k | // Calculate new mask. |
3682 | 21.0k | SmallVector<int, 8> MappedOps(Mask.begin(), Mask.end()); |
3683 | 61.0k | for (int &Idx : MappedOps) { |
3684 | 61.0k | if (Idx >= (int)SrcNumElts) |
3685 | 2.52k | Idx -= SrcNumElts + StartIdx[1] - MaskNumElts; |
3686 | 58.5k | else if (Idx >= 0) |
3687 | 58.5k | Idx -= StartIdx[0]; |
3688 | 61.0k | } |
3689 | 21.0k | |
3690 | 21.0k | setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps)); |
3691 | 21.0k | return; |
3692 | 21.0k | } |
3693 | 2.00k | } |
3694 | 2.00k | |
3695 | 2.00k | // We can't use either concat vectors or extract subvectors so fall back to |
3696 | 2.00k | // replacing the shuffle with extract and build vector. |
3697 | 2.00k | // to insert and build vector. |
3698 | 2.00k | EVT EltVT = VT.getVectorElementType(); |
3699 | 2.00k | EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); |
3700 | 2.00k | SmallVector<SDValue,8> Ops; |
3701 | 15.0k | for (int Idx : Mask) { |
3702 | 15.0k | SDValue Res; |
3703 | 15.0k | |
3704 | 15.0k | if (Idx < 0) { |
3705 | 8 | Res = DAG.getUNDEF(EltVT); |
3706 | 15.0k | } else { |
3707 | 15.0k | SDValue &Src = Idx < (int)SrcNumElts ? Src114.9k : Src267 ; |
3708 | 15.0k | if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts67 ; |
3709 | 15.0k | |
3710 | 15.0k | Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, |
3711 | 15.0k | EltVT, Src, DAG.getConstant(Idx, DL, IdxVT)); |
3712 | 15.0k | } |
3713 | 15.0k | |
3714 | 15.0k | Ops.push_back(Res); |
3715 | 15.0k | } |
3716 | 2.00k | |
3717 | 2.00k | setValue(&I, DAG.getBuildVector(VT, DL, Ops)); |
3718 | 2.00k | } |
3719 | | |
3720 | 3.61k | void SelectionDAGBuilder::visitInsertValue(const User &I) { |
3721 | 3.61k | ArrayRef<unsigned> Indices; |
3722 | 3.61k | if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(&I)) |
3723 | 3.61k | Indices = IV->getIndices(); |
3724 | 0 | else |
3725 | 0 | Indices = cast<ConstantExpr>(&I)->getIndices(); |
3726 | 3.61k | |
3727 | 3.61k | const Value *Op0 = I.getOperand(0); |
3728 | 3.61k | const Value *Op1 = I.getOperand(1); |
3729 | 3.61k | Type *AggTy = I.getType(); |
3730 | 3.61k | Type *ValTy = Op1->getType(); |
3731 | 3.61k | bool IntoUndef = isa<UndefValue>(Op0); |
3732 | 3.61k | bool FromUndef = isa<UndefValue>(Op1); |
3733 | 3.61k | |
3734 | 3.61k | unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices); |
3735 | 3.61k | |
3736 | 3.61k | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
3737 | 3.61k | SmallVector<EVT, 4> AggValueVTs; |
3738 | 3.61k | ComputeValueVTs(TLI, DAG.getDataLayout(), AggTy, AggValueVTs); |
3739 | 3.61k | SmallVector<EVT, 4> ValValueVTs; |
3740 | 3.61k | ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs); |
3741 | 3.61k | |
3742 | 3.61k | unsigned NumAggValues = AggValueVTs.size(); |
3743 | 3.61k | unsigned NumValValues = ValValueVTs.size(); |
3744 | 3.61k | SmallVector<SDValue, 4> Values(NumAggValues); |
3745 | 3.61k | |
3746 | 3.61k | // Ignore an insertvalue that produces an empty object |
3747 | 3.61k | if (!NumAggValues) { |
3748 | 1 | setValue(&I, DAG.getUNDEF(MVT(MVT::Other))); |
3749 | 1 | return; |
3750 | 1 | } |
3751 | 3.61k | |
3752 | 3.61k | SDValue Agg = getValue(Op0); |
3753 | 3.61k | unsigned i = 0; |
3754 | 3.61k | // Copy the beginning value(s) from the original aggregate. |
3755 | 12.8k | for (; i != LinearIndex; ++i9.23k ) |
3756 | 9.23k | Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i])90 : |
3757 | 9.23k | SDValue(Agg.getNode(), Agg.getResNo() + i)9.14k ; |
3758 | 3.61k | // Copy values from the inserted value(s). |
3759 | 3.61k | if (NumValValues) { |
3760 | 3.61k | SDValue Val = getValue(Op1); |
3761 | 7.25k | for (; i != LinearIndex + NumValValues; ++i3.63k ) |
3762 | 3.63k | Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i])13 : |
3763 | 3.63k | SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex)3.62k ; |
3764 | 3.61k | } |
3765 | 3.61k | // Copy remaining value(s) from the original aggregate. |
3766 | 12.7k | for (; i != NumAggValues; ++i9.09k ) |
3767 | 9.09k | Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i])1.84k : |
3768 | 9.09k | SDValue(Agg.getNode(), Agg.getResNo() + i)7.24k ; |
3769 | 3.61k | |
3770 | 3.61k | setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), |
3771 | 3.61k | DAG.getVTList(AggValueVTs), Values)); |
3772 | 3.61k | } |
3773 | | |
3774 | 23.6k | void SelectionDAGBuilder::visitExtractValue(const User &I) { |
3775 | 23.6k | ArrayRef<unsigned> Indices; |
3776 | 23.6k | if (const ExtractValueInst *EV = dyn_cast<ExtractValueInst>(&I)) |
3777 | 23.6k | Indices = EV->getIndices(); |
3778 | 1 | else |
3779 | 1 | Indices = cast<ConstantExpr>(&I)->getIndices(); |
3780 | 23.6k | |
3781 | 23.6k | const Value *Op0 = I.getOperand(0); |
3782 | 23.6k | Type *AggTy = Op0->getType(); |
3783 | 23.6k | Type *ValTy = I.getType(); |
3784 | 23.6k | bool OutOfUndef = isa<UndefValue>(Op0); |
3785 | 23.6k | |
3786 | 23.6k | unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices); |
3787 | 23.6k | |
3788 | 23.6k | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
3789 | 23.6k | SmallVector<EVT, 4> ValValueVTs; |
3790 | 23.6k | ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs); |
3791 | 23.6k | |
3792 | 23.6k | unsigned NumValValues = ValValueVTs.size(); |
3793 | 23.6k | |
3794 | 23.6k | // Ignore a extractvalue that produces an empty object |
3795 | 23.6k | if (!NumValValues) { |
3796 | 4 | setValue(&I, DAG.getUNDEF(MVT(MVT::Other))); |
3797 | 4 | return; |
3798 | 4 | } |
3799 | 23.6k | |
3800 | 23.6k | SmallVector<SDValue, 4> Values(NumValValues); |
3801 | 23.6k | |
3802 | 23.6k | SDValue Agg = getValue(Op0); |
3803 | 23.6k | // Copy out the selected value(s). |
3804 | 47.3k | for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i23.7k ) |
3805 | 23.7k | Values[i - LinearIndex] = |
3806 | 23.7k | OutOfUndef ? |
3807 | 15 | DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) : |
3808 | 23.7k | SDValue(Agg.getNode(), Agg.getResNo() + i)23.6k ; |
3809 | 23.6k | |
3810 | 23.6k | setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), |
3811 | 23.6k | DAG.getVTList(ValValueVTs), Values)); |
3812 | 23.6k | } |
3813 | | |
3814 | 1.22M | void SelectionDAGBuilder::visitGetElementPtr(const User &I) { |
3815 | 1.22M | Value *Op0 = I.getOperand(0); |
3816 | 1.22M | // Note that the pointer operand may be a vector of pointers. Take the scalar |
3817 | 1.22M | // element which holds a pointer. |
3818 | 1.22M | unsigned AS = Op0->getType()->getScalarType()->getPointerAddressSpace(); |
3819 | 1.22M | SDValue N = getValue(Op0); |
3820 | 1.22M | SDLoc dl = getCurSDLoc(); |
3821 | 1.22M | auto &TLI = DAG.getTargetLoweringInfo(); |
3822 | 1.22M | MVT PtrTy = TLI.getPointerTy(DAG.getDataLayout(), AS); |
3823 | 1.22M | MVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout(), AS); |
3824 | 1.22M | |
3825 | 1.22M | // Normalize Vector GEP - all scalar operands should be converted to the |
3826 | 1.22M | // splat vector. |
3827 | 1.22M | unsigned VectorWidth = I.getType()->isVectorTy() ? |
3828 | 1.22M | cast<VectorType>(I.getType())->getVectorNumElements()389 : 0; |
3829 | 1.22M | |
3830 | 1.22M | if (VectorWidth && !N.getValueType().isVector()389 ) { |
3831 | 215 | LLVMContext &Context = *DAG.getContext(); |
3832 | 215 | EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorWidth); |
3833 | 215 | N = DAG.getSplatBuildVector(VT, dl, N); |
3834 | 215 | } |
3835 | 1.22M | |
3836 | 1.22M | for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I); |
3837 | 3.29M | GTI != E; ++GTI2.07M ) { |
3838 | 2.07M | const Value *Idx = GTI.getOperand(); |
3839 | 2.07M | if (StructType *StTy = GTI.getStructTypeOrNull()) { |
3840 | 438k | unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue(); |
3841 | 438k | if (Field) { |
3842 | 324k | // N = N + Offset |
3843 | 324k | uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field); |
3844 | 324k | |
3845 | 324k | // In an inbounds GEP with an offset that is nonnegative even when |
3846 | 324k | // interpreted as signed, assume there is no unsigned overflow. |
3847 | 324k | SDNodeFlags Flags; |
3848 | 324k | if (int64_t(Offset) >= 0 && cast<GEPOperator>(I).isInBounds()) |
3849 | 308k | Flags.setNoUnsignedWrap(true); |
3850 | 324k | |
3851 | 324k | N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, |
3852 | 324k | DAG.getConstant(Offset, dl, N.getValueType()), Flags); |
3853 | 324k | } |
3854 | 1.63M | } else { |
3855 | 1.63M | unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS); |
3856 | 1.63M | MVT IdxTy = MVT::getIntegerVT(IdxSize); |
3857 | 1.63M | APInt ElementSize(IdxSize, DL->getTypeAllocSize(GTI.getIndexedType())); |
3858 | 1.63M | |
3859 | 1.63M | // If this is a scalar constant or a splat vector of constants, |
3860 | 1.63M | // handle it quickly. |
3861 | 1.63M | const auto *CI = dyn_cast<ConstantInt>(Idx); |
3862 | 1.63M | if (!CI && isa<ConstantDataVector>(Idx)166k && |
3863 | 1.63M | cast<ConstantDataVector>(Idx)->getSplatValue()62 ) |
3864 | 27 | CI = cast<ConstantInt>(cast<ConstantDataVector>(Idx)->getSplatValue()); |
3865 | 1.63M | |
3866 | 1.63M | if (CI) { |
3867 | 1.47M | if (CI->isZero()) |
3868 | 852k | continue; |
3869 | 618k | APInt Offs = ElementSize * CI->getValue().sextOrTrunc(IdxSize); |
3870 | 618k | LLVMContext &Context = *DAG.getContext(); |
3871 | 618k | SDValue OffsVal = VectorWidth ? |
3872 | 34 | DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorWidth)) : |
3873 | 618k | DAG.getConstant(Offs, dl, IdxTy)618k ; |
3874 | 618k | |
3875 | 618k | // In an inbouds GEP with an offset that is nonnegative even when |
3876 | 618k | // interpreted as signed, assume there is no unsigned overflow. |
3877 | 618k | SDNodeFlags Flags; |
3878 | 618k | if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds()579k ) |
3879 | 390k | Flags.setNoUnsignedWrap(true); |
3880 | 618k | |
3881 | 618k | OffsVal = DAG.getSExtOrTrunc(OffsVal, dl, N.getValueType()); |
3882 | 618k | |
3883 | 618k | N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, Flags); |
3884 | 618k | continue; |
3885 | 618k | } |
3886 | 166k | |
3887 | 166k | // N = N + Idx * ElementSize; |
3888 | 166k | SDValue IdxN = getValue(Idx); |
3889 | 166k | |
3890 | 166k | if (!IdxN.getValueType().isVector() && VectorWidth166k ) { |
3891 | 22 | EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorWidth); |
3892 | 22 | IdxN = DAG.getSplatBuildVector(VT, dl, IdxN); |
3893 | 22 | } |
3894 | 166k | |
3895 | 166k | // If the index is smaller or larger than intptr_t, truncate or extend |
3896 | 166k | // it. |
3897 | 166k | IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType()); |
3898 | 166k | |
3899 | 166k | // If this is a multiply by a power of two, turn it into a shl |
3900 | 166k | // immediately. This is a very common case. |
3901 | 166k | if (ElementSize != 1) { |
3902 | 85.1k | if (ElementSize.isPowerOf2()) { |
3903 | 69.5k | unsigned Amt = ElementSize.logBase2(); |
3904 | 69.5k | IdxN = DAG.getNode(ISD::SHL, dl, |
3905 | 69.5k | N.getValueType(), IdxN, |
3906 | 69.5k | DAG.getConstant(Amt, dl, IdxN.getValueType())); |
3907 | 69.5k | } else { |
3908 | 15.5k | SDValue Scale = DAG.getConstant(ElementSize.getZExtValue(), dl, |
3909 | 15.5k | IdxN.getValueType()); |
3910 | 15.5k | IdxN = DAG.getNode(ISD::MUL, dl, |
3911 | 15.5k | N.getValueType(), IdxN, Scale); |
3912 | 15.5k | } |
3913 | 85.1k | } |
3914 | 166k | |
3915 | 166k | N = DAG.getNode(ISD::ADD, dl, |
3916 | 166k | N.getValueType(), N, IdxN); |
3917 | 166k | } |
3918 | 2.07M | } |
3919 | 1.22M | |
3920 | 1.22M | if (PtrMemTy != PtrTy && !cast<GEPOperator>(I).isInBounds()0 ) |
3921 | 0 | N = DAG.getPtrExtendInReg(N, dl, PtrMemTy); |
3922 | 1.22M | |
3923 | 1.22M | setValue(&I, N); |
3924 | 1.22M | } |
3925 | | |
void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
  // If this is a fixed sized alloca in the entry block of the function,
  // allocate it statically on the stack.
  if (FuncInfo.StaticAllocaMap.count(&I))
    return;   // getValue will auto-populate this.

  SDLoc dl = getCurSDLoc();
  Type *Ty = I.getAllocatedType();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  auto &DL = DAG.getDataLayout();
  uint64_t TySize = DL.getTypeAllocSize(Ty);
  // Honor the larger of the IR-requested alignment and the type's preferred
  // alignment.
  unsigned Align =
      std::max((unsigned)DL.getPrefTypeAlignment(Ty), I.getAlignment());

  SDValue AllocSize = getValue(I.getArraySize());

  EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), DL.getAllocaAddrSpace());
  if (AllocSize.getValueType() != IntPtr)
    AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr);

  // Total allocation size = element count * element size.
  AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr,
                          AllocSize,
                          DAG.getConstant(TySize, dl, IntPtr));

  // Handle alignment.  If the requested alignment is less than or equal to
  // the stack alignment, ignore it.  If the size is greater than or equal to
  // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
  unsigned StackAlign =
      DAG.getSubtarget().getFrameLowering()->getStackAlignment();
  if (Align <= StackAlign)
    Align = 0;

  // Round the size of the allocation up to the stack alignment size
  // by adding SA-1 to the size. This doesn't overflow because we're computing
  // an address inside an alloca.
  SDNodeFlags Flags;
  Flags.setNoUnsignedWrap(true);
  AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
                          DAG.getConstant(StackAlign - 1, dl, IntPtr), Flags);

  // Mask out the low bits for alignment purposes.
  AllocSize =
      DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
                  DAG.getConstant(~(uint64_t)(StackAlign - 1), dl, IntPtr));

  SDValue Ops[] = {getRoot(), AllocSize, DAG.getConstant(Align, dl, IntPtr)};
  SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
  SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops);
  setValue(&I, DSA);
  // Result 1 is the output chain: dynamic allocas must stay ordered with
  // other side-effecting nodes.
  DAG.setRoot(DSA.getValue(1));

  assert(FuncInfo.MF->getFrameInfo().hasVarSizedObjects());
}
3979 | | |
void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
  // Atomic loads take a separate lowering path (ordering/fence handling).
  if (I.isAtomic())
    return visitAtomicLoad(I);

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  const Value *SV = I.getOperand(0);
  if (TLI.supportSwiftError()) {
    // Swifterror values can come from either a function parameter with
    // swifterror attribute or an alloca with swifterror attribute.
    if (const Argument *Arg = dyn_cast<Argument>(SV)) {
      if (Arg->hasSwiftErrorAttr())
        return visitLoadFromSwiftError(I);
    }

    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
      if (Alloca->isSwiftError())
        return visitLoadFromSwiftError(I);
    }
  }

  SDValue Ptr = getValue(SV);

  Type *Ty = I.getType();

  bool isVolatile = I.isVolatile();
  bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr;
  bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr;
  bool isDereferenceable =
      isDereferenceablePointer(SV, I.getType(), DAG.getDataLayout());
  unsigned Alignment = I.getAlignment();

  AAMDNodes AAInfo;
  I.getAAMetadata(AAInfo);
  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);

  // An aggregate-typed load is lowered as one scalar load per leaf value;
  // ComputeValueVTs supplies the per-part value/memory types and offsets.
  SmallVector<EVT, 4> ValueVTs, MemVTs;
  SmallVector<uint64_t, 4> Offsets;
  ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets);
  unsigned NumValues = ValueVTs.size();
  if (NumValues == 0)
    return;

  // Pick the incoming chain: volatile loads serialize with everything,
  // constant-memory loads serialize with nothing, ordinary loads only
  // serialize against stores (via PendingLoads, below).
  SDValue Root;
  bool ConstantMemory = false;
  if (isVolatile || NumValues > MaxParallelChains)
    // Serialize volatile loads with other side effects.
    Root = getRoot();
  else if (AA &&
           AA->pointsToConstantMemory(MemoryLocation(
               SV,
               LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)),
               AAInfo))) {
    // Do not serialize (non-volatile) loads of constant memory with anything.
    Root = DAG.getEntryNode();
    ConstantMemory = true;
  } else {
    // Do not serialize non-volatile loads against each other.
    Root = DAG.getRoot();
  }

  SDLoc dl = getCurSDLoc();

  if (isVolatile)
    Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG);

  // An aggregate load cannot wrap around the address space, so offsets to its
  // parts don't wrap either.
  SDNodeFlags Flags;
  Flags.setNoUnsignedWrap(true);

  SmallVector<SDValue, 4> Values(NumValues);
  SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
  EVT PtrVT = Ptr.getValueType();
  unsigned ChainI = 0;
  for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
    // Serializing loads here may result in excessive register pressure, and
    // TokenFactor places arbitrary choke points on the scheduler. SD scheduling
    // could recover a bit by hoisting nodes upward in the chain by recognizing
    // they are side-effect free or do not alias. The optimizer should really
    // avoid this case by converting large object/array copies to llvm.memcpy
    // (MaxParallelChains should always remain as failsafe).
    if (ChainI == MaxParallelChains) {
      assert(PendingLoads.empty() && "PendingLoads must be serialized first");
      SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                                  makeArrayRef(Chains.data(), ChainI));
      Root = Chain;
      ChainI = 0;
    }
    SDValue A = DAG.getNode(ISD::ADD, dl,
                            PtrVT, Ptr,
                            DAG.getConstant(Offsets[i], dl, PtrVT),
                            Flags);
    auto MMOFlags = MachineMemOperand::MONone;
    if (isVolatile)
      MMOFlags |= MachineMemOperand::MOVolatile;
    if (isNonTemporal)
      MMOFlags |= MachineMemOperand::MONonTemporal;
    if (isInvariant)
      MMOFlags |= MachineMemOperand::MOInvariant;
    if (isDereferenceable)
      MMOFlags |= MachineMemOperand::MODereferenceable;
    MMOFlags |= TLI.getMMOFlags(I);

    SDValue L = DAG.getLoad(MemVTs[i], dl, Root, A,
                            MachinePointerInfo(SV, Offsets[i]), Alignment,
                            MMOFlags, AAInfo, Ranges);
    Chains[ChainI] = L.getValue(1);

    // If the in-memory type differs from the value type, convert the loaded
    // value to the expected register type.
    if (MemVTs[i] != ValueVTs[i])
      L = DAG.getZExtOrTrunc(L, dl, ValueVTs[i]);

    Values[i] = L;
  }

  if (!ConstantMemory) {
    SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                                makeArrayRef(Chains.data(), ChainI));
    // Volatile loads update the root immediately; ordinary loads are queued
    // so independent loads stay unordered relative to each other.
    if (isVolatile)
      DAG.setRoot(Chain);
    else
      PendingLoads.push_back(Chain);
  }

  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, dl,
                           DAG.getVTList(ValueVTs), Values));
}
4106 | | |
4107 | 108 | void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) { |
4108 | 108 | assert(DAG.getTargetLoweringInfo().supportSwiftError() && |
4109 | 108 | "call visitStoreToSwiftError when backend supports swifterror"); |
4110 | 108 | |
4111 | 108 | SmallVector<EVT, 4> ValueVTs; |
4112 | 108 | SmallVector<uint64_t, 4> Offsets; |
4113 | 108 | const Value *SrcV = I.getOperand(0); |
4114 | 108 | ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), |
4115 | 108 | SrcV->getType(), ValueVTs, &Offsets); |
4116 | 108 | assert(ValueVTs.size() == 1 && Offsets[0] == 0 && |
4117 | 108 | "expect a single EVT for swifterror"); |
4118 | 108 | |
4119 | 108 | SDValue Src = getValue(SrcV); |
4120 | 108 | // Create a virtual register, then update the virtual register. |
4121 | 108 | unsigned VReg = |
4122 | 108 | SwiftError.getOrCreateVRegDefAt(&I, FuncInfo.MBB, I.getPointerOperand()); |
4123 | 108 | // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue |
4124 | 108 | // Chain can be getRoot or getControlRoot. |
4125 | 108 | SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg, |
4126 | 108 | SDValue(Src.getNode(), Src.getResNo())); |
4127 | 108 | DAG.setRoot(CopyNode); |
4128 | 108 | } |
4129 | | |
4130 | 53 | void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { |
4131 | 53 | assert(DAG.getTargetLoweringInfo().supportSwiftError() && |
4132 | 53 | "call visitLoadFromSwiftError when backend supports swifterror"); |
4133 | 53 | |
4134 | 53 | assert(!I.isVolatile() && |
4135 | 53 | I.getMetadata(LLVMContext::MD_nontemporal) == nullptr && |
4136 | 53 | I.getMetadata(LLVMContext::MD_invariant_load) == nullptr && |
4137 | 53 | "Support volatile, non temporal, invariant for load_from_swift_error"); |
4138 | 53 | |
4139 | 53 | const Value *SV = I.getOperand(0); |
4140 | 53 | Type *Ty = I.getType(); |
4141 | 53 | AAMDNodes AAInfo; |
4142 | 53 | I.getAAMetadata(AAInfo); |
4143 | 53 | assert( |
4144 | 53 | (!AA || |
4145 | 53 | !AA->pointsToConstantMemory(MemoryLocation( |
4146 | 53 | SV, LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)), |
4147 | 53 | AAInfo))) && |
4148 | 53 | "load_from_swift_error should not be constant memory"); |
4149 | 53 | |
4150 | 53 | SmallVector<EVT, 4> ValueVTs; |
4151 | 53 | SmallVector<uint64_t, 4> Offsets; |
4152 | 53 | ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty, |
4153 | 53 | ValueVTs, &Offsets); |
4154 | 53 | assert(ValueVTs.size() == 1 && Offsets[0] == 0 && |
4155 | 53 | "expect a single EVT for swifterror"); |
4156 | 53 | |
4157 | 53 | // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT |
4158 | 53 | SDValue L = DAG.getCopyFromReg( |
4159 | 53 | getRoot(), getCurSDLoc(), |
4160 | 53 | SwiftError.getOrCreateVRegUseAt(&I, FuncInfo.MBB, SV), ValueVTs[0]); |
4161 | 53 | |
4162 | 53 | setValue(&I, L); |
4163 | 53 | } |
4164 | | |
void SelectionDAGBuilder::visitStore(const StoreInst &I) {
  // Atomic stores take a separate lowering path.
  if (I.isAtomic())
    return visitAtomicStore(I);

  const Value *SrcV = I.getOperand(0);
  const Value *PtrV = I.getOperand(1);

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (TLI.supportSwiftError()) {
    // Swifterror values can come from either a function parameter with
    // swifterror attribute or an alloca with swifterror attribute.
    if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
      if (Arg->hasSwiftErrorAttr())
        return visitStoreToSwiftError(I);
    }

    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
      if (Alloca->isSwiftError())
        return visitStoreToSwiftError(I);
    }
  }

  // Aggregate stores are emitted as one scalar store per leaf value.
  SmallVector<EVT, 4> ValueVTs, MemVTs;
  SmallVector<uint64_t, 4> Offsets;
  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
                  SrcV->getType(), ValueVTs, &MemVTs, &Offsets);
  unsigned NumValues = ValueVTs.size();
  if (NumValues == 0)
    return;

  // Get the lowered operands. Note that we do this after
  // checking if NumResults is zero, because with zero results
  // the operands won't have values in the map.
  SDValue Src = getValue(SrcV);
  SDValue Ptr = getValue(PtrV);

  SDValue Root = getRoot();
  SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
  SDLoc dl = getCurSDLoc();
  EVT PtrVT = Ptr.getValueType();
  unsigned Alignment = I.getAlignment();
  AAMDNodes AAInfo;
  I.getAAMetadata(AAInfo);

  auto MMOFlags = MachineMemOperand::MONone;
  if (I.isVolatile())
    MMOFlags |= MachineMemOperand::MOVolatile;
  if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
    MMOFlags |= MachineMemOperand::MONonTemporal;
  MMOFlags |= TLI.getMMOFlags(I);

  // An aggregate load cannot wrap around the address space, so offsets to its
  // parts don't wrap either.
  SDNodeFlags Flags;
  Flags.setNoUnsignedWrap(true);

  unsigned ChainI = 0;
  for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
    // See visitLoad comments.
    if (ChainI == MaxParallelChains) {
      SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                                  makeArrayRef(Chains.data(), ChainI));
      Root = Chain;
      ChainI = 0;
    }
    SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
                              DAG.getConstant(Offsets[i], dl, PtrVT), Flags);
    SDValue Val = SDValue(Src.getNode(), Src.getResNo() + i);
    // Convert to the in-memory representation if it differs from the value
    // type.
    if (MemVTs[i] != ValueVTs[i])
      Val = DAG.getPtrExtOrTrunc(Val, dl, MemVTs[i]);
    SDValue St =
        DAG.getStore(Root, dl, Val, Add, MachinePointerInfo(PtrV, Offsets[i]),
                     Alignment, MMOFlags, AAInfo);
    Chains[ChainI] = St;
  }

  // Merge the per-part store chains and make the result the new root so
  // later side-effecting nodes are ordered after this store.
  SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                                  makeArrayRef(Chains.data(), ChainI));
  DAG.setRoot(StoreNode);
}
4245 | | |
4246 | | void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, |
4247 | 559 | bool IsCompressing) { |
4248 | 559 | SDLoc sdl = getCurSDLoc(); |
4249 | 559 | |
4250 | 559 | auto getMaskedStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0, |
4251 | 559 | unsigned& Alignment) { |
4252 | 394 | // llvm.masked.store.*(Src0, Ptr, alignment, Mask) |
4253 | 394 | Src0 = I.getArgOperand(0); |
4254 | 394 | Ptr = I.getArgOperand(1); |
4255 | 394 | Alignment = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue(); |
4256 | 394 | Mask = I.getArgOperand(3); |
4257 | 394 | }; |
4258 | 559 | auto getCompressingStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0, |
4259 | 559 | unsigned& Alignment) { |
4260 | 165 | // llvm.masked.compressstore.*(Src0, Ptr, Mask) |
4261 | 165 | Src0 = I.getArgOperand(0); |
4262 | 165 | Ptr = I.getArgOperand(1); |
4263 | 165 | Mask = I.getArgOperand(2); |
4264 | 165 | Alignment = 0; |
4265 | 165 | }; |
4266 | 559 | |
4267 | 559 | Value *PtrOperand, *MaskOperand, *Src0Operand; |
4268 | 559 | unsigned Alignment; |
4269 | 559 | if (IsCompressing) |
4270 | 165 | getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment); |
4271 | 394 | else |
4272 | 394 | getMaskedStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment); |
4273 | 559 | |
4274 | 559 | SDValue Ptr = getValue(PtrOperand); |
4275 | 559 | SDValue Src0 = getValue(Src0Operand); |
4276 | 559 | SDValue Mask = getValue(MaskOperand); |
4277 | 559 | |
4278 | 559 | EVT VT = Src0.getValueType(); |
4279 | 559 | if (!Alignment) |
4280 | 165 | Alignment = DAG.getEVTAlignment(VT); |
4281 | 559 | |
4282 | 559 | AAMDNodes AAInfo; |
4283 | 559 | I.getAAMetadata(AAInfo); |
4284 | 559 | |
4285 | 559 | MachineMemOperand *MMO = |
4286 | 559 | DAG.getMachineFunction(). |
4287 | 559 | getMachineMemOperand(MachinePointerInfo(PtrOperand), |
4288 | 559 | MachineMemOperand::MOStore, VT.getStoreSize(), |
4289 | 559 | Alignment, AAInfo); |
4290 | 559 | SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT, |
4291 | 559 | MMO, false /* Truncating */, |
4292 | 559 | IsCompressing); |
4293 | 559 | DAG.setRoot(StoreNode); |
4294 | 559 | setValue(&I, StoreNode); |
4295 | 559 | } |
4296 | | |
4297 | | // Get a uniform base for the Gather/Scatter intrinsic. |
4298 | | // The first argument of the Gather/Scatter intrinsic is a vector of pointers. |
4299 | | // We try to represent it as a base pointer + vector of indices. |
4300 | | // Usually, the vector of pointers comes from a 'getelementptr' instruction. |
4301 | | // The first operand of the GEP may be a single pointer or a vector of pointers |
4302 | | // Example: |
4303 | | // %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind |
4304 | | // or |
4305 | | // %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind |
4306 | | // %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, .. |
4307 | | // |
4308 | | // When the first GEP operand is a single pointer - it is the uniform base we |
4309 | | // are looking for. If first operand of the GEP is a splat vector - we |
4310 | | // extract the splat value and use it as a uniform base. |
4311 | | // In all other cases the function returns 'false'. |
4312 | | static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, |
4313 | 427 | SDValue &Scale, SelectionDAGBuilder* SDB) { |
4314 | 427 | SelectionDAG& DAG = SDB->DAG; |
4315 | 427 | LLVMContext &Context = *DAG.getContext(); |
4316 | 427 | |
4317 | 427 | assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type"); |
4318 | 427 | const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr); |
4319 | 427 | if (!GEP) |
4320 | 177 | return false; |
4321 | 250 | |
4322 | 250 | const Value *GEPPtr = GEP->getPointerOperand(); |
4323 | 250 | if (!GEPPtr->getType()->isVectorTy()) |
4324 | 152 | Ptr = GEPPtr; |
4325 | 98 | else if (!(Ptr = getSplatValue(GEPPtr))) |
4326 | 6 | return false; |
4327 | 244 | |
4328 | 244 | unsigned FinalIndex = GEP->getNumOperands() - 1; |
4329 | 244 | Value *IndexVal = GEP->getOperand(FinalIndex); |
4330 | 244 | |
4331 | 244 | // Ensure all the other indices are 0. |
4332 | 250 | for (unsigned i = 1; i < FinalIndex; ++i6 ) { |
4333 | 18 | auto *C = dyn_cast<ConstantInt>(GEP->getOperand(i)); |
4334 | 18 | if (!C || !C->isZero()6 ) |
4335 | 12 | return false; |
4336 | 18 | } |
4337 | 244 | |
4338 | 244 | // The operands of the GEP may be defined in another basic block. |
4339 | 244 | // In this case we'll not find nodes for the operands. |
4340 | 244 | if (232 !SDB->findValue(Ptr)232 || !SDB->findValue(IndexVal)231 ) |
4341 | 17 | return false; |
4342 | 215 | |
4343 | 215 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
4344 | 215 | const DataLayout &DL = DAG.getDataLayout(); |
4345 | 215 | Scale = DAG.getTargetConstant(DL.getTypeAllocSize(GEP->getResultElementType()), |
4346 | 215 | SDB->getCurSDLoc(), TLI.getPointerTy(DL)); |
4347 | 215 | Base = SDB->getValue(Ptr); |
4348 | 215 | Index = SDB->getValue(IndexVal); |
4349 | 215 | |
4350 | 215 | if (!Index.getValueType().isVector()) { |
4351 | 6 | unsigned GEPWidth = GEP->getType()->getVectorNumElements(); |
4352 | 6 | EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth); |
4353 | 6 | Index = DAG.getSplatBuildVector(VT, SDLoc(Index), Index); |
4354 | 6 | } |
4355 | 215 | return true; |
4356 | 215 | } |
4357 | | |
void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
  SDLoc sdl = getCurSDLoc();

  // llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask)
  const Value *Ptr = I.getArgOperand(1);
  SDValue Src0 = getValue(I.getArgOperand(0));
  SDValue Mask = getValue(I.getArgOperand(3));
  EVT VT = Src0.getValueType();
  unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue();
  // Alignment 0 means "use the natural alignment of the stored type".
  if (!Alignment)
    Alignment = DAG.getEVTAlignment(VT);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  AAMDNodes AAInfo;
  I.getAAMetadata(AAInfo);

  // Try to decompose the vector of pointers into a single base plus a
  // vector of indices (see getUniformBase).
  SDValue Base;
  SDValue Index;
  SDValue Scale;
  const Value *BasePtr = Ptr;
  bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this);

  const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr;
  MachineMemOperand *MMO = DAG.getMachineFunction().
    getMachineMemOperand(MachinePointerInfo(MemOpBasePtr),
                         MachineMemOperand::MOStore,  VT.getStoreSize(),
                         Alignment, AAInfo);
  if (!UniformBase) {
    // No common base: use base 0, the raw pointer vector as the index, and
    // scale 1.
    Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
    Index = getValue(Ptr);
    Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
  }
  SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index, Scale };
  SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,
                                         Ops, MMO);
  DAG.setRoot(Scatter);
  setValue(&I, Scatter);
}
4396 | | |
void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
  SDLoc sdl = getCurSDLoc();

  // The two supported forms have different operand layouts; the lambdas
  // below decode the one selected by IsExpanding.
  auto getMaskedLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
                           unsigned& Alignment) {
    // @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
    Ptr = I.getArgOperand(0);
    Alignment = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
    Mask = I.getArgOperand(2);
    Src0 = I.getArgOperand(3);
  };
  auto getExpandingLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
                           unsigned& Alignment) {
    // @llvm.masked.expandload.*(Ptr, Mask, Src0)
    Ptr = I.getArgOperand(0);
    Alignment = 0; // expandload has no explicit alignment operand
    Mask = I.getArgOperand(1);
    Src0 = I.getArgOperand(2);
  };

  Value  *PtrOperand, *MaskOperand, *Src0Operand;
  unsigned Alignment;
  if (IsExpanding)
    getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
  else
    getMaskedLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);

  SDValue Ptr = getValue(PtrOperand);
  SDValue Src0 = getValue(Src0Operand);
  SDValue Mask = getValue(MaskOperand);

  EVT VT = Src0.getValueType();
  // Alignment 0 means "use the natural alignment of the loaded type".
  if (!Alignment)
    Alignment = DAG.getEVTAlignment(VT);

  AAMDNodes AAInfo;
  I.getAAMetadata(AAInfo);
  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);

  // Do not serialize masked loads of constant memory with anything.
  bool AddToChain =
      !AA || !AA->pointsToConstantMemory(MemoryLocation(
                 PtrOperand,
                 LocationSize::precise(
                     DAG.getDataLayout().getTypeStoreSize(I.getType())),
                 AAInfo));
  SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();

  MachineMemOperand *MMO =
    DAG.getMachineFunction().
    getMachineMemOperand(MachinePointerInfo(PtrOperand),
                          MachineMemOperand::MOLoad,  VT.getStoreSize(),
                          Alignment, AAInfo, Ranges);

  SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO,
                                   ISD::NON_EXTLOAD, IsExpanding);
  // Result 1 is the chain; queue it so later stores are ordered after us.
  if (AddToChain)
    PendingLoads.push_back(Load.getValue(1));
  setValue(&I, Load);
}
4457 | | |
void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
  SDLoc sdl = getCurSDLoc();

  // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
  const Value *Ptr = I.getArgOperand(0);
  SDValue Src0 = getValue(I.getArgOperand(3));
  SDValue Mask = getValue(I.getArgOperand(2));

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
  unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue();
  // Alignment 0 means "use the natural alignment of the loaded type".
  if (!Alignment)
    Alignment = DAG.getEVTAlignment(VT);

  AAMDNodes AAInfo;
  I.getAAMetadata(AAInfo);
  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);

  SDValue Root = DAG.getRoot();
  // Try to decompose the vector of pointers into a single base plus a
  // vector of indices (see getUniformBase).
  SDValue Base;
  SDValue Index;
  SDValue Scale;
  const Value *BasePtr = Ptr;
  bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this);
  bool ConstantMemory = false;
  if (UniformBase && AA &&
      AA->pointsToConstantMemory(
          MemoryLocation(BasePtr,
                         LocationSize::precise(
                             DAG.getDataLayout().getTypeStoreSize(I.getType())),
                         AAInfo))) {
    // Do not serialize (non-volatile) loads of constant memory with anything.
    Root = DAG.getEntryNode();
    ConstantMemory = true;
  }

  MachineMemOperand *MMO =
    DAG.getMachineFunction().
    getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : nullptr),
                         MachineMemOperand::MOLoad,  VT.getStoreSize(),
                         Alignment, AAInfo, Ranges);

  if (!UniformBase) {
    // No common base: use base 0, the raw pointer vector as the index, and
    // scale 1.
    Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
    Index = getValue(Ptr);
    Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
  }
  SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale };
  SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
                                       Ops, MMO);

  // Result 1 is the chain; queue it unless the load was from constant memory.
  SDValue OutChain = Gather.getValue(1);
  if (!ConstantMemory)
    PendingLoads.push_back(OutChain);
  setValue(&I, Gather);
}
4514 | | |
void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
  SDLoc dl = getCurSDLoc();
  AtomicOrdering SuccessOrdering = I.getSuccessOrdering();
  AtomicOrdering FailureOrdering = I.getFailureOrdering();
  SyncScope::ID SSID = I.getSyncScopeID();

  SDValue InChain = getRoot();

  MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
  // Results: loaded value, i1 success flag, output chain.
  SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);

  auto Alignment = DAG.getEVTAlignment(MemVT);

  // cmpxchg both reads and writes the location.
  auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
  if (I.isVolatile())
    Flags |= MachineMemOperand::MOVolatile;
  Flags |= DAG.getTargetLoweringInfo().getMMOFlags(I);

  MachineFunction &MF = DAG.getMachineFunction();
  MachineMemOperand *MMO =
    MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
                            Flags, MemVT.getStoreSize(), Alignment,
                            AAMDNodes(), nullptr, SSID, SuccessOrdering,
                            FailureOrdering);

  SDValue L = DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS,
                                   dl, MemVT, VTs, InChain,
                                   getValue(I.getPointerOperand()),
                                   getValue(I.getCompareOperand()),
                                   getValue(I.getNewValOperand()), MMO);

  SDValue OutChain = L.getValue(2);

  setValue(&I, L);
  DAG.setRoot(OutChain);
}
4551 | | |
4552 | 7.29k | void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { |
4553 | 7.29k | SDLoc dl = getCurSDLoc(); |
4554 | 7.29k | ISD::NodeType NT; |
4555 | 7.29k | switch (I.getOperation()) { |
4556 | 7.29k | default: 0 llvm_unreachable0 ("Unknown atomicrmw operation"); |
4557 | 7.29k | case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break1.89k ; |
4558 | 7.29k | case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break1.37k ; |
4559 | 7.29k | case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break781 ; |
4560 | 7.29k | case AtomicRMWInst::And: NT = ISD::ATOMIC_LOAD_AND; break536 ; |
4561 | 7.29k | case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break158 ; |
4562 | 7.29k | case AtomicRMWInst::Or: NT = ISD::ATOMIC_LOAD_OR; break526 ; |
4563 | 7.29k | case AtomicRMWInst::Xor: NT = ISD::ATOMIC_LOAD_XOR; break490 ; |
4564 | 7.29k | case AtomicRMWInst::Max: NT = ISD::ATOMIC_LOAD_MAX; break371 ; |
4565 | 7.29k | case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break384 ; |
4566 | 7.29k | case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break376 ; |
4567 | 7.29k | case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break376 ; |
4568 | 7.29k | case AtomicRMWInst::FAdd: NT = ISD::ATOMIC_LOAD_FADD; break34 ; |
4569 | 7.29k | case AtomicRMWInst::FSub: NT = ISD::ATOMIC_LOAD_FSUB; break0 ; |
4570 | 7.29k | } |
4571 | 7.29k | AtomicOrdering Ordering = I.getOrdering(); |
4572 | 7.29k | SyncScope::ID SSID = I.getSyncScopeID(); |
4573 | 7.29k | |
4574 | 7.29k | SDValue InChain = getRoot(); |
4575 | 7.29k | |
4576 | 7.29k | auto MemVT = getValue(I.getValOperand()).getSimpleValueType(); |
4577 | 7.29k | auto Alignment = DAG.getEVTAlignment(MemVT); |
4578 | 7.29k | |
4579 | 7.29k | auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; |
4580 | 7.29k | if (I.isVolatile()) |
4581 | 1.38k | Flags |= MachineMemOperand::MOVolatile; |
4582 | 7.29k | Flags |= DAG.getTargetLoweringInfo().getMMOFlags(I); |
4583 | 7.29k | |
4584 | 7.29k | MachineFunction &MF = DAG.getMachineFunction(); |
4585 | 7.29k | MachineMemOperand *MMO = |
4586 | 7.29k | MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), Flags, |
4587 | 7.29k | MemVT.getStoreSize(), Alignment, AAMDNodes(), |
4588 | 7.29k | nullptr, SSID, Ordering); |
4589 | 7.29k | |
4590 | 7.29k | SDValue L = |
4591 | 7.29k | DAG.getAtomic(NT, dl, MemVT, InChain, |
4592 | 7.29k | getValue(I.getPointerOperand()), getValue(I.getValOperand()), |
4593 | 7.29k | MMO); |
4594 | 7.29k | |
4595 | 7.29k | SDValue OutChain = L.getValue(1); |
4596 | 7.29k | |
4597 | 7.29k | setValue(&I, L); |
4598 | 7.29k | DAG.setRoot(OutChain); |
4599 | 7.29k | } |
4600 | | |
4601 | 4.61k | void SelectionDAGBuilder::visitFence(const FenceInst &I) { |
4602 | 4.61k | SDLoc dl = getCurSDLoc(); |
4603 | 4.61k | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
4604 | 4.61k | SDValue Ops[3]; |
4605 | 4.61k | Ops[0] = getRoot(); |
4606 | 4.61k | Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl, |
4607 | 4.61k | TLI.getFenceOperandTy(DAG.getDataLayout())); |
4608 | 4.61k | Ops[2] = DAG.getConstant(I.getSyncScopeID(), dl, |
4609 | 4.61k | TLI.getFenceOperandTy(DAG.getDataLayout())); |
4610 | 4.61k | DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); |
4611 | 4.61k | } |
4612 | | |
4613 | 1.66k | void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { |
4614 | 1.66k | SDLoc dl = getCurSDLoc(); |
4615 | 1.66k | AtomicOrdering Order = I.getOrdering(); |
4616 | 1.66k | SyncScope::ID SSID = I.getSyncScopeID(); |
4617 | 1.66k | |
4618 | 1.66k | SDValue InChain = getRoot(); |
4619 | 1.66k | |
4620 | 1.66k | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
4621 | 1.66k | EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); |
4622 | 1.66k | EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType()); |
4623 | 1.66k | |
4624 | 1.66k | if (!TLI.supportsUnalignedAtomics() && |
4625 | 1.66k | I.getAlignment() < MemVT.getSizeInBits() / 8) |
4626 | 0 | report_fatal_error("Cannot generate unaligned atomic load"); |
4627 | 1.66k | |
4628 | 1.66k | auto Flags = MachineMemOperand::MOLoad; |
4629 | 1.66k | if (I.isVolatile()) |
4630 | 2 | Flags |= MachineMemOperand::MOVolatile; |
4631 | 1.66k | if (I.getMetadata(LLVMContext::MD_invariant_load) != nullptr) |
4632 | 4 | Flags |= MachineMemOperand::MOInvariant; |
4633 | 1.66k | if (isDereferenceablePointer(I.getPointerOperand(), I.getType(), |
4634 | 1.66k | DAG.getDataLayout())) |
4635 | 540 | Flags |= MachineMemOperand::MODereferenceable; |
4636 | 1.66k | |
4637 | 1.66k | Flags |= TLI.getMMOFlags(I); |
4638 | 1.66k | |
4639 | 1.66k | MachineMemOperand *MMO = |
4640 | 1.66k | DAG.getMachineFunction(). |
4641 | 1.66k | getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), |
4642 | 1.66k | Flags, MemVT.getStoreSize(), |
4643 | 1.66k | I.getAlignment() ? I.getAlignment() : |
4644 | 1.66k | DAG.getEVTAlignment(MemVT)0 , |
4645 | 1.66k | AAMDNodes(), nullptr, SSID, Order); |
4646 | 1.66k | |
4647 | 1.66k | InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG); |
4648 | 1.66k | SDValue L = |
4649 | 1.66k | DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain, |
4650 | 1.66k | getValue(I.getPointerOperand()), MMO); |
4651 | 1.66k | |
4652 | 1.66k | SDValue OutChain = L.getValue(1); |
4653 | 1.66k | if (MemVT != VT) |
4654 | 0 | L = DAG.getPtrExtOrTrunc(L, dl, VT); |
4655 | 1.66k | |
4656 | 1.66k | setValue(&I, L); |
4657 | 1.66k | DAG.setRoot(OutChain); |
4658 | 1.66k | } |
4659 | | |
4660 | 6.85k | void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { |
4661 | 6.85k | SDLoc dl = getCurSDLoc(); |
4662 | 6.85k | |
4663 | 6.85k | AtomicOrdering Ordering = I.getOrdering(); |
4664 | 6.85k | SyncScope::ID SSID = I.getSyncScopeID(); |
4665 | 6.85k | |
4666 | 6.85k | SDValue InChain = getRoot(); |
4667 | 6.85k | |
4668 | 6.85k | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
4669 | 6.85k | EVT MemVT = |
4670 | 6.85k | TLI.getMemValueType(DAG.getDataLayout(), I.getValueOperand()->getType()); |
4671 | 6.85k | |
4672 | 6.85k | if (I.getAlignment() < MemVT.getSizeInBits() / 8) |
4673 | 0 | report_fatal_error("Cannot generate unaligned atomic store"); |
4674 | 6.85k | |
4675 | 6.85k | auto Flags = MachineMemOperand::MOStore; |
4676 | 6.85k | if (I.isVolatile()) |
4677 | 121 | Flags |= MachineMemOperand::MOVolatile; |
4678 | 6.85k | Flags |= TLI.getMMOFlags(I); |
4679 | 6.85k | |
4680 | 6.85k | MachineFunction &MF = DAG.getMachineFunction(); |
4681 | 6.85k | MachineMemOperand *MMO = |
4682 | 6.85k | MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), Flags, |
4683 | 6.85k | MemVT.getStoreSize(), I.getAlignment(), AAMDNodes(), |
4684 | 6.85k | nullptr, SSID, Ordering); |
4685 | 6.85k | |
4686 | 6.85k | SDValue Val = getValue(I.getValueOperand()); |
4687 | 6.85k | if (Val.getValueType() != MemVT) |
4688 | 0 | Val = DAG.getPtrExtOrTrunc(Val, dl, MemVT); |
4689 | 6.85k | |
4690 | 6.85k | SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain, |
4691 | 6.85k | getValue(I.getPointerOperand()), Val, MMO); |
4692 | 6.85k | |
4693 | 6.85k | |
4694 | 6.85k | DAG.setRoot(OutChain); |
4695 | 6.85k | } |
4696 | | |
4697 | | /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC |
4698 | | /// node. |
4699 | | void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, |
4700 | 137k | unsigned Intrinsic) { |
4701 | 137k | // Ignore the callsite's attributes. A specific call site may be marked with |
4702 | 137k | // readnone, but the lowering code will expect the chain based on the |
4703 | 137k | // definition. |
4704 | 137k | const Function *F = I.getCalledFunction(); |
4705 | 137k | bool HasChain = !F->doesNotAccessMemory(); |
4706 | 137k | bool OnlyLoad = HasChain && F->onlyReadsMemory()80.7k ; |
4707 | 137k | |
4708 | 137k | // Build the operand list. |
4709 | 137k | SmallVector<SDValue, 8> Ops; |
4710 | 137k | if (HasChain) { // If this intrinsic has side-effects, chainify it. |
4711 | 80.7k | if (OnlyLoad) { |
4712 | 3.73k | // We don't need to serialize loads against other loads. |
4713 | 3.73k | Ops.push_back(DAG.getRoot()); |
4714 | 76.9k | } else { |
4715 | 76.9k | Ops.push_back(getRoot()); |
4716 | 76.9k | } |
4717 | 80.7k | } |
4718 | 137k | |
4719 | 137k | // Info is set by getTgtMemInstrinsic |
4720 | 137k | TargetLowering::IntrinsicInfo Info; |
4721 | 137k | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
4722 | 137k | bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, |
4723 | 137k | DAG.getMachineFunction(), |
4724 | 137k | Intrinsic); |
4725 | 137k | |
4726 | 137k | // Add the intrinsic ID as an integer operand if it's not a target intrinsic. |
4727 | 137k | if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID68.6k || |
4728 | 137k | Info.opc == ISD::INTRINSIC_W_CHAIN53.9k ) |
4729 | 137k | Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(), |
4730 | 137k | TLI.getPointerTy(DAG.getDataLayout()))); |
4731 | 137k | |
4732 | 137k | // Add all operands of the call to the operand list. |
4733 | 381k | for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i244k ) { |
4734 | 244k | SDValue Op = getValue(I.getArgOperand(i)); |
4735 | 244k | Ops.push_back(Op); |
4736 | 244k | } |
4737 | 137k | |
4738 | 137k | SmallVector<EVT, 4> ValueVTs; |
4739 | 137k | ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs); |
4740 | 137k | |
4741 | 137k | if (HasChain) |
4742 | 80.7k | ValueVTs.push_back(MVT::Other); |
4743 | 137k | |
4744 | 137k | SDVTList VTs = DAG.getVTList(ValueVTs); |
4745 | 137k | |
4746 | 137k | // Create the node. |
4747 | 137k | SDValue Result; |
4748 | 137k | if (IsTgtIntrinsic) { |
4749 | 68.6k | // This is target intrinsic that touches memory |
4750 | 68.6k | AAMDNodes AAInfo; |
4751 | 68.6k | I.getAAMetadata(AAInfo); |
4752 | 68.6k | Result = |
4753 | 68.6k | DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT, |
4754 | 68.6k | MachinePointerInfo(Info.ptrVal, Info.offset), |
4755 | 68.6k | Info.align, Info.flags, Info.size, AAInfo); |
4756 | 68.6k | } else if (68.3k !HasChain68.3k ) { |
4757 | 56.3k | Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); |
4758 | 56.3k | } else if (12.0k !I.getType()->isVoidTy()12.0k ) { |
4759 | 2.23k | Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops); |
4760 | 9.80k | } else { |
4761 | 9.80k | Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops); |
4762 | 9.80k | } |
4763 | 137k | |
4764 | 137k | if (HasChain) { |
4765 | 80.7k | SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1); |
4766 | 80.7k | if (OnlyLoad) |
4767 | 3.73k | PendingLoads.push_back(Chain); |
4768 | 76.9k | else |
4769 | 76.9k | DAG.setRoot(Chain); |
4770 | 80.7k | } |
4771 | 137k | |
4772 | 137k | if (!I.getType()->isVoidTy()) { |
4773 | 112k | if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) { |
4774 | 24.2k | EVT VT = TLI.getValueType(DAG.getDataLayout(), PTy); |
4775 | 24.2k | Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result); |
4776 | 24.2k | } else |
4777 | 88.2k | Result = lowerRangeToAssertZExt(DAG, I, Result); |
4778 | 112k | |
4779 | 112k | setValue(&I, Result); |
4780 | 112k | } |
4781 | 137k | } |
4782 | | |
4783 | | /// GetSignificand - Get the significand and build it into a floating-point |
4784 | | /// number with exponent of 1: |
4785 | | /// |
4786 | | /// Op = (Op & 0x007fffff) | 0x3f800000; |
4787 | | /// |
4788 | | /// where Op is the hexadecimal representation of floating point value. |
4789 | 9 | static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, const SDLoc &dl) { |
4790 | 9 | SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, |
4791 | 9 | DAG.getConstant(0x007fffff, dl, MVT::i32)); |
4792 | 9 | SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1, |
4793 | 9 | DAG.getConstant(0x3f800000, dl, MVT::i32)); |
4794 | 9 | return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2); |
4795 | 9 | } |
4796 | | |
4797 | | /// GetExponent - Get the exponent: |
4798 | | /// |
4799 | | /// (float)(int)(((Op & 0x7f800000) >> 23) - 127); |
4800 | | /// |
4801 | | /// where Op is the hexadecimal representation of floating point value. |
4802 | | static SDValue GetExponent(SelectionDAG &DAG, SDValue Op, |
4803 | 9 | const TargetLowering &TLI, const SDLoc &dl) { |
4804 | 9 | SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, |
4805 | 9 | DAG.getConstant(0x7f800000, dl, MVT::i32)); |
4806 | 9 | SDValue t1 = DAG.getNode( |
4807 | 9 | ISD::SRL, dl, MVT::i32, t0, |
4808 | 9 | DAG.getConstant(23, dl, TLI.getPointerTy(DAG.getDataLayout()))); |
4809 | 9 | SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1, |
4810 | 9 | DAG.getConstant(127, dl, MVT::i32)); |
4811 | 9 | return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2); |
4812 | 9 | } |
4813 | | |
4814 | | /// getF32Constant - Get 32-bit floating point constant. |
4815 | | static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt, |
4816 | 97 | const SDLoc &dl) { |
4817 | 97 | return DAG.getConstantFP(APFloat(APFloat::IEEEsingle(), APInt(32, Flt)), dl, |
4818 | 97 | MVT::f32); |
4819 | 97 | } |
4820 | | |
4821 | | static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl, |
4822 | 9 | SelectionDAG &DAG) { |
4823 | 9 | // TODO: What fast-math-flags should be set on the floating-point nodes? |
4824 | 9 | |
4825 | 9 | // IntegerPartOfX = ((int32_t)(t0); |
4826 | 9 | SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); |
4827 | 9 | |
4828 | 9 | // FractionalPartOfX = t0 - (float)IntegerPartOfX; |
4829 | 9 | SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); |
4830 | 9 | SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); |
4831 | 9 | |
4832 | 9 | // IntegerPartOfX <<= 23; |
4833 | 9 | IntegerPartOfX = DAG.getNode( |
4834 | 9 | ISD::SHL, dl, MVT::i32, IntegerPartOfX, |
4835 | 9 | DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy( |
4836 | 9 | DAG.getDataLayout()))); |
4837 | 9 | |
4838 | 9 | SDValue TwoToFractionalPartOfX; |
4839 | 9 | if (LimitFloatPrecision <= 6) { |
4840 | 3 | // For floating-point precision of 6: |
4841 | 3 | // |
4842 | 3 | // TwoToFractionalPartOfX = |
4843 | 3 | // 0.997535578f + |
4844 | 3 | // (0.735607626f + 0.252464424f * x) * x; |
4845 | 3 | // |
4846 | 3 | // error 0.0144103317, which is 6 bits |
4847 | 3 | SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, |
4848 | 3 | getF32Constant(DAG, 0x3e814304, dl)); |
4849 | 3 | SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, |
4850 | 3 | getF32Constant(DAG, 0x3f3c50c8, dl)); |
4851 | 3 | SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); |
4852 | 3 | TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, |
4853 | 3 | getF32Constant(DAG, 0x3f7f5e7e, dl)); |
4854 | 6 | } else if (LimitFloatPrecision <= 12) { |
4855 | 3 | // For floating-point precision of 12: |
4856 | 3 | // |
4857 | 3 | // TwoToFractionalPartOfX = |
4858 | 3 | // 0.999892986f + |
4859 | 3 | // (0.696457318f + |
4860 | 3 | // (0.224338339f + 0.792043434e-1f * x) * x) * x; |
4861 | 3 | // |
4862 | 3 | // error 0.000107046256, which is 13 to 14 bits |
4863 | 3 | SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, |
4864 | 3 | getF32Constant(DAG, 0x3da235e3, dl)); |
4865 | 3 | SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, |
4866 | 3 | getF32Constant(DAG, 0x3e65b8f3, dl)); |
4867 | 3 | SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); |
4868 | 3 | SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, |
4869 | 3 | getF32Constant(DAG, 0x3f324b07, dl)); |
4870 | 3 | SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); |
4871 | 3 | TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, |
4872 | 3 | getF32Constant(DAG, 0x3f7ff8fd, dl)); |
4873 | 3 | } else { // LimitFloatPrecision <= 18 |
4874 | 3 | // For floating-point precision of 18: |
4875 | 3 | // |
4876 | 3 | // TwoToFractionalPartOfX = |
4877 | 3 | // 0.999999982f + |
4878 | 3 | // (0.693148872f + |
4879 | 3 | // (0.240227044f + |
4880 | 3 | // (0.554906021e-1f + |
4881 | 3 | // (0.961591928e-2f + |
4882 | 3 | // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; |
4883 | 3 | // error 2.47208000*10^(-7), which is better than 18 bits |
4884 | 3 | SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, |
4885 | 3 | getF32Constant(DAG, 0x3924b03e, dl)); |
4886 | 3 | SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, |
4887 | 3 | getF32Constant(DAG, 0x3ab24b87, dl)); |
4888 | 3 | SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); |
4889 | 3 | SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, |
4890 | 3 | getF32Constant(DAG, 0x3c1d8c17, dl)); |
4891 | 3 | SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); |
4892 | 3 | SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, |
4893 | 3 | getF32Constant(DAG, 0x3d634a1d, dl)); |
4894 | 3 | SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); |
4895 | 3 | SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, |
4896 | 3 | getF32Constant(DAG, 0x3e75fe14, dl)); |
4897 | 3 | SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); |
4898 | 3 | SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, |
4899 | 3 | getF32Constant(DAG, 0x3f317234, dl)); |
4900 | 3 | SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); |
4901 | 3 | TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, |
4902 | 3 | getF32Constant(DAG, 0x3f800000, dl)); |
4903 | 3 | } |
4904 | 9 | |
4905 | 9 | // Add the exponent into the result in integer domain. |
4906 | 9 | SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX); |
4907 | 9 | return DAG.getNode(ISD::BITCAST, dl, MVT::f32, |
4908 | 9 | DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX)); |
4909 | 9 | } |
4910 | | |
4911 | | /// expandExp - Lower an exp intrinsic. Handles the special sequences for |
4912 | | /// limited-precision mode. |
4913 | | static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, |
4914 | 165 | const TargetLowering &TLI) { |
4915 | 165 | if (Op.getValueType() == MVT::f32 && |
4916 | 165 | LimitFloatPrecision > 032 && LimitFloatPrecision <= 183 ) { |
4917 | 3 | |
4918 | 3 | // Put the exponent in the right bit position for later addition to the |
4919 | 3 | // final result: |
4920 | 3 | // |
4921 | 3 | // #define LOG2OFe 1.4426950f |
4922 | 3 | // t0 = Op * LOG2OFe |
4923 | 3 | |
4924 | 3 | // TODO: What fast-math-flags should be set here? |
4925 | 3 | SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, |
4926 | 3 | getF32Constant(DAG, 0x3fb8aa3b, dl)); |
4927 | 3 | return getLimitedPrecisionExp2(t0, dl, DAG); |
4928 | 3 | } |
4929 | 162 | |
4930 | 162 | // No special expansion. |
4931 | 162 | return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op); |
4932 | 162 | } |
4933 | | |
4934 | | /// expandLog - Lower a log intrinsic. Handles the special sequences for |
4935 | | /// limited-precision mode. |
4936 | | static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, |
4937 | 107 | const TargetLowering &TLI) { |
4938 | 107 | // TODO: What fast-math-flags should be set on the floating-point nodes? |
4939 | 107 | |
4940 | 107 | if (Op.getValueType() == MVT::f32 && |
4941 | 107 | LimitFloatPrecision > 028 && LimitFloatPrecision <= 183 ) { |
4942 | 3 | SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); |
4943 | 3 | |
4944 | 3 | // Scale the exponent by log(2) [0.69314718f]. |
4945 | 3 | SDValue Exp = GetExponent(DAG, Op1, TLI, dl); |
4946 | 3 | SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, |
4947 | 3 | getF32Constant(DAG, 0x3f317218, dl)); |
4948 | 3 | |
4949 | 3 | // Get the significand and build it into a floating-point number with |
4950 | 3 | // exponent of 1. |
4951 | 3 | SDValue X = GetSignificand(DAG, Op1, dl); |
4952 | 3 | |
4953 | 3 | SDValue LogOfMantissa; |
4954 | 3 | if (LimitFloatPrecision <= 6) { |
4955 | 1 | // For floating-point precision of 6: |
4956 | 1 | // |
4957 | 1 | // LogofMantissa = |
4958 | 1 | // -1.1609546f + |
4959 | 1 | // (1.4034025f - 0.23903021f * x) * x; |
4960 | 1 | // |
4961 | 1 | // error 0.0034276066, which is better than 8 bits |
4962 | 1 | SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, |
4963 | 1 | getF32Constant(DAG, 0xbe74c456, dl)); |
4964 | 1 | SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, |
4965 | 1 | getF32Constant(DAG, 0x3fb3a2b1, dl)); |
4966 | 1 | SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); |
4967 | 1 | LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, |
4968 | 1 | getF32Constant(DAG, 0x3f949a29, dl)); |
4969 | 2 | } else if (LimitFloatPrecision <= 12) { |
4970 | 1 | // For floating-point precision of 12: |
4971 | 1 | // |
4972 | 1 | // LogOfMantissa = |
4973 | 1 | // -1.7417939f + |
4974 | 1 | // (2.8212026f + |
4975 | 1 | // (-1.4699568f + |
4976 | 1 | // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x; |
4977 | 1 | // |
4978 | 1 | // error 0.000061011436, which is 14 bits |
4979 | 1 | SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, |
4980 | 1 | getF32Constant(DAG, 0xbd67b6d6, dl)); |
4981 | 1 | SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, |
4982 | 1 | getF32Constant(DAG, 0x3ee4f4b8, dl)); |
4983 | 1 | SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); |
4984 | 1 | SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, |
4985 | 1 | getF32Constant(DAG, 0x3fbc278b, dl)); |
4986 | 1 | SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); |
4987 | 1 | SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, |
4988 | 1 | getF32Constant(DAG, 0x40348e95, dl)); |
4989 | 1 | SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); |
4990 | 1 | LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, |
4991 | 1 | getF32Constant(DAG, 0x3fdef31a, dl)); |
4992 | 1 | } else { // LimitFloatPrecision <= 18 |
4993 | 1 | // For floating-point precision of 18: |
4994 | 1 | // |
4995 | 1 | // LogOfMantissa = |
4996 | 1 | // -2.1072184f + |
4997 | 1 | // (4.2372794f + |
4998 | 1 | // (-3.7029485f + |
4999 | 1 | // (2.2781945f + |
5000 | 1 | // (-0.87823314f + |
5001 | 1 | // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x; |
5002 | 1 | // |
5003 | 1 | // error 0.0000023660568, which is better than 18 bits |
5004 | 1 | SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, |
5005 | 1 | getF32Constant(DAG, 0xbc91e5ac, dl)); |
5006 | 1 | SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, |
5007 | 1 | getF32Constant(DAG, 0x3e4350aa, dl)); |
5008 | 1 | SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); |
5009 | 1 | SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, |
5010 | 1 | getF32Constant(DAG, 0x3f60d3e3, dl)); |
5011 | 1 | SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); |
5012 | 1 | SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, |
5013 | 1 | getF32Constant(DAG, 0x4011cdf0, dl)); |
5014 | 1 | SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); |
5015 | 1 | SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, |
5016 | 1 | getF32Constant(DAG, 0x406cfd1c, dl)); |
5017 | 1 | SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); |
5018 | 1 | SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, |
5019 | 1 | getF32Constant(DAG, 0x408797cb, dl)); |
5020 | 1 | SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); |
5021 | 1 | LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, |
5022 | 1 | getF32Constant(DAG, 0x4006dcab, dl)); |
5023 | 1 | } |
5024 | 3 | |
5025 | 3 | return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa); |
5026 | 3 | } |
5027 | 104 | |
5028 | 104 | // No special expansion. |
5029 | 104 | return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op); |
5030 | 104 | } |
5031 | | |
5032 | | /// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for |
5033 | | /// limited-precision mode. |
5034 | | static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, |
5035 | 104 | const TargetLowering &TLI) { |
5036 | 104 | // TODO: What fast-math-flags should be set on the floating-point nodes? |
5037 | 104 | |
5038 | 104 | if (Op.getValueType() == MVT::f32 && |
5039 | 104 | LimitFloatPrecision > 032 && LimitFloatPrecision <= 183 ) { |
5040 | 3 | SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); |
5041 | 3 | |
5042 | 3 | // Get the exponent. |
5043 | 3 | SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl); |
5044 | 3 | |
5045 | 3 | // Get the significand and build it into a floating-point number with |
5046 | 3 | // exponent of 1. |
5047 | 3 | SDValue X = GetSignificand(DAG, Op1, dl); |
5048 | 3 | |
5049 | 3 | // Different possible minimax approximations of significand in |
5050 | 3 | // floating-point for various degrees of accuracy over [1,2]. |
5051 | 3 | SDValue Log2ofMantissa; |
5052 | 3 | if (LimitFloatPrecision <= 6) { |
5053 | 1 | // For floating-point precision of 6: |
5054 | 1 | // |
5055 | 1 | // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x; |
5056 | 1 | // |
5057 | 1 | // error 0.0049451742, which is more than 7 bits |
5058 | 1 | SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, |
5059 | 1 | getF32Constant(DAG, 0xbeb08fe0, dl)); |
5060 | 1 | SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, |
5061 | 1 | getF32Constant(DAG, 0x40019463, dl)); |
5062 | 1 | SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); |
5063 | 1 | Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, |
5064 | 1 | getF32Constant(DAG, 0x3fd6633d, dl)); |
5065 | 2 | } else if (LimitFloatPrecision <= 12) { |
5066 | 1 | // For floating-point precision of 12: |
5067 | 1 | // |
5068 | 1 | // Log2ofMantissa = |
5069 | 1 | // -2.51285454f + |
5070 | 1 | // (4.07009056f + |
5071 | 1 | // (-2.12067489f + |
5072 | 1 | // (.645142248f - 0.816157886e-1f * x) * x) * x) * x; |
5073 | 1 | // |
5074 | 1 | // error 0.0000876136000, which is better than 13 bits |
5075 | 1 | SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, |
5076 | 1 | getF32Constant(DAG, 0xbda7262e, dl)); |
5077 | 1 | SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, |
5078 | 1 | getF32Constant(DAG, 0x3f25280b, dl)); |
5079 | 1 | SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); |
5080 | 1 | SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, |
5081 | 1 | getF32Constant(DAG, 0x4007b923, dl)); |
5082 | 1 | SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); |
5083 | 1 | SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, |
5084 | 1 | getF32Constant(DAG, 0x40823e2f, dl)); |
5085 | 1 | SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); |
5086 | 1 | Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, |
5087 | 1 | getF32Constant(DAG, 0x4020d29c, dl)); |
5088 | 1 | } else { // LimitFloatPrecision <= 18 |
5089 | 1 | // For floating-point precision of 18: |
5090 | 1 | // |
5091 | 1 | // Log2ofMantissa = |
5092 | 1 | // -3.0400495f + |
5093 | 1 | // (6.1129976f + |
5094 | 1 | // (-5.3420409f + |
5095 | 1 | // (3.2865683f + |
5096 | 1 | // (-1.2669343f + |
5097 | 1 | // (0.27515199f - |
5098 | 1 | // 0.25691327e-1f * x) * x) * x) * x) * x) * x; |
5099 | 1 | // |
5100 | 1 | // error 0.0000018516, which is better than 18 bits |
5101 | 1 | SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, |
5102 | 1 | getF32Constant(DAG, 0xbcd2769e, dl)); |
5103 | 1 | SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, |
5104 | 1 | getF32Constant(DAG, 0x3e8ce0b9, dl)); |
5105 | 1 | SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); |
5106 | 1 | SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, |
5107 | 1 | getF32Constant(DAG, 0x3fa22ae7, dl)); |
5108 | 1 | SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); |
5109 | 1 | SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, |
5110 | 1 | getF32Constant(DAG, 0x40525723, dl)); |
5111 | 1 | SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); |
5112 | 1 | SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, |
5113 | 1 | getF32Constant(DAG, 0x40aaf200, dl)); |
5114 | 1 | SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); |
5115 | 1 | SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, |
5116 | 1 | getF32Constant(DAG, 0x40c39dad, dl)); |
5117 | 1 | SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); |
5118 | 1 | Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, |
5119 | 1 | getF32Constant(DAG, 0x4042902c, dl)); |
5120 | 1 | } |
5121 | 3 | |
5122 | 3 | return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa); |
5123 | 3 | } |
5124 | 101 | |
5125 | 101 | // No special expansion. |
5126 | 101 | return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op); |
5127 | 101 | } |
5128 | | |
5129 | | /// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for |
5130 | | /// limited-precision mode. |
5131 | | static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, |
5132 | 134 | const TargetLowering &TLI) { |
5133 | 134 | // TODO: What fast-math-flags should be set on the floating-point nodes? |
5134 | 134 | |
5135 | 134 | if (Op.getValueType() == MVT::f32 && |
5136 | 134 | LimitFloatPrecision > 033 && LimitFloatPrecision <= 183 ) { |
5137 | 3 | SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); |
5138 | 3 | |
5139 | 3 | // Scale the exponent by log10(2) [0.30102999f]. |
5140 | 3 | SDValue Exp = GetExponent(DAG, Op1, TLI, dl); |
5141 | 3 | SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, |
5142 | 3 | getF32Constant(DAG, 0x3e9a209a, dl)); |
5143 | 3 | |
5144 | 3 | // Get the significand and build it into a floating-point number with |
5145 | 3 | // exponent of 1. |
5146 | 3 | SDValue X = GetSignificand(DAG, Op1, dl); |
5147 | 3 | |
5148 | 3 | SDValue Log10ofMantissa; |
5149 | 3 | if (LimitFloatPrecision <= 6) { |
5150 | 1 | // For floating-point precision of 6: |
5151 | 1 | // |
5152 | 1 | // Log10ofMantissa = |
5153 | 1 | // -0.50419619f + |
5154 | 1 | // (0.60948995f - 0.10380950f * x) * x; |
5155 | 1 | // |
5156 | 1 | // error 0.0014886165, which is 6 bits |
5157 | 1 | SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, |
5158 | 1 | getF32Constant(DAG, 0xbdd49a13, dl)); |
5159 | 1 | SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, |
5160 | 1 | getF32Constant(DAG, 0x3f1c0789, dl)); |
5161 | 1 | SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); |
5162 | 1 | Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, |
5163 | 1 | getF32Constant(DAG, 0x3f011300, dl)); |
5164 | 2 | } else if (LimitFloatPrecision <= 12) { |
5165 | 1 | // For floating-point precision of 12: |
5166 | 1 | // |
5167 | 1 | // Log10ofMantissa = |
5168 | 1 | // -0.64831180f + |
5169 | 1 | // (0.91751397f + |
5170 | 1 | // (-0.31664806f + 0.47637168e-1f * x) * x) * x; |
5171 | 1 | // |
5172 | 1 | // error 0.00019228036, which is better than 12 bits |
5173 | 1 | SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, |
5174 | 1 | getF32Constant(DAG, 0x3d431f31, dl)); |
5175 | 1 | SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, |
5176 | 1 | getF32Constant(DAG, 0x3ea21fb2, dl)); |
5177 | 1 | SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); |
5178 | 1 | SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, |
5179 | 1 | getF32Constant(DAG, 0x3f6ae232, dl)); |
5180 | 1 | SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); |
5181 | 1 | Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, |
5182 | 1 | getF32Constant(DAG, 0x3f25f7c3, dl)); |
5183 | 1 | } else { // LimitFloatPrecision <= 18 |
5184 | 1 | // For floating-point precision of 18: |
5185 | 1 | // |
5186 | 1 | // Log10ofMantissa = |
5187 | 1 | // -0.84299375f + |
5188 | 1 | // (1.5327582f + |
5189 | 1 | // (-1.0688956f + |
5190 | 1 | // (0.49102474f + |
5191 | 1 | // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x; |
5192 | 1 | // |
5193 | 1 | // error 0.0000037995730, which is better than 18 bits |
5194 | 1 | SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, |
5195 | 1 | getF32Constant(DAG, 0x3c5d51ce, dl)); |
5196 | 1 | SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, |
5197 | 1 | getF32Constant(DAG, 0x3e00685a, dl)); |
5198 | 1 | SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); |
5199 | 1 | SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, |
5200 | 1 | getF32Constant(DAG, 0x3efb6798, dl)); |
5201 | 1 | SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); |
5202 | 1 | SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, |
5203 | 1 | getF32Constant(DAG, 0x3f88d192, dl)); |
5204 | 1 | SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); |
5205 | 1 | SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, |
5206 | 1 | getF32Constant(DAG, 0x3fc4316c, dl)); |
5207 | 1 | SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); |
5208 | 1 | Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8, |
5209 | 1 | getF32Constant(DAG, 0x3f57ce70, dl)); |
5210 | 1 | } |
5211 | 3 | |
5212 | 3 | return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa); |
5213 | 3 | } |
5214 | 131 | |
5215 | 131 | // No special expansion. |
5216 | 131 | return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op); |
5217 | 131 | } |
5218 | | |
5219 | | /// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for |
5220 | | /// limited-precision mode. |
5221 | | static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, |
5222 | 126 | const TargetLowering &TLI) { |
5223 | 126 | if (Op.getValueType() == MVT::f32 && |
5224 | 126 | LimitFloatPrecision > 045 && LimitFloatPrecision <= 183 ) |
5225 | 3 | return getLimitedPrecisionExp2(Op, dl, DAG); |
5226 | 123 | |
5227 | 123 | // No special expansion. |
5228 | 123 | return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op); |
5229 | 123 | } |
5230 | | |
5231 | | /// visitPow - Lower a pow intrinsic. Handles the special sequences for |
5232 | | /// limited-precision mode with x == 10.0f. |
5233 | | static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS, |
5234 | 171 | SelectionDAG &DAG, const TargetLowering &TLI) { |
5235 | 171 | bool IsExp10 = false; |
5236 | 171 | if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f3248 && |
5237 | 171 | LimitFloatPrecision > 048 && LimitFloatPrecision <= 183 ) { |
5238 | 3 | if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) { |
5239 | 3 | APFloat Ten(10.0f); |
5240 | 3 | IsExp10 = LHSC->isExactlyValue(Ten); |
5241 | 3 | } |
5242 | 3 | } |
5243 | 171 | |
5244 | 171 | // TODO: What fast-math-flags should be set on the FMUL node? |
5245 | 171 | if (IsExp10) { |
5246 | 3 | // Put the exponent in the right bit position for later addition to the |
5247 | 3 | // final result: |
5248 | 3 | // |
5249 | 3 | // #define LOG2OF10 3.3219281f |
5250 | 3 | // t0 = Op * LOG2OF10; |
5251 | 3 | SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS, |
5252 | 3 | getF32Constant(DAG, 0x40549a78, dl)); |
5253 | 3 | return getLimitedPrecisionExp2(t0, dl, DAG); |
5254 | 3 | } |
5255 | 168 | |
5256 | 168 | // No special expansion. |
5257 | 168 | return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS); |
5258 | 168 | } |
5259 | | |
5260 | | /// ExpandPowI - Expand a llvm.powi intrinsic. |
5261 | | static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS, |
5262 | 137 | SelectionDAG &DAG) { |
5263 | 137 | // If RHS is a constant, we can expand this out to a multiplication tree, |
5264 | 137 | // otherwise we end up lowering to a call to __powidf2 (for example). When |
5265 | 137 | // optimizing for size, we only want to do this if the expansion would produce |
5266 | 137 | // a small number of multiplies, otherwise we do the full expansion. |
5267 | 137 | if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) { |
5268 | 18 | // Get the exponent as a positive value. |
5269 | 18 | unsigned Val = RHSC->getSExtValue(); |
5270 | 18 | if ((int)Val < 0) Val = -Val1 ; |
5271 | 18 | |
5272 | 18 | // powi(x, 0) -> 1.0 |
5273 | 18 | if (Val == 0) |
5274 | 0 | return DAG.getConstantFP(1.0, DL, LHS.getValueType()); |
5275 | 18 | |
5276 | 18 | const Function &F = DAG.getMachineFunction().getFunction(); |
5277 | 18 | if (!F.hasOptSize() || |
5278 | 18 | // If optimizing for size, don't insert too many multiplies. |
5279 | 18 | // This inserts up to 5 multiplies. |
5280 | 18 | countPopulation(Val) + Log2_32(Val) < 76 ) { |
5281 | 12 | // We use the simple binary decomposition method to generate the multiply |
5282 | 12 | // sequence. There are more optimal ways to do this (for example, |
5283 | 12 | // powi(x,15) generates one more multiply than it should), but this has |
5284 | 12 | // the benefit of being both really simple and much better than a libcall. |
5285 | 12 | SDValue Res; // Logically starts equal to 1.0 |
5286 | 12 | SDValue CurSquare = LHS; |
5287 | 12 | // TODO: Intrinsics should have fast-math-flags that propagate to these |
5288 | 12 | // nodes. |
5289 | 47 | while (Val) { |
5290 | 35 | if (Val & 1) { |
5291 | 22 | if (Res.getNode()) |
5292 | 10 | Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare); |
5293 | 12 | else |
5294 | 12 | Res = CurSquare; // 1.0*CurSquare. |
5295 | 22 | } |
5296 | 35 | |
5297 | 35 | CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(), |
5298 | 35 | CurSquare, CurSquare); |
5299 | 35 | Val >>= 1; |
5300 | 35 | } |
5301 | 12 | |
5302 | 12 | // If the original was negative, invert the result, producing 1/(x*x*x). |
5303 | 12 | if (RHSC->getSExtValue() < 0) |
5304 | 1 | Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(), |
5305 | 1 | DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res); |
5306 | 12 | return Res; |
5307 | 12 | } |
5308 | 125 | } |
5309 | 125 | |
5310 | 125 | // Otherwise, expand to a libcall. |
5311 | 125 | return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS); |
5312 | 125 | } |
5313 | | |
5314 | | // getUnderlyingArgRegs - Find underlying registers used for a truncated, |
5315 | | // bitcasted, or split argument. Returns a list of <Register, size in bits> |
5316 | | void getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, unsigned>> &Regs, |
5317 | 405 | const SDValue &N) { |
5318 | 405 | switch (N.getOpcode()) { |
5319 | 405 | case ISD::CopyFromReg: { |
5320 | 291 | SDValue Op = N.getOperand(1); |
5321 | 291 | Regs.emplace_back(cast<RegisterSDNode>(Op)->getReg(), |
5322 | 291 | Op.getValueType().getSizeInBits()); |
5323 | 291 | return; |
5324 | 405 | } |
5325 | 405 | case ISD::BITCAST: |
5326 | 51 | case ISD::AssertZext: |
5327 | 51 | case ISD::AssertSext: |
5328 | 51 | case ISD::TRUNCATE: |
5329 | 51 | getUnderlyingArgRegs(Regs, N.getOperand(0)); |
5330 | 51 | return; |
5331 | 51 | case ISD::BUILD_PAIR: |
5332 | 22 | case ISD::BUILD_VECTOR: |
5333 | 22 | case ISD::CONCAT_VECTORS: |
5334 | 22 | for (SDValue Op : N->op_values()) |
5335 | 50 | getUnderlyingArgRegs(Regs, Op); |
5336 | 22 | return; |
5337 | 41 | default: |
5338 | 41 | return; |
5339 | 405 | } |
5340 | 405 | } |
5341 | | |
5342 | | /// If the DbgValueInst is a dbg_value of a function argument, create the |
5343 | | /// corresponding DBG_VALUE machine instruction for it now. At the end of |
5344 | | /// instruction selection, they will be inserted to the entry BB. |
5345 | | bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( |
5346 | | const Value *V, DILocalVariable *Variable, DIExpression *Expr, |
5347 | 658 | DILocation *DL, bool IsDbgDeclare, const SDValue &N) { |
5348 | 658 | const Argument *Arg = dyn_cast<Argument>(V); |
5349 | 658 | if (!Arg) |
5350 | 336 | return false; |
5351 | 322 | |
5352 | 322 | if (!IsDbgDeclare) { |
5353 | 309 | // ArgDbgValues are hoisted to the beginning of the entry block. So we |
5354 | 309 | // should only emit as ArgDbgValue if the dbg.value intrinsic is found in |
5355 | 309 | // the entry block. |
5356 | 309 | bool IsInEntryBlock = FuncInfo.MBB == &FuncInfo.MF->front(); |
5357 | 309 | if (!IsInEntryBlock) |
5358 | 1 | return false; |
5359 | 308 | |
5360 | 308 | // ArgDbgValues are hoisted to the beginning of the entry block. So we |
5361 | 308 | // should only emit as ArgDbgValue if the dbg.value intrinsic describes a |
5362 | 308 | // variable that also is a param. |
5363 | 308 | // |
5364 | 308 | // Although, if we are at the top of the entry block already, we can still |
5365 | 308 | // emit using ArgDbgValue. This might catch some situations when the |
5366 | 308 | // dbg.value refers to an argument that isn't used in the entry block, so |
5367 | 308 | // any CopyToReg node would be optimized out and the only way to express |
5368 | 308 | // this DBG_VALUE is by using the physical reg (or FI) as done in this |
5369 | 308 | // method. ArgDbgValues are hoisted to the beginning of the entry block. So |
5370 | 308 | // we should only emit as ArgDbgValue if the Variable is an argument to the |
5371 | 308 | // current function, and the dbg.value intrinsic is found in the entry |
5372 | 308 | // block. |
5373 | 308 | bool VariableIsFunctionInputArg = Variable->isParameter() && |
5374 | 308 | !DL->getInlinedAt()295 ; |
5375 | 308 | bool IsInPrologue = SDNodeOrder == LowestSDNodeOrder; |
5376 | 308 | if (!IsInPrologue && !VariableIsFunctionInputArg70 ) |
5377 | 9 | return false; |
5378 | 299 | |
5379 | 299 | // Here we assume that a function argument on IR level only can be used to |
5380 | 299 | // describe one input parameter on source level. If we for example have |
5381 | 299 | // source code like this |
5382 | 299 | // |
5383 | 299 | // struct A { long x, y; }; |
5384 | 299 | // void foo(struct A a, long b) { |
5385 | 299 | // ... |
5386 | 299 | // b = a.x; |
5387 | 299 | // ... |
5388 | 299 | // } |
5389 | 299 | // |
5390 | 299 | // and IR like this |
5391 | 299 | // |
5392 | 299 | // define void @foo(i32 %a1, i32 %a2, i32 %b) { |
5393 | 299 | // entry: |
5394 | 299 | // call void @llvm.dbg.value(metadata i32 %a1, "a", DW_OP_LLVM_fragment |
5395 | 299 | // call void @llvm.dbg.value(metadata i32 %a2, "a", DW_OP_LLVM_fragment |
5396 | 299 | // call void @llvm.dbg.value(metadata i32 %b, "b", |
5397 | 299 | // ... |
5398 | 299 | // call void @llvm.dbg.value(metadata i32 %a1, "b" |
5399 | 299 | // ... |
5400 | 299 | // |
5401 | 299 | // then the last dbg.value is describing a parameter "b" using a value that |
5402 | 299 | // is an argument. But since we already has used %a1 to describe a parameter |
5403 | 299 | // we should not handle that last dbg.value here (that would result in an |
5404 | 299 | // incorrect hoisting of the DBG_VALUE to the function entry). |
5405 | 299 | // Notice that we allow one dbg.value per IR level argument, to accomodate |
5406 | 299 | // for the situation with fragments above. |
5407 | 299 | if (VariableIsFunctionInputArg) { |
5408 | 289 | unsigned ArgNo = Arg->getArgNo(); |
5409 | 289 | if (ArgNo >= FuncInfo.DescribedArgs.size()) |
5410 | 275 | FuncInfo.DescribedArgs.resize(ArgNo + 1, false); |
5411 | 14 | else if (!IsInPrologue && FuncInfo.DescribedArgs.test(ArgNo)10 ) |
5412 | 4 | return false; |
5413 | 285 | FuncInfo.DescribedArgs.set(ArgNo); |
5414 | 285 | } |
5415 | 299 | } |
5416 | 322 | |
5417 | 322 | MachineFunction &MF = DAG.getMachineFunction(); |
5418 | 308 | const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); |
5419 | 308 | |
5420 | 308 | bool IsIndirect = false; |
5421 | 308 | Optional<MachineOperand> Op; |
5422 | 308 | // Some arguments' frame index is recorded during argument lowering. |
5423 | 308 | int FI = FuncInfo.getArgumentFrameIndex(Arg); |
5424 | 308 | if (FI != std::numeric_limits<int>::max()) |
5425 | 4 | Op = MachineOperand::CreateFI(FI); |
5426 | 308 | |
5427 | 308 | SmallVector<std::pair<unsigned, unsigned>, 8> ArgRegsAndSizes; |
5428 | 308 | if (!Op && N.getNode()304 ) { |
5429 | 304 | getUnderlyingArgRegs(ArgRegsAndSizes, N); |
5430 | 304 | Register Reg; |
5431 | 304 | if (ArgRegsAndSizes.size() == 1) |
5432 | 249 | Reg = ArgRegsAndSizes.front().first; |
5433 | 304 | |
5434 | 304 | if (Reg && Reg.isVirtual()249 ) { |
5435 | 249 | MachineRegisterInfo &RegInfo = MF.getRegInfo(); |
5436 | 249 | Register PR = RegInfo.getLiveInPhysReg(Reg); |
5437 | 249 | if (PR) |
5438 | 248 | Reg = PR; |
5439 | 249 | } |
5440 | 304 | if (Reg) { |
5441 | 249 | Op = MachineOperand::CreateReg(Reg, false); |
5442 | 249 | IsIndirect = IsDbgDeclare; |
5443 | 249 | } |
5444 | 304 | } |
5445 | 308 | |
5446 | 308 | if (!Op && N.getNode()55 ) { |
5447 | 55 | // Check if frame index is available. |
5448 | 55 | SDValue LCandidate = peekThroughBitcasts(N); |
5449 | 55 | if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(LCandidate.getNode())) |
5450 | 22 | if (FrameIndexSDNode *FINode = |
5451 | 22 | dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) |
5452 | 22 | Op = MachineOperand::CreateFI(FINode->getIndex()); |
5453 | 55 | } |
5454 | 308 | |
5455 | 308 | if (!Op) { |
5456 | 33 | // Create a DBG_VALUE for each decomposed value in ArgRegs to cover Reg |
5457 | 33 | auto splitMultiRegDbgValue |
5458 | 33 | = [&](ArrayRef<std::pair<unsigned, unsigned>> SplitRegs) { |
5459 | 18 | unsigned Offset = 0; |
5460 | 47 | for (auto RegAndSize : SplitRegs) { |
5461 | 47 | auto FragmentExpr = DIExpression::createFragmentExpression( |
5462 | 47 | Expr, Offset, RegAndSize.second); |
5463 | 47 | if (!FragmentExpr) |
5464 | 0 | continue; |
5465 | 47 | FuncInfo.ArgDbgValues.push_back( |
5466 | 47 | BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare, |
5467 | 47 | RegAndSize.first, Variable, *FragmentExpr)); |
5468 | 47 | Offset += RegAndSize.second; |
5469 | 47 | } |
5470 | 18 | }; |
5471 | 33 | |
5472 | 33 | // Check if ValueMap has reg number. |
5473 | 33 | DenseMap<const Value *, unsigned>::const_iterator |
5474 | 33 | VMI = FuncInfo.ValueMap.find(V); |
5475 | 33 | if (VMI != FuncInfo.ValueMap.end()) { |
5476 | 7 | const auto &TLI = DAG.getTargetLoweringInfo(); |
5477 | 7 | RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second, |
5478 | 7 | V->getType(), getABIRegCopyCC(V)); |
5479 | 7 | if (RFV.occupiesMultipleRegs()) { |
5480 | 3 | splitMultiRegDbgValue(RFV.getRegsAndSizes()); |
5481 | 3 | return true; |
5482 | 3 | } |
5483 | 4 | |
5484 | 4 | Op = MachineOperand::CreateReg(VMI->second, false); |
5485 | 4 | IsIndirect = IsDbgDeclare; |
5486 | 26 | } else if (ArgRegsAndSizes.size() > 1) { |
5487 | 15 | // This was split due to the calling convention, and no virtual register |
5488 | 15 | // mapping exists for the value. |
5489 | 15 | splitMultiRegDbgValue(ArgRegsAndSizes); |
5490 | 15 | return true; |
5491 | 15 | } |
5492 | 290 | } |
5493 | 290 | |
5494 | 290 | if (!Op) |
5495 | 11 | return false; |
5496 | 279 | |
5497 | 279 | assert(Variable->isValidLocationForIntrinsic(DL) && |
5498 | 279 | "Expected inlined-at fields to agree"); |
5499 | 279 | IsIndirect = (Op->isReg()) ? IsIndirect253 : true26 ; |
5500 | 279 | FuncInfo.ArgDbgValues.push_back( |
5501 | 279 | BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, |
5502 | 279 | *Op, Variable, Expr)); |
5503 | 279 | |
5504 | 279 | return true; |
5505 | 279 | } |
5506 | | |
5507 | | /// Return the appropriate SDDbgValue based on N. |
5508 | | SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N, |
5509 | | DILocalVariable *Variable, |
5510 | | DIExpression *Expr, |
5511 | | const DebugLoc &dl, |
5512 | 360 | unsigned DbgSDNodeOrder) { |
5513 | 360 | if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) { |
5514 | 0 | // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe |
5515 | 0 | // stack slot locations. |
5516 | 0 | // |
5517 | 0 | // Consider "int x = 0; int *px = &x;". There are two kinds of interesting |
5518 | 0 | // debug values here after optimization: |
5519 | 0 | // |
5520 | 0 | // dbg.value(i32* %px, !"int *px", !DIExpression()), and |
5521 | 0 | // dbg.value(i32* %px, !"int x", !DIExpression(DW_OP_deref)) |
5522 | 0 | // |
5523 | 0 | // Both describe the direct values of their associated variables. |
5524 | 0 | return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), |
5525 | 0 | /*IsIndirect*/ false, dl, DbgSDNodeOrder); |
5526 | 0 | } |
5527 | 360 | return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), |
5528 | 360 | /*IsIndirect*/ false, dl, DbgSDNodeOrder); |
5529 | 360 | } |
5530 | | |
5531 | | // VisualStudio defines setjmp as _setjmp |
5532 | | #if defined(_MSC_VER) && defined(setjmp) && \ |
5533 | | !defined(setjmp_undefined_for_msvc) |
5534 | | # pragma push_macro("setjmp") |
5535 | | # undef setjmp |
5536 | | # define setjmp_undefined_for_msvc |
5537 | | #endif |
5538 | | |
5539 | 43 | static unsigned FixedPointIntrinsicToOpcode(unsigned Intrinsic) { |
5540 | 43 | switch (Intrinsic) { |
5541 | 43 | case Intrinsic::smul_fix: |
5542 | 21 | return ISD::SMULFIX; |
5543 | 43 | case Intrinsic::umul_fix: |
5544 | 22 | return ISD::UMULFIX; |
5545 | 43 | default: |
5546 | 0 | llvm_unreachable("Unhandled fixed point intrinsic"); |
5547 | 43 | } |
5548 | 43 | } |
5549 | | |
5550 | | void SelectionDAGBuilder::lowerCallToExternalSymbol(const CallInst &I, |
5551 | 2 | const char *FunctionName) { |
5552 | 2 | assert(FunctionName && "FunctionName must not be nullptr"); |
5553 | 2 | SDValue Callee = DAG.getExternalSymbol( |
5554 | 2 | FunctionName, |
5555 | 2 | DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())); |
5556 | 2 | LowerCallTo(&I, Callee, I.isTailCall()); |
5557 | 2 | } |
5558 | | |
5559 | | /// Lower the call to the specified intrinsic function. |
5560 | | void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, |
5561 | 286k | unsigned Intrinsic) { |
5562 | 286k | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
5563 | 286k | SDLoc sdl = getCurSDLoc(); |
5564 | 286k | DebugLoc dl = getCurDebugLoc(); |
5565 | 286k | SDValue Res; |
5566 | 286k | |
5567 | 286k | switch (Intrinsic) { |
5568 | 286k | default: |
5569 | 137k | // By default, turn this into a target intrinsic node. |
5570 | 137k | visitTargetIntrinsic(I, Intrinsic); |
5571 | 137k | return; |
5572 | 286k | case Intrinsic::vastart: visitVAStart(I); return532 ; |
5573 | 286k | case Intrinsic::vaend: visitVAEnd(I); return700 ; |
5574 | 286k | case Intrinsic::vacopy: visitVACopy(I); return263 ; |
5575 | 286k | case Intrinsic::returnaddress: |
5576 | 6.27k | setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, |
5577 | 6.27k | TLI.getPointerTy(DAG.getDataLayout()), |
5578 | 6.27k | getValue(I.getArgOperand(0)))); |
5579 | 6.27k | return; |
5580 | 286k | case Intrinsic::addressofreturnaddress: |
5581 | 8 | setValue(&I, DAG.getNode(ISD::ADDROFRETURNADDR, sdl, |
5582 | 8 | TLI.getPointerTy(DAG.getDataLayout()))); |
5583 | 8 | return; |
5584 | 286k | case Intrinsic::sponentry: |
5585 | 10 | setValue(&I, DAG.getNode(ISD::SPONENTRY, sdl, |
5586 | 10 | TLI.getFrameIndexTy(DAG.getDataLayout()))); |
5587 | 10 | return; |
5588 | 286k | case Intrinsic::frameaddress: |
5589 | 10.6k | setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, |
5590 | 10.6k | TLI.getFrameIndexTy(DAG.getDataLayout()), |
5591 | 10.6k | getValue(I.getArgOperand(0)))); |
5592 | 10.6k | return; |
5593 | 286k | case Intrinsic::read_register: { |
5594 | 201 | Value *Reg = I.getArgOperand(0); |
5595 | 201 | SDValue Chain = getRoot(); |
5596 | 201 | SDValue RegName = |
5597 | 201 | DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata())); |
5598 | 201 | EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); |
5599 | 201 | Res = DAG.getNode(ISD::READ_REGISTER, sdl, |
5600 | 201 | DAG.getVTList(VT, MVT::Other), Chain, RegName); |
5601 | 201 | setValue(&I, Res); |
5602 | 201 | DAG.setRoot(Res.getValue(1)); |
5603 | 201 | return; |
5604 | 286k | } |
5605 | 286k | case Intrinsic::write_register: { |
5606 | 189 | Value *Reg = I.getArgOperand(0); |
5607 | 189 | Value *RegValue = I.getArgOperand(1); |
5608 | 189 | SDValue Chain = getRoot(); |
5609 | 189 | SDValue RegName = |
5610 | 189 | DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata())); |
5611 | 189 | DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain, |
5612 | 189 | RegName, getValue(RegValue))); |
5613 | 189 | return; |
5614 | 286k | } |
5615 | 286k | case Intrinsic::setjmp: |
5616 | 0 | lowerCallToExternalSymbol(I, &"_setjmp"[!TLI.usesUnderscoreSetJmp()]); |
5617 | 0 | return; |
5618 | 286k | case Intrinsic::longjmp: |
5619 | 0 | lowerCallToExternalSymbol(I, &"_longjmp"[!TLI.usesUnderscoreLongJmp()]); |
5620 | 0 | return; |
5621 | 286k | case Intrinsic::memcpy: { |
5622 | 6.15k | const auto &MCI = cast<MemCpyInst>(I); |
5623 | 6.15k | SDValue Op1 = getValue(I.getArgOperand(0)); |
5624 | 6.15k | SDValue Op2 = getValue(I.getArgOperand(1)); |
5625 | 6.15k | SDValue Op3 = getValue(I.getArgOperand(2)); |
5626 | 6.15k | // @llvm.memcpy defines 0 and 1 to both mean no alignment. |
5627 | 6.15k | unsigned DstAlign = std::max<unsigned>(MCI.getDestAlignment(), 1); |
5628 | 6.15k | unsigned SrcAlign = std::max<unsigned>(MCI.getSourceAlignment(), 1); |
5629 | 6.15k | unsigned Align = MinAlign(DstAlign, SrcAlign); |
5630 | 6.15k | bool isVol = MCI.isVolatile(); |
5631 | 6.15k | bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget())1.13k ; |
5632 | 6.15k | // FIXME: Support passing different dest/src alignments to the memcpy DAG |
5633 | 6.15k | // node. |
5634 | 6.15k | SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, |
5635 | 6.15k | false, isTC, |
5636 | 6.15k | MachinePointerInfo(I.getArgOperand(0)), |
5637 | 6.15k | MachinePointerInfo(I.getArgOperand(1))); |
5638 | 6.15k | updateDAGForMaybeTailCall(MC); |
5639 | 6.15k | return; |
5640 | 286k | } |
5641 | 286k | case Intrinsic::memset: { |
5642 | 7.94k | const auto &MSI = cast<MemSetInst>(I); |
5643 | 7.94k | SDValue Op1 = getValue(I.getArgOperand(0)); |
5644 | 7.94k | SDValue Op2 = getValue(I.getArgOperand(1)); |
5645 | 7.94k | SDValue Op3 = getValue(I.getArgOperand(2)); |
5646 | 7.94k | // @llvm.memset defines 0 and 1 to both mean no alignment. |
5647 | 7.94k | unsigned Align = std::max<unsigned>(MSI.getDestAlignment(), 1); |
5648 | 7.94k | bool isVol = MSI.isVolatile(); |
5649 | 7.94k | bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget())690 ; |
5650 | 7.94k | SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, |
5651 | 7.94k | isTC, MachinePointerInfo(I.getArgOperand(0))); |
5652 | 7.94k | updateDAGForMaybeTailCall(MS); |
5653 | 7.94k | return; |
5654 | 286k | } |
5655 | 286k | case Intrinsic::memmove: { |
5656 | 185 | const auto &MMI = cast<MemMoveInst>(I); |
5657 | 185 | SDValue Op1 = getValue(I.getArgOperand(0)); |
5658 | 185 | SDValue Op2 = getValue(I.getArgOperand(1)); |
5659 | 185 | SDValue Op3 = getValue(I.getArgOperand(2)); |
5660 | 185 | // @llvm.memmove defines 0 and 1 to both mean no alignment. |
5661 | 185 | unsigned DstAlign = std::max<unsigned>(MMI.getDestAlignment(), 1); |
5662 | 185 | unsigned SrcAlign = std::max<unsigned>(MMI.getSourceAlignment(), 1); |
5663 | 185 | unsigned Align = MinAlign(DstAlign, SrcAlign); |
5664 | 185 | bool isVol = MMI.isVolatile(); |
5665 | 185 | bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget())13 ; |
5666 | 185 | // FIXME: Support passing different dest/src alignments to the memmove DAG |
5667 | 185 | // node. |
5668 | 185 | SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, |
5669 | 185 | isTC, MachinePointerInfo(I.getArgOperand(0)), |
5670 | 185 | MachinePointerInfo(I.getArgOperand(1))); |
5671 | 185 | updateDAGForMaybeTailCall(MM); |
5672 | 185 | return; |
5673 | 286k | } |
5674 | 286k | case Intrinsic::memcpy_element_unordered_atomic: { |
5675 | 15 | const AtomicMemCpyInst &MI = cast<AtomicMemCpyInst>(I); |
5676 | 15 | SDValue Dst = getValue(MI.getRawDest()); |
5677 | 15 | SDValue Src = getValue(MI.getRawSource()); |
5678 | 15 | SDValue Length = getValue(MI.getLength()); |
5679 | 15 | |
5680 | 15 | unsigned DstAlign = MI.getDestAlignment(); |
5681 | 15 | unsigned SrcAlign = MI.getSourceAlignment(); |
5682 | 15 | Type *LengthTy = MI.getLength()->getType(); |
5683 | 15 | unsigned ElemSz = MI.getElementSizeInBytes(); |
5684 | 15 | bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget())0 ; |
5685 | 15 | SDValue MC = DAG.getAtomicMemcpy(getRoot(), sdl, Dst, DstAlign, Src, |
5686 | 15 | SrcAlign, Length, LengthTy, ElemSz, isTC, |
5687 | 15 | MachinePointerInfo(MI.getRawDest()), |
5688 | 15 | MachinePointerInfo(MI.getRawSource())); |
5689 | 15 | updateDAGForMaybeTailCall(MC); |
5690 | 15 | return; |
5691 | 286k | } |
5692 | 286k | case Intrinsic::memmove_element_unordered_atomic: { |
5693 | 13 | auto &MI = cast<AtomicMemMoveInst>(I); |
5694 | 13 | SDValue Dst = getValue(MI.getRawDest()); |
5695 | 13 | SDValue Src = getValue(MI.getRawSource()); |
5696 | 13 | SDValue Length = getValue(MI.getLength()); |
5697 | 13 | |
5698 | 13 | unsigned DstAlign = MI.getDestAlignment(); |
5699 | 13 | unsigned SrcAlign = MI.getSourceAlignment(); |
5700 | 13 | Type *LengthTy = MI.getLength()->getType(); |
5701 | 13 | unsigned ElemSz = MI.getElementSizeInBytes(); |
5702 | 13 | bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget())0 ; |
5703 | 13 | SDValue MC = DAG.getAtomicMemmove(getRoot(), sdl, Dst, DstAlign, Src, |
5704 | 13 | SrcAlign, Length, LengthTy, ElemSz, isTC, |
5705 | 13 | MachinePointerInfo(MI.getRawDest()), |
5706 | 13 | MachinePointerInfo(MI.getRawSource())); |
5707 | 13 | updateDAGForMaybeTailCall(MC); |
5708 | 13 | return; |
5709 | 286k | } |
5710 | 286k | case Intrinsic::memset_element_unordered_atomic: { |
5711 | 23 | auto &MI = cast<AtomicMemSetInst>(I); |
5712 | 23 | SDValue Dst = getValue(MI.getRawDest()); |
5713 | 23 | SDValue Val = getValue(MI.getValue()); |
5714 | 23 | SDValue Length = getValue(MI.getLength()); |
5715 | 23 | |
5716 | 23 | unsigned DstAlign = MI.getDestAlignment(); |
5717 | 23 | Type *LengthTy = MI.getLength()->getType(); |
5718 | 23 | unsigned ElemSz = MI.getElementSizeInBytes(); |
5719 | 23 | bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget())0 ; |
5720 | 23 | SDValue MC = DAG.getAtomicMemset(getRoot(), sdl, Dst, DstAlign, Val, Length, |
5721 | 23 | LengthTy, ElemSz, isTC, |
5722 | 23 | MachinePointerInfo(MI.getRawDest())); |
5723 | 23 | updateDAGForMaybeTailCall(MC); |
5724 | 23 | return; |
5725 | 286k | } |
5726 | 286k | case Intrinsic::dbg_addr: |
5727 | 536 | case Intrinsic::dbg_declare: { |
5728 | 536 | const auto &DI = cast<DbgVariableIntrinsic>(I); |
5729 | 536 | DILocalVariable *Variable = DI.getVariable(); |
5730 | 536 | DIExpression *Expression = DI.getExpression(); |
5731 | 536 | dropDanglingDebugInfo(Variable, Expression); |
5732 | 536 | assert(Variable && "Missing variable"); |
5733 | 536 | |
5734 | 536 | // Check if address has undef value. |
5735 | 536 | const Value *Address = DI.getVariableLocation(); |
5736 | 536 | if (!Address || isa<UndefValue>(Address)529 || |
5737 | 536 | (506 Address->use_empty()506 && !isa<Argument>(Address)74 )) { |
5738 | 96 | LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); |
5739 | 96 | return; |
5740 | 96 | } |
5741 | 440 | |
5742 | 440 | bool isParameter = Variable->isParameter() || isa<Argument>(Address)199 ; |
5743 | 440 | |
5744 | 440 | // Check if this variable can be described by a frame index, typically |
5745 | 440 | // either as a static alloca or a byval parameter. |
5746 | 440 | int FI = std::numeric_limits<int>::max(); |
5747 | 440 | if (const auto *AI = |
5748 | 409 | dyn_cast<AllocaInst>(Address->stripInBoundsConstantOffsets())) { |
5749 | 409 | if (AI->isStaticAlloca()) { |
5750 | 403 | auto I = FuncInfo.StaticAllocaMap.find(AI); |
5751 | 403 | if (I != FuncInfo.StaticAllocaMap.end()) |
5752 | 403 | FI = I->second; |
5753 | 403 | } |
5754 | 409 | } else if (const auto *31 Arg31 = dyn_cast<Argument>( |
5755 | 28 | Address->stripInBoundsConstantOffsets())) { |
5756 | 28 | FI = FuncInfo.getArgumentFrameIndex(Arg); |
5757 | 28 | } |
5758 | 440 | |
5759 | 440 | // llvm.dbg.addr is control dependent and always generates indirect |
5760 | 440 | // DBG_VALUE instructions. llvm.dbg.declare is handled as a frame index in |
5761 | 440 | // the MachineFunction variable table. |
5762 | 440 | if (FI != std::numeric_limits<int>::max()) { |
5763 | 418 | if (Intrinsic == Intrinsic::dbg_addr) { |
5764 | 3 | SDDbgValue *SDV = DAG.getFrameIndexDbgValue( |
5765 | 3 | Variable, Expression, FI, /*IsIndirect*/ true, dl, SDNodeOrder); |
5766 | 3 | DAG.AddDbgValue(SDV, getRoot().getNode(), isParameter); |
5767 | 3 | } |
5768 | 418 | return; |
5769 | 418 | } |
5770 | 22 | |
5771 | 22 | SDValue &N = NodeMap[Address]; |
5772 | 22 | if (!N.getNode() && isa<Argument>(Address)0 ) |
5773 | 0 | // Check unused arguments map. |
5774 | 0 | N = UnusedArgNodeMap[Address]; |
5775 | 22 | SDDbgValue *SDV; |
5776 | 22 | if (N.getNode()) { |
5777 | 22 | if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) |
5778 | 2 | Address = BCI->getOperand(0); |
5779 | 22 | // Parameters are handled specially. |
5780 | 22 | auto FINode = dyn_cast<FrameIndexSDNode>(N.getNode()); |
5781 | 22 | if (isParameter && FINode12 ) { |
5782 | 0 | // Byval parameter. We have a frame index at this point. |
5783 | 0 | SDV = |
5784 | 0 | DAG.getFrameIndexDbgValue(Variable, Expression, FINode->getIndex(), |
5785 | 0 | /*IsIndirect*/ true, dl, SDNodeOrder); |
5786 | 22 | } else if (isa<Argument>(Address)) { |
5787 | 13 | // Address is an argument, so try to emit its dbg value using |
5788 | 13 | // virtual register info from the FuncInfo.ValueMap. |
5789 | 13 | EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, N); |
5790 | 13 | return; |
5791 | 13 | } else { |
5792 | 9 | SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), |
5793 | 9 | true, dl, SDNodeOrder); |
5794 | 9 | } |
5795 | 22 | DAG.AddDbgValue(SDV, N.getNode(), isParameter); |
5796 | 9 | } else { |
5797 | 0 | // If Address is an argument then try to emit its dbg value using |
5798 | 0 | // virtual register info from the FuncInfo.ValueMap. |
5799 | 0 | if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, |
5800 | 0 | N)) { |
5801 | 0 | LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); |
5802 | 0 | } |
5803 | 0 | } |
5804 | 22 | return9 ; |
5805 | 22 | } |
5806 | 22 | case Intrinsic::dbg_label: { |
5807 | 4 | const DbgLabelInst &DI = cast<DbgLabelInst>(I); |
5808 | 4 | DILabel *Label = DI.getLabel(); |
5809 | 4 | assert(Label && "Missing label"); |
5810 | 4 | |
5811 | 4 | SDDbgLabel *SDV; |
5812 | 4 | SDV = DAG.getDbgLabel(Label, dl, SDNodeOrder); |
5813 | 4 | DAG.AddDbgLabel(SDV); |
5814 | 4 | return; |
5815 | 22 | } |
5816 | 4.93k | case Intrinsic::dbg_value: { |
5817 | 4.93k | const DbgValueInst &DI = cast<DbgValueInst>(I); |
5818 | 4.93k | assert(DI.getVariable() && "Missing variable"); |
5819 | 4.93k | |
5820 | 4.93k | DILocalVariable *Variable = DI.getVariable(); |
5821 | 4.93k | DIExpression *Expression = DI.getExpression(); |
5822 | 4.93k | dropDanglingDebugInfo(Variable, Expression); |
5823 | 4.93k | const Value *V = DI.getValue(); |
5824 | 4.93k | if (!V) |
5825 | 2 | return; |
5826 | 4.93k | |
5827 | 4.93k | if (handleDebugValue(V, Variable, Expression, dl, DI.getDebugLoc(), |
5828 | 4.93k | SDNodeOrder)) |
5829 | 4.90k | return; |
5830 | 32 | |
5831 | 32 | // TODO: Dangling debug info will eventually either be resolved or produce |
5832 | 32 | // an Undef DBG_VALUE. However in the resolution case, a gap may appear |
5833 | 32 | // between the original dbg.value location and its resolved DBG_VALUE, which |
5834 | 32 | // we should ideally fill with an extra Undef DBG_VALUE. |
5835 | 32 | |
5836 | 32 | DanglingDebugInfoMap[V].emplace_back(&DI, dl, SDNodeOrder); |
5837 | 32 | return; |
5838 | 32 | } |
5839 | 32 | |
5840 | 81 | case Intrinsic::eh_typeid_for: { |
5841 | 81 | // Find the type id for the given typeinfo. |
5842 | 81 | GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0)); |
5843 | 81 | unsigned TypeID = DAG.getMachineFunction().getTypeIDFor(GV); |
5844 | 81 | Res = DAG.getConstant(TypeID, sdl, MVT::i32); |
5845 | 81 | setValue(&I, Res); |
5846 | 81 | return; |
5847 | 32 | } |
5848 | 32 | |
5849 | 32 | case Intrinsic::eh_return_i32: |
5850 | 29 | case Intrinsic::eh_return_i64: |
5851 | 29 | DAG.getMachineFunction().setCallsEHReturn(true); |
5852 | 29 | DAG.setRoot(DAG.getNode(ISD::EH_RETURN, sdl, |
5853 | 29 | MVT::Other, |
5854 | 29 | getControlRoot(), |
5855 | 29 | getValue(I.getArgOperand(0)), |
5856 | 29 | getValue(I.getArgOperand(1)))); |
5857 | 29 | return; |
5858 | 29 | case Intrinsic::eh_unwind_init: |
5859 | 15 | DAG.getMachineFunction().setCallsUnwindInit(true); |
5860 | 15 | return; |
5861 | 29 | case Intrinsic::eh_dwarf_cfa: |
5862 | 19 | setValue(&I, DAG.getNode(ISD::EH_DWARF_CFA, sdl, |
5863 | 19 | TLI.getPointerTy(DAG.getDataLayout()), |
5864 | 19 | getValue(I.getArgOperand(0)))); |
5865 | 19 | return; |
5866 | 175 | case Intrinsic::eh_sjlj_callsite: { |
5867 | 175 | MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); |
5868 | 175 | ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0)); |
5869 | 175 | assert(CI && "Non-constant call site value in eh.sjlj.callsite!"); |
5870 | 175 | assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!"); |
5871 | 175 | |
5872 | 175 | MMI.setCurrentCallSite(CI->getZExtValue()); |
5873 | 175 | return; |
5874 | 29 | } |
5875 | 36 | case Intrinsic::eh_sjlj_functioncontext: { |
5876 | 36 | // Get and store the index of the function context. |
5877 | 36 | MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); |
5878 | 36 | AllocaInst *FnCtx = |
5879 | 36 | cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts()); |
5880 | 36 | int FI = FuncInfo.StaticAllocaMap[FnCtx]; |
5881 | 36 | MFI.setFunctionContextIndex(FI); |
5882 | 36 | return; |
5883 | 29 | } |
5884 | 31 | case Intrinsic::eh_sjlj_setjmp: { |
5885 | 31 | SDValue Ops[2]; |
5886 | 31 | Ops[0] = getRoot(); |
5887 | 31 | Ops[1] = getValue(I.getArgOperand(0)); |
5888 | 31 | SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, sdl, |
5889 | 31 | DAG.getVTList(MVT::i32, MVT::Other), Ops); |
5890 | 31 | setValue(&I, Op.getValue(0)); |
5891 | 31 | DAG.setRoot(Op.getValue(1)); |
5892 | 31 | return; |
5893 | 29 | } |
5894 | 29 | case Intrinsic::eh_sjlj_longjmp: |
5895 | 20 | DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other, |
5896 | 20 | getRoot(), getValue(I.getArgOperand(0)))); |
5897 | 20 | return; |
5898 | 36 | case Intrinsic::eh_sjlj_setup_dispatch: |
5899 | 36 | DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other, |
5900 | 36 | getRoot())); |
5901 | 36 | return; |
5902 | 324 | case Intrinsic::masked_gather: |
5903 | 324 | visitMaskedGather(I); |
5904 | 324 | return; |
5905 | 381 | case Intrinsic::masked_load: |
5906 | 381 | visitMaskedLoad(I); |
5907 | 381 | return; |
5908 | 103 | case Intrinsic::masked_scatter: |
5909 | 103 | visitMaskedScatter(I); |
5910 | 103 | return; |
5911 | 394 | case Intrinsic::masked_store: |
5912 | 394 | visitMaskedStore(I); |
5913 | 394 | return; |
5914 | 251 | case Intrinsic::masked_expandload: |
5915 | 251 | visitMaskedLoad(I, true /* IsExpanding */); |
5916 | 251 | return; |
5917 | 165 | case Intrinsic::masked_compressstore: |
5918 | 165 | visitMaskedStore(I, true /* IsCompressing */); |
5919 | 165 | return; |
5920 | 66 | case Intrinsic::x86_mmx_pslli_w: |
5921 | 66 | case Intrinsic::x86_mmx_pslli_d: |
5922 | 66 | case Intrinsic::x86_mmx_pslli_q: |
5923 | 66 | case Intrinsic::x86_mmx_psrli_w: |
5924 | 66 | case Intrinsic::x86_mmx_psrli_d: |
5925 | 66 | case Intrinsic::x86_mmx_psrli_q: |
5926 | 66 | case Intrinsic::x86_mmx_psrai_w: |
5927 | 66 | case Intrinsic::x86_mmx_psrai_d: { |
5928 | 66 | SDValue ShAmt = getValue(I.getArgOperand(1)); |
5929 | 66 | if (isa<ConstantSDNode>(ShAmt)) { |
5930 | 41 | visitTargetIntrinsic(I, Intrinsic); |
5931 | 41 | return; |
5932 | 41 | } |
5933 | 25 | unsigned NewIntrinsic = 0; |
5934 | 25 | EVT ShAmtVT = MVT::v2i32; |
5935 | 25 | switch (Intrinsic) { |
5936 | 25 | case Intrinsic::x86_mmx_pslli_w: |
5937 | 2 | NewIntrinsic = Intrinsic::x86_mmx_psll_w; |
5938 | 2 | break; |
5939 | 25 | case Intrinsic::x86_mmx_pslli_d: |
5940 | 2 | NewIntrinsic = Intrinsic::x86_mmx_psll_d; |
5941 | 2 | break; |
5942 | 25 | case Intrinsic::x86_mmx_pslli_q: |
5943 | 9 | NewIntrinsic = Intrinsic::x86_mmx_psll_q; |
5944 | 9 | break; |
5945 | 25 | case Intrinsic::x86_mmx_psrli_w: |
5946 | 2 | NewIntrinsic = Intrinsic::x86_mmx_psrl_w; |
5947 | 2 | break; |
5948 | 25 | case Intrinsic::x86_mmx_psrli_d: |
5949 | 2 | NewIntrinsic = Intrinsic::x86_mmx_psrl_d; |
5950 | 2 | break; |
5951 | 25 | case Intrinsic::x86_mmx_psrli_q: |
5952 | 4 | NewIntrinsic = Intrinsic::x86_mmx_psrl_q; |
5953 | 4 | break; |
5954 | 25 | case Intrinsic::x86_mmx_psrai_w: |
5955 | 2 | NewIntrinsic = Intrinsic::x86_mmx_psra_w; |
5956 | 2 | break; |
5957 | 25 | case Intrinsic::x86_mmx_psrai_d: |
5958 | 2 | NewIntrinsic = Intrinsic::x86_mmx_psra_d; |
5959 | 2 | break; |
5960 | 25 | default: 0 llvm_unreachable0 ("Impossible intrinsic"); // Can't reach here. |
5961 | 25 | } |
5962 | 25 | |
5963 | 25 | // The vector shift intrinsics with scalars uses 32b shift amounts but |
5964 | 25 | // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits |
5965 | 25 | // to be zero. |
5966 | 25 | // We must do this early because v2i32 is not a legal type. |
5967 | 25 | SDValue ShOps[2]; |
5968 | 25 | ShOps[0] = ShAmt; |
5969 | 25 | ShOps[1] = DAG.getConstant(0, sdl, MVT::i32); |
5970 | 25 | ShAmt = DAG.getBuildVector(ShAmtVT, sdl, ShOps); |
5971 | 25 | EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); |
5972 | 25 | ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); |
5973 | 25 | Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, |
5974 | 25 | DAG.getConstant(NewIntrinsic, sdl, MVT::i32), |
5975 | 25 | getValue(I.getArgOperand(0)), ShAmt); |
5976 | 25 | setValue(&I, Res); |
5977 | 25 | return; |
5978 | 25 | } |
5979 | 137 | case Intrinsic::powi: |
5980 | 137 | setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)), |
5981 | 137 | getValue(I.getArgOperand(1)), DAG)); |
5982 | 137 | return; |
5983 | 107 | case Intrinsic::log: |
5984 | 107 | setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); |
5985 | 107 | return; |
5986 | 104 | case Intrinsic::log2: |
5987 | 104 | setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); |
5988 | 104 | return; |
5989 | 134 | case Intrinsic::log10: |
5990 | 134 | setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); |
5991 | 134 | return; |
5992 | 165 | case Intrinsic::exp: |
5993 | 165 | setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); |
5994 | 165 | return; |
5995 | 126 | case Intrinsic::exp2: |
5996 | 126 | setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); |
5997 | 126 | return; |
5998 | 171 | case Intrinsic::pow: |
5999 | 171 | setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)), |
6000 | 171 | getValue(I.getArgOperand(1)), DAG, TLI)); |
6001 | 171 | return; |
6002 | 5.74k | case Intrinsic::sqrt: |
6003 | 5.74k | case Intrinsic::fabs: |
6004 | 5.74k | case Intrinsic::sin: |
6005 | 5.74k | case Intrinsic::cos: |
6006 | 5.74k | case Intrinsic::floor: |
6007 | 5.74k | case Intrinsic::ceil: |
6008 | 5.74k | case Intrinsic::trunc: |
6009 | 5.74k | case Intrinsic::rint: |
6010 | 5.74k | case Intrinsic::nearbyint: |
6011 | 5.74k | case Intrinsic::round: |
6012 | 5.74k | case Intrinsic::canonicalize: { |
6013 | 5.74k | unsigned Opcode; |
6014 | 5.74k | switch (Intrinsic) { |
6015 | 5.74k | default: 0 llvm_unreachable0 ("Impossible intrinsic"); // Can't reach here. |
6016 | 5.74k | case Intrinsic::sqrt: Opcode = ISD::FSQRT; break878 ; |
6017 | 5.74k | case Intrinsic::fabs: Opcode = ISD::FABS; break2.27k ; |
6018 | 5.74k | case Intrinsic::sin: Opcode = ISD::FSIN; break192 ; |
6019 | 5.74k | case Intrinsic::cos: Opcode = ISD::FCOS; break142 ; |
6020 | 5.74k | case Intrinsic::floor: Opcode = ISD::FFLOOR; break498 ; |
6021 | 5.74k | case Intrinsic::ceil: Opcode = ISD::FCEIL; break370 ; |
6022 | 5.74k | case Intrinsic::trunc: Opcode = ISD::FTRUNC; break315 ; |
6023 | 5.74k | case Intrinsic::rint: Opcode = ISD::FRINT; break220 ; |
6024 | 5.74k | case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break210 ; |
6025 | 5.74k | case Intrinsic::round: Opcode = ISD::FROUND; break114 ; |
6026 | 5.74k | case Intrinsic::canonicalize: Opcode = ISD::FCANONICALIZE; break533 ; |
6027 | 5.74k | } |
6028 | 5.74k | |
6029 | 5.74k | setValue(&I, DAG.getNode(Opcode, sdl, |
6030 | 5.74k | getValue(I.getArgOperand(0)).getValueType(), |
6031 | 5.74k | getValue(I.getArgOperand(0)))); |
6032 | 5.74k | return; |
6033 | 5.74k | } |
6034 | 5.74k | case Intrinsic::lround: |
6035 | 157 | case Intrinsic::llround: |
6036 | 157 | case Intrinsic::lrint: |
6037 | 157 | case Intrinsic::llrint: { |
6038 | 157 | unsigned Opcode; |
6039 | 157 | switch (Intrinsic) { |
6040 | 157 | default: 0 llvm_unreachable0 ("Impossible intrinsic"); // Can't reach here. |
6041 | 157 | case Intrinsic::lround: Opcode = ISD::LROUND; break43 ; |
6042 | 157 | case Intrinsic::llround: Opcode = ISD::LLROUND; break37 ; |
6043 | 157 | case Intrinsic::lrint: Opcode = ISD::LRINT; break42 ; |
6044 | 157 | case Intrinsic::llrint: Opcode = ISD::LLRINT; break35 ; |
6045 | 157 | } |
6046 | 157 | |
6047 | 157 | EVT RetVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); |
6048 | 157 | setValue(&I, DAG.getNode(Opcode, sdl, RetVT, |
6049 | 157 | getValue(I.getArgOperand(0)))); |
6050 | 157 | return; |
6051 | 157 | } |
6052 | 896 | case Intrinsic::minnum: |
6053 | 896 | setValue(&I, DAG.getNode(ISD::FMINNUM, sdl, |
6054 | 896 | getValue(I.getArgOperand(0)).getValueType(), |
6055 | 896 | getValue(I.getArgOperand(0)), |
6056 | 896 | getValue(I.getArgOperand(1)))); |
6057 | 896 | return; |
6058 | 918 | case Intrinsic::maxnum: |
6059 | 918 | setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl, |
6060 | 918 | getValue(I.getArgOperand(0)).getValueType(), |
6061 | 918 | getValue(I.getArgOperand(0)), |
6062 | 918 | getValue(I.getArgOperand(1)))); |
6063 | 918 | return; |
6064 | 157 | case Intrinsic::minimum: |
6065 | 34 | setValue(&I, DAG.getNode(ISD::FMINIMUM, sdl, |
6066 | 34 | getValue(I.getArgOperand(0)).getValueType(), |
6067 | 34 | getValue(I.getArgOperand(0)), |
6068 | 34 | getValue(I.getArgOperand(1)))); |
6069 | 34 | return; |
6070 | 157 | case Intrinsic::maximum: |
6071 | 34 | setValue(&I, DAG.getNode(ISD::FMAXIMUM, sdl, |
6072 | 34 | getValue(I.getArgOperand(0)).getValueType(), |
6073 | 34 | getValue(I.getArgOperand(0)), |
6074 | 34 | getValue(I.getArgOperand(1)))); |
6075 | 34 | return; |
6076 | 1.49k | case Intrinsic::copysign: |
6077 | 1.49k | setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl, |
6078 | 1.49k | getValue(I.getArgOperand(0)).getValueType(), |
6079 | 1.49k | getValue(I.getArgOperand(0)), |
6080 | 1.49k | getValue(I.getArgOperand(1)))); |
6081 | 1.49k | return; |
6082 | 3.24k | case Intrinsic::fma: |
6083 | 3.24k | setValue(&I, DAG.getNode(ISD::FMA, sdl, |
6084 | 3.24k | getValue(I.getArgOperand(0)).getValueType(), |
6085 | 3.24k | getValue(I.getArgOperand(0)), |
6086 | 3.24k | getValue(I.getArgOperand(1)), |
6087 | 3.24k | getValue(I.getArgOperand(2)))); |
6088 | 3.24k | return; |
6089 | 1.38k | case Intrinsic::experimental_constrained_fadd: |
6090 | 1.38k | case Intrinsic::experimental_constrained_fsub: |
6091 | 1.38k | case Intrinsic::experimental_constrained_fmul: |
6092 | 1.38k | case Intrinsic::experimental_constrained_fdiv: |
6093 | 1.38k | case Intrinsic::experimental_constrained_frem: |
6094 | 1.38k | case Intrinsic::experimental_constrained_fma: |
6095 | 1.38k | case Intrinsic::experimental_constrained_fptrunc: |
6096 | 1.38k | case Intrinsic::experimental_constrained_fpext: |
6097 | 1.38k | case Intrinsic::experimental_constrained_sqrt: |
6098 | 1.38k | case Intrinsic::experimental_constrained_pow: |
6099 | 1.38k | case Intrinsic::experimental_constrained_powi: |
6100 | 1.38k | case Intrinsic::experimental_constrained_sin: |
6101 | 1.38k | case Intrinsic::experimental_constrained_cos: |
6102 | 1.38k | case Intrinsic::experimental_constrained_exp: |
6103 | 1.38k | case Intrinsic::experimental_constrained_exp2: |
6104 | 1.38k | case Intrinsic::experimental_constrained_log: |
6105 | 1.38k | case Intrinsic::experimental_constrained_log10: |
6106 | 1.38k | case Intrinsic::experimental_constrained_log2: |
6107 | 1.38k | case Intrinsic::experimental_constrained_rint: |
6108 | 1.38k | case Intrinsic::experimental_constrained_nearbyint: |
6109 | 1.38k | case Intrinsic::experimental_constrained_maxnum: |
6110 | 1.38k | case Intrinsic::experimental_constrained_minnum: |
6111 | 1.38k | case Intrinsic::experimental_constrained_ceil: |
6112 | 1.38k | case Intrinsic::experimental_constrained_floor: |
6113 | 1.38k | case Intrinsic::experimental_constrained_round: |
6114 | 1.38k | case Intrinsic::experimental_constrained_trunc: |
6115 | 1.38k | visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I)); |
6116 | 1.38k | return; |
6117 | 1.38k | case Intrinsic::fmuladd: { |
6118 | 1.08k | EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); |
6119 | 1.08k | if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && |
6120 | 1.08k | TLI.isFMAFasterThanFMulAndFAdd(VT)) { |
6121 | 174 | setValue(&I, DAG.getNode(ISD::FMA, sdl, |
6122 | 174 | getValue(I.getArgOperand(0)).getValueType(), |
6123 | 174 | getValue(I.getArgOperand(0)), |
6124 | 174 | getValue(I.getArgOperand(1)), |
6125 | 174 | getValue(I.getArgOperand(2)))); |
6126 | 907 | } else { |
6127 | 907 | // TODO: Intrinsic calls should have fast-math-flags. |
6128 | 907 | SDValue Mul = DAG.getNode(ISD::FMUL, sdl, |
6129 | 907 | getValue(I.getArgOperand(0)).getValueType(), |
6130 | 907 | getValue(I.getArgOperand(0)), |
6131 | 907 | getValue(I.getArgOperand(1))); |
6132 | 907 | SDValue Add = DAG.getNode(ISD::FADD, sdl, |
6133 | 907 | getValue(I.getArgOperand(0)).getValueType(), |
6134 | 907 | Mul, |
6135 | 907 | getValue(I.getArgOperand(2))); |
6136 | 907 | setValue(&I, Add); |
6137 | 907 | } |
6138 | 1.08k | return; |
6139 | 1.38k | } |
6140 | 1.38k | case Intrinsic::convert_to_fp16: |
6141 | 231 | setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16, |
6142 | 231 | DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16, |
6143 | 231 | getValue(I.getArgOperand(0)), |
6144 | 231 | DAG.getTargetConstant(0, sdl, |
6145 | 231 | MVT::i32)))); |
6146 | 231 | return; |
6147 | 1.38k | case Intrinsic::convert_from_fp16: |
6148 | 277 | setValue(&I, DAG.getNode(ISD::FP_EXTEND, sdl, |
6149 | 277 | TLI.getValueType(DAG.getDataLayout(), I.getType()), |
6150 | 277 | DAG.getNode(ISD::BITCAST, sdl, MVT::f16, |
6151 | 277 | getValue(I.getArgOperand(0))))); |
6152 | 277 | return; |
6153 | 1.38k | case Intrinsic::pcmarker: { |
6154 | 0 | SDValue Tmp = getValue(I.getArgOperand(0)); |
6155 | 0 | DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp)); |
6156 | 0 | return; |
6157 | 1.38k | } |
6158 | 1.38k | case Intrinsic::readcyclecounter: { |
6159 | 28 | SDValue Op = getRoot(); |
6160 | 28 | Res = DAG.getNode(ISD::READCYCLECOUNTER, sdl, |
6161 | 28 | DAG.getVTList(MVT::i64, MVT::Other), Op); |
6162 | 28 | setValue(&I, Res); |
6163 | 28 | DAG.setRoot(Res.getValue(1)); |
6164 | 28 | return; |
6165 | 1.38k | } |
6166 | 1.38k | case Intrinsic::bitreverse: |
6167 | 296 | setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl, |
6168 | 296 | getValue(I.getArgOperand(0)).getValueType(), |
6169 | 296 | getValue(I.getArgOperand(0)))); |
6170 | 296 | return; |
6171 | 1.38k | case Intrinsic::bswap: |
6172 | 1.00k | setValue(&I, DAG.getNode(ISD::BSWAP, sdl, |
6173 | 1.00k | getValue(I.getArgOperand(0)).getValueType(), |
6174 | 1.00k | getValue(I.getArgOperand(0)))); |
6175 | 1.00k | return; |
6176 | 1.38k | case Intrinsic::cttz: { |
6177 | 1.37k | SDValue Arg = getValue(I.getArgOperand(0)); |
6178 | 1.37k | ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); |
6179 | 1.37k | EVT Ty = Arg.getValueType(); |
6180 | 1.37k | setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ564 : ISD::CTTZ_ZERO_UNDEF814 , |
6181 | 1.37k | sdl, Ty, Arg)); |
6182 | 1.37k | return; |
6183 | 1.38k | } |
6184 | 3.68k | case Intrinsic::ctlz: { |
6185 | 3.68k | SDValue Arg = getValue(I.getArgOperand(0)); |
6186 | 3.68k | ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); |
6187 | 3.68k | EVT Ty = Arg.getValueType(); |
6188 | 3.68k | setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ1.15k : ISD::CTLZ_ZERO_UNDEF2.52k , |
6189 | 3.68k | sdl, Ty, Arg)); |
6190 | 3.68k | return; |
6191 | 1.38k | } |
6192 | 2.82k | case Intrinsic::ctpop: { |
6193 | 2.82k | SDValue Arg = getValue(I.getArgOperand(0)); |
6194 | 2.82k | EVT Ty = Arg.getValueType(); |
6195 | 2.82k | setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg)); |
6196 | 2.82k | return; |
6197 | 1.38k | } |
6198 | 3.23k | case Intrinsic::fshl: |
6199 | 3.23k | case Intrinsic::fshr: { |
6200 | 3.23k | bool IsFSHL = Intrinsic == Intrinsic::fshl; |
6201 | 3.23k | SDValue X = getValue(I.getArgOperand(0)); |
6202 | 3.23k | SDValue Y = getValue(I.getArgOperand(1)); |
6203 | 3.23k | SDValue Z = getValue(I.getArgOperand(2)); |
6204 | 3.23k | EVT VT = X.getValueType(); |
6205 | 3.23k | SDValue BitWidthC = DAG.getConstant(VT.getScalarSizeInBits(), sdl, VT); |
6206 | 3.23k | SDValue Zero = DAG.getConstant(0, sdl, VT); |
6207 | 3.23k | SDValue ShAmt = DAG.getNode(ISD::UREM, sdl, VT, Z, BitWidthC); |
6208 | 3.23k | |
6209 | 3.23k | auto FunnelOpcode = IsFSHL ? ISD::FSHL1.63k : ISD::FSHR1.59k ; |
6210 | 3.23k | if (TLI.isOperationLegalOrCustom(FunnelOpcode, VT)) { |
6211 | 994 | setValue(&I, DAG.getNode(FunnelOpcode, sdl, VT, X, Y, Z)); |
6212 | 994 | return; |
6213 | 994 | } |
6214 | 2.23k | |
6215 | 2.23k | // When X == Y, this is rotate. If the data type has a power-of-2 size, we |
6216 | 2.23k | // avoid the select that is necessary in the general case to filter out |
6217 | 2.23k | // the 0-shift possibility that leads to UB. |
6218 | 2.23k | if (X == Y && isPowerOf2_32(VT.getScalarSizeInBits())1.33k ) { |
6219 | 1.32k | auto RotateOpcode = IsFSHL ? ISD::ROTL682 : ISD::ROTR646 ; |
6220 | 1.32k | if (TLI.isOperationLegalOrCustom(RotateOpcode, VT)) { |
6221 | 929 | setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z)); |
6222 | 929 | return; |
6223 | 929 | } |
6224 | 399 | |
6225 | 399 | // Some targets only rotate one way. Try the opposite direction. |
6226 | 399 | RotateOpcode = IsFSHL ? ISD::ROTR103 : ISD::ROTL296 ; |
6227 | 399 | if (TLI.isOperationLegalOrCustom(RotateOpcode, VT)) { |
6228 | 201 | // Negate the shift amount because it is safe to ignore the high bits. |
6229 | 201 | SDValue NegShAmt = DAG.getNode(ISD::SUB, sdl, VT, Zero, Z); |
6230 | 201 | setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, NegShAmt)); |
6231 | 201 | return; |
6232 | 201 | } |
6233 | 198 | |
6234 | 198 | // fshl (rotl): (X << (Z % BW)) | (X >> ((0 - Z) % BW)) |
6235 | 198 | // fshr (rotr): (X << ((0 - Z) % BW)) | (X >> (Z % BW)) |
6236 | 198 | SDValue NegZ = DAG.getNode(ISD::SUB, sdl, VT, Zero, Z); |
6237 | 198 | SDValue NShAmt = DAG.getNode(ISD::UREM, sdl, VT, NegZ, BitWidthC); |
6238 | 198 | SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt99 : NShAmt99 ); |
6239 | 198 | SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, X, IsFSHL ? NShAmt99 : ShAmt99 ); |
6240 | 198 | setValue(&I, DAG.getNode(ISD::OR, sdl, VT, ShX, ShY)); |
6241 | 198 | return; |
6242 | 198 | } |
6243 | 910 | |
6244 | 910 | // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) |
6245 | 910 | // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) |
6246 | 910 | SDValue InvShAmt = DAG.getNode(ISD::SUB, sdl, VT, BitWidthC, ShAmt); |
6247 | 910 | SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt455 : InvShAmt455 ); |
6248 | 910 | SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, Y, IsFSHL ? InvShAmt455 : ShAmt455 ); |
6249 | 910 | SDValue Or = DAG.getNode(ISD::OR, sdl, VT, ShX, ShY); |
6250 | 910 | |
6251 | 910 | // If (Z % BW == 0), then the opposite direction shift is shift-by-bitwidth, |
6252 | 910 | // and that is undefined. We must compare and select to avoid UB. |
6253 | 910 | EVT CCVT = MVT::i1; |
6254 | 910 | if (VT.isVector()) |
6255 | 838 | CCVT = EVT::getVectorVT(*Context, CCVT, VT.getVectorNumElements()); |
6256 | 910 | |
6257 | 910 | // For fshl, 0-shift returns the 1st arg (X). |
6258 | 910 | // For fshr, 0-shift returns the 2nd arg (Y). |
6259 | 910 | SDValue IsZeroShift = DAG.getSetCC(sdl, CCVT, ShAmt, Zero, ISD::SETEQ); |
6260 | 910 | setValue(&I, DAG.getSelect(sdl, VT, IsZeroShift, IsFSHL ? X455 : Y455 , Or)); |
6261 | 910 | return; |
6262 | 910 | } |
6263 | 910 | case Intrinsic::sadd_sat: { |
6264 | 489 | SDValue Op1 = getValue(I.getArgOperand(0)); |
6265 | 489 | SDValue Op2 = getValue(I.getArgOperand(1)); |
6266 | 489 | setValue(&I, DAG.getNode(ISD::SADDSAT, sdl, Op1.getValueType(), Op1, Op2)); |
6267 | 489 | return; |
6268 | 910 | } |
6269 | 910 | case Intrinsic::uadd_sat: { |
6270 | 491 | SDValue Op1 = getValue(I.getArgOperand(0)); |
6271 | 491 | SDValue Op2 = getValue(I.getArgOperand(1)); |
6272 | 491 | setValue(&I, DAG.getNode(ISD::UADDSAT, sdl, Op1.getValueType(), Op1, Op2)); |
6273 | 491 | return; |
6274 | 910 | } |
6275 | 910 | case Intrinsic::ssub_sat: { |
6276 | 454 | SDValue Op1 = getValue(I.getArgOperand(0)); |
6277 | 454 | SDValue Op2 = getValue(I.getArgOperand(1)); |
6278 | 454 | setValue(&I, DAG.getNode(ISD::SSUBSAT, sdl, Op1.getValueType(), Op1, Op2)); |
6279 | 454 | return; |
6280 | 910 | } |
6281 | 910 | case Intrinsic::usub_sat: { |
6282 | 482 | SDValue Op1 = getValue(I.getArgOperand(0)); |
6283 | 482 | SDValue Op2 = getValue(I.getArgOperand(1)); |
6284 | 482 | setValue(&I, DAG.getNode(ISD::USUBSAT, sdl, Op1.getValueType(), Op1, Op2)); |
6285 | 482 | return; |
6286 | 910 | } |
6287 | 910 | case Intrinsic::smul_fix: |
6288 | 43 | case Intrinsic::umul_fix: { |
6289 | 43 | SDValue Op1 = getValue(I.getArgOperand(0)); |
6290 | 43 | SDValue Op2 = getValue(I.getArgOperand(1)); |
6291 | 43 | SDValue Op3 = getValue(I.getArgOperand(2)); |
6292 | 43 | setValue(&I, DAG.getNode(FixedPointIntrinsicToOpcode(Intrinsic), sdl, |
6293 | 43 | Op1.getValueType(), Op1, Op2, Op3)); |
6294 | 43 | return; |
6295 | 43 | } |
6296 | 43 | case Intrinsic::smul_fix_sat: { |
6297 | 25 | SDValue Op1 = getValue(I.getArgOperand(0)); |
6298 | 25 | SDValue Op2 = getValue(I.getArgOperand(1)); |
6299 | 25 | SDValue Op3 = getValue(I.getArgOperand(2)); |
6300 | 25 | setValue(&I, DAG.getNode(ISD::SMULFIXSAT, sdl, Op1.getValueType(), Op1, Op2, |
6301 | 25 | Op3)); |
6302 | 25 | return; |
6303 | 43 | } |
6304 | 224 | case Intrinsic::stacksave: { |
6305 | 224 | SDValue Op = getRoot(); |
6306 | 224 | Res = DAG.getNode( |
6307 | 224 | ISD::STACKSAVE, sdl, |
6308 | 224 | DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Op); |
6309 | 224 | setValue(&I, Res); |
6310 | 224 | DAG.setRoot(Res.getValue(1)); |
6311 | 224 | return; |
6312 | 43 | } |
6313 | 85 | case Intrinsic::stackrestore: |
6314 | 85 | Res = getValue(I.getArgOperand(0)); |
6315 | 85 | DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res)); |
6316 | 85 | return; |
6317 | 43 | case Intrinsic::get_dynamic_area_offset: { |
6318 | 4 | SDValue Op = getRoot(); |
6319 | 4 | EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); |
6320 | 4 | EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType()); |
6321 | 4 | // Result type for @llvm.get.dynamic.area.offset should match PtrTy for |
6322 | 4 | // target. |
6323 | 4 | if (PtrTy.getSizeInBits() < ResTy.getSizeInBits()) |
6324 | 0 | report_fatal_error("Wrong result type for @llvm.get.dynamic.area.offset" |
6325 | 0 | " intrinsic!"); |
6326 | 4 | Res = DAG.getNode(ISD::GET_DYNAMIC_AREA_OFFSET, sdl, DAG.getVTList(ResTy), |
6327 | 4 | Op); |
6328 | 4 | DAG.setRoot(Op); |
6329 | 4 | setValue(&I, Res); |
6330 | 4 | return; |
6331 | 4 | } |
6332 | 1.41k | case Intrinsic::stackguard: { |
6333 | 1.41k | EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); |
6334 | 1.41k | MachineFunction &MF = DAG.getMachineFunction(); |
6335 | 1.41k | const Module &M = *MF.getFunction().getParent(); |
6336 | 1.41k | SDValue Chain = getRoot(); |
6337 | 1.41k | if (TLI.useLoadStackGuardNode()) { |
6338 | 1.16k | Res = getLoadStackGuard(DAG, sdl, Chain); |
6339 | 1.16k | } else { |
6340 | 248 | const Value *Global = TLI.getSDagStackGuard(M); |
6341 | 248 | unsigned Align = DL->getPrefTypeAlignment(Global->getType()); |
6342 | 248 | Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global), |
6343 | 248 | MachinePointerInfo(Global, 0), Align, |
6344 | 248 | MachineMemOperand::MOVolatile); |
6345 | 248 | } |
6346 | 1.41k | if (TLI.useStackGuardXorFP()) |
6347 | 137 | Res = TLI.emitStackGuardXorFP(DAG, Res, sdl); |
6348 | 1.41k | DAG.setRoot(Chain); |
6349 | 1.41k | setValue(&I, Res); |
6350 | 1.41k | return; |
6351 | 4 | } |
6352 | 1.22k | case Intrinsic::stackprotector: { |
6353 | 1.22k | // Emit code into the DAG to store the stack guard onto the stack. |
6354 | 1.22k | MachineFunction &MF = DAG.getMachineFunction(); |
6355 | 1.22k | MachineFrameInfo &MFI = MF.getFrameInfo(); |
6356 | 1.22k | EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); |
6357 | 1.22k | SDValue Src, Chain = getRoot(); |
6358 | 1.22k | |
6359 | 1.22k | if (TLI.useLoadStackGuardNode()) |
6360 | 709 | Src = getLoadStackGuard(DAG, sdl, Chain); |
6361 | 516 | else |
6362 | 516 | Src = getValue(I.getArgOperand(0)); // The guard's value. |
6363 | 1.22k | |
6364 | 1.22k | AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1)); |
6365 | 1.22k | |
6366 | 1.22k | int FI = FuncInfo.StaticAllocaMap[Slot]; |
6367 | 1.22k | MFI.setStackProtectorIndex(FI); |
6368 | 1.22k | |
6369 | 1.22k | SDValue FIN = DAG.getFrameIndex(FI, PtrTy); |
6370 | 1.22k | |
6371 | 1.22k | // Store the stack protector onto the stack. |
6372 | 1.22k | Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack( |
6373 | 1.22k | DAG.getMachineFunction(), FI), |
6374 | 1.22k | /* Alignment = */ 0, MachineMemOperand::MOVolatile); |
6375 | 1.22k | setValue(&I, Res); |
6376 | 1.22k | DAG.setRoot(Res); |
6377 | 1.22k | return; |
6378 | 4 | } |
6379 | 4 | case Intrinsic::objectsize: { |
6380 | 2 | // If we don't know by now, we're never going to know. |
6381 | 2 | ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1)); |
6382 | 2 | |
6383 | 2 | assert(CI && "Non-constant type in __builtin_object_size?"); |
6384 | 2 | |
6385 | 2 | SDValue Arg = getValue(I.getCalledValue()); |
6386 | 2 | EVT Ty = Arg.getValueType(); |
6387 | 2 | |
6388 | 2 | if (CI->isZero()) |
6389 | 2 | Res = DAG.getConstant(-1ULL, sdl, Ty); |
6390 | 0 | else |
6391 | 0 | Res = DAG.getConstant(0, sdl, Ty); |
6392 | 2 | |
6393 | 2 | setValue(&I, Res); |
6394 | 2 | return; |
6395 | 4 | } |
6396 | 4 | |
6397 | 19 | case Intrinsic::is_constant: |
6398 | 19 | // If this wasn't constant-folded away by now, then it's not a |
6399 | 19 | // constant. |
6400 | 19 | setValue(&I, DAG.getConstant(0, sdl, MVT::i1)); |
6401 | 19 | return; |
6402 | 4 | |
6403 | 4 | case Intrinsic::annotation: |
6404 | 4 | case Intrinsic::ptr_annotation: |
6405 | 4 | case Intrinsic::launder_invariant_group: |
6406 | 4 | case Intrinsic::strip_invariant_group: |
6407 | 4 | // Drop the intrinsic, but forward the value |
6408 | 4 | setValue(&I, getValue(I.getOperand(0))); |
6409 | 4 | return; |
6410 | 57 | case Intrinsic::assume: |
6411 | 57 | case Intrinsic::var_annotation: |
6412 | 57 | case Intrinsic::sideeffect: |
6413 | 57 | // Discard annotate attributes, assumptions, and artificial side-effects. |
6414 | 57 | return; |
6415 | 57 | |
6416 | 57 | case Intrinsic::codeview_annotation: { |
6417 | 8 | // Emit a label associated with this metadata. |
6418 | 8 | MachineFunction &MF = DAG.getMachineFunction(); |
6419 | 8 | MCSymbol *Label = |
6420 | 8 | MF.getMMI().getContext().createTempSymbol("annotation", true); |
6421 | 8 | Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(0))->getMetadata(); |
6422 | 8 | MF.addCodeViewAnnotation(Label, cast<MDNode>(MD)); |
6423 | 8 | Res = DAG.getLabelNode(ISD::ANNOTATION_LABEL, sdl, getRoot(), Label); |
6424 | 8 | DAG.setRoot(Res); |
6425 | 8 | return; |
6426 | 57 | } |
6427 | 57 | |
6428 | 57 | case Intrinsic::init_trampoline: { |
6429 | 4 | const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts()); |
6430 | 4 | |
6431 | 4 | SDValue Ops[6]; |
6432 | 4 | Ops[0] = getRoot(); |
6433 | 4 | Ops[1] = getValue(I.getArgOperand(0)); |
6434 | 4 | Ops[2] = getValue(I.getArgOperand(1)); |
6435 | 4 | Ops[3] = getValue(I.getArgOperand(2)); |
6436 | 4 | Ops[4] = DAG.getSrcValue(I.getArgOperand(0)); |
6437 | 4 | Ops[5] = DAG.getSrcValue(F); |
6438 | 4 | |
6439 | 4 | Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops); |
6440 | 4 | |
6441 | 4 | DAG.setRoot(Res); |
6442 | 4 | return; |
6443 | 57 | } |
6444 | 57 | case Intrinsic::adjust_trampoline: |
6445 | 4 | setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl, |
6446 | 4 | TLI.getPointerTy(DAG.getDataLayout()), |
6447 | 4 | getValue(I.getArgOperand(0)))); |
6448 | 4 | return; |
6449 | 57 | case Intrinsic::gcroot: { |
6450 | 2 | assert(DAG.getMachineFunction().getFunction().hasGC() && |
6451 | 2 | "only valid in functions with gc specified, enforced by Verifier"); |
6452 | 2 | assert(GFI && "implied by previous"); |
6453 | 2 | const Value *Alloca = I.getArgOperand(0)->stripPointerCasts(); |
6454 | 2 | const Constant *TypeMap = cast<Constant>(I.getArgOperand(1)); |
6455 | 2 | |
6456 | 2 | FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode()); |
6457 | 2 | GFI->addStackRoot(FI->getIndex(), TypeMap); |
6458 | 2 | return; |
6459 | 57 | } |
6460 | 57 | case Intrinsic::gcread: |
6461 | 0 | case Intrinsic::gcwrite: |
6462 | 0 | llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); |
6463 | 9 | case Intrinsic::flt_rounds: |
6464 | 9 | setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32)); |
6465 | 9 | return; |
6466 | 0 |
|
6467 | 11 | case Intrinsic::expect: |
6468 | 11 | // Just replace __builtin_expect(exp, c) with EXP. |
6469 | 11 | setValue(&I, getValue(I.getArgOperand(0))); |
6470 | 11 | return; |
6471 | 0 |
|
6472 | 296 | case Intrinsic::debugtrap: |
6473 | 296 | case Intrinsic::trap: { |
6474 | 296 | StringRef TrapFuncName = |
6475 | 296 | I.getAttributes() |
6476 | 296 | .getAttribute(AttributeList::FunctionIndex, "trap-func-name") |
6477 | 296 | .getValueAsString(); |
6478 | 296 | if (TrapFuncName.empty()) { |
6479 | 287 | ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? |
6480 | 252 | ISD::TRAP : ISD::DEBUGTRAP35 ; |
6481 | 287 | DAG.setRoot(DAG.getNode(Op, sdl,MVT::Other, getRoot())); |
6482 | 287 | return; |
6483 | 287 | } |
6484 | 9 | TargetLowering::ArgListTy Args; |
6485 | 9 | |
6486 | 9 | TargetLowering::CallLoweringInfo CLI(DAG); |
6487 | 9 | CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee( |
6488 | 9 | CallingConv::C, I.getType(), |
6489 | 9 | DAG.getExternalSymbol(TrapFuncName.data(), |
6490 | 9 | TLI.getPointerTy(DAG.getDataLayout())), |
6491 | 9 | std::move(Args)); |
6492 | 9 | |
6493 | 9 | std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); |
6494 | 9 | DAG.setRoot(Result.second); |
6495 | 9 | return; |
6496 | 9 | } |
6497 | 9 | |
6498 | 4.61k | case Intrinsic::uadd_with_overflow: |
6499 | 4.61k | case Intrinsic::sadd_with_overflow: |
6500 | 4.61k | case Intrinsic::usub_with_overflow: |
6501 | 4.61k | case Intrinsic::ssub_with_overflow: |
6502 | 4.61k | case Intrinsic::umul_with_overflow: |
6503 | 4.61k | case Intrinsic::smul_with_overflow: { |
6504 | 4.61k | ISD::NodeType Op; |
6505 | 4.61k | switch (Intrinsic) { |
6506 | 4.61k | default: 0 llvm_unreachable0 ("Impossible intrinsic"); // Can't reach here. |
6507 | 4.61k | case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break2.29k ; |
6508 | 4.61k | case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break442 ; |
6509 | 4.61k | case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break864 ; |
6510 | 4.61k | case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break382 ; |
6511 | 4.61k | case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break466 ; |
6512 | 4.61k | case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break168 ; |
6513 | 4.61k | } |
6514 | 4.61k | SDValue Op1 = getValue(I.getArgOperand(0)); |
6515 | 4.61k | SDValue Op2 = getValue(I.getArgOperand(1)); |
6516 | 4.61k | |
6517 | 4.61k | EVT ResultVT = Op1.getValueType(); |
6518 | 4.61k | EVT OverflowVT = MVT::i1; |
6519 | 4.61k | if (ResultVT.isVector()) |
6520 | 540 | OverflowVT = EVT::getVectorVT( |
6521 | 540 | *Context, OverflowVT, ResultVT.getVectorNumElements()); |
6522 | 4.61k | |
6523 | 4.61k | SDVTList VTs = DAG.getVTList(ResultVT, OverflowVT); |
6524 | 4.61k | setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2)); |
6525 | 4.61k | return; |
6526 | 4.61k | } |
6527 | 4.61k | case Intrinsic::prefetch: { |
6528 | 370 | SDValue Ops[5]; |
6529 | 370 | unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); |
6530 | 370 | auto Flags = rw == 0 ? MachineMemOperand::MOLoad303 :MachineMemOperand::MOStore67 ; |
6531 | 370 | Ops[0] = DAG.getRoot(); |
6532 | 370 | Ops[1] = getValue(I.getArgOperand(0)); |
6533 | 370 | Ops[2] = getValue(I.getArgOperand(1)); |
6534 | 370 | Ops[3] = getValue(I.getArgOperand(2)); |
6535 | 370 | Ops[4] = getValue(I.getArgOperand(3)); |
6536 | 370 | SDValue Result = DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl, |
6537 | 370 | DAG.getVTList(MVT::Other), Ops, |
6538 | 370 | EVT::getIntegerVT(*Context, 8), |
6539 | 370 | MachinePointerInfo(I.getArgOperand(0)), |
6540 | 370 | 0, /* align */ |
6541 | 370 | Flags); |
6542 | 370 | |
6543 | 370 | // Chain the prefetch in parallell with any pending loads, to stay out of |
6544 | 370 | // the way of later optimizations. |
6545 | 370 | PendingLoads.push_back(Result); |
6546 | 370 | Result = getRoot(); |
6547 | 370 | DAG.setRoot(Result); |
6548 | 370 | return; |
6549 | 4.61k | } |
6550 | 67.9k | case Intrinsic::lifetime_start: |
6551 | 67.9k | case Intrinsic::lifetime_end: { |
6552 | 67.9k | bool IsStart = (Intrinsic == Intrinsic::lifetime_start); |
6553 | 67.9k | // Stack coloring is not enabled in O0, discard region information. |
6554 | 67.9k | if (TM.getOptLevel() == CodeGenOpt::None) |
6555 | 19 | return; |
6556 | 67.9k | |
6557 | 67.9k | const int64_t ObjectSize = |
6558 | 67.9k | cast<ConstantInt>(I.getArgOperand(0))->getSExtValue(); |
6559 | 67.9k | Value *const ObjectPtr = I.getArgOperand(1); |
6560 | 67.9k | SmallVector<const Value *, 4> Allocas; |
6561 | 67.9k | GetUnderlyingObjects(ObjectPtr, Allocas, *DL); |
6562 | 67.9k | |
6563 | 67.9k | for (SmallVectorImpl<const Value*>::iterator Object = Allocas.begin(), |
6564 | 136k | E = Allocas.end(); Object != E; ++Object68.1k ) { |
6565 | 68.1k | const AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object); |
6566 | 68.1k | |
6567 | 68.1k | // Could not find an Alloca. |
6568 | 68.1k | if (!LifetimeObject) |
6569 | 3 | continue; |
6570 | 68.1k | |
6571 | 68.1k | // First check that the Alloca is static, otherwise it won't have a |
6572 | 68.1k | // valid frame index. |
6573 | 68.1k | auto SI = FuncInfo.StaticAllocaMap.find(LifetimeObject); |
6574 | 68.1k | if (SI == FuncInfo.StaticAllocaMap.end()) |
6575 | 2 | return; |
6576 | 68.1k | |
6577 | 68.1k | const int FrameIndex = SI->second; |
6578 | 68.1k | int64_t Offset; |
6579 | 68.1k | if (GetPointerBaseWithConstantOffset( |
6580 | 68.1k | ObjectPtr, Offset, DAG.getDataLayout()) != LifetimeObject) |
6581 | 454 | Offset = -1; // Cannot determine offset from alloca to lifetime object. |
6582 | 68.1k | Res = DAG.getLifetimeNode(IsStart, sdl, getRoot(), FrameIndex, ObjectSize, |
6583 | 68.1k | Offset); |
6584 | 68.1k | DAG.setRoot(Res); |
6585 | 68.1k | } |
6586 | 67.9k | return67.9k ; |
6587 | 67.9k | } |
6588 | 67.9k | case Intrinsic::invariant_start: |
6589 | 45 | // Discard region information. |
6590 | 45 | setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout()))); |
6591 | 45 | return; |
6592 | 67.9k | case Intrinsic::invariant_end: |
6593 | 0 | // Discard region information. |
6594 | 0 | return; |
6595 | 67.9k | case Intrinsic::clear_cache: |
6596 | 4 | /// FunctionName may be null. |
6597 | 4 | if (const char *FunctionName = TLI.getClearCacheBuiltinName()) |
6598 | 2 | lowerCallToExternalSymbol(I, FunctionName); |
6599 | 4 | return; |
6600 | 67.9k | case Intrinsic::donothing: |
6601 | 1 | // ignore |
6602 | 1 | return; |
6603 | 67.9k | case Intrinsic::experimental_stackmap: |
6604 | 140 | visitStackmap(I); |
6605 | 140 | return; |
6606 | 67.9k | case Intrinsic::experimental_patchpoint_void: |
6607 | 144 | case Intrinsic::experimental_patchpoint_i64: |
6608 | 144 | visitPatchpoint(&I); |
6609 | 144 | return; |
6610 | 144 | case Intrinsic::experimental_gc_statepoint: |
6611 | 75 | LowerStatepoint(ImmutableStatepoint(&I)); |
6612 | 75 | return; |
6613 | 144 | case Intrinsic::experimental_gc_result: |
6614 | 24 | visitGCResult(cast<GCResultInst>(I)); |
6615 | 24 | return; |
6616 | 144 | case Intrinsic::experimental_gc_relocate: |
6617 | 69 | visitGCRelocate(cast<GCRelocateInst>(I)); |
6618 | 69 | return; |
6619 | 144 | case Intrinsic::instrprof_increment: |
6620 | 0 | llvm_unreachable("instrprof failed to lower an increment"); |
6621 | 144 | case Intrinsic::instrprof_value_profile: |
6622 | 0 | llvm_unreachable("instrprof failed to lower a value profiling call"); |
6623 | 144 | case Intrinsic::localescape: { |
6624 | 18 | MachineFunction &MF = DAG.getMachineFunction(); |
6625 | 18 | const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); |
6626 | 18 | |
6627 | 18 | // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission |
6628 | 18 | // is the same on all targets. |
6629 | 42 | for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx24 ) { |
6630 | 24 | Value *Arg = I.getArgOperand(Idx)->stripPointerCasts(); |
6631 | 24 | if (isa<ConstantPointerNull>(Arg)) |
6632 | 0 | continue; // Skip null pointers. They represent a hole in index space. |
6633 | 24 | AllocaInst *Slot = cast<AllocaInst>(Arg); |
6634 | 24 | assert(FuncInfo.StaticAllocaMap.count(Slot) && |
6635 | 24 | "can only escape static allocas"); |
6636 | 24 | int FI = FuncInfo.StaticAllocaMap[Slot]; |
6637 | 24 | MCSymbol *FrameAllocSym = |
6638 | 24 | MF.getMMI().getContext().getOrCreateFrameAllocSymbol( |
6639 | 24 | GlobalValue::dropLLVMManglingEscape(MF.getName()), Idx); |
6640 | 24 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, |
6641 | 24 | TII->get(TargetOpcode::LOCAL_ESCAPE)) |
6642 | 24 | .addSym(FrameAllocSym) |
6643 | 24 | .addFrameIndex(FI); |
6644 | 24 | } |
6645 | 18 | |
6646 | 18 | return; |
6647 | 144 | } |
6648 | 144 | |
6649 | 144 | case Intrinsic::localrecover: { |
6650 | 19 | // i8* @llvm.localrecover(i8* %fn, i8* %fp, i32 %idx) |
6651 | 19 | MachineFunction &MF = DAG.getMachineFunction(); |
6652 | 19 | MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout(), 0); |
6653 | 19 | |
6654 | 19 | // Get the symbol that defines the frame offset. |
6655 | 19 | auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts()); |
6656 | 19 | auto *Idx = cast<ConstantInt>(I.getArgOperand(2)); |
6657 | 19 | unsigned IdxVal = |
6658 | 19 | unsigned(Idx->getLimitedValue(std::numeric_limits<int>::max())); |
6659 | 19 | MCSymbol *FrameAllocSym = |
6660 | 19 | MF.getMMI().getContext().getOrCreateFrameAllocSymbol( |
6661 | 19 | GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal); |
6662 | 19 | |
6663 | 19 | // Create a MCSymbol for the label to avoid any target lowering |
6664 | 19 | // that would make this PC relative. |
6665 | 19 | SDValue OffsetSym = DAG.getMCSymbol(FrameAllocSym, PtrVT); |
6666 | 19 | SDValue OffsetVal = |
6667 | 19 | DAG.getNode(ISD::LOCAL_RECOVER, sdl, PtrVT, OffsetSym); |
6668 | 19 | |
6669 | 19 | // Add the offset to the FP. |
6670 | 19 | Value *FP = I.getArgOperand(1); |
6671 | 19 | SDValue FPVal = getValue(FP); |
6672 | 19 | SDValue Add = DAG.getNode(ISD::ADD, sdl, PtrVT, FPVal, OffsetVal); |
6673 | 19 | setValue(&I, Add); |
6674 | 19 | |
6675 | 19 | return; |
6676 | 144 | } |
6677 | 144 | |
6678 | 144 | case Intrinsic::eh_exceptionpointer: |
6679 | 7 | case Intrinsic::eh_exceptioncode: { |
6680 | 7 | // Get the exception pointer vreg, copy from it, and resize it to fit. |
6681 | 7 | const auto *CPI = cast<CatchPadInst>(I.getArgOperand(0)); |
6682 | 7 | MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); |
6683 | 7 | const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT); |
6684 | 7 | unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC); |
6685 | 7 | SDValue N = |
6686 | 7 | DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT); |
6687 | 7 | if (Intrinsic == Intrinsic::eh_exceptioncode) |
6688 | 4 | N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32); |
6689 | 7 | setValue(&I, N); |
6690 | 7 | return; |
6691 | 7 | } |
6692 | 7 | case Intrinsic::xray_customevent: { |
6693 | 2 | // Here we want to make sure that the intrinsic behaves as if it has a |
6694 | 2 | // specific calling convention, and only for x86_64. |
6695 | 2 | // FIXME: Support other platforms later. |
6696 | 2 | const auto &Triple = DAG.getTarget().getTargetTriple(); |
6697 | 2 | if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux()) |
6698 | 0 | return; |
6699 | 2 | |
6700 | 2 | SDLoc DL = getCurSDLoc(); |
6701 | 2 | SmallVector<SDValue, 8> Ops; |
6702 | 2 | |
6703 | 2 | // We want to say that we always want the arguments in registers. |
6704 | 2 | SDValue LogEntryVal = getValue(I.getArgOperand(0)); |
6705 | 2 | SDValue StrSizeVal = getValue(I.getArgOperand(1)); |
6706 | 2 | SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); |
6707 | 2 | SDValue Chain = getRoot(); |
6708 | 2 | Ops.push_back(LogEntryVal); |
6709 | 2 | Ops.push_back(StrSizeVal); |
6710 | 2 | Ops.push_back(Chain); |
6711 | 2 | |
6712 | 2 | // We need to enforce the calling convention for the callsite, so that |
6713 | 2 | // argument ordering is enforced correctly, and that register allocation can |
6714 | 2 | // see that some registers may be assumed clobbered and have to preserve |
6715 | 2 | // them across calls to the intrinsic. |
6716 | 2 | MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHABLE_EVENT_CALL, |
6717 | 2 | DL, NodeTys, Ops); |
6718 | 2 | SDValue patchableNode = SDValue(MN, 0); |
6719 | 2 | DAG.setRoot(patchableNode); |
6720 | 2 | setValue(&I, patchableNode); |
6721 | 2 | return; |
6722 | 2 | } |
6723 | 2 | case Intrinsic::xray_typedevent: { |
6724 | 2 | // Here we want to make sure that the intrinsic behaves as if it has a |
6725 | 2 | // specific calling convention, and only for x86_64. |
6726 | 2 | // FIXME: Support other platforms later. |
6727 | 2 | const auto &Triple = DAG.getTarget().getTargetTriple(); |
6728 | 2 | if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux()) |
6729 | 0 | return; |
6730 | 2 | |
6731 | 2 | SDLoc DL = getCurSDLoc(); |
6732 | 2 | SmallVector<SDValue, 8> Ops; |
6733 | 2 | |
6734 | 2 | // We want to say that we always want the arguments in registers. |
6735 | 2 | // It's unclear to me how manipulating the selection DAG here forces callers |
6736 | 2 | // to provide arguments in registers instead of on the stack. |
6737 | 2 | SDValue LogTypeId = getValue(I.getArgOperand(0)); |
6738 | 2 | SDValue LogEntryVal = getValue(I.getArgOperand(1)); |
6739 | 2 | SDValue StrSizeVal = getValue(I.getArgOperand(2)); |
6740 | 2 | SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); |
6741 | 2 | SDValue Chain = getRoot(); |
6742 | 2 | Ops.push_back(LogTypeId); |
6743 | 2 | Ops.push_back(LogEntryVal); |
6744 | 2 | Ops.push_back(StrSizeVal); |
6745 | 2 | Ops.push_back(Chain); |
6746 | 2 | |
6747 | 2 | // We need to enforce the calling convention for the callsite, so that |
6748 | 2 | // argument ordering is enforced correctly, and that register allocation can |
6749 | 2 | // see that some registers may be assumed clobbered and have to preserve |
6750 | 2 | // them across calls to the intrinsic. |
6751 | 2 | MachineSDNode *MN = DAG.getMachineNode( |
6752 | 2 | TargetOpcode::PATCHABLE_TYPED_EVENT_CALL, DL, NodeTys, Ops); |
6753 | 2 | SDValue patchableNode = SDValue(MN, 0); |
6754 | 2 | DAG.setRoot(patchableNode); |
6755 | 2 | setValue(&I, patchableNode); |
6756 | 2 | return; |
6757 | 2 | } |
6758 | 2 | case Intrinsic::experimental_deoptimize: |
6759 | 0 | LowerDeoptimizeCall(&I); |
6760 | 0 | return; |
6761 | 2 | |
6762 | 857 | case Intrinsic::experimental_vector_reduce_v2_fadd: |
6763 | 857 | case Intrinsic::experimental_vector_reduce_v2_fmul: |
6764 | 857 | case Intrinsic::experimental_vector_reduce_add: |
6765 | 857 | case Intrinsic::experimental_vector_reduce_mul: |
6766 | 857 | case Intrinsic::experimental_vector_reduce_and: |
6767 | 857 | case Intrinsic::experimental_vector_reduce_or: |
6768 | 857 | case Intrinsic::experimental_vector_reduce_xor: |
6769 | 857 | case Intrinsic::experimental_vector_reduce_smax: |
6770 | 857 | case Intrinsic::experimental_vector_reduce_smin: |
6771 | 857 | case Intrinsic::experimental_vector_reduce_umax: |
6772 | 857 | case Intrinsic::experimental_vector_reduce_umin: |
6773 | 857 | case Intrinsic::experimental_vector_reduce_fmax: |
6774 | 857 | case Intrinsic::experimental_vector_reduce_fmin: |
6775 | 857 | visitVectorReduce(I, Intrinsic); |
6776 | 857 | return; |
6777 | 857 | |
6778 | 857 | case Intrinsic::icall_branch_funnel: { |
6779 | 13 | SmallVector<SDValue, 16> Ops; |
6780 | 13 | Ops.push_back(getValue(I.getArgOperand(0))); |
6781 | 13 | |
6782 | 13 | int64_t Offset; |
6783 | 13 | auto *Base = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset( |
6784 | 13 | I.getArgOperand(1), Offset, DAG.getDataLayout())); |
6785 | 13 | if (!Base) |
6786 | 0 | report_fatal_error( |
6787 | 0 | "llvm.icall.branch.funnel operand must be a GlobalValue"); |
6788 | 13 | Ops.push_back(DAG.getTargetGlobalAddress(Base, getCurSDLoc(), MVT::i64, 0)); |
6789 | 13 | |
6790 | 13 | struct BranchFunnelTarget { |
6791 | 13 | int64_t Offset; |
6792 | 13 | SDValue Target; |
6793 | 13 | }; |
6794 | 13 | SmallVector<BranchFunnelTarget, 8> Targets; |
6795 | 13 | |
6796 | 53 | for (unsigned Op = 1, N = I.getNumArgOperands(); Op != N; Op += 240 ) { |
6797 | 40 | auto *ElemBase = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset( |
6798 | 40 | I.getArgOperand(Op), Offset, DAG.getDataLayout())); |
6799 | 40 | if (ElemBase != Base) |
6800 | 0 | report_fatal_error("all llvm.icall.branch.funnel operands must refer " |
6801 | 0 | "to the same GlobalValue"); |
6802 | 40 | |
6803 | 40 | SDValue Val = getValue(I.getArgOperand(Op + 1)); |
6804 | 40 | auto *GA = dyn_cast<GlobalAddressSDNode>(Val); |
6805 | 40 | if (!GA) |
6806 | 0 | report_fatal_error( |
6807 | 0 | "llvm.icall.branch.funnel operand must be a GlobalValue"); |
6808 | 40 | Targets.push_back({Offset, DAG.getTargetGlobalAddress( |
6809 | 40 | GA->getGlobal(), getCurSDLoc(), |
6810 | 40 | Val.getValueType(), GA->getOffset())}); |
6811 | 40 | } |
6812 | 13 | llvm::sort(Targets, |
6813 | 28 | [](const BranchFunnelTarget &T1, const BranchFunnelTarget &T2) { |
6814 | 28 | return T1.Offset < T2.Offset; |
6815 | 28 | }); |
6816 | 13 | |
6817 | 40 | for (auto &T : Targets) { |
6818 | 40 | Ops.push_back(DAG.getTargetConstant(T.Offset, getCurSDLoc(), MVT::i32)); |
6819 | 40 | Ops.push_back(T.Target); |
6820 | 40 | } |
6821 | 13 | |
6822 | 13 | Ops.push_back(DAG.getRoot()); // Chain |
6823 | 13 | SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL, |
6824 | 13 | getCurSDLoc(), MVT::Other, Ops), |
6825 | 13 | 0); |
6826 | 13 | DAG.setRoot(N); |
6827 | 13 | setValue(&I, N); |
6828 | 13 | HasTailCall = true; |
6829 | 13 | return; |
6830 | 13 | } |
6831 | 13 | |
6832 | 13 | case Intrinsic::wasm_landingpad_index: |
6833 | 13 | // Information this intrinsic contained has been transferred to |
6834 | 13 | // MachineFunction in SelectionDAGISel::PrepareEHLandingPad. We can safely |
6835 | 13 | // delete it now. |
6836 | 13 | return; |
6837 | 13 | |
6838 | 13 | case Intrinsic::aarch64_settag: |
6839 | 12 | case Intrinsic::aarch64_settag_zero: { |
6840 | 12 | const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); |
6841 | 12 | bool ZeroMemory = Intrinsic == Intrinsic::aarch64_settag_zero; |
6842 | 12 | SDValue Val = TSI.EmitTargetCodeForSetTag( |
6843 | 12 | DAG, getCurSDLoc(), getRoot(), getValue(I.getArgOperand(0)), |
6844 | 12 | getValue(I.getArgOperand(1)), MachinePointerInfo(I.getArgOperand(0)), |
6845 | 12 | ZeroMemory); |
6846 | 12 | DAG.setRoot(Val); |
6847 | 12 | setValue(&I, Val); |
6848 | 12 | return; |
6849 | 12 | } |
6850 | 286k | } |
6851 | 286k | } |
6852 | | |
6853 | | void SelectionDAGBuilder::visitConstrainedFPIntrinsic( |
6854 | 1.38k | const ConstrainedFPIntrinsic &FPI) { |
6855 | 1.38k | SDLoc sdl = getCurSDLoc(); |
6856 | 1.38k | unsigned Opcode; |
6857 | 1.38k | switch (FPI.getIntrinsicID()) { |
6858 | 1.38k | default: 0 llvm_unreachable0 ("Impossible intrinsic"); // Can't reach here. |
6859 | 1.38k | case Intrinsic::experimental_constrained_fadd: |
6860 | 110 | Opcode = ISD::STRICT_FADD; |
6861 | 110 | break; |
6862 | 1.38k | case Intrinsic::experimental_constrained_fsub: |
6863 | 110 | Opcode = ISD::STRICT_FSUB; |
6864 | 110 | break; |
6865 | 1.38k | case Intrinsic::experimental_constrained_fmul: |
6866 | 159 | Opcode = ISD::STRICT_FMUL; |
6867 | 159 | break; |
6868 | 1.38k | case Intrinsic::experimental_constrained_fdiv: |
6869 | 106 | Opcode = ISD::STRICT_FDIV; |
6870 | 106 | break; |
6871 | 1.38k | case Intrinsic::experimental_constrained_frem: |
6872 | 32 | Opcode = ISD::STRICT_FREM; |
6873 | 32 | break; |
6874 | 1.38k | case Intrinsic::experimental_constrained_fma: |
6875 | 87 | Opcode = ISD::STRICT_FMA; |
6876 | 87 | break; |
6877 | 1.38k | case Intrinsic::experimental_constrained_fptrunc: |
6878 | 40 | Opcode = ISD::STRICT_FP_ROUND; |
6879 | 40 | break; |
6880 | 1.38k | case Intrinsic::experimental_constrained_fpext: |
6881 | 48 | Opcode = ISD::STRICT_FP_EXTEND; |
6882 | 48 | break; |
6883 | 1.38k | case Intrinsic::experimental_constrained_sqrt: |
6884 | 74 | Opcode = ISD::STRICT_FSQRT; |
6885 | 74 | break; |
6886 | 1.38k | case Intrinsic::experimental_constrained_pow: |
6887 | 32 | Opcode = ISD::STRICT_FPOW; |
6888 | 32 | break; |
6889 | 1.38k | case Intrinsic::experimental_constrained_powi: |
6890 | 32 | Opcode = ISD::STRICT_FPOWI; |
6891 | 32 | break; |
6892 | 1.38k | case Intrinsic::experimental_constrained_sin: |
6893 | 32 | Opcode = ISD::STRICT_FSIN; |
6894 | 32 | break; |
6895 | 1.38k | case Intrinsic::experimental_constrained_cos: |
6896 | 32 | Opcode = ISD::STRICT_FCOS; |
6897 | 32 | break; |
6898 | 1.38k | case Intrinsic::experimental_constrained_exp: |
6899 | 32 | Opcode = ISD::STRICT_FEXP; |
6900 | 32 | break; |
6901 | 1.38k | case Intrinsic::experimental_constrained_exp2: |
6902 | 32 | Opcode = ISD::STRICT_FEXP2; |
6903 | 32 | break; |
6904 | 1.38k | case Intrinsic::experimental_constrained_log: |
6905 | 32 | Opcode = ISD::STRICT_FLOG; |
6906 | 32 | break; |
6907 | 1.38k | case Intrinsic::experimental_constrained_log10: |
6908 | 32 | Opcode = ISD::STRICT_FLOG10; |
6909 | 32 | break; |
6910 | 1.38k | case Intrinsic::experimental_constrained_log2: |
6911 | 32 | Opcode = ISD::STRICT_FLOG2; |
6912 | 32 | break; |
6913 | 1.38k | case Intrinsic::experimental_constrained_rint: |
6914 | 48 | Opcode = ISD::STRICT_FRINT; |
6915 | 48 | break; |
6916 | 1.38k | case Intrinsic::experimental_constrained_nearbyint: |
6917 | 48 | Opcode = ISD::STRICT_FNEARBYINT; |
6918 | 48 | break; |
6919 | 1.38k | case Intrinsic::experimental_constrained_maxnum: |
6920 | 35 | Opcode = ISD::STRICT_FMAXNUM; |
6921 | 35 | break; |
6922 | 1.38k | case Intrinsic::experimental_constrained_minnum: |
6923 | 35 | Opcode = ISD::STRICT_FMINNUM; |
6924 | 35 | break; |
6925 | 1.38k | case Intrinsic::experimental_constrained_ceil: |
6926 | 40 | Opcode = ISD::STRICT_FCEIL; |
6927 | 40 | break; |
6928 | 1.38k | case Intrinsic::experimental_constrained_floor: |
6929 | 40 | Opcode = ISD::STRICT_FFLOOR; |
6930 | 40 | break; |
6931 | 1.38k | case Intrinsic::experimental_constrained_round: |
6932 | 40 | Opcode = ISD::STRICT_FROUND; |
6933 | 40 | break; |
6934 | 1.38k | case Intrinsic::experimental_constrained_trunc: |
6935 | 40 | Opcode = ISD::STRICT_FTRUNC; |
6936 | 40 | break; |
6937 | 1.38k | } |
6938 | 1.38k | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
6939 | 1.38k | SDValue Chain = getRoot(); |
6940 | 1.38k | SmallVector<EVT, 4> ValueVTs; |
6941 | 1.38k | ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs); |
6942 | 1.38k | ValueVTs.push_back(MVT::Other); // Out chain |
6943 | 1.38k | |
6944 | 1.38k | SDVTList VTs = DAG.getVTList(ValueVTs); |
6945 | 1.38k | SDValue Result; |
6946 | 1.38k | if (Opcode == ISD::STRICT_FP_ROUND) |
6947 | 40 | Result = DAG.getNode(Opcode, sdl, VTs, |
6948 | 40 | { Chain, getValue(FPI.getArgOperand(0)), |
6949 | 40 | DAG.getTargetConstant(0, sdl, |
6950 | 40 | TLI.getPointerTy(DAG.getDataLayout())) }); |
6951 | 1.34k | else if (FPI.isUnaryOp()) |
6952 | 602 | Result = DAG.getNode(Opcode, sdl, VTs, |
6953 | 602 | { Chain, getValue(FPI.getArgOperand(0)) }); |
6954 | 738 | else if (FPI.isTernaryOp()) |
6955 | 87 | Result = DAG.getNode(Opcode, sdl, VTs, |
6956 | 87 | { Chain, getValue(FPI.getArgOperand(0)), |
6957 | 87 | getValue(FPI.getArgOperand(1)), |
6958 | 87 | getValue(FPI.getArgOperand(2)) }); |
6959 | 651 | else |
6960 | 651 | Result = DAG.getNode(Opcode, sdl, VTs, |
6961 | 651 | { Chain, getValue(FPI.getArgOperand(0)), |
6962 | 651 | getValue(FPI.getArgOperand(1)) }); |
6963 | 1.38k | |
6964 | 1.38k | if (FPI.getExceptionBehavior() != |
6965 | 1.38k | ConstrainedFPIntrinsic::ExceptionBehavior::ebIgnore) { |
6966 | 1.37k | SDNodeFlags Flags; |
6967 | 1.37k | Flags.setFPExcept(true); |
6968 | 1.37k | Result->setFlags(Flags); |
6969 | 1.37k | } |
6970 | 1.38k | |
6971 | 1.38k | assert(Result.getNode()->getNumValues() == 2); |
6972 | 1.38k | SDValue OutChain = Result.getValue(1); |
6973 | 1.38k | DAG.setRoot(OutChain); |
6974 | 1.38k | SDValue FPResult = Result.getValue(0); |
6975 | 1.38k | setValue(&FPI, FPResult); |
6976 | 1.38k | } |
6977 | | |
6978 | | std::pair<SDValue, SDValue> |
6979 | | SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, |
6980 | 440k | const BasicBlock *EHPadBB) { |
6981 | 440k | MachineFunction &MF = DAG.getMachineFunction(); |
6982 | 440k | MachineModuleInfo &MMI = MF.getMMI(); |
6983 | 440k | MCSymbol *BeginLabel = nullptr; |
6984 | 440k | |
6985 | 440k | if (EHPadBB) { |
6986 | 6.36k | // Insert a label before the invoke call to mark the try range. This can be |
6987 | 6.36k | // used to detect deletion of the invoke via the MachineModuleInfo. |
6988 | 6.36k | BeginLabel = MMI.getContext().createTempSymbol(); |
6989 | 6.36k | |
6990 | 6.36k | // For SjLj, keep track of which landing pads go with which invokes |
6991 | 6.36k | // so as to maintain the ordering of pads in the LSDA. |
6992 | 6.36k | unsigned CallSiteIndex = MMI.getCurrentCallSite(); |
6993 | 6.36k | if (CallSiteIndex) { |
6994 | 175 | MF.setCallSiteBeginLabel(BeginLabel, CallSiteIndex); |
6995 | 175 | LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex); |
6996 | 175 | |
6997 | 175 | // Now that the call site is handled, stop tracking it. |
6998 | 175 | MMI.setCurrentCallSite(0); |
6999 | 175 | } |
7000 | 6.36k | |
7001 | 6.36k | // Both PendingLoads and PendingExports must be flushed here; |
7002 | 6.36k | // this call might not return. |
7003 | 6.36k | (void)getRoot(); |
7004 | 6.36k | DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getControlRoot(), BeginLabel)); |
7005 | 6.36k | |
7006 | 6.36k | CLI.setChain(getRoot()); |
7007 | 6.36k | } |
7008 | 440k | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
7009 | 440k | std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); |
7010 | 440k | |
7011 | 440k | assert((CLI.IsTailCall || Result.second.getNode()) && |
7012 | 440k | "Non-null chain expected with non-tail call!"); |
7013 | 440k | assert((Result.second.getNode() || !Result.first.getNode()) && |
7014 | 440k | "Null value expected with tail call!"); |
7015 | 440k | |
7016 | 440k | if (!Result.second.getNode()) { |
7017 | 63.8k | // As a special case, a null chain means that a tail call has been emitted |
7018 | 63.8k | // and the DAG root is already updated. |
7019 | 63.8k | HasTailCall = true; |
7020 | 63.8k | |
7021 | 63.8k | // Since there's no actual continuation from this block, nothing can be |
7022 | 63.8k | // relying on us setting vregs for them. |
7023 | 63.8k | PendingExports.clear(); |
7024 | 377k | } else { |
7025 | 377k | DAG.setRoot(Result.second); |
7026 | 377k | } |
7027 | 440k | |
7028 | 440k | if (EHPadBB) { |
7029 | 6.36k | // Insert a label at the end of the invoke call to mark the try range. This |
7030 | 6.36k | // can be used to detect deletion of the invoke via the MachineModuleInfo. |
7031 | 6.36k | MCSymbol *EndLabel = MMI.getContext().createTempSymbol(); |
7032 | 6.36k | DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel)); |
7033 | 6.36k | |
7034 | 6.36k | // Inform MachineModuleInfo of range. |
7035 | 6.36k | auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); |
7036 | 6.36k | // There is a platform (e.g. wasm) that uses funclet style IR but does not |
7037 | 6.36k | // actually use outlined funclets and their LSDA info style. |
7038 | 6.36k | if (MF.hasEHFunclets() && isFuncletEHPersonality(Pers)236 ) { |
7039 | 166 | assert(CLI.CS); |
7040 | 166 | WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo(); |
7041 | 166 | EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS.getInstruction()), |
7042 | 166 | BeginLabel, EndLabel); |
7043 | 6.19k | } else if (!isScopedEHPersonality(Pers)) { |
7044 | 6.12k | MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel); |
7045 | 6.12k | } |
7046 | 6.36k | } |
7047 | 440k | |
7048 | 440k | return Result; |
7049 | 440k | } |
7050 | | |
7051 | | void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, |
7052 | | bool isTailCall, |
7053 | 440k | const BasicBlock *EHPadBB) { |
7054 | 440k | auto &DL = DAG.getDataLayout(); |
7055 | 440k | FunctionType *FTy = CS.getFunctionType(); |
7056 | 440k | Type *RetTy = CS.getType(); |
7057 | 440k | |
7058 | 440k | TargetLowering::ArgListTy Args; |
7059 | 440k | Args.reserve(CS.arg_size()); |
7060 | 440k | |
7061 | 440k | const Value *SwiftErrorVal = nullptr; |
7062 | 440k | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
7063 | 440k | |
7064 | 440k | // We can't tail call inside a function with a swifterror argument. Lowering |
7065 | 440k | // does not support this yet. It would have to move into the swifterror |
7066 | 440k | // register before the call. |
7067 | 440k | auto *Caller = CS.getInstruction()->getParent()->getParent(); |
7068 | 440k | if (TLI.supportSwiftError() && |
7069 | 440k | Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError)414k ) |
7070 | 111 | isTailCall = false; |
7071 | 440k | |
7072 | 440k | for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); |
7073 | 1.44M | i != e; ++i1.00M ) { |
7074 | 1.00M | TargetLowering::ArgListEntry Entry; |
7075 | 1.00M | const Value *V = *i; |
7076 | 1.00M | |
7077 | 1.00M | // Skip empty types |
7078 | 1.00M | if (V->getType()->isEmptyTy()) |
7079 | 6 | continue; |
7080 | 1.00M | |
7081 | 1.00M | SDValue ArgNode = getValue(V); |
7082 | 1.00M | Entry.Node = ArgNode; Entry.Ty = V->getType(); |
7083 | 1.00M | |
7084 | 1.00M | Entry.setAttributes(&CS, i - CS.arg_begin()); |
7085 | 1.00M | |
7086 | 1.00M | // Use swifterror virtual register as input to the call. |
7087 | 1.00M | if (Entry.IsSwiftError && TLI.supportSwiftError()129 ) { |
7088 | 110 | SwiftErrorVal = V; |
7089 | 110 | // We find the virtual register for the actual swifterror argument. |
7090 | 110 | // Instead of using the Value, we use the virtual register instead. |
7091 | 110 | Entry.Node = DAG.getRegister( |
7092 | 110 | SwiftError.getOrCreateVRegUseAt(CS.getInstruction(), FuncInfo.MBB, V), |
7093 | 110 | EVT(TLI.getPointerTy(DL))); |
7094 | 110 | } |
7095 | 1.00M | |
7096 | 1.00M | Args.push_back(Entry); |
7097 | 1.00M | |
7098 | 1.00M | // If we have an explicit sret argument that is an Instruction, (i.e., it |
7099 | 1.00M | // might point to function-local memory), we can't meaningfully tail-call. |
7100 | 1.00M | if (Entry.IsSRet && isa<Instruction>(V)1.07k ) |
7101 | 919 | isTailCall = false; |
7102 | 1.00M | } |
7103 | 440k | |
7104 | 440k | // Check if target-independent constraints permit a tail call here. |
7105 | 440k | // Target-dependent constraints are checked within TLI->LowerCallTo. |
7106 | 440k | if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget())283k ) |
7107 | 216k | isTailCall = false; |
7108 | 440k | |
7109 | 440k | // Disable tail calls if there is an swifterror argument. Targets have not |
7110 | 440k | // been updated to support tail calls. |
7111 | 440k | if (TLI.supportSwiftError() && SwiftErrorVal414k ) |
7112 | 110 | isTailCall = false; |
7113 | 440k | |
7114 | 440k | TargetLowering::CallLoweringInfo CLI(DAG); |
7115 | 440k | CLI.setDebugLoc(getCurSDLoc()) |
7116 | 440k | .setChain(getRoot()) |
7117 | 440k | .setCallee(RetTy, FTy, Callee, std::move(Args), CS) |
7118 | 440k | .setTailCall(isTailCall) |
7119 | 440k | .setConvergent(CS.isConvergent()); |
7120 | 440k | std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB); |
7121 | 440k | |
7122 | 440k | if (Result.first.getNode()) { |
7123 | 197k | const Instruction *Inst = CS.getInstruction(); |
7124 | 197k | Result.first = lowerRangeToAssertZExt(DAG, *Inst, Result.first); |
7125 | 197k | setValue(Inst, Result.first); |
7126 | 197k | } |
7127 | 440k | |
7128 | 440k | // The last element of CLI.InVals has the SDValue for swifterror return. |
7129 | 440k | // Here we copy it to a virtual register and update SwiftErrorMap for |
7130 | 440k | // book-keeping. |
7131 | 440k | if (SwiftErrorVal && TLI.supportSwiftError()110 ) { |
7132 | 110 | // Get the last element of InVals. |
7133 | 110 | SDValue Src = CLI.InVals.back(); |
7134 | 110 | unsigned VReg = SwiftError.getOrCreateVRegDefAt( |
7135 | 110 | CS.getInstruction(), FuncInfo.MBB, SwiftErrorVal); |
7136 | 110 | SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src); |
7137 | 110 | DAG.setRoot(CopyNode); |
7138 | 110 | } |
7139 | 440k | } |
7140 | | |
/// Produce a load of \p LoadVT through \p PtrVal for use in an expanded
/// memcmp comparison. If the pointee is a constant (e.g. a string literal),
/// the load is constant-folded to a node with no chain; otherwise an
/// explicitly-unaligned load is emitted and its output chain is registered
/// in Builder.PendingLoads (unless the memory is known constant).
static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
                             SelectionDAGBuilder &Builder) {
  // Check to see if this load can be trivially constant folded, e.g. if the
  // input is from a string literal.
  if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
    // Cast pointer to the type we really want to load: an iN matching the
    // scalar width of LoadVT (wrapped in a vector type if LoadVT is one).
    Type *LoadTy =
        Type::getIntNTy(PtrVal->getContext(), LoadVT.getScalarSizeInBits());
    if (LoadVT.isVector())
      LoadTy = VectorType::get(LoadTy, LoadVT.getVectorNumElements());

    LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
                                         PointerType::getUnqual(LoadTy));

    // If folding succeeds, no load node (and no chain) is needed at all.
    if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr(
            const_cast<Constant *>(LoadInput), LoadTy, *Builder.DL))
      return Builder.getValue(LoadCst);
  }

  // Otherwise, we have to emit the load.  If the pointer is to unfoldable but
  // still constant memory, the input chain can be the entry node.
  SDValue Root;
  bool ConstantMemory = false;

  // Do not serialize (non-volatile) loads of constant memory with anything.
  if (Builder.AA && Builder.AA->pointsToConstantMemory(PtrVal)) {
    Root = Builder.DAG.getEntryNode();
    ConstantMemory = true;
  } else {
    // Do not serialize non-volatile loads against each other.
    Root = Builder.DAG.getRoot();
  }

  SDValue Ptr = Builder.getValue(PtrVal);
  // Alignment 1: the caller (visitMemCmpCall) only reaches here when the
  // target allows misaligned accesses of LoadVT, so an unaligned load is OK.
  SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root,
                                        Ptr, MachinePointerInfo(PtrVal),
                                        /* Alignment = */ 1);

  if (!ConstantMemory)
    Builder.PendingLoads.push_back(LoadVal.getValue(1));
  return LoadVal;
}
7183 | | |
7184 | | /// Record the value for an instruction that produces an integer result, |
7185 | | /// converting the type where necessary. |
7186 | | void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I, |
7187 | | SDValue Value, |
7188 | 46 | bool IsSigned) { |
7189 | 46 | EVT VT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), |
7190 | 46 | I.getType(), true); |
7191 | 46 | if (IsSigned) |
7192 | 16 | Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT); |
7193 | 30 | else |
7194 | 30 | Value = DAG.getZExtOrTrunc(Value, getCurSDLoc(), VT); |
7195 | 46 | setValue(&I, Value); |
7196 | 46 | } |
7197 | | |
7198 | | /// See if we can lower a memcmp call into an optimized form. If so, return |
7199 | | /// true and lower it. Otherwise return false, and it will be lowered like a |
7200 | | /// normal call. |
7201 | | /// The caller already checked that \p I calls the appropriate LibFunc with a |
7202 | | /// correct prototype. |
/// See if we can lower a memcmp call into an optimized form. If so, return
/// true and lower it. Otherwise return false, and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
  const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
  const Value *Size = I.getArgOperand(2);
  const ConstantInt *CSize = dyn_cast<ConstantInt>(Size);
  // memcmp(p, q, 0) is 0 regardless of the pointers.
  if (CSize && CSize->getZExtValue() == 0) {
    EVT CallVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
                                                          I.getType(), true);
    setValue(&I, DAG.getConstant(0, getCurSDLoc(), CallVT));
    return true;
  }

  // Give the target a chance to emit its own optimized sequence first.
  const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
  std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForMemcmp(
      DAG, getCurSDLoc(), DAG.getRoot(), getValue(LHS), getValue(RHS),
      getValue(Size), MachinePointerInfo(LHS), MachinePointerInfo(RHS));
  if (Res.first.getNode()) {
    processIntegerCallValue(I, Res.first, true);
    PendingLoads.push_back(Res.second);
    return true;
  }

  // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS)  != 0
  // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS)  != 0
  // The load+compare expansion below only preserves the equality/inequality
  // result, so it requires a constant size and equality-only uses.
  if (!CSize || !isOnlyUsedInZeroEqualityComparison(&I))
    return false;

  // If the target has a fast compare for the given size, it will return a
  // preferred load type for that size. Require that the load VT is legal and
  // that the target supports unaligned loads of that type. Otherwise, return
  // INVALID.
  auto hasFastLoadsAndCompare = [&](unsigned NumBits) {
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    MVT LVT = TLI.hasFastEqualityCompare(NumBits);
    if (LVT != MVT::INVALID_SIMPLE_VALUE_TYPE) {
      // TODO: Handle 5 byte compare as 4-byte + 1 byte.
      // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
      // TODO: Check alignment of src and dest ptrs.
      unsigned DstAS = LHS->getType()->getPointerAddressSpace();
      unsigned SrcAS = RHS->getType()->getPointerAddressSpace();
      if (!TLI.isTypeLegal(LVT) ||
          !TLI.allowsMisalignedMemoryAccesses(LVT, SrcAS) ||
          !TLI.allowsMisalignedMemoryAccesses(LVT, DstAS))
        LVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
    }

    return LVT;
  };

  // This turns into unaligned loads. We only do this if the target natively
  // supports the MVT we'll be loading or if it is small enough (<= 4) that
  // we'll only produce a small number of byte loads.
  MVT LoadVT;
  unsigned NumBitsToCompare = CSize->getZExtValue() * 8;
  switch (NumBitsToCompare) {
  default:
    return false;
  case 16:
    LoadVT = MVT::i16;
    break;
  case 32:
    LoadVT = MVT::i32;
    break;
  case 64:
  case 128:
  case 256:
    LoadVT = hasFastLoadsAndCompare(NumBitsToCompare);
    break;
  }

  if (LoadVT == MVT::INVALID_SIMPLE_VALUE_TYPE)
    return false;

  SDValue LoadL = getMemCmpLoad(LHS, LoadVT, *this);
  SDValue LoadR = getMemCmpLoad(RHS, LoadVT, *this);

  // Bitcast to a wide integer type if the loads are vectors.
  if (LoadVT.isVector()) {
    EVT CmpVT = EVT::getIntegerVT(LHS->getContext(), LoadVT.getSizeInBits());
    LoadL = DAG.getBitcast(CmpVT, LoadL);
    LoadR = DAG.getBitcast(CmpVT, LoadR);
  }

  // Equality-only users make a SETNE result sufficient (no ordering needed).
  SDValue Cmp = DAG.getSetCC(getCurSDLoc(), MVT::i1, LoadL, LoadR, ISD::SETNE);
  processIntegerCallValue(I, Cmp, false);
  return true;
}
7289 | | |
7290 | | /// See if we can lower a memchr call into an optimized form. If so, return |
7291 | | /// true and lower it. Otherwise return false, and it will be lowered like a |
7292 | | /// normal call. |
7293 | | /// The caller already checked that \p I calls the appropriate LibFunc with a |
7294 | | /// correct prototype. |
7295 | 25 | bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) { |
7296 | 25 | const Value *Src = I.getArgOperand(0); |
7297 | 25 | const Value *Char = I.getArgOperand(1); |
7298 | 25 | const Value *Length = I.getArgOperand(2); |
7299 | 25 | |
7300 | 25 | const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); |
7301 | 25 | std::pair<SDValue, SDValue> Res = |
7302 | 25 | TSI.EmitTargetCodeForMemchr(DAG, getCurSDLoc(), DAG.getRoot(), |
7303 | 25 | getValue(Src), getValue(Char), getValue(Length), |
7304 | 25 | MachinePointerInfo(Src)); |
7305 | 25 | if (Res.first.getNode()) { |
7306 | 5 | setValue(&I, Res.first); |
7307 | 5 | PendingLoads.push_back(Res.second); |
7308 | 5 | return true; |
7309 | 5 | } |
7310 | 20 | |
7311 | 20 | return false; |
7312 | 20 | } |
7313 | | |
7314 | | /// See if we can lower a mempcpy call into an optimized form. If so, return |
7315 | | /// true and lower it. Otherwise return false, and it will be lowered like a |
7316 | | /// normal call. |
7317 | | /// The caller already checked that \p I calls the appropriate LibFunc with a |
7318 | | /// correct prototype. |
/// See if we can lower a mempcpy call into an optimized form. If so, return
/// true and lower it. Otherwise return false, and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
  SDValue Dst = getValue(I.getArgOperand(0));
  SDValue Src = getValue(I.getArgOperand(1));
  SDValue Size = getValue(I.getArgOperand(2));

  unsigned DstAlign = DAG.InferPtrAlignment(Dst);
  unsigned SrcAlign = DAG.InferPtrAlignment(Src);
  unsigned Align = std::min(DstAlign, SrcAlign);
  if (Align == 0) // Alignment of one or both could not be inferred.
    Align = 1; // 0 and 1 both specify no alignment, but 0 is reserved.

  bool isVol = false;
  SDLoc sdl = getCurSDLoc();

  // In the mempcpy context we need to pass in a false value for isTailCall
  // because the return pointer needs to be adjusted by the size of
  // the copied memory.
  SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Align, isVol,
                             false, /*isTailCall=*/false,
                             MachinePointerInfo(I.getArgOperand(0)),
                             MachinePointerInfo(I.getArgOperand(1)));
  assert(MC.getNode() != nullptr &&
         "** memcpy should not be lowered as TailCall in mempcpy context **");
  DAG.setRoot(MC);

  // Check if Size needs to be truncated or extended.
  // NOTE(review): this sign-extends Size to pointer width; mempcpy sizes are
  // unsigned, so a zero-extend would seem more natural — presumably benign
  // for in-range sizes, but worth confirming against upstream.
  Size = DAG.getSExtOrTrunc(Size, sdl, Dst.getValueType());

  // Adjust return pointer to point just past the last dst byte.
  SDValue DstPlusSize = DAG.getNode(ISD::ADD, sdl, Dst.getValueType(),
                                    Dst, Size);
  setValue(&I, DstPlusSize);
  return true;
}
7353 | | |
7354 | | /// See if we can lower a strcpy call into an optimized form. If so, return |
7355 | | /// true and lower it, otherwise return false and it will be lowered like a |
7356 | | /// normal call. |
7357 | | /// The caller already checked that \p I calls the appropriate LibFunc with a |
7358 | | /// correct prototype. |
7359 | 209 | bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) { |
7360 | 209 | const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); |
7361 | 209 | |
7362 | 209 | const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); |
7363 | 209 | std::pair<SDValue, SDValue> Res = |
7364 | 209 | TSI.EmitTargetCodeForStrcpy(DAG, getCurSDLoc(), getRoot(), |
7365 | 209 | getValue(Arg0), getValue(Arg1), |
7366 | 209 | MachinePointerInfo(Arg0), |
7367 | 209 | MachinePointerInfo(Arg1), isStpcpy); |
7368 | 209 | if (Res.first.getNode()) { |
7369 | 3 | setValue(&I, Res.first); |
7370 | 3 | DAG.setRoot(Res.second); |
7371 | 3 | return true; |
7372 | 3 | } |
7373 | 206 | |
7374 | 206 | return false; |
7375 | 206 | } |
7376 | | |
7377 | | /// See if we can lower a strcmp call into an optimized form. If so, return |
7378 | | /// true and lower it, otherwise return false and it will be lowered like a |
7379 | | /// normal call. |
7380 | | /// The caller already checked that \p I calls the appropriate LibFunc with a |
7381 | | /// correct prototype. |
7382 | 5.09k | bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) { |
7383 | 5.09k | const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); |
7384 | 5.09k | |
7385 | 5.09k | const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); |
7386 | 5.09k | std::pair<SDValue, SDValue> Res = |
7387 | 5.09k | TSI.EmitTargetCodeForStrcmp(DAG, getCurSDLoc(), DAG.getRoot(), |
7388 | 5.09k | getValue(Arg0), getValue(Arg1), |
7389 | 5.09k | MachinePointerInfo(Arg0), |
7390 | 5.09k | MachinePointerInfo(Arg1)); |
7391 | 5.09k | if (Res.first.getNode()) { |
7392 | 4 | processIntegerCallValue(I, Res.first, true); |
7393 | 4 | PendingLoads.push_back(Res.second); |
7394 | 4 | return true; |
7395 | 4 | } |
7396 | 5.08k | |
7397 | 5.08k | return false; |
7398 | 5.08k | } |
7399 | | |
7400 | | /// See if we can lower a strlen call into an optimized form. If so, return |
7401 | | /// true and lower it, otherwise return false and it will be lowered like a |
7402 | | /// normal call. |
7403 | | /// The caller already checked that \p I calls the appropriate LibFunc with a |
7404 | | /// correct prototype. |
7405 | 436 | bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) { |
7406 | 436 | const Value *Arg0 = I.getArgOperand(0); |
7407 | 436 | |
7408 | 436 | const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); |
7409 | 436 | std::pair<SDValue, SDValue> Res = |
7410 | 436 | TSI.EmitTargetCodeForStrlen(DAG, getCurSDLoc(), DAG.getRoot(), |
7411 | 436 | getValue(Arg0), MachinePointerInfo(Arg0)); |
7412 | 436 | if (Res.first.getNode()) { |
7413 | 1 | processIntegerCallValue(I, Res.first, false); |
7414 | 1 | PendingLoads.push_back(Res.second); |
7415 | 1 | return true; |
7416 | 1 | } |
7417 | 435 | |
7418 | 435 | return false; |
7419 | 435 | } |
7420 | | |
7421 | | /// See if we can lower a strnlen call into an optimized form. If so, return |
7422 | | /// true and lower it, otherwise return false and it will be lowered like a |
7423 | | /// normal call. |
7424 | | /// The caller already checked that \p I calls the appropriate LibFunc with a |
7425 | | /// correct prototype. |
/// See if we can lower a strnlen call into an optimized form. If so, return
/// true and lower it, otherwise return false and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) {
  const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);

  // Defer to the target's strnlen expansion hook; Res is (result, chain).
  const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
  std::pair<SDValue, SDValue> Res =
    TSI.EmitTargetCodeForStrnlen(DAG, getCurSDLoc(), DAG.getRoot(),
                                 getValue(Arg0), getValue(Arg1),
                                 MachinePointerInfo(Arg0));
  if (Res.first.getNode()) {
    // strnlen returns an unsigned size_t result.
    processIntegerCallValue(I, Res.first, false);
    PendingLoads.push_back(Res.second);
    return true;
  }

  return false;
}
7442 | | |
7443 | | /// See if we can lower a unary floating-point operation into an SDNode with |
7444 | | /// the specified Opcode. If so, return true and lower it, otherwise return |
7445 | | /// false and it will be lowered like a normal call. |
7446 | | /// The caller already checked that \p I calls the appropriate LibFunc with a |
7447 | | /// correct prototype. |
7448 | | bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, |
7449 | 802 | unsigned Opcode) { |
7450 | 802 | // We already checked this call's prototype; verify it doesn't modify errno. |
7451 | 802 | if (!I.onlyReadsMemory()) |
7452 | 201 | return false; |
7453 | 601 | |
7454 | 601 | SDValue Tmp = getValue(I.getArgOperand(0)); |
7455 | 601 | setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp)); |
7456 | 601 | return true; |
7457 | 601 | } |
7458 | | |
7459 | | /// See if we can lower a binary floating-point operation into an SDNode with |
7460 | | /// the specified Opcode. If so, return true and lower it. Otherwise return |
7461 | | /// false, and it will be lowered like a normal call. |
7462 | | /// The caller already checked that \p I calls the appropriate LibFunc with a |
7463 | | /// correct prototype. |
7464 | | bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I, |
7465 | 48 | unsigned Opcode) { |
7466 | 48 | // We already checked this call's prototype; verify it doesn't modify errno. |
7467 | 48 | if (!I.onlyReadsMemory()) |
7468 | 0 | return false; |
7469 | 48 | |
7470 | 48 | SDValue Tmp0 = getValue(I.getArgOperand(0)); |
7471 | 48 | SDValue Tmp1 = getValue(I.getArgOperand(1)); |
7472 | 48 | EVT VT = Tmp0.getValueType(); |
7473 | 48 | setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1)); |
7474 | 48 | return true; |
7475 | 48 | } |
7476 | | |
/// Lower a CallInst: dispatch inline asm, intrinsics, and recognized libc/libm
/// calls to their specialized lowerings; everything else falls through to a
/// generic call lowering via LowerCallTo.
void SelectionDAGBuilder::visitCall(const CallInst &I) {
  // Handle inline assembly differently.
  if (isa<InlineAsm>(I.getCalledValue())) {
    visitInlineAsm(&I);
    return;
  }

  if (Function *F = I.getCalledFunction()) {
    if (F->isDeclaration()) {
      // Is this an LLVM intrinsic or a target-specific intrinsic?
      unsigned IID = F->getIntrinsicID();
      if (!IID)
        if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo())
          IID = II->getIntrinsicID(F);

      if (IID) {
        visitIntrinsicCall(I, IID);
        return;
      }
    }

    // Check for well-known libc/libm calls.  If the function is internal, it
    // can't be a library call.  Don't do the check if marked as nobuiltin for
    // some reason or the call site requires strict floating point semantics.
    LibFunc Func;
    if (!I.isNoBuiltin() && !I.isStrictFP() && !F->hasLocalLinkage() &&
        F->hasName() && LibInfo->getLibFunc(*F, Func) &&
        LibInfo->hasOptimizedCodeGen(Func)) {
      // Each case tries the optimized lowering; on failure it breaks out and
      // the call is lowered as a normal call below.
      switch (Func) {
      default: break;
      case LibFunc_copysign:
      case LibFunc_copysignf:
      case LibFunc_copysignl:
        // We already checked this call's prototype; verify it doesn't modify
        // errno.
        if (I.onlyReadsMemory()) {
          SDValue LHS = getValue(I.getArgOperand(0));
          SDValue RHS = getValue(I.getArgOperand(1));
          setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurSDLoc(),
                                   LHS.getValueType(), LHS, RHS));
          return;
        }
        break;
      case LibFunc_fabs:
      case LibFunc_fabsf:
      case LibFunc_fabsl:
        if (visitUnaryFloatCall(I, ISD::FABS))
          return;
        break;
      case LibFunc_fmin:
      case LibFunc_fminf:
      case LibFunc_fminl:
        if (visitBinaryFloatCall(I, ISD::FMINNUM))
          return;
        break;
      case LibFunc_fmax:
      case LibFunc_fmaxf:
      case LibFunc_fmaxl:
        if (visitBinaryFloatCall(I, ISD::FMAXNUM))
          return;
        break;
      case LibFunc_sin:
      case LibFunc_sinf:
      case LibFunc_sinl:
        if (visitUnaryFloatCall(I, ISD::FSIN))
          return;
        break;
      case LibFunc_cos:
      case LibFunc_cosf:
      case LibFunc_cosl:
        if (visitUnaryFloatCall(I, ISD::FCOS))
          return;
        break;
      case LibFunc_sqrt:
      case LibFunc_sqrtf:
      case LibFunc_sqrtl:
      case LibFunc_sqrt_finite:
      case LibFunc_sqrtf_finite:
      case LibFunc_sqrtl_finite:
        if (visitUnaryFloatCall(I, ISD::FSQRT))
          return;
        break;
      case LibFunc_floor:
      case LibFunc_floorf:
      case LibFunc_floorl:
        if (visitUnaryFloatCall(I, ISD::FFLOOR))
          return;
        break;
      case LibFunc_nearbyint:
      case LibFunc_nearbyintf:
      case LibFunc_nearbyintl:
        if (visitUnaryFloatCall(I, ISD::FNEARBYINT))
          return;
        break;
      case LibFunc_ceil:
      case LibFunc_ceilf:
      case LibFunc_ceill:
        if (visitUnaryFloatCall(I, ISD::FCEIL))
          return;
        break;
      case LibFunc_rint:
      case LibFunc_rintf:
      case LibFunc_rintl:
        if (visitUnaryFloatCall(I, ISD::FRINT))
          return;
        break;
      case LibFunc_round:
      case LibFunc_roundf:
      case LibFunc_roundl:
        if (visitUnaryFloatCall(I, ISD::FROUND))
          return;
        break;
      case LibFunc_trunc:
      case LibFunc_truncf:
      case LibFunc_truncl:
        if (visitUnaryFloatCall(I, ISD::FTRUNC))
          return;
        break;
      case LibFunc_log2:
      case LibFunc_log2f:
      case LibFunc_log2l:
        if (visitUnaryFloatCall(I, ISD::FLOG2))
          return;
        break;
      case LibFunc_exp2:
      case LibFunc_exp2f:
      case LibFunc_exp2l:
        if (visitUnaryFloatCall(I, ISD::FEXP2))
          return;
        break;
      case LibFunc_memcmp:
        if (visitMemCmpCall(I))
          return;
        break;
      case LibFunc_mempcpy:
        if (visitMemPCpyCall(I))
          return;
        break;
      case LibFunc_memchr:
        if (visitMemChrCall(I))
          return;
        break;
      case LibFunc_strcpy:
        if (visitStrCpyCall(I, false))
          return;
        break;
      case LibFunc_stpcpy:
        if (visitStrCpyCall(I, true))
          return;
        break;
      case LibFunc_strcmp:
        if (visitStrCmpCall(I))
          return;
        break;
      case LibFunc_strlen:
        if (visitStrLenCall(I))
          return;
        break;
      case LibFunc_strnlen:
        if (visitStrNLenCall(I))
          return;
        break;
      }
    }
  }

  // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
  // have to do anything here to lower funclet bundles.
  assert(!I.hasOperandBundlesOtherThan(
             {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
         "Cannot lower calls with arbitrary operand bundles!");

  SDValue Callee = getValue(I.getCalledValue());

  if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
    LowerCallSiteWithDeoptBundle(&I, Callee, nullptr);
  else
    // Check if we can potentially perform a tail call. More detailed checking
    // is done within LowerCallTo, after more information about the call is
    // known.
    LowerCallTo(&I, Callee, I.isTailCall());
}
7659 | | |
7660 | | namespace { |
7661 | | |
7662 | | /// AsmOperandInfo - This contains information for each constraint that we are |
7663 | | /// lowering. |
/// AsmOperandInfo - This contains information for each constraint that we are
/// lowering.
class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo {
public:
  /// CallOperand - If this is the result output operand or a clobber
  /// this is null, otherwise it is the incoming operand to the CallInst.
  /// This gets modified as the asm is processed.
  SDValue CallOperand;

  /// AssignedRegs - If this is a register or register class operand, this
  /// contains the set of register corresponding to the operand.
  RegsForValue AssignedRegs;

  explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
    : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr, 0) {
  }

  /// Whether or not this operand accesses memory.
  bool hasMemory(const TargetLowering &TLI) const {
    // Indirect operand accesses access memory.
    if (isIndirect)
      return true;

    // Otherwise, the operand is memory-based iff any of its constraint codes
    // is classified as a memory constraint by the target.
    for (const auto &Code : Codes)
      if (TLI.getConstraintType(Code) == TargetLowering::C_Memory)
        return true;

    return false;
  }

  /// getCallOperandValEVT - Return the EVT of the Value* that this operand
  /// corresponds to. If there is no Value* for this operand, it returns
  /// MVT::Other.
  EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI,
                           const DataLayout &DL) const {
    if (!CallOperandVal) return MVT::Other;

    // A basic-block operand (e.g. an asm goto label) is pointer-sized.
    if (isa<BasicBlock>(CallOperandVal))
      return TLI.getPointerTy(DL);

    llvm::Type *OpTy = CallOperandVal->getType();

    // FIXME: code duplicated from TargetLowering::ParseConstraints().
    // If this is an indirect operand, the operand is a pointer to the
    // accessed type.
    if (isIndirect) {
      PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
      if (!PtrTy)
        report_fatal_error("Indirect operand for inline asm not a pointer!");
      OpTy = PtrTy->getElementType();
    }

    // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
    if (StructType *STy = dyn_cast<StructType>(OpTy))
      if (STy->getNumElements() == 1)
        OpTy = STy->getElementType(0);

    // If OpTy is not a single value, it may be a struct/union that we
    // can tile with integers.
    if (!OpTy->isSingleValueType() && OpTy->isSized()) {
      unsigned BitSize = DL.getTypeSizeInBits(OpTy);
      switch (BitSize) {
      default: break;
      case 1:
      case 8:
      case 16:
      case 32:
      case 64:
      case 128:
        OpTy = IntegerType::get(Context, BitSize);
        break;
      }
    }

    return TLI.getValueType(DL, OpTy, true);
  }
};
7739 | | |
7740 | | using SDISelAsmOperandInfoVector = SmallVector<SDISelAsmOperandInfo, 16>; |
7741 | | |
7742 | | } // end anonymous namespace |
7743 | | |
7744 | | /// Make sure that the output operand \p OpInfo and its corresponding input |
7745 | | /// operand \p MatchingOpInfo have compatible constraint types (otherwise error |
7746 | | /// out). |
7747 | | static void patchMatchingInput(const SDISelAsmOperandInfo &OpInfo, |
7748 | | SDISelAsmOperandInfo &MatchingOpInfo, |
7749 | 444 | SelectionDAG &DAG) { |
7750 | 444 | if (OpInfo.ConstraintVT == MatchingOpInfo.ConstraintVT) |
7751 | 431 | return; |
7752 | 13 | |
7753 | 13 | const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo(); |
7754 | 13 | const auto &TLI = DAG.getTargetLoweringInfo(); |
7755 | 13 | |
7756 | 13 | std::pair<unsigned, const TargetRegisterClass *> MatchRC = |
7757 | 13 | TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode, |
7758 | 13 | OpInfo.ConstraintVT); |
7759 | 13 | std::pair<unsigned, const TargetRegisterClass *> InputRC = |
7760 | 13 | TLI.getRegForInlineAsmConstraint(TRI, MatchingOpInfo.ConstraintCode, |
7761 | 13 | MatchingOpInfo.ConstraintVT); |
7762 | 13 | if ((OpInfo.ConstraintVT.isInteger() != |
7763 | 13 | MatchingOpInfo.ConstraintVT.isInteger()) || |
7764 | 13 | (MatchRC.second != InputRC.second)) { |
7765 | 0 | // FIXME: error out in a more elegant fashion |
7766 | 0 | report_fatal_error("Unsupported asm: input constraint" |
7767 | 0 | " with a matching output constraint of" |
7768 | 0 | " incompatible type!"); |
7769 | 0 | } |
7770 | 13 | MatchingOpInfo.ConstraintVT = OpInfo.ConstraintVT; |
7771 | 13 | } |
7772 | | |
7773 | | /// Get a direct memory input to behave well as an indirect operand. |
7774 | | /// This may introduce stores, hence the need for a \p Chain. |
7775 | | /// \return The (possibly updated) chain. |
/// Get a direct memory input to behave well as an indirect operand.
/// This may introduce stores, hence the need for a \p Chain.
/// \return The (possibly updated) chain.
static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
                                        SDISelAsmOperandInfo &OpInfo,
                                        SelectionDAG &DAG) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // If we don't have an indirect input, put it in the constpool if we can,
  // otherwise spill it to a stack slot.
  // TODO: This isn't quite right. We need to handle these according to
  // the addressing mode that the constraint wants. Also, this may take
  // an additional register for the computation and we don't want that
  // either.

  // If the operand is a float, integer, or vector constant, spill to a
  // constant pool entry to get its address.
  const Value *OpVal = OpInfo.CallOperandVal;
  if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
      isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) {
    // Constant-pool case: no store needed, so the chain is unchanged.
    OpInfo.CallOperand = DAG.getConstantPool(
        cast<Constant>(OpVal), TLI.getPointerTy(DAG.getDataLayout()));
    return Chain;
  }

  // Otherwise, create a stack slot and emit a store to it before the asm.
  Type *Ty = OpVal->getType();
  auto &DL = DAG.getDataLayout();
  uint64_t TySize = DL.getTypeAllocSize(Ty);
  unsigned Align = DL.getPrefTypeAlignment(Ty);
  MachineFunction &MF = DAG.getMachineFunction();
  int SSFI = MF.getFrameInfo().CreateStackObject(TySize, Align, false);
  SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL));
  // The store is threaded onto the chain so it is ordered before the asm.
  Chain = DAG.getTruncStore(Chain, Location, OpInfo.CallOperand, StackSlot,
                            MachinePointerInfo::getFixedStack(MF, SSFI),
                            TLI.getMemValueType(DL, Ty));
  // The operand now refers to the spilled value's address.
  OpInfo.CallOperand = StackSlot;

  return Chain;
}
7813 | | |
7814 | | /// GetRegistersForValue - Assign registers (virtual or physical) for the |
7815 | | /// specified operand. We prefer to assign virtual registers, to allow the |
7816 | | /// register allocator to handle the assignment process. However, if the asm |
7817 | | /// uses features that we can't model on machineinstrs, we have SDISel do the |
7818 | | /// allocation. This produces generally horrible, but correct, code. |
7819 | | /// |
7820 | | /// OpInfo describes the operand |
7821 | | /// RefOpInfo describes the matching operand if any, the operand otherwise |
7822 | | static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, |
7823 | | SDISelAsmOperandInfo &OpInfo, |
7824 | 102k | SDISelAsmOperandInfo &RefOpInfo) { |
7825 | 102k | LLVMContext &Context = *DAG.getContext(); |
7826 | 102k | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
7827 | 102k | |
7828 | 102k | MachineFunction &MF = DAG.getMachineFunction(); |
7829 | 102k | SmallVector<unsigned, 4> Regs; |
7830 | 102k | const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); |
7831 | 102k | |
7832 | 102k | // No work to do for memory operations. |
7833 | 102k | if (OpInfo.ConstraintType == TargetLowering::C_Memory) |
7834 | 8.57k | return; |
7835 | 94.0k | |
7836 | 94.0k | // If this is a constraint for a single physreg, or a constraint for a |
7837 | 94.0k | // register class, find it. |
7838 | 94.0k | unsigned AssignedReg; |
7839 | 94.0k | const TargetRegisterClass *RC; |
7840 | 94.0k | std::tie(AssignedReg, RC) = TLI.getRegForInlineAsmConstraint( |
7841 | 94.0k | &TRI, RefOpInfo.ConstraintCode, RefOpInfo.ConstraintVT); |
7842 | 94.0k | // RC is unset only on failure. Return immediately. |
7843 | 94.0k | if (!RC) |
7844 | 1.33k | return; |
7845 | 92.7k | |
7846 | 92.7k | // Get the actual register value type. This is important, because the user |
7847 | 92.7k | // may have asked for (e.g.) the AX register in i32 type. We need to |
7848 | 92.7k | // remember that AX is actually i16 to get the right extension. |
7849 | 92.7k | const MVT RegVT = *TRI.legalclasstypes_begin(*RC); |
7850 | 92.7k | |
7851 | 92.7k | if (OpInfo.ConstraintVT != MVT::Other) { |
7852 | 9.24k | // If this is an FP operand in an integer register (or visa versa), or more |
7853 | 9.24k | // generally if the operand value disagrees with the register class we plan |
7854 | 9.24k | // to stick it in, fix the operand type. |
7855 | 9.24k | // |
7856 | 9.24k | // If this is an input value, the bitcast to the new type is done now. |
7857 | 9.24k | // Bitcast for output value is done at the end of visitInlineAsm(). |
7858 | 9.24k | if ((OpInfo.Type == InlineAsm::isOutput || |
7859 | 9.24k | OpInfo.Type == InlineAsm::isInput4.66k ) && |
7860 | 9.24k | !TRI.isTypeLegalForClass(*RC, OpInfo.ConstraintVT)) { |
7861 | 560 | // Try to convert to the first EVT that the reg class contains. If the |
7862 | 560 | // types are identical size, use a bitcast to convert (e.g. two differing |
7863 | 560 | // vector types). Note: output bitcast is done at the end of |
7864 | 560 | // visitInlineAsm(). |
7865 | 560 | if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) { |
7866 | 185 | // Exclude indirect inputs while they are unsupported because the code |
7867 | 185 | // to perform the load is missing and thus OpInfo.CallOperand still |
7868 | 185 | // refers to the input address rather than the pointed-to value. |
7869 | 185 | if (OpInfo.Type == InlineAsm::isInput && !OpInfo.isIndirect88 ) |
7870 | 87 | OpInfo.CallOperand = |
7871 | 87 | DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand); |
7872 | 185 | OpInfo.ConstraintVT = RegVT; |
7873 | 185 | // If the operand is an FP value and we want it in integer registers, |
7874 | 185 | // use the corresponding integer type. This turns an f64 value into |
7875 | 185 | // i64, which can be passed with two i32 values on a 32-bit machine. |
7876 | 375 | } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()344 ) { |
7877 | 33 | MVT VT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits()); |
7878 | 33 | if (OpInfo.Type == InlineAsm::isInput) |
7879 | 13 | OpInfo.CallOperand = |
7880 | 13 | DAG.getNode(ISD::BITCAST, DL, VT, OpInfo.CallOperand); |
7881 | 33 | OpInfo.ConstraintVT = VT; |
7882 | 33 | } |
7883 | 560 | } |
7884 | 9.24k | } |
7885 | 92.7k | |
7886 | 92.7k | // No need to allocate a matching input constraint since the constraint it's |
7887 | 92.7k | // matching to has already been allocated. |
7888 | 92.7k | if (OpInfo.isMatchingInputConstraint()) |
7889 | 436 | return; |
7890 | 92.3k | |
7891 | 92.3k | EVT ValueVT = OpInfo.ConstraintVT; |
7892 | 92.3k | if (OpInfo.ConstraintVT == MVT::Other) |
7893 | 83.4k | ValueVT = RegVT; |
7894 | 92.3k | |
7895 | 92.3k | // Initialize NumRegs. |
7896 | 92.3k | unsigned NumRegs = 1; |
7897 | 92.3k | if (OpInfo.ConstraintVT != MVT::Other) |
7898 | 8.80k | NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT); |
7899 | 92.3k | |
7900 | 92.3k | // If this is a constraint for a specific physical register, like {r17}, |
7901 | 92.3k | // assign it now. |
7902 | 92.3k | |
7903 | 92.3k | // If this associated to a specific register, initialize iterator to correct |
7904 | 92.3k | // place. If virtual, make sure we have enough registers |
7905 | 92.3k | |
7906 | 92.3k | // Initialize iterator if necessary |
7907 | 92.3k | TargetRegisterClass::iterator I = RC->begin(); |
7908 | 92.3k | MachineRegisterInfo &RegInfo = MF.getRegInfo(); |
7909 | 92.3k | |
7910 | 92.3k | // Do not check for single registers. |
7911 | 92.3k | if (AssignedReg) { |
7912 | 1.13M | for (; *I != AssignedReg; ++I1.04M ) |
7913 | 85.3k | assert(I != RC->end() && "AssignedReg should be member of RC"); |
7914 | 85.3k | } |
7915 | 92.3k | |
7916 | 184k | for (; NumRegs; --NumRegs, ++I92.5k ) { |
7917 | 92.5k | assert(I != RC->end() && "Ran out of registers to allocate!"); |
7918 | 92.5k | Register R = AssignedReg ? Register(*I)85.3k : RegInfo.createVirtualRegister(RC)7.17k ; |
7919 | 92.5k | Regs.push_back(R); |
7920 | 92.5k | } |
7921 | 92.3k | |
7922 | 92.3k | OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); |
7923 | 92.3k | } |
7924 | | |
7925 | | static unsigned |
7926 | | findMatchingInlineAsmOperand(unsigned OperandNo, |
7927 | 436 | const std::vector<SDValue> &AsmNodeOperands) { |
7928 | 436 | // Scan until we find the definition we already emitted of this operand. |
7929 | 436 | unsigned CurOp = InlineAsm::Op_FirstOperand; |
7930 | 3.17k | for (; OperandNo; --OperandNo2.73k ) { |
7931 | 2.73k | // Advance to the next operand. |
7932 | 2.73k | unsigned OpFlag = |
7933 | 2.73k | cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); |
7934 | 2.73k | assert((InlineAsm::isRegDefKind(OpFlag) || |
7935 | 2.73k | InlineAsm::isRegDefEarlyClobberKind(OpFlag) || |
7936 | 2.73k | InlineAsm::isMemKind(OpFlag)) && |
7937 | 2.73k | "Skipped past definitions?"); |
7938 | 2.73k | CurOp += InlineAsm::getNumOperandRegisters(OpFlag) + 1; |
7939 | 2.73k | } |
7940 | 436 | return CurOp; |
7941 | 436 | } |
7942 | | |
7943 | | namespace { |
7944 | | |
7945 | | class ExtraFlags { |
7946 | | unsigned Flags = 0; |
7947 | | |
7948 | | public: |
7949 | 20.5k | explicit ExtraFlags(ImmutableCallSite CS) { |
7950 | 20.5k | const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); |
7951 | 20.5k | if (IA->hasSideEffects()) |
7952 | 19.2k | Flags |= InlineAsm::Extra_HasSideEffects; |
7953 | 20.5k | if (IA->isAlignStack()) |
7954 | 21 | Flags |= InlineAsm::Extra_IsAlignStack; |
7955 | 20.5k | if (CS.isConvergent()) |
7956 | 1 | Flags |= InlineAsm::Extra_IsConvergent; |
7957 | 20.5k | Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect; |
7958 | 20.5k | } |
7959 | | |
7960 | 102k | void update(const TargetLowering::AsmOperandInfo &OpInfo) { |
7961 | 102k | // Ideally, we would only check against memory constraints. However, the |
7962 | 102k | // meaning of an Other constraint can be target-specific and we can't easily |
7963 | 102k | // reason about it. Therefore, be conservative and set MayLoad/MayStore |
7964 | 102k | // for Other constraints as well. |
7965 | 102k | if (OpInfo.ConstraintType == TargetLowering::C_Memory || |
7966 | 102k | OpInfo.ConstraintType == TargetLowering::C_Other94.1k ) { |
7967 | 9.05k | if (OpInfo.Type == InlineAsm::isInput) |
7968 | 714 | Flags |= InlineAsm::Extra_MayLoad; |
7969 | 8.34k | else if (OpInfo.Type == InlineAsm::isOutput) |
7970 | 287 | Flags |= InlineAsm::Extra_MayStore; |
7971 | 8.05k | else if (OpInfo.Type == InlineAsm::isClobber) |
7972 | 8.05k | Flags |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore); |
7973 | 9.05k | } |
7974 | 102k | } |
7975 | | |
7976 | 20.5k | unsigned get() const { return Flags; } |
7977 | | }; |
7978 | | |
7979 | | } // end anonymous namespace |
7980 | | |
7981 | | /// visitInlineAsm - Handle a call to an InlineAsm object. |
7982 | 20.5k | void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { |
7983 | 20.5k | const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); |
7984 | 20.5k | |
7985 | 20.5k | /// ConstraintOperands - Information about all of the constraints. |
7986 | 20.5k | SDISelAsmOperandInfoVector ConstraintOperands; |
7987 | 20.5k | |
7988 | 20.5k | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
7989 | 20.5k | TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints( |
7990 | 20.5k | DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), CS); |
7991 | 20.5k | |
7992 | 20.5k | // First Pass: Calculate HasSideEffects and ExtraFlags (AlignStack, |
7993 | 20.5k | // AsmDialect, MayLoad, MayStore). |
7994 | 20.5k | bool HasSideEffect = IA->hasSideEffects(); |
7995 | 20.5k | ExtraFlags ExtraInfo(CS); |
7996 | 20.5k | |
7997 | 20.5k | unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. |
7998 | 20.5k | unsigned ResNo = 0; // ResNo - The result number of the next output. |
7999 | 102k | for (auto &T : TargetConstraints) { |
8000 | 102k | ConstraintOperands.push_back(SDISelAsmOperandInfo(T)); |
8001 | 102k | SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back(); |
8002 | 102k | |
8003 | 102k | // Compute the value type for each operand. |
8004 | 102k | if (OpInfo.Type == InlineAsm::isInput || |
8005 | 102k | (97.2k OpInfo.Type == InlineAsm::isOutput97.2k && OpInfo.isIndirect4.84k )) { |
8006 | 5.76k | OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); |
8007 | 5.76k | |
8008 | 5.76k | // Process the call argument. BasicBlocks are labels, currently appearing |
8009 | 5.76k | // only in asm's. |
8010 | 5.76k | const Instruction *I = CS.getInstruction(); |
8011 | 5.76k | if (isa<CallBrInst>(I) && |
8012 | 5.76k | (ArgNo - 1) >= (cast<CallBrInst>(I)->getNumArgOperands() - |
8013 | 11 | cast<CallBrInst>(I)->getNumIndirectDests())) { |
8014 | 10 | const auto *BA = cast<BlockAddress>(OpInfo.CallOperandVal); |
8015 | 10 | EVT VT = TLI.getValueType(DAG.getDataLayout(), BA->getType(), true); |
8016 | 10 | OpInfo.CallOperand = DAG.getTargetBlockAddress(BA, VT); |
8017 | 5.75k | } else if (const auto *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) { |
8018 | 2 | OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); |
8019 | 5.75k | } else { |
8020 | 5.75k | OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); |
8021 | 5.75k | } |
8022 | 5.76k | |
8023 | 5.76k | OpInfo.ConstraintVT = |
8024 | 5.76k | OpInfo |
8025 | 5.76k | .getCallOperandValEVT(*DAG.getContext(), TLI, DAG.getDataLayout()) |
8026 | 5.76k | .getSimpleVT(); |
8027 | 96.9k | } else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect4.57k ) { |
8028 | 4.57k | // The return value of the call is this value. As such, there is no |
8029 | 4.57k | // corresponding argument. |
8030 | 4.57k | assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); |
8031 | 4.57k | if (StructType *STy = dyn_cast<StructType>(CS.getType())) { |
8032 | 794 | OpInfo.ConstraintVT = TLI.getSimpleValueType( |
8033 | 794 | DAG.getDataLayout(), STy->getElementType(ResNo)); |
8034 | 3.78k | } else { |
8035 | 3.78k | assert(ResNo == 0 && "Asm only has one result!"); |
8036 | 3.78k | OpInfo.ConstraintVT = |
8037 | 3.78k | TLI.getSimpleValueType(DAG.getDataLayout(), CS.getType()); |
8038 | 3.78k | } |
8039 | 4.57k | ++ResNo; |
8040 | 92.3k | } else { |
8041 | 92.3k | OpInfo.ConstraintVT = MVT::Other; |
8042 | 92.3k | } |
8043 | 102k | |
8044 | 102k | if (!HasSideEffect) |
8045 | 5.74k | HasSideEffect = OpInfo.hasMemory(TLI); |
8046 | 102k | |
8047 | 102k | // Determine if this InlineAsm MayLoad or MayStore based on the constraints. |
8048 | 102k | // FIXME: Could we compute this on OpInfo rather than T? |
8049 | 102k | |
8050 | 102k | // Compute the constraint code and ConstraintType to use. |
8051 | 102k | TLI.ComputeConstraintToUse(T, SDValue()); |
8052 | 102k | |
8053 | 102k | ExtraInfo.update(T); |
8054 | 102k | } |
8055 | 20.5k | |
8056 | 20.5k | |
8057 | 20.5k | // We won't need to flush pending loads if this asm doesn't touch |
8058 | 20.5k | // memory and is nonvolatile. |
8059 | 20.5k | SDValue Flag, Chain = (HasSideEffect) ? getRoot()19.4k : DAG.getRoot()1.09k ; |
8060 | 20.5k | |
8061 | 20.5k | bool IsCallBr = isa<CallBrInst>(CS.getInstruction()); |
8062 | 20.5k | if (IsCallBr) { |
8063 | 5 | // If this is a callbr we need to flush pending exports since inlineasm_br |
8064 | 5 | // is a terminator. We need to do this before nodes are glued to |
8065 | 5 | // the inlineasm_br node. |
8066 | 5 | Chain = getControlRoot(); |
8067 | 5 | } |
8068 | 20.5k | |
8069 | 20.5k | // Second pass over the constraints: compute which constraint option to use. |
8070 | 102k | for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) { |
8071 | 102k | // If this is an output operand with a matching input operand, look up the |
8072 | 102k | // matching input. If their types mismatch, e.g. one is an integer, the |
8073 | 102k | // other is floating point, or their sizes are different, flag it as an |
8074 | 102k | // error. |
8075 | 102k | if (OpInfo.hasMatchingInput()) { |
8076 | 444 | SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; |
8077 | 444 | patchMatchingInput(OpInfo, Input, DAG); |
8078 | 444 | } |
8079 | 102k | |
8080 | 102k | // Compute the constraint code and ConstraintType to use. |
8081 | 102k | TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); |
8082 | 102k | |
8083 | 102k | if (OpInfo.ConstraintType == TargetLowering::C_Memory && |
8084 | 102k | OpInfo.Type == InlineAsm::isClobber8.57k ) |
8085 | 8.05k | continue; |
8086 | 94.6k | |
8087 | 94.6k | // If this is a memory input, and if the operand is not indirect, do what we |
8088 | 94.6k | // need to provide an address for the memory input. |
8089 | 94.6k | if (OpInfo.ConstraintType == TargetLowering::C_Memory && |
8090 | 94.6k | !OpInfo.isIndirect520 ) { |
8091 | 118 | assert((OpInfo.isMultipleAlternative || |
8092 | 118 | (OpInfo.Type == InlineAsm::isInput)) && |
8093 | 118 | "Can only indirectify direct input operands!"); |
8094 | 118 | |
8095 | 118 | // Memory operands really want the address of the value. |
8096 | 118 | Chain = getAddressForMemoryInput(Chain, getCurSDLoc(), OpInfo, DAG); |
8097 | 118 | |
8098 | 118 | // There is no longer a Value* corresponding to this operand. |
8099 | 118 | OpInfo.CallOperandVal = nullptr; |
8100 | 118 | |
8101 | 118 | // It is now an indirect operand. |
8102 | 118 | OpInfo.isIndirect = true; |
8103 | 118 | } |
8104 | 94.6k | |
8105 | 94.6k | } |
8106 | 20.5k | |
8107 | 20.5k | // AsmNodeOperands - The operands for the ISD::INLINEASM node. |
8108 | 20.5k | std::vector<SDValue> AsmNodeOperands; |
8109 | 20.5k | AsmNodeOperands.push_back(SDValue()); // reserve space for input chain |
8110 | 20.5k | AsmNodeOperands.push_back(DAG.getTargetExternalSymbol( |
8111 | 20.5k | IA->getAsmString().c_str(), TLI.getPointerTy(DAG.getDataLayout()))); |
8112 | 20.5k | |
8113 | 20.5k | // If we have a !srcloc metadata node associated with it, we want to attach |
8114 | 20.5k | // this to the ultimately generated inline asm machineinstr. To do this, we |
8115 | 20.5k | // pass in the third operand as this (potentially null) inline asm MDNode. |
8116 | 20.5k | const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc"); |
8117 | 20.5k | AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc)); |
8118 | 20.5k | |
8119 | 20.5k | // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore |
8120 | 20.5k | // bits as operand 3. |
8121 | 20.5k | AsmNodeOperands.push_back(DAG.getTargetConstant( |
8122 | 20.5k | ExtraInfo.get(), getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); |
8123 | 20.5k | |
8124 | 20.5k | // Third pass: Loop over operands to prepare DAG-level operands.. As part of |
8125 | 20.5k | // this, assign virtual and physical registers for inputs and otput. |
8126 | 102k | for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) { |
8127 | 102k | // Assign Registers. |
8128 | 102k | SDISelAsmOperandInfo &RefOpInfo = |
8129 | 102k | OpInfo.isMatchingInputConstraint() |
8130 | 102k | ? ConstraintOperands[OpInfo.getMatchedOperand()]436 |
8131 | 102k | : OpInfo102k ; |
8132 | 102k | GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo); |
8133 | 102k | |
8134 | 102k | switch (OpInfo.Type) { |
8135 | 102k | case InlineAsm::isOutput: |
8136 | 4.84k | if (OpInfo.ConstraintType == TargetLowering::C_Memory || |
8137 | 4.84k | (4.61k OpInfo.ConstraintType == TargetLowering::C_Other4.61k && |
8138 | 4.61k | OpInfo.isIndirect60 )) { |
8139 | 230 | unsigned ConstraintID = |
8140 | 230 | TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); |
8141 | 230 | assert(ConstraintID != InlineAsm::Constraint_Unknown && |
8142 | 230 | "Failed to convert memory constraint code to constraint id."); |
8143 | 230 | |
8144 | 230 | // Add information to the INLINEASM node to know about this output. |
8145 | 230 | unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); |
8146 | 230 | OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID); |
8147 | 230 | AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(), |
8148 | 230 | MVT::i32)); |
8149 | 230 | AsmNodeOperands.push_back(OpInfo.CallOperand); |
8150 | 230 | break; |
8151 | 4.61k | } else if ((OpInfo.ConstraintType == TargetLowering::C_Other && |
8152 | 4.61k | !OpInfo.isIndirect57 ) || |
8153 | 4.61k | OpInfo.ConstraintType == TargetLowering::C_Register4.55k || |
8154 | 4.61k | OpInfo.ConstraintType == TargetLowering::C_RegisterClass3.55k ) { |
8155 | 4.61k | // Otherwise, this outputs to a register (directly for C_Register / |
8156 | 4.61k | // C_RegisterClass, and a target-defined fashion for C_Other). Find a |
8157 | 4.61k | // register that we can use. |
8158 | 4.61k | if (OpInfo.AssignedRegs.Regs.empty()) { |
8159 | 34 | emitInlineAsmError( |
8160 | 34 | CS, "couldn't allocate output register for constraint '" + |
8161 | 34 | Twine(OpInfo.ConstraintCode) + "'"); |
8162 | 34 | return; |
8163 | 34 | } |
8164 | 4.58k | |
8165 | 4.58k | // Add information to the INLINEASM node to know that this register is |
8166 | 4.58k | // set. |
8167 | 4.58k | OpInfo.AssignedRegs.AddInlineAsmOperands( |
8168 | 4.58k | OpInfo.isEarlyClobber ? InlineAsm::Kind_RegDefEarlyClobber222 |
8169 | 4.58k | : InlineAsm::Kind_RegDef4.35k , |
8170 | 4.58k | false, 0, getCurSDLoc(), DAG, AsmNodeOperands); |
8171 | 4.58k | } |
8172 | 4.84k | break4.58k ; |
8173 | 4.84k | |
8174 | 5.45k | case InlineAsm::isInput: { |
8175 | 5.45k | SDValue InOperandVal = OpInfo.CallOperand; |
8176 | 5.45k | |
8177 | 5.45k | if (OpInfo.isMatchingInputConstraint()) { |
8178 | 436 | // If this is required to match an output register we have already set, |
8179 | 436 | // just use its register. |
8180 | 436 | auto CurOp = findMatchingInlineAsmOperand(OpInfo.getMatchedOperand(), |
8181 | 436 | AsmNodeOperands); |
8182 | 436 | unsigned OpFlag = |
8183 | 436 | cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); |
8184 | 436 | if (InlineAsm::isRegDefKind(OpFlag) || |
8185 | 436 | InlineAsm::isRegDefEarlyClobberKind(OpFlag)45 ) { |
8186 | 436 | // Add (OpFlag&0xffff)>>3 registers to MatchedRegs. |
8187 | 436 | if (OpInfo.isIndirect) { |
8188 | 1 | // This happens on gcc/testsuite/gcc.dg/pr8788-1.c |
8189 | 1 | emitInlineAsmError(CS, "inline asm not supported yet:" |
8190 | 1 | " don't know how to handle tied " |
8191 | 1 | "indirect register inputs"); |
8192 | 1 | return; |
8193 | 1 | } |
8194 | 435 | |
8195 | 435 | MVT RegVT = AsmNodeOperands[CurOp+1].getSimpleValueType(); |
8196 | 435 | SmallVector<unsigned, 4> Regs; |
8197 | 435 | |
8198 | 435 | if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) { |
8199 | 435 | unsigned NumRegs = InlineAsm::getNumOperandRegisters(OpFlag); |
8200 | 435 | MachineRegisterInfo &RegInfo = |
8201 | 435 | DAG.getMachineFunction().getRegInfo(); |
8202 | 898 | for (unsigned i = 0; i != NumRegs; ++i463 ) |
8203 | 463 | Regs.push_back(RegInfo.createVirtualRegister(RC)); |
8204 | 435 | } else { |
8205 | 0 | emitInlineAsmError(CS, "inline asm error: This value type register " |
8206 | 0 | "class is not natively supported!"); |
8207 | 0 | return; |
8208 | 0 | } |
8209 | 435 | |
8210 | 435 | RegsForValue MatchedRegs(Regs, RegVT, InOperandVal.getValueType()); |
8211 | 435 | |
8212 | 435 | SDLoc dl = getCurSDLoc(); |
8213 | 435 | // Use the produced MatchedRegs object to |
8214 | 435 | MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, |
8215 | 435 | CS.getInstruction()); |
8216 | 435 | MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, |
8217 | 435 | true, OpInfo.getMatchedOperand(), dl, |
8218 | 435 | DAG, AsmNodeOperands); |
8219 | 435 | break; |
8220 | 435 | } |
8221 | 0 | |
8222 | 0 | assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!"); |
8223 | 0 | assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 && |
8224 | 0 | "Unexpected number of operands"); |
8225 | 0 | // Add information to the INLINEASM node to know about this input. |
8226 | 0 | // See InlineAsm.h isUseOperandTiedToDef. |
8227 | 0 | OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag); |
8228 | 0 | OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag, |
8229 | 0 | OpInfo.getMatchedOperand()); |
8230 | 0 | AsmNodeOperands.push_back(DAG.getTargetConstant( |
8231 | 0 | OpFlag, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); |
8232 | 0 | AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); |
8233 | 0 | break; |
8234 | 0 | } |
8235 | 5.01k | |
8236 | 5.01k | // Treat indirect 'X' constraint as memory. |
8237 | 5.01k | if (OpInfo.ConstraintType == TargetLowering::C_Other && |
8238 | 5.01k | OpInfo.isIndirect467 ) |
8239 | 1 | OpInfo.ConstraintType = TargetLowering::C_Memory; |
8240 | 5.01k | |
8241 | 5.01k | if (OpInfo.ConstraintType == TargetLowering::C_Other) { |
8242 | 466 | std::vector<SDValue> Ops; |
8243 | 466 | TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, |
8244 | 466 | Ops, DAG); |
8245 | 466 | if (Ops.empty()) { |
8246 | 29 | emitInlineAsmError(CS, "invalid operand for inline asm constraint '" + |
8247 | 29 | Twine(OpInfo.ConstraintCode) + "'"); |
8248 | 29 | return; |
8249 | 29 | } |
8250 | 437 | |
8251 | 437 | // Add information to the INLINEASM node to know about this input. |
8252 | 437 | unsigned ResOpType = |
8253 | 437 | InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); |
8254 | 437 | AsmNodeOperands.push_back(DAG.getTargetConstant( |
8255 | 437 | ResOpType, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); |
8256 | 437 | AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); |
8257 | 437 | break; |
8258 | 437 | } |
8259 | 4.55k | |
8260 | 4.55k | if (OpInfo.ConstraintType == TargetLowering::C_Memory) { |
8261 | 294 | assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); |
8262 | 294 | assert(InOperandVal.getValueType() == |
8263 | 294 | TLI.getPointerTy(DAG.getDataLayout()) && |
8264 | 294 | "Memory operands expect pointer values"); |
8265 | 294 | |
8266 | 294 | unsigned ConstraintID = |
8267 | 294 | TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); |
8268 | 294 | assert(ConstraintID != InlineAsm::Constraint_Unknown && |
8269 | 294 | "Failed to convert memory constraint code to constraint id."); |
8270 | 294 | |
8271 | 294 | // Add information to the INLINEASM node to know about this input. |
8272 | 294 | unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); |
8273 | 294 | ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID); |
8274 | 294 | AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, |
8275 | 294 | getCurSDLoc(), |
8276 | 294 | MVT::i32)); |
8277 | 294 | AsmNodeOperands.push_back(InOperandVal); |
8278 | 294 | break; |
8279 | 294 | } |
8280 | 4.25k | |
8281 | 4.25k | assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || |
8282 | 4.25k | OpInfo.ConstraintType == TargetLowering::C_Register) && |
8283 | 4.25k | "Unknown constraint type!"); |
8284 | 4.25k | |
8285 | 4.25k | // TODO: Support this. |
8286 | 4.25k | if (OpInfo.isIndirect) { |
8287 | 2 | emitInlineAsmError( |
8288 | 2 | CS, "Don't know how to handle indirect register inputs yet " |
8289 | 2 | "for constraint '" + |
8290 | 2 | Twine(OpInfo.ConstraintCode) + "'"); |
8291 | 2 | return; |
8292 | 2 | } |
8293 | 4.25k | |
8294 | 4.25k | // Copy the input into the appropriate registers. |
8295 | 4.25k | if (OpInfo.AssignedRegs.Regs.empty()) { |
8296 | 28 | emitInlineAsmError(CS, "couldn't allocate input reg for constraint '" + |
8297 | 28 | Twine(OpInfo.ConstraintCode) + "'"); |
8298 | 28 | return; |
8299 | 28 | } |
8300 | 4.22k | |
8301 | 4.22k | SDLoc dl = getCurSDLoc(); |
8302 | 4.22k | |
8303 | 4.22k | OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl, |
8304 | 4.22k | Chain, &Flag, CS.getInstruction()); |
8305 | 4.22k | |
8306 | 4.22k | OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0, |
8307 | 4.22k | dl, DAG, AsmNodeOperands); |
8308 | 4.22k | break; |
8309 | 4.22k | } |
8310 | 92.3k | case InlineAsm::isClobber: |
8311 | 92.3k | // Add the clobbered value to the operand list, so that the register |
8312 | 92.3k | // allocator is aware that the physreg got clobbered. |
8313 | 92.3k | if (!OpInfo.AssignedRegs.Regs.empty()) |
8314 | 83.4k | OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber, |
8315 | 83.4k | false, 0, getCurSDLoc(), DAG, |
8316 | 83.4k | AsmNodeOperands); |
8317 | 92.3k | break; |
8318 | 102k | } |
8319 | 102k | } |
8320 | 20.5k | |
8321 | 20.5k | // Finish up input operands. Set the input chain and add the flag last. |
8322 | 20.5k | AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; |
8323 | 20.4k | if (Flag.getNode()) AsmNodeOperands.push_back(Flag)3.37k ; |
8324 | 20.4k | |
8325 | 20.4k | unsigned ISDOpc = IsCallBr ? ISD::INLINEASM_BR5 : ISD::INLINEASM20.4k ; |
8326 | 20.4k | Chain = DAG.getNode(ISDOpc, getCurSDLoc(), |
8327 | 20.4k | DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); |
8328 | 20.4k | Flag = Chain.getValue(1); |
8329 | 20.4k | |
8330 | 20.4k | // Do additional work to generate outputs. |
8331 | 20.4k | |
8332 | 20.4k | SmallVector<EVT, 1> ResultVTs; |
8333 | 20.4k | SmallVector<SDValue, 1> ResultValues; |
8334 | 20.4k | SmallVector<SDValue, 8> OutChains; |
8335 | 20.4k | |
8336 | 20.4k | llvm::Type *CSResultType = CS.getType(); |
8337 | 20.4k | ArrayRef<Type *> ResultTypes; |
8338 | 20.4k | if (StructType *StructResult = dyn_cast<StructType>(CSResultType)) |
8339 | 197 | ResultTypes = StructResult->elements(); |
8340 | 20.2k | else if (!CSResultType->isVoidTy()) |
8341 | 3.73k | ResultTypes = makeArrayRef(CSResultType); |
8342 | 20.4k | |
8343 | 20.4k | auto CurResultType = ResultTypes.begin(); |
8344 | 20.4k | auto handleRegAssign = [&](SDValue V) { |
8345 | 4.52k | assert(CurResultType != ResultTypes.end() && "Unexpected value"); |
8346 | 4.52k | assert((*CurResultType)->isSized() && "Unexpected unsized type"); |
8347 | 4.52k | EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), *CurResultType); |
8348 | 4.52k | ++CurResultType; |
8349 | 4.52k | // If the type of the inline asm call site return value is different but has |
8350 | 4.52k | // same size as the type of the asm output bitcast it. One example of this |
8351 | 4.52k | // is for vectors with different width / number of elements. This can |
8352 | 4.52k | // happen for register classes that can contain multiple different value |
8353 | 4.52k | // types. The preg or vreg allocated may not have the same VT as was |
8354 | 4.52k | // expected. |
8355 | 4.52k | // |
8356 | 4.52k | // This can also happen for a return value that disagrees with the register |
8357 | 4.52k | // class it is put in, eg. a double in a general-purpose register on a |
8358 | 4.52k | // 32-bit machine. |
8359 | 4.52k | if (ResultVT != V.getValueType() && |
8360 | 4.52k | ResultVT.getSizeInBits() == V.getValueSizeInBits()117 ) |
8361 | 117 | V = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultVT, V); |
8362 | 4.40k | else if (ResultVT != V.getValueType() && ResultVT.isInteger()0 && |
8363 | 4.40k | V.getValueType().isInteger()0 ) { |
8364 | 0 | // If a result value was tied to an input value, the computed result |
8365 | 0 | // may have a wider width than the expected result. Extract the |
8366 | 0 | // relevant portion. |
8367 | 0 | V = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultVT, V); |
8368 | 0 | } |
8369 | 4.52k | assert(ResultVT == V.getValueType() && "Asm result value mismatch!"); |
8370 | 4.52k | ResultVTs.push_back(ResultVT); |
8371 | 4.52k | ResultValues.push_back(V); |
8372 | 4.52k | }; |
8373 | 20.4k | |
8374 | 20.4k | // Deal with output operands. |
8375 | 102k | for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) { |
8376 | 102k | if (OpInfo.Type == InlineAsm::isOutput) { |
8377 | 4.78k | SDValue Val; |
8378 | 4.78k | // Skip trivial output operands. |
8379 | 4.78k | if (OpInfo.AssignedRegs.Regs.empty()) |
8380 | 230 | continue; |
8381 | 4.55k | |
8382 | 4.55k | switch (OpInfo.ConstraintType) { |
8383 | 4.55k | case TargetLowering::C_Register: |
8384 | 4.50k | case TargetLowering::C_RegisterClass: |
8385 | 4.50k | Val = OpInfo.AssignedRegs.getCopyFromRegs( |
8386 | 4.50k | DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, CS.getInstruction()); |
8387 | 4.50k | break; |
8388 | 4.50k | case TargetLowering::C_Other: |
8389 | 57 | Val = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(), |
8390 | 57 | OpInfo, DAG); |
8391 | 57 | break; |
8392 | 4.50k | case TargetLowering::C_Memory: |
8393 | 0 | break; // Already handled. |
8394 | 4.50k | case TargetLowering::C_Unknown: |
8395 | 0 | assert(false && "Unexpected unknown constraint"); |
8396 | 4.55k | } |
8397 | 4.55k | |
8398 | 4.55k | // Indirect output manifest as stores. Record output chains. |
8399 | 4.55k | if (OpInfo.isIndirect) { |
8400 | 34 | const Value *Ptr = OpInfo.CallOperandVal; |
8401 | 34 | assert(Ptr && "Expected value CallOperandVal for indirect asm operand"); |
8402 | 34 | SDValue Store = DAG.getStore(Chain, getCurSDLoc(), Val, getValue(Ptr), |
8403 | 34 | MachinePointerInfo(Ptr)); |
8404 | 34 | OutChains.push_back(Store); |
8405 | 4.52k | } else { |
8406 | 4.52k | // generate CopyFromRegs to associated registers. |
8407 | 4.52k | assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); |
8408 | 4.52k | if (Val.getOpcode() == ISD::MERGE_VALUES) { |
8409 | 0 | for (const SDValue &V : Val->op_values()) |
8410 | 0 | handleRegAssign(V); |
8411 | 0 | } else |
8412 | 4.52k | handleRegAssign(Val); |
8413 | 4.52k | } |
8414 | 4.55k | } |
8415 | 102k | } |
8416 | 20.4k | |
8417 | 20.4k | // Set results. |
8418 | 20.4k | if (!ResultValues.empty()) { |
8419 | 3.93k | assert(CurResultType == ResultTypes.end() && |
8420 | 3.93k | "Mismatch in number of ResultTypes"); |
8421 | 3.93k | assert(ResultValues.size() == ResultTypes.size() && |
8422 | 3.93k | "Mismatch in number of output operands in asm result"); |
8423 | 3.93k | |
8424 | 3.93k | SDValue V = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), |
8425 | 3.93k | DAG.getVTList(ResultVTs), ResultValues); |
8426 | 3.93k | setValue(CS.getInstruction(), V); |
8427 | 3.93k | } |
8428 | 20.4k | |
8429 | 20.4k | // Collect store chains. |
8430 | 20.4k | if (!OutChains.empty()) |
8431 | 31 | Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, OutChains); |
8432 | 20.4k | |
8433 | 20.4k | // Only Update Root if inline assembly has a memory effect. |
8434 | 20.4k | if (ResultValues.empty() || HasSideEffect3.93k || !OutChains.empty()893 || IsCallBr893 ) |
8435 | 19.5k | DAG.setRoot(Chain); |
8436 | 20.4k | } |
8437 | | |
8438 | | void SelectionDAGBuilder::emitInlineAsmError(ImmutableCallSite CS, |
8439 | 94 | const Twine &Message) { |
8440 | 94 | LLVMContext &Ctx = *DAG.getContext(); |
8441 | 94 | Ctx.emitError(CS.getInstruction(), Message); |
8442 | 94 | |
8443 | 94 | // Make sure we leave the DAG in a valid state |
8444 | 94 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
8445 | 94 | SmallVector<EVT, 1> ValueVTs; |
8446 | 94 | ComputeValueVTs(TLI, DAG.getDataLayout(), CS->getType(), ValueVTs); |
8447 | 94 | |
8448 | 94 | if (ValueVTs.empty()) |
8449 | 41 | return; |
8450 | 53 | |
8451 | 53 | SmallVector<SDValue, 1> Ops; |
8452 | 108 | for (unsigned i = 0, e = ValueVTs.size(); i != e; ++i55 ) |
8453 | 55 | Ops.push_back(DAG.getUNDEF(ValueVTs[i])); |
8454 | 53 | |
8455 | 53 | setValue(CS.getInstruction(), DAG.getMergeValues(Ops, getCurSDLoc())); |
8456 | 53 | } |
8457 | | |
8458 | 532 | void SelectionDAGBuilder::visitVAStart(const CallInst &I) { |
8459 | 532 | DAG.setRoot(DAG.getNode(ISD::VASTART, getCurSDLoc(), |
8460 | 532 | MVT::Other, getRoot(), |
8461 | 532 | getValue(I.getArgOperand(0)), |
8462 | 532 | DAG.getSrcValue(I.getArgOperand(0)))); |
8463 | 532 | } |
8464 | | |
8465 | 334 | void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { |
8466 | 334 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
8467 | 334 | const DataLayout &DL = DAG.getDataLayout(); |
8468 | 334 | SDValue V = DAG.getVAArg( |
8469 | 334 | TLI.getMemValueType(DAG.getDataLayout(), I.getType()), getCurSDLoc(), |
8470 | 334 | getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)), |
8471 | 334 | DL.getABITypeAlignment(I.getType())); |
8472 | 334 | DAG.setRoot(V.getValue(1)); |
8473 | 334 | |
8474 | 334 | if (I.getType()->isPointerTy()) |
8475 | 22 | V = DAG.getPtrExtOrTrunc( |
8476 | 22 | V, getCurSDLoc(), TLI.getValueType(DAG.getDataLayout(), I.getType())); |
8477 | 334 | setValue(&I, V); |
8478 | 334 | } |
8479 | | |
8480 | 700 | void SelectionDAGBuilder::visitVAEnd(const CallInst &I) { |
8481 | 700 | DAG.setRoot(DAG.getNode(ISD::VAEND, getCurSDLoc(), |
8482 | 700 | MVT::Other, getRoot(), |
8483 | 700 | getValue(I.getArgOperand(0)), |
8484 | 700 | DAG.getSrcValue(I.getArgOperand(0)))); |
8485 | 700 | } |
8486 | | |
8487 | 263 | void SelectionDAGBuilder::visitVACopy(const CallInst &I) { |
8488 | 263 | DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurSDLoc(), |
8489 | 263 | MVT::Other, getRoot(), |
8490 | 263 | getValue(I.getArgOperand(0)), |
8491 | 263 | getValue(I.getArgOperand(1)), |
8492 | 263 | DAG.getSrcValue(I.getArgOperand(0)), |
8493 | 263 | DAG.getSrcValue(I.getArgOperand(1)))); |
8494 | 263 | } |
8495 | | |
8496 | | SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG, |
8497 | | const Instruction &I, |
8498 | 286k | SDValue Op) { |
8499 | 286k | const MDNode *Range = I.getMetadata(LLVMContext::MD_range); |
8500 | 286k | if (!Range) |
8501 | 281k | return Op; |
8502 | 4.25k | |
8503 | 4.25k | ConstantRange CR = getConstantRangeFromMetadata(*Range); |
8504 | 4.25k | if (CR.isFullSet() || CR.isEmptySet()4.25k || CR.isUpperWrapped()4.25k ) |
8505 | 0 | return Op; |
8506 | 4.25k | |
8507 | 4.25k | APInt Lo = CR.getUnsignedMin(); |
8508 | 4.25k | if (!Lo.isMinValue()) |
8509 | 1 | return Op; |
8510 | 4.25k | |
8511 | 4.25k | APInt Hi = CR.getUnsignedMax(); |
8512 | 4.25k | unsigned Bits = std::max(Hi.getActiveBits(), |
8513 | 4.25k | static_cast<unsigned>(IntegerType::MIN_INT_BITS)); |
8514 | 4.25k | |
8515 | 4.25k | EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits); |
8516 | 4.25k | |
8517 | 4.25k | SDLoc SL = getCurSDLoc(); |
8518 | 4.25k | |
8519 | 4.25k | SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(), Op, |
8520 | 4.25k | DAG.getValueType(SmallVT)); |
8521 | 4.25k | unsigned NumVals = Op.getNode()->getNumValues(); |
8522 | 4.25k | if (NumVals == 1) |
8523 | 4.24k | return ZExt; |
8524 | 5 | |
8525 | 5 | SmallVector<SDValue, 4> Ops; |
8526 | 5 | |
8527 | 5 | Ops.push_back(ZExt); |
8528 | 13 | for (unsigned I = 1; I != NumVals; ++I8 ) |
8529 | 8 | Ops.push_back(Op.getValue(I)); |
8530 | 5 | |
8531 | 5 | return DAG.getMergeValues(Ops, SL); |
8532 | 5 | } |
8533 | | |
8534 | | /// Populate a CallLowerinInfo (into \p CLI) based on the properties of |
8535 | | /// the call being lowered. |
8536 | | /// |
8537 | | /// This is a helper for lowering intrinsics that follow a target calling |
8538 | | /// convention or require stack pointer adjustment. Only a subset of the |
8539 | | /// intrinsic's operands need to participate in the calling convention. |
8540 | | void SelectionDAGBuilder::populateCallLoweringInfo( |
8541 | | TargetLowering::CallLoweringInfo &CLI, const CallBase *Call, |
8542 | | unsigned ArgIdx, unsigned NumArgs, SDValue Callee, Type *ReturnTy, |
8543 | 234 | bool IsPatchPoint) { |
8544 | 234 | TargetLowering::ArgListTy Args; |
8545 | 234 | Args.reserve(NumArgs); |
8546 | 234 | |
8547 | 234 | // Populate the argument list. |
8548 | 234 | // Attributes for args start at offset 1, after the return attribute. |
8549 | 234 | for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs; |
8550 | 449 | ArgI != ArgE; ++ArgI215 ) { |
8551 | 215 | const Value *V = Call->getOperand(ArgI); |
8552 | 215 | |
8553 | 215 | assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic."); |
8554 | 215 | |
8555 | 215 | TargetLowering::ArgListEntry Entry; |
8556 | 215 | Entry.Node = getValue(V); |
8557 | 215 | Entry.Ty = V->getType(); |
8558 | 215 | Entry.setAttributes(Call, ArgI); |
8559 | 215 | Args.push_back(Entry); |
8560 | 215 | } |
8561 | 234 | |
8562 | 234 | CLI.setDebugLoc(getCurSDLoc()) |
8563 | 234 | .setChain(getRoot()) |
8564 | 234 | .setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args)) |
8565 | 234 | .setDiscardResult(Call->use_empty()) |
8566 | 234 | .setIsPatchPoint(IsPatchPoint); |
8567 | 234 | } |
8568 | | |
8569 | | /// Add a stack map intrinsic call's live variable operands to a stackmap |
8570 | | /// or patchpoint target node's operand list. |
8571 | | /// |
8572 | | /// Constants are converted to TargetConstants purely as an optimization to |
8573 | | /// avoid constant materialization and register allocation. |
8574 | | /// |
8575 | | /// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not |
8576 | | /// generate addess computation nodes, and so FinalizeISel can convert the |
8577 | | /// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids |
8578 | | /// address materialization and register allocation, but may also be required |
8579 | | /// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an |
8580 | | /// alloca in the entry block, then the runtime may assume that the alloca's |
8581 | | /// StackMap location can be read immediately after compilation and that the |
8582 | | /// location is valid at any point during execution (this is similar to the |
8583 | | /// assumption made by the llvm.gcroot intrinsic). If the alloca's location were |
8584 | | /// only available in a register, then the runtime would need to trap when |
8585 | | /// execution reaches the StackMap in order to read the alloca's location. |
8586 | | static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx, |
8587 | | const SDLoc &DL, SmallVectorImpl<SDValue> &Ops, |
8588 | 286 | SelectionDAGBuilder &Builder) { |
8589 | 671 | for (unsigned i = StartIdx, e = CS.arg_size(); i != e; ++i385 ) { |
8590 | 385 | SDValue OpVal = Builder.getValue(CS.getArgument(i)); |
8591 | 385 | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) { |
8592 | 58 | Ops.push_back( |
8593 | 58 | Builder.DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64)); |
8594 | 58 | Ops.push_back( |
8595 | 58 | Builder.DAG.getTargetConstant(C->getSExtValue(), DL, MVT::i64)); |
8596 | 327 | } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) { |
8597 | 21 | const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); |
8598 | 21 | Ops.push_back(Builder.DAG.getTargetFrameIndex( |
8599 | 21 | FI->getIndex(), TLI.getFrameIndexTy(Builder.DAG.getDataLayout()))); |
8600 | 21 | } else |
8601 | 306 | Ops.push_back(OpVal); |
8602 | 385 | } |
8603 | 286 | } |
8604 | | |
8605 | | /// Lower llvm.experimental.stackmap directly to its target opcode. |
8606 | 140 | void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { |
8607 | 140 | // void @llvm.experimental.stackmap(i32 <id>, i32 <numShadowBytes>, |
8608 | 140 | // [live variables...]) |
8609 | 140 | |
8610 | 140 | assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value."); |
8611 | 140 | |
8612 | 140 | SDValue Chain, InFlag, Callee, NullPtr; |
8613 | 140 | SmallVector<SDValue, 32> Ops; |
8614 | 140 | |
8615 | 140 | SDLoc DL = getCurSDLoc(); |
8616 | 140 | Callee = getValue(CI.getCalledValue()); |
8617 | 140 | NullPtr = DAG.getIntPtrConstant(0, DL, true); |
8618 | 140 | |
8619 | 140 | // The stackmap intrinsic only records the live variables (the arguemnts |
8620 | 140 | // passed to it) and emits NOPS (if requested). Unlike the patchpoint |
8621 | 140 | // intrinsic, this won't be lowered to a function call. This means we don't |
8622 | 140 | // have to worry about calling conventions and target specific lowering code. |
8623 | 140 | // Instead we perform the call lowering right here. |
8624 | 140 | // |
8625 | 140 | // chain, flag = CALLSEQ_START(chain, 0, 0) |
8626 | 140 | // chain, flag = STACKMAP(id, nbytes, ..., chain, flag) |
8627 | 140 | // chain, flag = CALLSEQ_END(chain, 0, 0, flag) |
8628 | 140 | // |
8629 | 140 | Chain = DAG.getCALLSEQ_START(getRoot(), 0, 0, DL); |
8630 | 140 | InFlag = Chain.getValue(1); |
8631 | 140 | |
8632 | 140 | // Add the <id> and <numBytes> constants. |
8633 | 140 | SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos)); |
8634 | 140 | Ops.push_back(DAG.getTargetConstant( |
8635 | 140 | cast<ConstantSDNode>(IDVal)->getZExtValue(), DL, MVT::i64)); |
8636 | 140 | SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos)); |
8637 | 140 | Ops.push_back(DAG.getTargetConstant( |
8638 | 140 | cast<ConstantSDNode>(NBytesVal)->getZExtValue(), DL, |
8639 | 140 | MVT::i32)); |
8640 | 140 | |
8641 | 140 | // Push live variables for the stack map. |
8642 | 140 | addStackMapLiveVars(&CI, 2, DL, Ops, *this); |
8643 | 140 | |
8644 | 140 | // We are not pushing any register mask info here on the operands list, |
8645 | 140 | // because the stackmap doesn't clobber anything. |
8646 | 140 | |
8647 | 140 | // Push the chain and the glue flag. |
8648 | 140 | Ops.push_back(Chain); |
8649 | 140 | Ops.push_back(InFlag); |
8650 | 140 | |
8651 | 140 | // Create the STACKMAP node. |
8652 | 140 | SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); |
8653 | 140 | SDNode *SM = DAG.getMachineNode(TargetOpcode::STACKMAP, DL, NodeTys, Ops); |
8654 | 140 | Chain = SDValue(SM, 0); |
8655 | 140 | InFlag = Chain.getValue(1); |
8656 | 140 | |
8657 | 140 | Chain = DAG.getCALLSEQ_END(Chain, NullPtr, NullPtr, InFlag, DL); |
8658 | 140 | |
8659 | 140 | // Stackmaps don't generate values, so nothing goes into the NodeMap. |
8660 | 140 | |
8661 | 140 | // Set the root to the target-lowered call chain. |
8662 | 140 | DAG.setRoot(Chain); |
8663 | 140 | |
8664 | 140 | // Inform the Frame Information that we have a stackmap in this function. |
8665 | 140 | FuncInfo.MF->getFrameInfo().setHasStackMap(); |
8666 | 140 | } |
8667 | | |
8668 | | /// Lower llvm.experimental.patchpoint directly to its target opcode. |
8669 | | void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, |
8670 | 146 | const BasicBlock *EHPadBB) { |
8671 | 146 | // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>, |
8672 | 146 | // i32 <numBytes>, |
8673 | 146 | // i8* <target>, |
8674 | 146 | // i32 <numArgs>, |
8675 | 146 | // [Args...], |
8676 | 146 | // [live variables...]) |
8677 | 146 | |
8678 | 146 | CallingConv::ID CC = CS.getCallingConv(); |
8679 | 146 | bool IsAnyRegCC = CC == CallingConv::AnyReg; |
8680 | 146 | bool HasDef = !CS->getType()->isVoidTy(); |
8681 | 146 | SDLoc dl = getCurSDLoc(); |
8682 | 146 | SDValue Callee = getValue(CS->getOperand(PatchPointOpers::TargetPos)); |
8683 | 146 | |
8684 | 146 | // Handle immediate and symbolic callees. |
8685 | 146 | if (auto* ConstCallee = dyn_cast<ConstantSDNode>(Callee)) |
8686 | 141 | Callee = DAG.getIntPtrConstant(ConstCallee->getZExtValue(), dl, |
8687 | 141 | /*isTarget=*/true); |
8688 | 5 | else if (auto* SymbolicCallee = dyn_cast<GlobalAddressSDNode>(Callee)) |
8689 | 5 | Callee = DAG.getTargetGlobalAddress(SymbolicCallee->getGlobal(), |
8690 | 5 | SDLoc(SymbolicCallee), |
8691 | 5 | SymbolicCallee->getValueType(0)); |
8692 | 146 | |
8693 | 146 | // Get the real number of arguments participating in the call <numArgs> |
8694 | 146 | SDValue NArgVal = getValue(CS.getArgument(PatchPointOpers::NArgPos)); |
8695 | 146 | unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue(); |
8696 | 146 | |
8697 | 146 | // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs> |
8698 | 146 | // Intrinsics include all meta-operands up to but not including CC. |
8699 | 146 | unsigned NumMetaOpers = PatchPointOpers::CCPos; |
8700 | 146 | assert(CS.arg_size() >= NumMetaOpers + NumArgs && |
8701 | 146 | "Not enough arguments provided to the patchpoint intrinsic"); |
8702 | 146 | |
8703 | 146 | // For AnyRegCC the arguments are lowered later on manually. |
8704 | 146 | unsigned NumCallArgs = IsAnyRegCC ? 066 : NumArgs80 ; |
8705 | 146 | Type *ReturnTy = |
8706 | 146 | IsAnyRegCC ? Type::getVoidTy(*DAG.getContext())66 : CS->getType()80 ; |
8707 | 146 | |
8708 | 146 | TargetLowering::CallLoweringInfo CLI(DAG); |
8709 | 146 | populateCallLoweringInfo(CLI, cast<CallBase>(CS.getInstruction()), |
8710 | 146 | NumMetaOpers, NumCallArgs, Callee, ReturnTy, true); |
8711 | 146 | std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB); |
8712 | 146 | |
8713 | 146 | SDNode *CallEnd = Result.second.getNode(); |
8714 | 146 | if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)81 ) |
8715 | 34 | CallEnd = CallEnd->getOperand(0).getNode(); |
8716 | 146 | |
8717 | 146 | /// Get a call instruction from the call sequence chain. |
8718 | 146 | /// Tail calls are not allowed. |
8719 | 146 | assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && |
8720 | 146 | "Expected a callseq node."); |
8721 | 146 | SDNode *Call = CallEnd->getOperand(0).getNode(); |
8722 | 146 | bool HasGlue = Call->getGluedNode(); |
8723 | 146 | |
8724 | 146 | // Replace the target specific call node with the patchable intrinsic. |
8725 | 146 | SmallVector<SDValue, 8> Ops; |
8726 | 146 | |
8727 | 146 | // Add the <id> and <numBytes> constants. |
8728 | 146 | SDValue IDVal = getValue(CS->getOperand(PatchPointOpers::IDPos)); |
8729 | 146 | Ops.push_back(DAG.getTargetConstant( |
8730 | 146 | cast<ConstantSDNode>(IDVal)->getZExtValue(), dl, MVT::i64)); |
8731 | 146 | SDValue NBytesVal = getValue(CS->getOperand(PatchPointOpers::NBytesPos)); |
8732 | 146 | Ops.push_back(DAG.getTargetConstant( |
8733 | 146 | cast<ConstantSDNode>(NBytesVal)->getZExtValue(), dl, |
8734 | 146 | MVT::i32)); |
8735 | 146 | |
8736 | 146 | // Add the callee. |
8737 | 146 | Ops.push_back(Callee); |
8738 | 146 | |
8739 | 146 | // Adjust <numArgs> to account for any arguments that have been passed on the |
8740 | 146 | // stack instead. |
8741 | 146 | // Call Node: Chain, Target, {Args}, RegMask, [Glue] |
8742 | 146 | unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 458 : 388 ); |
8743 | 146 | NumCallRegArgs = IsAnyRegCC ? NumArgs66 : NumCallRegArgs80 ; |
8744 | 146 | Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, dl, MVT::i32)); |
8745 | 146 | |
8746 | 146 | // Add the calling convention |
8747 | 146 | Ops.push_back(DAG.getTargetConstant((unsigned)CC, dl, MVT::i32)); |
8748 | 146 | |
8749 | 146 | // Add the arguments we omitted previously. The register allocator should |
8750 | 146 | // place these in any free register. |
8751 | 146 | if (IsAnyRegCC) |
8752 | 329 | for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; 66 i != e; ++i263 ) |
8753 | 263 | Ops.push_back(getValue(CS.getArgument(i))); |
8754 | 146 | |
8755 | 146 | // Push the arguments from the call instruction up to the register mask. |
8756 | 146 | SDNode::op_iterator e = HasGlue ? Call->op_end()-258 : Call->op_end()-188 ; |
8757 | 146 | Ops.append(Call->op_begin() + 2, e); |
8758 | 146 | |
8759 | 146 | // Push live variables for the stack map. |
8760 | 146 | addStackMapLiveVars(CS, NumMetaOpers + NumArgs, dl, Ops, *this); |
8761 | 146 | |
8762 | 146 | // Push the register mask info. |
8763 | 146 | if (HasGlue) |
8764 | 58 | Ops.push_back(*(Call->op_end()-2)); |
8765 | 88 | else |
8766 | 88 | Ops.push_back(*(Call->op_end()-1)); |
8767 | 146 | |
8768 | 146 | // Push the chain (this is originally the first operand of the call, but |
8769 | 146 | // becomes now the last or second to last operand). |
8770 | 146 | Ops.push_back(*(Call->op_begin())); |
8771 | 146 | |
8772 | 146 | // Push the glue flag (last operand). |
8773 | 146 | if (HasGlue) |
8774 | 58 | Ops.push_back(*(Call->op_end()-1)); |
8775 | 146 | |
8776 | 146 | SDVTList NodeTys; |
8777 | 146 | if (IsAnyRegCC && HasDef66 ) { |
8778 | 47 | // Create the return types based on the intrinsic definition |
8779 | 47 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
8780 | 47 | SmallVector<EVT, 3> ValueVTs; |
8781 | 47 | ComputeValueVTs(TLI, DAG.getDataLayout(), CS->getType(), ValueVTs); |
8782 | 47 | assert(ValueVTs.size() == 1 && "Expected only one return value type."); |
8783 | 47 | |
8784 | 47 | // There is always a chain and a glue type at the end |
8785 | 47 | ValueVTs.push_back(MVT::Other); |
8786 | 47 | ValueVTs.push_back(MVT::Glue); |
8787 | 47 | NodeTys = DAG.getVTList(ValueVTs); |
8788 | 47 | } else |
8789 | 99 | NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); |
8790 | 146 | |
8791 | 146 | // Replace the target specific call node with a PATCHPOINT node. |
8792 | 146 | MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT, |
8793 | 146 | dl, NodeTys, Ops); |
8794 | 146 | |
8795 | 146 | // Update the NodeMap. |
8796 | 146 | if (HasDef) { |
8797 | 81 | if (IsAnyRegCC) |
8798 | 47 | setValue(CS.getInstruction(), SDValue(MN, 0)); |
8799 | 34 | else |
8800 | 34 | setValue(CS.getInstruction(), Result.first); |
8801 | 81 | } |
8802 | 146 | |
8803 | 146 | // Fixup the consumers of the intrinsic. The chain and glue may be used in the |
8804 | 146 | // call sequence. Furthermore the location of the chain and glue can change |
8805 | 146 | // when the AnyReg calling convention is used and the intrinsic returns a |
8806 | 146 | // value. |
8807 | 146 | if (IsAnyRegCC && HasDef66 ) { |
8808 | 47 | SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)}; |
8809 | 47 | SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)}; |
8810 | 47 | DAG.ReplaceAllUsesOfValuesWith(From, To, 2); |
8811 | 47 | } else |
8812 | 99 | DAG.ReplaceAllUsesWith(Call, MN); |
8813 | 146 | DAG.DeleteNode(Call); |
8814 | 146 | |
8815 | 146 | // Inform the Frame Information that we have a patchpoint in this function. |
8816 | 146 | FuncInfo.MF->getFrameInfo().setHasPatchPoint(); |
8817 | 146 | } |
8818 | | |
8819 | | void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, |
8820 | 857 | unsigned Intrinsic) { |
8821 | 857 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
8822 | 857 | SDValue Op1 = getValue(I.getArgOperand(0)); |
8823 | 857 | SDValue Op2; |
8824 | 857 | if (I.getNumArgOperands() > 1) |
8825 | 23 | Op2 = getValue(I.getArgOperand(1)); |
8826 | 857 | SDLoc dl = getCurSDLoc(); |
8827 | 857 | EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); |
8828 | 857 | SDValue Res; |
8829 | 857 | FastMathFlags FMF; |
8830 | 857 | if (isa<FPMathOperator>(I)) |
8831 | 34 | FMF = I.getFastMathFlags(); |
8832 | 857 | |
8833 | 857 | switch (Intrinsic) { |
8834 | 857 | case Intrinsic::experimental_vector_reduce_v2_fadd: |
8835 | 23 | if (FMF.allowReassoc()) |
8836 | 23 | Res = DAG.getNode(ISD::FADD, dl, VT, Op1, |
8837 | 23 | DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2)); |
8838 | 0 | else |
8839 | 0 | Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2); |
8840 | 23 | break; |
8841 | 857 | case Intrinsic::experimental_vector_reduce_v2_fmul: |
8842 | 0 | if (FMF.allowReassoc()) |
8843 | 0 | Res = DAG.getNode(ISD::FMUL, dl, VT, Op1, |
8844 | 0 | DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2)); |
8845 | 0 | else |
8846 | 0 | Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2); |
8847 | 0 | break; |
8848 | 857 | case Intrinsic::experimental_vector_reduce_add: |
8849 | 730 | Res = DAG.getNode(ISD::VECREDUCE_ADD, dl, VT, Op1); |
8850 | 730 | break; |
8851 | 857 | case Intrinsic::experimental_vector_reduce_mul: |
8852 | 0 | Res = DAG.getNode(ISD::VECREDUCE_MUL, dl, VT, Op1); |
8853 | 0 | break; |
8854 | 857 | case Intrinsic::experimental_vector_reduce_and: |
8855 | 20 | Res = DAG.getNode(ISD::VECREDUCE_AND, dl, VT, Op1); |
8856 | 20 | break; |
8857 | 857 | case Intrinsic::experimental_vector_reduce_or: |
8858 | 6 | Res = DAG.getNode(ISD::VECREDUCE_OR, dl, VT, Op1); |
8859 | 6 | break; |
8860 | 857 | case Intrinsic::experimental_vector_reduce_xor: |
8861 | 0 | Res = DAG.getNode(ISD::VECREDUCE_XOR, dl, VT, Op1); |
8862 | 0 | break; |
8863 | 857 | case Intrinsic::experimental_vector_reduce_smax: |
8864 | 18 | Res = DAG.getNode(ISD::VECREDUCE_SMAX, dl, VT, Op1); |
8865 | 18 | break; |
8866 | 857 | case Intrinsic::experimental_vector_reduce_smin: |
8867 | 15 | Res = DAG.getNode(ISD::VECREDUCE_SMIN, dl, VT, Op1); |
8868 | 15 | break; |
8869 | 857 | case Intrinsic::experimental_vector_reduce_umax: |
8870 | 24 | Res = DAG.getNode(ISD::VECREDUCE_UMAX, dl, VT, Op1); |
8871 | 24 | break; |
8872 | 857 | case Intrinsic::experimental_vector_reduce_umin: |
8873 | 10 | Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1); |
8874 | 10 | break; |
8875 | 857 | case Intrinsic::experimental_vector_reduce_fmax: |
8876 | 9 | Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1); |
8877 | 9 | break; |
8878 | 857 | case Intrinsic::experimental_vector_reduce_fmin: |
8879 | 2 | Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1); |
8880 | 2 | break; |
8881 | 857 | default: |
8882 | 0 | llvm_unreachable("Unhandled vector reduce intrinsic"); |
8883 | 857 | } |
8884 | 857 | setValue(&I, Res); |
8885 | 857 | } |
8886 | | |
8887 | | /// Returns an AttributeList representing the attributes applied to the return |
8888 | | /// value of the given call. |
8889 | 459k | static AttributeList getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) { |
8890 | 459k | SmallVector<Attribute::AttrKind, 2> Attrs; |
8891 | 459k | if (CLI.RetSExt) |
8892 | 1.44k | Attrs.push_back(Attribute::SExt); |
8893 | 459k | if (CLI.RetZExt) |
8894 | 27.3k | Attrs.push_back(Attribute::ZExt); |
8895 | 459k | if (CLI.IsInReg) |
8896 | 223 | Attrs.push_back(Attribute::InReg); |
8897 | 459k | |
8898 | 459k | return AttributeList::get(CLI.RetTy->getContext(), AttributeList::ReturnIndex, |
8899 | 459k | Attrs); |
8900 | 459k | } |
8901 | | |
8902 | | /// TargetLowering::LowerCallTo - This is the default LowerCallTo |
8903 | | /// implementation, which just calls LowerCall. |
8904 | | /// FIXME: When all targets are |
8905 | | /// migrated to using LowerCall, this hook should be integrated into SDISel. |
8906 | | std::pair<SDValue, SDValue> |
8907 | 459k | TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { |
8908 | 459k | // Handle the incoming return values from the call. |
8909 | 459k | CLI.Ins.clear(); |
8910 | 459k | Type *OrigRetTy = CLI.RetTy; |
8911 | 459k | SmallVector<EVT, 4> RetTys; |
8912 | 459k | SmallVector<uint64_t, 4> Offsets; |
8913 | 459k | auto &DL = CLI.DAG.getDataLayout(); |
8914 | 459k | ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets); |
8915 | 459k | |
8916 | 459k | if (CLI.IsPostTypeLegalization) { |
8917 | 5.66k | // If we are lowering a libcall after legalization, split the return type. |
8918 | 5.66k | SmallVector<EVT, 4> OldRetTys; |
8919 | 5.66k | SmallVector<uint64_t, 4> OldOffsets; |
8920 | 5.66k | RetTys.swap(OldRetTys); |
8921 | 5.66k | Offsets.swap(OldOffsets); |
8922 | 5.66k | |
8923 | 11.3k | for (size_t i = 0, e = OldRetTys.size(); i != e; ++i5.66k ) { |
8924 | 5.66k | EVT RetVT = OldRetTys[i]; |
8925 | 5.66k | uint64_t Offset = OldOffsets[i]; |
8926 | 5.66k | MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), RetVT); |
8927 | 5.66k | unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), RetVT); |
8928 | 5.66k | unsigned RegisterVTByteSZ = RegisterVT.getSizeInBits() / 8; |
8929 | 5.66k | RetTys.append(NumRegs, RegisterVT); |
8930 | 11.3k | for (unsigned j = 0; j != NumRegs; ++j5.67k ) |
8931 | 5.67k | Offsets.push_back(Offset + j * RegisterVTByteSZ); |
8932 | 5.66k | } |
8933 | 5.66k | } |
8934 | 459k | |
8935 | 459k | SmallVector<ISD::OutputArg, 4> Outs; |
8936 | 459k | GetReturnInfo(CLI.CallConv, CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL); |
8937 | 459k | |
8938 | 459k | bool CanLowerReturn = |
8939 | 459k | this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(), |
8940 | 459k | CLI.IsVarArg, Outs, CLI.RetTy->getContext()); |
8941 | 459k | |
8942 | 459k | SDValue DemoteStackSlot; |
8943 | 459k | int DemoteStackIdx = -100; |
8944 | 459k | if (!CanLowerReturn) { |
8945 | 272 | // FIXME: equivalent assert? |
8946 | 272 | // assert(!CS.hasInAllocaArgument() && |
8947 | 272 | // "sret demotion is incompatible with inalloca"); |
8948 | 272 | uint64_t TySize = DL.getTypeAllocSize(CLI.RetTy); |
8949 | 272 | unsigned Align = DL.getPrefTypeAlignment(CLI.RetTy); |
8950 | 272 | MachineFunction &MF = CLI.DAG.getMachineFunction(); |
8951 | 272 | DemoteStackIdx = MF.getFrameInfo().CreateStackObject(TySize, Align, false); |
8952 | 272 | Type *StackSlotPtrType = PointerType::get(CLI.RetTy, |
8953 | 272 | DL.getAllocaAddrSpace()); |
8954 | 272 | |
8955 | 272 | DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getFrameIndexTy(DL)); |
8956 | 272 | ArgListEntry Entry; |
8957 | 272 | Entry.Node = DemoteStackSlot; |
8958 | 272 | Entry.Ty = StackSlotPtrType; |
8959 | 272 | Entry.IsSExt = false; |
8960 | 272 | Entry.IsZExt = false; |
8961 | 272 | Entry.IsInReg = false; |
8962 | 272 | Entry.IsSRet = true; |
8963 | 272 | Entry.IsNest = false; |
8964 | 272 | Entry.IsByVal = false; |
8965 | 272 | Entry.IsReturned = false; |
8966 | 272 | Entry.IsSwiftSelf = false; |
8967 | 272 | Entry.IsSwiftError = false; |
8968 | 272 | Entry.Alignment = Align; |
8969 | 272 | CLI.getArgs().insert(CLI.getArgs().begin(), Entry); |
8970 | 272 | CLI.NumFixedArgs += 1; |
8971 | 272 | CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext()); |
8972 | 272 | |
8973 | 272 | // sret demotion isn't compatible with tail-calls, since the sret argument |
8974 | 272 | // points into the callers stack frame. |
8975 | 272 | CLI.IsTailCall = false; |
8976 | 459k | } else { |
8977 | 459k | bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters( |
8978 | 459k | CLI.RetTy, CLI.CallConv, CLI.IsVarArg); |
8979 | 726k | for (unsigned I = 0, E = RetTys.size(); I != E; ++I267k ) { |
8980 | 267k | ISD::ArgFlagsTy Flags; |
8981 | 267k | if (NeedsRegBlock) { |
8982 | 818 | Flags.setInConsecutiveRegs(); |
8983 | 818 | if (I == RetTys.size() - 1) |
8984 | 686 | Flags.setInConsecutiveRegsLast(); |
8985 | 818 | } |
8986 | 267k | EVT VT = RetTys[I]; |
8987 | 267k | MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), |
8988 | 267k | CLI.CallConv, VT); |
8989 | 267k | unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(), |
8990 | 267k | CLI.CallConv, VT); |
8991 | 538k | for (unsigned i = 0; i != NumRegs; ++i271k ) { |
8992 | 271k | ISD::InputArg MyFlags; |
8993 | 271k | MyFlags.Flags = Flags; |
8994 | 271k | MyFlags.VT = RegisterVT; |
8995 | 271k | MyFlags.ArgVT = VT; |
8996 | 271k | MyFlags.Used = CLI.IsReturnValueUsed; |
8997 | 271k | if (CLI.RetTy->isPointerTy()) { |
8998 | 89.8k | MyFlags.Flags.setPointer(); |
8999 | 89.8k | MyFlags.Flags.setPointerAddrSpace( |
9000 | 89.8k | cast<PointerType>(CLI.RetTy)->getAddressSpace()); |
9001 | 89.8k | } |
9002 | 271k | if (CLI.RetSExt) |
9003 | 2.27k | MyFlags.Flags.setSExt(); |
9004 | 271k | if (CLI.RetZExt) |
9005 | 28.8k | MyFlags.Flags.setZExt(); |
9006 | 271k | if (CLI.IsInReg) |
9007 | 476 | MyFlags.Flags.setInReg(); |
9008 | 271k | CLI.Ins.push_back(MyFlags); |
9009 | 271k | } |
9010 | 267k | } |
9011 | 459k | } |
9012 | 459k | |
9013 | 459k | // We push in swifterror return as the last element of CLI.Ins. |
9014 | 459k | ArgListTy &Args = CLI.getArgs(); |
9015 | 459k | if (supportSwiftError()) { |
9016 | 1.40M | for (unsigned i = 0, e = Args.size(); i != e; ++i972k ) { |
9017 | 972k | if (Args[i].IsSwiftError) { |
9018 | 110 | ISD::InputArg MyFlags; |
9019 | 110 | MyFlags.VT = getPointerTy(DL); |
9020 | 110 | MyFlags.ArgVT = EVT(getPointerTy(DL)); |
9021 | 110 | MyFlags.Flags.setSwiftError(); |
9022 | 110 | CLI.Ins.push_back(MyFlags); |
9023 | 110 | } |
9024 | 972k | } |
9025 | 430k | } |
9026 | 459k | |
9027 | 459k | // Handle all of the outgoing arguments. |
9028 | 459k | CLI.Outs.clear(); |
9029 | 459k | CLI.OutVals.clear(); |
9030 | 1.49M | for (unsigned i = 0, e = Args.size(); i != e; ++i1.03M ) { |
9031 | 1.03M | SmallVector<EVT, 4> ValueVTs; |
9032 | 1.03M | ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs); |
9033 | 1.03M | // FIXME: Split arguments if CLI.IsPostTypeLegalization |
9034 | 1.03M | Type *FinalType = Args[i].Ty; |
9035 | 1.03M | if (Args[i].IsByVal) |
9036 | 1.10k | FinalType = cast<PointerType>(Args[i].Ty)->getElementType(); |
9037 | 1.03M | bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters( |
9038 | 1.03M | FinalType, CLI.CallConv, CLI.IsVarArg); |
9039 | 2.07M | for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; |
9040 | 1.03M | ++Value) { |
9041 | 1.03M | EVT VT = ValueVTs[Value]; |
9042 | 1.03M | Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext()); |
9043 | 1.03M | SDValue Op = SDValue(Args[i].Node.getNode(), |
9044 | 1.03M | Args[i].Node.getResNo() + Value); |
9045 | 1.03M | ISD::ArgFlagsTy Flags; |
9046 | 1.03M | |
9047 | 1.03M | // Certain targets (such as MIPS), may have a different ABI alignment |
9048 | 1.03M | // for a type depending on the context. Give the target a chance to |
9049 | 1.03M | // specify the alignment it wants. |
9050 | 1.03M | unsigned OriginalAlignment = getABIAlignmentForCallingConv(ArgTy, DL); |
9051 | 1.03M | |
9052 | 1.03M | if (Args[i].Ty->isPointerTy()) { |
9053 | 601k | Flags.setPointer(); |
9054 | 601k | Flags.setPointerAddrSpace( |
9055 | 601k | cast<PointerType>(Args[i].Ty)->getAddressSpace()); |
9056 | 601k | } |
9057 | 1.03M | if (Args[i].IsZExt) |
9058 | 43.4k | Flags.setZExt(); |
9059 | 1.03M | if (Args[i].IsSExt) |
9060 | 2.88k | Flags.setSExt(); |
9061 | 1.03M | if (Args[i].IsInReg) { |
9062 | 240 | // If we are using vectorcall calling convention, a structure that is |
9063 | 240 | // passed InReg - is surely an HVA |
9064 | 240 | if (CLI.CallConv == CallingConv::X86_VectorCall && |
9065 | 240 | isa<StructType>(FinalType)14 ) { |
9066 | 8 | // The first value of a structure is marked |
9067 | 8 | if (0 == Value) |
9068 | 2 | Flags.setHvaStart(); |
9069 | 8 | Flags.setHva(); |
9070 | 8 | } |
9071 | 240 | // Set InReg Flag |
9072 | 240 | Flags.setInReg(); |
9073 | 240 | } |
9074 | 1.03M | if (Args[i].IsSRet) |
9075 | 1.37k | Flags.setSRet(); |
9076 | 1.03M | if (Args[i].IsSwiftSelf) |
9077 | 97 | Flags.setSwiftSelf(); |
9078 | 1.03M | if (Args[i].IsSwiftError) |
9079 | 129 | Flags.setSwiftError(); |
9080 | 1.03M | if (Args[i].IsByVal) |
9081 | 1.10k | Flags.setByVal(); |
9082 | 1.03M | if (Args[i].IsInAlloca) { |
9083 | 22 | Flags.setInAlloca(); |
9084 | 22 | // Set the byval flag for CCAssignFn callbacks that don't know about |
9085 | 22 | // inalloca. This way we can know how many bytes we should've allocated |
9086 | 22 | // and how many bytes a callee cleanup function will pop. If we port |
9087 | 22 | // inalloca to more targets, we'll have to add custom inalloca handling |
9088 | 22 | // in the various CC lowering callbacks. |
9089 | 22 | Flags.setByVal(); |
9090 | 22 | } |
9091 | 1.03M | if (Args[i].IsByVal || Args[i].IsInAlloca1.03M ) { |
9092 | 1.13k | PointerType *Ty = cast<PointerType>(Args[i].Ty); |
9093 | 1.13k | Type *ElementTy = Ty->getElementType(); |
9094 | 1.13k | |
9095 | 1.13k | unsigned FrameSize = DL.getTypeAllocSize( |
9096 | 1.13k | Args[i].ByValType ? Args[i].ByValType1.10k : ElementTy22 ); |
9097 | 1.13k | Flags.setByValSize(FrameSize); |
9098 | 1.13k | |
9099 | 1.13k | // info is not there but there are cases it cannot get right. |
9100 | 1.13k | unsigned FrameAlign; |
9101 | 1.13k | if (Args[i].Alignment) |
9102 | 872 | FrameAlign = Args[i].Alignment; |
9103 | 259 | else |
9104 | 259 | FrameAlign = getByValTypeAlignment(ElementTy, DL); |
9105 | 1.13k | Flags.setByValAlign(FrameAlign); |
9106 | 1.13k | } |
9107 | 1.03M | if (Args[i].IsNest) |
9108 | 7 | Flags.setNest(); |
9109 | 1.03M | if (NeedsRegBlock) |
9110 | 2.90k | Flags.setInConsecutiveRegs(); |
9111 | 1.03M | Flags.setOrigAlign(OriginalAlignment); |
9112 | 1.03M | |
9113 | 1.03M | MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), |
9114 | 1.03M | CLI.CallConv, VT); |
9115 | 1.03M | unsigned NumParts = getNumRegistersForCallingConv(CLI.RetTy->getContext(), |
9116 | 1.03M | CLI.CallConv, VT); |
9117 | 1.03M | SmallVector<SDValue, 4> Parts(NumParts); |
9118 | 1.03M | ISD::NodeType ExtendKind = ISD::ANY_EXTEND; |
9119 | 1.03M | |
9120 | 1.03M | if (Args[i].IsSExt) |
9121 | 2.88k | ExtendKind = ISD::SIGN_EXTEND; |
9122 | 1.03M | else if (Args[i].IsZExt) |
9123 | 43.4k | ExtendKind = ISD::ZERO_EXTEND; |
9124 | 1.03M | |
9125 | 1.03M | // Conservatively only handle 'returned' on non-vectors that can be lowered, |
9126 | 1.03M | // for now. |
9127 | 1.03M | if (Args[i].IsReturned && !Op.getValueType().isVector()4.76k && |
9128 | 1.03M | CanLowerReturn4.76k ) { |
9129 | 4.76k | assert((CLI.RetTy == Args[i].Ty || |
9130 | 4.76k | (CLI.RetTy->isPointerTy() && Args[i].Ty->isPointerTy() && |
9131 | 4.76k | CLI.RetTy->getPointerAddressSpace() == |
9132 | 4.76k | Args[i].Ty->getPointerAddressSpace())) && |
9133 | 4.76k | RetTys.size() == NumValues && "unexpected use of 'returned'"); |
9134 | 4.76k | // Before passing 'returned' to the target lowering code, ensure that |
9135 | 4.76k | // either the register MVT and the actual EVT are the same size or that |
9136 | 4.76k | // the return value and argument are extended in the same way; in these |
9137 | 4.76k | // cases it's safe to pass the argument register value unchanged as the |
9138 | 4.76k | // return register value (although it's at the target's option whether |
9139 | 4.76k | // to do so) |
9140 | 4.76k | // TODO: allow code generation to take advantage of partially preserved |
9141 | 4.76k | // registers rather than clobbering the entire register when the |
9142 | 4.76k | // parameter extension method is not compatible with the return |
9143 | 4.76k | // extension method |
9144 | 4.76k | if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) || |
9145 | 4.76k | (23 ExtendKind != ISD::ANY_EXTEND23 && CLI.RetSExt == Args[i].IsSExt14 && |
9146 | 23 | CLI.RetZExt == Args[i].IsZExt14 )) |
9147 | 4.74k | Flags.setReturned(); |
9148 | 4.76k | } |
9149 | 1.03M | |
9150 | 1.03M | getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, |
9151 | 1.03M | CLI.CS.getInstruction(), CLI.CallConv, ExtendKind); |
9152 | 1.03M | |
9153 | 2.09M | for (unsigned j = 0; j != NumParts; ++j1.05M ) { |
9154 | 1.05M | // if it isn't first piece, alignment must be 1 |
9155 | 1.05M | ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT, |
9156 | 1.05M | i < CLI.NumFixedArgs, |
9157 | 1.05M | i, j*Parts[j].getValueType().getStoreSize()); |
9158 | 1.05M | if (NumParts > 1 && j == 040.0k ) |
9159 | 18.7k | MyFlags.Flags.setSplit(); |
9160 | 1.03M | else if (j != 0) { |
9161 | 21.2k | MyFlags.Flags.setOrigAlign(1); |
9162 | 21.2k | if (j == NumParts - 1) |
9163 | 18.7k | MyFlags.Flags.setSplitEnd(); |
9164 | 21.2k | } |
9165 | 1.05M | |
9166 | 1.05M | CLI.Outs.push_back(MyFlags); |
9167 | 1.05M | CLI.OutVals.push_back(Parts[j]); |
9168 | 1.05M | } |
9169 | 1.03M | |
9170 | 1.03M | if (NeedsRegBlock && Value == NumValues - 12.90k ) |
9171 | 1.64k | CLI.Outs[CLI.Outs.size() - 1].Flags.setInConsecutiveRegsLast(); |
9172 | 1.03M | } |
9173 | 1.03M | } |
9174 | 459k | |
9175 | 459k | SmallVector<SDValue, 4> InVals; |
9176 | 459k | CLI.Chain = LowerCall(CLI, InVals); |
9177 | 459k | |
9178 | 459k | // Update CLI.InVals to use outside of this function. |
9179 | 459k | CLI.InVals = InVals; |
9180 | 459k | |
9181 | 459k | // Verify that the target's LowerCall behaved as expected. |
9182 | 459k | assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other && |
9183 | 459k | "LowerCall didn't return a valid chain!"); |
9184 | 459k | assert((!CLI.IsTailCall || InVals.empty()) && |
9185 | 459k | "LowerCall emitted a return value for a tail call!"); |
9186 | 459k | assert((CLI.IsTailCall || InVals.size() == CLI.Ins.size()) && |
9187 | 459k | "LowerCall didn't emit the correct number of values!"); |
9188 | 459k | |
9189 | 459k | // For a tail call, the return value is merely live-out and there aren't |
9190 | 459k | // any nodes in the DAG representing it. Return a special value to |
9191 | 459k | // indicate that a tail call has been emitted and no more Instructions |
9192 | 459k | // should be processed in the current block. |
9193 | 459k | if (CLI.IsTailCall) { |
9194 | 64.3k | CLI.DAG.setRoot(CLI.Chain); |
9195 | 64.3k | return std::make_pair(SDValue(), SDValue()); |
9196 | 64.3k | } |
9197 | 395k | |
9198 | | #ifndef NDEBUG |
9199 | | for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) { |
9200 | | assert(InVals[i].getNode() && "LowerCall emitted a null value!"); |
9201 | | assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() && |
9202 | | "LowerCall emitted a value with the wrong type!"); |
9203 | | } |
9204 | | #endif |
9205 | | |
9206 | 395k | SmallVector<SDValue, 4> ReturnValues; |
9207 | 395k | if (!CanLowerReturn) { |
9208 | 272 | // The instruction result is the result of loading from the |
9209 | 272 | // hidden sret parameter. |
9210 | 272 | SmallVector<EVT, 1> PVTs; |
9211 | 272 | Type *PtrRetTy = OrigRetTy->getPointerTo(DL.getAllocaAddrSpace()); |
9212 | 272 | |
9213 | 272 | ComputeValueVTs(*this, DL, PtrRetTy, PVTs); |
9214 | 272 | assert(PVTs.size() == 1 && "Pointers should fit in one register"); |
9215 | 272 | EVT PtrVT = PVTs[0]; |
9216 | 272 | |
9217 | 272 | unsigned NumValues = RetTys.size(); |
9218 | 272 | ReturnValues.resize(NumValues); |
9219 | 272 | SmallVector<SDValue, 4> Chains(NumValues); |
9220 | 272 | |
9221 | 272 | // An aggregate return value cannot wrap around the address space, so |
9222 | 272 | // offsets to its parts don't wrap either. |
9223 | 272 | SDNodeFlags Flags; |
9224 | 272 | Flags.setNoUnsignedWrap(true); |
9225 | 272 | |
9226 | 704 | for (unsigned i = 0; i < NumValues; ++i432 ) { |
9227 | 432 | SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot, |
9228 | 432 | CLI.DAG.getConstant(Offsets[i], CLI.DL, |
9229 | 432 | PtrVT), Flags); |
9230 | 432 | SDValue L = CLI.DAG.getLoad( |
9231 | 432 | RetTys[i], CLI.DL, CLI.Chain, Add, |
9232 | 432 | MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(), |
9233 | 432 | DemoteStackIdx, Offsets[i]), |
9234 | 432 | /* Alignment = */ 1); |
9235 | 432 | ReturnValues[i] = L; |
9236 | 432 | Chains[i] = L.getValue(1); |
9237 | 432 | } |
9238 | 272 | |
9239 | 272 | CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains); |
9240 | 395k | } else { |
9241 | 395k | // Collect the legal value parts into potentially illegal values |
9242 | 395k | // that correspond to the original function's return values. |
9243 | 395k | Optional<ISD::NodeType> AssertOp; |
9244 | 395k | if (CLI.RetSExt) |
9245 | 1.40k | AssertOp = ISD::AssertSext; |
9246 | 393k | else if (CLI.RetZExt) |
9247 | 26.5k | AssertOp = ISD::AssertZext; |
9248 | 395k | unsigned CurReg = 0; |
9249 | 606k | for (unsigned I = 0, E = RetTys.size(); I != E; ++I211k ) { |
9250 | 211k | EVT VT = RetTys[I]; |
9251 | 211k | MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), |
9252 | 211k | CLI.CallConv, VT); |
9253 | 211k | unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(), |
9254 | 211k | CLI.CallConv, VT); |
9255 | 211k | |
9256 | 211k | ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg], |
9257 | 211k | NumRegs, RegisterVT, VT, nullptr, |
9258 | 211k | CLI.CallConv, AssertOp)); |
9259 | 211k | CurReg += NumRegs; |
9260 | 211k | } |
9261 | 395k | |
9262 | 395k | // For a function returning void, there is no return value. We can't create |
9263 | 395k | // such a node, so we just return a null return value in that case. In |
9264 | 395k | // that case, nothing will actually look at the value. |
9265 | 395k | if (ReturnValues.empty()) |
9266 | 184k | return std::make_pair(SDValue(), CLI.Chain); |
9267 | 210k | } |
9268 | 210k | |
9269 | 210k | SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL, |
9270 | 210k | CLI.DAG.getVTList(RetTys), ReturnValues); |
9271 | 210k | return std::make_pair(Res, CLI.Chain); |
9272 | 210k | } |
9273 | | |
9274 | | void TargetLowering::LowerOperationWrapper(SDNode *N, |
9275 | | SmallVectorImpl<SDValue> &Results, |
9276 | 4.72k | SelectionDAG &DAG) const { |
9277 | 4.72k | if (SDValue Res = LowerOperation(SDValue(N, 0), DAG)) |
9278 | 4.57k | Results.push_back(Res); |
9279 | 4.72k | } |
9280 | | |
9281 | 0 | SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { |
9282 | 0 | llvm_unreachable("LowerOperation not implemented for this target!"); |
9283 | 0 | } |
9284 | | |
9285 | | void |
9286 | 815k | SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { |
9287 | 815k | SDValue Op = getNonRegisterValue(V); |
9288 | 815k | assert((Op.getOpcode() != ISD::CopyFromReg || |
9289 | 815k | cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) && |
9290 | 815k | "Copy from a reg to the same reg!"); |
9291 | 815k | assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); |
9292 | 815k | |
9293 | 815k | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
9294 | 815k | // If this is an InlineAsm we have to match the registers required, not the |
9295 | 815k | // notional registers required by the type. |
9296 | 815k | |
9297 | 815k | RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, V->getType(), |
9298 | 815k | None); // This is not an ABI copy. |
9299 | 815k | SDValue Chain = DAG.getEntryNode(); |
9300 | 815k | |
9301 | 815k | ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) == |
9302 | 815k | FuncInfo.PreferredExtendType.end()) |
9303 | 815k | ? ISD::ANY_EXTEND183k |
9304 | 815k | : FuncInfo.PreferredExtendType[V]631k ; |
9305 | 815k | RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType); |
9306 | 815k | PendingExports.push_back(Chain); |
9307 | 815k | } |
9308 | | |
9309 | | #include "llvm/CodeGen/SelectionDAGISel.h" |
9310 | | |
9311 | | /// isOnlyUsedInEntryBlock - If the specified argument is only used in the |
9312 | | /// entry block, return true. This includes arguments used by switches, since |
9313 | | /// the switch may expand into multiple basic blocks. |
9314 | 117k | static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { |
9315 | 117k | // With FastISel active, we may be splitting blocks, so force creation |
9316 | 117k | // of virtual registers for all non-dead arguments. |
9317 | 117k | if (FastISel) |
9318 | 6.00k | return A->use_empty(); |
9319 | 111k | |
9320 | 111k | const BasicBlock &Entry = A->getParent()->front(); |
9321 | 111k | for (const User *U : A->users()) |
9322 | 131k | if (cast<Instruction>(U)->getParent() != &Entry || isa<SwitchInst>(U)115k ) |
9323 | 15.8k | return false; // Use not in entry block. |
9324 | 111k | |
9325 | 111k | return true95.2k ; |
9326 | 111k | } |
9327 | | |
9328 | | using ArgCopyElisionMapTy = |
9329 | | DenseMap<const Argument *, |
9330 | | std::pair<const AllocaInst *, const StoreInst *>>; |
9331 | | |
9332 | | /// Scan the entry block of the function in FuncInfo for arguments that look |
9333 | | /// like copies into a local alloca. Record any copied arguments in |
9334 | | /// ArgCopyElisionCandidates. |
9335 | | static void |
9336 | | findArgumentCopyElisionCandidates(const DataLayout &DL, |
9337 | | FunctionLoweringInfo *FuncInfo, |
9338 | 272k | ArgCopyElisionMapTy &ArgCopyElisionCandidates) { |
9339 | 272k | // Record the state of every static alloca used in the entry block. Argument |
9340 | 272k | // allocas are all used in the entry block, so we need approximately as many |
9341 | 272k | // entries as we have arguments. |
9342 | 272k | enum StaticAllocaInfo { Unknown, Clobbered, Elidable }; |
9343 | 272k | SmallDenseMap<const AllocaInst *, StaticAllocaInfo, 8> StaticAllocas; |
9344 | 272k | unsigned NumArgs = FuncInfo->Fn->arg_size(); |
9345 | 272k | StaticAllocas.reserve(NumArgs * 2); |
9346 | 272k | |
9347 | 2.48M | auto GetInfoIfStaticAlloca = [&](const Value *V) -> StaticAllocaInfo * { |
9348 | 2.48M | if (!V) |
9349 | 0 | return nullptr; |
9350 | 2.48M | V = V->stripPointerCasts(); |
9351 | 2.48M | const auto *AI = dyn_cast<AllocaInst>(V); |
9352 | 2.48M | if (!AI || !AI->isStaticAlloca()47.5k || !FuncInfo->StaticAllocaMap.count(AI)47.1k ) |
9353 | 2.44M | return nullptr; |
9354 | 47.1k | auto Iter = StaticAllocas.insert({AI, Unknown}); |
9355 | 47.1k | return &Iter.first->second; |
9356 | 47.1k | }; |
9357 | 272k | |
9358 | 272k | // Look for stores of arguments to static allocas. Look through bitcasts and |
9359 | 272k | // GEPs to handle type coercions, as long as the alloca is fully initialized |
9360 | 272k | // by the store. Any non-store use of an alloca escapes it and any subsequent |
9361 | 272k | // unanalyzed store might write it. |
9362 | 272k | // FIXME: Handle structs initialized with multiple stores. |
9363 | 1.55M | for (const Instruction &I : FuncInfo->Fn->getEntryBlock()) { |
9364 | 1.55M | // Look for stores, and handle non-store uses conservatively. |
9365 | 1.55M | const auto *SI = dyn_cast<StoreInst>(&I); |
9366 | 1.55M | if (!SI) { |
9367 | 1.45M | // We will look through cast uses, so ignore them completely. |
9368 | 1.45M | if (I.isCast()) |
9369 | 244k | continue; |
9370 | 1.21M | // Ignore debug info intrinsics, they don't escape or store to allocas. |
9371 | 1.21M | if (isa<DbgInfoIntrinsic>(I)) |
9372 | 5.12k | continue; |
9373 | 1.20M | // This is an unknown instruction. Assume it escapes or writes to all |
9374 | 1.20M | // static alloca operands. |
9375 | 2.29M | for (const Use &U : I.operands())1.20M { |
9376 | 2.29M | if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(U)) |
9377 | 37.9k | *Info = StaticAllocaInfo::Clobbered; |
9378 | 2.29M | } |
9379 | 1.20M | continue; |
9380 | 1.20M | } |
9381 | 95.2k | |
9382 | 95.2k | // If the stored value is a static alloca, mark it as escaped. |
9383 | 95.2k | if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(SI->getValueOperand())) |
9384 | 292 | *Info = StaticAllocaInfo::Clobbered; |
9385 | 95.2k | |
9386 | 95.2k | // Check if the destination is a static alloca. |
9387 | 95.2k | const Value *Dst = SI->getPointerOperand()->stripPointerCasts(); |
9388 | 95.2k | StaticAllocaInfo *Info = GetInfoIfStaticAlloca(Dst); |
9389 | 95.2k | if (!Info) |
9390 | 86.3k | continue; |
9391 | 8.90k | const AllocaInst *AI = cast<AllocaInst>(Dst); |
9392 | 8.90k | |
9393 | 8.90k | // Skip allocas that have been initialized or clobbered. |
9394 | 8.90k | if (*Info != StaticAllocaInfo::Unknown) |
9395 | 3.47k | continue; |
9396 | 5.42k | |
9397 | 5.42k | // Check if the stored value is an argument, and that this store fully |
9398 | 5.42k | // initializes the alloca. Don't elide copies from the same argument twice. |
9399 | 5.42k | const Value *Val = SI->getValueOperand()->stripPointerCasts(); |
9400 | 5.42k | const auto *Arg = dyn_cast<Argument>(Val); |
9401 | 5.42k | if (!Arg || Arg->hasInAllocaAttr()2.11k || Arg->hasByValAttr()2.11k || |
9402 | 5.42k | Arg->getType()->isEmptyTy()2.11k || |
9403 | 5.42k | DL.getTypeStoreSize(Arg->getType()) != |
9404 | 2.11k | DL.getTypeAllocSize(AI->getAllocatedType()) || |
9405 | 5.42k | ArgCopyElisionCandidates.count(Arg)2.05k ) { |
9406 | 3.36k | *Info = StaticAllocaInfo::Clobbered; |
9407 | 3.36k | continue; |
9408 | 3.36k | } |
9409 | 2.05k | |
9410 | 2.05k | LLVM_DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI |
9411 | 2.05k | << '\n'); |
9412 | 2.05k | |
9413 | 2.05k | // Mark this alloca and store for argument copy elision. |
9414 | 2.05k | *Info = StaticAllocaInfo::Elidable; |
9415 | 2.05k | ArgCopyElisionCandidates.insert({Arg, {AI, SI}}); |
9416 | 2.05k | |
9417 | 2.05k | // Stop scanning if we've seen all arguments. This will happen early in -O0 |
9418 | 2.05k | // builds, which is useful, because -O0 builds have large entry blocks and |
9419 | 2.05k | // many allocas. |
9420 | 2.05k | if (ArgCopyElisionCandidates.size() == NumArgs) |
9421 | 1.16k | break; |
9422 | 2.05k | } |
9423 | 272k | } |
9424 | | |
9425 | | /// Try to elide argument copies from memory into a local alloca. Succeeds if |
9426 | | /// ArgVal is a load from a suitable fixed stack object. |
9427 | | static void tryToElideArgumentCopy( |
9428 | | FunctionLoweringInfo *FuncInfo, SmallVectorImpl<SDValue> &Chains, |
9429 | | DenseMap<int, int> &ArgCopyElisionFrameIndexMap, |
9430 | | SmallPtrSetImpl<const Instruction *> &ElidedArgCopyInstrs, |
9431 | | ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg, |
9432 | 2.05k | SDValue ArgVal, bool &ArgHasUses) { |
9433 | 2.05k | // Check if this is a load from a fixed stack object. |
9434 | 2.05k | auto *LNode = dyn_cast<LoadSDNode>(ArgVal); |
9435 | 2.05k | if (!LNode) |
9436 | 1.70k | return; |
9437 | 349 | auto *FINode = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()); |
9438 | 349 | if (!FINode) |
9439 | 2 | return; |
9440 | 347 | |
9441 | 347 | // Check that the fixed stack object is the right size and alignment. |
9442 | 347 | // Look at the alignment that the user wrote on the alloca instead of looking |
9443 | 347 | // at the stack object. |
9444 | 347 | auto ArgCopyIter = ArgCopyElisionCandidates.find(&Arg); |
9445 | 347 | assert(ArgCopyIter != ArgCopyElisionCandidates.end()); |
9446 | 347 | const AllocaInst *AI = ArgCopyIter->second.first; |
9447 | 347 | int FixedIndex = FINode->getIndex(); |
9448 | 347 | int &AllocaIndex = FuncInfo->StaticAllocaMap[AI]; |
9449 | 347 | int OldIndex = AllocaIndex; |
9450 | 347 | MachineFrameInfo &MFI = FuncInfo->MF->getFrameInfo(); |
9451 | 347 | if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) { |
9452 | 1 | LLVM_DEBUG( |
9453 | 1 | dbgs() << " argument copy elision failed due to bad fixed stack " |
9454 | 1 | "object size\n"); |
9455 | 1 | return; |
9456 | 1 | } |
9457 | 346 | unsigned RequiredAlignment = AI->getAlignment(); |
9458 | 346 | if (!RequiredAlignment) { |
9459 | 64 | RequiredAlignment = FuncInfo->MF->getDataLayout().getABITypeAlignment( |
9460 | 64 | AI->getAllocatedType()); |
9461 | 64 | } |
9462 | 346 | if (MFI.getObjectAlignment(FixedIndex) < RequiredAlignment) { |
9463 | 25 | LLVM_DEBUG(dbgs() << " argument copy elision failed: alignment of alloca " |
9464 | 25 | "greater than stack argument alignment (" |
9465 | 25 | << RequiredAlignment << " vs " |
9466 | 25 | << MFI.getObjectAlignment(FixedIndex) << ")\n"); |
9467 | 25 | return; |
9468 | 25 | } |
9469 | 321 | |
9470 | 321 | // Perform the elision. Delete the old stack object and replace its only use |
9471 | 321 | // in the variable info map. Mark the stack object as mutable. |
9472 | 321 | LLVM_DEBUG({ |
9473 | 321 | dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n' |
9474 | 321 | << " Replacing frame index " << OldIndex << " with " << FixedIndex |
9475 | 321 | << '\n'; |
9476 | 321 | }); |
9477 | 321 | MFI.RemoveStackObject(OldIndex); |
9478 | 321 | MFI.setIsImmutableObjectIndex(FixedIndex, false); |
9479 | 321 | AllocaIndex = FixedIndex; |
9480 | 321 | ArgCopyElisionFrameIndexMap.insert({OldIndex, FixedIndex}); |
9481 | 321 | Chains.push_back(ArgVal.getValue(1)); |
9482 | 321 | |
9483 | 321 | // Avoid emitting code for the store implementing the copy. |
9484 | 321 | const StoreInst *SI = ArgCopyIter->second.second; |
9485 | 321 | ElidedArgCopyInstrs.insert(SI); |
9486 | 321 | |
9487 | 321 | // Check for uses of the argument again so that we can avoid exporting ArgVal |
9488 | 321 | // if it is't used by anything other than the store. |
9489 | 321 | for (const Value *U : Arg.users()) { |
9490 | 321 | if (U != SI) { |
9491 | 34 | ArgHasUses = true; |
9492 | 34 | break; |
9493 | 34 | } |
9494 | 321 | } |
9495 | 321 | } |
9496 | | |
9497 | 272k | void SelectionDAGISel::LowerArguments(const Function &F) { |
9498 | 272k | SelectionDAG &DAG = SDB->DAG; |
9499 | 272k | SDLoc dl = SDB->getCurSDLoc(); |
9500 | 272k | const DataLayout &DL = DAG.getDataLayout(); |
9501 | 272k | SmallVector<ISD::InputArg, 16> Ins; |
9502 | 272k | |
9503 | 272k | if (!FuncInfo->CanLowerReturn) { |
9504 | 1.94k | // Put in an sret pointer parameter before all the other parameters. |
9505 | 1.94k | SmallVector<EVT, 1> ValueVTs; |
9506 | 1.94k | ComputeValueVTs(*TLI, DAG.getDataLayout(), |
9507 | 1.94k | F.getReturnType()->getPointerTo( |
9508 | 1.94k | DAG.getDataLayout().getAllocaAddrSpace()), |
9509 | 1.94k | ValueVTs); |
9510 | 1.94k | |
9511 | 1.94k | // NOTE: Assuming that a pointer will never break down to more than one VT |
9512 | 1.94k | // or one register. |
9513 | 1.94k | ISD::ArgFlagsTy Flags; |
9514 | 1.94k | Flags.setSRet(); |
9515 | 1.94k | MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]); |
9516 | 1.94k | ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true, |
9517 | 1.94k | ISD::InputArg::NoArgIndex, 0); |
9518 | 1.94k | Ins.push_back(RetArg); |
9519 | 1.94k | } |
9520 | 272k | |
9521 | 272k | // Look for stores of arguments to static allocas. Mark such arguments with a |
9522 | 272k | // flag to ask the target to give us the memory location of that argument if |
9523 | 272k | // available. |
9524 | 272k | ArgCopyElisionMapTy ArgCopyElisionCandidates; |
9525 | 272k | findArgumentCopyElisionCandidates(DL, FuncInfo, ArgCopyElisionCandidates); |
9526 | 272k | |
9527 | 272k | // Set up the incoming argument description vector. |
9528 | 514k | for (const Argument &Arg : F.args()) { |
9529 | 514k | unsigned ArgNo = Arg.getArgNo(); |
9530 | 514k | SmallVector<EVT, 4> ValueVTs; |
9531 | 514k | ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs); |
9532 | 514k | bool isArgValueUsed = !Arg.use_empty(); |
9533 | 514k | unsigned PartBase = 0; |
9534 | 514k | Type *FinalType = Arg.getType(); |
9535 | 514k | if (Arg.hasAttribute(Attribute::ByVal)) |
9536 | 646 | FinalType = Arg.getParamByValType(); |
9537 | 514k | bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters( |
9538 | 514k | FinalType, F.getCallingConv(), F.isVarArg()); |
9539 | 514k | for (unsigned Value = 0, NumValues = ValueVTs.size(); |
9540 | 1.03M | Value != NumValues; ++Value522k ) { |
9541 | 522k | EVT VT = ValueVTs[Value]; |
9542 | 522k | Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); |
9543 | 522k | ISD::ArgFlagsTy Flags; |
9544 | 522k | |
9545 | 522k | // Certain targets (such as MIPS), may have a different ABI alignment |
9546 | 522k | // for a type depending on the context. Give the target a chance to |
9547 | 522k | // specify the alignment it wants. |
9548 | 522k | unsigned OriginalAlignment = |
9549 | 522k | TLI->getABIAlignmentForCallingConv(ArgTy, DL); |
9550 | 522k | |
9551 | 522k | if (Arg.getType()->isPointerTy()) { |
9552 | 187k | Flags.setPointer(); |
9553 | 187k | Flags.setPointerAddrSpace( |
9554 | 187k | cast<PointerType>(Arg.getType())->getAddressSpace()); |
9555 | 187k | } |
9556 | 522k | if (Arg.hasAttribute(Attribute::ZExt)) |
9557 | 9.35k | Flags.setZExt(); |
9558 | 522k | if (Arg.hasAttribute(Attribute::SExt)) |
9559 | 8.50k | Flags.setSExt(); |
9560 | 522k | if (Arg.hasAttribute(Attribute::InReg)) { |
9561 | 5.07k | // If we are using vectorcall calling convention, a structure that is |
9562 | 5.07k | // passed InReg - is surely an HVA |
9563 | 5.07k | if (F.getCallingConv() == CallingConv::X86_VectorCall && |
9564 | 5.07k | isa<StructType>(Arg.getType())74 ) { |
9565 | 50 | // The first value of a structure is marked |
9566 | 50 | if (0 == Value) |
9567 | 14 | Flags.setHvaStart(); |
9568 | 50 | Flags.setHva(); |
9569 | 50 | } |
9570 | 5.07k | // Set InReg Flag |
9571 | 5.07k | Flags.setInReg(); |
9572 | 5.07k | } |
9573 | 522k | if (Arg.hasAttribute(Attribute::StructRet)) |
9574 | 897 | Flags.setSRet(); |
9575 | 522k | if (Arg.hasAttribute(Attribute::SwiftSelf)) |
9576 | 76 | Flags.setSwiftSelf(); |
9577 | 522k | if (Arg.hasAttribute(Attribute::SwiftError)) |
9578 | 119 | Flags.setSwiftError(); |
9579 | 522k | if (Arg.hasAttribute(Attribute::ByVal)) |
9580 | 646 | Flags.setByVal(); |
9581 | 522k | if (Arg.hasAttribute(Attribute::InAlloca)) { |
9582 | 18 | Flags.setInAlloca(); |
9583 | 18 | // Set the byval flag for CCAssignFn callbacks that don't know about |
9584 | 18 | // inalloca. This way we can know how many bytes we should've allocated |
9585 | 18 | // and how many bytes a callee cleanup function will pop. If we port |
9586 | 18 | // inalloca to more targets, we'll have to add custom inalloca handling |
9587 | 18 | // in the various CC lowering callbacks. |
9588 | 18 | Flags.setByVal(); |
9589 | 18 | } |
9590 | 522k | if (F.getCallingConv() == CallingConv::X86_INTR) { |
9591 | 56 | // IA Interrupt passes frame (1st parameter) by value in the stack. |
9592 | 56 | if (ArgNo == 0) |
9593 | 39 | Flags.setByVal(); |
9594 | 56 | } |
9595 | 522k | if (Flags.isByVal() || Flags.isInAlloca()522k ) { |
9596 | 703 | Type *ElementTy = Arg.getParamByValType(); |
9597 | 703 | |
9598 | 703 | // For ByVal, size and alignment should be passed from FE. BE will |
9599 | 703 | // guess if this info is not there but there are cases it cannot get |
9600 | 703 | // right. |
9601 | 703 | unsigned FrameSize = DL.getTypeAllocSize(Arg.getParamByValType()); |
9602 | 703 | Flags.setByValSize(FrameSize); |
9603 | 703 | |
9604 | 703 | unsigned FrameAlign; |
9605 | 703 | if (Arg.getParamAlignment()) |
9606 | 286 | FrameAlign = Arg.getParamAlignment(); |
9607 | 417 | else |
9608 | 417 | FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL); |
9609 | 703 | Flags.setByValAlign(FrameAlign); |
9610 | 703 | } |
9611 | 522k | if (Arg.hasAttribute(Attribute::Nest)) |
9612 | 69 | Flags.setNest(); |
9613 | 522k | if (NeedsRegBlock) |
9614 | 5.95k | Flags.setInConsecutiveRegs(); |
9615 | 522k | Flags.setOrigAlign(OriginalAlignment); |
9616 | 522k | if (ArgCopyElisionCandidates.count(&Arg)) |
9617 | 2.06k | Flags.setCopyElisionCandidate(); |
9618 | 522k | if (Arg.hasAttribute(Attribute::Returned)) |
9619 | 1.12k | Flags.setReturned(); |
9620 | 522k | |
9621 | 522k | MVT RegisterVT = TLI->getRegisterTypeForCallingConv( |
9622 | 522k | *CurDAG->getContext(), F.getCallingConv(), VT); |
9623 | 522k | unsigned NumRegs = TLI->getNumRegistersForCallingConv( |
9624 | 522k | *CurDAG->getContext(), F.getCallingConv(), VT); |
9625 | 1.10M | for (unsigned i = 0; i != NumRegs; ++i586k ) { |
9626 | 586k | ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed, |
9627 | 586k | ArgNo, PartBase+i*RegisterVT.getStoreSize()); |
9628 | 586k | if (NumRegs > 1 && i == 092.0k ) |
9629 | 28.7k | MyFlags.Flags.setSplit(); |
9630 | 557k | // if it isn't first piece, alignment must be 1 |
9631 | 557k | else if (i > 0) { |
9632 | 63.3k | MyFlags.Flags.setOrigAlign(1); |
9633 | 63.3k | if (i == NumRegs - 1) |
9634 | 28.7k | MyFlags.Flags.setSplitEnd(); |
9635 | 63.3k | } |
9636 | 586k | Ins.push_back(MyFlags); |
9637 | 586k | } |
9638 | 522k | if (NeedsRegBlock && Value == NumValues - 15.95k ) |
9639 | 4.21k | Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast(); |
9640 | 522k | PartBase += VT.getStoreSize(); |
9641 | 522k | } |
9642 | 514k | } |
9643 | 272k | |
9644 | 272k | // Call the target to set up the argument values. |
9645 | 272k | SmallVector<SDValue, 8> InVals; |
9646 | 272k | SDValue NewRoot = TLI->LowerFormalArguments( |
9647 | 272k | DAG.getRoot(), F.getCallingConv(), F.isVarArg(), Ins, dl, DAG, InVals); |
9648 | 272k | |
9649 | 272k | // Verify that the target's LowerFormalArguments behaved as expected. |
9650 | 272k | assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other && |
9651 | 272k | "LowerFormalArguments didn't return a valid chain!"); |
9652 | 272k | assert(InVals.size() == Ins.size() && |
9653 | 272k | "LowerFormalArguments didn't emit the correct number of values!"); |
9654 | 272k | LLVM_DEBUG({ |
9655 | 272k | for (unsigned i = 0, e = Ins.size(); i != e; ++i) { |
9656 | 272k | assert(InVals[i].getNode() && |
9657 | 272k | "LowerFormalArguments emitted a null value!"); |
9658 | 272k | assert(EVT(Ins[i].VT) == InVals[i].getValueType() && |
9659 | 272k | "LowerFormalArguments emitted a value with the wrong type!"); |
9660 | 272k | } |
9661 | 272k | }); |
9662 | 272k | |
9663 | 272k | // Update the DAG with the new chain value resulting from argument lowering. |
9664 | 272k | DAG.setRoot(NewRoot); |
9665 | 272k | |
9666 | 272k | // Set up the argument values. |
9667 | 272k | unsigned i = 0; |
9668 | 272k | if (!FuncInfo->CanLowerReturn) { |
9669 | 1.94k | // Create a virtual register for the sret pointer, and put in a copy |
9670 | 1.94k | // from the sret argument into it. |
9671 | 1.94k | SmallVector<EVT, 1> ValueVTs; |
9672 | 1.94k | ComputeValueVTs(*TLI, DAG.getDataLayout(), |
9673 | 1.94k | F.getReturnType()->getPointerTo( |
9674 | 1.94k | DAG.getDataLayout().getAllocaAddrSpace()), |
9675 | 1.94k | ValueVTs); |
9676 | 1.94k | MVT VT = ValueVTs[0].getSimpleVT(); |
9677 | 1.94k | MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT); |
9678 | 1.94k | Optional<ISD::NodeType> AssertOp = None; |
9679 | 1.94k | SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT, |
9680 | 1.94k | nullptr, F.getCallingConv(), AssertOp); |
9681 | 1.94k | |
9682 | 1.94k | MachineFunction& MF = SDB->DAG.getMachineFunction(); |
9683 | 1.94k | MachineRegisterInfo& RegInfo = MF.getRegInfo(); |
9684 | 1.94k | unsigned SRetReg = RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT)); |
9685 | 1.94k | FuncInfo->DemoteRegister = SRetReg; |
9686 | 1.94k | NewRoot = |
9687 | 1.94k | SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue); |
9688 | 1.94k | DAG.setRoot(NewRoot); |
9689 | 1.94k | |
9690 | 1.94k | // i indexes lowered arguments. Bump it past the hidden sret argument. |
9691 | 1.94k | ++i; |
9692 | 1.94k | } |
9693 | 272k | |
9694 | 272k | SmallVector<SDValue, 4> Chains; |
9695 | 272k | DenseMap<int, int> ArgCopyElisionFrameIndexMap; |
9696 | 514k | for (const Argument &Arg : F.args()) { |
9697 | 514k | SmallVector<SDValue, 4> ArgValues; |
9698 | 514k | SmallVector<EVT, 4> ValueVTs; |
9699 | 514k | ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs); |
9700 | 514k | unsigned NumValues = ValueVTs.size(); |
9701 | 514k | if (NumValues == 0) |
9702 | 24 | continue; |
9703 | 514k | |
9704 | 514k | bool ArgHasUses = !Arg.use_empty(); |
9705 | 514k | |
9706 | 514k | // Elide the copying store if the target loaded this argument from a |
9707 | 514k | // suitable fixed stack object. |
9708 | 514k | if (Ins[i].Flags.isCopyElisionCandidate()) { |
9709 | 2.05k | tryToElideArgumentCopy(FuncInfo, Chains, ArgCopyElisionFrameIndexMap, |
9710 | 2.05k | ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg, |
9711 | 2.05k | InVals[i], ArgHasUses); |
9712 | 2.05k | } |
9713 | 514k | |
9714 | 514k | // If this argument is unused then remember its value. It is used to generate |
9715 | 514k | // debugging information. |
9716 | 514k | bool isSwiftErrorArg = |
9717 | 514k | TLI->supportSwiftError() && |
9718 | 514k | Arg.hasAttribute(Attribute::SwiftError)340k ; |
9719 | 514k | if (!ArgHasUses && !isSwiftErrorArg64.5k ) { |
9720 | 64.5k | SDB->setUnusedArgValue(&Arg, InVals[i]); |
9721 | 64.5k | |
9722 | 64.5k | // Also remember any frame index for use in FastISel. |
9723 | 64.5k | if (FrameIndexSDNode *FI = |
9724 | 89 | dyn_cast<FrameIndexSDNode>(InVals[i].getNode())) |
9725 | 89 | FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex()); |
9726 | 64.5k | } |
9727 | 514k | |
9728 | 1.03M | for (unsigned Val = 0; Val != NumValues; ++Val522k ) { |
9729 | 522k | EVT VT = ValueVTs[Val]; |
9730 | 522k | MVT PartVT = TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), |
9731 | 522k | F.getCallingConv(), VT); |
9732 | 522k | unsigned NumParts = TLI->getNumRegistersForCallingConv( |
9733 | 522k | *CurDAG->getContext(), F.getCallingConv(), VT); |
9734 | 522k | |
9735 | 522k | // Even an apparant 'unused' swifterror argument needs to be returned. So |
9736 | 522k | // we do generate a copy for it that can be used on return from the |
9737 | 522k | // function. |
9738 | 522k | if (ArgHasUses || isSwiftErrorArg71.0k ) { |
9739 | 451k | Optional<ISD::NodeType> AssertOp; |
9740 | 451k | if (Arg.hasAttribute(Attribute::SExt)) |
9741 | 8.38k | AssertOp = ISD::AssertSext; |
9742 | 443k | else if (Arg.hasAttribute(Attribute::ZExt)) |
9743 | 8.96k | AssertOp = ISD::AssertZext; |
9744 | 451k | |
9745 | 451k | ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts, |
9746 | 451k | PartVT, VT, nullptr, |
9747 | 451k | F.getCallingConv(), AssertOp)); |
9748 | 451k | } |
9749 | 522k | |
9750 | 522k | i += NumParts; |
9751 | 522k | } |
9752 | 514k | |
9753 | 514k | // We don't need to do anything else for unused arguments. |
9754 | 514k | if (ArgValues.empty()) |
9755 | 64.5k | continue; |
9756 | 450k | |
9757 | 450k | // Note down frame index. |
9758 | 450k | if (FrameIndexSDNode *FI = |
9759 | 458 | dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode())) |
9760 | 458 | FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex()); |
9761 | 450k | |
9762 | 450k | SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues), |
9763 | 450k | SDB->getCurSDLoc()); |
9764 | 450k | |
9765 | 450k | SDB->setValue(&Arg, Res); |
9766 | 450k | if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR428k ) { |
9767 | 7.76k | // We want to associate the argument with the frame index, among |
9768 | 7.76k | // involved operands, that correspond to the lowest address. The |
9769 | 7.76k | // getCopyFromParts function, called earlier, is swapping the order of |
9770 | 7.76k | // the operands to BUILD_PAIR depending on endianness. The result of |
9771 | 7.76k | // that swapping is that the least significant bits of the argument will |
9772 | 7.76k | // be in the first operand of the BUILD_PAIR node, and the most |
9773 | 7.76k | // significant bits will be in the second operand. |
9774 | 7.76k | unsigned LowAddressOp = DAG.getDataLayout().isBigEndian() ? 11.61k : 06.14k ; |
9775 | 7.76k | if (LoadSDNode *LNode = |
9776 | 2.74k | dyn_cast<LoadSDNode>(Res.getOperand(LowAddressOp).getNode())) |
9777 | 2.74k | if (FrameIndexSDNode *FI = |
9778 | 2.47k | dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) |
9779 | 2.47k | FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex()); |
9780 | 7.76k | } |
9781 | 450k | |
9782 | 450k | // Update the SwiftErrorVRegDefMap. |
9783 | 450k | if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg333k ) { |
9784 | 101 | unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg(); |
9785 | 101 | if (TargetRegisterInfo::isVirtualRegister(Reg)) |
9786 | 101 | SwiftError->setCurrentVReg(FuncInfo->MBB, SwiftError->getFunctionArg(), |
9787 | 101 | Reg); |
9788 | 101 | } |
9789 | 450k | |
9790 | 450k | // If this argument is live outside of the entry block, insert a copy from |
9791 | 450k | // wherever we got it to the vreg that other BB's will reference it as. |
9792 | 450k | if (Res.getOpcode() == ISD::CopyFromReg) { |
9793 | 333k | // If we can, though, try to skip creating an unnecessary vreg. |
9794 | 333k | // FIXME: This isn't very clean... it would be nice to make this more |
9795 | 333k | // general. |
9796 | 333k | unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg(); |
9797 | 333k | if (TargetRegisterInfo::isVirtualRegister(Reg)) { |
9798 | 333k | FuncInfo->ValueMap[&Arg] = Reg; |
9799 | 333k | continue; |
9800 | 333k | } |
9801 | 117k | } |
9802 | 117k | if (!isOnlyUsedInEntryBlock(&Arg, TM.Options.EnableFastISel)) { |
9803 | 21.8k | FuncInfo->InitializeRegForValue(&Arg); |
9804 | 21.8k | SDB->CopyToExportRegsIfNeeded(&Arg); |
9805 | 21.8k | } |
9806 | 117k | } |
9807 | 272k | |
9808 | 272k | if (!Chains.empty()) { |
9809 | 203 | Chains.push_back(NewRoot); |
9810 | 203 | NewRoot = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); |
9811 | 203 | } |
9812 | 272k | |
9813 | 272k | DAG.setRoot(NewRoot); |
9814 | 272k | |
9815 | 272k | assert(i == InVals.size() && "Argument register count mismatch!"); |
9816 | 272k | |
9817 | 272k | // If any argument copy elisions occurred and we have debug info, update the |
9818 | 272k | // stale frame indices used in the dbg.declare variable info table. |
9819 | 272k | MachineFunction::VariableDbgInfoMapTy &DbgDeclareInfo = MF->getVariableDbgInfo(); |
9820 | 272k | if (!DbgDeclareInfo.empty() && !ArgCopyElisionFrameIndexMap.empty()0 ) { |
9821 | 0 | for (MachineFunction::VariableDbgInfo &VI : DbgDeclareInfo) { |
9822 | 0 | auto I = ArgCopyElisionFrameIndexMap.find(VI.Slot); |
9823 | 0 | if (I != ArgCopyElisionFrameIndexMap.end()) |
9824 | 0 | VI.Slot = I->second; |
9825 | 0 | } |
9826 | 0 | } |
9827 | 272k | |
9828 | 272k | // Finally, if the target has anything special to do, allow it to do so. |
9829 | 272k | EmitFunctionEntryCode(); |
9830 | 272k | } |
9831 | | |
9832 | | /// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to |
9833 | | /// ensure constants are generated when needed. Remember the virtual registers |
9834 | | /// that need to be added to the Machine PHI nodes as input. We cannot just |
9835 | | /// directly add them, because expansion might result in multiple MBB's for one |
9836 | | /// BB. As such, the start of the BB might correspond to a different MBB than |
9837 | | /// the end. |
9838 | | void |
9839 | 1.12M | SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { |
9840 | 1.12M | const Instruction *TI = LLVMBB->getTerminator(); |
9841 | 1.12M | |
9842 | 1.12M | SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; |
9843 | 1.12M | |
9844 | 1.12M | // Check PHI nodes in successors that expect a value to be available from this |
9845 | 1.12M | // block. |
9846 | 2.52M | for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ1.39M ) { |
9847 | 1.39M | const BasicBlock *SuccBB = TI->getSuccessor(succ); |
9848 | 1.39M | if (!isa<PHINode>(SuccBB->begin())) continue985k ; |
9849 | 412k | MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB]; |
9850 | 412k | |
9851 | 412k | // If this terminator has multiple identical successors (common for |
9852 | 412k | // switches), only handle each succ once. |
9853 | 412k | if (!SuccsHandled.insert(SuccMBB).second) |
9854 | 325 | continue; |
9855 | 412k | |
9856 | 412k | MachineBasicBlock::iterator MBBI = SuccMBB->begin(); |
9857 | 412k | |
9858 | 412k | // At this point we know that there is a 1-1 correspondence between LLVM PHI |
9859 | 412k | // nodes and Machine PHI nodes, but the incoming operands have not been |
9860 | 412k | // emitted yet. |
9861 | 609k | for (const PHINode &PN : SuccBB->phis()) { |
9862 | 609k | // Ignore dead phi's. |
9863 | 609k | if (PN.use_empty()) |
9864 | 9.53k | continue; |
9865 | 600k | |
9866 | 600k | // Skip empty types |
9867 | 600k | if (PN.getType()->isEmptyTy()) |
9868 | 4 | continue; |
9869 | 600k | |
9870 | 600k | unsigned Reg; |
9871 | 600k | const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB); |
9872 | 600k | |
9873 | 600k | if (const Constant *C = dyn_cast<Constant>(PHIOp)) { |
9874 | 167k | unsigned &RegOut = ConstantsOut[C]; |
9875 | 167k | if (RegOut == 0) { |
9876 | 161k | RegOut = FuncInfo.CreateRegs(C); |
9877 | 161k | CopyValueToVirtualRegister(C, RegOut); |
9878 | 161k | } |
9879 | 167k | Reg = RegOut; |
9880 | 432k | } else { |
9881 | 432k | DenseMap<const Value *, unsigned>::iterator I = |
9882 | 432k | FuncInfo.ValueMap.find(PHIOp); |
9883 | 432k | if (I != FuncInfo.ValueMap.end()) |
9884 | 431k | Reg = I->second; |
9885 | 479 | else { |
9886 | 479 | assert(isa<AllocaInst>(PHIOp) && |
9887 | 479 | FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) && |
9888 | 479 | "Didn't codegen value into a register!??"); |
9889 | 479 | Reg = FuncInfo.CreateRegs(PHIOp); |
9890 | 479 | CopyValueToVirtualRegister(PHIOp, Reg); |
9891 | 479 | } |
9892 | 432k | } |
9893 | 600k | |
9894 | 600k | // Remember that this register needs to added to the machine PHI node as |
9895 | 600k | // the input for this MBB. |
9896 | 600k | SmallVector<EVT, 4> ValueVTs; |
9897 | 600k | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
9898 | 600k | ComputeValueVTs(TLI, DAG.getDataLayout(), PN.getType(), ValueVTs); |
9899 | 1.20M | for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti601k ) { |
9900 | 601k | EVT VT = ValueVTs[vti]; |
9901 | 601k | unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); |
9902 | 1.21M | for (unsigned i = 0, e = NumRegisters; i != e; ++i616k ) |
9903 | 616k | FuncInfo.PHINodesToUpdate.push_back( |
9904 | 616k | std::make_pair(&*MBBI++, Reg + i)); |
9905 | 601k | Reg += NumRegisters; |
9906 | 601k | } |
9907 | 600k | } |
9908 | 412k | } |
9909 | 1.12M | |
9910 | 1.12M | ConstantsOut.clear(); |
9911 | 1.12M | } |
9912 | | |
9913 | | /// Add a successor MBB to ParentMBB< creating a new MachineBB for BB if SuccMBB |
9914 | | /// is 0. |
9915 | | MachineBasicBlock * |
9916 | | SelectionDAGBuilder::StackProtectorDescriptor:: |
9917 | | AddSuccessorMBB(const BasicBlock *BB, |
9918 | | MachineBasicBlock *ParentMBB, |
9919 | | bool IsLikely, |
9920 | 804 | MachineBasicBlock *SuccMBB) { |
9921 | 804 | // If SuccBB has not been created yet, create it. |
9922 | 804 | if (!SuccMBB) { |
9923 | 794 | MachineFunction *MF = ParentMBB->getParent(); |
9924 | 794 | MachineFunction::iterator BBI(ParentMBB); |
9925 | 794 | SuccMBB = MF->CreateMachineBasicBlock(BB); |
9926 | 794 | MF->insert(++BBI, SuccMBB); |
9927 | 794 | } |
9928 | 804 | // Add it as a successor of ParentMBB. |
9929 | 804 | ParentMBB->addSuccessor( |
9930 | 804 | SuccMBB, BranchProbabilityInfo::getBranchProbStackProtector(IsLikely)); |
9931 | 804 | return SuccMBB; |
9932 | 804 | } |
9933 | | |
9934 | 869k | MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) { |
9935 | 869k | MachineFunction::iterator I(MBB); |
9936 | 869k | if (++I == FuncInfo.MF->end()) |
9937 | 6.94k | return nullptr; |
9938 | 862k | return &*I; |
9939 | 862k | } |
9940 | | |
9941 | | /// During lowering new call nodes can be created (such as memset, etc.). |
9942 | | /// Those will become new roots of the current DAG, but complications arise |
9943 | | /// when they are tail calls. In such cases, the call lowering will update |
9944 | | /// the root, but the builder still needs to know that a tail call has been |
9945 | | /// lowered in order to avoid generating an additional return. |
9946 | 14.3k | void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) { |
9947 | 14.3k | // If the node is null, we do have a tail call. |
9948 | 14.3k | if (MaybeTC.getNode() != nullptr) |
9949 | 14.3k | DAG.setRoot(MaybeTC); |
9950 | 36 | else |
9951 | 36 | HasTailCall = true; |
9952 | 14.3k | } |
9953 | | |
9954 | | void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, |
9955 | | MachineBasicBlock *SwitchMBB, |
9956 | 8.30k | MachineBasicBlock *DefaultMBB) { |
9957 | 8.30k | MachineFunction *CurMF = FuncInfo.MF; |
9958 | 8.30k | MachineBasicBlock *NextMBB = nullptr; |
9959 | 8.30k | MachineFunction::iterator BBI(W.MBB); |
9960 | 8.30k | if (++BBI != FuncInfo.MF->end()) |
9961 | 8.29k | NextMBB = &*BBI; |
9962 | 8.30k | |
9963 | 8.30k | unsigned Size = W.LastCluster - W.FirstCluster + 1; |
9964 | 8.30k | |
9965 | 8.30k | BranchProbabilityInfo *BPI = FuncInfo.BPI; |
9966 | 8.30k | |
9967 | 8.30k | if (Size == 2 && W.MBB == SwitchMBB4.66k ) { |
9968 | 4.38k | // If any two of the cases has the same destination, and if one value |
9969 | 4.38k | // is the same as the other, but has one bit unset that the other has set, |
9970 | 4.38k | // use bit manipulation to do two compares at once. For example: |
9971 | 4.38k | // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)" |
9972 | 4.38k | // TODO: This could be extended to merge any 2 cases in switches with 3 |
9973 | 4.38k | // cases. |
9974 | 4.38k | // TODO: Handle cases where W.CaseBB != SwitchBB. |
9975 | 4.38k | CaseCluster &Small = *W.FirstCluster; |
9976 | 4.38k | CaseCluster &Big = *W.LastCluster; |
9977 | 4.38k | |
9978 | 4.38k | if (Small.Low == Small.High && Big.Low == Big.High4.36k && |
9979 | 4.38k | Small.MBB == Big.MBB4.29k ) { |
9980 | 301 | const APInt &SmallValue = Small.Low->getValue(); |
9981 | 301 | const APInt &BigValue = Big.Low->getValue(); |
9982 | 301 | |
9983 | 301 | // Check that there is only one bit different. |
9984 | 301 | APInt CommonBit = BigValue ^ SmallValue; |
9985 | 301 | if (CommonBit.isPowerOf2()) { |
9986 | 36 | SDValue CondLHS = getValue(Cond); |
9987 | 36 | EVT VT = CondLHS.getValueType(); |
9988 | 36 | SDLoc DL = getCurSDLoc(); |
9989 | 36 | |
9990 | 36 | SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS, |
9991 | 36 | DAG.getConstant(CommonBit, DL, VT)); |
9992 | 36 | SDValue Cond = DAG.getSetCC( |
9993 | 36 | DL, MVT::i1, Or, DAG.getConstant(BigValue | SmallValue, DL, VT), |
9994 | 36 | ISD::SETEQ); |
9995 | 36 | |
9996 | 36 | // Update successor info. |
9997 | 36 | // Both Small and Big will jump to Small.BB, so we sum up the |
9998 | 36 | // probabilities. |
9999 | 36 | addSuccessorWithProb(SwitchMBB, Small.MBB, Small.Prob + Big.Prob); |
10000 | 36 | if (BPI) |
10001 | 36 | addSuccessorWithProb( |
10002 | 36 | SwitchMBB, DefaultMBB, |
10003 | 36 | // The default destination is the first successor in IR. |
10004 | 36 | BPI->getEdgeProbability(SwitchMBB->getBasicBlock(), (unsigned)0)); |
10005 | 0 | else |
10006 | 0 | addSuccessorWithProb(SwitchMBB, DefaultMBB); |
10007 | 36 | |
10008 | 36 | // Insert the true branch. |
10009 | 36 | SDValue BrCond = |
10010 | 36 | DAG.getNode(ISD::BRCOND, DL, MVT::Other, getControlRoot(), Cond, |
10011 | 36 | DAG.getBasicBlock(Small.MBB)); |
10012 | 36 | // Insert the false branch. |
10013 | 36 | BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond, |
10014 | 36 | DAG.getBasicBlock(DefaultMBB)); |
10015 | 36 | |
10016 | 36 | DAG.setRoot(BrCond); |
10017 | 36 | return; |
10018 | 36 | } |
10019 | 8.26k | } |
10020 | 4.38k | } |
10021 | 8.26k | |
10022 | 8.26k | if (TM.getOptLevel() != CodeGenOpt::None) { |
10023 | 8.22k | // Here, we order cases by probability so the most likely case will be |
10024 | 8.22k | // checked first. However, two clusters can have the same probability in |
10025 | 8.22k | // which case their relative ordering is non-deterministic. So we use Low |
10026 | 8.22k | // as a tie-breaker as clusters are guaranteed to never overlap. |
10027 | 8.22k | llvm::sort(W.FirstCluster, W.LastCluster + 1, |
10028 | 8.22k | [](const CaseCluster &a, const CaseCluster &b) { |
10029 | 5.41k | return a.Prob != b.Prob ? |
10030 | 517 | a.Prob > b.Prob : |
10031 | 5.41k | a.Low->getValue().slt(b.Low->getValue())4.89k ; |
10032 | 5.41k | }); |
10033 | 8.22k | |
10034 | 8.22k | // Rearrange the case blocks so that the last one falls through if possible |
10035 | 8.22k | // without changing the order of probabilities. |
10036 | 9.41k | for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) { |
10037 | 5.22k | --I; |
10038 | 5.22k | if (I->Prob > W.LastCluster->Prob) |
10039 | 405 | break; |
10040 | 4.82k | if (I->Kind == CC_Range && I->MBB == NextMBB4.81k ) { |
10041 | 3.63k | std::swap(*I, *W.LastCluster); |
10042 | 3.63k | break; |
10043 | 3.63k | } |
10044 | 4.82k | } |
10045 | 8.22k | } |
10046 | 8.26k | |
10047 | 8.26k | // Compute total probability. |
10048 | 8.26k | BranchProbability DefaultProb = W.DefaultProb; |
10049 | 8.26k | BranchProbability UnhandledProbs = DefaultProb; |
10050 | 22.0k | for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I13.7k ) |
10051 | 13.7k | UnhandledProbs += I->Prob; |
10052 | 8.26k | |
10053 | 8.26k | MachineBasicBlock *CurMBB = W.MBB; |
10054 | 22.0k | for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I13.7k ) { |
10055 | 13.7k | bool FallthroughUnreachable = false; |
10056 | 13.7k | MachineBasicBlock *Fallthrough; |
10057 | 13.7k | if (I == W.LastCluster) { |
10058 | 8.26k | // For the last cluster, fall through to the default destination. |
10059 | 8.26k | Fallthrough = DefaultMBB; |
10060 | 8.26k | FallthroughUnreachable = isa<UnreachableInst>( |
10061 | 8.26k | DefaultMBB->getBasicBlock()->getFirstNonPHIOrDbg()); |
10062 | 8.26k | } else { |
10063 | 5.47k | Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock()); |
10064 | 5.47k | CurMF->insert(BBI, Fallthrough); |
10065 | 5.47k | // Put Cond in a virtual register to make it available from the new blocks. |
10066 | 5.47k | ExportFromCurrentBlock(Cond); |
10067 | 5.47k | } |
10068 | 13.7k | UnhandledProbs -= I->Prob; |
10069 | 13.7k | |
10070 | 13.7k | switch (I->Kind) { |
10071 | 13.7k | case CC_JumpTable: { |
10072 | 2.26k | // FIXME: Optimize away range check based on pivot comparisons. |
10073 | 2.26k | JumpTableHeader *JTH = &SL->JTCases[I->JTCasesIndex].first; |
10074 | 2.26k | SwitchCG::JumpTable *JT = &SL->JTCases[I->JTCasesIndex].second; |
10075 | 2.26k | |
10076 | 2.26k | // The jump block hasn't been inserted yet; insert it here. |
10077 | 2.26k | MachineBasicBlock *JumpMBB = JT->MBB; |
10078 | 2.26k | CurMF->insert(BBI, JumpMBB); |
10079 | 2.26k | |
10080 | 2.26k | auto JumpProb = I->Prob; |
10081 | 2.26k | auto FallthroughProb = UnhandledProbs; |
10082 | 2.26k | |
10083 | 2.26k | // If the default statement is a target of the jump table, we evenly |
10084 | 2.26k | // distribute the default probability to successors of CurMBB. Also |
10085 | 2.26k | // update the probability on the edge from JumpMBB to Fallthrough. |
10086 | 2.26k | for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(), |
10087 | 2.26k | SE = JumpMBB->succ_end(); |
10088 | 12.6k | SI != SE; ++SI10.3k ) { |
10089 | 10.6k | if (*SI == DefaultMBB) { |
10090 | 319 | JumpProb += DefaultProb / 2; |
10091 | 319 | FallthroughProb -= DefaultProb / 2; |
10092 | 319 | JumpMBB->setSuccProbability(SI, DefaultProb / 2); |
10093 | 319 | JumpMBB->normalizeSuccProbs(); |
10094 | 319 | break; |
10095 | 319 | } |
10096 | 10.6k | } |
10097 | 2.26k | |
10098 | 2.26k | if (FallthroughUnreachable) { |
10099 | 48 | // Skip the range check if the fallthrough block is unreachable. |
10100 | 48 | JTH->OmitRangeCheck = true; |
10101 | 48 | } |
10102 | 2.26k | |
10103 | 2.26k | if (!JTH->OmitRangeCheck) |
10104 | 2.21k | addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb); |
10105 | 2.26k | addSuccessorWithProb(CurMBB, JumpMBB, JumpProb); |
10106 | 2.26k | CurMBB->normalizeSuccProbs(); |
10107 | 2.26k | |
10108 | 2.26k | // The jump table header will be inserted in our current block, do the |
10109 | 2.26k | // range check, and fall through to our fallthrough block. |
10110 | 2.26k | JTH->HeaderBB = CurMBB; |
10111 | 2.26k | JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader. |
10112 | 2.26k | |
10113 | 2.26k | // If we're in the right place, emit the jump table header right now. |
10114 | 2.26k | if (CurMBB == SwitchMBB) { |
10115 | 2.23k | visitJumpTableHeader(*JT, *JTH, SwitchMBB); |
10116 | 2.23k | JTH->Emitted = true; |
10117 | 2.23k | } |
10118 | 2.26k | break; |
10119 | 13.7k | } |
10120 | 13.7k | case CC_BitTests: { |
10121 | 286 | // FIXME: If Fallthrough is unreachable, skip the range check. |
10122 | 286 | |
10123 | 286 | // FIXME: Optimize away range check based on pivot comparisons. |
10124 | 286 | BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex]; |
10125 | 286 | |
10126 | 286 | // The bit test blocks haven't been inserted yet; insert them here. |
10127 | 286 | for (BitTestCase &BTC : BTB->Cases) |
10128 | 447 | CurMF->insert(BBI, BTC.ThisBB); |
10129 | 286 | |
10130 | 286 | // Fill in fields of the BitTestBlock. |
10131 | 286 | BTB->Parent = CurMBB; |
10132 | 286 | BTB->Default = Fallthrough; |
10133 | 286 | |
10134 | 286 | BTB->DefaultProb = UnhandledProbs; |
10135 | 286 | // If the cases in bit test don't form a contiguous range, we evenly |
10136 | 286 | // distribute the probability on the edge to Fallthrough to two |
10137 | 286 | // successors of CurMBB. |
10138 | 286 | if (!BTB->ContiguousRange) { |
10139 | 249 | BTB->Prob += DefaultProb / 2; |
10140 | 249 | BTB->DefaultProb -= DefaultProb / 2; |
10141 | 249 | } |
10142 | 286 | |
10143 | 286 | // If we're in the right place, emit the bit test header right now. |
10144 | 286 | if (CurMBB == SwitchMBB) { |
10145 | 284 | visitBitTestHeader(*BTB, SwitchMBB); |
10146 | 284 | BTB->Emitted = true; |
10147 | 284 | } |
10148 | 286 | break; |
10149 | 13.7k | } |
10150 | 13.7k | case CC_Range: { |
10151 | 11.1k | const Value *RHS, *LHS, *MHS; |
10152 | 11.1k | ISD::CondCode CC; |
10153 | 11.1k | if (I->Low == I->High) { |
10154 | 10.8k | // Check Cond == I->Low. |
10155 | 10.8k | CC = ISD::SETEQ; |
10156 | 10.8k | LHS = Cond; |
10157 | 10.8k | RHS=I->Low; |
10158 | 10.8k | MHS = nullptr; |
10159 | 10.8k | } else { |
10160 | 340 | // Check I->Low <= Cond <= I->High. |
10161 | 340 | CC = ISD::SETLE; |
10162 | 340 | LHS = I->Low; |
10163 | 340 | MHS = Cond; |
10164 | 340 | RHS = I->High; |
10165 | 340 | } |
10166 | 11.1k | |
10167 | 11.1k | // If Fallthrough is unreachable, fold away the comparison. |
10168 | 11.1k | if (FallthroughUnreachable) |
10169 | 17 | CC = ISD::SETTRUE; |
10170 | 11.1k | |
10171 | 11.1k | // The false probability is the sum of all unhandled cases. |
10172 | 11.1k | CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, |
10173 | 11.1k | getCurSDLoc(), I->Prob, UnhandledProbs); |
10174 | 11.1k | |
10175 | 11.1k | if (CurMBB == SwitchMBB) |
10176 | 5.02k | visitSwitchCase(CB, SwitchMBB); |
10177 | 6.16k | else |
10178 | 6.16k | SL->SwitchCases.push_back(CB); |
10179 | 11.1k | |
10180 | 11.1k | break; |
10181 | 13.7k | } |
10182 | 13.7k | } |
10183 | 13.7k | CurMBB = Fallthrough; |
10184 | 13.7k | } |
10185 | 8.26k | } |
10186 | | |
10187 | | unsigned SelectionDAGBuilder::caseClusterRank(const CaseCluster &CC, |
10188 | | CaseClusterIt First, |
10189 | 34 | CaseClusterIt Last) { |
10190 | 115 | return std::count_if(First, Last + 1, [&](const CaseCluster &X) { |
10191 | 115 | if (X.Prob != CC.Prob) |
10192 | 42 | return X.Prob > CC.Prob; |
10193 | 73 | |
10194 | 73 | // Ties are broken by comparing the case value. |
10195 | 73 | return X.Low->getValue().slt(CC.Low->getValue()); |
10196 | 73 | }); |
10197 | 34 | } |
10198 | | |
10199 | | void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, |
10200 | | const SwitchWorkListItem &W, |
10201 | | Value *Cond, |
10202 | 189 | MachineBasicBlock *SwitchMBB) { |
10203 | 189 | assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) && |
10204 | 189 | "Clusters not sorted?"); |
10205 | 189 | |
10206 | 189 | assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!"); |
10207 | 189 | |
10208 | 189 | // Balance the tree based on branch probabilities to create a near-optimal (in |
10209 | 189 | // terms of search time given key frequency) binary search tree. See e.g. Kurt |
10210 | 189 | // Mehlhorn "Nearly Optimal Binary Search Trees" (1975). |
10211 | 189 | CaseClusterIt LastLeft = W.FirstCluster; |
10212 | 189 | CaseClusterIt FirstRight = W.LastCluster; |
10213 | 189 | auto LeftProb = LastLeft->Prob + W.DefaultProb / 2; |
10214 | 189 | auto RightProb = FirstRight->Prob + W.DefaultProb / 2; |
10215 | 189 | |
10216 | 189 | // Move LastLeft and FirstRight towards each other from opposite directions to |
10217 | 189 | // find a partitioning of the clusters which balances the probability on both |
10218 | 189 | // sides. If LeftProb and RightProb are equal, alternate which side is |
10219 | 189 | // taken to ensure 0-probability nodes are distributed evenly. |
10220 | 189 | unsigned I = 0; |
10221 | 815 | while (LastLeft + 1 < FirstRight) { |
10222 | 626 | if (LeftProb < RightProb || (338 LeftProb == RightProb338 && (I & 1)282 )) |
10223 | 293 | LeftProb += (++LastLeft)->Prob; |
10224 | 333 | else |
10225 | 333 | RightProb += (--FirstRight)->Prob; |
10226 | 626 | I++; |
10227 | 626 | } |
10228 | 189 | |
10229 | 199 | while (true) { |
10230 | 199 | // Our binary search tree differs from a typical BST in that ours can have up |
10231 | 199 | // to three values in each leaf. The pivot selection above doesn't take that |
10232 | 199 | // into account, which means the tree might require more nodes and be less |
10233 | 199 | // efficient. We compensate for this here. |
10234 | 199 | |
10235 | 199 | unsigned NumLeft = LastLeft - W.FirstCluster + 1; |
10236 | 199 | unsigned NumRight = W.LastCluster - FirstRight + 1; |
10237 | 199 | |
10238 | 199 | if (std::min(NumLeft, NumRight) < 3 && std::max(NumLeft, NumRight) > 3157 ) { |
10239 | 17 | // If one side has less than 3 clusters, and the other has more than 3, |
10240 | 17 | // consider taking a cluster from the other side. |
10241 | 17 | |
10242 | 17 | if (NumLeft < NumRight) { |
10243 | 8 | // Consider moving the first cluster on the right to the left side. |
10244 | 8 | CaseCluster &CC = *FirstRight; |
10245 | 8 | unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster); |
10246 | 8 | unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft); |
10247 | 8 | if (LeftSideRank <= RightSideRank) { |
10248 | 2 | // Moving the cluster to the left does not demote it. |
10249 | 2 | ++LastLeft; |
10250 | 2 | ++FirstRight; |
10251 | 2 | continue; |
10252 | 2 | } |
10253 | 9 | } else { |
10254 | 9 | assert(NumRight < NumLeft); |
10255 | 9 | // Consider moving the last element on the left to the right side. |
10256 | 9 | CaseCluster &CC = *LastLeft; |
10257 | 9 | unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft); |
10258 | 9 | unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster); |
10259 | 9 | if (RightSideRank <= LeftSideRank) { |
10260 | 8 | // Moving the cluster to the right does not demot it. |
10261 | 8 | --LastLeft; |
10262 | 8 | --FirstRight; |
10263 | 8 | continue; |
10264 | 8 | } |
10265 | 189 | } |
10266 | 17 | } |
10267 | 189 | break; |
10268 | 189 | } |
10269 | 189 | |
10270 | 189 | assert(LastLeft + 1 == FirstRight); |
10271 | 189 | assert(LastLeft >= W.FirstCluster); |
10272 | 189 | assert(FirstRight <= W.LastCluster); |
10273 | 189 | |
10274 | 189 | // Use the first element on the right as pivot since we will make less-than |
10275 | 189 | // comparisons against it. |
10276 | 189 | CaseClusterIt PivotCluster = FirstRight; |
10277 | 189 | assert(PivotCluster > W.FirstCluster); |
10278 | 189 | assert(PivotCluster <= W.LastCluster); |
10279 | 189 | |
10280 | 189 | CaseClusterIt FirstLeft = W.FirstCluster; |
10281 | 189 | CaseClusterIt LastRight = W.LastCluster; |
10282 | 189 | |
10283 | 189 | const ConstantInt *Pivot = PivotCluster->Low; |
10284 | 189 | |
10285 | 189 | // New blocks will be inserted immediately after the current one. |
10286 | 189 | MachineFunction::iterator BBI(W.MBB); |
10287 | 189 | ++BBI; |
10288 | 189 | |
10289 | 189 | // We will branch to the LHS if Value < Pivot. If LHS is a single cluster, |
10290 | 189 | // we can branch to its destination directly if it's squeezed exactly in |
10291 | 189 | // between the known lower bound and Pivot - 1. |
10292 | 189 | MachineBasicBlock *LeftMBB; |
10293 | 189 | if (FirstLeft == LastLeft && FirstLeft->Kind == CC_Range8 && |
10294 | 189 | FirstLeft->Low == W.GE1 && |
10295 | 189 | (FirstLeft->High->getValue() + 1LL) == Pivot->getValue()0 ) { |
10296 | 0 | LeftMBB = FirstLeft->MBB; |
10297 | 189 | } else { |
10298 | 189 | LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock()); |
10299 | 189 | FuncInfo.MF->insert(BBI, LeftMBB); |
10300 | 189 | WorkList.push_back( |
10301 | 189 | {LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultProb / 2}); |
10302 | 189 | // Put Cond in a virtual register to make it available from the new blocks. |
10303 | 189 | ExportFromCurrentBlock(Cond); |
10304 | 189 | } |
10305 | 189 | |
10306 | 189 | // Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a |
10307 | 189 | // single cluster, RHS.Low == Pivot, and we can branch to its destination |
10308 | 189 | // directly if RHS.High equals the current upper bound. |
10309 | 189 | MachineBasicBlock *RightMBB; |
10310 | 189 | if (FirstRight == LastRight && FirstRight->Kind == CC_Range2 && |
10311 | 189 | W.LT2 && (FirstRight->High->getValue() + 1ULL) == W.LT->getValue()0 ) { |
10312 | 0 | RightMBB = FirstRight->MBB; |
10313 | 189 | } else { |
10314 | 189 | RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock()); |
10315 | 189 | FuncInfo.MF->insert(BBI, RightMBB); |
10316 | 189 | WorkList.push_back( |
10317 | 189 | {RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultProb / 2}); |
10318 | 189 | // Put Cond in a virtual register to make it available from the new blocks. |
10319 | 189 | ExportFromCurrentBlock(Cond); |
10320 | 189 | } |
10321 | 189 | |
10322 | 189 | // Create the CaseBlock record that will be used to lower the branch. |
10323 | 189 | CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB, |
10324 | 189 | getCurSDLoc(), LeftProb, RightProb); |
10325 | 189 | |
10326 | 189 | if (W.MBB == SwitchMBB) |
10327 | 125 | visitSwitchCase(CB, SwitchMBB); |
10328 | 64 | else |
10329 | 64 | SL->SwitchCases.push_back(CB); |
10330 | 189 | } |
10331 | | |
10332 | | // Scale CaseProb after peeling a case with the probablity of PeeledCaseProb |
10333 | | // from the swith statement. |
10334 | | static BranchProbability scaleCaseProbality(BranchProbability CaseProb, |
10335 | 903 | BranchProbability PeeledCaseProb) { |
10336 | 903 | if (PeeledCaseProb == BranchProbability::getOne()) |
10337 | 0 | return BranchProbability::getZero(); |
10338 | 903 | BranchProbability SwitchProb = PeeledCaseProb.getCompl(); |
10339 | 903 | |
10340 | 903 | uint32_t Numerator = CaseProb.getNumerator(); |
10341 | 903 | uint32_t Denominator = SwitchProb.scale(CaseProb.getDenominator()); |
10342 | 903 | return BranchProbability(Numerator, std::max(Numerator, Denominator)); |
10343 | 903 | } |
10344 | | |
10345 | | // Try to peel the top probability case if it exceeds the threshold. |
10346 | | // Return current MachineBasicBlock for the switch statement if the peeling |
10347 | | // does not occur. |
10348 | | // If the peeling is performed, return the newly created MachineBasicBlock |
10349 | | // for the peeled switch statement. Also update Clusters to remove the peeled |
10350 | | // case. PeeledCaseProb is the BranchProbability for the peeled case. |
10351 | | MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster( |
10352 | | const SwitchInst &SI, CaseClusterVector &Clusters, |
10353 | 7.70k | BranchProbability &PeeledCaseProb) { |
10354 | 7.70k | MachineBasicBlock *SwitchMBB = FuncInfo.MBB; |
10355 | 7.70k | // Don't perform if there is only one cluster or optimizing for size. |
10356 | 7.70k | if (SwitchPeelThreshold > 100 || !FuncInfo.BPI7.67k || Clusters.size() < 27.63k || |
10357 | 7.70k | TM.getOptLevel() == CodeGenOpt::None7.60k || |
10358 | 7.70k | SwitchMBB->getParent()->getFunction().hasMinSize()7.60k ) |
10359 | 202 | return SwitchMBB; |
10360 | 7.50k | |
10361 | 7.50k | BranchProbability TopCaseProb = BranchProbability(SwitchPeelThreshold, 100); |
10362 | 7.50k | unsigned PeeledCaseIndex = 0; |
10363 | 7.50k | bool SwitchPeeled = false; |
10364 | 32.7k | for (unsigned Index = 0; Index < Clusters.size(); ++Index25.2k ) { |
10365 | 25.2k | CaseCluster &CC = Clusters[Index]; |
10366 | 25.2k | if (CC.Prob < TopCaseProb) |
10367 | 24.7k | continue; |
10368 | 412 | TopCaseProb = CC.Prob; |
10369 | 412 | PeeledCaseIndex = Index; |
10370 | 412 | SwitchPeeled = true; |
10371 | 412 | } |
10372 | 7.50k | if (!SwitchPeeled) |
10373 | 7.09k | return SwitchMBB; |
10374 | 412 | |
10375 | 412 | LLVM_DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: " |
10376 | 412 | << TopCaseProb << "\n"); |
10377 | 412 | |
10378 | 412 | // Record the MBB for the peeled switch statement. |
10379 | 412 | MachineFunction::iterator BBI(SwitchMBB); |
10380 | 412 | ++BBI; |
10381 | 412 | MachineBasicBlock *PeeledSwitchMBB = |
10382 | 412 | FuncInfo.MF->CreateMachineBasicBlock(SwitchMBB->getBasicBlock()); |
10383 | 412 | FuncInfo.MF->insert(BBI, PeeledSwitchMBB); |
10384 | 412 | |
10385 | 412 | ExportFromCurrentBlock(SI.getCondition()); |
10386 | 412 | auto PeeledCaseIt = Clusters.begin() + PeeledCaseIndex; |
10387 | 412 | SwitchWorkListItem W = {SwitchMBB, PeeledCaseIt, PeeledCaseIt, |
10388 | 412 | nullptr, nullptr, TopCaseProb.getCompl()}; |
10389 | 412 | lowerWorkItem(W, SI.getCondition(), SwitchMBB, PeeledSwitchMBB); |
10390 | 412 | |
10391 | 412 | Clusters.erase(PeeledCaseIt); |
10392 | 491 | for (CaseCluster &CC : Clusters) { |
10393 | 491 | LLVM_DEBUG( |
10394 | 491 | dbgs() << "Scale the probablity for one cluster, before scaling: " |
10395 | 491 | << CC.Prob << "\n"); |
10396 | 491 | CC.Prob = scaleCaseProbality(CC.Prob, TopCaseProb); |
10397 | 491 | LLVM_DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n"); |
10398 | 491 | } |
10399 | 412 | PeeledCaseProb = TopCaseProb; |
10400 | 412 | return PeeledSwitchMBB; |
10401 | 412 | } |
10402 | | |
10403 | 7.70k | void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { |
10404 | 7.70k | // Extract cases from the switch. |
10405 | 7.70k | BranchProbabilityInfo *BPI = FuncInfo.BPI; |
10406 | 7.70k | CaseClusterVector Clusters; |
10407 | 7.70k | Clusters.reserve(SI.getNumCases()); |
10408 | 28.5k | for (auto I : SI.cases()) { |
10409 | 28.5k | MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()]; |
10410 | 28.5k | const ConstantInt *CaseVal = I.getCaseValue(); |
10411 | 28.5k | BranchProbability Prob = |
10412 | 28.5k | BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex())28.1k |
10413 | 28.5k | : BranchProbability(1, SI.getNumCases() + 1)425 ; |
10414 | 28.5k | Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob)); |
10415 | 28.5k | } |
10416 | 7.70k | |
10417 | 7.70k | MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()]; |
10418 | 7.70k | |
10419 | 7.70k | // Cluster adjacent cases with the same destination. We do this at all |
10420 | 7.70k | // optimization levels because it's cheap to do and will make codegen faster |
10421 | 7.70k | // if there are many clusters. |
10422 | 7.70k | sortAndRangeify(Clusters); |
10423 | 7.70k | |
10424 | 7.70k | // The branch probablity of the peeled case. |
10425 | 7.70k | BranchProbability PeeledCaseProb = BranchProbability::getZero(); |
10426 | 7.70k | MachineBasicBlock *PeeledSwitchMBB = |
10427 | 7.70k | peelDominantCaseCluster(SI, Clusters, PeeledCaseProb); |
10428 | 7.70k | |
10429 | 7.70k | // If there is only the default destination, jump there directly. |
10430 | 7.70k | MachineBasicBlock *SwitchMBB = FuncInfo.MBB; |
10431 | 7.70k | if (Clusters.empty()) { |
10432 | 4 | assert(PeeledSwitchMBB == SwitchMBB); |
10433 | 4 | SwitchMBB->addSuccessor(DefaultMBB); |
10434 | 4 | if (DefaultMBB != NextBlock(SwitchMBB)) { |
10435 | 2 | DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, |
10436 | 2 | getControlRoot(), DAG.getBasicBlock(DefaultMBB))); |
10437 | 2 | } |
10438 | 4 | return; |
10439 | 4 | } |
10440 | 7.70k | |
10441 | 7.70k | SL->findJumpTables(Clusters, &SI, DefaultMBB); |
10442 | 7.70k | SL->findBitTestClusters(Clusters, &SI); |
10443 | 7.70k | |
10444 | 7.70k | LLVM_DEBUG({ |
10445 | 7.70k | dbgs() << "Case clusters: "; |
10446 | 7.70k | for (const CaseCluster &C : Clusters) { |
10447 | 7.70k | if (C.Kind == CC_JumpTable) |
10448 | 7.70k | dbgs() << "JT:"; |
10449 | 7.70k | if (C.Kind == CC_BitTests) |
10450 | 7.70k | dbgs() << "BT:"; |
10451 | 7.70k | |
10452 | 7.70k | C.Low->getValue().print(dbgs(), true); |
10453 | 7.70k | if (C.Low != C.High) { |
10454 | 7.70k | dbgs() << '-'; |
10455 | 7.70k | C.High->getValue().print(dbgs(), true); |
10456 | 7.70k | } |
10457 | 7.70k | dbgs() << ' '; |
10458 | 7.70k | } |
10459 | 7.70k | dbgs() << '\n'; |
10460 | 7.70k | }); |
10461 | 7.70k | |
10462 | 7.70k | assert(!Clusters.empty()); |
10463 | 7.70k | SwitchWorkList WorkList; |
10464 | 7.70k | CaseClusterIt First = Clusters.begin(); |
10465 | 7.70k | CaseClusterIt Last = Clusters.end() - 1; |
10466 | 7.70k | auto DefaultProb = getEdgeProbability(PeeledSwitchMBB, DefaultMBB); |
10467 | 7.70k | // Scale the branchprobability for DefaultMBB if the peel occurs and |
10468 | 7.70k | // DefaultMBB is not replaced. |
10469 | 7.70k | if (PeeledCaseProb != BranchProbability::getZero() && |
10470 | 7.70k | DefaultMBB == FuncInfo.MBBMap[SI.getDefaultDest()]412 ) |
10471 | 412 | DefaultProb = scaleCaseProbality(DefaultProb, PeeledCaseProb); |
10472 | 7.70k | WorkList.push_back( |
10473 | 7.70k | {PeeledSwitchMBB, First, Last, nullptr, nullptr, DefaultProb}); |
10474 | 7.70k | |
10475 | 15.7k | while (!WorkList.empty()) { |
10476 | 8.08k | SwitchWorkListItem W = WorkList.back(); |
10477 | 8.08k | WorkList.pop_back(); |
10478 | 8.08k | unsigned NumClusters = W.LastCluster - W.FirstCluster + 1; |
10479 | 8.08k | |
10480 | 8.08k | if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None211 && |
10481 | 8.08k | !DefaultMBB->getParent()->getFunction().hasMinSize()192 ) { |
10482 | 189 | // For optimized builds, lower large range as a balanced binary tree. |
10483 | 189 | splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB); |
10484 | 189 | continue; |
10485 | 189 | } |
10486 | 7.89k | |
10487 | 7.89k | lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB); |
10488 | 7.89k | } |
10489 | 7.70k | } |