/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/include/llvm/Target/TargetLowering.h
Line | Count | Source (jump to first uncovered line) |
1 | | //===- llvm/Target/TargetLowering.h - Target Lowering Info ------*- C++ -*-===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | /// |
10 | | /// \file |
11 | | /// This file describes how to lower LLVM code to machine code. This has two |
12 | | /// main components: |
13 | | /// |
14 | | /// 1. Which ValueTypes are natively supported by the target. |
15 | | /// 2. Which operations are supported for supported ValueTypes. |
16 | | /// 3. Cost thresholds for alternative implementations of certain operations. |
17 | | /// |
18 | | /// In addition it has a few other components, like information about FP |
19 | | /// immediates. |
20 | | /// |
21 | | //===----------------------------------------------------------------------===// |
22 | | |
23 | | #ifndef LLVM_TARGET_TARGETLOWERING_H |
24 | | #define LLVM_TARGET_TARGETLOWERING_H |
25 | | |
26 | | #include "llvm/ADT/APInt.h" |
27 | | #include "llvm/ADT/ArrayRef.h" |
28 | | #include "llvm/ADT/DenseMap.h" |
29 | | #include "llvm/ADT/STLExtras.h" |
30 | | #include "llvm/ADT/SmallVector.h" |
31 | | #include "llvm/ADT/StringRef.h" |
32 | | #include "llvm/CodeGen/DAGCombine.h" |
33 | | #include "llvm/CodeGen/ISDOpcodes.h" |
34 | | #include "llvm/CodeGen/MachineValueType.h" |
35 | | #include "llvm/CodeGen/RuntimeLibcalls.h" |
36 | | #include "llvm/CodeGen/SelectionDAG.h" |
37 | | #include "llvm/CodeGen/SelectionDAGNodes.h" |
38 | | #include "llvm/CodeGen/ValueTypes.h" |
39 | | #include "llvm/IR/Attributes.h" |
40 | | #include "llvm/IR/CallSite.h" |
41 | | #include "llvm/IR/CallingConv.h" |
42 | | #include "llvm/IR/DataLayout.h" |
43 | | #include "llvm/IR/DerivedTypes.h" |
44 | | #include "llvm/IR/Function.h" |
45 | | #include "llvm/IR/IRBuilder.h" |
46 | | #include "llvm/IR/InlineAsm.h" |
47 | | #include "llvm/IR/Instruction.h" |
48 | | #include "llvm/IR/Instructions.h" |
49 | | #include "llvm/IR/Type.h" |
50 | | #include "llvm/MC/MCRegisterInfo.h" |
51 | | #include "llvm/Support/AtomicOrdering.h" |
52 | | #include "llvm/Support/Casting.h" |
53 | | #include "llvm/Support/ErrorHandling.h" |
54 | | #include "llvm/Target/TargetCallingConv.h" |
55 | | #include "llvm/Target/TargetMachine.h" |
56 | | #include <algorithm> |
57 | | #include <cassert> |
58 | | #include <climits> |
59 | | #include <cstdint> |
60 | | #include <iterator> |
61 | | #include <map> |
62 | | #include <string> |
63 | | #include <utility> |
64 | | #include <vector> |
65 | | |
66 | | namespace llvm { |
67 | | |
68 | | class BranchProbability; |
69 | | class CCState; |
70 | | class CCValAssign; |
71 | | class Constant; |
72 | | class FastISel; |
73 | | class FunctionLoweringInfo; |
74 | | class GlobalValue; |
75 | | class IntrinsicInst; |
76 | | struct KnownBits; |
77 | | class LLVMContext; |
78 | | class MachineBasicBlock; |
79 | | class MachineFunction; |
80 | | class MachineInstr; |
81 | | class MachineJumpTableInfo; |
82 | | class MachineLoop; |
83 | | class MachineRegisterInfo; |
84 | | class MCContext; |
85 | | class MCExpr; |
86 | | class Module; |
87 | | class TargetRegisterClass; |
88 | | class TargetLibraryInfo; |
89 | | class TargetRegisterInfo; |
90 | | class Value; |
91 | | |
92 | | namespace Sched { |
93 | | |
94 | | enum Preference { |
95 | | None, // No preference |
96 | | Source, // Follow source order. |
97 | | RegPressure, // Scheduling for lowest register pressure. |
98 | | Hybrid, // Scheduling for both latency and register pressure. |
99 | | ILP, // Scheduling for ILP in low register pressure mode. |
100 | | VLIW // Scheduling for VLIW targets. |
101 | | }; |
102 | | |
103 | | } // end namespace Sched |
104 | | |
105 | | /// This base class for TargetLowering contains the SelectionDAG-independent |
106 | | /// parts that can be used from the rest of CodeGen. |
107 | | class TargetLoweringBase { |
108 | | public: |
109 | | /// This enum indicates whether operations are valid for a target, and if not, |
110 | | /// what action should be used to make them valid. |
111 | | enum LegalizeAction : uint8_t { |
112 | | Legal, // The target natively supports this operation. |
113 | | Promote, // This operation should be executed in a larger type. |
114 | | Expand, // Try to expand this to other ops, otherwise use a libcall. |
115 | | LibCall, // Don't try to expand this to other ops, always use a libcall. |
116 | | Custom // Use the LowerOperation hook to implement custom lowering. |
117 | | }; |
118 | | |
119 | | /// This enum indicates whether types are legal for a target, and if not, |
120 | | /// what action should be used to make them valid. |
121 | | enum LegalizeTypeAction : uint8_t { |
122 | | TypeLegal, // The target natively supports this type. |
123 | | TypePromoteInteger, // Replace this integer with a larger one. |
124 | | TypeExpandInteger, // Split this integer into two of half the size. |
125 | | TypeSoftenFloat, // Convert this float to a same size integer type, |
126 | | // if an operation is not supported in target HW. |
127 | | TypeExpandFloat, // Split this float into two of half the size. |
128 | | TypeScalarizeVector, // Replace this one-element vector with its element. |
129 | | TypeSplitVector, // Split this vector into two of half the size. |
130 | | TypeWidenVector, // This vector should be widened into a larger vector. |
131 | | TypePromoteFloat // Replace this float with a larger one. |
132 | | }; |
133 | | |
134 | | /// LegalizeKind holds the legalization kind that needs to happen to EVT |
135 | | /// in order to type-legalize it. |
136 | | using LegalizeKind = std::pair<LegalizeTypeAction, EVT>; |
137 | | |
138 | | /// Enum that describes how the target represents true/false values. |
139 | | enum BooleanContent { |
140 | | UndefinedBooleanContent, // Only bit 0 counts, the rest can hold garbage. |
141 | | ZeroOrOneBooleanContent, // All bits zero except for bit 0. |
142 | | ZeroOrNegativeOneBooleanContent // All bits equal to bit 0. |
143 | | }; |
144 | | |
145 | | /// Enum that describes what type of support for selects the target has. |
146 | | enum SelectSupportKind { |
147 | | ScalarValSelect, // The target supports scalar selects (ex: cmov). |
148 | | ScalarCondVectorVal, // The target supports selects with a scalar condition |
149 | | // and vector values (ex: cmov). |
150 | | VectorMaskSelect // The target supports vector selects with a vector |
151 | | // mask (ex: x86 blends). |
152 | | }; |
153 | | |
154 | | /// Enum that specifies what an atomic load/AtomicRMWInst is expanded |
155 | | /// to, if at all. Exists because different targets have different levels of |
156 | | /// support for these atomic instructions, and also have different options |
157 | | /// w.r.t. what they should expand to. |
158 | | enum class AtomicExpansionKind { |
159 | | None, // Don't expand the instruction. |
160 | | LLSC, // Expand the instruction into loadlinked/storeconditional; used |
161 | | // by ARM/AArch64. |
162 | | LLOnly, // Expand the (load) instruction into just a load-linked, which has |
163 | | // greater atomic guarantees than a normal load. |
164 | | CmpXChg, // Expand the instruction into cmpxchg; used by at least X86. |
165 | | }; |
166 | | |
167 | | /// Enum that specifies when a multiplication should be expanded. |
168 | | enum class MulExpansionKind { |
169 | | Always, // Always expand the instruction. |
170 | | OnlyLegalOrCustom, // Only expand when the resulting instructions are legal |
171 | | // or custom. |
172 | | }; |
173 | | |
174 | | class ArgListEntry { |
175 | | public: |
176 | | Value *Val = nullptr; |
177 | | SDValue Node = SDValue(); |
178 | | Type *Ty = nullptr; |
179 | | bool IsSExt : 1; |
180 | | bool IsZExt : 1; |
181 | | bool IsInReg : 1; |
182 | | bool IsSRet : 1; |
183 | | bool IsNest : 1; |
184 | | bool IsByVal : 1; |
185 | | bool IsInAlloca : 1; |
186 | | bool IsReturned : 1; |
187 | | bool IsSwiftSelf : 1; |
188 | | bool IsSwiftError : 1; |
189 | | uint16_t Alignment = 0; |
190 | | |
191 | | ArgListEntry() |
192 | | : IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false), |
193 | | IsNest(false), IsByVal(false), IsInAlloca(false), IsReturned(false), |
194 | 4.03M | IsSwiftSelf(false), IsSwiftError(false) {} |
195 | | |
196 | | void setAttributes(ImmutableCallSite *CS, unsigned ArgIdx); |
197 | | }; |
198 | | using ArgListTy = std::vector<ArgListEntry>; |
199 | | |
200 | | virtual void markLibCallAttributes(MachineFunction *MF, unsigned CC, |
201 | 30.7k | ArgListTy &Args) const {}; |
202 | | |
203 | 319k | static ISD::NodeType getExtendForContent(BooleanContent Content) { |
204 | 319k | switch (Content) { |
205 | 7 | case UndefinedBooleanContent: |
206 | 7 | // Extend by adding rubbish bits. |
207 | 7 | return ISD::ANY_EXTEND; |
208 | 316k | case ZeroOrOneBooleanContent: |
209 | 316k | // Extend by adding zero bits. |
210 | 316k | return ISD::ZERO_EXTEND; |
211 | 2.78k | case ZeroOrNegativeOneBooleanContent: |
212 | 2.78k | // Extend by copying the sign bit. |
213 | 2.78k | return ISD::SIGN_EXTEND; |
214 | 319k | } |
215 | 0 | llvm_unreachable("Invalid content kind"); |
216 | 319k | } |
217 | | |
218 | | /// NOTE: The TargetMachine owns TLOF. |
219 | | explicit TargetLoweringBase(const TargetMachine &TM); |
220 | | TargetLoweringBase(const TargetLoweringBase &) = delete; |
221 | | TargetLoweringBase &operator=(const TargetLoweringBase &) = delete; |
222 | 28.0k | virtual ~TargetLoweringBase() = default; |
223 | | |
224 | | protected: |
225 | | /// \brief Initialize all of the actions to default values. |
226 | | void initActions(); |
227 | | |
228 | | public: |
229 | 10.0M | const TargetMachine &getTargetMachine() const { return TM; } |
230 | | |
231 | 5.76M | virtual bool useSoftFloat() const { return false; } |
232 | | |
233 | | /// Return the pointer type for the given address space, defaults to |
234 | | /// the pointer type from the data layout. |
235 | | /// FIXME: The default needs to be removed once all the code is updated. |
236 | 58.1M | MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const { |
237 | 58.1M | return MVT::getIntegerVT(DL.getPointerSizeInBits(AS)); |
238 | 58.1M | } |
239 | | |
240 | | /// Return the type for frame index, which is determined by |
241 | | /// the alloca address space specified through the data layout. |
242 | 591k | MVT getFrameIndexTy(const DataLayout &DL) const { |
243 | 591k | return getPointerTy(DL, DL.getAllocaAddrSpace()); |
244 | 591k | } |
245 | | |
246 | | /// Return the type for operands of fence. |
247 | | /// TODO: Let fence operands be of i32 type and remove this. |
248 | 7.38k | virtual MVT getFenceOperandTy(const DataLayout &DL) const { |
249 | 7.38k | return getPointerTy(DL); |
250 | 7.38k | } |
251 | | |
252 | | /// EVT is not used in-tree, but is used by out-of-tree target. |
253 | | /// A documentation for this function would be nice... |
254 | | virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const; |
255 | | |
256 | | EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const; |
257 | | |
258 | | /// Returns the type to be used for the index operand of: |
259 | | /// ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT, |
260 | | /// ISD::INSERT_SUBVECTOR, and ISD::EXTRACT_SUBVECTOR |
261 | 248k | virtual MVT getVectorIdxTy(const DataLayout &DL) const { |
262 | 248k | return getPointerTy(DL); |
263 | 248k | } |
264 | | |
265 | 458k | virtual bool isSelectSupported(SelectSupportKind /*kind*/) const { |
266 | 458k | return true; |
267 | 458k | } |
268 | | |
269 | | /// Return true if multiple condition registers are available. |
270 | 5.90M | bool hasMultipleConditionRegisters() const { |
271 | 5.90M | return HasMultipleConditionRegisters; |
272 | 5.90M | } |
273 | | |
274 | | /// Return true if the target has BitExtract instructions. |
275 | 225k | bool hasExtractBitsInsn() const { return HasExtractBitsInsn; } |
276 | | |
277 | | /// Return the preferred vector type legalization action. |
278 | | virtual TargetLoweringBase::LegalizeTypeAction |
279 | 3.54M | getPreferredVectorAction(EVT VT) const { |
280 | 3.54M | // The default action for one element vectors is to scalarize |
281 | 3.54M | if (VT.getVectorNumElements() == 1) |
282 | 553k | return TypeScalarizeVector; |
283 | 3.54M | // The default action for other vectors is to promote |
284 | 2.99M | return TypePromoteInteger; |
285 | 3.54M | } |
286 | | |
287 | | // There are two general methods for expanding a BUILD_VECTOR node: |
288 | | // 1. Use SCALAR_TO_VECTOR on the defined scalar values and then shuffle |
289 | | // them together. |
290 | | // 2. Build the vector on the stack and then load it. |
291 | | // If this function returns true, then method (1) will be used, subject to |
292 | | // the constraint that all of the necessary shuffles are legal (as determined |
293 | | // by isShuffleMaskLegal). If this function returns false, then method (2) is |
294 | | // always used. The vector type, and the number of defined values, are |
295 | | // provided. |
296 | | virtual bool |
297 | | shouldExpandBuildVectorWithShuffles(EVT /* VT */, |
298 | 1.67k | unsigned DefinedValues) const { |
299 | 1.67k | return DefinedValues < 3; |
300 | 1.67k | } |
301 | | |
302 | | /// Return true if integer divide is usually cheaper than a sequence of |
303 | | /// several shifts, adds, and multiplies for this target. |
304 | | /// The definition of "cheaper" may depend on whether we're optimizing |
305 | | /// for speed or for size. |
306 | 571 | virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const { return false; } |
307 | | |
308 | | /// Return true if the target can handle a standalone remainder operation. |
309 | 0 | virtual bool hasStandaloneRem(EVT VT) const { |
310 | 0 | return true; |
311 | 0 | } |
312 | | |
313 | | /// Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X). |
314 | 70 | virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const { |
315 | 70 | // Default behavior is to replace SQRT(X) with X*RSQRT(X). |
316 | 70 | return false; |
317 | 70 | } |
318 | | |
319 | | /// Reciprocal estimate status values used by the functions below. |
320 | | enum ReciprocalEstimate : int { |
321 | | Unspecified = -1, |
322 | | Disabled = 0, |
323 | | Enabled = 1 |
324 | | }; |
325 | | |
326 | | /// Return a ReciprocalEstimate enum value for a square root of the given type |
327 | | /// based on the function's attributes. If the operation is not overridden by |
328 | | /// the function's attributes, "Unspecified" is returned and target defaults |
329 | | /// are expected to be used for instruction selection. |
330 | | int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const; |
331 | | |
332 | | /// Return a ReciprocalEstimate enum value for a division of the given type |
333 | | /// based on the function's attributes. If the operation is not overridden by |
334 | | /// the function's attributes, "Unspecified" is returned and target defaults |
335 | | /// are expected to be used for instruction selection. |
336 | | int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const; |
337 | | |
338 | | /// Return the refinement step count for a square root of the given type based |
339 | | /// on the function's attributes. If the operation is not overridden by |
340 | | /// the function's attributes, "Unspecified" is returned and target defaults |
341 | | /// are expected to be used for instruction selection. |
342 | | int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const; |
343 | | |
344 | | /// Return the refinement step count for a division of the given type based |
345 | | /// on the function's attributes. If the operation is not overridden by |
346 | | /// the function's attributes, "Unspecified" is returned and target defaults |
347 | | /// are expected to be used for instruction selection. |
348 | | int getDivRefinementSteps(EVT VT, MachineFunction &MF) const; |
349 | | |
350 | | /// Returns true if target has indicated at least one type should be bypassed. |
351 | 585k | bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); } |
352 | | |
353 | | /// Returns map of slow types for division or remainder with corresponding |
354 | | /// fast types |
355 | 10.7k | const DenseMap<unsigned int, unsigned int> &getBypassSlowDivWidths() const { |
356 | 10.7k | return BypassSlowDivWidths; |
357 | 10.7k | } |
358 | | |
359 | | /// Return true if Flow Control is an expensive operation that should be |
360 | | /// avoided. |
361 | 103k | bool isJumpExpensive() const { return JumpIsExpensive; } |
362 | | |
363 | | /// Return true if selects are only cheaper than branches if the branch is |
364 | | /// unlikely to be predicted right. |
365 | 463k | bool isPredictableSelectExpensive() const { |
366 | 463k | return PredictableSelectIsExpensive; |
367 | 463k | } |
368 | | |
369 | | /// If a branch or a select condition is skewed in one direction by more than |
370 | | /// this factor, it is very likely to be predicted correctly. |
371 | | virtual BranchProbability getPredictableBranchThreshold() const; |
372 | | |
373 | | /// Return true if the following transform is beneficial: |
374 | | /// fold (conv (load x)) -> (load (conv*)x) |
375 | | /// On architectures that don't natively support some vector loads |
376 | | /// efficiently, casting the load to a smaller vector of larger types and |
377 | | /// loading is more efficient, however, this can be undone by optimizations in |
378 | | /// dag combiner. |
379 | | virtual bool isLoadBitCastBeneficial(EVT LoadVT, |
380 | 8.63k | EVT BitcastVT) const { |
381 | 8.63k | // Don't do if we could do an indexed load on the original type, but not on |
382 | 8.63k | // the new one. |
383 | 8.63k | if (!LoadVT.isSimple() || !BitcastVT.isSimple()) |
384 | 22 | return true; |
385 | 8.63k | |
386 | 8.61k | MVT LoadMVT = LoadVT.getSimpleVT(); |
387 | 8.61k | |
388 | 8.61k | // Don't bother doing this if it's just going to be promoted again later, as |
389 | 8.61k | // doing so might interfere with other combines. |
390 | 8.61k | if (getOperationAction(ISD::LOAD, LoadMVT) == Promote && |
391 | 3.57k | getTypeToPromoteTo(ISD::LOAD, LoadMVT) == BitcastVT.getSimpleVT()) |
392 | 2.84k | return false; |
393 | 8.61k | |
394 | 5.76k | return true; |
395 | 8.63k | } |
396 | | |
397 | | /// Return true if the following transform is beneficial: |
398 | | /// (store (y (conv x)), y*)) -> (store x, (x*)) |
399 | 12.0k | virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT) const { |
400 | 12.0k | // Default to the same logic as loads. |
401 | 12.0k | return isLoadBitCastBeneficial(StoreVT, BitcastVT); |
402 | 12.0k | } |
403 | | |
404 | | /// Return true if it is expected to be cheaper to do a store of a non-zero |
405 | | /// vector constant with the given size and type for the address space than to |
406 | | /// store the individual scalar element constants. |
407 | | virtual bool storeOfVectorConstantIsCheap(EVT MemVT, |
408 | | unsigned NumElem, |
409 | 462k | unsigned AddrSpace) const { |
410 | 462k | return false; |
411 | 462k | } |
412 | | |
413 | | /// Allow store merging after legalization in addition to before legalization. |
414 | | /// This may catch stores that do not exist earlier (eg, stores created from |
415 | | /// intrinsics). |
416 | 2.71M | virtual bool mergeStoresAfterLegalization() const { return false; } |
417 | | |
418 | | /// Returns if it's reasonable to merge stores to MemVT size. |
419 | | virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, |
420 | 4.08k | const SelectionDAG &DAG) const { |
421 | 4.08k | return true; |
422 | 4.08k | } |
423 | | |
424 | | /// \brief Return true if it is cheap to speculate a call to intrinsic cttz. |
425 | 8 | virtual bool isCheapToSpeculateCttz() const { |
426 | 8 | return false; |
427 | 8 | } |
428 | | |
429 | | /// \brief Return true if it is cheap to speculate a call to intrinsic ctlz. |
430 | 5 | virtual bool isCheapToSpeculateCtlz() const { |
431 | 5 | return false; |
432 | 5 | } |
433 | | |
434 | | /// \brief Return true if ctlz instruction is fast. |
435 | 0 | virtual bool isCtlzFast() const { |
436 | 0 | return false; |
437 | 0 | } |
438 | | |
439 | | /// Return true if it is safe to transform an integer-domain bitwise operation |
440 | | /// into the equivalent floating-point operation. This should be set to true |
441 | | /// if the target has IEEE-754-compliant fabs/fneg operations for the input |
442 | | /// type. |
443 | 37.9k | virtual bool hasBitPreservingFPLogic(EVT VT) const { |
444 | 37.9k | return false; |
445 | 37.9k | } |
446 | | |
447 | | /// \brief Return true if it is cheaper to split the store of a merged int val |
448 | | /// from a pair of smaller values into multiple stores. |
449 | 430 | virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const { |
450 | 430 | return false; |
451 | 430 | } |
452 | | |
453 | | /// \brief Return if the target supports combining a |
454 | | /// chain like: |
455 | | /// \code |
456 | | /// %andResult = and %val1, #mask |
457 | | /// %icmpResult = icmp %andResult, 0 |
458 | | /// \endcode |
459 | | /// into a single machine instruction of a form like: |
460 | | /// \code |
461 | | /// cc = test %register, #mask |
462 | | /// \endcode |
463 | 36 | virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const { |
464 | 36 | return false; |
465 | 36 | } |
466 | | |
467 | | /// Use bitwise logic to make pairs of compares more efficient. For example: |
468 | | /// and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0 |
469 | | /// This should be true when it takes more than one instruction to lower |
470 | | /// setcc (cmp+set on x86 scalar), when bitwise ops are faster than logic on |
471 | | /// condition bits (crand on PowerPC), and/or when reducing cmp+br is a win. |
472 | 11.4k | virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const { |
473 | 11.4k | return false; |
474 | 11.4k | } |
475 | | |
476 | | /// Return the preferred operand type if the target has a quick way to compare |
477 | | /// integer values of the given size. Assume that any legal integer type can |
478 | | /// be compared efficiently. Targets may override this to allow illegal wide |
479 | | /// types to return a vector type if there is support to compare that type. |
480 | 72 | virtual MVT hasFastEqualityCompare(unsigned NumBits) const { |
481 | 72 | MVT VT = MVT::getIntegerVT(NumBits); |
482 | 72 | return isTypeLegal(VT) ? VT : MVT::INVALID_SIMPLE_VALUE_TYPE; |
483 | 72 | } |
484 | | |
485 | | /// Return true if the target should transform: |
486 | | /// (X & Y) == Y ---> (~X & Y) == 0 |
487 | | /// (X & Y) != Y ---> (~X & Y) != 0 |
488 | | /// |
489 | | /// This may be profitable if the target has a bitwise and-not operation that |
490 | | /// sets comparison flags. A target may want to limit the transformation based |
491 | | /// on the type of Y or if Y is a constant. |
492 | | /// |
493 | | /// Note that the transform will not occur if Y is known to be a power-of-2 |
494 | | /// because a mask and compare of a single bit can be handled by inverting the |
495 | | /// predicate, for example: |
496 | | /// (X & 8) == 8 ---> (X & 8) != 0 |
497 | 424 | virtual bool hasAndNotCompare(SDValue Y) const { |
498 | 424 | return false; |
499 | 424 | } |
500 | | |
501 | | /// Return true if the target has a bitwise and-not operation: |
502 | | /// X = ~A & B |
503 | | /// This can be used to simplify select or other instructions. |
504 | 4.70k | virtual bool hasAndNot(SDValue X) const { |
505 | 4.70k | // If the target has the more complex version of this operation, assume that |
506 | 4.70k | // it has this operation too. |
507 | 4.70k | return hasAndNotCompare(X); |
508 | 4.70k | } |
509 | | |
510 | | /// \brief Return true if the target wants to use the optimization that |
511 | | /// turns ext(promotableInst1(...(promotableInstN(load)))) into |
512 | | /// promotedInst1(...(promotedInstN(ext(load)))). |
513 | 918k | bool enableExtLdPromotion() const { return EnableExtLdPromotion; } |
514 | | |
515 | | /// Return true if the target can combine store(extractelement VectorTy, |
516 | | /// Idx). |
517 | | /// \p Cost[out] gives the cost of that transformation when this is true. |
518 | | virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, |
519 | 31.6k | unsigned &Cost) const { |
520 | 31.6k | return false; |
521 | 31.6k | } |
522 | | |
523 | | /// Return true if target supports floating point exceptions. |
524 | 16.5M | bool hasFloatingPointExceptions() const { |
525 | 16.5M | return HasFloatingPointExceptions; |
526 | 16.5M | } |
527 | | |
528 | | /// Return true if target always benefits from combining into FMA for a |
529 | | /// given value type. This must typically return false on targets where FMA |
530 | | /// takes more cycles to execute than FADD. |
531 | 4.25k | virtual bool enableAggressiveFMAFusion(EVT VT) const { |
532 | 4.25k | return false; |
533 | 4.25k | } |
534 | | |
535 | | /// Return the ValueType of the result of SETCC operations. |
536 | | virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, |
537 | | EVT VT) const; |
538 | | |
539 | | /// Return the ValueType for comparison libcalls. Comparison libcalls include |
540 | | /// floating point comparison calls, and Ordered/Unordered check calls on |
541 | | /// floating point numbers. |
542 | | virtual |
543 | | MVT::SimpleValueType getCmpLibcallReturnType() const; |
544 | | |
545 | | /// For targets without i1 registers, this gives the nature of the high-bits |
546 | | /// of boolean values held in types wider than i1. |
547 | | /// |
548 | | /// "Boolean values" are special true/false values produced by nodes like |
549 | | /// SETCC and consumed (as the condition) by nodes like SELECT and BRCOND. |
550 | | /// Not to be confused with general values promoted from i1. Some cpus |
551 | | /// distinguish between vectors of boolean and scalars; the isVec parameter |
552 | | /// selects between the two kinds. For example on X86 a scalar boolean should |
553 | | /// be zero extended from i1, while the elements of a vector of booleans |
554 | | /// should be sign extended from i1. |
555 | | /// |
556 | | /// Some cpus also treat floating point types the same way as they treat |
557 | | /// vectors instead of the way they treat scalars. |
558 | 2.17M | BooleanContent getBooleanContents(bool isVec, bool isFloat) const { |
559 | 2.17M | if (isVec) |
560 | 121k | return BooleanVectorContents; |
561 | 2.05M | return isFloat ? BooleanFloatContents : BooleanContents; |
562 | 2.17M | } |
563 | | |
564 | 2.15M | BooleanContent getBooleanContents(EVT Type) const { |
565 | 2.15M | return getBooleanContents(Type.isVector(), Type.isFloatingPoint()); |
566 | 2.15M | } |
567 | | |
568 | | /// Return target scheduling preference. |
569 | 204k | Sched::Preference getSchedulingPreference() const { |
570 | 204k | return SchedPreferenceInfo; |
571 | 204k | } |
572 | | |
573 | | /// Some scheduler, e.g. hybrid, can switch to different scheduling heuristics |
574 | | /// for different nodes. This function returns the preference (or none) for |
575 | | /// the given node. |
576 | 29.0M | virtual Sched::Preference getSchedulingPreference(SDNode *) const { |
577 | 29.0M | return Sched::None; |
578 | 29.0M | } |
579 | | |
580 | | /// Return the register class that should be used for the specified value |
581 | | /// type. |
582 | 28.8M | virtual const TargetRegisterClass *getRegClassFor(MVT VT) const { |
583 | 28.8M | const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy]; |
584 | 28.8M | assert(RC && "This value type is not natively supported!"); |
585 | 28.8M | return RC; |
586 | 28.8M | } |
587 | | |
588 | | /// Return the 'representative' register class for the specified value |
589 | | /// type. |
590 | | /// |
591 | | /// The 'representative' register class is the largest legal super-reg |
592 | | /// register class for the register class of the value type. For example, on |
593 | | /// i386 the rep register class for i8, i16, and i32 are GR32; while the rep |
594 | | /// register class is GR64 on x86_64. |
595 | 1.28M | virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const { |
596 | 1.28M | const TargetRegisterClass *RC = RepRegClassForVT[VT.SimpleTy]; |
597 | 1.28M | return RC; |
598 | 1.28M | } |
599 | | |
600 | | /// Return the cost of the 'representative' register class for the specified |
601 | | /// value type. |
602 | 940k | virtual uint8_t getRepRegClassCostFor(MVT VT) const { |
603 | 940k | return RepRegClassCostForVT[VT.SimpleTy]; |
604 | 940k | } |
605 | | |
606 | | /// Return true if the target has native support for the specified value type. |
607 | | /// This means that it has a register that directly holds it without |
608 | | /// promotions or expansions. |
609 | 990M | bool isTypeLegal(EVT VT) const { |
610 | 990M | assert(!VT.isSimple() || |
611 | 990M | (unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT)); |
612 | 975M | return VT.isSimple() && RegClassForVT[VT.getSimpleVT().SimpleTy] != nullptr; |
613 | 990M | } |
614 | | |
615 | | class ValueTypeActionImpl { |
616 | | /// ValueTypeActions - For each value type, keep a LegalizeTypeAction enum |
617 | | /// that indicates how instruction selection should deal with the type. |
618 | | LegalizeTypeAction ValueTypeActions[MVT::LAST_VALUETYPE]; |
619 | | |
620 | | public: |
621 | 44.6k | ValueTypeActionImpl() { |
622 | 44.6k | std::fill(std::begin(ValueTypeActions), std::end(ValueTypeActions), |
623 | 44.6k | TypeLegal); |
624 | 44.6k | } |
625 | | |
626 | 226M | LegalizeTypeAction getTypeAction(MVT VT) const { |
627 | 226M | return ValueTypeActions[VT.SimpleTy]; |
628 | 226M | } |
629 | | |
630 | 4.14M | void setTypeAction(MVT VT, LegalizeTypeAction Action) { |
631 | 4.14M | ValueTypeActions[VT.SimpleTy] = Action; |
632 | 4.14M | } |
633 | | }; |
634 | | |
635 | 3.45M | const ValueTypeActionImpl &getValueTypeActions() const { |
636 | 3.45M | return ValueTypeActions; |
637 | 3.45M | } |
638 | | |
639 | | /// Return how we should legalize values of this type, either it is already |
640 | | /// legal (return 'Legal') or we need to promote it to a larger type (return |
641 | | /// 'Promote'), or we need to expand it into multiple registers of smaller |
642 | | /// integer type (return 'Expand'). 'Custom' is not an option. |
643 | 217M | LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const { |
644 | 217M | return getTypeConversion(Context, VT).first; |
645 | 217M | } |
646 | 0 | LegalizeTypeAction getTypeAction(MVT VT) const { |
647 | 0 | return ValueTypeActions.getTypeAction(VT); |
648 | 0 | } |
649 | | |
650 | | /// For types supported by the target, this is an identity function. For |
651 | | /// types that must be promoted to larger types, this returns the larger type |
652 | | /// to promote to. For integer types that are larger than the largest integer |
653 | | /// register, this contains one step in the expansion to get to the smaller |
654 | | /// register. For illegal floating point types, this returns the integer type |
655 | | /// to transform to. |
656 | 2.34M | EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const { |
657 | 2.34M | return getTypeConversion(Context, VT).second; |
658 | 2.34M | } |
659 | | |
660 | | /// For types supported by the target, this is an identity function. For |
661 | | /// types that must be expanded (i.e. integer types that are larger than the |
662 | | /// largest integer register or illegal floating point types), this returns |
663 | | /// the largest legal type it will be expanded to. |
664 | 11.6k | EVT getTypeToExpandTo(LLVMContext &Context, EVT VT) const { |
665 | 11.6k | assert(!VT.isVector()); |
666 | 12.9k | while (true12.9k ) { |
667 | 12.9k | switch (getTypeAction(Context, VT)) { |
668 | 11.6k | case TypeLegal: |
669 | 11.6k | return VT; |
670 | 1.32k | case TypeExpandInteger: |
671 | 1.32k | VT = getTypeToTransformTo(Context, VT); |
672 | 1.32k | break; |
673 | 0 | default: |
674 | 0 | llvm_unreachable("Type is not legal nor is it to be expanded!"); |
675 | 12.9k | } |
676 | 12.9k | } |
677 | 11.6k | } |
678 | | |
679 | | /// Vector types are broken down into some number of legal first class types. |
680 | | /// For example, EVT::v8f32 maps to 2 EVT::v4f32 with Altivec or SSE1, or 8 |
681 | | /// promoted EVT::f64 values with the X86 FP stack. Similarly, EVT::v2i64 |
682 | | /// turns into 4 EVT::i32 values with both PPC and X86. |
683 | | /// |
684 | | /// This method returns the number of registers needed, and the VT for each |
685 | | /// register. It also returns the VT and quantity of the intermediate values |
686 | | /// before they are promoted/expanded. |
687 | | unsigned getVectorTypeBreakdown(LLVMContext &Context, EVT VT, |
688 | | EVT &IntermediateVT, |
689 | | unsigned &NumIntermediates, |
690 | | MVT &RegisterVT) const; |
691 | | |
  /// Certain targets such as MIPS require that some types such as vectors are
  /// always broken down into scalars in some contexts. This occurs even if the
  /// vector type is legal.
  ///
  /// The default implementation simply defers to the generic breakdown.
  virtual unsigned getVectorTypeBreakdownForCallingConv(
      LLVMContext &Context, EVT VT, EVT &IntermediateVT,
      unsigned &NumIntermediates, MVT &RegisterVT) const {
    return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates,
                                  RegisterVT);
  }
701 | | |
  /// Description of a target intrinsic's memory behavior, filled in by
  /// getTgtMemIntrinsic() so the intrinsic can be lowered to a
  /// MemIntrinsicNode.
  struct IntrinsicInfo {
    unsigned opc = 0;              // target opcode
    EVT memVT;                     // memory VT
    const Value* ptrVal = nullptr; // value representing memory location
    int offset = 0;                // offset off of ptrVal
    unsigned size = 0;             // the size of the memory location
                                   // (taken from memVT if zero)
    unsigned align = 1;            // alignment
    bool vol = false;              // is volatile?
    bool readMem = false;          // reads memory?
    bool writeMem = false;         // writes memory?

    IntrinsicInfo() = default;
  };
716 | | |
  /// Given an intrinsic, checks if on the target the intrinsic will need to map
  /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
  /// true and store the intrinsic information into the IntrinsicInfo that was
  /// passed to the function.
  ///
  /// The default assumes no target intrinsic touches memory.
  virtual bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
                                  unsigned /*Intrinsic*/) const {
    return false;
  }
725 | | |
  /// Returns true if the target can instruction select the specified FP
  /// immediate natively. If false, the legalizer will materialize the FP
  /// immediate as a load from a constant pool.
  ///
  /// Conservative default: no FP immediate is directly selectable.
  virtual bool isFPImmLegal(const APFloat &/*Imm*/, EVT /*VT*/) const {
    return false;
  }
732 | | |
  /// Targets can use this to indicate that they only support *some*
  /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
  /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to be
  /// legal.
  virtual bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const {
    return true;
  }
740 | | |
741 | | /// Returns true if the operation can trap for the value type. |
742 | | /// |
743 | | /// VT must be a legal type. By default, we optimistically assume most |
744 | | /// operations don't trap except for integer divide and remainder. |
745 | | virtual bool canOpTrap(unsigned Op, EVT VT) const; |
746 | | |
  /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
  /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
  /// constant pool entry.
  virtual bool isVectorClearMaskLegal(const SmallVectorImpl<int> &/*Mask*/,
                                      EVT /*VT*/) const {
    return false;
  }
754 | | |
755 | | /// Return how this operation should be treated: either it is legal, needs to |
756 | | /// be promoted to a larger size, needs to be expanded to some other code |
757 | | /// sequence, or the target has a custom expander for it. |
758 | 209M | LegalizeAction getOperationAction(unsigned Op, EVT VT) const { |
759 | 209M | if (VT.isExtended()209M ) return Expand144k ; |
760 | 209M | // If a target-specific SDNode requires legalization, require the target |
761 | 209M | // to provide custom legalization for it. |
762 | 209M | if (209M Op >= array_lengthof(OpActions[0])209M ) return Custom54 ; |
763 | 209M | return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op]; |
764 | 209M | } |
765 | | |
766 | | /// Return true if the specified operation is legal on this target or can be |
767 | | /// made legal with custom lowering. This is used to help guide high-level |
768 | | /// lowering decisions. |
769 | 43.9M | bool isOperationLegalOrCustom(unsigned Op, EVT VT) const { |
770 | 43.6M | return (VT == MVT::Other || isTypeLegal(VT)) && |
771 | 35.7M | (getOperationAction(Op, VT) == Legal || |
772 | 35.7M | getOperationAction(Op, VT) == Custom); |
773 | 43.9M | } |
774 | | |
775 | | /// Return true if the specified operation is legal on this target or can be |
776 | | /// made legal using promotion. This is used to help guide high-level lowering |
777 | | /// decisions. |
778 | 427k | bool isOperationLegalOrPromote(unsigned Op, EVT VT) const { |
779 | 427k | return (VT == MVT::Other || isTypeLegal(VT)) && |
780 | 427k | (getOperationAction(Op, VT) == Legal || |
781 | 427k | getOperationAction(Op, VT) == Promote); |
782 | 427k | } |
783 | | |
784 | | /// Return true if the specified operation is legal on this target or can be |
785 | | /// made legal with custom lowering or using promotion. This is used to help |
786 | | /// guide high-level lowering decisions. |
787 | 251k | bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT) const { |
788 | 251k | return (VT == MVT::Other || isTypeLegal(VT)) && |
789 | 248k | (getOperationAction(Op, VT) == Legal || |
790 | 47.9k | getOperationAction(Op, VT) == Custom || |
791 | 248k | getOperationAction(Op, VT) == Promote); |
792 | 251k | } |
793 | | |
  /// Return true if the operation uses custom lowering, regardless of whether
  /// the type is legal or not.
  bool isOperationCustom(unsigned Op, EVT VT) const {
    return getOperationAction(Op, VT) == Custom;
  }
799 | | |
800 | | /// Return true if lowering to a jump table is allowed. |
801 | 89.6k | bool areJTsAllowed(const Function *Fn) const { |
802 | 89.6k | if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true") |
803 | 2 | return false; |
804 | 89.6k | |
805 | 89.6k | return isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || |
806 | 87.6k | isOperationLegalOrCustom(ISD::BRIND, MVT::Other); |
807 | 89.6k | } |
808 | | |
809 | | /// Check whether the range [Low,High] fits in a machine word. |
810 | | bool rangeFitsInWord(const APInt &Low, const APInt &High, |
811 | 130k | const DataLayout &DL) const { |
812 | 130k | // FIXME: Using the pointer type doesn't seem ideal. |
813 | 130k | uint64_t BW = DL.getPointerSizeInBits(); |
814 | 130k | uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1; |
815 | 130k | return Range <= BW; |
816 | 130k | } |
817 | | |
818 | | /// Return true if lowering to a jump table is suitable for a set of case |
819 | | /// clusters which may contain \p NumCases cases, \p Range range of values. |
820 | | /// FIXME: This function check the maximum table size and density, but the |
821 | | /// minimum size is not checked. It would be nice if the the minimum size is |
822 | | /// also combined within this function. Currently, the minimum size check is |
823 | | /// performed in findJumpTable() in SelectionDAGBuiler and |
824 | | /// getEstimatedNumberOfCaseClusters() in BasicTTIImpl. |
825 | | bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases, |
826 | 59.5k | uint64_t Range) const { |
827 | 59.5k | const bool OptForSize = SI->getParent()->getParent()->optForSize(); |
828 | 59.5k | const unsigned MinDensity = getMinimumJumpTableDensity(OptForSize); |
829 | 59.5k | const unsigned MaxJumpTableSize = |
830 | 59.4k | OptForSize || getMaximumJumpTableSize() == 0 |
831 | 59.5k | ? UINT_MAX |
832 | 459 | : getMaximumJumpTableSize(); |
833 | 59.5k | // Check whether a range of clusters is dense enough for a jump table. |
834 | 59.5k | if (Range <= MaxJumpTableSize && |
835 | 59.5k | (NumCases * 100 >= Range * MinDensity)59.3k ) { |
836 | 41.4k | return true; |
837 | 41.4k | } |
838 | 18.1k | return false; |
839 | 59.5k | } |
840 | | |
841 | | /// Return true if lowering to a bit test is suitable for a set of case |
842 | | /// clusters which contains \p NumDests unique destinations, \p Low and |
843 | | /// \p High as its lowest and highest case values, and expects \p NumCmps |
844 | | /// case value comparisons. Check if the number of destinations, comparison |
845 | | /// metric, and range are all suitable. |
846 | | bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps, |
847 | | const APInt &Low, const APInt &High, |
848 | 87.4k | const DataLayout &DL) const { |
849 | 87.4k | // FIXME: I don't think NumCmps is the correct metric: a single case and a |
850 | 87.4k | // range of cases both require only one branch to lower. Just looking at the |
851 | 87.4k | // number of clusters and destinations should be enough to decide whether to |
852 | 87.4k | // build bit tests. |
853 | 87.4k | |
854 | 87.4k | // To lower a range with bit tests, the range must fit the bitwidth of a |
855 | 87.4k | // machine word. |
856 | 87.4k | if (!rangeFitsInWord(Low, High, DL)) |
857 | 16.3k | return false; |
858 | 87.4k | |
859 | 87.4k | // Decide whether it's profitable to lower this range with bit tests. Each |
860 | 87.4k | // destination requires a bit test and branch, and there is an overall range |
861 | 87.4k | // check branch. For a small number of clusters, separate comparisons might |
862 | 87.4k | // be cheaper, and for many destinations, splitting the range might be |
863 | 87.4k | // better. |
864 | 71.1k | return (NumDests == 1 && 71.1k NumCmps >= 321.7k ) || (NumDests == 2 && 67.4k NumCmps >= 531.8k ) || |
865 | 71.1k | (NumDests == 3 && 66.8k NumCmps >= 65.48k ); |
866 | 87.4k | } |
867 | | |
868 | | /// Return true if the specified operation is illegal on this target or |
869 | | /// unlikely to be made legal with custom lowering. This is used to help guide |
870 | | /// high-level lowering decisions. |
871 | 362k | bool isOperationExpand(unsigned Op, EVT VT) const { |
872 | 362k | return (!isTypeLegal(VT) || getOperationAction(Op, VT) == Expand); |
873 | 362k | } |
874 | | |
875 | | /// Return true if the specified operation is legal on this target. |
876 | 4.51M | bool isOperationLegal(unsigned Op, EVT VT) const { |
877 | 4.51M | return (VT == MVT::Other || isTypeLegal(VT)) && |
878 | 4.48M | getOperationAction(Op, VT) == Legal; |
879 | 4.51M | } |
880 | | |
881 | | /// Return how this load with extension should be treated: either it is legal, |
882 | | /// needs to be promoted to a larger size, needs to be expanded to some other |
883 | | /// code sequence, or the target has a custom expander for it. |
884 | | LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT, |
885 | 2.60M | EVT MemVT) const { |
886 | 2.60M | if (ValVT.isExtended() || 2.60M MemVT.isExtended()2.59M ) return Expand61.5k ; |
887 | 2.53M | unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy; |
888 | 2.53M | unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy; |
889 | 2.53M | assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::LAST_VALUETYPE && |
890 | 2.53M | MemI < MVT::LAST_VALUETYPE && "Table isn't big enough!"); |
891 | 2.53M | unsigned Shift = 4 * ExtType; |
892 | 2.53M | return (LegalizeAction)((LoadExtActions[ValI][MemI] >> Shift) & 0xf); |
893 | 2.60M | } |
894 | | |
  /// Return true if the specified load with extension is legal on this target.
  bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const {
    return getLoadExtAction(ExtType, ValVT, MemVT) == Legal;
  }
899 | | |
900 | | /// Return true if the specified load with extension is legal or custom |
901 | | /// on this target. |
902 | 2.07k | bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const { |
903 | 2.07k | return getLoadExtAction(ExtType, ValVT, MemVT) == Legal || |
904 | 1.92k | getLoadExtAction(ExtType, ValVT, MemVT) == Custom; |
905 | 2.07k | } |
906 | | |
907 | | /// Return how this store with truncation should be treated: either it is |
908 | | /// legal, needs to be promoted to a larger size, needs to be expanded to some |
909 | | /// other code sequence, or the target has a custom expander for it. |
910 | 1.11M | LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const { |
911 | 1.11M | if (ValVT.isExtended() || 1.11M MemVT.isExtended()1.11M ) return Expand267k ; |
912 | 849k | unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy; |
913 | 849k | unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy; |
914 | 849k | assert(ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE && |
915 | 849k | "Table isn't big enough!"); |
916 | 849k | return TruncStoreActions[ValI][MemI]; |
917 | 1.11M | } |
918 | | |
  /// Return true if the specified store with truncation is legal on this
  /// target.
  bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const {
    return isTypeLegal(ValVT) && getTruncStoreAction(ValVT, MemVT) == Legal;
  }
924 | | |
925 | | /// Return true if the specified store with truncation has solution on this |
926 | | /// target. |
927 | 3.42k | bool isTruncStoreLegalOrCustom(EVT ValVT, EVT MemVT) const { |
928 | 3.42k | return isTypeLegal(ValVT) && |
929 | 3.42k | (getTruncStoreAction(ValVT, MemVT) == Legal || |
930 | 2.73k | getTruncStoreAction(ValVT, MemVT) == Custom); |
931 | 3.42k | } |
932 | | |
933 | | /// Return how the indexed load should be treated: either it is legal, needs |
934 | | /// to be promoted to a larger size, needs to be expanded to some other code |
935 | | /// sequence, or the target has a custom expander for it. |
936 | | LegalizeAction |
937 | 6.12M | getIndexedLoadAction(unsigned IdxMode, MVT VT) const { |
938 | 6.12M | assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() && |
939 | 6.12M | "Table isn't big enough!"); |
940 | 6.12M | unsigned Ty = (unsigned)VT.SimpleTy; |
941 | 6.12M | return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] & 0xf0) >> 4); |
942 | 6.12M | } |
943 | | |
944 | | /// Return true if the specified indexed load is legal on this target. |
945 | 5.07M | bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const { |
946 | 5.07M | return VT.isSimple() && |
947 | 5.07M | (getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Legal || |
948 | 1.05M | getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Custom); |
949 | 5.07M | } |
950 | | |
951 | | /// Return how the indexed store should be treated: either it is legal, needs |
952 | | /// to be promoted to a larger size, needs to be expanded to some other code |
953 | | /// sequence, or the target has a custom expander for it. |
954 | | LegalizeAction |
955 | 5.75M | getIndexedStoreAction(unsigned IdxMode, MVT VT) const { |
956 | 5.75M | assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() && |
957 | 5.75M | "Table isn't big enough!"); |
958 | 5.75M | unsigned Ty = (unsigned)VT.SimpleTy; |
959 | 5.75M | return (LegalizeAction)(IndexedModeActions[Ty][IdxMode] & 0x0f); |
960 | 5.75M | } |
961 | | |
962 | | /// Return true if the specified indexed load is legal on this target. |
963 | 4.48M | bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const { |
964 | 4.48M | return VT.isSimple() && |
965 | 4.48M | (getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Legal || |
966 | 1.26M | getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Custom); |
967 | 4.48M | } |
968 | | |
969 | | /// Return how the condition code should be treated: either it is legal, needs |
970 | | /// to be expanded to some other code sequence, or the target has a custom |
971 | | /// expander for it. |
972 | | LegalizeAction |
973 | 1.97M | getCondCodeAction(ISD::CondCode CC, MVT VT) const { |
974 | 1.97M | assert((unsigned)CC < array_lengthof(CondCodeActions) && |
975 | 1.97M | ((unsigned)VT.SimpleTy >> 3) < array_lengthof(CondCodeActions[0]) && |
976 | 1.97M | "Table isn't big enough!"); |
977 | 1.97M | // See setCondCodeAction for how this is encoded. |
978 | 1.97M | uint32_t Shift = 4 * (VT.SimpleTy & 0x7); |
979 | 1.97M | uint32_t Value = CondCodeActions[CC][VT.SimpleTy >> 3]; |
980 | 1.97M | LegalizeAction Action = (LegalizeAction) ((Value >> Shift) & 0xF); |
981 | 1.97M | assert(Action != Promote && "Can't promote condition code!"); |
982 | 1.97M | return Action; |
983 | 1.97M | } |
984 | | |
985 | | /// Return true if the specified condition code is legal on this target. |
986 | 37.7k | bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const { |
987 | 37.7k | return |
988 | 37.7k | getCondCodeAction(CC, VT) == Legal || |
989 | 1.87k | getCondCodeAction(CC, VT) == Custom; |
990 | 37.7k | } |
991 | | |
992 | | /// If the action for this operation is to promote, this method returns the |
993 | | /// ValueType to promote to. |
994 | 227k | MVT getTypeToPromoteTo(unsigned Op, MVT VT) const { |
995 | 227k | assert(getOperationAction(Op, VT) == Promote && |
996 | 227k | "This operation isn't promoted!"); |
997 | 227k | |
998 | 227k | // See if this has an explicit type specified. |
999 | 227k | std::map<std::pair<unsigned, MVT::SimpleValueType>, |
1000 | 227k | MVT::SimpleValueType>::const_iterator PTTI = |
1001 | 227k | PromoteToType.find(std::make_pair(Op, VT.SimpleTy)); |
1002 | 227k | if (PTTI != PromoteToType.end()227k ) return PTTI->second226k ; |
1003 | 227k | |
1004 | 719 | assert((VT.isInteger() || VT.isFloatingPoint()) && |
1005 | 719 | "Cannot autopromote this type, add it with AddPromotedToType."); |
1006 | 719 | |
1007 | 719 | MVT NVT = VT; |
1008 | 768 | do { |
1009 | 768 | NVT = (MVT::SimpleValueType)(NVT.SimpleTy+1); |
1010 | 768 | assert(NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid && |
1011 | 768 | "Didn't find type to promote to!"); |
1012 | 719 | } while (!isTypeLegal(NVT) || |
1013 | 762 | getOperationAction(Op, NVT) == Promote); |
1014 | 719 | return NVT; |
1015 | 227k | } |
1016 | | |
1017 | | /// Return the EVT corresponding to this LLVM type. This is fixed by the LLVM |
1018 | | /// operations except for the pointer size. If AllowUnknown is true, this |
1019 | | /// will return MVT::Other for types with no EVT counterpart (e.g. structs), |
1020 | | /// otherwise it will assert. |
1021 | | EVT getValueType(const DataLayout &DL, Type *Ty, |
1022 | 102M | bool AllowUnknown = false) const { |
1023 | 102M | // Lower scalar pointers to native pointer types. |
1024 | 102M | if (PointerType *PTy = dyn_cast<PointerType>(Ty)) |
1025 | 38.2M | return getPointerTy(DL, PTy->getAddressSpace()); |
1026 | 102M | |
1027 | 64.5M | if (64.5M Ty->isVectorTy()64.5M ) { |
1028 | 6.52M | VectorType *VTy = cast<VectorType>(Ty); |
1029 | 6.52M | Type *Elm = VTy->getElementType(); |
1030 | 6.52M | // Lower vectors of pointers to native pointer types. |
1031 | 6.52M | if (PointerType *PT6.52M = dyn_cast<PointerType>(Elm)) { |
1032 | 419k | EVT PointerTy(getPointerTy(DL, PT->getAddressSpace())); |
1033 | 419k | Elm = PointerTy.getTypeForEVT(Ty->getContext()); |
1034 | 419k | } |
1035 | 6.52M | |
1036 | 6.52M | return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(Elm, false), |
1037 | 6.52M | VTy->getNumElements()); |
1038 | 6.52M | } |
1039 | 58.0M | return EVT::getEVT(Ty, AllowUnknown); |
1040 | 102M | } |
1041 | | |
  /// Return the MVT corresponding to this LLVM type. See getValueType.
  /// Asserts (inside getSimpleVT) if the resulting EVT is not a simple type.
  MVT getSimpleValueType(const DataLayout &DL, Type *Ty,
                         bool AllowUnknown = false) const {
    return getValueType(DL, Ty, AllowUnknown).getSimpleVT();
  }
1047 | | |
1048 | | /// Return the desired alignment for ByVal or InAlloca aggregate function |
1049 | | /// arguments in the caller parameter area. This is the actual alignment, not |
1050 | | /// its logarithm. |
1051 | | virtual unsigned getByValTypeAlignment(Type *Ty, const DataLayout &DL) const; |
1052 | | |
  /// Return the type of registers that this ValueType will eventually require.
  MVT getRegisterType(MVT VT) const {
    assert((unsigned)VT.SimpleTy < array_lengthof(RegisterTypeForVT));
    return RegisterTypeForVT[VT.SimpleTy];
  }
1058 | | |
1059 | | /// Return the type of registers that this ValueType will eventually require. |
1060 | 21.9M | MVT getRegisterType(LLVMContext &Context, EVT VT) const { |
1061 | 21.9M | if (VT.isSimple()21.9M ) { |
1062 | 21.9M | assert((unsigned)VT.getSimpleVT().SimpleTy < |
1063 | 21.9M | array_lengthof(RegisterTypeForVT)); |
1064 | 21.9M | return RegisterTypeForVT[VT.getSimpleVT().SimpleTy]; |
1065 | 21.9M | } |
1066 | 13.0k | if (13.0k VT.isVector()13.0k ) { |
1067 | 2.70k | EVT VT1; |
1068 | 2.70k | MVT RegisterVT; |
1069 | 2.70k | unsigned NumIntermediates; |
1070 | 2.70k | (void)getVectorTypeBreakdown(Context, VT, VT1, |
1071 | 2.70k | NumIntermediates, RegisterVT); |
1072 | 2.70k | return RegisterVT; |
1073 | 2.70k | } |
1074 | 10.3k | if (10.3k VT.isInteger()10.3k ) { |
1075 | 10.3k | return getRegisterType(Context, getTypeToTransformTo(Context, VT)); |
1076 | 10.3k | } |
1077 | 1 | llvm_unreachable1 ("Unsupported extended type!"); |
1078 | 10.3k | } |
1079 | | |
1080 | | /// Return the number of registers that this ValueType will eventually |
1081 | | /// require. |
1082 | | /// |
1083 | | /// This is one for any types promoted to live in larger registers, but may be |
1084 | | /// more than one for types (like i64) that are split into pieces. For types |
1085 | | /// like i140, which are first promoted then expanded, it is the number of |
1086 | | /// registers needed to hold all the bits of the original type. For an i140 |
1087 | | /// on a 32 bit machine this means 5 registers. |
1088 | 25.2M | unsigned getNumRegisters(LLVMContext &Context, EVT VT) const { |
1089 | 25.2M | if (VT.isSimple()25.2M ) { |
1090 | 25.2M | assert((unsigned)VT.getSimpleVT().SimpleTy < |
1091 | 25.2M | array_lengthof(NumRegistersForVT)); |
1092 | 25.2M | return NumRegistersForVT[VT.getSimpleVT().SimpleTy]; |
1093 | 25.2M | } |
1094 | 7.16k | if (7.16k VT.isVector()7.16k ) { |
1095 | 3.33k | EVT VT1; |
1096 | 3.33k | MVT VT2; |
1097 | 3.33k | unsigned NumIntermediates; |
1098 | 3.33k | return getVectorTypeBreakdown(Context, VT, VT1, NumIntermediates, VT2); |
1099 | 3.33k | } |
1100 | 3.82k | if (3.82k VT.isInteger()3.82k ) { |
1101 | 3.82k | unsigned BitWidth = VT.getSizeInBits(); |
1102 | 3.82k | unsigned RegWidth = getRegisterType(Context, VT).getSizeInBits(); |
1103 | 3.82k | return (BitWidth + RegWidth - 1) / RegWidth; |
1104 | 3.82k | } |
1105 | 0 | llvm_unreachable0 ("Unsupported extended type!"); |
1106 | 3.82k | } |
1107 | | |
  /// Certain combinations of ABIs, Targets and features require that types
  /// are legal for some operations and not for other operations.
  /// For MIPS all vector types must be passed through the integer register set.
  ///
  /// The default simply uses the generic register type.
  virtual MVT getRegisterTypeForCallingConv(MVT VT) const {
    return getRegisterType(VT);
  }
1114 | | |
  /// EVT overload of the calling-convention register type hook; the default
  /// defers to the generic getRegisterType.
  virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context,
                                            EVT VT) const {
    return getRegisterType(Context, VT);
  }
1119 | | |
  /// Certain targets require unusual breakdowns of certain types. For MIPS,
  /// this occurs when a vector type is used, as vector are passed through the
  /// integer register set.
  ///
  /// The default simply uses the generic register count.
  virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context,
                                                 EVT VT) const {
    return getNumRegisters(Context, VT);
  }
1127 | | |
  /// Certain targets have context sensitive alignment requirements, where one
  /// type has the alignment requirement of another type.
  // NOTE(review): DL is taken by value, copying the DataLayout on every call;
  // a const reference would likely suffice — confirm all overriders before
  // changing the virtual signature.
  virtual unsigned getABIAlignmentForCallingConv(Type *ArgTy,
                                                 DataLayout DL) const {
    return DL.getABITypeAlignment(ArgTy);
  }
1134 | | |
  /// If true, then instruction selection should seek to shrink the FP constant
  /// of the specified type to a smaller type in order to save space and / or
  /// reduce runtime.
  virtual bool ShouldShrinkFPConstant(EVT) const { return true; }
1139 | | |
  // Return true if it is profitable to reduce the given load node to a smaller
  // type.
  //
  // e.g. (i16 (trunc (i32 (load x))) -> i16 load x should be performed
  virtual bool shouldReduceLoadWidth(SDNode *Load,
                                     ISD::LoadExtType ExtTy,
                                     EVT NewVT) const {
    // Default: narrowing a load is always considered profitable.
    return true;
  }
1149 | | |
  /// When splitting a value of the specified type into parts, does the Lo
  /// or Hi part come first? This usually follows the endianness, except
  /// for ppcf128, where the Hi part always comes first.
  bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const {
    return DL.isBigEndian() || VT == MVT::ppcf128;
  }
1156 | | |
  /// If true, the target has custom DAG combine transformations that it can
  /// perform for the specified node.
  bool hasTargetDAGCombine(ISD::NodeType NT) const {
    assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray));
    // One bit per node type, packed eight to a byte.
    return TargetDAGCombineArray[NT >> 3] & (1 << (NT&7));
  }
1163 | | |
  /// Return the depth limit used by the DAG combiner's GatherAllAliases search.
  unsigned getGatherAllAliasesMaxDepth() const {
    return GatherAllAliasesMaxDepth;
  }
1167 | | |
  /// Returns the size of the platform's va_list object.
  ///
  /// Defaults to the size of a pointer on this target.
  virtual unsigned getVaListSizeInBits(const DataLayout &DL) const {
    return getPointerTy(DL).getSizeInBits();
  }
1172 | | |
  /// \brief Get maximum # of store operations permitted for llvm.memset
  ///
  /// This function returns the maximum number of store operations permitted
  /// to replace a call to llvm.memset. The value is set by the target at the
  /// performance threshold for such a replacement. If OptSize is true,
  /// return the limit for functions that have OptSize attribute.
  unsigned getMaxStoresPerMemset(bool OptSize) const {
    return OptSize ? MaxStoresPerMemsetOptSize : MaxStoresPerMemset;
  }
1182 | | |
  /// \brief Get maximum # of store operations permitted for llvm.memcpy
  ///
  /// This function returns the maximum number of store operations permitted
  /// to replace a call to llvm.memcpy. The value is set by the target at the
  /// performance threshold for such a replacement. If OptSize is true,
  /// return the limit for functions that have OptSize attribute.
  unsigned getMaxStoresPerMemcpy(bool OptSize) const {
    return OptSize ? MaxStoresPerMemcpyOptSize : MaxStoresPerMemcpy;
  }
1192 | | |
  /// Get maximum # of load operations permitted for memcmp
  ///
  /// This function returns the maximum number of load operations permitted
  /// to replace a call to memcmp. The value is set by the target at the
  /// performance threshold for such a replacement. If OptSize is true,
  /// return the limit for functions that have OptSize attribute.
  unsigned getMaxExpandSizeMemcmp(bool OptSize) const {
    return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp;
  }
1202 | | |
  /// \brief Get maximum # of store operations permitted for llvm.memmove
  ///
  /// This function returns the maximum number of store operations permitted
  /// to replace a call to llvm.memmove. The value is set by the target at the
  /// performance threshold for such a replacement. If OptSize is true,
  /// return the limit for functions that have OptSize attribute.
  unsigned getMaxStoresPerMemmove(bool OptSize) const {
    return OptSize ? MaxStoresPerMemmoveOptSize : MaxStoresPerMemmove;
  }
1212 | | |
  /// \brief Determine if the target supports unaligned memory accesses.
  ///
  /// This function returns true if the target allows unaligned memory accesses
  /// of the specified type in the given address space. If true, it also returns
  /// whether the unaligned memory access is "fast" in the last argument by
  /// reference. This is used, for example, in situations where an array
  /// copy/move/set is converted to a sequence of store operations. Its use
  /// helps to ensure that such replacements don't generate code that causes an
  /// alignment error (trap) on the target machine.
  ///
  /// Conservative default: misaligned accesses are not supported.
  virtual bool allowsMisalignedMemoryAccesses(EVT,
                                              unsigned AddrSpace = 0,
                                              unsigned Align = 1,
                                              bool * /*Fast*/ = nullptr) const {
    return false;
  }
1228 | | |
1229 | | /// Return true if the target supports a memory access of this type for the |
1230 | | /// given address space and alignment. If the access is allowed, the optional |
1231 | | /// final parameter returns if the access is also fast (as defined by the |
1232 | | /// target). |
1233 | | bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, |
1234 | | unsigned AddrSpace = 0, unsigned Alignment = 1, |
1235 | | bool *Fast = nullptr) const; |
1236 | | |
  /// Returns the target specific optimal type for load and store operations as
  /// a result of memset, memcpy, and memmove lowering.
  ///
  /// If DstAlign is zero, it is safe to assume the destination alignment can
  /// satisfy any constraint. Similarly if SrcAlign is zero it means there isn't
  /// a need to check it against alignment requirement, probably because the
  /// source does not need to be loaded. If 'IsMemset' is true, that means it's
  /// expanding a memset. If 'ZeroMemset' is true, that means it's a memset of
  /// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it
  /// does not need to be loaded. It returns EVT::Other if the type should be
  /// determined using generic target-independent logic.
  virtual EVT getOptimalMemOpType(uint64_t /*Size*/,
                                  unsigned /*DstAlign*/, unsigned /*SrcAlign*/,
                                  bool /*IsMemset*/,
                                  bool /*ZeroMemset*/,
                                  bool /*MemcpyStrSrc*/,
                                  MachineFunction &/*MF*/) const {
    // Default: let the generic lowering pick the type.
    return MVT::Other;
  }
1256 | | |
  /// Returns true if it's safe to use load / store of the specified type to
  /// expand memcpy / memset inline.
  ///
  /// This is mostly true for all types except for some special cases. For
  /// example, on X86 targets without SSE2 f64 load / store are done with fldl /
  /// fstpl which also does type conversion. Note the specified type doesn't
  /// have to be legal as the hook is used before type legalization.
  virtual bool isSafeMemOpType(MVT /*VT*/) const { return true; }
1265 | | |
1266 | | /// Determine if we should use _setjmp or setjmp to implement llvm.setjmp. |
1267 | 0 | bool usesUnderscoreSetJmp() const { |
1268 | 0 | return UseUnderscoreSetJmp; |
1269 | 0 | } |
1270 | | |
1271 | | /// Determine if we should use _longjmp or longjmp to implement llvm.longjmp. |
1272 | 0 | bool usesUnderscoreLongJmp() const { |
1273 | 0 | return UseUnderscoreLongJmp; |
1274 | 0 | } |
1275 | | |
1276 | | /// Return lower limit for number of blocks in a jump table. |
1277 | | unsigned getMinimumJumpTableEntries() const; |
1278 | | |
1279 | | /// Return lower limit of the density in a jump table. |
1280 | | unsigned getMinimumJumpTableDensity(bool OptForSize) const; |
1281 | | |
1282 | | /// Return upper limit for number of entries in a jump table. |
1283 | | /// Zero if no limit. |
1284 | | unsigned getMaximumJumpTableSize() const; |
1285 | | |
1286 | 4.78k | virtual bool isJumpTableRelative() const { |
1287 | 4.78k | return TM.isPositionIndependent(); |
1288 | 4.78k | } |
1289 | | |
1290 | | /// If a physical register, this specifies the register that |
1291 | | /// llvm.savestack/llvm.restorestack should save and restore. |
1292 | 24.8M | unsigned getStackPointerRegisterToSaveRestore() const { |
1293 | 24.8M | return StackPointerRegisterToSaveRestore; |
1294 | 24.8M | } |
1295 | | |
1296 | | /// If a physical register, this returns the register that receives the |
1297 | | /// exception address on entry to an EH pad. |
1298 | | virtual unsigned |
1299 | 0 | getExceptionPointerRegister(const Constant *PersonalityFn) const { |
1300 | 0 | // 0 is guaranteed to be the NoRegister value on all targets |
1301 | 0 | return 0; |
1302 | 0 | } |
1303 | | |
1304 | | /// If a physical register, this returns the register that receives the |
1305 | | /// exception typeid on entry to a landing pad. |
1306 | | virtual unsigned |
1307 | 0 | getExceptionSelectorRegister(const Constant *PersonalityFn) const { |
1308 | 0 | // 0 is guaranteed to be the NoRegister value on all targets |
1309 | 0 | return 0; |
1310 | 0 | } |
1311 | | |
1312 | 0 | virtual bool needsFixedCatchObjects() const { |
1313 | 0 | report_fatal_error("Funclet EH is not implemented for this target"); |
1314 | 0 | } |
1315 | | |
1316 | | /// Returns the target's jmp_buf size in bytes (if never set, the default is |
1317 | | /// 200) |
1318 | 0 | unsigned getJumpBufSize() const { |
1319 | 0 | return JumpBufSize; |
1320 | 0 | } |
1321 | | |
1322 | | /// Returns the target's jmp_buf alignment in bytes (if never set, the default |
1323 | | /// is 0) |
1324 | 0 | unsigned getJumpBufAlignment() const { |
1325 | 0 | return JumpBufAlignment; |
1326 | 0 | } |
1327 | | |
1328 | | /// Return the minimum stack alignment of an argument. |
1329 | 169 | unsigned getMinStackArgumentAlignment() const { |
1330 | 169 | return MinStackArgumentAlignment; |
1331 | 169 | } |
1332 | | |
1333 | | /// Return the minimum function alignment. |
1334 | 661k | unsigned getMinFunctionAlignment() const { |
1335 | 661k | return MinFunctionAlignment; |
1336 | 661k | } |
1337 | | |
1338 | | /// Return the preferred function alignment. |
1339 | 659k | unsigned getPrefFunctionAlignment() const { |
1340 | 659k | return PrefFunctionAlignment; |
1341 | 659k | } |
1342 | | |
1343 | | /// Return the preferred loop alignment. |
1344 | 1.08M | virtual unsigned getPrefLoopAlignment(MachineLoop *ML = nullptr) const { |
1345 | 1.08M | return PrefLoopAlignment; |
1346 | 1.08M | } |
1347 | | |
1348 | | /// If the target has a standard location for the stack protector guard, |
1349 | | /// returns the address of that location. Otherwise, returns nullptr. |
1350 | | /// DEPRECATED: please override useLoadStackGuardNode and customize |
1351 | | /// LOAD_STACK_GUARD, or customize @llvm.stackguard(). |
1352 | | virtual Value *getIRStackGuard(IRBuilder<> &IRB) const; |
1353 | | |
1354 | | /// Inserts necessary declarations for SSP (stack protection) purpose. |
1355 | | /// Should be used only when getIRStackGuard returns nullptr. |
1356 | | virtual void insertSSPDeclarations(Module &M) const; |
1357 | | |
1358 | | /// Return the variable that's previously inserted by insertSSPDeclarations, |
1359 | | /// if any, otherwise return nullptr. Should be used only when |
1360 | | /// getIRStackGuard returns nullptr. |
1361 | | virtual Value *getSDagStackGuard(const Module &M) const; |
1362 | | |
1363 | | /// If the target has a standard stack protection check function that |
1364 | | /// performs validation and error handling, returns the function. Otherwise, |
1365 | | /// returns nullptr. Must be previously inserted by insertSSPDeclarations. |
1366 | | /// Should be used only when getIRStackGuard returns nullptr. |
1367 | | virtual Value *getSSPStackGuardCheck(const Module &M) const; |
1368 | | |
1369 | | protected: |
1370 | | Value *getDefaultSafeStackPointerLocation(IRBuilder<> &IRB, |
1371 | | bool UseTLS) const; |
1372 | | |
1373 | | public: |
1374 | | /// Returns the target-specific address of the unsafe stack pointer. |
1375 | | virtual Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const; |
1376 | | |
1377 | | /// Returns the name of the symbol used to emit stack probes or the empty |
1378 | | /// string if not applicable. |
1379 | 0 | virtual StringRef getStackProbeSymbolName(MachineFunction &MF) const { |
1380 | 0 | return ""; |
1381 | 0 | } |
1382 | | |
1383 | | /// Returns true if a cast between SrcAS and DestAS is a noop. |
1384 | 451 | virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { |
1385 | 451 | return false; |
1386 | 451 | } |
1387 | | |
1388 | | /// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we |
1389 | | /// are happy to sink it into basic blocks. |
1390 | 198 | virtual bool isCheapAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { |
1391 | 198 | return isNoopAddrSpaceCast(SrcAS, DestAS); |
1392 | 198 | } |
1393 | | |
1394 | | /// Return true if the pointer arguments to CI should be aligned by aligning |
1395 | | /// the object whose address is being passed. If so then MinSize is set to the |
1396 | | /// minimum size the object must be to be aligned and PrefAlign is set to the |
1397 | | /// preferred alignment. |
1398 | | virtual bool shouldAlignPointerArgs(CallInst * /*CI*/, unsigned & /*MinSize*/, |
1399 | 6.13M | unsigned & /*PrefAlign*/) const { |
1400 | 6.13M | return false; |
1401 | 6.13M | } |
1402 | | |
1403 | | //===--------------------------------------------------------------------===// |
1404 | | /// \name Helpers for TargetTransformInfo implementations |
1405 | | /// @{ |
1406 | | |
1407 | | /// Get the ISD node that corresponds to the Instruction class opcode. |
1408 | | int InstructionOpcodeToISD(unsigned Opcode) const; |
1409 | | |
1410 | | /// Estimate the cost of type-legalization and the legalized type. |
1411 | | std::pair<int, MVT> getTypeLegalizationCost(const DataLayout &DL, |
1412 | | Type *Ty) const; |
1413 | | |
1414 | | /// @} |
1415 | | |
1416 | | //===--------------------------------------------------------------------===// |
1417 | | /// \name Helpers for atomic expansion. |
1418 | | /// @{ |
1419 | | |
1420 | | /// Returns the maximum atomic operation size (in bits) supported by |
1421 | | /// the backend. Atomic operations greater than this size (as well |
1422 | | /// as ones that are not naturally aligned), will be expanded by |
1423 | | /// AtomicExpandPass into an __atomic_* library call. |
1424 | 65.4k | unsigned getMaxAtomicSizeInBitsSupported() const { |
1425 | 65.4k | return MaxAtomicSizeInBitsSupported; |
1426 | 65.4k | } |
1427 | | |
1428 | | /// Returns the size of the smallest cmpxchg or ll/sc instruction |
1429 | | /// the backend supports. Any smaller operations are widened in |
1430 | | /// AtomicExpandPass. |
1431 | | /// |
1432 | | /// Note that *unlike* operations above the maximum size, atomic ops |
1433 | | /// are still natively supported below the minimum; they just |
1434 | | /// require a more complex expansion. |
1435 | 49.1k | unsigned getMinCmpXchgSizeInBits() const { return MinCmpXchgSizeInBits; } |
1436 | | |
1437 | | /// Whether AtomicExpandPass should automatically insert fences and reduce |
1438 | | /// ordering for this atomic. This should be true for most architectures with |
1439 | | /// weak memory ordering. Defaults to false. |
1440 | 70.5k | virtual bool shouldInsertFencesForAtomic(const Instruction *I) const { |
1441 | 70.5k | return false; |
1442 | 70.5k | } |
1443 | | |
1444 | | /// Perform a load-linked operation on Addr, returning a "Value *" with the |
1445 | | /// corresponding pointee type. This may entail some non-trivial operations to |
1446 | | /// truncate or reconstruct types that will be illegal in the backend. See |
1447 | | /// ARMISelLowering for an example implementation. |
1448 | | virtual Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr, |
1449 | 0 | AtomicOrdering Ord) const { |
1450 | 0 | llvm_unreachable("Load linked unimplemented on this target"); |
1451 | 0 | } |
1452 | | |
1453 | | /// Perform a store-conditional operation to Addr. Return the status of the |
1454 | | /// store. This should be 0 if the store succeeded, non-zero otherwise. |
1455 | | virtual Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, |
1456 | 0 | Value *Addr, AtomicOrdering Ord) const { |
1457 | 0 | llvm_unreachable("Store conditional unimplemented on this target"); |
1458 | 0 | } |
1459 | | |
1460 | | /// Inserts in the IR a target-specific intrinsic specifying a fence. |
1461 | | /// It is called by AtomicExpandPass before expanding an |
1462 | | /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad |
1463 | | /// if shouldInsertFencesForAtomic returns true. |
1464 | | /// |
1465 | | /// Inst is the original atomic instruction, prior to other expansions that |
1466 | | /// may be performed. |
1467 | | /// |
1468 | | /// This function should either return a nullptr, or a pointer to an IR-level |
1469 | | /// Instruction*. Even complex fence sequences can be represented by a |
1470 | | /// single Instruction* through an intrinsic to be lowered later. |
1471 | | /// Backends should override this method to produce target-specific intrinsic |
1472 | | /// for their fences. |
1473 | | /// FIXME: Please note that the default implementation here in terms of |
1474 | | /// IR-level fences exists for historical/compatibility reasons and is |
1475 | | /// *unsound* ! Fences cannot, in general, be used to restore sequential |
1476 | | /// consistency. For example, consider the following example: |
1477 | | /// atomic<int> x = y = 0; |
1478 | | /// int r1, r2, r3, r4; |
1479 | | /// Thread 0: |
1480 | | /// x.store(1); |
1481 | | /// Thread 1: |
1482 | | /// y.store(1); |
1483 | | /// Thread 2: |
1484 | | /// r1 = x.load(); |
1485 | | /// r2 = y.load(); |
1486 | | /// Thread 3: |
1487 | | /// r3 = y.load(); |
1488 | | /// r4 = x.load(); |
1489 | | /// r1 = r3 = 1 and r2 = r4 = 0 is impossible as long as the accesses are all |
1490 | | /// seq_cst. But if they are lowered to monotonic accesses, no amount of |
1491 | | /// IR-level fences can prevent it. |
1492 | | /// @{ |
1493 | | virtual Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst, |
1494 | 111 | AtomicOrdering Ord) const { |
1495 | 111 | if (isReleaseOrStronger(Ord) && 111 Inst->hasAtomicStore()80 ) |
1496 | 77 | return Builder.CreateFence(Ord); |
1497 | 111 | else |
1498 | 34 | return nullptr; |
1499 | 111 | } |
1500 | | |
1501 | | virtual Instruction *emitTrailingFence(IRBuilder<> &Builder, |
1502 | | Instruction *Inst, |
1503 | 111 | AtomicOrdering Ord) const { |
1504 | 111 | if (isAcquireOrStronger(Ord)) |
1505 | 88 | return Builder.CreateFence(Ord); |
1506 | 111 | else |
1507 | 23 | return nullptr; |
1508 | 111 | } |
1509 | | /// @} |
1510 | | |
1511 | | // Emits code that executes when the comparison result in the ll/sc |
1512 | | // expansion of a cmpxchg instruction is such that the store-conditional will |
1513 | | // not execute. This makes it possible to balance out the load-linked with |
1514 | | // a dedicated instruction, if desired. |
1515 | | // E.g., on ARM, if ldrex isn't followed by strex, the exclusive monitor would |
1516 | | // be unnecessarily held, except if clrex, inserted by this hook, is executed. |
1517 | 3 | virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const {} |
1518 | | |
1519 | | /// Returns true if the given (atomic) store should be expanded by the |
1520 | | /// IR-level AtomicExpand pass into an "atomic xchg" which ignores its input. |
1521 | 66 | virtual bool shouldExpandAtomicStoreInIR(StoreInst *SI) const { |
1522 | 66 | return false; |
1523 | 66 | } |
1524 | | |
1525 | | /// Returns true if arguments should be sign-extended in lib calls. |
1526 | 20.2k | virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { |
1527 | 20.2k | return IsSigned; |
1528 | 20.2k | } |
1529 | | |
1530 | | /// Returns how the given (atomic) load should be expanded by the |
1531 | | /// IR-level AtomicExpand pass. |
1532 | 80 | virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const { |
1533 | 80 | return AtomicExpansionKind::None; |
1534 | 80 | } |
1535 | | |
1536 | | /// Returns true if the given atomic cmpxchg should be expanded by the |
1537 | | /// IR-level AtomicExpand pass into a load-linked/store-conditional sequence |
1538 | | /// (through emitLoadLinked() and emitStoreConditional()). |
1539 | 1.13k | virtual bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const { |
1540 | 1.13k | return false; |
1541 | 1.13k | } |
1542 | | |
1543 | | /// Returns how the IR-level AtomicExpand pass should expand the given |
1544 | | /// AtomicRMW, if at all. Default is to never expand. |
1545 | 613 | virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const { |
1546 | 613 | return AtomicExpansionKind::None; |
1547 | 613 | } |
1548 | | |
1549 | | /// On some platforms, an AtomicRMW that never actually modifies the value |
1550 | | /// (such as fetch_add of 0) can be turned into a fence followed by an |
1551 | | /// atomic load. This may sound useless, but it makes it possible for the |
1552 | | /// processor to keep the cacheline shared, dramatically improving |
1553 | | /// performance. And such idempotent RMWs are useful for implementing some |
1554 | | /// kinds of locks, see for example (justification + benchmarks): |
1555 | | /// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf |
1556 | | /// This method tries doing that transformation, returning the atomic load if |
1557 | | /// it succeeds, and nullptr otherwise. |
1558 | | /// If shouldExpandAtomicLoadInIR returns true on that load, it will undergo |
1559 | | /// another round of expansion. |
1560 | | virtual LoadInst * |
1561 | 0 | lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const { |
1562 | 0 | return nullptr; |
1563 | 0 | } |
1564 | | |
1565 | | /// Returns how the platform's atomic operations are extended (ZERO_EXTEND, |
1566 | | /// SIGN_EXTEND, or ANY_EXTEND). |
1567 | 1.90k | virtual ISD::NodeType getExtendForAtomicOps() const { |
1568 | 1.90k | return ISD::ZERO_EXTEND; |
1569 | 1.90k | } |
1570 | | |
1571 | | /// @} |
1572 | | |
1573 | | /// Returns true if we should normalize |
1574 | | /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and |
1575 | | /// select(N0|N1, X, Y) => select(N0, select(N1, X, Y, Y)) if it is likely |
1576 | | /// that it saves us from materializing N0 and N1 in an integer register. |
1577 | | /// Targets that are able to perform and/or on flags should return false here. |
1578 | | virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, |
1579 | 24.5k | EVT VT) const { |
1580 | 24.5k | // If a target has multiple condition registers, then it likely has logical |
1581 | 24.5k | // operations on those registers. |
1582 | 24.5k | if (hasMultipleConditionRegisters()) |
1583 | 14.6k | return false; |
1584 | 24.5k | // Only do the transform if the value won't be split into multiple |
1585 | 24.5k | // registers. |
1586 | 9.94k | LegalizeTypeAction Action = getTypeAction(Context, VT); |
1587 | 9.58k | return Action != TypeExpandInteger && Action != TypeExpandFloat && |
1588 | 9.58k | Action != TypeSplitVector; |
1589 | 24.5k | } |
1590 | | |
1591 | | /// Return true if a select of constants (select Cond, C1, C2) should be |
1592 | | /// transformed into simple math ops with the condition value. For example: |
1593 | | /// select Cond, C1, C1-1 --> add (zext Cond), C1-1 |
1594 | 19.0k | virtual bool convertSelectOfConstantsToMath(EVT VT) const { |
1595 | 19.0k | return false; |
1596 | 19.0k | } |
1597 | | |
1598 | | //===--------------------------------------------------------------------===// |
1599 | | // TargetLowering Configuration Methods - These methods should be invoked by |
1600 | | // the derived class constructor to configure this object for the target. |
1601 | | // |
1602 | | protected: |
1603 | | /// Specify how the target extends the result of integer and floating point |
1604 | | /// boolean values from i1 to a wider type. See getBooleanContents. |
1605 | 44.1k | void setBooleanContents(BooleanContent Ty) { |
1606 | 44.1k | BooleanContents = Ty; |
1607 | 44.1k | BooleanFloatContents = Ty; |
1608 | 44.1k | } |
1609 | | |
1610 | | /// Specify how the target extends the result of integer and floating point |
1611 | | /// boolean values from i1 to a wider type. See getBooleanContents. |
1612 | 1.08k | void setBooleanContents(BooleanContent IntTy, BooleanContent FloatTy) { |
1613 | 1.08k | BooleanContents = IntTy; |
1614 | 1.08k | BooleanFloatContents = FloatTy; |
1615 | 1.08k | } |
1616 | | |
1617 | | /// Specify how the target extends the result of a vector boolean value from a |
1618 | | /// vector of i1 to a wider type. See getBooleanContents. |
1619 | 43.5k | void setBooleanVectorContents(BooleanContent Ty) { |
1620 | 43.5k | BooleanVectorContents = Ty; |
1621 | 43.5k | } |
1622 | | |
1623 | | /// Specify the target scheduling preference. |
1624 | 37.2k | void setSchedulingPreference(Sched::Preference Pref) { |
1625 | 37.2k | SchedPreferenceInfo = Pref; |
1626 | 37.2k | } |
1627 | | |
1628 | | /// Indicate whether this target prefers to use _setjmp to implement |
1629 | | /// llvm.setjmp or the version without _. Defaults to false. |
1630 | 11.0k | void setUseUnderscoreSetJmp(bool Val) { |
1631 | 11.0k | UseUnderscoreSetJmp = Val; |
1632 | 11.0k | } |
1633 | | |
1634 | | /// Indicate whether this target prefers to use _longjmp to implement |
1635 | | /// llvm.longjmp or the version without _. Defaults to false. |
1636 | 11.0k | void setUseUnderscoreLongJmp(bool Val) { |
1637 | 11.0k | UseUnderscoreLongJmp = Val; |
1638 | 11.0k | } |
1639 | | |
1640 | | /// Indicate the minimum number of blocks to generate jump tables. |
1641 | | void setMinimumJumpTableEntries(unsigned Val); |
1642 | | |
1643 | | /// Indicate the maximum number of entries in jump tables. |
1644 | | /// Set to zero to generate unlimited jump tables. |
1645 | | void setMaximumJumpTableSize(unsigned); |
1646 | | |
1647 | | /// If set to a physical register, this specifies the register that |
1648 | | /// llvm.savestack/llvm.restorestack should save and restore. |
1649 | 42.0k | void setStackPointerRegisterToSaveRestore(unsigned R) { |
1650 | 42.0k | StackPointerRegisterToSaveRestore = R; |
1651 | 42.0k | } |
1652 | | |
1653 | | /// Tells the code generator that the target has multiple (allocatable) |
1654 | | /// condition registers that can be used to store the results of comparisons |
1655 | | /// for use by selects and conditional branches. With multiple condition |
1656 | | /// registers, the code generator will not aggressively sink comparisons into |
1657 | | /// the blocks of their users. |
1658 | 3.27k | void setHasMultipleConditionRegisters(bool hasManyRegs = true) { |
1659 | 3.27k | HasMultipleConditionRegisters = hasManyRegs; |
1660 | 3.27k | } |
1661 | | |
1662 | | /// Tells the code generator that the target has BitExtract instructions. |
1663 | | /// The code generator will aggressively sink "shift"s into the blocks of |
1664 | | /// their users if the users will generate "and" instructions which can be |
1665 | | /// combined with "shift" to BitExtract instructions. |
1666 | 15.9k | void setHasExtractBitsInsn(bool hasExtractInsn = true) { |
1667 | 15.9k | HasExtractBitsInsn = hasExtractInsn; |
1668 | 15.9k | } |
1669 | | |
1670 | | /// Tells the code generator not to expand logic operations on comparison |
1671 | | /// predicates into separate sequences that increase the amount of flow |
1672 | | /// control. |
1673 | | void setJumpIsExpensive(bool isExpensive = true); |
1674 | | |
1675 | | /// Tells the code generator that this target supports floating point |
1676 | | /// exceptions and cares about preserving floating point exception behavior. |
1677 | 2.06k | void setHasFloatingPointExceptions(bool FPExceptions = true) { |
1678 | 2.06k | HasFloatingPointExceptions = FPExceptions; |
1679 | 2.06k | } |
1680 | | |
1681 | | /// Tells the code generator which bitwidths to bypass. |
1682 | 1.29k | void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth) { |
1683 | 1.29k | BypassSlowDivWidths[SlowBitWidth] = FastBitWidth; |
1684 | 1.29k | } |
1685 | | |
1686 | | /// Add the specified register class as an available regclass for the |
1687 | | /// specified value type. This indicates the selector can handle values of |
1688 | | /// that class natively. |
1689 | 593k | void addRegisterClass(MVT VT, const TargetRegisterClass *RC) { |
1690 | 593k | assert((unsigned)VT.SimpleTy < array_lengthof(RegClassForVT)); |
1691 | 593k | RegClassForVT[VT.SimpleTy] = RC; |
1692 | 593k | } |
1693 | | |
1694 | | /// Return the largest legal super-reg register class of the register class |
1695 | | /// for the specified type and its associated "cost". |
1696 | | virtual std::pair<const TargetRegisterClass *, uint8_t> |
1697 | | findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const; |
1698 | | |
1699 | | /// Once all of the register classes are added, this allows us to compute |
1700 | | /// derived properties we expose. |
1701 | | void computeRegisterProperties(const TargetRegisterInfo *TRI); |
1702 | | |
1703 | | /// Indicate that the specified operation does not work with the specified |
1704 | | /// type and indicate what to do about it. Note that VT may refer to either |
1705 | | /// the type of a result or that of an operand of Op. |
1706 | | void setOperationAction(unsigned Op, MVT VT, |
1707 | 257M | LegalizeAction Action) { |
1708 | 257M | assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!"); |
1709 | 257M | OpActions[(unsigned)VT.SimpleTy][Op] = Action; |
1710 | 257M | } |
1711 | | |
1712 | | /// Indicate that the specified load with extension does not work with the |
1713 | | /// specified type and indicate what to do about it. |
1714 | | void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, |
1715 | 833M | LegalizeAction Action) { |
1716 | 833M | assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() && |
1717 | 833M | MemVT.isValid() && "Table isn't big enough!"); |
1718 | 833M | assert((unsigned)Action < 0x10 && "too many bits for bitfield array"); |
1719 | 833M | unsigned Shift = 4 * ExtType; |
1720 | 833M | LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &= ~((uint16_t)0xF << Shift); |
1721 | 833M | LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |= (uint16_t)Action << Shift; |
1722 | 833M | } |
1723 | | |
1724 | | /// Indicate that the specified truncating store does not work with the |
1725 | | /// specified type and indicate what to do about it. |
1726 | | void setTruncStoreAction(MVT ValVT, MVT MemVT, |
1727 | 288M | LegalizeAction Action) { |
1728 | 288M | assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!"); |
1729 | 288M | TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action; |
1730 | 288M | } |
1731 | | |
1732 | | /// Indicate that the specified indexed load does or does not work with the |
1733 | | /// specified type and indicate what to do abort it. |
1734 | | /// |
1735 | | /// NOTE: All indexed mode loads are initialized to Expand in |
1736 | | /// TargetLowering.cpp |
1737 | | void setIndexedLoadAction(unsigned IdxMode, MVT VT, |
1738 | 21.1M | LegalizeAction Action) { |
1739 | 21.1M | assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE && |
1740 | 21.1M | (unsigned)Action < 0xf && "Table isn't big enough!"); |
1741 | 21.1M | // Load action are kept in the upper half. |
1742 | 21.1M | IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0xf0; |
1743 | 21.1M | IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action) <<4; |
1744 | 21.1M | } |
1745 | | |
1746 | | /// Indicate that the specified indexed store does or does not work with the |
1747 | | /// specified type and indicate what to do about it. |
1748 | | /// |
1749 | | /// NOTE: All indexed mode stores are initialized to Expand in |
1750 | | /// TargetLowering.cpp |
1751 | | void setIndexedStoreAction(unsigned IdxMode, MVT VT, |
1752 | 21.1M | LegalizeAction Action) { |
1753 | 21.1M | assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE && |
1754 | 21.1M | (unsigned)Action < 0xf && "Table isn't big enough!"); |
1755 | 21.1M | // Store action are kept in the lower half. |
1756 | 21.1M | IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0x0f; |
1757 | 21.1M | IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action); |
1758 | 21.1M | } |
1759 | | |
1760 | | /// Indicate that the specified condition code is or isn't supported on the |
1761 | | /// target and indicate what to do about it. |
1762 | | void setCondCodeAction(ISD::CondCode CC, MVT VT, |
1763 | 115k | LegalizeAction Action) { |
1764 | 115k | assert(VT.isValid() && (unsigned)CC < array_lengthof(CondCodeActions) && |
1765 | 115k | "Table isn't big enough!"); |
1766 | 115k | assert((unsigned)Action < 0x10 && "too many bits for bitfield array"); |
1767 | 115k | /// The lower 3 bits of the SimpleTy index into Nth 4bit set from the 32-bit |
1768 | 115k | /// value and the upper 29 bits index into the second dimension of the array |
1769 | 115k | /// to select what 32-bit value to use. |
1770 | 115k | uint32_t Shift = 4 * (VT.SimpleTy & 0x7); |
1771 | 115k | CondCodeActions[CC][VT.SimpleTy >> 3] &= ~((uint32_t)0xF << Shift); |
1772 | 115k | CondCodeActions[CC][VT.SimpleTy >> 3] |= (uint32_t)Action << Shift; |
1773 | 115k | } |
1774 | | |
1775 | | /// If Opc/OrigVT is specified as being promoted, the promotion code defaults |
1776 | | /// to trying a larger integer/fp until it can find one that works. If that |
1777 | | /// default is insufficient, this method can be used by the target to override |
1778 | | /// the default. |
1779 | 1.27M | void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) { |
1780 | 1.27M | PromoteToType[std::make_pair(Opc, OrigVT.SimpleTy)] = DestVT.SimpleTy; |
1781 | 1.27M | } |
1782 | | |
1783 | | /// Convenience method to set an operation to Promote and specify the type |
1784 | | /// in a single call. |
1785 | 187k | void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) { |
1786 | 187k | setOperationAction(Opc, OrigVT, Promote); |
1787 | 187k | AddPromotedToType(Opc, OrigVT, DestVT); |
1788 | 187k | } |
1789 | | |
1790 | | /// Targets should invoke this method for each target independent node that |
1791 | | /// they want to provide a custom DAG combiner for by implementing the |
1792 | | /// PerformDAGCombine virtual method. |
1793 | 1.02M | void setTargetDAGCombine(ISD::NodeType NT) { |
1794 | 1.02M | assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray)); |
1795 | 1.02M | TargetDAGCombineArray[NT >> 3] |= 1 << (NT&7); |
1796 | 1.02M | } |
1797 | | |
1798 | | /// Set the target's required jmp_buf buffer size (in bytes); default is 200 |
1799 | 0 | void setJumpBufSize(unsigned Size) { |
1800 | 0 | JumpBufSize = Size; |
1801 | 0 | } |
1802 | | |
1803 | | /// Set the target's required jmp_buf buffer alignment (in bytes); default is |
1804 | | /// 0 |
1805 | 0 | void setJumpBufAlignment(unsigned Align) { |
1806 | 0 | JumpBufAlignment = Align; |
1807 | 0 | } |
1808 | | |
1809 | | /// Set the target's minimum function alignment (in log2(bytes)) |
1810 | 32.3k | void setMinFunctionAlignment(unsigned Align) { |
1811 | 32.3k | MinFunctionAlignment = Align; |
1812 | 32.3k | } |
1813 | | |
1814 | | /// Set the target's preferred function alignment. This should be set if |
1815 | | /// there is a performance benefit to higher-than-minimum alignment (in |
1816 | | /// log2(bytes)) |
1817 | 25.3k | void setPrefFunctionAlignment(unsigned Align) { |
1818 | 25.3k | PrefFunctionAlignment = Align; |
1819 | 25.3k | } |
1820 | | |
1821 | | /// Set the target's preferred loop alignment. Default alignment is zero, it |
1822 | | /// means the target does not care about loop alignment. The alignment is |
1823 | | /// specified in log2(bytes). The target may also override |
1824 | | /// getPrefLoopAlignment to provide per-loop values. |
1825 | 24.9k | void setPrefLoopAlignment(unsigned Align) { |
1826 | 24.9k | PrefLoopAlignment = Align; |
1827 | 24.9k | } |
1828 | | |
1829 | | /// Set the minimum stack alignment of an argument (in log2(bytes)). |
1830 | 16.4k | void setMinStackArgumentAlignment(unsigned Align) { |
1831 | 16.4k | MinStackArgumentAlignment = Align; |
1832 | 16.4k | } |
1833 | | |
1834 | | /// Set the maximum atomic operation size supported by the |
1835 | | /// backend. Atomic operations greater than this size (as well as |
1836 | | /// ones that are not naturally aligned), will be expanded by |
1837 | | /// AtomicExpandPass into an __atomic_* library call. |
1838 | 854 | void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits) { |
1839 | 854 | MaxAtomicSizeInBitsSupported = SizeInBits; |
1840 | 854 | } |
1841 | | |
1842 | | // Sets the minimum cmpxchg or ll/sc size supported by the backend. |
1843 | 854 | void setMinCmpXchgSizeInBits(unsigned SizeInBits) { |
1844 | 854 | MinCmpXchgSizeInBits = SizeInBits; |
1845 | 854 | } |
1846 | | |
1847 | | public: |
1848 | | //===--------------------------------------------------------------------===// |
1849 | | // Addressing mode description hooks (used by LSR etc). |
1850 | | // |
1851 | | |
1852 | | /// CodeGenPrepare sinks address calculations into the same BB as Load/Store |
1853 | | /// instructions reading the address. This allows as much computation as |
1854 | | /// possible to be done in the address mode for that operand. This hook lets |
1855 | | /// targets also pass back when this should be done on intrinsics which |
1856 | | /// load/store. |
1857 | | virtual bool getAddrModeArguments(IntrinsicInst * /*I*/, |
1858 | | SmallVectorImpl<Value*> &/*Ops*/, |
1859 | 1.07M | Type *&/*AccessTy*/) const { |
1860 | 1.07M | return false; |
1861 | 1.07M | } |
1862 | | |
/// This represents an addressing mode of:
///    BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
/// If BaseGV is null, there is no BaseGV.
/// If BaseOffs is zero, there is no base offset.
/// If HasBaseReg is false, there is no base register.
/// If Scale is zero, there is no ScaleReg.  Scale of 1 indicates a reg with
/// no scale.
struct AddrMode {
  GlobalValue *BaseGV = nullptr; // Global base symbol, or null if absent.
  int64_t BaseOffs = 0;          // Constant displacement added to the address.
  bool HasBaseReg = false;       // True when an (unscaled) base register is used.
  int64_t Scale = 0;             // Multiplier applied to the scaled register.
  AddrMode() = default;
};
1877 | | |
1878 | | /// Return true if the addressing mode represented by AM is legal for this |
1879 | | /// target, for a load/store of the specified type. |
1880 | | /// |
1881 | | /// The type may be VoidTy, in which case only return true if the addressing |
1882 | | /// mode is legal for a load/store of any legal type. TODO: Handle |
1883 | | /// pre/postinc as well. |
1884 | | /// |
1885 | | /// If the address space cannot be determined, it will be -1. |
1886 | | /// |
1887 | | /// TODO: Remove default argument |
1888 | | virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, |
1889 | | Type *Ty, unsigned AddrSpace, |
1890 | | Instruction *I = nullptr) const; |
1891 | | |
1892 | | /// \brief Return the cost of the scaling factor used in the addressing mode |
1893 | | /// represented by AM for this target, for a load/store of the specified type. |
1894 | | /// |
1895 | | /// If the AM is supported, the return value must be >= 0. |
1896 | | /// If the AM is not supported, it returns a negative value. |
1897 | | /// TODO: Handle pre/postinc as well. |
1898 | | /// TODO: Remove default argument |
1899 | | virtual int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, |
1900 | 6.08k | Type *Ty, unsigned AS = 0) const { |
1901 | 6.08k | // Default: assume that any scaling factor used in a legal AM is free. |
1902 | 6.08k | if (isLegalAddressingMode(DL, AM, Ty, AS)) |
1903 | 6.08k | return 0; |
1904 | 0 | return -1; |
1905 | 6.08k | } |
1906 | | |
/// Return true if the specified immediate is legal icmp immediate, that is
/// the target has icmp instructions which can compare a register against the
/// immediate without having to materialize the immediate into a register.
/// The default implementation accepts every immediate.
virtual bool isLegalICmpImmediate(int64_t) const {
  return true;
}
1913 | | |
/// Return true if the specified immediate is legal add immediate, that is the
/// target has add instructions which can add a register with the immediate
/// without having to materialize the immediate into a register.
/// The default implementation accepts every immediate.
virtual bool isLegalAddImmediate(int64_t) const {
  return true;
}
1920 | | |
/// Return true if it's significantly cheaper to shift a vector by a uniform
/// scalar than by an amount which will vary across each lane. On x86, for
/// example, there is a "psllw" instruction for the former case, but no simple
/// instruction for a general "a << b" operation on vectors.
/// Conservatively false by default.
virtual bool isVectorShiftByScalarCheap(Type *Ty) const {
  return false;
}
1928 | | |
1929 | | /// Returns true if the opcode is a commutative binary operation. |
1930 | 208M | virtual bool isCommutativeBinOp(unsigned Opcode) const { |
1931 | 208M | // FIXME: This should get its info from the td file. |
1932 | 208M | switch (Opcode) { |
1933 | 29.0M | case ISD::ADD: |
1934 | 29.0M | case ISD::SMIN: |
1935 | 29.0M | case ISD::SMAX: |
1936 | 29.0M | case ISD::UMIN: |
1937 | 29.0M | case ISD::UMAX: |
1938 | 29.0M | case ISD::MUL: |
1939 | 29.0M | case ISD::MULHU: |
1940 | 29.0M | case ISD::MULHS: |
1941 | 29.0M | case ISD::SMUL_LOHI: |
1942 | 29.0M | case ISD::UMUL_LOHI: |
1943 | 29.0M | case ISD::FADD: |
1944 | 29.0M | case ISD::FMUL: |
1945 | 29.0M | case ISD::AND: |
1946 | 29.0M | case ISD::OR: |
1947 | 29.0M | case ISD::XOR: |
1948 | 29.0M | case ISD::SADDO: |
1949 | 29.0M | case ISD::UADDO: |
1950 | 29.0M | case ISD::ADDC: |
1951 | 29.0M | case ISD::ADDE: |
1952 | 29.0M | case ISD::FMINNUM: |
1953 | 29.0M | case ISD::FMAXNUM: |
1954 | 29.0M | case ISD::FMINNAN: |
1955 | 29.0M | case ISD::FMAXNAN: |
1956 | 29.0M | return true; |
1957 | 179M | default: return false; |
1958 | 208M | } |
1959 | 208M | } |
1960 | | |
/// Return true if it's free to truncate a value of type FromTy to type
/// ToTy. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
/// by referencing its sub-register AX.
/// Targets must return false when FromTy <= ToTy.
/// Conservatively false by default.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const {
  return false;
}
1968 | | |
/// Return true if a truncation from FromTy to ToTy is permitted when deciding
/// whether a call is in tail position. Typically this means that both results
/// would be assigned to the same register or stack slot, but it could mean
/// the target performs adequate checks of its own before proceeding with the
/// tail call.  Targets must return false when FromTy <= ToTy.
/// Conservatively false by default.
virtual bool allowTruncateForTailCall(Type *FromTy, Type *ToTy) const {
  return false;
}
1977 | | |
/// EVT-based overload of isTruncateFree; see the Type*-based overload above
/// for the contract. Conservatively false by default.
virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const {
  return false;
}
1981 | | |
/// Hook allowing targets to veto hoisting of instruction \p I; the default
/// implementation always permits it.
virtual bool isProfitableToHoist(Instruction *I) const { return true; }
1983 | | |
1984 | | /// Return true if the extension represented by \p I is free. |
1985 | | /// Unlikely the is[Z|FP]ExtFree family which is based on types, |
1986 | | /// this method can use the context provided by \p I to decide |
1987 | | /// whether or not \p I is free. |
1988 | | /// This method extends the behavior of the is[Z|FP]ExtFree family. |
1989 | | /// In other words, if is[Z|FP]Free returns true, then this method |
1990 | | /// returns true as well. The converse is not true. |
1991 | | /// The target can perform the adequate checks by overriding isExtFreeImpl. |
1992 | | /// \pre \p I must be a sign, zero, or fp extension. |
1993 | 2.53M | bool isExtFree(const Instruction *I) const { |
1994 | 2.53M | switch (I->getOpcode()) { |
1995 | 84.8k | case Instruction::FPExt: |
1996 | 84.8k | if (isFPExtFree(EVT::getEVT(I->getType()))) |
1997 | 0 | return true; |
1998 | 84.8k | break; |
1999 | 855k | case Instruction::ZExt: |
2000 | 855k | if (isZExtFree(I->getOperand(0)->getType(), I->getType())) |
2001 | 310k | return true; |
2002 | 544k | break; |
2003 | 1.59M | case Instruction::SExt: |
2004 | 1.59M | break; |
2005 | 0 | default: |
2006 | 0 | llvm_unreachable("Instruction is not an extension"); |
2007 | 2.53M | } |
2008 | 2.22M | return isExtFreeImpl(I); |
2009 | 2.53M | } |
2010 | | |
2011 | | /// Return true if \p Load and \p Ext can form an ExtLoad. |
2012 | | /// For example, in AArch64 |
2013 | | /// %L = load i8, i8* %ptr |
2014 | | /// %E = zext i8 %L to i32 |
2015 | | /// can be lowered into one load instruction |
2016 | | /// ldrb w0, [x0] |
2017 | | bool isExtLoad(const LoadInst *Load, const Instruction *Ext, |
2018 | 796k | const DataLayout &DL) const { |
2019 | 796k | EVT VT = getValueType(DL, Ext->getType()); |
2020 | 796k | EVT LoadVT = getValueType(DL, Load->getType()); |
2021 | 796k | |
2022 | 796k | // If the load has other users and the truncate is not free, the ext |
2023 | 796k | // probably isn't free. |
2024 | 796k | if (!Load->hasOneUse() && 796k (isTypeLegal(LoadVT) || 242k !isTypeLegal(VT)139k ) && |
2025 | 104k | !isTruncateFree(Ext->getType(), Load->getType())) |
2026 | 171 | return false; |
2027 | 796k | |
2028 | 796k | // Check whether the target supports casts folded into loads. |
2029 | 796k | unsigned LType; |
2030 | 796k | if (isa<ZExtInst>(Ext)) |
2031 | 392k | LType = ISD::ZEXTLOAD; |
2032 | 403k | else { |
2033 | 403k | assert(isa<SExtInst>(Ext) && "Unexpected ext type!"); |
2034 | 403k | LType = ISD::SEXTLOAD; |
2035 | 403k | } |
2036 | 796k | |
2037 | 796k | return isLoadExtLegal(LType, VT, LoadVT); |
2038 | 796k | } |
2039 | | |
/// Return true if any actual instruction that defines a value of type FromTy
/// implicitly zero-extends the value to ToTy in the result register.
///
/// The function should return true when it is likely that the truncate can
/// be freely folded with an instruction defining a value of FromTy. If
/// the defining instruction is unknown (because you're looking at a
/// function argument, PHI, etc.) then the target may require an
/// explicit truncate, which is not necessarily free, but this function
/// does not deal with those cases.
/// Targets must return false when FromTy >= ToTy.
/// Conservatively false by default.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const {
  return false;
}
2053 | | |
/// EVT-based overload of isZExtFree; see the Type*-based overload above for
/// the contract. Conservatively false by default.
virtual bool isZExtFree(EVT FromTy, EVT ToTy) const {
  return false;
}
2057 | | |
/// Return true if the target supplies and combines to a paired load
/// two loaded values of type LoadedType next to each other in memory.
/// RequiredAlignment gives the minimal alignment constraints that must be met
/// to be able to select this paired load.
///
/// This information is *not* used to generate actual paired loads, but it is
/// used to generate a sequence of loads that is easier to combine into a
/// paired load.
/// For instance, something like this:
///   a = load i64* addr
///   b = trunc i64 a to i32
///   c = lshr i64 a, 32
///   d = trunc i64 c to i32
/// will be optimized into:
///   b = load i32* addr1
///   d = load i32* addr2
/// Where addr1 = addr2 +/- sizeof(i32).
///
/// In other words, unless the target performs a post-isel load combining,
/// this information should not be provided because it will generate more
/// loads.
/// Disabled by default.
virtual bool hasPairedLoad(EVT /*LoadedType*/,
                           unsigned & /*RequiredAlignment*/) const {
  return false;
}
2083 | | |
/// \brief Get the maximum supported factor for interleaved memory accesses.
/// Default to be the minimum interleave factor: 2.
virtual unsigned getMaxSupportedInterleaveFactor() const { return 2; }
2087 | | |
/// \brief Lower an interleaved load to target specific intrinsics. Return
/// true on success.
///
/// \p LI is the vector load instruction.
/// \p Shuffles is the shufflevector list to DE-interleave the loaded vector.
/// \p Indices is the corresponding indices for each shufflevector.
/// \p Factor is the interleave factor.
/// The default implementation performs no lowering and returns false.
virtual bool lowerInterleavedLoad(LoadInst *LI,
                                  ArrayRef<ShuffleVectorInst *> Shuffles,
                                  ArrayRef<unsigned> Indices,
                                  unsigned Factor) const {
  return false;
}
2101 | | |
/// \brief Lower an interleaved store to target specific intrinsics. Return
/// true on success.
///
/// \p SI is the vector store instruction.
/// \p SVI is the shufflevector to RE-interleave the stored vector.
/// \p Factor is the interleave factor.
/// The default implementation performs no lowering and returns false.
virtual bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                                   unsigned Factor) const {
  return false;
}
2112 | | |
2113 | | /// Return true if zero-extending the specific node Val to type VT2 is free |
2114 | | /// (either because it's implicitly zero-extended such as ARM ldrb / ldrh or |
2115 | | /// because it's folded such as X86 zero-extending loads). |
2116 | 10.6k | virtual bool isZExtFree(SDValue Val, EVT VT2) const { |
2117 | 10.6k | return isZExtFree(Val.getValueType(), VT2); |
2118 | 10.6k | } |
2119 | | |
/// Return true if an fpext operation is free (for instance, because
/// single-precision floating-point numbers are implicitly extended to
/// double-precision).
/// \pre \p VT is a floating-point type. Conservatively false by default.
virtual bool isFPExtFree(EVT VT) const {
  assert(VT.isFloatingPoint());
  return false;
}
2127 | | |
/// Return true if folding a vector load into ExtVal (a sign, zero, or any
/// extend node) is profitable. Conservatively false by default.
virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const { return false; }
2131 | | |
/// Return true if an fneg operation is free to the point where it is never
/// worthwhile to replace it with a bitwise operation.
/// \pre \p VT is a floating-point type. Conservatively false by default.
virtual bool isFNegFree(EVT VT) const {
  assert(VT.isFloatingPoint());
  return false;
}
2138 | | |
/// Return true if an fabs operation is free to the point where it is never
/// worthwhile to replace it with a bitwise operation.
/// \pre \p VT is a floating-point type. Conservatively false by default.
virtual bool isFAbsFree(EVT VT) const {
  assert(VT.isFloatingPoint());
  return false;
}
2145 | | |
/// Return true if an FMA operation is faster than a pair of fmul and fadd
/// instructions. fmuladd intrinsics will be expanded to FMAs when this method
/// returns true, otherwise fmuladd is expanded to fmul + fadd.
///
/// NOTE: This may be called before legalization on types for which FMAs are
/// not legal, but should return true if those types will eventually legalize
/// to types that support FMAs. After legalization, it will only be called on
/// types that support FMAs (via Legal or Custom actions)
/// Conservatively false by default.
virtual bool isFMAFasterThanFMulAndFAdd(EVT) const {
  return false;
}
2157 | | |
/// Return true if it's profitable to narrow operations of type VT1 to
/// VT2. e.g. on x86, it's profitable to narrow from i32 to i8 but not from
/// i32 to i16. Conservatively false by default.
virtual bool isNarrowingProfitable(EVT /*VT1*/, EVT /*VT2*/) const {
  return false;
}
2164 | | |
/// \brief Return true if it is beneficial to convert a load of a constant to
/// just the constant itself.
/// On some targets it might be more efficient to use a combination of
/// arithmetic instructions to materialize the constant instead of loading it
/// from a constant pool.
/// The default implementation always prefers the constant-pool load.
virtual bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                               Type *Ty) const {
  return false;
}
2174 | | |
/// Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type
/// from this source type with this index. This is needed because
/// EXTRACT_SUBVECTOR usually has custom lowering that depends on the index of
/// the first element, and only the target knows which lowering is cheap.
/// Conservatively false by default.
virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                     unsigned Index) const {
  return false;
}
2183 | | |
// Return true if it is profitable to use a scalar input to a BUILD_VECTOR
// even if the vector itself has multiple uses.
// Conservatively false by default.
virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const {
  return false;
}
2189 | | |
2190 | | //===--------------------------------------------------------------------===// |
2191 | | // Runtime Library hooks |
2192 | | // |
2193 | | |
/// Rename the default libcall routine name for the specified libcall.
/// \param Name must outlive this object; only the pointer is stored.
void setLibcallName(RTLIB::Libcall Call, const char *Name) {
  LibcallRoutineNames[Call] = Name;
}
2198 | | |
/// Get the libcall routine name for the specified libcall.
const char *getLibcallName(RTLIB::Libcall Call) const {
  return LibcallRoutineNames[Call];
}
2203 | | |
/// Override the default CondCode to be used to test the result of the
/// comparison libcall against zero.
void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC) {
  CmpLibcallCCs[Call] = CC;
}
2209 | | |
/// Get the CondCode that's to be used to test the result of the comparison
/// libcall against zero.
ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const {
  return CmpLibcallCCs[Call];
}
2215 | | |
/// Set the CallingConv that should be used for the specified libcall.
void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC) {
  LibcallCallingConvs[Call] = CC;
}
2220 | | |
/// Get the CallingConv that should be used for the specified libcall.
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const {
  return LibcallCallingConvs[Call];
}
2225 | | |
2226 | | /// Execute target specific actions to finalize target lowering. |
2227 | | /// This is used to set extra flags in MachineFrameInformation and freezing |
2228 | | /// the set of reserved registers. |
2229 | | /// The default implementation just freezes the set of reserved registers. |
2230 | | virtual void finalizeLowering(MachineFunction &MF) const; |
2231 | | |
2232 | | private: |
2233 | | const TargetMachine &TM; |
2234 | | |
2235 | | /// Tells the code generator that the target has multiple (allocatable) |
2236 | | /// condition registers that can be used to store the results of comparisons |
2237 | | /// for use by selects and conditional branches. With multiple condition |
2238 | | /// registers, the code generator will not aggressively sink comparisons into |
2239 | | /// the blocks of their users. |
2240 | | bool HasMultipleConditionRegisters; |
2241 | | |
2242 | | /// Tells the code generator that the target has BitExtract instructions. |
2243 | | /// The code generator will aggressively sink "shift"s into the blocks of |
2244 | | /// their users if the users will generate "and" instructions which can be |
2245 | | /// combined with "shift" to BitExtract instructions. |
2246 | | bool HasExtractBitsInsn; |
2247 | | |
2248 | | /// Tells the code generator to bypass slow divide or remainder |
2249 | | /// instructions. For example, BypassSlowDivWidths[32,8] tells the code |
2250 | | /// generator to bypass 32-bit integer div/rem with an 8-bit unsigned integer |
2251 | | /// div/rem when the operands are positive and less than 256. |
2252 | | DenseMap <unsigned int, unsigned int> BypassSlowDivWidths; |
2253 | | |
2254 | | /// Tells the code generator that it shouldn't generate extra flow control |
2255 | | /// instructions and should attempt to combine flow control instructions via |
2256 | | /// predication. |
2257 | | bool JumpIsExpensive; |
2258 | | |
2259 | | /// Whether the target supports or cares about preserving floating point |
2260 | | /// exception behavior. |
2261 | | bool HasFloatingPointExceptions; |
2262 | | |
2263 | | /// This target prefers to use _setjmp to implement llvm.setjmp. |
2264 | | /// |
2265 | | /// Defaults to false. |
2266 | | bool UseUnderscoreSetJmp; |
2267 | | |
2268 | | /// This target prefers to use _longjmp to implement llvm.longjmp. |
2269 | | /// |
2270 | | /// Defaults to false. |
2271 | | bool UseUnderscoreLongJmp; |
2272 | | |
2273 | | /// Information about the contents of the high-bits in boolean values held in |
2274 | | /// a type wider than i1. See getBooleanContents. |
2275 | | BooleanContent BooleanContents; |
2276 | | |
2277 | | /// Information about the contents of the high-bits in boolean values held in |
2278 | | /// a type wider than i1. See getBooleanContents. |
2279 | | BooleanContent BooleanFloatContents; |
2280 | | |
2281 | | /// Information about the contents of the high-bits in boolean vector values |
2282 | | /// when the element type is wider than i1. See getBooleanContents. |
2283 | | BooleanContent BooleanVectorContents; |
2284 | | |
2285 | | /// The target scheduling preference: shortest possible total cycles or lowest |
2286 | | /// register usage. |
2287 | | Sched::Preference SchedPreferenceInfo; |
2288 | | |
2289 | | /// The size, in bytes, of the target's jmp_buf buffers |
2290 | | unsigned JumpBufSize; |
2291 | | |
2292 | | /// The alignment, in bytes, of the target's jmp_buf buffers |
2293 | | unsigned JumpBufAlignment; |
2294 | | |
2295 | | /// The minimum alignment that any argument on the stack needs to have. |
2296 | | unsigned MinStackArgumentAlignment; |
2297 | | |
2298 | | /// The minimum function alignment (used when optimizing for size, and to |
2299 | | /// prevent explicitly provided alignment from leading to incorrect code). |
2300 | | unsigned MinFunctionAlignment; |
2301 | | |
2302 | | /// The preferred function alignment (used when alignment unspecified and |
2303 | | /// optimizing for speed). |
2304 | | unsigned PrefFunctionAlignment; |
2305 | | |
2306 | | /// The preferred loop alignment. |
2307 | | unsigned PrefLoopAlignment; |
2308 | | |
2309 | | /// Size in bits of the maximum atomics size the backend supports. |
2310 | | /// Accesses larger than this will be expanded by AtomicExpandPass. |
2311 | | unsigned MaxAtomicSizeInBitsSupported; |
2312 | | |
2313 | | /// Size in bits of the minimum cmpxchg or ll/sc operation the |
2314 | | /// backend supports. |
2315 | | unsigned MinCmpXchgSizeInBits; |
2316 | | |
2317 | | /// If set to a physical register, this specifies the register that |
2318 | | /// llvm.savestack/llvm.restorestack should save and restore. |
2319 | | unsigned StackPointerRegisterToSaveRestore; |
2320 | | |
2321 | | /// This indicates the default register class to use for each ValueType the |
2322 | | /// target supports natively. |
2323 | | const TargetRegisterClass *RegClassForVT[MVT::LAST_VALUETYPE]; |
2324 | | unsigned char NumRegistersForVT[MVT::LAST_VALUETYPE]; |
2325 | | MVT RegisterTypeForVT[MVT::LAST_VALUETYPE]; |
2326 | | |
2327 | | /// This indicates the "representative" register class to use for each |
2328 | | /// ValueType the target supports natively. This information is used by the |
2329 | | /// scheduler to track register pressure. By default, the representative |
2330 | | /// register class is the largest legal super-reg register class of the |
2331 | | /// register class of the specified type. e.g. On x86, i8, i16, and i32's |
2332 | | /// representative class would be GR32. |
2333 | | const TargetRegisterClass *RepRegClassForVT[MVT::LAST_VALUETYPE]; |
2334 | | |
2335 | | /// This indicates the "cost" of the "representative" register class for each |
2336 | | /// ValueType. The cost is used by the scheduler to approximate register |
2337 | | /// pressure. |
2338 | | uint8_t RepRegClassCostForVT[MVT::LAST_VALUETYPE]; |
2339 | | |
2340 | | /// For any value types we are promoting or expanding, this contains the value |
2341 | | /// type that we are changing to. For Expanded types, this contains one step |
2342 | | /// of the expand (e.g. i64 -> i32), even if there are multiple steps required |
2343 | | /// (e.g. i64 -> i16). For types natively supported by the system, this holds |
2344 | | /// the same type (e.g. i32 -> i32). |
2345 | | MVT TransformToType[MVT::LAST_VALUETYPE]; |
2346 | | |
2347 | | /// For each operation and each value type, keep a LegalizeAction that |
2348 | | /// indicates how instruction selection should deal with the operation. Most |
2349 | | /// operations are Legal (aka, supported natively by the target), but |
2350 | | /// operations that are not should be described. Note that operations on |
2351 | | /// non-legal value types are not described here. |
2352 | | LegalizeAction OpActions[MVT::LAST_VALUETYPE][ISD::BUILTIN_OP_END]; |
2353 | | |
2354 | | /// For each load extension type and each value type, keep a LegalizeAction |
2355 | | /// that indicates how instruction selection should deal with a load of a |
2356 | | /// specific value type and extension type. Uses 4-bits to store the action |
2357 | | /// for each of the 4 load ext types. |
2358 | | uint16_t LoadExtActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE]; |
2359 | | |
2360 | | /// For each value type pair keep a LegalizeAction that indicates whether a |
2361 | | /// truncating store of a specific value type and truncating type is legal. |
2362 | | LegalizeAction TruncStoreActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE]; |
2363 | | |
2364 | | /// For each indexed mode and each value type, keep a pair of LegalizeAction |
2365 | | /// that indicates how instruction selection should deal with the load / |
2366 | | /// store. |
2367 | | /// |
2368 | | /// The first dimension is the value_type for the reference. The second |
2369 | | /// dimension represents the various modes for load store. |
2370 | | uint8_t IndexedModeActions[MVT::LAST_VALUETYPE][ISD::LAST_INDEXED_MODE]; |
2371 | | |
2372 | | /// For each condition code (ISD::CondCode) keep a LegalizeAction that |
2373 | | /// indicates how instruction selection should deal with the condition code. |
2374 | | /// |
2375 | | /// Because each CC action takes up 4 bits, we need to have the array size be |
2376 | | /// large enough to fit all of the value types. This can be done by rounding |
2377 | | /// up the MVT::LAST_VALUETYPE value to the next multiple of 8. |
2378 | | uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::LAST_VALUETYPE + 7) / 8]; |
2379 | | |
2380 | | protected: |
2381 | | ValueTypeActionImpl ValueTypeActions; |
2382 | | |
2383 | | private: |
2384 | | LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const; |
2385 | | |
2386 | | /// Targets can specify ISD nodes that they would like PerformDAGCombine |
2387 | | /// callbacks for by calling setTargetDAGCombine(), which sets a bit in this |
2388 | | /// array. |
2389 | | unsigned char |
2390 | | TargetDAGCombineArray[(ISD::BUILTIN_OP_END+CHAR_BIT-1)/CHAR_BIT]; |
2391 | | |
2392 | | /// For operations that must be promoted to a specific type, this holds the |
2393 | | /// destination type. This map should be sparse, so don't hold it as an |
2394 | | /// array. |
2395 | | /// |
2396 | | /// Targets add entries to this map with AddPromotedToType(..), clients access |
2397 | | /// this with getTypeToPromoteTo(..). |
2398 | | std::map<std::pair<unsigned, MVT::SimpleValueType>, MVT::SimpleValueType> |
2399 | | PromoteToType; |
2400 | | |
2401 | | /// Stores the name each libcall. |
2402 | | const char *LibcallRoutineNames[RTLIB::UNKNOWN_LIBCALL]; |
2403 | | |
2404 | | /// The ISD::CondCode that should be used to test the result of each of the |
2405 | | /// comparison libcall against zero. |
2406 | | ISD::CondCode CmpLibcallCCs[RTLIB::UNKNOWN_LIBCALL]; |
2407 | | |
2408 | | /// Stores the CallingConv that should be used for each libcall. |
2409 | | CallingConv::ID LibcallCallingConvs[RTLIB::UNKNOWN_LIBCALL]; |
2410 | | |
2411 | | protected: |
/// Return true if the extension represented by \p I is free.
/// \pre \p I is a sign, zero, or fp extension and
/// is[Z|FP]ExtFree of the related types is not true.
/// Fallback used by isExtFree; conservatively false by default.
virtual bool isExtFreeImpl(const Instruction *I) const { return false; }
2416 | | |
2417 | | /// Depth that GatherAllAliases should should continue looking for chain |
2418 | | /// dependencies when trying to find a more preferable chain. As an |
2419 | | /// approximation, this should be more than the number of consecutive stores |
2420 | | /// expected to be merged. |
2421 | | unsigned GatherAllAliasesMaxDepth; |
2422 | | |
2423 | | /// \brief Specify maximum number of store instructions per memset call. |
2424 | | /// |
2425 | | /// When lowering \@llvm.memset this field specifies the maximum number of |
2426 | | /// store operations that may be substituted for the call to memset. Targets |
2427 | | /// must set this value based on the cost threshold for that target. Targets |
2428 | | /// should assume that the memset will be done using as many of the largest |
2429 | | /// store operations first, followed by smaller ones, if necessary, per |
2430 | | /// alignment restrictions. For example, storing 9 bytes on a 32-bit machine |
2431 | | /// with 16-bit alignment would result in four 2-byte stores and one 1-byte |
2432 | | /// store. This only applies to setting a constant array of a constant size. |
2433 | | unsigned MaxStoresPerMemset; |
2434 | | |
2435 | | /// Maximum number of stores operations that may be substituted for the call |
2436 | | /// to memset, used for functions with OptSize attribute. |
2437 | | unsigned MaxStoresPerMemsetOptSize; |
2438 | | |
2439 | | /// \brief Specify maximum bytes of store instructions per memcpy call. |
2440 | | /// |
2441 | | /// When lowering \@llvm.memcpy this field specifies the maximum number of |
2442 | | /// store operations that may be substituted for a call to memcpy. Targets |
2443 | | /// must set this value based on the cost threshold for that target. Targets |
2444 | | /// should assume that the memcpy will be done using as many of the largest |
2445 | | /// store operations first, followed by smaller ones, if necessary, per |
2446 | | /// alignment restrictions. For example, storing 7 bytes on a 32-bit machine |
2447 | | /// with 32-bit alignment would result in one 4-byte store, a one 2-byte store |
2448 | | /// and one 1-byte store. This only applies to copying a constant array of |
2449 | | /// constant size. |
2450 | | unsigned MaxStoresPerMemcpy; |
2451 | | |
2452 | | /// Maximum number of store operations that may be substituted for a call to |
2453 | | /// memcpy, used for functions with OptSize attribute. |
2454 | | unsigned MaxStoresPerMemcpyOptSize; |
2455 | | unsigned MaxLoadsPerMemcmp; |
2456 | | unsigned MaxLoadsPerMemcmpOptSize; |
2457 | | |
2458 | | /// \brief Specify maximum bytes of store instructions per memmove call. |
2459 | | /// |
2460 | | /// When lowering \@llvm.memmove this field specifies the maximum number of |
2461 | | /// store instructions that may be substituted for a call to memmove. Targets |
2462 | | /// must set this value based on the cost threshold for that target. Targets |
2463 | | /// should assume that the memmove will be done using as many of the largest |
2464 | | /// store operations first, followed by smaller ones, if necessary, per |
2465 | | /// alignment restrictions. For example, moving 9 bytes on a 32-bit machine |
2466 | | /// with 8-bit alignment would result in nine 1-byte stores. This only |
2467 | | /// applies to copying a constant array of constant size. |
2468 | | unsigned MaxStoresPerMemmove; |
2469 | | |
2470 | | /// Maximum number of store instructions that may be substituted for a call to |
2471 | | /// memmove, used for functions with OptSize attribute. |
2472 | | unsigned MaxStoresPerMemmoveOptSize; |
2473 | | |
2474 | | /// Tells the code generator that select is more expensive than a branch if |
2475 | | /// the branch is usually predicted right. |
2476 | | bool PredictableSelectIsExpensive; |
2477 | | |
2478 | | /// \see enableExtLdPromotion. |
2479 | | bool EnableExtLdPromotion; |
2480 | | |
2481 | | /// Return true if the value types that can be represented by the specified |
2482 | | /// register class are all legal. |
2483 | | bool isLegalRC(const TargetRegisterInfo &TRI, |
2484 | | const TargetRegisterClass &RC) const; |
2485 | | |
2486 | | /// Replace/modify any TargetFrameIndex operands with a targte-dependent |
2487 | | /// sequence of memory operands that is recognized by PrologEpilogInserter. |
2488 | | MachineBasicBlock *emitPatchPoint(MachineInstr &MI, |
2489 | | MachineBasicBlock *MBB) const; |
2490 | | }; |
2491 | | |
2492 | | /// This class defines information used to lower LLVM code to legal SelectionDAG |
2493 | | /// operators that the target instruction selector can accept natively. |
2494 | | /// |
2495 | | /// This class also defines callbacks that targets must implement to lower |
2496 | | /// target-specific constructs to SelectionDAG operators. |
2497 | | class TargetLowering : public TargetLoweringBase { |
2498 | | public: |
2499 | | struct DAGCombinerInfo; |
2500 | | |
2501 | | TargetLowering(const TargetLowering &) = delete; |
2502 | | TargetLowering &operator=(const TargetLowering &) = delete; |
2503 | | |
2504 | | /// NOTE: The TargetMachine owns TLOF. |
2505 | | explicit TargetLowering(const TargetMachine &TM); |
2506 | | |
2507 | | bool isPositionIndependent() const; |
2508 | | |
  /// Returns true by value, base pointer and offset pointer and addressing mode
  /// by reference if the node's address can be legally represented as
  /// pre-indexed load / store address.
  /// The default implementation reports that no pre-indexed form is available.
  virtual bool getPreIndexedAddressParts(SDNode * /*N*/, SDValue &/*Base*/,
                                         SDValue &/*Offset*/,
                                         ISD::MemIndexedMode &/*AM*/,
                                         SelectionDAG &/*DAG*/) const {
    return false;
  }
2518 | | |
  /// Returns true by value, base pointer and offset pointer and addressing mode
  /// by reference if this node can be combined with a load / store to form a
  /// post-indexed load / store.
  /// The default implementation reports that no post-indexed form is available.
  virtual bool getPostIndexedAddressParts(SDNode * /*N*/, SDNode * /*Op*/,
                                          SDValue &/*Base*/,
                                          SDValue &/*Offset*/,
                                          ISD::MemIndexedMode &/*AM*/,
                                          SelectionDAG &/*DAG*/) const {
    return false;
  }
2529 | | |
2530 | | /// Return the entry encoding for a jump table in the current function. The |
2531 | | /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum. |
2532 | | virtual unsigned getJumpTableEncoding() const; |
2533 | | |
  /// Produce the MCExpr for one entry of a custom jump table. Targets whose
  /// getJumpTableEncoding() reports a custom entry kind must override this;
  /// the default aborts.
  virtual const MCExpr *
  LowerCustomJumpTableEntry(const MachineJumpTableInfo * /*MJTI*/,
                            const MachineBasicBlock * /*MBB*/, unsigned /*uid*/,
                            MCContext &/*Ctx*/) const {
    llvm_unreachable("Need to implement this hook if target has custom JTIs");
  }
2540 | | |
2541 | | /// Returns relocation base for the given PIC jumptable. |
2542 | | virtual SDValue getPICJumpTableRelocBase(SDValue Table, |
2543 | | SelectionDAG &DAG) const; |
2544 | | |
2545 | | /// This returns the relocation base for the given PIC jumptable, the same as |
2546 | | /// getPICJumpTableRelocBase, but as an MCExpr. |
2547 | | virtual const MCExpr * |
2548 | | getPICJumpTableRelocBaseExpr(const MachineFunction *MF, |
2549 | | unsigned JTI, MCContext &Ctx) const; |
2550 | | |
2551 | | /// Return true if folding a constant offset with the given GlobalAddress is |
2552 | | /// legal. It is frequently not legal in PIC relocation models. |
2553 | | virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; |
2554 | | |
2555 | | bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, |
2556 | | SDValue &Chain) const; |
2557 | | |
2558 | | void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, |
2559 | | SDValue &NewRHS, ISD::CondCode &CCCode, |
2560 | | const SDLoc &DL) const; |
2561 | | |
2562 | | /// Returns a pair of (return value, chain). |
2563 | | /// It is an error to pass RTLIB::UNKNOWN_LIBCALL as \p LC. |
2564 | | std::pair<SDValue, SDValue> makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, |
2565 | | EVT RetVT, ArrayRef<SDValue> Ops, |
2566 | | bool isSigned, const SDLoc &dl, |
2567 | | bool doesNotReturn = false, |
2568 | | bool isReturnValueUsed = true) const; |
2569 | | |
2570 | | /// Check whether parameters to a call that are passed in callee saved |
2571 | | /// registers are the same as from the calling function. This needs to be |
2572 | | /// checked for tail call eligibility. |
2573 | | bool parametersInCSRMatch(const MachineRegisterInfo &MRI, |
2574 | | const uint32_t *CallerPreservedMask, |
2575 | | const SmallVectorImpl<CCValAssign> &ArgLocs, |
2576 | | const SmallVectorImpl<SDValue> &OutVals) const; |
2577 | | |
2578 | | //===--------------------------------------------------------------------===// |
2579 | | // TargetLowering Optimization Methods |
2580 | | // |
2581 | | |
2582 | | /// A convenience struct that encapsulates a DAG, and two SDValues for |
2583 | | /// returning information from TargetLowering to its clients that want to |
2584 | | /// combine. |
2585 | | struct TargetLoweringOpt { |
2586 | | SelectionDAG &DAG; |
2587 | | bool LegalTys; |
2588 | | bool LegalOps; |
2589 | | SDValue Old; |
2590 | | SDValue New; |
2591 | | |
2592 | | explicit TargetLoweringOpt(SelectionDAG &InDAG, |
2593 | | bool LT, bool LO) : |
2594 | 12.7M | DAG(InDAG), LegalTys(LT), LegalOps(LO) {} |
2595 | | |
2596 | 233k | bool LegalTypes() const { return LegalTys; } |
2597 | 81.6k | bool LegalOperations() const { return LegalOps; } |
2598 | | |
2599 | 465k | bool CombineTo(SDValue O, SDValue N) { |
2600 | 465k | Old = O; |
2601 | 465k | New = N; |
2602 | 465k | return true; |
2603 | 465k | } |
2604 | | }; |
2605 | | |
2606 | | /// Check to see if the specified operand of the specified instruction is a |
2607 | | /// constant integer. If so, check to see if there are any bits set in the |
2608 | | /// constant that are not demanded. If so, shrink the constant and return |
2609 | | /// true. |
2610 | | bool ShrinkDemandedConstant(SDValue Op, const APInt &Demanded, |
2611 | | TargetLoweringOpt &TLO) const; |
2612 | | |
  /// Target hook to do target-specific const optimization, which is called by
  /// ShrinkDemandedConstant. This function should return true if the target
  /// doesn't want ShrinkDemandedConstant to further optimize the constant.
  /// The default implementation imposes no such restriction.
  virtual bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
                                            TargetLoweringOpt &TLO) const {
    return false;
  }
2620 | | |
2621 | | /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. This |
2622 | | /// uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be |
2623 | | /// generalized for targets with other types of implicit widening casts. |
2624 | | bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded, |
2625 | | TargetLoweringOpt &TLO) const; |
2626 | | |
2627 | | /// Helper for SimplifyDemandedBits that can simplify an operation with |
2628 | | /// multiple uses. This function simplifies operand \p OpIdx of \p User and |
2629 | | /// then updates \p User with the simplified version. No other uses of |
2630 | | /// \p OpIdx are updated. If \p User is the only user of \p OpIdx, this |
2631 | | /// function behaves exactly like function SimplifyDemandedBits declared |
2632 | | /// below except that it also updates the DAG by calling |
2633 | | /// DCI.CommitTargetLoweringOpt. |
2634 | | bool SimplifyDemandedBits(SDNode *User, unsigned OpIdx, const APInt &Demanded, |
2635 | | DAGCombinerInfo &DCI, TargetLoweringOpt &TLO) const; |
2636 | | |
2637 | | /// Look at Op. At this point, we know that only the DemandedMask bits of the |
2638 | | /// result of Op are ever used downstream. If we can use this information to |
2639 | | /// simplify Op, create a new simplified DAG node and return true, returning |
2640 | | /// the original and new nodes in Old and New. Otherwise, analyze the |
2641 | | /// expression and return a mask of KnownOne and KnownZero bits for the |
2642 | | /// expression (used to simplify the caller). The KnownZero/One bits may only |
2643 | | /// be accurate for those bits in the DemandedMask. |
2644 | | /// \p AssumeSingleUse When this parameter is true, this function will |
2645 | | /// attempt to simplify \p Op even if there are multiple uses. |
2646 | | /// Callers are responsible for correctly updating the DAG based on the |
  /// results of this function, because simply replacing TLO.Old
2648 | | /// with TLO.New will be incorrect when this parameter is true and TLO.Old |
2649 | | /// has multiple uses. |
2650 | | bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask, |
2651 | | KnownBits &Known, |
2652 | | TargetLoweringOpt &TLO, |
2653 | | unsigned Depth = 0, |
2654 | | bool AssumeSingleUse = false) const; |
2655 | | |
2656 | | /// Helper wrapper around SimplifyDemandedBits |
2657 | | bool SimplifyDemandedBits(SDValue Op, APInt &DemandedMask, |
2658 | | DAGCombinerInfo &DCI) const; |
2659 | | |
2660 | | /// Determine which of the bits specified in Mask are known to be either zero |
2661 | | /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts |
2662 | | /// argument allows us to only collect the known bits that are shared by the |
2663 | | /// requested vector elements. |
2664 | | virtual void computeKnownBitsForTargetNode(const SDValue Op, |
2665 | | KnownBits &Known, |
2666 | | const APInt &DemandedElts, |
2667 | | const SelectionDAG &DAG, |
2668 | | unsigned Depth = 0) const; |
2669 | | |
2670 | | /// This method can be implemented by targets that want to expose additional |
2671 | | /// information about sign bits to the DAG Combiner. The DemandedElts |
2672 | | /// argument allows us to only collect the minimum sign bits that are shared |
2673 | | /// by the requested vector elements. |
2674 | | virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, |
2675 | | const APInt &DemandedElts, |
2676 | | const SelectionDAG &DAG, |
2677 | | unsigned Depth = 0) const; |
2678 | | |
  /// State handed to target combine hooks: the DAG combiner itself (as an
  /// opaque pointer), the current legalization level, and the DAG.
  struct DAGCombinerInfo {
    void *DC;  // The DAG Combiner object.
    CombineLevel Level;
    bool CalledByLegalizer;

  public:
    SelectionDAG &DAG;

    DAGCombinerInfo(SelectionDAG &dag, CombineLevel level, bool cl, void *dc)
      : DC(dc), Level(level), CalledByLegalizer(cl), DAG(dag) {}

    /// True while still at the BeforeLegalizeTypes level.
    bool isBeforeLegalize() const { return Level == BeforeLegalizeTypes; }
    /// True for every level below AfterLegalizeVectorOps.
    bool isBeforeLegalizeOps() const { return Level < AfterLegalizeVectorOps; }
    /// True once the level is AfterLegalizeDAG.
    bool isAfterLegalizeVectorOps() const {
      return Level == AfterLegalizeDAG;
    }
    CombineLevel getDAGCombineLevel() { return Level; }
    bool isCalledByLegalizer() const { return CalledByLegalizer; }

    // Worklist/replacement helpers; bodies live with the DAG combiner.
    void AddToWorklist(SDNode *N);
    SDValue CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo = true);
    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true);
    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo = true);

    /// Apply the replacement recorded in \p TLO to the DAG.
    void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO);
  };
2705 | | |
2706 | | /// Return if the N is a constant or constant vector equal to the true value |
2707 | | /// from getBooleanContents(). |
2708 | | bool isConstTrueVal(const SDNode *N) const; |
2709 | | |
2710 | | /// Return if the N is a constant or constant vector equal to the false value |
2711 | | /// from getBooleanContents(). |
2712 | | bool isConstFalseVal(const SDNode *N) const; |
2713 | | |
2714 | | /// Return a constant of type VT that contains a true value that respects |
2715 | | /// getBooleanContents() |
2716 | | SDValue getConstTrueVal(SelectionDAG &DAG, EVT VT, const SDLoc &DL) const; |
2717 | | |
2718 | | /// Return if \p N is a True value when extended to \p VT. |
2719 | | bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool Signed) const; |
2720 | | |
2721 | | /// Try to simplify a setcc built with the specified operands and cc. If it is |
2722 | | /// unable to simplify it, return a null SDValue. |
2723 | | SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, |
2724 | | bool foldBooleans, DAGCombinerInfo &DCI, |
2725 | | const SDLoc &dl) const; |
2726 | | |
  /// For targets which wrap address, unwrap for analysis. The default
  /// implementation performs no unwrapping and returns the node unchanged.
  virtual SDValue unwrapAddress(SDValue N) const { return N; }
2729 | | |
2730 | | /// Returns true (and the GlobalValue and the offset) if the node is a |
2731 | | /// GlobalAddress + offset. |
2732 | | virtual bool |
2733 | | isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const; |
2734 | | |
2735 | | /// This method will be invoked for all target nodes and for any |
2736 | | /// target-independent nodes that the target has registered with invoke it |
2737 | | /// for. |
2738 | | /// |
2739 | | /// The semantics are as follows: |
2740 | | /// Return Value: |
2741 | | /// SDValue.Val == 0 - No change was made |
2742 | | /// SDValue.Val == N - N was replaced, is dead, and is already handled. |
2743 | | /// otherwise - N should be replaced by the returned Operand. |
2744 | | /// |
2745 | | /// In addition, methods provided by DAGCombinerInfo may be used to perform |
2746 | | /// more complex transformations. |
2747 | | /// |
2748 | | virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
2749 | | |
  /// Return true if it is profitable to move a following shift through this
  /// node, adjusting any immediate operands as necessary to preserve semantics.
  /// This transformation may not be desirable if it disrupts a particularly
  /// auspicious target-specific tree (e.g. bitfield extraction in AArch64).
  /// By default, it returns true.
  virtual bool isDesirableToCommuteWithShift(const SDNode *N) const {
    return true;
  }
2758 | | |
  /// Return true if it is profitable to combine a BUILD_VECTOR to a TRUNCATE.
  /// Example of such a combine:
  /// v4i32 build_vector((extract_elt V, 0),
  ///                    (extract_elt V, 2),
  ///                    (extract_elt V, 4),
  ///                    (extract_elt V, 6))
  ///  -->
  /// v4i32 truncate (bitcast V to v4i64)
  /// Disabled by default; targets opt in by overriding.
  virtual bool isDesirableToCombineBuildVectorToTruncate() const {
    return false;
  }
2770 | | |
  /// Return true if it is profitable to combine a BUILD_VECTOR with a
  /// stride-pattern to a shuffle and a truncate.
  /// Example of such a combine:
  /// v4i32 build_vector((extract_elt V, 1),
  ///                    (extract_elt V, 3),
  ///                    (extract_elt V, 5),
  ///                    (extract_elt V, 7))
  ///  -->
  /// v4i32 truncate (bitcast (shuffle<1,u,3,u,5,u,7,u> V, u) to v4i64)
  /// Disabled by default; targets opt in by overriding.
  virtual bool isDesirableToCombineBuildVectorToShuffleTruncate(
      ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const {
    return false;
  }
2784 | | |
  /// Return true if the target has native support for the specified value type
  /// and it is 'desirable' to use the type for the given node type. e.g. On x86
  /// i16 is legal, but undesirable since i16 instruction encodings are longer
  /// and some i16 instructions are slow.
  virtual bool isTypeDesirableForOp(unsigned /*Opc*/, EVT VT) const {
    // By default, assume all legal types are desirable.
    return isTypeLegal(VT);
  }
2793 | | |
  /// Return true if it is profitable for dag combiner to transform a floating
  /// point op of specified opcode to an equivalent op of an integer
  /// type. e.g. f32 load -> i32 load can be profitable on ARM.
  /// Off by default.
  virtual bool isDesirableToTransformToIntegerOp(unsigned /*Opc*/,
                                                 EVT /*VT*/) const {
    return false;
  }
2801 | | |
  /// This method queries the target whether it is beneficial for dag combiner
  /// to promote the specified node. If true, it should return the desired
  /// promotion type by reference. Off by default.
  virtual bool IsDesirableToPromoteOp(SDValue /*Op*/, EVT &/*PVT*/) const {
    return false;
  }
2808 | | |
  /// Return true if the target supports swifterror attribute. It optimizes
  /// loads and stores to reading and writing a specific register.
  /// Unsupported by default.
  virtual bool supportSwiftError() const {
    return false;
  }
2814 | | |
  /// Return true if the target supports that a subset of CSRs for the given
  /// machine function is handled explicitly via copies.
  /// Unsupported by default; targets that return true must also implement
  /// initializeSplitCSR and insertCopiesSplitCSR.
  virtual bool supportSplitCSR(MachineFunction *MF) const {
    return false;
  }
2820 | | |
  /// Perform necessary initialization to handle a subset of CSRs explicitly
  /// via copies. This function is called at the beginning of instruction
  /// selection. Must be overridden when supportSplitCSR returns true; the
  /// default aborts.
  virtual void initializeSplitCSR(MachineBasicBlock *Entry) const {
    llvm_unreachable("Not Implemented");
  }
2827 | | |
  /// Insert explicit copies in entry and exit blocks. We copy a subset of
  /// CSRs to virtual registers in the entry block, and copy them back to
  /// physical registers in the exit blocks. This function is called at the end
  /// of instruction selection. Must be overridden when supportSplitCSR returns
  /// true; the default aborts.
  virtual void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
    llvm_unreachable("Not Implemented");
  }
2837 | | |
2838 | | //===--------------------------------------------------------------------===// |
2839 | | // Lowering methods - These methods must be implemented by targets so that |
2840 | | // the SelectionDAGBuilder code knows how to lower these. |
2841 | | // |
2842 | | |
  /// This hook must be implemented to lower the incoming (formal) arguments,
  /// described by the Ins array, into the specified DAG. The implementation
  /// should fill in the InVals array with legal-type argument values, and
  /// return the resulting token chain value. The default aborts.
  virtual SDValue LowerFormalArguments(
      SDValue /*Chain*/, CallingConv::ID /*CallConv*/, bool /*isVarArg*/,
      const SmallVectorImpl<ISD::InputArg> & /*Ins*/, const SDLoc & /*dl*/,
      SelectionDAG & /*DAG*/, SmallVectorImpl<SDValue> & /*InVals*/) const {
    llvm_unreachable("Not Implemented");
  }
2853 | | |
2854 | | /// This structure contains all information that is necessary for lowering |
2855 | | /// calls. It is passed to TLI::LowerCallTo when the SelectionDAG builder |
2856 | | /// needs to lower a call, and targets will see this struct in their LowerCall |
2857 | | /// implementation. |
2858 | | struct CallLoweringInfo { |
2859 | | SDValue Chain; |
2860 | | Type *RetTy = nullptr; |
2861 | | bool RetSExt : 1; |
2862 | | bool RetZExt : 1; |
2863 | | bool IsVarArg : 1; |
2864 | | bool IsInReg : 1; |
2865 | | bool DoesNotReturn : 1; |
2866 | | bool IsReturnValueUsed : 1; |
2867 | | bool IsConvergent : 1; |
2868 | | bool IsPatchPoint : 1; |
2869 | | |
2870 | | // IsTailCall should be modified by implementations of |
2871 | | // TargetLowering::LowerCall that perform tail call conversions. |
2872 | | bool IsTailCall = false; |
2873 | | |
2874 | | // Is Call lowering done post SelectionDAG type legalization. |
2875 | | bool IsPostTypeLegalization = false; |
2876 | | |
2877 | | unsigned NumFixedArgs = -1; |
2878 | | CallingConv::ID CallConv = CallingConv::C; |
2879 | | SDValue Callee; |
2880 | | ArgListTy Args; |
2881 | | SelectionDAG &DAG; |
2882 | | SDLoc DL; |
2883 | | ImmutableCallSite CS; |
2884 | | SmallVector<ISD::OutputArg, 32> Outs; |
2885 | | SmallVector<SDValue, 32> OutVals; |
2886 | | SmallVector<ISD::InputArg, 32> Ins; |
2887 | | SmallVector<SDValue, 4> InVals; |
2888 | | |
2889 | | CallLoweringInfo(SelectionDAG &DAG) |
2890 | | : RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false), |
2891 | | DoesNotReturn(false), IsReturnValueUsed(true), IsConvergent(false), |
2892 | 1.79M | IsPatchPoint(false), DAG(DAG) {} |
2893 | | |
2894 | 1.79M | CallLoweringInfo &setDebugLoc(const SDLoc &dl) { |
2895 | 1.79M | DL = dl; |
2896 | 1.79M | return *this; |
2897 | 1.79M | } |
2898 | | |
2899 | 1.81M | CallLoweringInfo &setChain(SDValue InChain) { |
2900 | 1.81M | Chain = InChain; |
2901 | 1.81M | return *this; |
2902 | 1.81M | } |
2903 | | |
2904 | | // setCallee with target/module-specific attributes |
2905 | | CallLoweringInfo &setLibCallee(CallingConv::ID CC, Type *ResultType, |
2906 | 32.8k | SDValue Target, ArgListTy &&ArgsList) { |
2907 | 32.8k | RetTy = ResultType; |
2908 | 32.8k | Callee = Target; |
2909 | 32.8k | CallConv = CC; |
2910 | 32.8k | NumFixedArgs = Args.size(); |
2911 | 32.8k | Args = std::move(ArgsList); |
2912 | 32.8k | |
2913 | 32.8k | DAG.getTargetLoweringInfo().markLibCallAttributes( |
2914 | 32.8k | &(DAG.getMachineFunction()), CC, Args); |
2915 | 32.8k | return *this; |
2916 | 32.8k | } |
2917 | | |
2918 | | CallLoweringInfo &setCallee(CallingConv::ID CC, Type *ResultType, |
2919 | 543 | SDValue Target, ArgListTy &&ArgsList) { |
2920 | 543 | RetTy = ResultType; |
2921 | 543 | Callee = Target; |
2922 | 543 | CallConv = CC; |
2923 | 543 | NumFixedArgs = Args.size(); |
2924 | 543 | Args = std::move(ArgsList); |
2925 | 543 | return *this; |
2926 | 543 | } |
2927 | | |
2928 | | CallLoweringInfo &setCallee(Type *ResultType, FunctionType *FTy, |
2929 | | SDValue Target, ArgListTy &&ArgsList, |
2930 | 1.76M | ImmutableCallSite Call) { |
2931 | 1.76M | RetTy = ResultType; |
2932 | 1.76M | |
2933 | 1.76M | IsInReg = Call.hasRetAttr(Attribute::InReg); |
2934 | 1.76M | DoesNotReturn = |
2935 | 1.76M | Call.doesNotReturn() || |
2936 | 1.74M | (!Call.isInvoke() && |
2937 | 1.74M | isa<UnreachableInst>(Call.getInstruction()->getNextNode())); |
2938 | 1.76M | IsVarArg = FTy->isVarArg(); |
2939 | 1.76M | IsReturnValueUsed = !Call.getInstruction()->use_empty(); |
2940 | 1.76M | RetSExt = Call.hasRetAttr(Attribute::SExt); |
2941 | 1.76M | RetZExt = Call.hasRetAttr(Attribute::ZExt); |
2942 | 1.76M | |
2943 | 1.76M | Callee = Target; |
2944 | 1.76M | |
2945 | 1.76M | CallConv = Call.getCallingConv(); |
2946 | 1.76M | NumFixedArgs = FTy->getNumParams(); |
2947 | 1.76M | Args = std::move(ArgsList); |
2948 | 1.76M | |
2949 | 1.76M | CS = Call; |
2950 | 1.76M | |
2951 | 1.76M | return *this; |
2952 | 1.76M | } |
2953 | | |
2954 | 193 | CallLoweringInfo &setInRegister(bool Value = true) { |
2955 | 193 | IsInReg = Value; |
2956 | 193 | return *this; |
2957 | 193 | } |
2958 | | |
2959 | 7.74k | CallLoweringInfo &setNoReturn(bool Value = true) { |
2960 | 7.74k | DoesNotReturn = Value; |
2961 | 7.74k | return *this; |
2962 | 7.74k | } |
2963 | | |
2964 | 0 | CallLoweringInfo &setVarArg(bool Value = true) { |
2965 | 0 | IsVarArg = Value; |
2966 | 0 | return *this; |
2967 | 0 | } |
2968 | | |
2969 | 1.77M | CallLoweringInfo &setTailCall(bool Value = true) { |
2970 | 1.77M | IsTailCall = Value; |
2971 | 1.77M | return *this; |
2972 | 1.77M | } |
2973 | | |
2974 | 29.6k | CallLoweringInfo &setDiscardResult(bool Value = true) { |
2975 | 29.6k | IsReturnValueUsed = !Value; |
2976 | 29.6k | return *this; |
2977 | 29.6k | } |
2978 | | |
2979 | 1.76M | CallLoweringInfo &setConvergent(bool Value = true) { |
2980 | 1.76M | IsConvergent = Value; |
2981 | 1.76M | return *this; |
2982 | 1.76M | } |
2983 | | |
2984 | 10.8k | CallLoweringInfo &setSExtResult(bool Value = true) { |
2985 | 10.8k | RetSExt = Value; |
2986 | 10.8k | return *this; |
2987 | 10.8k | } |
2988 | | |
2989 | 10.8k | CallLoweringInfo &setZExtResult(bool Value = true) { |
2990 | 10.8k | RetZExt = Value; |
2991 | 10.8k | return *this; |
2992 | 10.8k | } |
2993 | | |
2994 | 185 | CallLoweringInfo &setIsPatchPoint(bool Value = true) { |
2995 | 185 | IsPatchPoint = Value; |
2996 | 185 | return *this; |
2997 | 185 | } |
2998 | | |
2999 | 2.53k | CallLoweringInfo &setIsPostTypeLegalization(bool Value=true) { |
3000 | 2.53k | IsPostTypeLegalization = Value; |
3001 | 2.53k | return *this; |
3002 | 2.53k | } |
3003 | | |
3004 | 5.60M | ArgListTy &getArgs() { |
3005 | 5.60M | return Args; |
3006 | 5.60M | } |
3007 | | }; |
3008 | | |
3009 | | /// This function lowers an abstract call to a function into an actual call. |
3010 | | /// This returns a pair of operands. The first element is the return value |
3011 | | /// for the function (if RetTy is not VoidTy). The second element is the |
3012 | | /// outgoing token chain. It calls LowerCall to do the actual lowering. |
3013 | | std::pair<SDValue, SDValue> LowerCallTo(CallLoweringInfo &CLI) const; |
3014 | | |
  /// This hook must be implemented to lower calls into the specified
  /// DAG. The outgoing arguments to the call are described by the Outs array,
  /// and the values to be returned by the call are described by the Ins
  /// array. The implementation should fill in the InVals array with legal-type
  /// return values from the call, and return the resulting token chain value.
  /// The default aborts.
  virtual SDValue
  LowerCall(CallLoweringInfo &/*CLI*/,
            SmallVectorImpl<SDValue> &/*InVals*/) const {
    llvm_unreachable("Not Implemented");
  }
3025 | | |
  /// Target-specific cleanup for formal ByVal parameters. The default
  /// implementation is a no-op.
  virtual void HandleByVal(CCState *, unsigned &, unsigned) const {}
3028 | | |
  /// This hook should be implemented to check whether the return values
  /// described by the Outs array can fit into the return registers. If false
  /// is returned, an sret-demotion is performed.
  virtual bool CanLowerReturn(CallingConv::ID /*CallConv*/,
                              MachineFunction &/*MF*/, bool /*isVarArg*/,
                              const SmallVectorImpl<ISD::OutputArg> &/*Outs*/,
                              LLVMContext &/*Context*/) const
  {
    // Return true by default to get preexisting behavior.
    return true;
  }
3040 | | |
  /// This hook must be implemented to lower outgoing return values, described
  /// by the Outs array, into the specified DAG. The implementation should
  /// return the resulting token chain value. The default aborts.
  virtual SDValue LowerReturn(SDValue /*Chain*/, CallingConv::ID /*CallConv*/,
                              bool /*isVarArg*/,
                              const SmallVectorImpl<ISD::OutputArg> & /*Outs*/,
                              const SmallVectorImpl<SDValue> & /*OutVals*/,
                              const SDLoc & /*dl*/,
                              SelectionDAG & /*DAG*/) const {
    llvm_unreachable("Not Implemented");
  }
3052 | | |
  /// Return true if result of the specified node is used by a return node
  /// only. It also compute and return the input chain for the tail call.
  ///
  /// This is used to determine whether it is possible to codegen a libcall as
  /// tail call at legalization time. Conservatively false by default.
  virtual bool isUsedByReturnOnly(SDNode *, SDValue &/*Chain*/) const {
    return false;
  }
3061 | | |
  /// Return true if the target may be able to emit the call instruction as a
  /// tail call. This is used by optimization passes to determine if it's
  /// profitable to duplicate return instructions to enable tailcall
  /// optimization. Conservatively false by default.
  virtual bool mayBeEmittedAsTailCall(const CallInst *) const {
    return false;
  }
3068 | | |
  /// Return the builtin name for the __builtin___clear_cache intrinsic
  /// Default is to invoke the clear cache library call ("__clear_cache").
  virtual const char * getClearCacheBuiltinName() const {
    return "__clear_cache";
  }
3074 | | |
  /// Return the register ID of the name passed in. Used by named register
  /// global variables extension. There is no target-independent behaviour
  /// so the default action is to bail with a fatal error.
  virtual unsigned getRegisterByName(const char* RegName, EVT VT,
                                     SelectionDAG &DAG) const {
    report_fatal_error("Named registers not implemented for this target");
  }
3082 | | |
3083 | | /// Return the type that should be used to zero or sign extend a |
3084 | | /// zeroext/signext integer return value. FIXME: Some C calling conventions |
3085 | | /// require the return type to be promoted, but this is not true all the time, |
3086 | | /// e.g. i1/i8/i16 on x86/x86_64. It is also not necessary for non-C calling |
3087 | | /// conventions. The frontend should handle this and include all of the |
3088 | | /// necessary information. |
3089 | | virtual EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, |
3090 | 13.5k | ISD::NodeType /*ExtendKind*/) const { |
3091 | 13.5k | EVT MinVT = getRegisterType(Context, MVT::i32); |
3092 | 13.5k | return VT.bitsLT(MinVT) ? MinVT11.5k : VT1.98k ; |
3093 | 13.5k | } |
3094 | | |
  /// For some targets, an LLVM struct type must be broken down into multiple
  /// simple types, but the calling convention specifies that the entire struct
  /// must be passed in a block of consecutive registers.
  /// Not required by default.
  virtual bool
  functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv,
                                            bool isVarArg) const {
    return false;
  }
3103 | | |
  /// Returns a 0 terminated array of registers that can be safely used as
  /// scratch registers. Null by default, i.e. no scratch registers advertised.
  virtual const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const {
    return nullptr;
  }
3109 | | |
  /// This callback is used to prepare for a volatile or atomic load.
  /// It takes a chain node as input and returns the chain for the load itself.
  ///
  /// Having a callback like this is necessary for targets like SystemZ,
  /// which allows a CPU to reuse the result of a previous load indefinitely,
  /// even if a cache-coherent store is performed by another CPU. The default
  /// implementation does nothing and returns the chain unchanged.
  virtual SDValue prepareVolatileOrAtomicLoad(SDValue Chain, const SDLoc &DL,
                                              SelectionDAG &DAG) const {
    return Chain;
  }
3121 | | |
  /// This callback is used to inspect load/store instructions and add
  /// target-specific MachineMemOperand flags to them. The default
  /// implementation adds nothing (MONone).
  virtual MachineMemOperand::Flags getMMOFlags(const Instruction &I) const {
    return MachineMemOperand::MONone;
  }
3128 | | |
  /// This callback is invoked by the type legalizer to legalize nodes with an
  /// illegal operand type but legal result types. It replaces the
  /// LowerOperation callback in the type Legalizer. The reason we can not do
  /// away with LowerOperation entirely is that LegalizeDAG isn't yet ready to
  /// use this callback.
  ///
  /// TODO: Consider merging with ReplaceNodeResults.
  ///
  /// The target places new result values for the node in Results (their number
  /// and types must exactly match those of the original return values of
  /// the node), or leaves Results empty, which indicates that the node is not
  /// to be custom lowered after all.
  /// The default implementation calls LowerOperation.
  virtual void LowerOperationWrapper(SDNode *N,
                                     SmallVectorImpl<SDValue> &Results,
                                     SelectionDAG &DAG) const;

  /// This callback is invoked for operations that are unsupported by the
  /// target, which are registered to use 'custom' lowering, and whose defined
  /// values are all legal. If the target has no operations that require custom
  /// lowering, it need not implement this. The default implementation of this
  /// aborts.
  virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;

  /// This callback is invoked when a node result type is illegal for the
  /// target, and the operation was registered to use 'custom' lowering for that
  /// result type. The target places new result values for the node in Results
  /// (their number and types must exactly match those of the original return
  /// values of the node), or leaves Results empty, which indicates that the
  /// node is not to be custom lowered after all.
  ///
  /// If the target has no operations that require custom lowering, it need not
  /// implement this. The default implementation aborts.
  virtual void ReplaceNodeResults(SDNode * /*N*/,
                                  SmallVectorImpl<SDValue> &/*Results*/,
                                  SelectionDAG &/*DAG*/) const {
    // Reaching here means a target registered Custom legalization for a
    // result type without overriding this hook.
    llvm_unreachable("ReplaceNodeResults not implemented for this target!");
  }
3167 | | |
  /// This method returns the name of a target specific DAG node.
  virtual const char *getTargetNodeName(unsigned Opcode) const;

  /// This method returns a target specific FastISel object, or null if the
  /// target does not support "fast" ISel.
  virtual FastISel *createFastISel(FunctionLoweringInfo &,
                                   const TargetLibraryInfo *) const {
    // Targets without fast instruction selection keep this null default.
    return nullptr;
  }

  /// Check that \p Op (the argument of a return-address style node) is a
  /// constant. NOTE(review): diagnostic behavior on failure lives in the
  /// out-of-line implementation — confirm there.
  bool verifyReturnAddressArgumentIsConstant(SDValue Op,
                                             SelectionDAG &DAG) const;
3180 | | |
3181 | | //===--------------------------------------------------------------------===// |
3182 | | // Inline Asm Support hooks |
3183 | | // |
3184 | | |
  /// This hook allows the target to expand an inline asm call to be explicit
  /// llvm code if it wants to. This is useful for turning simple inline asms
  /// into LLVM intrinsics, which gives the compiler more information about the
  /// behavior of the code.
  virtual bool ExpandInlineAsm(CallInst *) const {
    // Default: no expansion is performed.
    return false;
  }

  /// Classification of an inline-asm constraint string.
  enum ConstraintType {
    C_Register,            // Constraint represents specific register(s).
    C_RegisterClass,       // Constraint represents any of register(s) in class.
    C_Memory,              // Memory constraint.
    C_Other,               // Something else.
    C_Unknown              // Unsupported constraint.
  };

  /// Relative desirability of a constraint match; higher is better. Returned
  /// by the get*ConstraintMatchWeight hooks below.
  enum ConstraintWeight {
    // Generic weights.
    CW_Invalid = -1,     // No match.
    CW_Okay = 0,         // Acceptable.
    CW_Good = 1,         // Good weight.
    CW_Better = 2,       // Better weight.
    CW_Best = 3,         // Best weight.

    // Well-known weights.
    CW_SpecificReg = CW_Okay,    // Specific register operands.
    CW_Register = CW_Good,       // Register operands.
    CW_Memory = CW_Better,       // Memory operands.
    CW_Constant = CW_Best,       // Constant operand.
    CW_Default = CW_Okay         // Default or don't know type.
  };
3216 | | |
  /// This contains information for each constraint that we are lowering.
  struct AsmOperandInfo : public InlineAsm::ConstraintInfo {
    /// This contains the actual string for the code, like "m". TargetLowering
    /// picks the 'best' code from ConstraintInfo::Codes that most closely
    /// matches the operand.
    std::string ConstraintCode;

    /// Information about the constraint code, e.g. Register, RegisterClass,
    /// Memory, Other, Unknown.
    TargetLowering::ConstraintType ConstraintType = TargetLowering::C_Unknown;

    /// If this is the result output operand or a clobber, this is null,
    /// otherwise it is the incoming operand to the CallInst. This gets
    /// modified as the asm is processed.
    Value *CallOperandVal = nullptr;

    /// The ValueType for the operand value.
    MVT ConstraintVT = MVT::Other;

    /// Converting constructor: takes a ConstraintInfo by value and moves it
    /// into the base subobject. (Not a copy constructor, despite the original
    /// comment — the parameter type differs from AsmOperandInfo.)
    AsmOperandInfo(InlineAsm::ConstraintInfo Info)
        : InlineAsm::ConstraintInfo(std::move(Info)) {}

    /// Return true if this is an input operand that is a matching constraint
    /// like "4".
    bool isMatchingInputConstraint() const;

    /// If this is an input matching constraint, this method returns the output
    /// operand it matches.
    unsigned getMatchedOperand() const;
  };
3248 | | |
  using AsmOperandInfoVector = std::vector<AsmOperandInfo>;

  /// Split up the constraint string from the inline assembly value into the
  /// specific constraints and their prefixes, and also tie in the associated
  /// operand values. If this returns an empty vector, and if the constraint
  /// string itself isn't empty, there was an error parsing.
  virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL,
                                                const TargetRegisterInfo *TRI,
                                                ImmutableCallSite CS) const;

  /// Examine constraint type and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  virtual ConstraintWeight getMultipleConstraintMatchWeight(
      AsmOperandInfo &info, int maIndex) const;

  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  virtual ConstraintWeight getSingleConstraintMatchWeight(
      AsmOperandInfo &info, const char *constraint) const;

  /// Determines the constraint code and constraint type to use for the
  /// specific AsmOperandInfo, setting OpInfo.ConstraintCode and
  /// OpInfo.ConstraintType. If the actual operand being passed in is
  /// available, it can be passed in as Op, otherwise an empty SDValue can be
  /// passed.
  virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo,
                                      SDValue Op,
                                      SelectionDAG *DAG = nullptr) const;

  /// Given a constraint, return the type of constraint it is for this target.
  virtual ConstraintType getConstraintType(StringRef Constraint) const;

  /// Given a physical register constraint (e.g. {edx}), return the register
  /// number and the register class for the register.
  ///
  /// Given a register class constraint, like 'r', if this corresponds directly
  /// to an LLVM register class, return a register of 0 and the register class
  /// pointer.
  ///
  /// This should only be used for C_Register constraints. On error, this
  /// returns a register number of 0 and a null register class pointer.
  virtual std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const;
3292 | | |
3293 | 316 | virtual unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const { |
3294 | 316 | if (ConstraintCode == "i") |
3295 | 0 | return InlineAsm::Constraint_i; |
3296 | 316 | else if (316 ConstraintCode == "m"316 ) |
3297 | 316 | return InlineAsm::Constraint_m; |
3298 | 0 | return InlineAsm::Constraint_Unknown; |
3299 | 316 | } |
3300 | | |
  /// Try to replace an X constraint, which matches anything, with another that
  /// has more specific requirements based on the type of the corresponding
  /// operand. This returns null if there is no replacement to make.
  virtual const char *LowerXConstraint(EVT ConstraintVT) const;

  /// Lower the specified operand into the Ops vector. If it is invalid, don't
  /// add anything to Ops.
  virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                                            std::vector<SDValue> &Ops,
                                            SelectionDAG &DAG) const;
3311 | | |
  //===--------------------------------------------------------------------===//
  // Div utility functions
  //

  /// Build a replacement for signed division of node \p N by the constant
  /// \p Divisor. Newly created nodes are appended to \p Created if non-null.
  /// NOTE(review): the exact expansion strategy lives in the out-of-line
  /// implementation — confirm there.
  SDValue BuildSDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                    bool IsAfterLegalization,
                    std::vector<SDNode *> *Created) const;
  /// Unsigned counterpart of BuildSDIV; same parameter conventions.
  SDValue BuildUDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                    bool IsAfterLegalization,
                    std::vector<SDNode *> *Created) const;

  /// Targets may override this function to provide custom SDIV lowering for
  /// power-of-2 denominators. If the target returns an empty SDValue, LLVM
  /// assumes SDIV is expensive and replaces it with a series of other integer
  /// operations.
  virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                SelectionDAG &DAG,
                                std::vector<SDNode *> *Created) const;

  /// Indicate whether this target prefers to combine FDIVs with the same
  /// divisor. If the transform should never be done, return zero. If the
  /// transform should be done, return the minimum number of divisor uses
  /// that must exist.
  virtual unsigned combineRepeatedFPDivisors() const {
    // Default: never perform the repeated-FDIV combine.
    return 0;
  }
3337 | | |
  /// Hooks for building estimates in place of slower divisions and square
  /// roots.

  /// Return either a square root or its reciprocal estimate value for the input
  /// operand.
  /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or
  /// 'Enabled' as set by a potential default override attribute.
  /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson
  /// refinement iterations required to generate a sufficient (though not
  /// necessarily IEEE-754 compliant) estimate is returned in that parameter.
  /// The boolean UseOneConstNR output is used to select a Newton-Raphson
  /// algorithm implementation that uses either one or two constants.
  /// The boolean Reciprocal is used to select whether the estimate is for the
  /// square root of the input operand or the reciprocal of its square root.
  /// A target may choose to implement its own refinement within this function.
  /// If that's true, then return '0' as the number of RefinementSteps to avoid
  /// any further refinement of the estimate.
  /// An empty SDValue return means no estimate sequence can be created.
  virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
                                  int Enabled, int &RefinementSteps,
                                  bool &UseOneConstNR, bool Reciprocal) const {
    // Default: no estimate available.
    return SDValue();
  }

  /// Return a reciprocal estimate value for the input operand.
  /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or
  /// 'Enabled' as set by a potential default override attribute.
  /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson
  /// refinement iterations required to generate a sufficient (though not
  /// necessarily IEEE-754 compliant) estimate is returned in that parameter.
  /// A target may choose to implement its own refinement within this function.
  /// If that's true, then return '0' as the number of RefinementSteps to avoid
  /// any further refinement of the estimate.
  /// An empty SDValue return means no estimate sequence can be created.
  virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
                                   int Enabled, int &RefinementSteps) const {
    // Default: no estimate available.
    return SDValue();
  }
3376 | | |
  //===--------------------------------------------------------------------===//
  // Legalization utility functions
  //

  /// Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes,
  /// respectively, each computing an n/2-bit part of the result.
  /// \param Result A vector that will be filled with the parts of the result
  ///        in little-endian order.
  /// \param LL Low bits of the LHS of the MUL. You can use this parameter
  ///        if you want to control how low bits are extracted from the LHS.
  /// \param LH High bits of the LHS of the MUL. See LL for meaning.
  /// \param RL Low bits of the RHS of the MUL. See LL for meaning.
  /// \param RH High bits of the RHS of the MUL. See LL for meaning.
  /// \returns true if the node has been expanded, false if it has not
  bool expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl, SDValue LHS,
                      SDValue RHS, SmallVectorImpl<SDValue> &Result, EVT HiLoVT,
                      SelectionDAG &DAG, MulExpansionKind Kind,
                      SDValue LL = SDValue(), SDValue LH = SDValue(),
                      SDValue RL = SDValue(), SDValue RH = SDValue()) const;

  /// Expand a MUL into two nodes. One that computes the high bits of
  /// the result and one that computes the low bits.
  /// \param HiLoVT The value type to use for the Lo and Hi nodes.
  /// \param LL Low bits of the LHS of the MUL. You can use this parameter
  ///        if you want to control how low bits are extracted from the LHS.
  /// \param LH High bits of the LHS of the MUL. See LL for meaning.
  /// \param RL Low bits of the RHS of the MUL. See LL for meaning.
  /// \param RH High bits of the RHS of the MUL. See LL for meaning.
  /// \returns true if the node has been expanded. false if it has not
  bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
                 SelectionDAG &DAG, MulExpansionKind Kind,
                 SDValue LL = SDValue(), SDValue LH = SDValue(),
                 SDValue RL = SDValue(), SDValue RH = SDValue()) const;

  /// Expand float(f32) to SINT(i64) conversion
  /// \param N Node to expand
  /// \param Result output after conversion
  /// \returns True, if the expansion was successful, false otherwise
  bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;

  /// Turn load of vector type into a load of the individual elements.
  /// \param LD load to expand
  /// \returns MERGE_VALUEs of the scalar loads with their chains.
  SDValue scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const;

  /// Turn a store of a vector type into stores of the individual elements.
  /// \param ST Store with a vector value type
  /// \returns MERGE_VALUEs of the individual store chains.
  SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const;

  /// Expands an unaligned load to 2 half-size loads for an integer, and
  /// possibly more for vectors.
  std::pair<SDValue, SDValue> expandUnalignedLoad(LoadSDNode *LD,
                                                  SelectionDAG &DAG) const;

  /// Expands an unaligned store to 2 half-size stores for integer values, and
  /// possibly more for vectors.
  SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const;

  /// Increments memory address \p Addr according to the type of the value
  /// \p DataVT that should be stored. If the data is stored in compressed
  /// form, the memory address should be incremented according to the number of
  /// the stored elements. This number is equal to the number of '1's bits
  /// in the \p Mask.
  /// \p DataVT is a vector type. \p Mask is a vector value.
  /// \p DataVT and \p Mask have the same number of vector elements.
  SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL,
                                 EVT DataVT, SelectionDAG &DAG,
                                 bool IsCompressedMemory) const;

  /// Get a pointer to vector element \p Idx located in memory for a vector of
  /// type \p VecVT starting at a base address of \p VecPtr. If \p Idx is out of
  /// bounds the returned pointer is unspecified, but will be within the vector
  /// bounds.
  SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT,
                                  SDValue Idx) const;
3453 | | |
  //===--------------------------------------------------------------------===//
  // Instruction Emitting Hooks
  //

  /// This method should be implemented by targets that mark instructions with
  /// the 'usesCustomInserter' flag. These instructions are special in various
  /// ways, which require special support to insert. The specified MachineInstr
  /// is created but not inserted into any basic blocks, and this method is
  /// called to expand it into a sequence of instructions, potentially also
  /// creating new basic blocks and control flow.
  /// As long as the returned basic block is different (i.e., we created a new
  /// one), the custom inserter is free to modify the rest of \p MBB.
  virtual MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;

  /// This method should be implemented by targets that mark instructions with
  /// the 'hasPostISelHook' flag. These instructions must be adjusted after
  /// instruction selection by target hooks. e.g. To fill in optional defs for
  /// ARM 's' setting instructions.
  virtual void AdjustInstrPostInstrSelection(MachineInstr &MI,
                                             SDNode *Node) const;

  /// If this function returns true, SelectionDAGBuilder emits a
  /// LOAD_STACK_GUARD node when it is lowering Intrinsic::stackprotector.
  virtual bool useLoadStackGuardNode() const {
    // Default: targets do not use the LOAD_STACK_GUARD pseudo.
    return false;
  }
3481 | | |
  /// Lower TLS global address SDNode for target independent emulated TLS model.
  virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                          SelectionDAG &DAG) const;

  // seteq(x, 0) -> truncate(srl(ctlz(zext(x)), log2(#bits)))
  // If we're comparing for equality to zero and isCtlzFast is true, expose the
  // fact that this can be implemented as a ctlz/srl pair, so that the dag
  // combiner can fold the new nodes.
  SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const;

private:
  /// Helper for SimplifySetCC: handles setcc patterns whose operand is an
  /// AND. NOTE(review): exact folds are in the out-of-line implementation.
  SDValue simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
                               ISD::CondCode Cond, DAGCombinerInfo &DCI,
                               const SDLoc &DL) const;
3496 | | }; |
3497 | | |
/// Given an LLVM IR type and return type attributes, compute the return value
/// EVTs and flags, and optionally also the offsets, if the return value is
/// being lowered to memory. Results are appended to \p Outs.
void GetReturnInfo(Type *ReturnType, AttributeList attr,
                   SmallVectorImpl<ISD::OutputArg> &Outs,
                   const TargetLowering &TLI, const DataLayout &DL);
3504 | | |
3505 | | } // end namespace llvm |
3506 | | |
3507 | | #endif // LLVM_TARGET_TARGETLOWERING_H |