/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Line | Count | Source |
1 | | //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This file implements the PPCISelLowering class. |
11 | | // |
12 | | //===----------------------------------------------------------------------===// |
13 | | |
14 | | #include "PPCISelLowering.h" |
15 | | #include "MCTargetDesc/PPCPredicates.h" |
16 | | #include "PPC.h" |
17 | | #include "PPCCCState.h" |
18 | | #include "PPCCallingConv.h" |
19 | | #include "PPCFrameLowering.h" |
20 | | #include "PPCInstrInfo.h" |
21 | | #include "PPCMachineFunctionInfo.h" |
22 | | #include "PPCPerfectShuffle.h" |
23 | | #include "PPCRegisterInfo.h" |
24 | | #include "PPCSubtarget.h" |
25 | | #include "PPCTargetMachine.h" |
26 | | #include "llvm/ADT/APFloat.h" |
27 | | #include "llvm/ADT/APInt.h" |
28 | | #include "llvm/ADT/ArrayRef.h" |
29 | | #include "llvm/ADT/DenseMap.h" |
30 | | #include "llvm/ADT/None.h" |
31 | | #include "llvm/ADT/STLExtras.h" |
32 | | #include "llvm/ADT/SmallPtrSet.h" |
33 | | #include "llvm/ADT/SmallSet.h" |
34 | | #include "llvm/ADT/SmallVector.h" |
35 | | #include "llvm/ADT/Statistic.h" |
36 | | #include "llvm/ADT/StringRef.h" |
37 | | #include "llvm/ADT/StringSwitch.h" |
38 | | #include "llvm/CodeGen/CallingConvLower.h" |
39 | | #include "llvm/CodeGen/ISDOpcodes.h" |
40 | | #include "llvm/CodeGen/MachineBasicBlock.h" |
41 | | #include "llvm/CodeGen/MachineFrameInfo.h" |
42 | | #include "llvm/CodeGen/MachineFunction.h" |
43 | | #include "llvm/CodeGen/MachineInstr.h" |
44 | | #include "llvm/CodeGen/MachineInstrBuilder.h" |
45 | | #include "llvm/CodeGen/MachineJumpTableInfo.h" |
46 | | #include "llvm/CodeGen/MachineLoopInfo.h" |
47 | | #include "llvm/CodeGen/MachineMemOperand.h" |
48 | | #include "llvm/CodeGen/MachineOperand.h" |
49 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
50 | | #include "llvm/CodeGen/MachineValueType.h" |
51 | | #include "llvm/CodeGen/RuntimeLibcalls.h" |
52 | | #include "llvm/CodeGen/SelectionDAG.h" |
53 | | #include "llvm/CodeGen/SelectionDAGNodes.h" |
54 | | #include "llvm/CodeGen/ValueTypes.h" |
55 | | #include "llvm/IR/CallSite.h" |
56 | | #include "llvm/IR/CallingConv.h" |
57 | | #include "llvm/IR/Constant.h" |
58 | | #include "llvm/IR/Constants.h" |
59 | | #include "llvm/IR/DataLayout.h" |
60 | | #include "llvm/IR/DebugLoc.h" |
61 | | #include "llvm/IR/DerivedTypes.h" |
62 | | #include "llvm/IR/Function.h" |
63 | | #include "llvm/IR/GlobalValue.h" |
64 | | #include "llvm/IR/IRBuilder.h" |
65 | | #include "llvm/IR/Instructions.h" |
66 | | #include "llvm/IR/Intrinsics.h" |
67 | | #include "llvm/IR/Module.h" |
68 | | #include "llvm/IR/Type.h" |
69 | | #include "llvm/IR/Use.h" |
70 | | #include "llvm/IR/Value.h" |
71 | | #include "llvm/MC/MCExpr.h" |
72 | | #include "llvm/MC/MCRegisterInfo.h" |
73 | | #include "llvm/Support/AtomicOrdering.h" |
74 | | #include "llvm/Support/BranchProbability.h" |
75 | | #include "llvm/Support/Casting.h" |
76 | | #include "llvm/Support/CodeGen.h" |
77 | | #include "llvm/Support/CommandLine.h" |
78 | | #include "llvm/Support/Compiler.h" |
79 | | #include "llvm/Support/Debug.h" |
80 | | #include "llvm/Support/ErrorHandling.h" |
81 | | #include "llvm/Support/Format.h" |
82 | | #include "llvm/Support/KnownBits.h" |
83 | | #include "llvm/Support/MathExtras.h" |
84 | | #include "llvm/Support/raw_ostream.h" |
85 | | #include "llvm/Target/TargetInstrInfo.h" |
86 | | #include "llvm/Target/TargetLowering.h" |
87 | | #include "llvm/Target/TargetMachine.h" |
88 | | #include "llvm/Target/TargetOptions.h" |
89 | | #include "llvm/Target/TargetRegisterInfo.h" |
90 | | #include <algorithm> |
91 | | #include <cassert> |
92 | | #include <cstdint> |
93 | | #include <iterator> |
94 | | #include <list> |
95 | | #include <utility> |
96 | | #include <vector> |
97 | | |
98 | | using namespace llvm; |
99 | | |
100 | | #define DEBUG_TYPE "ppc-lowering" |
101 | | |
102 | | static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc", |
103 | | cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); |
104 | | |
105 | | static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref", |
106 | | cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden); |
107 | | |
108 | | static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned", |
109 | | cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden); |
110 | | |
111 | | static cl::opt<bool> DisableSCO("disable-ppc-sco", |
112 | | cl::desc("disable sibling call optimization on ppc"), cl::Hidden); |
113 | | |
114 | | STATISTIC(NumTailCalls, "Number of tail calls"); |
115 | | STATISTIC(NumSiblingCalls, "Number of sibling calls"); |
116 | | |
117 | | // FIXME: Remove this once the bug has been fixed! |
118 | | extern cl::opt<bool> ANDIGlueBug; |
119 | | |
120 | | PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, |
121 | | const PPCSubtarget &STI) |
122 | 1.40k | : TargetLowering(TM), Subtarget(STI) { |
123 | 1.40k | // Use _setjmp/_longjmp instead of setjmp/longjmp. |
124 | 1.40k | setUseUnderscoreSetJmp(true); |
125 | 1.40k | setUseUnderscoreLongJmp(true); |
126 | 1.40k | |
127 | 1.40k | // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all |
128 | 1.40k | // arguments are at least 4/8 bytes aligned. |
129 | 1.40k | bool isPPC64 = Subtarget.isPPC64(); |
130 | 1.40k | setMinStackArgumentAlignment(isPPC64 ? 8 : 4); |
131 | 1.40k | |
132 | 1.40k | // Set up the register classes. |
133 | 1.40k | addRegisterClass(MVT::i32, &PPC::GPRCRegClass); |
134 | 1.40k | if (!useSoftFloat()) { |
135 | 1.39k | addRegisterClass(MVT::f32, &PPC::F4RCRegClass); |
136 | 1.39k | addRegisterClass(MVT::f64, &PPC::F8RCRegClass); |
137 | 1.39k | } |
138 | 1.40k | |
139 | 1.40k | // Match BITREVERSE to customized fast code sequence in the td file. |
140 | 1.40k | setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); |
141 | 1.40k | setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); |
142 | 1.40k | |
143 | 1.40k | // PowerPC has an i16 but no i8 (or i1) SEXTLOAD. |
144 | 8.44k | for (MVT VT : MVT::integer_valuetypes()) { |
145 | 8.44k | setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); |
146 | 8.44k | setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand); |
147 | 8.44k | } |
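 | | // A sketch of what the Expand above yields: the i8 SEXTLOAD becomes a |
 | | // plain extending load followed by sign_extend_inreg, which selects to |
 | | // lbz + extsb; the i1 case is first promoted to the i8 form. |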
148 | 1.40k | |
149 | 1.40k | setTruncStoreAction(MVT::f64, MVT::f32, Expand); |
150 | 1.40k | |
151 | 1.40k | // PowerPC has pre-inc loads and stores. |
152 | 1.40k | setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal); |
153 | 1.40k | setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal); |
154 | 1.40k | setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal); |
155 | 1.40k | setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal); |
156 | 1.40k | setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal); |
157 | 1.40k | setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal); |
158 | 1.40k | setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal); |
159 | 1.40k | setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal); |
160 | 1.40k | setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal); |
161 | 1.40k | setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal); |
162 | 1.40k | setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal); |
163 | 1.40k | setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal); |
164 | 1.40k | setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal); |
165 | 1.40k | setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal); |
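 | | // As an illustration (assembly sketch), a pre-increment load such as |
 | | //   lwzu r5, 4(r4)   ; load from r4+4 and write r4+4 back into r4 |
 | | // folds the address update into the access, which is what marking |
 | | // PRE_INC indexed loads/stores Legal above lets the DAG form. |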
166 | 1.40k | |
167 | 1.40k | if (Subtarget.useCRBits()) { |
168 | 1.20k | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); |
169 | 1.20k | |
170 | 1.20k | if (isPPC64 || Subtarget.hasFPCVT()) { |
171 | 859 | setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote); |
172 | 859 | AddPromotedToType (ISD::SINT_TO_FP, MVT::i1, |
173 | 859 | isPPC64 ? MVT::i64 : MVT::i32); |
174 | 859 | setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote); |
175 | 859 | AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, |
176 | 859 | isPPC64 ? MVT::i64 : MVT::i32); |
177 | 1.20k | } else { |
178 | 347 | setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom); |
179 | 347 | setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom); |
180 | 347 | } |
181 | 1.20k | |
182 | 1.20k | // PowerPC does not support direct load/store of condition registers. |
183 | 1.20k | setOperationAction(ISD::LOAD, MVT::i1, Custom); |
184 | 1.20k | setOperationAction(ISD::STORE, MVT::i1, Custom); |
185 | 1.20k | |
186 | 1.20k | // FIXME: Remove this once the ANDI glue bug is fixed: |
187 | 1.20k | if (ANDIGlueBug) |
188 | 0 | setOperationAction(ISD::TRUNCATE, MVT::i1, Custom); |
189 | 1.20k | |
190 | 7.23k | for (MVT VT : MVT::integer_valuetypes()) { |
191 | 7.23k | setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); |
192 | 7.23k | setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); |
193 | 7.23k | setTruncStoreAction(VT, MVT::i1, Expand); |
194 | 7.23k | } |
195 | 1.20k | |
196 | 1.20k | addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass); |
197 | 1.20k | } |
198 | 1.40k | |
199 | 1.40k | // This is used in the ppcf128->int sequence. Note it has different semantics |
200 | 1.40k | // from FP_ROUND: that rounds to nearest, this rounds to zero. |
201 | 1.40k | setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom); |
202 | 1.40k | |
203 | 1.40k | // We do not currently implement these libm ops for PowerPC. |
204 | 1.40k | setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand); |
205 | 1.40k | setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand); |
206 | 1.40k | setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand); |
207 | 1.40k | setOperationAction(ISD::FRINT, MVT::ppcf128, Expand); |
208 | 1.40k | setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand); |
209 | 1.40k | setOperationAction(ISD::FREM, MVT::ppcf128, Expand); |
210 | 1.40k | |
211 | 1.40k | // PowerPC has no SREM/UREM instructions unless we are on P9 |
212 | 1.40k | // On P9 we may use a hardware instruction to compute the remainder. |
213 | 1.40k | // The instructions are not legalized directly because in the cases where the |
214 | 1.40k | // result of both the remainder and the division is required it is more |
215 | 1.40k | // efficient to compute the remainder from the result of the division rather |
216 | 1.40k | // than use the remainder instruction. |
217 | 1.40k | if (Subtarget.isISA3_0()) { |
218 | 68 | setOperationAction(ISD::SREM, MVT::i32, Custom); |
219 | 68 | setOperationAction(ISD::UREM, MVT::i32, Custom); |
220 | 68 | setOperationAction(ISD::SREM, MVT::i64, Custom); |
221 | 68 | setOperationAction(ISD::UREM, MVT::i64, Custom); |
222 | 1.40k | } else { |
223 | 1.33k | setOperationAction(ISD::SREM, MVT::i32, Expand); |
224 | 1.33k | setOperationAction(ISD::UREM, MVT::i32, Expand); |
225 | 1.33k | setOperationAction(ISD::SREM, MVT::i64, Expand); |
226 | 1.33k | setOperationAction(ISD::UREM, MVT::i64, Expand); |
227 | 1.33k | } |
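 | | // A sketch of the fold the handling above enables when both results |
 | | // are live (one hardware divide feeds both): |
 | | //   srem(a, b) ==> sub(a, mul(sdiv(a, b), b)) |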
228 | 1.40k | |
229 | 1.40k | if (Subtarget.hasP9Vector()) { |
230 | 60 | setOperationAction(ISD::ABS, MVT::v4i32, Legal); |
231 | 60 | setOperationAction(ISD::ABS, MVT::v8i16, Legal); |
232 | 60 | setOperationAction(ISD::ABS, MVT::v16i8, Legal); |
233 | 60 | } |
234 | 1.40k | |
235 | 1.40k | // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM. |
236 | 1.40k | setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); |
237 | 1.40k | setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); |
238 | 1.40k | setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); |
239 | 1.40k | setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); |
240 | 1.40k | setOperationAction(ISD::UDIVREM, MVT::i32, Expand); |
241 | 1.40k | setOperationAction(ISD::SDIVREM, MVT::i32, Expand); |
242 | 1.40k | setOperationAction(ISD::UDIVREM, MVT::i64, Expand); |
243 | 1.40k | setOperationAction(ISD::SDIVREM, MVT::i64, Expand); |
244 | 1.40k | |
245 | 1.40k | // We don't support sin/cos/sqrt/fmod/pow |
246 | 1.40k | setOperationAction(ISD::FSIN , MVT::f64, Expand); |
247 | 1.40k | setOperationAction(ISD::FCOS , MVT::f64, Expand); |
248 | 1.40k | setOperationAction(ISD::FSINCOS, MVT::f64, Expand); |
249 | 1.40k | setOperationAction(ISD::FREM , MVT::f64, Expand); |
250 | 1.40k | setOperationAction(ISD::FPOW , MVT::f64, Expand); |
251 | 1.40k | setOperationAction(ISD::FMA , MVT::f64, Legal); |
252 | 1.40k | setOperationAction(ISD::FSIN , MVT::f32, Expand); |
253 | 1.40k | setOperationAction(ISD::FCOS , MVT::f32, Expand); |
254 | 1.40k | setOperationAction(ISD::FSINCOS, MVT::f32, Expand); |
255 | 1.40k | setOperationAction(ISD::FREM , MVT::f32, Expand); |
256 | 1.40k | setOperationAction(ISD::FPOW , MVT::f32, Expand); |
257 | 1.40k | setOperationAction(ISD::FMA , MVT::f32, Legal); |
258 | 1.40k | |
259 | 1.40k | setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); |
260 | 1.40k | |
261 | 1.40k | // If we're enabling GP optimizations, use hardware square root |
262 | 1.40k | if (!Subtarget.hasFSQRT() && |
263 | 531 | !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() && |
264 | 0 | Subtarget.hasFRE())) |
265 | 531 | setOperationAction(ISD::FSQRT, MVT::f64, Expand); |
266 | 1.40k | |
267 | 1.40k | if (!Subtarget.hasFSQRT() && |
268 | 531 | !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() && |
269 | 0 | Subtarget.hasFRES())) |
270 | 531 | setOperationAction(ISD::FSQRT, MVT::f32, Expand); |
271 | 1.40k | |
272 | 1.40k | if (Subtarget.hasFCPSGN()) { |
273 | 777 | setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal); |
274 | 777 | setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal); |
275 | 1.40k | } else { |
276 | 630 | setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); |
277 | 630 | setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); |
278 | 630 | } |
279 | 1.40k | |
280 | 1.40k | if (Subtarget.hasFPRND()) { |
281 | 779 | setOperationAction(ISD::FFLOOR, MVT::f64, Legal); |
282 | 779 | setOperationAction(ISD::FCEIL, MVT::f64, Legal); |
283 | 779 | setOperationAction(ISD::FTRUNC, MVT::f64, Legal); |
284 | 779 | setOperationAction(ISD::FROUND, MVT::f64, Legal); |
285 | 779 | |
286 | 779 | setOperationAction(ISD::FFLOOR, MVT::f32, Legal); |
287 | 779 | setOperationAction(ISD::FCEIL, MVT::f32, Legal); |
288 | 779 | setOperationAction(ISD::FTRUNC, MVT::f32, Legal); |
289 | 779 | setOperationAction(ISD::FROUND, MVT::f32, Legal); |
290 | 779 | } |
291 | 1.40k | |
292 | 1.40k | // PowerPC does not have BSWAP |
293 | 1.40k | // CTPOP and CTTZ were introduced in P8 and P9, respectively. |
294 | 1.40k | setOperationAction(ISD::BSWAP, MVT::i32 , Expand); |
295 | 1.40k | setOperationAction(ISD::BSWAP, MVT::i64 , Expand); |
296 | 1.40k | if (Subtarget.isISA3_0()) { |
297 | 68 | setOperationAction(ISD::CTTZ , MVT::i32 , Legal); |
298 | 68 | setOperationAction(ISD::CTTZ , MVT::i64 , Legal); |
299 | 1.40k | } else { |
300 | 1.33k | setOperationAction(ISD::CTTZ , MVT::i32 , Expand); |
301 | 1.33k | setOperationAction(ISD::CTTZ , MVT::i64 , Expand); |
302 | 1.33k | } |
303 | 1.40k | |
304 | 1.40k | if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) { |
305 | 690 | setOperationAction(ISD::CTPOP, MVT::i32 , Legal); |
306 | 690 | setOperationAction(ISD::CTPOP, MVT::i64 , Legal); |
307 | 1.40k | } else { |
308 | 717 | setOperationAction(ISD::CTPOP, MVT::i32 , Expand); |
309 | 717 | setOperationAction(ISD::CTPOP, MVT::i64 , Expand); |
310 | 717 | } |
311 | 1.40k | |
312 | 1.40k | // PowerPC does not have ROTR |
313 | 1.40k | setOperationAction(ISD::ROTR, MVT::i32 , Expand); |
314 | 1.40k | setOperationAction(ISD::ROTR, MVT::i64 , Expand); |
315 | 1.40k | |
316 | 1.40k | if (!Subtarget.useCRBits()) { |
317 | 201 | // PowerPC does not have Select |
318 | 201 | setOperationAction(ISD::SELECT, MVT::i32, Expand); |
319 | 201 | setOperationAction(ISD::SELECT, MVT::i64, Expand); |
320 | 201 | setOperationAction(ISD::SELECT, MVT::f32, Expand); |
321 | 201 | setOperationAction(ISD::SELECT, MVT::f64, Expand); |
322 | 201 | } |
323 | 1.40k | |
324 | 1.40k | // PowerPC wants to turn select_cc of FP into fsel when possible. |
325 | 1.40k | setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); |
326 | 1.40k | setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); |
327 | 1.40k | |
328 | 1.40k | // PowerPC wants to optimize integer setcc a bit |
329 | 1.40k | if (!Subtarget.useCRBits()) |
330 | 201 | setOperationAction(ISD::SETCC, MVT::i32, Custom); |
331 | 1.40k | |
332 | 1.40k | // PowerPC does not have BRCOND which requires SetCC |
333 | 1.40k | if (!Subtarget.useCRBits()) |
334 | 201 | setOperationAction(ISD::BRCOND, MVT::Other, Expand); |
335 | 1.40k | |
336 | 1.40k | setOperationAction(ISD::BR_JT, MVT::Other, Expand); |
337 | 1.40k | |
338 | 1.40k | // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores. |
339 | 1.40k | setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); |
340 | 1.40k | |
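 | | // The classic sequence this lowers to (a sketch; pre-P8, with no |
 | | // direct FPR->GPR move): fctiwz to convert in an FPR, stfiwx to store |
 | | // the integer image to memory, then lwz to reload it into a GPR. |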
341 | 1.40k | // PowerPC does not have [U|S]INT_TO_FP |
342 | 1.40k | setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); |
343 | 1.40k | setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); |
344 | 1.40k | |
345 | 1.40k | if (Subtarget.hasDirectMove() && isPPC64) { |
346 | 399 | setOperationAction(ISD::BITCAST, MVT::f32, Legal); |
347 | 399 | setOperationAction(ISD::BITCAST, MVT::i32, Legal); |
348 | 399 | setOperationAction(ISD::BITCAST, MVT::i64, Legal); |
349 | 399 | setOperationAction(ISD::BITCAST, MVT::f64, Legal); |
350 | 1.40k | } else { |
351 | 1.00k | setOperationAction(ISD::BITCAST, MVT::f32, Expand); |
352 | 1.00k | setOperationAction(ISD::BITCAST, MVT::i32, Expand); |
353 | 1.00k | setOperationAction(ISD::BITCAST, MVT::i64, Expand); |
354 | 1.00k | setOperationAction(ISD::BITCAST, MVT::f64, Expand); |
355 | 1.00k | } |
356 | 1.40k | |
357 | 1.40k | // We cannot sextinreg(i1). Expand to shifts. |
358 | 1.40k | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); |
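 | | // The generic expansion is a shift pair; e.g. for an i1 value held in |
 | | // an i32 register (a sketch of what Expand produces): |
 | | //   sign_extend_inreg(x, i1) ==> sra(shl(x, 31), 31) |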
359 | 1.40k | |
360 | 1.40k | // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support |
361 | 1.40k | // SjLj exception handling but a light-weight setjmp/longjmp replacement to |
362 | 1.40k | // support continuation, user-level threading, and etc.. As a result, no |
363 | 1.40k | // support continuation, user-level threading, etc. As a result, no |
364 | 1.40k | // other SjLj exception interfaces are implemented; please don't build |
365 | 1.40k | // LLVM/Clang supports zero-cost DWARF exception handling. |
366 | 1.40k | setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); |
367 | 1.40k | setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); |
368 | 1.40k | |
369 | 1.40k | // We want to legalize GlobalAddress and ConstantPool nodes into the |
370 | 1.40k | // appropriate instructions to materialize the address. |
371 | 1.40k | setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); |
372 | 1.40k | setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); |
373 | 1.40k | setOperationAction(ISD::BlockAddress, MVT::i32, Custom); |
374 | 1.40k | setOperationAction(ISD::ConstantPool, MVT::i32, Custom); |
375 | 1.40k | setOperationAction(ISD::JumpTable, MVT::i32, Custom); |
376 | 1.40k | setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); |
377 | 1.40k | setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); |
378 | 1.40k | setOperationAction(ISD::BlockAddress, MVT::i64, Custom); |
379 | 1.40k | setOperationAction(ISD::ConstantPool, MVT::i64, Custom); |
380 | 1.40k | setOperationAction(ISD::JumpTable, MVT::i64, Custom); |
381 | 1.40k | |
382 | 1.40k | // TRAP is legal. |
383 | 1.40k | setOperationAction(ISD::TRAP, MVT::Other, Legal); |
384 | 1.40k | |
385 | 1.40k | // TRAMPOLINE is custom lowered. |
386 | 1.40k | setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); |
387 | 1.40k | setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); |
388 | 1.40k | |
389 | 1.40k | // VASTART needs to be custom lowered to use the VarArgsFrameIndex |
390 | 1.40k | setOperationAction(ISD::VASTART , MVT::Other, Custom); |
391 | 1.40k | |
392 | 1.40k | if (Subtarget.isSVR4ABI()) { |
393 | 1.25k | if (isPPC64) { |
394 | 992 | // VAARG always uses double-word chunks, so promote anything smaller. |
395 | 992 | setOperationAction(ISD::VAARG, MVT::i1, Promote); |
396 | 992 | AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64); |
397 | 992 | setOperationAction(ISD::VAARG, MVT::i8, Promote); |
398 | 992 | AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64); |
399 | 992 | setOperationAction(ISD::VAARG, MVT::i16, Promote); |
400 | 992 | AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64); |
401 | 992 | setOperationAction(ISD::VAARG, MVT::i32, Promote); |
402 | 992 | AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64); |
403 | 992 | setOperationAction(ISD::VAARG, MVT::Other, Expand); |
404 | 1.25k | } else { |
405 | 258 | // VAARG is custom lowered with the 32-bit SVR4 ABI. |
406 | 258 | setOperationAction(ISD::VAARG, MVT::Other, Custom); |
407 | 258 | setOperationAction(ISD::VAARG, MVT::i64, Custom); |
408 | 258 | } |
409 | 1.25k | } else |
410 | 157 | setOperationAction(ISD::VAARG, MVT::Other, Expand); |
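 | | // Under the 64-bit promotion above, e.g. va_arg(ap, short) consumes a |
 | | // full 8-byte slot: the i64 chunk is loaded and then truncated to i16. |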
411 | 1.40k | |
412 | 1.40k | if (Subtarget.isSVR4ABI() && !isPPC64) |
413 | 1.40k | // VACOPY is custom lowered with the 32-bit SVR4 ABI. |
414 | 258 | setOperationAction(ISD::VACOPY , MVT::Other, Custom); |
415 | 1.40k | else |
416 | 1.14k | setOperationAction(ISD::VACOPY , MVT::Other, Expand); |
417 | 1.40k | |
418 | 1.40k | // Use the default implementation. |
419 | 1.40k | setOperationAction(ISD::VAEND , MVT::Other, Expand); |
420 | 1.40k | setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); |
421 | 1.40k | setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom); |
422 | 1.40k | setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); |
423 | 1.40k | setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom); |
424 | 1.40k | setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom); |
425 | 1.40k | setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom); |
426 | 1.40k | setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); |
427 | 1.40k | setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom); |
428 | 1.40k | |
429 | 1.40k | // We want to custom lower some of our intrinsics. |
430 | 1.40k | setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); |
431 | 1.40k | |
432 | 1.40k | // To handle counter-based loop conditions. |
433 | 1.40k | setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom); |
434 | 1.40k | |
435 | 1.40k | setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom); |
436 | 1.40k | setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom); |
437 | 1.40k | setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom); |
438 | 1.40k | setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); |
439 | 1.40k | |
440 | 1.40k | // Comparisons that require checking two conditions. |
441 | 1.40k | setCondCodeAction(ISD::SETULT, MVT::f32, Expand); |
442 | 1.40k | setCondCodeAction(ISD::SETULT, MVT::f64, Expand); |
443 | 1.40k | setCondCodeAction(ISD::SETUGT, MVT::f32, Expand); |
444 | 1.40k | setCondCodeAction(ISD::SETUGT, MVT::f64, Expand); |
445 | 1.40k | setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand); |
446 | 1.40k | setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand); |
447 | 1.40k | setCondCodeAction(ISD::SETOGE, MVT::f32, Expand); |
448 | 1.40k | setCondCodeAction(ISD::SETOGE, MVT::f64, Expand); |
449 | 1.40k | setCondCodeAction(ISD::SETOLE, MVT::f32, Expand); |
450 | 1.40k | setCondCodeAction(ISD::SETOLE, MVT::f64, Expand); |
451 | 1.40k | setCondCodeAction(ISD::SETONE, MVT::f32, Expand); |
452 | 1.40k | setCondCodeAction(ISD::SETONE, MVT::f64, Expand); |
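 | | // Expanding one of these splits it into two legal predicates; e.g. |
 | | // (a sketch of the generic expansion): |
 | | //   setcc(x, y, setult) ==> or(setcc(x, y, setuo), setcc(x, y, setlt)) |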
453 | 1.40k | |
454 | 1.40k | if (Subtarget.has64BitSupport()) { |
455 | 1.08k | // They also have instructions for converting between i64 and fp. |
456 | 1.08k | setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); |
457 | 1.08k | setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); |
458 | 1.08k | setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); |
459 | 1.08k | setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); |
460 | 1.08k | // This is just the low 32 bits of a (signed) fp->i64 conversion. |
461 | 1.08k | // We cannot do this with Promote because i64 is not a legal type. |
462 | 1.08k | setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); |
463 | 1.08k | |
464 | 1.08k | if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) |
465 | 1.03k | setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); |
466 | 1.40k | } else { |
467 | 321 | // PowerPC does not have FP_TO_UINT on 32-bit implementations. |
468 | 321 | setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); |
469 | 321 | } |
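 | | // A sketch of the i32 FP_TO_UINT trick above: any value that fits in |
 | | // u32 also fits in the signed i64 range, so |
 | | //   (i32 fp_to_uint x) ==> trunc(i64 fp_to_sint x) |
 | | // keeping only the low 32 bits of the fctidz result. |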
470 | 1.40k | |
471 | 1.40k | // With the instructions enabled under FPCVT, we can do everything. |
472 | 1.40k | if (Subtarget.hasFPCVT()) { |
473 | 764 | if (Subtarget.has64BitSupport()) { |
474 | 764 | setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); |
475 | 764 | setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); |
476 | 764 | setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); |
477 | 764 | setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); |
478 | 764 | } |
479 | 764 | |
480 | 764 | setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); |
481 | 764 | setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); |
482 | 764 | setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); |
483 | 764 | setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); |
484 | 764 | } |
485 | 1.40k | |
486 | 1.40k | if (Subtarget.use64BitRegs()) { |
487 | 1.02k | // 64-bit PowerPC implementations can support i64 types directly |
488 | 1.02k | addRegisterClass(MVT::i64, &PPC::G8RCRegClass); |
489 | 1.02k | // BUILD_PAIR can't be handled natively, and should be expanded to shl/or |
490 | 1.02k | setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); |
491 | 1.02k | // 64-bit PowerPC wants to expand i128 shifts itself. |
492 | 1.02k | setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); |
493 | 1.02k | setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); |
494 | 1.02k | setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); |
495 | 1.40k | } else { |
496 | 379 | // 32-bit PowerPC wants to expand i64 shifts itself. |
497 | 379 | setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); |
498 | 379 | setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); |
499 | 379 | setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); |
500 | 379 | } |
501 | 1.40k | |
502 | 1.40k | if (Subtarget.hasAltivec()) { |
503 | 822 | // First set operation action for all vector types to expand. Then we |
504 | 822 | // will selectively turn on ones that can be effectively codegen'd. |
505 | 77.2k | for (MVT VT : MVT::vector_valuetypes()) { |
506 | 77.2k | // add/sub are legal for all supported vector VT's. |
507 | 77.2k | setOperationAction(ISD::ADD, VT, Legal); |
508 | 77.2k | setOperationAction(ISD::SUB, VT, Legal); |
509 | 77.2k | |
510 | 77.2k | // Vector instructions introduced in P8 |
511 | 77.2k | if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) { |
512 | 40.1k | setOperationAction(ISD::CTPOP, VT, Legal); |
513 | 40.1k | setOperationAction(ISD::CTLZ, VT, Legal); |
514 | 40.1k | } |
515 | 37.0k | else { |
516 | 37.0k | setOperationAction(ISD::CTPOP, VT, Expand); |
517 | 37.0k | setOperationAction(ISD::CTLZ, VT, Expand); |
518 | 37.0k | } |
519 | 77.2k | |
520 | 77.2k | // Vector instructions introduced in P9 |
521 | 77.2k | if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128)) |
522 | 6.32k | setOperationAction(ISD::CTTZ, VT, Legal); |
523 | 77.2k | else |
524 | 70.9k | setOperationAction(ISD::CTTZ, VT, Expand); |
525 | 77.2k | |
526 | 77.2k | // We promote all shuffles to v16i8. |
527 | 77.2k | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote); |
528 | 77.2k | AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8); |
529 | 77.2k | |
530 | 77.2k | // We promote all non-typed operations to v4i32. |
531 | 77.2k | setOperationAction(ISD::AND , VT, Promote); |
532 | 77.2k | AddPromotedToType (ISD::AND , VT, MVT::v4i32); |
533 | 77.2k | setOperationAction(ISD::OR , VT, Promote); |
534 | 77.2k | AddPromotedToType (ISD::OR , VT, MVT::v4i32); |
535 | 77.2k | setOperationAction(ISD::XOR , VT, Promote); |
536 | 77.2k | AddPromotedToType (ISD::XOR , VT, MVT::v4i32); |
537 | 77.2k | setOperationAction(ISD::LOAD , VT, Promote); |
538 | 77.2k | AddPromotedToType (ISD::LOAD , VT, MVT::v4i32); |
539 | 77.2k | setOperationAction(ISD::SELECT, VT, Promote); |
540 | 77.2k | AddPromotedToType (ISD::SELECT, VT, MVT::v4i32); |
541 | 77.2k | setOperationAction(ISD::SELECT_CC, VT, Promote); |
542 | 77.2k | AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32); |
543 | 77.2k | setOperationAction(ISD::STORE, VT, Promote); |
544 | 77.2k | AddPromotedToType (ISD::STORE, VT, MVT::v4i32); |
545 | 77.2k | |
546 | 77.2k | // No other operations are legal. |
547 | 77.2k | setOperationAction(ISD::MUL , VT, Expand); |
548 | 77.2k | setOperationAction(ISD::SDIV, VT, Expand); |
549 | 77.2k | setOperationAction(ISD::SREM, VT, Expand); |
550 | 77.2k | setOperationAction(ISD::UDIV, VT, Expand); |
551 | 77.2k | setOperationAction(ISD::UREM, VT, Expand); |
552 | 77.2k | setOperationAction(ISD::FDIV, VT, Expand); |
553 | 77.2k | setOperationAction(ISD::FREM, VT, Expand); |
554 | 77.2k | setOperationAction(ISD::FNEG, VT, Expand); |
555 | 77.2k | setOperationAction(ISD::FSQRT, VT, Expand); |
556 | 77.2k | setOperationAction(ISD::FLOG, VT, Expand); |
557 | 77.2k | setOperationAction(ISD::FLOG10, VT, Expand); |
558 | 77.2k | setOperationAction(ISD::FLOG2, VT, Expand); |
559 | 77.2k | setOperationAction(ISD::FEXP, VT, Expand); |
560 | 77.2k | setOperationAction(ISD::FEXP2, VT, Expand); |
561 | 77.2k | setOperationAction(ISD::FSIN, VT, Expand); |
562 | 77.2k | setOperationAction(ISD::FCOS, VT, Expand); |
563 | 77.2k | setOperationAction(ISD::FABS, VT, Expand); |
564 | 77.2k | setOperationAction(ISD::FFLOOR, VT, Expand); |
565 | 77.2k | setOperationAction(ISD::FCEIL, VT, Expand); |
566 | 77.2k | setOperationAction(ISD::FTRUNC, VT, Expand); |
567 | 77.2k | setOperationAction(ISD::FRINT, VT, Expand); |
568 | 77.2k | setOperationAction(ISD::FNEARBYINT, VT, Expand); |
569 | 77.2k | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand); |
570 | 77.2k | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand); |
571 | 77.2k | setOperationAction(ISD::BUILD_VECTOR, VT, Expand); |
572 | 77.2k | setOperationAction(ISD::MULHU, VT, Expand); |
573 | 77.2k | setOperationAction(ISD::MULHS, VT, Expand); |
574 | 77.2k | setOperationAction(ISD::UMUL_LOHI, VT, Expand); |
575 | 77.2k | setOperationAction(ISD::SMUL_LOHI, VT, Expand); |
576 | 77.2k | setOperationAction(ISD::UDIVREM, VT, Expand); |
577 | 77.2k | setOperationAction(ISD::SDIVREM, VT, Expand); |
578 | 77.2k | setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand); |
579 | 77.2k | setOperationAction(ISD::FPOW, VT, Expand); |
580 | 77.2k | setOperationAction(ISD::BSWAP, VT, Expand); |
581 | 77.2k | setOperationAction(ISD::VSELECT, VT, Expand); |
582 | 77.2k | setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); |
583 | 77.2k | setOperationAction(ISD::ROTL, VT, Expand); |
584 | 77.2k | setOperationAction(ISD::ROTR, VT, Expand); |
585 | 77.2k | |
586 | 7.26M | for (MVT InnerVT : MVT::vector_valuetypes()) { |
587 | 7.26M | setTruncStoreAction(VT, InnerVT, Expand); |
588 | 7.26M | setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); |
589 | 7.26M | setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); |
590 | 7.26M | setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); |
591 | 7.26M | } |
592 | 77.2k | } |
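 | | // Note on the Promote entries above: between same-sized vector types, |
 | | // promotion amounts to a pair of bitcasts, e.g. (a sketch) |
 | | //   (v8i16 load p) ==> bitcast v8i16 (v4i32 load p) |
 | | // so only the v4i32 (or v16i8 shuffle) form needs native selection. |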
593 | 822 | |
594 | 822 | // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle |
595 | 822 | // with merges, splats, etc. |
596 | 822 | setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom); |
597 | 822 | |
598 | 822 | setOperationAction(ISD::AND , MVT::v4i32, Legal); |
599 | 822 | setOperationAction(ISD::OR , MVT::v4i32, Legal); |
600 | 822 | setOperationAction(ISD::XOR , MVT::v4i32, Legal); |
601 | 822 | setOperationAction(ISD::LOAD , MVT::v4i32, Legal); |
602 | 822 | setOperationAction(ISD::SELECT, MVT::v4i32, |
603 | 822 | Subtarget.useCRBits() ? Legal : Expand); |
604 | 822 | setOperationAction(ISD::STORE , MVT::v4i32, Legal); |
605 | 822 | setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); |
606 | 822 | setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); |
607 | 822 | setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); |
608 | 822 | setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); |
609 | 822 | setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); |
610 | 822 | setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); |
611 | 822 | setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); |
612 | 822 | setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); |
613 | 822 | |
614 | 822 | addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass); |
615 | 822 | addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass); |
616 | 822 | addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass); |
617 | 822 | addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass); |
618 | 822 | |
619 | 822 | setOperationAction(ISD::MUL, MVT::v4f32, Legal); |
620 | 822 | setOperationAction(ISD::FMA, MVT::v4f32, Legal); |
621 | 822 | |
622 | 822 | if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) { |
623 | 636 | setOperationAction(ISD::FDIV, MVT::v4f32, Legal); |
624 | 636 | setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); |
625 | 636 | } |
626 | 822 | |
627 | 822 | if (Subtarget.hasP8Altivec()) |
628 | 432 | setOperationAction(ISD::MUL, MVT::v4i32, Legal); |
629 | 822 | else |
630 | 390 | setOperationAction(ISD::MUL, MVT::v4i32, Custom); |
631 | 822 | |
632 | 822 | setOperationAction(ISD::MUL, MVT::v8i16, Custom); |
633 | 822 | setOperationAction(ISD::MUL, MVT::v16i8, Custom); |
634 | 822 | |
635 | 822 | setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); |
636 | 822 | setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom); |
637 | 822 | |
638 | 822 | setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom); |
639 | 822 | setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); |
640 | 822 | setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); |
641 | 822 | setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); |
642 | 822 | |
643 | 822 | // Altivec does not contain unordered floating-point compare instructions |
644 | 822 | setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand); |
645 | 822 | setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand); |
646 | 822 | setCondCodeAction(ISD::SETO, MVT::v4f32, Expand); |
647 | 822 | setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand); |
648 | 822 | |
649 | 822 | if (Subtarget.hasVSX()) { |
650 | 632 | setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); |
651 | 632 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); |
652 | 632 | if (Subtarget.hasP8Vector()) { |
653 | 407 | setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); |
654 | 407 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal); |
655 | 407 | } |
656 | 632 | if (Subtarget.hasDirectMove() && isPPC64) { |
657 | 399 | setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal); |
658 | 399 | setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal); |
659 | 399 | setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal); |
660 | 399 | setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal); |
661 | 399 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal); |
662 | 399 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal); |
663 | 399 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal); |
664 | 399 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal); |
665 | 399 | } |
666 | 632 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); |
667 | 632 | |
668 | 632 | setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); |
669 | 632 | setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); |
670 | 632 | setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); |
671 | 632 | setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); |
672 | 632 | setOperationAction(ISD::FROUND, MVT::v2f64, Legal); |
673 | 632 | |
674 | 632 | setOperationAction(ISD::FROUND, MVT::v4f32, Legal); |
675 | 632 | |
676 | 632 | setOperationAction(ISD::MUL, MVT::v2f64, Legal); |
677 | 632 | setOperationAction(ISD::FMA, MVT::v2f64, Legal); |
678 | 632 | |
679 | 632 | setOperationAction(ISD::FDIV, MVT::v2f64, Legal); |
680 | 632 | setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); |
681 | 632 | |
682 | 632 | setOperationAction(ISD::VSELECT, MVT::v16i8, Legal); |
683 | 632 | setOperationAction(ISD::VSELECT, MVT::v8i16, Legal); |
684 | 632 | setOperationAction(ISD::VSELECT, MVT::v4i32, Legal); |
685 | 632 | setOperationAction(ISD::VSELECT, MVT::v4f32, Legal); |
686 | 632 | setOperationAction(ISD::VSELECT, MVT::v2f64, Legal); |
687 | 632 | |
688 | 632 | // Share the Altivec comparison restrictions. |
689 | 632 | setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand); |
690 | 632 | setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand); |
691 | 632 | setCondCodeAction(ISD::SETO, MVT::v2f64, Expand); |
692 | 632 | setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand); |
693 | 632 | |
694 | 632 | setOperationAction(ISD::LOAD, MVT::v2f64, Legal); |
695 | 632 | setOperationAction(ISD::STORE, MVT::v2f64, Legal); |
696 | 632 | |
697 | 632 | setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal); |
698 | 632 | |
699 | 632 | if (Subtarget.hasP8Vector()) |
700 | 407 | addRegisterClass(MVT::f32, &PPC::VSSRCRegClass); |
701 | 632 | |
702 | 632 | addRegisterClass(MVT::f64, &PPC::VSFRCRegClass); |
703 | 632 | |
704 | 632 | addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass); |
705 | 632 | addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass); |
706 | 632 | addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass); |
707 | 632 | |
708 | 632 | if (Subtarget.hasP8Altivec()) { |
709 | 409 | setOperationAction(ISD::SHL, MVT::v2i64, Legal); |
710 | 409 | setOperationAction(ISD::SRA, MVT::v2i64, Legal); |
711 | 409 | setOperationAction(ISD::SRL, MVT::v2i64, Legal); |
712 | 409 | |
713 | 409 | // 128 bit shifts can be accomplished via 3 instructions for SHL and |
714 | 409 | // SRL, but not for SRA because of the instructions available: |
715 | 409 | // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth |
716 | 409 | // doing |
717 | 409 | setOperationAction(ISD::SHL, MVT::v1i128, Expand); |
718 | 409 | setOperationAction(ISD::SRL, MVT::v1i128, Expand); |
719 | 409 | setOperationAction(ISD::SRA, MVT::v1i128, Expand); |
720 | 409 | |
721 | 409 | setOperationAction(ISD::SETCC, MVT::v2i64, Legal); |
722 | 409 | } |
723 | 223 | else { |
724 | 223 | setOperationAction(ISD::SHL, MVT::v2i64, Expand); |
725 | 223 | setOperationAction(ISD::SRA, MVT::v2i64, Expand); |
726 | 223 | setOperationAction(ISD::SRL, MVT::v2i64, Expand); |
727 | 223 | |
728 | 223 | setOperationAction(ISD::SETCC, MVT::v2i64, Custom); |
729 | 223 | |
730 | 223 | // VSX v2i64 only supports non-arithmetic operations. |
731 | 223 | setOperationAction(ISD::ADD, MVT::v2i64, Expand); |
732 | 223 | setOperationAction(ISD::SUB, MVT::v2i64, Expand); |
733 | 223 | } |
734 | 632 | |
735 | 632 | setOperationAction(ISD::LOAD, MVT::v2i64, Promote); |
736 | 632 | AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64); |
737 | 632 | setOperationAction(ISD::STORE, MVT::v2i64, Promote); |
738 | 632 | AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64); |
739 | 632 | |
740 | 632 | setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal); |
741 | 632 | |
742 | 632 | setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); |
743 | 632 | setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); |
744 | 632 | setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); |
745 | 632 | setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); |
746 | 632 | |
747 | 632 | // Vector operation legalization checks the result type of |
748 | 632 | // SIGN_EXTEND_INREG, overall legalization checks the inner type. |
749 | 632 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal); |
750 | 632 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal); |
751 | 632 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom); |
752 | 632 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom); |
753 | 632 | |
754 | 632 | setOperationAction(ISD::FNEG, MVT::v4f32, Legal); |
755 | 632 | setOperationAction(ISD::FNEG, MVT::v2f64, Legal); |
756 | 632 | setOperationAction(ISD::FABS, MVT::v4f32, Legal); |
757 | 632 | setOperationAction(ISD::FABS, MVT::v2f64, Legal); |
758 | 632 | |
759 | 632 | if (Subtarget.hasDirectMove()) |
760 | 401 | setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); |
761 | 632 | setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); |
762 | 632 | |
763 | 632 | addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass); |
764 | 632 | } |
765 | 822 | |
766 | 822 | if (Subtarget.hasP8Altivec()) { |
767 | 432 | addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass); |
768 | 432 | addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass); |
769 | 432 | } |
770 | 822 | |
771 | 822 | if (Subtarget.hasP9Vector()) { |
772 | 60 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); |
773 | 60 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); |
774 | 60 | |
775 | 60 | // 128 bit shifts can be accomplished via 3 instructions for SHL and |
776 | 60 | // SRL, but not for SRA because of the instructions available: |
777 | 60 | // VS{RL} and VS{RL}O. |
778 | 60 | setOperationAction(ISD::SHL, MVT::v1i128, Legal); |
779 | 60 | setOperationAction(ISD::SRL, MVT::v1i128, Legal); |
780 | 60 | setOperationAction(ISD::SRA, MVT::v1i128, Expand); |
781 | 60 | } |
782 | 822 | } |
783 | 1.40k | |
784 | 1.40k | if (Subtarget.hasQPX()) { |
785 | 40 | setOperationAction(ISD::FADD, MVT::v4f64, Legal); |
786 | 40 | setOperationAction(ISD::FSUB, MVT::v4f64, Legal); |
787 | 40 | setOperationAction(ISD::FMUL, MVT::v4f64, Legal); |
788 | 40 | setOperationAction(ISD::FREM, MVT::v4f64, Expand); |
789 | 40 | |
790 | 40 | setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal); |
791 | 40 | setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand); |
792 | 40 | |
793 | 40 | setOperationAction(ISD::LOAD , MVT::v4f64, Custom); |
794 | 40 | setOperationAction(ISD::STORE , MVT::v4f64, Custom); |
795 | 40 | |
796 | 40 | setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom); |
797 | 40 | setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom); |
798 | 40 | |
799 | 40 | if (!Subtarget.useCRBits()) |
800 | 5 | setOperationAction(ISD::SELECT, MVT::v4f64, Expand); |
801 | 40 | setOperationAction(ISD::VSELECT, MVT::v4f64, Legal); |
802 | 40 | |
803 | 40 | setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal); |
804 | 40 | setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand); |
805 | 40 | setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand); |
806 | 40 | setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand); |
807 | 40 | setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom); |
808 | 40 | setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal); |
809 | 40 | setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom); |
810 | 40 | |
811 | 40 | setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal); |
812 | 40 | setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand); |
813 | 40 | |
814 | 40 | setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal); |
815 | 40 | setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand); |
816 | 40 | setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal); |
817 | 40 | |
818 | 40 | setOperationAction(ISD::FNEG , MVT::v4f64, Legal); |
819 | 40 | setOperationAction(ISD::FABS , MVT::v4f64, Legal); |
820 | 40 | setOperationAction(ISD::FSIN , MVT::v4f64, Expand); |
821 | 40 | setOperationAction(ISD::FCOS , MVT::v4f64, Expand); |
822 | 40 | setOperationAction(ISD::FPOW , MVT::v4f64, Expand); |
823 | 40 | setOperationAction(ISD::FLOG , MVT::v4f64, Expand); |
824 | 40 | setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand); |
825 | 40 | setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand); |
826 | 40 | setOperationAction(ISD::FEXP , MVT::v4f64, Expand); |
827 | 40 | setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand); |
828 | 40 | |
829 | 40 | setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal); |
830 | 40 | setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal); |
831 | 40 | |
832 | 40 | setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal); |
833 | 40 | setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal); |
834 | 40 | |
835 | 40 | addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass); |
836 | 40 | |
837 | 40 | setOperationAction(ISD::FADD, MVT::v4f32, Legal); |
838 | 40 | setOperationAction(ISD::FSUB, MVT::v4f32, Legal); |
839 | 40 | setOperationAction(ISD::FMUL, MVT::v4f32, Legal); |
840 | 40 | setOperationAction(ISD::FREM, MVT::v4f32, Expand); |
841 | 40 | |
842 | 40 | setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal); |
843 | 40 | setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand); |
844 | 40 | |
845 | 40 | setOperationAction(ISD::LOAD , MVT::v4f32, Custom); |
846 | 40 | setOperationAction(ISD::STORE , MVT::v4f32, Custom); |
847 | 40 | |
848 | 40 | if (!Subtarget.useCRBits()) |
849 | 5 | setOperationAction(ISD::SELECT, MVT::v4f32, Expand); |
850 | 40 | setOperationAction(ISD::VSELECT, MVT::v4f32, Legal); |
851 | 40 | |
852 | 40 | setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal); |
853 | 40 | setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand); |
854 | 40 | setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand); |
855 | 40 | setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand); |
856 | 40 | setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom); |
857 | 40 | setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); |
858 | 40 | setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); |
859 | 40 | |
860 | 40 | setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal); |
861 | 40 | setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand); |
862 | 40 | |
863 | 40 | setOperationAction(ISD::FNEG , MVT::v4f32, Legal); |
864 | 40 | setOperationAction(ISD::FABS , MVT::v4f32, Legal); |
865 | 40 | setOperationAction(ISD::FSIN , MVT::v4f32, Expand); |
866 | 40 | setOperationAction(ISD::FCOS , MVT::v4f32, Expand); |
867 | 40 | setOperationAction(ISD::FPOW , MVT::v4f32, Expand); |
868 | 40 | setOperationAction(ISD::FLOG , MVT::v4f32, Expand); |
869 | 40 | setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand); |
870 | 40 | setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand); |
871 | 40 | setOperationAction(ISD::FEXP , MVT::v4f32, Expand); |
872 | 40 | setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand); |
873 | 40 | |
874 | 40 | setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); |
875 | 40 | setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); |
876 | 40 | |
877 | 40 | setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal); |
878 | 40 | setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal); |
879 | 40 | |
880 | 40 | addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass); |
881 | 40 | |
882 | 40 | setOperationAction(ISD::AND , MVT::v4i1, Legal); |
883 | 40 | setOperationAction(ISD::OR , MVT::v4i1, Legal); |
884 | 40 | setOperationAction(ISD::XOR , MVT::v4i1, Legal); |
885 | 40 | |
886 | 40 | if (!Subtarget.useCRBits()) |
887 | 5 | setOperationAction(ISD::SELECT, MVT::v4i1, Expand); |
888 | 40 | setOperationAction(ISD::VSELECT, MVT::v4i1, Legal); |
889 | 40 | |
890 | 40 | setOperationAction(ISD::LOAD , MVT::v4i1, Custom); |
891 | 40 | setOperationAction(ISD::STORE , MVT::v4i1, Custom); |
892 | 40 | |
893 | 40 | setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom); |
894 | 40 | setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand); |
895 | 40 | setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand); |
896 | 40 | setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand); |
897 | 40 | setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom); |
898 | 40 | setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand); |
899 | 40 | setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom); |
900 | 40 | |
901 | 40 | setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom); |
902 | 40 | setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom); |
903 | 40 | |
904 | 40 | addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass); |
905 | 40 | |
906 | 40 | setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal); |
907 | 40 | setOperationAction(ISD::FCEIL, MVT::v4f64, Legal); |
908 | 40 | setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal); |
909 | 40 | setOperationAction(ISD::FROUND, MVT::v4f64, Legal); |
910 | 40 | |
911 | 40 | setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); |
912 | 40 | setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); |
913 | 40 | setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); |
914 | 40 | setOperationAction(ISD::FROUND, MVT::v4f32, Legal); |
915 | 40 | |
916 | 40 | setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand); |
917 | 40 | setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand); |
918 | 40 | |
919 | 40 | // These need to set FE_INEXACT, and so cannot be vectorized here. |
920 | 40 | setOperationAction(ISD::FRINT, MVT::v4f64, Expand); |
921 | 40 | setOperationAction(ISD::FRINT, MVT::v4f32, Expand); |
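 | | // (rint() may raise FE_INEXACT whenever its result differs from its |
 | | // operand; keeping the scalar libm call preserves that side effect, |
 | | // which a vectorized rounding sequence would lose.) |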
922 | 40 | |
923 | 40 | if (TM.Options.UnsafeFPMath) { |
924 | 3 | setOperationAction(ISD::FDIV, MVT::v4f64, Legal); |
925 | 3 | setOperationAction(ISD::FSQRT, MVT::v4f64, Legal); |
926 | 3 | |
927 | 3 | setOperationAction(ISD::FDIV, MVT::v4f32, Legal); |
928 | 3 | setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); |
929 | 40 | } else { |
930 | 37 | setOperationAction(ISD::FDIV, MVT::v4f64, Expand); |
931 | 37 | setOperationAction(ISD::FSQRT, MVT::v4f64, Expand); |
932 | 37 | |
933 | 37 | setOperationAction(ISD::FDIV, MVT::v4f32, Expand); |
934 | 37 | setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); |
935 | 37 | } |
936 | 40 | } |
937 | 1.40k | |
938 | 1.40k | if (Subtarget.has64BitSupport()) |
939 | 1.08k | setOperationAction(ISD::PREFETCH, MVT::Other, Legal); |
940 | 1.40k | |
941 | 1.40k | setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom); |
942 | 1.40k | |
943 | 1.40k | if (!isPPC64) { |
944 | 379 | setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); |
945 | 379 | setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); |
946 | 379 | } |
947 | 1.40k | |
948 | 1.40k | setBooleanContents(ZeroOrOneBooleanContent); |
949 | 1.40k | |
950 | 1.40k | if (Subtarget.hasAltivec()) { |
951 | 822 | // Altivec instructions set fields to all zeros or all ones. |
952 | 822 | setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); |
953 | 822 | } |
954 | 1.40k | |
955 | 1.40k | if (!isPPC64) { |
956 | 379 | // These libcalls are not available in 32-bit. |
957 | 379 | setLibcallName(RTLIB::SHL_I128, nullptr); |
958 | 379 | setLibcallName(RTLIB::SRL_I128, nullptr); |
959 | 379 | setLibcallName(RTLIB::SRA_I128, nullptr); |
960 | 379 | } |
961 | 1.40k | |
962 | 1.40k | setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1); |
963 | 1.40k | |
964 | 1.40k | // We have target-specific dag combine patterns for the following nodes: |
965 | 1.40k | setTargetDAGCombine(ISD::SHL); |
966 | 1.40k | setTargetDAGCombine(ISD::SRA); |
967 | 1.40k | setTargetDAGCombine(ISD::SRL); |
968 | 1.40k | setTargetDAGCombine(ISD::SINT_TO_FP); |
969 | 1.40k | setTargetDAGCombine(ISD::BUILD_VECTOR); |
970 | 1.40k | if (Subtarget.hasFPCVT()) |
971 | 764 | setTargetDAGCombine(ISD::UINT_TO_FP); |
972 | 1.40k | setTargetDAGCombine(ISD::LOAD); |
973 | 1.40k | setTargetDAGCombine(ISD::STORE); |
974 | 1.40k | setTargetDAGCombine(ISD::BR_CC); |
975 | 1.40k | if (Subtarget.useCRBits()) |
976 | 1.20k | setTargetDAGCombine(ISD::BRCOND); |
977 | 1.40k | setTargetDAGCombine(ISD::BSWAP); |
978 | 1.40k | setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); |
979 | 1.40k | setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); |
980 | 1.40k | setTargetDAGCombine(ISD::INTRINSIC_VOID); |
981 | 1.40k | |
982 | 1.40k | setTargetDAGCombine(ISD::SIGN_EXTEND); |
983 | 1.40k | setTargetDAGCombine(ISD::ZERO_EXTEND); |
984 | 1.40k | setTargetDAGCombine(ISD::ANY_EXTEND); |
985 | 1.40k | |
986 | 1.40k | if (Subtarget.useCRBits()) { |
987 | 1.20k | setTargetDAGCombine(ISD::TRUNCATE); |
988 | 1.20k | setTargetDAGCombine(ISD::SETCC); |
989 | 1.20k | setTargetDAGCombine(ISD::SELECT_CC); |
990 | 1.20k | } |
991 | 1.40k | |
992 | 1.40k | // Use reciprocal estimates. |
993 | 1.40k | if (TM.Options.UnsafeFPMath) { |
994 | 17 | setTargetDAGCombine(ISD::FDIV); |
995 | 17 | setTargetDAGCombine(ISD::FSQRT); |
996 | 17 | } |
997 | 1.40k | |
998 | 1.40k | // Darwin long double math library functions have $LDBL128 appended. |
999 | 1.40k | if (Subtarget.isDarwin()) { |
1000 | 157 | setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128"); |
1001 | 157 | setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128"); |
1002 | 157 | setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128"); |
1003 | 157 | setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128"); |
1004 | 157 | setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128"); |
1005 | 157 | setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128"); |
1006 | 157 | setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128"); |
1007 | 157 | setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128"); |
1008 | 157 | setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128"); |
1009 | 157 | setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128"); |
1010 | 157 | } |
1011 | 1.40k | |
1012 | 1.40k | // With 32 condition bits, we don't need to sink (and duplicate) compares |
1013 | 1.40k | // aggressively in CodeGenPrep. |
1014 | 1.40k | if (Subtarget.useCRBits()) { |
1015 | 1.20k | setHasMultipleConditionRegisters(); |
1016 | 1.20k | setJumpIsExpensive(); |
1017 | 1.20k | } |
1018 | 1.40k | |
1019 | 1.40k | setMinFunctionAlignment(2); |
1020 | 1.40k | if (Subtarget.isDarwin()) |
1021 | 157 | setPrefFunctionAlignment(4); |
1022 | 1.40k | |
1023 | 1.40k | switch (Subtarget.getDarwinDirective()) { |
1024 | 558 | default: break; |
1025 | 849 | case PPC::DIR_970: |
1026 | 849 | case PPC::DIR_A2: |
1027 | 849 | case PPC::DIR_E500mc: |
1028 | 849 | case PPC::DIR_E5500: |
1029 | 849 | case PPC::DIR_PWR4: |
1030 | 849 | case PPC::DIR_PWR5: |
1031 | 849 | case PPC::DIR_PWR5X: |
1032 | 849 | case PPC::DIR_PWR6: |
1033 | 849 | case PPC::DIR_PWR6X: |
1034 | 849 | case PPC::DIR_PWR7: |
1035 | 849 | case PPC::DIR_PWR8: |
1036 | 849 | case PPC::DIR_PWR9: |
1037 | 849 | setPrefFunctionAlignment(4); |
1038 | 849 | setPrefLoopAlignment(4); |
1039 | 849 | break; |
1040 | 1.40k | } |
1041 | 1.40k | |
1042 | 1.40k | if (Subtarget.enableMachineScheduler()) |
1043 | 776 | setSchedulingPreference(Sched::Source); |
1044 | 1.40k | else |
1045 | 631 | setSchedulingPreference(Sched::Hybrid); |
1046 | 1.40k | |
1047 | 1.40k | computeRegisterProperties(STI.getRegisterInfo()); |
1048 | 1.40k | |
1049 | 1.40k | // The Freescale cores do better with aggressive inlining of memcpy and |
1050 | 1.40k | // friends. GCC uses the same threshold of 128 bytes (= 32 word stores). |
1051 | 1.40k | if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc || |
1052 | 1.40k | Subtarget.getDarwinDirective() == PPC::DIR_E5500) { |
1053 | 5 | MaxStoresPerMemset = 32; |
1054 | 5 | MaxStoresPerMemsetOptSize = 16; |
1055 | 5 | MaxStoresPerMemcpy = 32; |
1056 | 5 | MaxStoresPerMemcpyOptSize = 8; |
1057 | 5 | MaxStoresPerMemmove = 32; |
1058 | 5 | MaxStoresPerMemmoveOptSize = 8; |
1059 | 1.40k | } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) { |
1060 | 80 | // The A2 also benefits from (very) aggressive inlining of memcpy and |
1061 | 80 | // friends. The overhead of a the function call, even when warm, can be |
1062 | 80 | // friends. The overhead of the function call, even when warm, can be |
1063 | 80 | MaxStoresPerMemset = 128; |
1064 | 80 | MaxStoresPerMemcpy = 128; |
1065 | 80 | MaxStoresPerMemmove = 128; |
1066 | 80 | MaxLoadsPerMemcmp = 128; |
1067 | 1.40k | } else { |
1068 | 1.32k | MaxLoadsPerMemcmp = 8; |
1069 | 1.32k | MaxLoadsPerMemcmpOptSize = 4; |
1070 | 1.32k | } |
1071 | 1.40k | } |
1072 | | |
1073 | | /// getMaxByValAlign - Helper for getByValTypeAlignment to determine |
1074 | | /// the desired ByVal argument alignment. |
1075 | | static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign, |
1076 | 276 | unsigned MaxMaxAlign) { |
1077 | 276 | if (MaxAlign == MaxMaxAlign) |
1078 | 0 | return; |
1079 | 276 | if (VectorType *VTy = dyn_cast<VectorType>(Ty)) { |
1080 | 4 | if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256) |
1081 | 0 | MaxAlign = 32; |
1082 | 4 | else if (VTy->getBitWidth() >= 128 && MaxAlign < 16) |
1083 | 4 | MaxAlign = 16; |
1084 | 276 | } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { |
1085 | 20 | unsigned EltAlign = 0; |
1086 | 20 | getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign); |
1087 | 20 | if (EltAlign > MaxAlign) |
1088 | 0 | MaxAlign = EltAlign; |
1089 | 272 | } else if (StructType *STy = dyn_cast<StructType>(Ty)) { |
1090 | 156 | for (auto *EltTy : STy->elements()) { |
1091 | 156 | unsigned EltAlign = 0; |
1092 | 156 | getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign); |
1093 | 156 | if (EltAlign > MaxAlign) |
1094 | 4 | MaxAlign = EltAlign; |
1095 | 156 | if (MaxAlign == MaxMaxAlign) |
1096 | 4 | break; |
1097 | 276 | } |
1098 | 272 | } |
1099 | 276 | } |
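| | |
| | // Illustration: a minimal standalone sketch of the recursion above over a toy |
| | // type representation. The ToyType struct and maxByValAlign helper below are |
| | // hypothetical and not part of this file; they only mirror the logic: vectors |
| | // of 128 bits or more impose a 16-byte floor (32 bytes when the cap allows), |
| | // and aggregates take the maximum over their contained element types. |
| | #include <algorithm> |
| | #include <vector> |
| | struct ToyType { |
| |   unsigned VectorBits = 0;    // nonzero only for vector types |
| |   std::vector<ToyType> Elems; // element types of structs/arrays |
| | }; |
| | static void maxByValAlign(const ToyType &T, unsigned &MaxAlign, |
| |                           unsigned MaxMaxAlign) { |
| |   if (MaxAlign == MaxMaxAlign) |
| |     return; |
| |   if (T.VectorBits >= 256 && MaxMaxAlign >= 32) |
| |     MaxAlign = 32; |
| |   else if (T.VectorBits >= 128 && MaxAlign < 16) |
| |     MaxAlign = 16; |
| |   for (const ToyType &E : T.Elems) { |
| |     unsigned EltAlign = 0; |
| |     maxByValAlign(E, EltAlign, MaxMaxAlign); |
| |     MaxAlign = std::max(MaxAlign, EltAlign); |
| |     if (MaxAlign == MaxMaxAlign) |
| |       break; |
| |   } |
| | } |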
1100 | | |
1101 | | /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate |
1102 | | /// function arguments in the caller parameter area. |
1103 | | unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty, |
1104 | 133 | const DataLayout &DL) const { |
1105 | 133 | // Darwin passes everything on 4 byte boundary. |
1106 | 133 | if (Subtarget.isDarwin()) |
1107 | 11 | return 4; |
1108 | 122 | |
1109 | 122 | // 16-byte and wider vectors are passed on a 16-byte boundary. |
1110 | 122 | // The rest are passed on an 8-byte boundary on PPC64 and a 4-byte boundary on PPC32. |
1111 | 122 | unsigned Align = Subtarget.isPPC64() ? 8 : 4; |
1112 | 122 | if (Subtarget.hasAltivec() || Subtarget.hasQPX()) |
1113 | 100 | getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16); |
1114 | 133 | return Align; |
1115 | 133 | } |
1116 | | |
1117 | 10.7k | bool PPCTargetLowering::useSoftFloat() const { |
1118 | 10.7k | return Subtarget.useSoftFloat(); |
1119 | 10.7k | } |
1120 | | |
1121 | 0 | const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { |
1122 | 0 | switch ((PPCISD::NodeType)Opcode) { |
1123 | 0 | case PPCISD::FIRST_NUMBER: break; |
1124 | 0 | case PPCISD::FSEL: return "PPCISD::FSEL"; |
1125 | 0 | case PPCISD::FCFID: return "PPCISD::FCFID"; |
1126 | 0 | case PPCISD::FCFIDU: return "PPCISD::FCFIDU"; |
1127 | 0 | case PPCISD::FCFIDS: return "PPCISD::FCFIDS"; |
1128 | 0 | case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS"; |
1129 | 0 | case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ"; |
1130 | 0 | case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ"; |
1131 | 0 | case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ"; |
1132 | 0 | case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ"; |
1133 | 0 | case PPCISD::FRE: return "PPCISD::FRE"; |
1134 | 0 | case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE"; |
1135 | 0 | case PPCISD::STFIWX: return "PPCISD::STFIWX"; |
1136 | 0 | case PPCISD::VMADDFP: return "PPCISD::VMADDFP"; |
1137 | 0 | case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP"; |
1138 | 0 | case PPCISD::VPERM: return "PPCISD::VPERM"; |
1139 | 0 | case PPCISD::XXSPLT: return "PPCISD::XXSPLT"; |
1140 | 0 | case PPCISD::VECINSERT: return "PPCISD::VECINSERT"; |
1141 | 0 | case PPCISD::XXREVERSE: return "PPCISD::XXREVERSE"; |
1142 | 0 | case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI"; |
1143 | 0 | case PPCISD::VECSHL: return "PPCISD::VECSHL"; |
1144 | 0 | case PPCISD::CMPB: return "PPCISD::CMPB"; |
1145 | 0 | case PPCISD::Hi: return "PPCISD::Hi"; |
1146 | 0 | case PPCISD::Lo: return "PPCISD::Lo"; |
1147 | 0 | case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY"; |
1148 | 0 | case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC"; |
1149 | 0 | case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET"; |
1150 | 0 | case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg"; |
1151 | 0 | case PPCISD::SRL: return "PPCISD::SRL"; |
1152 | 0 | case PPCISD::SRA: return "PPCISD::SRA"; |
1153 | 0 | case PPCISD::SHL: return "PPCISD::SHL"; |
1154 | 0 | case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE"; |
1155 | 0 | case PPCISD::CALL: return "PPCISD::CALL"; |
1156 | 0 | case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP"; |
1157 | 0 | case PPCISD::MTCTR: return "PPCISD::MTCTR"; |
1158 | 0 | case PPCISD::BCTRL: return "PPCISD::BCTRL"; |
1159 | 0 | case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC"; |
1160 | 0 | case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; |
1161 | 0 | case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE"; |
1162 | 0 | case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP"; |
1163 | 0 | case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP"; |
1164 | 0 | case PPCISD::MFOCRF: return "PPCISD::MFOCRF"; |
1165 | 0 | case PPCISD::MFVSR: return "PPCISD::MFVSR"; |
1166 | 0 | case PPCISD::MTVSRA: return "PPCISD::MTVSRA"; |
1167 | 0 | case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ"; |
1168 | 0 | case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP"; |
1169 | 0 | case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP"; |
1170 | 0 | case PPCISD::ANDIo_1_EQ_BIT: return "PPCISD::ANDIo_1_EQ_BIT"; |
1171 | 0 | case PPCISD::ANDIo_1_GT_BIT: return "PPCISD::ANDIo_1_GT_BIT"; |
1172 | 0 | case PPCISD::VCMP: return "PPCISD::VCMP"; |
1173 | 0 | case PPCISD::VCMPo: return "PPCISD::VCMPo"; |
1174 | 0 | case PPCISD::LBRX: return "PPCISD::LBRX"; |
1175 | 0 | case PPCISD::STBRX: return "PPCISD::STBRX"; |
1176 | 0 | case PPCISD::LFIWAX: return "PPCISD::LFIWAX"; |
1177 | 0 | case PPCISD::LFIWZX: return "PPCISD::LFIWZX"; |
1178 | 0 | case PPCISD::LXSIZX: return "PPCISD::LXSIZX"; |
1179 | 0 | case PPCISD::STXSIX: return "PPCISD::STXSIX"; |
1180 | 0 | case PPCISD::VEXTS: return "PPCISD::VEXTS"; |
1181 | 0 | case PPCISD::SExtVElems: return "PPCISD::SExtVElems"; |
1182 | 0 | case PPCISD::LXVD2X: return "PPCISD::LXVD2X"; |
1183 | 0 | case PPCISD::STXVD2X: return "PPCISD::STXVD2X"; |
1184 | 0 | case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; |
1185 | 0 | case PPCISD::BDNZ: return "PPCISD::BDNZ"; |
1186 | 0 | case PPCISD::BDZ: return "PPCISD::BDZ"; |
1187 | 0 | case PPCISD::MFFS: return "PPCISD::MFFS"; |
1188 | 0 | case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ"; |
1189 | 0 | case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN"; |
1190 | 0 | case PPCISD::CR6SET: return "PPCISD::CR6SET"; |
1191 | 0 | case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET"; |
1192 | 0 | case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT"; |
1193 | 0 | case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT"; |
1194 | 0 | case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA"; |
1195 | 0 | case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L"; |
1196 | 0 | case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS"; |
1197 | 0 | case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA"; |
1198 | 0 | case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L"; |
1199 | 0 | case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR"; |
1200 | 0 | case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR"; |
1201 | 0 | case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA"; |
1202 | 0 | case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L"; |
1203 | 0 | case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR"; |
1204 | 0 | case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR"; |
1205 | 0 | case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; |
1206 | 0 | case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; |
1207 | 0 | case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; |
1208 | 0 | case PPCISD::SC: return "PPCISD::SC"; |
1209 | 0 | case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB"; |
1210 | 0 | case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE"; |
1211 | 0 | case PPCISD::RFEBB: return "PPCISD::RFEBB"; |
1212 | 0 | case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD"; |
1213 | 0 | case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN"; |
1214 | 0 | case PPCISD::QVFPERM: return "PPCISD::QVFPERM"; |
1215 | 0 | case PPCISD::QVGPCI: return "PPCISD::QVGPCI"; |
1216 | 0 | case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI"; |
1217 | 0 | case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI"; |
1218 | 0 | case PPCISD::QBFLT: return "PPCISD::QBFLT"; |
1219 | 0 | case PPCISD::QVLFSb: return "PPCISD::QVLFSb"; |
1220 | 0 | } |
1221 | 0 | return nullptr; |
1222 | 0 | } |
1223 | | |
1224 | | EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C, |
1225 | 6.13k | EVT VT) const { |
1226 | 6.13k | if (!VT.isVector()) |
1227 | 5.89k | return Subtarget.useCRBits() ? MVT::i1 : MVT::i32; |
1228 | 243 | |
1229 | 243 | if (Subtarget.hasQPX()) |
1230 | 5 | return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements()); |
1231 | 238 | |
1232 | 238 | return VT.changeVectorElementTypeToInteger(); |
1233 | 238 | } |
1234 | | |
1235 | 557 | bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const { |
1236 | 557 | assert(VT.isFloatingPoint() && "Non-floating-point FMA?"); |
1237 | 557 | return true; |
1238 | 557 | } |
1239 | | |
1240 | | //===----------------------------------------------------------------------===// |
1241 | | // Node matching predicates, for use by the tblgen matching code. |
1242 | | //===----------------------------------------------------------------------===// |
1243 | | |
1244 | | /// isFloatingPointZero - Return true if this is 0.0 or -0.0. |
1245 | 19 | static bool isFloatingPointZero(SDValue Op) { |
1246 | 19 | if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) |
1247 | 9 | return CFP->getValueAPF().isZero(); |
1248 | 10 | else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { |
1249 | 0 | // Maybe this has already been legalized into the constant pool? |
1250 | 0 | if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1))) |
1251 | 0 | if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) |
1252 | 0 | return CFP->getValueAPF().isZero(); |
1253 | 10 | } |
1254 | 10 | return false; |
1255 | 10 | } |
1256 | | |
1257 | | /// isConstantOrUndef - Op is a shuffle-mask element: either undef (encoded |
1258 | | /// as a negative index) or a constant index. Return true if Op is undef or |
1259 | | /// if it matches the specified value Val. |
1259 | 22.9k | static bool isConstantOrUndef(int Op, int Val) { |
1260 | 17.0k | return Op < 0 || Op == Val; |
1261 | 22.9k | } |
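| | |
| | // Worked example (hypothetical values, not from this file): shuffle mask |
| | // elements use a negative index to mean undef, so undef matches anything: |
| | //   isConstantOrUndef(-1, 7) -> true  (undef) |
| | //   isConstantOrUndef( 7, 7) -> true  (exact match) |
| | //   isConstantOrUndef( 6, 7) -> false |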
1262 | | |
1263 | | /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a |
1264 | | /// VPKUHUM instruction. |
1265 | | /// The ShuffleKind distinguishes between big-endian operations with |
1266 | | /// two different inputs (0), either-endian operations with two identical |
1267 | | /// inputs (1), and little-endian operations with two different inputs (2). |
1268 | | /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). |
1269 | | bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, |
1270 | 821 | SelectionDAG &DAG) { |
1271 | 821 | bool IsLE = DAG.getDataLayout().isLittleEndian(); |
1272 | 821 | if (ShuffleKind == 0) { |
1273 | 366 | if (IsLE) |
1274 | 60 | return false; |
1275 | 339 | for (unsigned i = 0; i != 16; ++i) |
1276 | 339 | if (!isConstantOrUndef(N->getMaskElt(i), i*2+1)) |
1277 | 306 | return false; |
1278 | 821 | } else if (ShuffleKind == 2) { |
1279 | 222 | if (!IsLE) |
1280 | 3 | return false; |
1281 | 455 | for (unsigned i = 0; i != 16; ++i) |
1282 | 452 | if (!isConstantOrUndef(N->getMaskElt(i), i*2)) |
1283 | 216 | return false; |
1284 | 455 | } else if (ShuffleKind == 1) { |
1285 | 233 | unsigned j = IsLE ? 0 : 1; |
1286 | 386 | for (unsigned i = 0; i != 8; ++i) |
1287 | 377 | if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) || |
1288 | 190 | !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)) |
1289 | 224 | return false; |
1290 | 455 | } |
1291 | 12 | return true; |
1292 | 821 | } |
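| | |
| | // Worked example (hypothetical mask values, not from this file): for two |
| | // different inputs on a big-endian target (ShuffleKind == 0), vpkuhum keeps |
| | // the low-order byte of each halfword of the concatenated 32-byte input, so |
| | // a fully-defined mask must equal i*2+1 at every position i: |
| | static const int VPKUHUM_BE_Mask[16] = {1, 3, 5, 7, 9, 11, 13, 15, |
| |                                         17, 19, 21, 23, 25, 27, 29, 31}; |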
1293 | | |
1294 | | /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a |
1295 | | /// VPKUWUM instruction. |
1296 | | /// The ShuffleKind distinguishes between big-endian operations with |
1297 | | /// two different inputs (0), either-endian operations with two identical |
1298 | | /// inputs (1), and little-endian operations with two different inputs (2). |
1299 | | /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). |
1300 | | bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, |
1301 | 837 | SelectionDAG &DAG) { |
1302 | 837 | bool IsLE = DAG.getDataLayout().isLittleEndian(); |
1303 | 837 | if (ShuffleKind == 0) { |
1304 | 366 | if (IsLE) |
1305 | 60 | return false; |
1306 | 320 | for (unsigned i = 0; i != 16; i += 2) |
1307 | 320 | if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) || |
1308 | 16 | !isConstantOrUndef(N->getMaskElt(i+1), i*2+3)) |
1309 | 306 | return false; |
1310 | 837 | } else if (ShuffleKind == 2) { |
1311 | 225 | if (!IsLE) |
1312 | 3 | return false; |
1313 | 381 | for (unsigned i = 0; i != 16; i += 2) |
1314 | 378 | if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || |
1315 | 215 | !isConstantOrUndef(N->getMaskElt(i+1), i*2+1)) |
1316 | 219 | return false; |
1317 | 471 | } else if (ShuffleKind == 1) { |
1318 | 246 | unsigned j = IsLE ? 0 : 2; |
1319 | 339 | for (unsigned i = 0; i != 8; i += 2) |
1320 | 326 | if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || |
1321 | 131 | !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || |
1322 | 123 | !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || |
1323 | 93 | !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1)) |
1324 | 233 | return false; |
1325 | 471 | } |
1326 | 16 | return true; |
1327 | 837 | } |
1328 | | |
1329 | | /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a |
1330 | | /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the |
1331 | | /// current subtarget. |
1332 | | /// |
1333 | | /// The ShuffleKind distinguishes between big-endian operations with |
1334 | | /// two different inputs (0), either-endian operations with two identical |
1335 | | /// inputs (1), and little-endian operations with two different inputs (2). |
1336 | | /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). |
1337 | | bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, |
1338 | 105 | SelectionDAG &DAG) { |
1339 | 105 | const PPCSubtarget& Subtarget = |
1340 | 105 | static_cast<const PPCSubtarget&>(DAG.getSubtarget()); |
1341 | 105 | if (!Subtarget.hasP8Vector()) |
1342 | 0 | return false; |
1343 | 105 | |
1344 | 105 | bool IsLE = DAG.getDataLayout().isLittleEndian(); |
1345 | 105 | if (ShuffleKind == 0) { |
1346 | 30 | if (IsLE) |
1347 | 1 | return false; |
1348 | 45 | for (unsigned i = 0; i != 16; i += 4) |
1349 | 42 | if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) || |
1350 | 20 | !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) || |
1351 | 20 | !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) || |
1352 | 18 | !isConstantOrUndef(N->getMaskElt(i+3), i*2+7)) |
1353 | 26 | return false; |
1354 | 105 | } else if (ShuffleKind == 2) { |
1355 | 24 | if (!IsLE) |
1356 | 0 | return false; |
1357 | 41 | for (unsigned i = 0; i != 16; i += 4) |
1358 | 38 | if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || |
1359 | 17 | !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) || |
1360 | 17 | !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) || |
1361 | 17 | !isConstantOrUndef(N->getMaskElt(i+3), i*2+3)) |
1362 | 21 | return false; |
1363 | 75 | } else if (ShuffleKind == 1) { |
1364 | 51 | unsigned j = IsLE ? 0 : 4; |
1365 | 67 | for (unsigned i = 0; i != 8; i += 4) |
1366 | 61 | if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || |
1367 | 26 | !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || |
1368 | 26 | !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) || |
1369 | 24 | !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) || |
1370 | 22 | !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || |
1371 | 16 | !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) || |
1372 | 16 | !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) || |
1373 | 16 | !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3)) |
1374 | 45 | return false; |
1375 | 75 | } |
1376 | 12 | return true; |
1377 | 105 | } |
1378 | | |
1379 | | /// isVMerge - Common function, used to match vmrg* shuffles. |
1380 | | /// |
1381 | | static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, |
1382 | 2.97k | unsigned LHSStart, unsigned RHSStart) { |
1383 | 2.97k | if (N->getValueType(0) != MVT::v16i8) |
1384 | 0 | return false; |
1385 | 2.97k | assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && |
1386 | 2.97k | "Unsupported merge size!"); |
1387 | 2.97k | |
1388 | 5.50k | for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units |
1389 | 10.0k | for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit |
1390 | 7.56k | if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j), |
1391 | 7.56k | LHSStart+j+i*UnitSize) || |
1392 | 6.08k | !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j), |
1393 | 6.08k | RHSStart+j+i*UnitSize)) |
1394 | 2.37k | return false; |
1395 | 4.90k | } |
1396 | 603 | return true; |
1397 | 2.97k | } |
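| | |
| | // Worked example (hypothetical mask values, not from this file): a big-endian |
| | // vmrglb with two different inputs corresponds to isVMerge(N, 1, 8, 24) in |
| | // the function below, and interleaves the low halves of the two vectors |
| | // byte by byte: |
| | static const int VMRGLB_BE_Mask[16] = {8, 24, 9, 25, 10, 26, 11, 27, |
| |                                        12, 28, 13, 29, 14, 30, 15, 31}; |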
1398 | | |
1399 | | /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for |
1400 | | /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes). |
1401 | | /// The ShuffleKind distinguishes between big-endian merges with two |
1402 | | /// different inputs (0), either-endian merges with two identical inputs (1), |
1403 | | /// and little-endian merges with two different inputs (2). For the latter, |
1404 | | /// the input operands are swapped (see PPCInstrAltivec.td). |
1405 | | bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, |
1406 | 1.98k | unsigned ShuffleKind, SelectionDAG &DAG) { |
1407 | 1.98k | if (DAG.getDataLayout().isLittleEndian()) { |
1408 | 845 | if (ShuffleKind == 1) // unary |
1409 | 200 | return isVMerge(N, UnitSize, 0, 0); |
1410 | 645 | else if (ShuffleKind == 2) // swapped |
1411 | 465 | return isVMerge(N, UnitSize, 0, 16); |
1412 | 645 | else |
1413 | 180 | return false; |
1414 | 1.13k | } else { |
1415 | 1.13k | if (ShuffleKind == 1) // unary |
1416 | 288 | return isVMerge(N, UnitSize, 8, 8); |
1417 | 849 | else if (ShuffleKind == 0) // normal |
1418 | 840 | return isVMerge(N, UnitSize, 8, 24); |
1419 | 849 | else |
1420 | 9 | return false; |
1421 | 0 | } |
1422 | 1.98k | } |
1423 | | |
1424 | | /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for |
1425 | | /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes). |
1426 | | /// The ShuffleKind distinguishes between big-endian merges with two |
1427 | | /// different inputs (0), either-endian merges with two identical inputs (1), |
1428 | | /// and little-endian merges with two different inputs (2). For the latter, |
1429 | | /// the input operands are swapped (see PPCInstrAltivec.td). |
1430 | | bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, |
1431 | 1.37k | unsigned ShuffleKind, SelectionDAG &DAG) { |
1432 | 1.37k | if (DAG.getDataLayout().isLittleEndian()) { |
1433 | 407 | if (ShuffleKind == 1) // unary |
1434 | 125 | return isVMerge(N, UnitSize, 8, 8); |
1435 | 282 | else if (ShuffleKind == 2) // swapped |
1436 | 102 | return isVMerge(N, UnitSize, 8, 24); |
1437 | 282 | else |
1438 | 180 | return false; |
1439 | 967 | } else { |
1440 | 967 | if (ShuffleKind == 1) // unary |
1441 | 180 | return isVMerge(N, UnitSize, 0, 0); |
1442 | 787 | else if (ShuffleKind == 0) // normal |
1443 | 778 | return isVMerge(N, UnitSize, 0, 16); |
1444 | 787 | else |
1445 | 9 | return false; |
1446 | 0 | } |
1447 | 1.37k | } |
1448 | | |
1449 | | /** |
1450 | | * \brief Common function used to match vmrgew and vmrgow shuffles |
1451 | | * |
1452 | | * The indexOffset determines whether to look for even or odd words in |
1453 | | * the shuffle mask. This is based on the endianness of the target |
1454 | | * machine. |
1455 | | * - Little Endian: |
1456 | | * - Use offset of 0 to check for odd elements |
1457 | | * - Use offset of 4 to check for even elements |
1458 | | * - Big Endian: |
1459 | | * - Use offset of 0 to check for even elements |
1460 | | * - Use offset of 4 to check for odd elements |
1461 | | * A detailed description of the vector element ordering for little endian and |
1462 | | * big endian can be found at |
1463 | | * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html |
1464 | | * Targeting your applications - what little endian and big endian IBM XL C/C++ |
1465 | | * compiler differences mean to you |
1466 | | * |
1467 | | * The mask to the shuffle vector instruction specifies the indices of the |
1468 | | * elements from the two input vectors to place in the result. The elements are |
1469 | | * numbered in array-access order, starting with the first vector. These vectors |
1470 | | * are always of type v16i8, thus each vector will contain 16 elements of size |
1471 | | * 8. More info on the shuffle vector can be found in the |
1472 | | * http://llvm.org/docs/LangRef.html#shufflevector-instruction |
1473 | | * Language Reference. |
1474 | | * |
1475 | | * The RHSStartValue indicates whether the same input vectors are used (unary) |
1476 | | * or two different input vectors are used, based on the following: |
1477 | | * - If the instruction uses the same vector for both inputs, the range of the |
1478 | | * indices will be 0 to 15. In this case, the RHSStart value passed should |
1479 | | * be 0. |
1480 | | * - If the instruction has two different vectors then the range of the |
1481 | | * indices will be 0 to 31. In this case, the RHSStart value passed should |
1482 | | * be 16 (indices 0-15 specify elements in the first vector while indices 16 |
1483 | | * to 31 specify elements in the second vector). |
1484 | | * |
1485 | | * \param[in] N The shuffle vector SD Node to analyze |
1486 | | * \param[in] IndexOffset Specifies whether to look for even or odd elements |
1487 | | * \param[in] RHSStartValue Specifies the starting index for the righthand input |
1488 | | * vector to the shuffle_vector instruction |
1489 | | * \return true iff this shuffle vector represents an even or odd word merge |
1490 | | */ |
1491 | | static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset, |
1492 | 195 | unsigned RHSStartValue) { |
1493 | 195 | if (N->getValueType(0) != MVT::v16i8) |
1494 | 0 | return false; |
1495 | 195 | |
1496 | 264 | for (unsigned i = 0; i < 2; ++i) |
1497 | 553 | for (unsigned j = 0; j < 4; ++j) |
1498 | 484 | if (!isConstantOrUndef(N->getMaskElt(i*4+j), |
1499 | 484 | i*RHSStartValue+j+IndexOffset) || |
1500 | 327 | !isConstantOrUndef(N->getMaskElt(i*4+j+8), |
1501 | 327 | i*RHSStartValue+j+IndexOffset+8)) |
1502 | 168 | return false; |
1503 | 27 | return true; |
1504 | 195 | } |
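| | |
| | // Worked example (hypothetical mask values, not from this file): a big-endian |
| | // vmrgew with two different inputs merges the even-numbered words of the two |
| | // inputs, which in v16i8 byte indices is the mask below; it is accepted by |
| | // isVMerge(N, /*IndexOffset=*/0, /*RHSStartValue=*/16): |
| | static const int VMRGEW_BE_Mask[16] = {0, 1, 2, 3, 16, 17, 18, 19, |
| |                                        8, 9, 10, 11, 24, 25, 26, 27}; |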
1505 | | |
1506 | | /** |
1507 | | * \brief Determine if the specified shuffle mask is suitable for the vmrgew or |
1508 | | * vmrgow instructions. |
1509 | | * |
1510 | | * \param[in] N The shuffle vector SD Node to analyze |
1511 | | * \param[in] CheckEven Check for an even merge (true) or an odd merge (false) |
1512 | | * \param[in] ShuffleKind Identify the type of merge: |
1513 | | * - 0 = big-endian merge with two different inputs; |
1514 | | * - 1 = either-endian merge with two identical inputs; |
1515 | | * - 2 = little-endian merge with two different inputs (inputs are swapped for |
1516 | | * little-endian merges). |
1517 | | * \param[in] DAG The current SelectionDAG |
1518 | | * \return true iff this shuffle mask |
1519 | | */ |
1520 | | bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, |
1521 | 203 | unsigned ShuffleKind, SelectionDAG &DAG) { |
1522 | 203 | if (DAG.getDataLayout().isLittleEndian()) { |
1523 | 94 | unsigned indexOffset = CheckEven ? 4 : 0; |
1524 | 94 | if (ShuffleKind == 1) // Unary |
1525 | 43 | return isVMerge(N, indexOffset, 0); |
1526 | 51 | else if (ShuffleKind == 2) // swapped |
1527 | 45 | return isVMerge(N, indexOffset, 16); |
1528 | 51 | else |
1529 | 6 | return false; |
1530 | 203 | } |
1531 | 109 | else { |
1532 | 109 | unsigned indexOffset = CheckEven ? 0 : 4; |
1533 | 109 | if (ShuffleKind == 1) // Unary |
1534 | 52 | return isVMerge(N, indexOffset, 0); |
1535 | 57 | else if (ShuffleKind == 0) // Normal |
1536 | 55 | return isVMerge(N, indexOffset, 16); |
1537 | 57 | else |
1538 | 2 | return false; |
1539 | 0 | } |
1540 | 0 | return false; |
1541 | 0 | } |
1542 | | |
1543 | | /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift |
1544 | | /// amount, otherwise return -1. |
1545 | | /// The ShuffleKind distinguishes between big-endian operations with two |
1546 | | /// different inputs (0), either-endian operations with two identical inputs |
1547 | | /// (1), and little-endian operations with two different inputs (2). For the |
1548 | | /// latter, the input operands are swapped (see PPCInstrAltivec.td). |
1549 | | int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, |
1550 | 1.13k | SelectionDAG &DAG) { |
1551 | 1.13k | if (N->getValueType(0) != MVT::v16i8) |
1552 | 0 | return -1; |
1553 | 1.13k | |
1554 | 1.13k | ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); |
1555 | 1.13k | |
1556 | 1.13k | // Find the first non-undef value in the shuffle mask. |
1557 | 1.13k | unsigned i; |
1558 | 1.34k | for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i) |
1559 | 210 | /*search*/; |
1560 | 1.13k | |
1561 | 1.13k | if (i == 16) return -1; // all undef. |
1562 | 1.13k | |
1563 | 1.13k | // Otherwise, check to see if the rest of the elements are consecutively |
1564 | 1.13k | // numbered from this value. |
1565 | 1.13k | unsigned ShiftAmt = SVOp->getMaskElt(i); |
1566 | 1.13k | if (ShiftAmt < i) return -1; |
1567 | 1.09k | |
1568 | 1.09k | ShiftAmt -= i; |
1569 | 1.09k | bool isLE = DAG.getDataLayout().isLittleEndian(); |
1570 | 1.09k | |
1571 | 1.09k | if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) { |
1572 | 618 | // Check the rest of the elements to see if they are consecutive. |
1573 | 2.63k | for (++i; i != 16; ++i) |
1574 | 2.57k | if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) |
1575 | 558 | return -1; |
1576 | 1.09k | } else if (ShuffleKind == 1) { |
1577 | 410 | // Check the rest of the elements to see if they are consecutive. |
1578 | 2.60k | for (++i; i != 16; ++i) |
1579 | 2.50k | if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15)) |
1580 | 309 | return -1; |
1581 | 410 | } else |
1582 | 63 | return -1; |
1583 | 161 | |
1584 | 161 | if (isLE) |
1585 | 50 | ShiftAmt = 16 - ShiftAmt; |
1586 | 1.13k | |
1587 | 1.13k | return ShiftAmt; |
1588 | 1.13k | } |
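| | |
| | // Worked example (hypothetical mask values, not from this file): with two |
| | // different inputs on big-endian (ShuffleKind == 0), a shift of 4 bytes is |
| | // the consecutive mask below, and the function returns 4. For the swapped |
| | // little-endian form (ShuffleKind == 2) the same mask yields 16 - 4 = 12. |
| | static const int VSLDOI_Shift4_Mask[16] = {4, 5, 6, 7, 8, 9, 10, 11, |
| |                                            12, 13, 14, 15, 16, 17, 18, 19}; |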
1589 | | |
1590 | | /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand |
1591 | | /// specifies a splat of a single element that is suitable for input to |
1592 | | /// VSPLTB/VSPLTH/VSPLTW. |
1593 | 1.27k | bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { |
1594 | 1.27k | assert(N->getValueType(0) == MVT::v16i8 && |
1595 | 1.27k | (EltSize == 1 || EltSize == 2 || EltSize == 4)); |
1596 | 1.27k | |
1597 | 1.27k | // The consecutive indices need to specify an element, not part of two |
1598 | 1.27k | // different elements. So abandon ship early if this isn't the case. |
1599 | 1.27k | if (N->getMaskElt(0) % EltSize != 0) |
1600 | 171 | return false; |
1601 | 1.10k | |
1602 | 1.10k | // This is a splat operation if each element of the permute is the same, and |
1603 | 1.10k | // if the value doesn't reference the second vector. |
1604 | 1.10k | unsigned ElementBase = N->getMaskElt(0); |
1605 | 1.10k | |
1606 | 1.10k | // FIXME: Handle UNDEF elements too! |
1607 | 1.10k | if (ElementBase >= 16) |
1608 | 27 | return false; |
1609 | 1.07k | |
1610 | 1.07k | // Check that the indices are consecutive, in the case of a multi-byte element |
1611 | 1.07k | // splatted with a v16i8 mask. |
1612 | 2.25k | for (unsigned i = 1; i != EltSize; ++i) |
1613 | 1.36k | if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase)) |
1614 | 187 | return false; |
1615 | 1.07k | |
1616 | 2.58k | for (unsigned i = EltSize, e = 16; i != e; i += EltSize) { |
1617 | 2.36k | if (N->getMaskElt(i) < 0) continue; |
1618 | 5.32k | for (unsigned j = 0; j != EltSize; ++j) |
1619 | 3.72k | if (N->getMaskElt(i+j) != N->getMaskElt(j)) |
1620 | 677 | return false; |
1621 | 2.36k | } |
1622 | 212 | return true; |
1623 | 1.27k | } |
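| | |
| | // Worked example (hypothetical mask values, not from this file): a splat of |
| | // word element 1 with EltSize == 4 repeats the byte group {4,5,6,7}; the |
| | // bytes of the first group are consecutive from ElementBase == 4 and every |
| | // later group matches the first, so isSplatShuffleMask(N, 4) returns true |
| | // (and getVSPLTImmediate then yields 4/4 == 1 on big-endian targets). |
| | static const int SplatWord1_Mask[16] = {4, 5, 6, 7, 4, 5, 6, 7, |
| |                                         4, 5, 6, 7, 4, 5, 6, 7}; |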
1624 | | |
1625 | | /// Check that the mask is shuffling N byte elements. Within each N byte |
1626 | | /// element of the mask, the indices could be either in increasing or |
1627 | | /// decreasing order as long as they are consecutive. |
1628 | | /// \param[in] N the shuffle vector SD Node to analyze |
1629 | | /// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/ |
1630 | | /// Word/DoubleWord/QuadWord). |
1631 | | /// \param[in] StepLen the delta indices number among the N byte element, if |
1632 | | /// the mask is in increasing/decreasing order then it is 1/-1. |
1633 | | /// \return true iff the mask is shuffling N byte elements. |
1634 | | static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width, |
1635 | 2.80k | int StepLen) { |
1636 | 2.80k | assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) && |
1637 | 2.80k | "Unexpected element width."); |
1638 | 2.80k | assert((StepLen == 1 || StepLen == -1) && "Unexpected step length."); |
1639 | 2.80k | |
1640 | 2.80k | unsigned NumOfElem = 16 / Width; |
1641 | 2.80k | unsigned MaskVal[16]; // Width is never greater than 16 |
1642 | 5.01k | for (unsigned i = 0; i < NumOfElem; ++i) { |
1643 | 4.53k | MaskVal[0] = N->getMaskElt(i * Width); |
1644 | 4.53k | if ((StepLen == 1) && (MaskVal[0] % Width)) { |
1645 | 348 | return false; |
1646 | 4.18k | } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) { |
1647 | 1.06k | return false; |
1648 | 1.06k | } |
1649 | 3.12k | |
1650 | 10.8k | for (unsigned int j = 1; j < Width; ++j) { |
1651 | 8.62k | MaskVal[j] = N->getMaskElt(i * Width + j); |
1652 | 8.62k | if (MaskVal[j] != MaskVal[j-1] + StepLen) { |
1653 | 925 | return false; |
1654 | 925 | } |
1655 | 8.62k | } |
1656 | 4.53k | } |
1657 | 2.80k | |
1658 | 474 | return true; |
1659 | 2.80k | } |
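| | |
| | // Worked examples (hypothetical mask values, not from this file) for |
| | // Width == 4: with StepLen == 1 every word is four ascending bytes starting |
| | // at a multiple of 4; with StepLen == -1 the bytes of each word descend and |
| | // the first byte is one below a multiple of 4. |
| | static const int WordShuffle_Asc[16] = {4, 5, 6, 7, 0, 1, 2, 3, |
| |                                         12, 13, 14, 15, 8, 9, 10, 11}; |
| | static const int WordShuffle_Desc[16] = {3, 2, 1, 0, 7, 6, 5, 4, |
| |                                          11, 10, 9, 8, 15, 14, 13, 12}; |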
1660 | | |
1661 | | bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, |
1662 | 439 | unsigned &InsertAtByte, bool &Swap, bool IsLE) { |
1663 | 439 | if (!isNByteElemShuffleMask(N, 4, 1)) |
1664 | 232 | return false; |
1665 | 207 | |
1666 | 207 | // Now we look at mask elements 0,4,8,12 |
1667 | 207 | unsigned M0 = N->getMaskElt(0) / 4; |
1668 | 207 | unsigned M1 = N->getMaskElt(4) / 4; |
1669 | 207 | unsigned M2 = N->getMaskElt(8) / 4; |
1670 | 207 | unsigned M3 = N->getMaskElt(12) / 4; |
1671 | 207 | unsigned LittleEndianShifts[] = { 2, 1, 0, 3 }; |
1672 | 207 | unsigned BigEndianShifts[] = { 3, 0, 1, 2 }; |
1673 | 207 | |
1674 | 207 | // Below, let H and L be arbitrary elements of the shuffle mask |
1675 | 207 | // where H is in the range [4,7] and L is in the range [0,3]. |
1676 | 207 | // H, 1, 2, 3 or L, 5, 6, 7 |
1677 | 207 | if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) || |
1678 | 207 | (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) { |
1679 | 32 | ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3]; |
1680 | 32 | InsertAtByte = IsLE ? 12 : 0; |
1681 | 32 | Swap = M0 < 4; |
1682 | 32 | return true; |
1683 | 32 | } |
1684 | 175 | // 0, H, 2, 3 or 4, L, 6, 7 |
1685 | 175 | if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) || |
1686 | 175 | (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) { |
1687 | 32 | ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3]; |
1688 | 32 | InsertAtByte = IsLE ? 8 : 4; |
1689 | 32 | Swap = M1 < 4; |
1690 | 32 | return true; |
1691 | 32 | } |
1692 | 143 | // 0, 1, H, 3 or 4, 5, L, 7 |
1693 | 143 | if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) || |
1694 | 143 | (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) { |
1695 | 32 | ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3]; |
1696 | 32 | InsertAtByte = IsLE ? 4 : 8; |
1697 | 32 | Swap = M2 < 4; |
1698 | 32 | return true; |
1699 | 32 | } |
1700 | 111 | // 0, 1, 2, H or 4, 5, 6, L |
1701 | 111 | if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) || |
1702 | 111 | (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) { |
1703 | 32 | ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3]; |
1704 | 32 | InsertAtByte = IsLE ? 0 : 12; |
1705 | 32 | Swap = M3 < 4; |
1706 | 32 | return true; |
1707 | 32 | } |
1708 | 79 | |
1709 | 79 | // If both vector operands for the shuffle are the same vector, the mask will |
1710 | 79 | // contain only elements from the first one and the second one will be undef. |
1711 | 79 | if (N->getOperand(1).isUndef()) { |
1712 | 67 | ShiftElts = 0; |
1713 | 67 | Swap = true; |
1714 | 67 | unsigned XXINSERTWSrcElem = IsLE ? 2 : 1; |
1715 | 67 | if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) { |
1716 | 2 | InsertAtByte = IsLE ? 12 : 0; |
1717 | 2 | return true; |
1718 | 2 | } |
1719 | 65 | if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) { |
1720 | 1 | InsertAtByte = IsLE ? 8 : 4; |
1721 | 1 | return true; |
1722 | 1 | } |
1723 | 64 | if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) { |
1724 | 1 | InsertAtByte = IsLE ? 4 : 8; |
1725 | 1 | return true; |
1726 | 1 | } |
1727 | 63 | if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) { |
1728 | 2 | InsertAtByte = IsLE ? 0 : 12; |
1729 | 2 | return true; |
1730 | 2 | } |
1731 | 73 | } |
1732 | 73 | |
1733 | 73 | return false; |
1734 | 73 | } |
1735 | | |
1736 | | bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, |
1737 | 633 | bool &Swap, bool IsLE) { |
1738 | 633 | assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); |
1739 | 633 | // Ensure each byte index of the word is consecutive. |
1740 | 633 | if (!isNByteElemShuffleMask(N, 4, 1)) |
1741 | 399 | return false; |
1742 | 234 | |
1743 | 234 | // Now we look at mask elements 0,4,8,12, which are the beginning of words. |
1744 | 234 | unsigned M0 = N->getMaskElt(0) / 4; |
1745 | 234 | unsigned M1 = N->getMaskElt(4) / 4; |
1746 | 234 | unsigned M2 = N->getMaskElt(8) / 4; |
1747 | 234 | unsigned M3 = N->getMaskElt(12) / 4; |
1748 | 234 | |
1749 | 234 | // If both vector operands for the shuffle are the same vector, the mask will |
1750 | 234 | // contain only elements from the first one and the second one will be undef. |
1751 | 234 | if (N->getOperand(1).isUndef()) { |
1752 | 155 | assert(M0 < 4 && "Indexing into an undef vector?"); |
1753 | 155 | if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4) |
1754 | 129 | return false; |
1755 | 26 | |
1756 | 26 | ShiftElts = IsLE ? (4 - M0) % 4 : M0; |
1757 | 155 | Swap = false; |
1758 | 155 | return true; |
1759 | 155 | } |
1760 | 79 | |
1761 | 79 | // Ensure each word index of the ShuffleVector Mask is consecutive. |
1762 | 79 | if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8) |
1763 | 45 | return false; |
1764 | 34 | |
1765 | 34 | if (IsLE) { |
1766 | 14 | if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) { |
1767 | 8 | // Input vectors don't need to be swapped if the leading element |
1768 | 8 | // of the result is one of the 3 left elements of the second vector |
1769 | 8 | // (or if there is no shift to be done at all). |
1770 | 8 | Swap = false; |
1771 | 8 | ShiftElts = (8 - M0) % 8; |
1772 | 14 | } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) { |
1773 | 6 | // Input vectors need to be swapped if the leading element |
1774 | 6 | // of the result is one of the 3 left elements of the first vector |
1775 | 6 | // (or if we're shifting by 4 - thereby simply swapping the vectors). |
1776 | 6 | Swap = true; |
1777 | 6 | ShiftElts = (4 - M0) % 4; |
1778 | 6 | } |
1779 | 14 | |
1780 | 14 | return true; |
1781 | 0 | } else { // BE |
1782 | 20 | if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) { |
1783 | 12 | // Input vectors don't need to be swapped if the leading element |
1784 | 12 | // of the result is one of the 4 elements of the first vector. |
1785 | 12 | Swap = false; |
1786 | 12 | ShiftElts = M0; |
1787 | 20 | } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) { |
1788 | 8 | // Input vectors need to be swapped if the leading element |
1789 | 8 | // of the result is one of the 4 elements of the right vector. |
1790 | 8 | Swap = true; |
1791 | 8 | ShiftElts = M0 - 4; |
1792 | 8 | } |
1793 | 20 | |
1794 | 20 | return true; |
1795 | 20 | } |
1796 | 0 | } |
1797 | | |
1798 | 1.16k | bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) { |
1799 | 1.16k | assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); |
1800 | 1.16k | |
1801 | 1.16k | if (!isNByteElemShuffleMask(N, Width, -1)) |
1802 | 1.14k | return false; |
1803 | 20 | |
1804 | 50 | for (int i = 0; i < 16; i += Width) |
1805 | 42 | if (N->getMaskElt(i) != i + Width - 1) |
1806 | 12 | return false; |
1807 | 20 | |
1808 | 8 | return true; |
1809 | 1.16k | } |
1810 | | |
1811 | 294 | bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) { |
1812 | 294 | return isXXBRShuffleMaskHelper(N, 2); |
1813 | 294 | } |
1814 | | |
1815 | 292 | bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) { |
1816 | 292 | return isXXBRShuffleMaskHelper(N, 4); |
1817 | 292 | } |
1818 | | |
1819 | 290 | bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) { |
1820 | 290 | return isXXBRShuffleMaskHelper(N, 8); |
1821 | 290 | } |
1822 | | |
1823 | 288 | bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) { |
1824 | 288 | return isXXBRShuffleMaskHelper(N, 16); |
1825 | 288 | } |
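| | |
| | // Worked example (hypothetical mask values, not from this file): the XXBRD |
| | // byte-reverse-doubleword pattern reverses each 8-byte element in place; |
| | // each group descends (StepLen == -1) and starts at i + 8 - 1, so |
| | // isXXBRShuffleMaskHelper(N, 8) accepts it: |
| | static const int XXBRD_Mask[16] = {7, 6, 5, 4, 3, 2, 1, 0, |
| |                                    15, 14, 13, 12, 11, 10, 9, 8}; |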
1826 | | |
1827 | | /// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap |
1828 | | /// if the inputs to the instruction should be swapped and set \p DM to the |
1829 | | /// value for the immediate. |
1830 | | /// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI |
1831 | | /// AND element 0 of the result comes from the first input (LE) or second input |
1832 | | /// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered. |
1833 | | /// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle |
1834 | | /// mask. |
1835 | | bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM, |
1836 | 573 | bool &Swap, bool IsLE) { |
1837 | 573 | assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); |
1838 | 573 | |
1839 | 573 | // Ensure each byte index of the double word is consecutive. |
1840 | 573 | if (!isNByteElemShuffleMask(N, 8, 1)) |
1841 | 560 | return false; |
1842 | 13 | |
1843 | 13 | unsigned M0 = N->getMaskElt(0) / 8; |
1844 | 13 | unsigned M1 = N->getMaskElt(8) / 8; |
1845 | 13 | assert(((M0 | M1) < 4) && "A mask element out of bounds?"); |
1846 | 13 | |
1847 | 13 | // If both vector operands for the shuffle are the same vector, the mask will |
1848 | 13 | // contain only elements from the first one and the second one will be undef. |
1849 | 13 | if (N->getOperand(1).isUndef()) { |
1850 | 0 | if ((M0 | M1) < 2) { |
1851 | 0 | DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1); |
1852 | 0 | Swap = false; |
1853 | 0 | return true; |
1854 | 0 | } else |
1855 | 0 | return false; |
1856 | 13 | } |
1857 | 13 | |
1858 | 13 | if (IsLE) { |
1859 | 7 | if (M0 > 1 && M1 < 2) { |
1860 | 0 | Swap = false; |
1861 | 7 | } else if (M0 < 2 && M1 > 1) { |
1862 | 7 | M0 = (M0 + 2) % 4; |
1863 | 7 | M1 = (M1 + 2) % 4; |
1864 | 7 | Swap = true; |
1865 | 7 | } else |
1866 | 0 | return false; |
1867 | 7 | |
1868 | 7 | // Note: if control flow comes here that means Swap is already set above |
1869 | 7 | DM = (((~M1) & 1) << 1) + ((~M0) & 1); |
1870 | 7 | return true; |
1871 | 0 | } else { // BE |
1872 | 6 | if (M0 < 2 && M1 > 1) { |
1873 | 6 | Swap = false; |
1874 | 0 | } else if (M0 > 1 && M1 < 2) { |
1875 | 0 | M0 = (M0 + 2) % 4; |
1876 | 0 | M1 = (M1 + 2) % 4; |
1877 | 0 | Swap = true; |
1878 | 0 | } else |
1879 | 0 | return false; |
1880 | 6 | |
1881 | 6 | // Note: if control flow comes here that means Swap is already set above |
1882 | 6 | DM = (M0 << 1) + (M1 & 1); |
1883 | 6 | return true; |
1884 | 6 | } |
1885 | 573 | } |
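| | |
| | // Worked example (hypothetical mask values, not from this file): on a |
| | // big-endian target the byte mask below has M0 = 0 and M1 = 3, i.e. it takes |
| | // doubleword 0 of the first input and doubleword 1 of the second, so the |
| | // function sets Swap = false and DM = (0 << 1) + (3 & 1) = 1: |
| | static const int XXPERMDI_BE_Mask[16] = {0, 1, 2, 3, 4, 5, 6, 7, |
| |                                          24, 25, 26, 27, 28, 29, 30, 31}; |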
1886 | | |
1887 | | |
1888 | | /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the |
1889 | | /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. |
1890 | | unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, |
1891 | 126 | SelectionDAG &DAG) { |
1892 | 126 | ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); |
1893 | 126 | assert(isSplatShuffleMask(SVOp, EltSize)); |
1894 | 126 | if (DAG.getDataLayout().isLittleEndian()) |
1895 | 57 | return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize); |
1896 | 126 | else |
1897 | 69 | return SVOp->getMaskElt(0) / EltSize; |
1898 | 0 | } |
1899 | | |
1900 | | /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed |
1901 | | /// by using a vspltis[bhw] instruction of the specified element size, return |
1902 | | /// the constant being splatted. The ByteSize field indicates the number of |
1903 | | /// bytes of each element [124] -> [bhw]. |
1904 | 136 | SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { |
1905 | 136 | SDValue OpVal(nullptr, 0); |
1906 | 136 | |
1907 | 136 | // If ByteSize of the splat is bigger than the element size of the |
1908 | 136 | // build_vector, then we have a case where we are checking for a splat where |
1909 | 136 | // multiple elements of the buildvector are folded together into a single |
1910 | 136 | // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8). |
1911 | 136 | unsigned EltSize = 16/N->getNumOperands(); |
1912 | 136 | if (EltSize < ByteSize) { |
1913 | 0 | unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval. |
1914 | 0 | SDValue UniquedVals[4]; |
1915 | 0 | assert(Multiple > 1 && Multiple <= 4 && "How can this happen?"); |
1916 | 0 | |
1917 | 0 | // See if all of the elements in the buildvector agree across. |
1918 | 0 | for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { |
1919 | 0 | if (N->getOperand(i).isUndef()) continue; |
1920 | 0 | // If the element isn't a constant, bail fully out. |
1921 | 0 | if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue(); |
1922 | 0 | |
1923 | 0 | if (!UniquedVals[i&(Multiple-1)].getNode()) |
1924 | 0 | UniquedVals[i&(Multiple-1)] = N->getOperand(i); |
1925 | 0 | else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i)) |
1926 | 0 | return SDValue(); // no match. |
1927 | 0 | } |
1928 | 0 | |
1929 | 0 | // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains |
1930 | 0 | // either constant or undef values that are identical for each chunk. See |
1931 | 0 | // if these chunks can form into a larger vspltis*. |
1932 | 0 | |
1933 | 0 | // Check to see if all of the leading entries are either 0 or -1. If |
1934 | 0 | // neither, then this won't fit into the immediate field. |
1935 | 0 | bool LeadingZero = true; |
1936 | 0 | bool LeadingOnes = true; |
1937 | 0 | for (unsigned i = 0; i != Multiple-1; ++i) { |
1938 | 0 | if (!UniquedVals[i].getNode()) continue; // Must have been undefs. |
1939 | 0 | |
1940 | 0 | LeadingZero &= isNullConstant(UniquedVals[i]); |
1941 | 0 | LeadingOnes &= isAllOnesConstant(UniquedVals[i]); |
1942 | 0 | } |
1943 | 0 | // Finally, check the least significant entry. |
1944 | 0 | if (LeadingZero) { |
1945 | 0 | if (!UniquedVals[Multiple-1].getNode()) |
1946 | 0 | return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef |
1947 | 0 | int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue(); |
1948 | 0 | if (Val < 16) // 0,0,0,4 -> vspltisw(4) |
1949 | 0 | return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32); |
1950 | 0 | } |
1951 | 0 | if (LeadingOnes) { |
1952 | 0 | if (!UniquedVals[Multiple-1].getNode()) |
1953 | 0 | return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef |
1954 | 0 | int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue(); |
1955 | 0 | if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2) |
1956 | 0 | return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32); |
1957 | 0 | } |
1958 | 0 | |
1959 | 0 | return SDValue(); |
1960 | 0 | } |
1961 | 136 | |
1962 | 136 | // Check to see if this buildvec has a single non-undef value in its elements. |
1963 | 1.40k | for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { |
1964 | 1.26k | if (N->getOperand(i).isUndef()) continue; |
1965 | 1.26k | if (!OpVal.getNode()) |
1966 | 136 | OpVal = N->getOperand(i); |
1967 | 1.12k | else if (OpVal != N->getOperand(i)) |
1968 | 0 | return SDValue(); |
1969 | 1.26k | } |
1970 | 136 | |
1971 | 136 | if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def. |
1972 | 136 | |
1973 | 136 | unsigned ValSizeInBytes = EltSize; |
1974 | 136 | uint64_t Value = 0; |
1975 | 136 | if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) { |
1976 | 136 | Value = CN->getZExtValue(); |
1977 | 136 | } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) { |
1978 | 0 | assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!"); |
1979 | 0 | Value = FloatToBits(CN->getValueAPF().convertToFloat()); |
1980 | 0 | } |
1981 | 136 | |
1982 | 136 | // If the splat value is larger than the element value, then we can never do |
1983 | 136 | // this splat. The only case that we could fit the replicated bits into our |
1984 | 136 | // immediate field for would be zero, and we prefer to use vxor for it. |
1985 | 136 | if (ValSizeInBytes < ByteSize) return SDValue(); |
1986 | 136 | |
1987 | 136 | // If the element value is larger than the splat value, check if it consists |
1988 | 136 | // of a repeated bit pattern of size ByteSize. |
1989 | 136 | if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8)) |
1990 | 0 | return SDValue(); |
1991 | 136 | |
1992 | 136 | // Properly sign extend the value. |
1993 | 136 | int MaskVal = SignExtend32(Value, ByteSize * 8); |
1994 | 136 | |
1995 | 136 | // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros. |
1996 | 136 | if (MaskVal == 0) return SDValue(); |
1997 | 94 | |
1998 | 94 | // Finally, if this value fits in a 5 bit sext field, return it |
1999 | 94 | if (SignExtend32<5>(MaskVal) == MaskVal) |
2000 | 94 | return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32); |
2001 | 0 | return SDValue(); |
2002 | 0 | } |
2003 | | |
2004 | | /// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift |
2005 | | /// amount, otherwise return -1. |
2006 | 71 | int PPC::isQVALIGNIShuffleMask(SDNode *N) { |
2007 | 71 | EVT VT = N->getValueType(0); |
2008 | 71 | if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1) |
2009 | 0 | return -1; |
2010 | 71 | |
2011 | 71 | ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); |
2012 | 71 | |
2013 | 71 | // Find the first non-undef value in the shuffle mask. |
2014 | 71 | unsigned i; |
2015 | 71 | for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i) |
2016 | 0 | /*search*/; |
2017 | 71 | |
2018 | 71 | if (i == 4) return -1; // all undef. |
2019 | 71 | |
2020 | 71 | // Otherwise, check to see if the rest of the elements are consecutively |
2021 | 71 | // numbered from this value. |
2022 | 71 | unsigned ShiftAmt = SVOp->getMaskElt(i); |
2023 | 71 | if (ShiftAmt < i) return -1; |
2024 | 71 | ShiftAmt -= i; |
2025 | 71 | |
2026 | 71 | // Check the rest of the elements to see if they are consecutive. |
2027 | 87 | for (++i; i != 4; ++i) |
2028 | 87 | if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) |
2029 | 71 | return -1; |
2030 | 71 | |
2031 | 0 | return ShiftAmt; |
2032 | 71 | } |
2033 | | |
2034 | | //===----------------------------------------------------------------------===// |
2035 | | // Addressing Mode Selection |
2036 | | //===----------------------------------------------------------------------===// |
2037 | | |
2038 | | /// isIntS16Immediate - This method tests to see if the node is either a 32-bit |
2039 | | /// or 64-bit immediate, and if the value can be accurately represented as a |
2040 | | /// sign extension from a 16-bit value. If so, this returns true and sets |
2041 | | /// the immediate in \p Imm. |
2042 | 11.3k | bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) { |
2043 | 11.3k | if (!isa<ConstantSDNode>(N)) |
2044 | 1.21k | return false; |
2045 | 10.1k | |
2046 | 10.1k | Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue(); |
2047 | 10.1k | if (N->getValueType(0) == MVT::i32) |
2048 | 2.75k | return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue(); |
2049 | 10.1k | else |
2050 | 7.39k | return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue(); |
2051 | 0 | } |
2052 | 11.2k | bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) { |
2053 | 11.2k | return isIntS16Immediate(Op.getNode(), Imm); |
2054 | 11.2k | } |
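| | |
| | // A standalone restatement (hypothetical helper, not from this file) of the |
| | // check above: a value fits a signed 16-bit displacement iff truncating it to |
| | // int16_t and sign-extending back reproduces it. For example, -32768 and |
| | // 32767 fit, while 32768 does not. |
| | #include <cstdint> |
| | static bool fitsS16(int64_t V) { return (int16_t)V == V; } |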
2055 | | |
2056 | | /// SelectAddressRegReg - Given the specified address, check to see if it |
2057 | | /// can be represented as an indexed [r+r] operation. Returns false if it |
2058 | | /// can be more efficiently represented with [r+imm]. |
2059 | | bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, |
2060 | | SDValue &Index, |
2061 | 13.0k | SelectionDAG &DAG) const { |
2062 | 13.0k | int16_t imm = 0; |
2063 | 13.0k | if (N.getOpcode() == ISD::ADD) { |
2064 | 5.13k | if (isIntS16Immediate(N.getOperand(1), imm)) |
2065 | 4.31k | return false; // r+i |
2066 | 818 | if (N.getOperand(1).getOpcode() == PPCISD::Lo) |
2067 | 319 | return false; // r+i |
2068 | 499 | |
2069 | 499 | Base = N.getOperand(0); |
2070 | 499 | Index = N.getOperand(1); |
2071 | 499 | return true; |
2072 | 7.93k | } else if (N.getOpcode() == ISD::OR) { |
2073 | 775 | if (isIntS16Immediate(N.getOperand(1), imm)) |
2074 | 755 | return false; // r+i can fold it if we can. |
2075 | 20 | |
2076 | 20 | // If this is an or of disjoint bitfields, we can codegen this as an add |
2077 | 20 | // (for better address arithmetic) if the LHS and RHS of the OR are provably |
2078 | 20 | // disjoint. |
2079 | 20 | KnownBits LHSKnown, RHSKnown; |
2080 | 20 | DAG.computeKnownBits(N.getOperand(0), LHSKnown); |
2081 | 20 | |
2082 | 20 | if (LHSKnown.Zero.getBoolValue()) { |
2083 | 20 | DAG.computeKnownBits(N.getOperand(1), RHSKnown); |
2084 | 20 | // If all of the bits are known zero on the LHS or RHS, the add won't |
2085 | 20 | // carry. |
2086 | 20 | if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) { |
2087 | 20 | Base = N.getOperand(0); |
2088 | 20 | Index = N.getOperand(1); |
2089 | 20 | return true; |
2090 | 20 | } |
2091 | 7.16k | } |
2092 | 7.93k | } |
2093 | 7.16k | |
2094 | 7.16k | return false; |
2095 | 7.16k | } |
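| | |
| | // A minimal sketch (hypothetical helper, not from this file) of the |
| | // OR-as-ADD fact used above: if every bit position is known zero in at least |
| | // one operand, no addition carry can occur, so A | B == A + B and the OR can |
| | // be selected as if it were an add for address arithmetic. |
| | #include <cstdint> |
| | static bool orIsAdd(uint64_t KnownZeroLHS, uint64_t KnownZeroRHS) { |
| |   return ~(KnownZeroLHS | KnownZeroRHS) == 0; // all bits covered somewhere |
| | } |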
2096 | | |
2097 | | // If we happen to be doing an i64 load or store into a stack slot that has |
2098 | | // less than a 4-byte alignment, then the frame-index elimination may need to |
2099 | | // use an indexed load or store instruction (because the offset may not be a |
2100 | | // multiple of 4). The extra register needed to hold the offset comes from the |
2101 | | // register scavenger, and it is possible that the scavenger will need to use |
2102 | | // an emergency spill slot. As a result, we need to make sure that a spill slot |
2103 | | // is allocated when doing an i64 load/store into a less-than-4-byte-aligned |
2104 | | // stack slot. |
2105 | 3.15k | static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) { |
2106 | 3.15k | // FIXME: This does not handle the LWA case. |
2107 | 3.15k | if (VT != MVT::i64) |
2108 | 1.32k | return; |
2109 | 1.83k | |
2110 | 1.83k | // NOTE: We'll exclude negative FIs here, which come from argument |
2111 | 1.83k | // lowering, because there are no known test cases triggering this problem |
2112 | 1.83k | // using packed structures (or similar). We can remove this exclusion if |
2113 | 1.83k | // we find such a test case. The reason why this is so test-case driven is |
2114 | 1.83k | // because this entire 'fixup' is only to prevent crashes (from the |
2115 | 1.83k | // register scavenger) on not-really-valid inputs. For example, if we have: |
2116 | 1.83k | // %a = alloca i1 |
2117 | 1.83k | // %b = bitcast i1* %a to i64* |
2118 | 1.83k | // store i64 0, i64* %b |
2119 | 1.83k | // then the store should really be marked as 'align 1', but is not. If it |
2120 | 1.83k | // were marked as 'align 1' then the indexed form would have been |
2121 | 1.83k | // instruction-selected initially, and the problem this 'fixup' is preventing |
2122 | 1.83k | // won't happen regardless. |
2123 | 1.83k | if (FrameIdx < 0) |
2124 | 419 | return; |
2125 | 1.41k | |
2126 | 1.41k | MachineFunction &MF = DAG.getMachineFunction(); |
2127 | 1.41k | MachineFrameInfo &MFI = MF.getFrameInfo(); |
2128 | 1.41k | |
2129 | 1.41k | unsigned Align = MFI.getObjectAlignment(FrameIdx); |
2130 | 1.41k | if (Align >= 4) |
2131 | 1.38k | return; |
2132 | 36 | |
2133 | 36 | PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); |
2134 | 36 | FuncInfo->setHasNonRISpills(); |
2135 | 36 | } |
2136 | | |
2137 | | /// Returns true if the address N can be represented by a base register plus |
2138 | | /// a signed 16-bit displacement [r+imm], and if it is not better |
2139 | | /// represented as reg+reg. If \p Alignment is non-zero, only accept |
2140 | | /// displacements that are multiples of that value. |
2141 | | bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, |
2142 | | SDValue &Base, |
2143 | | SelectionDAG &DAG, |
2144 | 7.90k | unsigned Alignment) const { |
2145 | 7.90k | // FIXME dl should come from parent load or store, not from address |
2146 | 7.90k | SDLoc dl(N); |
2147 | 7.90k | // If this can be more profitably realized as r+r, fail. |
2148 | 7.90k | if (SelectAddressRegReg(N, Disp, Base, DAG)) |
2149 | 166 | return false; |
2150 | 7.73k | |
2151 | 7.73k | if (N.getOpcode() == ISD::ADD) { |
2152 | 3.15k | int16_t imm = 0; |
2153 | 3.15k | if (isIntS16Immediate(N.getOperand(1), imm) && |
2154 | 3.15k | (!Alignment || (imm % Alignment) == 0)) { |
2155 | 2.89k | Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); |
2156 | 2.89k | if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { |
2157 | 205 | Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); |
2158 | 205 | fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); |
2159 | 2.89k | } else { |
2160 | 2.69k | Base = N.getOperand(0); |
2161 | 2.69k | } |
2162 | 2.89k | return true; // [r+i] |
2163 | 263 | } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) { |
2164 | 263 | // Match LOAD (ADD (X, Lo(G))). |
2165 | 263 | assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue() |
2166 | 263 | && "Cannot handle constant offsets yet!"); |
2167 | 263 | Disp = N.getOperand(1).getOperand(0); // The global address. |
2168 | 263 | assert(Disp.getOpcode() == ISD::TargetGlobalAddress || |
2169 | 263 | Disp.getOpcode() == ISD::TargetGlobalTLSAddress || |
2170 | 263 | Disp.getOpcode() == ISD::TargetConstantPool || |
2171 | 263 | Disp.getOpcode() == ISD::TargetJumpTable); |
2172 | 263 | Base = N.getOperand(0); |
2173 | 263 | return true; // [&g+r] |
2174 | 263 | } |
2175 | 4.57k | } else if (N.getOpcode() == ISD::OR) { |
2176 | 748 | int16_t imm = 0; |
2177 | 748 | if (isIntS16Immediate(N.getOperand(1), imm) && |
2178 | 748 | (!Alignment || (imm % Alignment) == 0)) { |
2179 | 748 | // If this is an or of disjoint bitfields, we can codegen this as an add |
2180 | 748 | // (for better address arithmetic) if the LHS and RHS of the OR are |
2181 | 748 | // provably disjoint. |
2182 | 748 | KnownBits LHSKnown; |
2183 | 748 | DAG.computeKnownBits(N.getOperand(0), LHSKnown); |
2184 | 748 | |
2185 | 748 | if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL748 ) { |
2186 | 748 | // If all of the bits are known zero on the LHS or RHS, the add won't |
2187 | 748 | // carry. |
2188 | 748 | if (FrameIndexSDNode *FI = |
2189 | 745 | dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { |
2190 | 745 | Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); |
2191 | 745 | fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); |
2192 | 748 | } else { |
2193 | 3 | Base = N.getOperand(0); |
2194 | 3 | } |
2195 | 748 | Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); |
2196 | 748 | return true; |
2197 | 748 | } |
2198 | 4.57k | } |
2199 | 3.82k | } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2200 | 113 | // Loading from a constant address.
2201 | 113 |
2202 | 113 | // If this address fits entirely in a 16-bit sext immediate field, codegen
2203 | 113 | // this as "d, 0"
2204 | 113 | int16_t Imm;
2205 | 113 | if (isIntS16Immediate(CN, Imm) && (!Alignment || (Imm % Alignment) == 0)) {
2206 | 101 | Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2207 | 101 | Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2208 | 101 | CN->getValueType(0)); |
2209 | 101 | return true; |
2210 | 101 | } |
2211 | 12 | |
2212 | 12 | // Handle 32-bit sext immediates with LIS + addr mode. |
2213 | 12 | if ((CN->getValueType(0) == MVT::i32 ||
2214 | 7 | (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2215 | 12 | (!Alignment || (CN->getZExtValue() % Alignment) == 0)) {
2216 | 9 | int Addr = (int)CN->getZExtValue(); |
2217 | 9 | |
2218 | 9 | // Otherwise, break this down into an LIS + disp. |
2219 | 9 | Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32); |
2220 | 9 | |
2221 | 9 | Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl, |
2222 | 9 | MVT::i32); |
2223 | 9 | unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2224 | 9 | Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0); |
2225 | 9 | return true; |
2226 | 9 | } |
2227 | 3.71k | } |
2228 | 3.71k | |
2229 | 3.71k | Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout())); |
2230 | 3.71k | if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2231 | 2.20k | Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); |
2232 | 2.20k | fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); |
2233 | 2.20k | } else |
2234 | 1.50k | Base = N; |
2235 | 7.90k | return true; // [r+0] |
2236 | 7.90k | } |
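
A quick standalone check (illustrative values, not part of this file) of the known-zero test used in the ISD::OR case of SelectAddressRegImm: when every bit set in the immediate is provably zero in the LHS, no bit position can carry, so the OR can be selected exactly as if it were an ADD of a displacement.

  #include <cassert>
  #include <cstdint>

  int main() {
    uint64_t KnownZero = 0xFF;   // bits proven zero in the LHS, e.g. from
                                 // a 256-byte-aligned frame address
    uint64_t Imm = 0x24;         // displacement touching only those bits
    assert((KnownZero | ~Imm) == ~0ULL);   // the disjointness test above
    uint64_t LHS = 0xFFFFFF00;   // any value consistent with KnownZero
    assert((LHS | Imm) == (LHS + Imm));    // hence OR behaves like ADD
    return 0;
  }
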
2237 | | |
2238 | | /// SelectAddressRegRegOnly - Given the specified address, force it to be
2239 | | /// represented as an indexed [r+r] operation. |
2240 | | bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, |
2241 | | SDValue &Index, |
2242 | 3.99k | SelectionDAG &DAG) const { |
2243 | 3.99k | // Check to see if we can easily represent this as an [r+r] address. This |
2244 | 3.99k | // will fail if it thinks that the address is more profitably represented as |
2245 | 3.99k | // reg+imm, e.g. where imm = 0. |
2246 | 3.99k | if (SelectAddressRegReg(N, Base, Index, DAG)) |
2247 | 75 | return true; |
2248 | 3.92k | |
2249 | 3.92k | // If the address is the result of an add, we will utilize the fact that the |
2250 | 3.92k | // address calculation includes an implicit add. However, we can reduce |
2251 | 3.92k | // register pressure if we do not materialize a constant just for use as the |
2252 | 3.92k | // index register. We only get rid of the add if it is not an add of a |
2253 | 3.92k | // value and a 16-bit signed constant and both have a single use. |
2254 | 3.92k | int16_t imm = 0; |
2255 | 3.92k | if (N.getOpcode() == ISD::ADD && |
2256 | 718 | (!isIntS16Immediate(N.getOperand(1), imm) || |
2257 | 3.92k | !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2258 | 661 | Base = N.getOperand(0); |
2259 | 661 | Index = N.getOperand(1); |
2260 | 661 | return true; |
2261 | 661 | } |
2262 | 3.26k | |
2263 | 3.26k | // Otherwise, do it the hard way, using R0 as the base register. |
2264 | 3.26k | Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2265 | 3.99k | N.getValueType()); |
2266 | 3.99k | Index = N; |
2267 | 3.99k | return true; |
2268 | 3.99k | } |
2269 | | |
2270 | | /// getPreIndexedAddressParts - Returns true, and sets the base pointer,
2271 | | /// offset pointer, and addressing mode by reference, if the node's address
2272 | | /// can be legally represented as a pre-indexed load / store address.
2273 | | bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, |
2274 | | SDValue &Offset, |
2275 | | ISD::MemIndexedMode &AM, |
2276 | 830 | SelectionDAG &DAG) const { |
2277 | 830 | if (DisablePPCPreinc) return false;
2278 | 830 | |
2279 | 830 | bool isLoad = true; |
2280 | 830 | SDValue Ptr; |
2281 | 830 | EVT VT; |
2282 | 830 | unsigned Alignment; |
2283 | 830 | if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2284 | 453 | Ptr = LD->getBasePtr(); |
2285 | 453 | VT = LD->getMemoryVT(); |
2286 | 453 | Alignment = LD->getAlignment(); |
2287 | 830 | } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2288 | 377 | Ptr = ST->getBasePtr(); |
2289 | 377 | VT = ST->getMemoryVT(); |
2290 | 377 | Alignment = ST->getAlignment(); |
2291 | 377 | isLoad = false; |
2292 | 377 | } else |
2293 | 0 | return false; |
2294 | 830 | |
2295 | 830 | // PowerPC doesn't have preinc load/store instructions for vectors (except |
2296 | 830 | // for QPX, which does have preinc r+r forms). |
2297 | 830 | if (VT.isVector()) {
2298 | 1 | if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
2299 | 0 | return false;
2300 | 1 | } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
2301 | 1 | AM = ISD::PRE_INC; |
2302 | 1 | return true; |
2303 | 1 | } |
2304 | 829 | } |
2305 | 829 | |
2306 | 829 | if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2307 | 107 | // Common code will reject creating a pre-inc form if the base pointer |
2308 | 107 | // is a frame index, or if N is a store and the base pointer is either |
2309 | 107 | // the same as or a predecessor of the value being stored. Check for |
2310 | 107 | // those situations here, and try with swapped Base/Offset instead. |
2311 | 107 | bool Swap = false; |
2312 | 107 | |
2313 | 107 | if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
2314 | 0 | Swap = true;
2315 | 107 | else if (!isLoad) {
2316 | 33 | SDValue Val = cast<StoreSDNode>(N)->getValue();
2317 | 33 | if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2318 | 22 | Swap = true; |
2319 | 107 | } |
2320 | 107 | |
2321 | 107 | if (Swap) |
2322 | 22 | std::swap(Base, Offset); |
2323 | 107 | |
2324 | 107 | AM = ISD::PRE_INC; |
2325 | 107 | return true; |
2326 | 107 | } |
2327 | 722 | |
2328 | 722 | // LDU/STU can only handle immediates that are a multiple of 4. |
2329 | 722 | if (VT != MVT::i64) {
2330 | 581 | if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 0)) |
2331 | 0 | return false; |
2332 | 141 | } else { |
2333 | 141 | // LDU/STU need an address with at least 4-byte alignment. |
2334 | 141 | if (Alignment < 4) |
2335 | 4 | return false; |
2336 | 137 | |
2337 | 137 | if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 4))
2338 | 0 | return false; |
2339 | 718 | } |
2340 | 718 | |
2341 | 718 | if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2342 | 376 | // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
2343 | 376 | // sext i32 to i64 when addr mode is r+i.
2344 | 376 | if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2345 | 1 | LD->getExtensionType() == ISD::SEXTLOAD && |
2346 | 0 | isa<ConstantSDNode>(Offset)) |
2347 | 0 | return false; |
2348 | 718 | } |
2349 | 718 | |
2350 | 718 | AM = ISD::PRE_INC; |
2351 | 718 | return true; |
2352 | 718 | } |
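
For readers unfamiliar with the pre-indexed forms selected here: an update-form access such as lwzu/ldu computes base+offset, performs the access there, and writes the new address back into the base register. A rough C sketch of those semantics (illustrative only; the helper name is invented):

  #include <cstddef>
  #include <cstdint>

  // One lwzu step: the base pointer is advanced before the access.
  uint32_t load_preinc(const uint8_t **Base, ptrdiff_t Offset) {
    *Base += Offset;                  // base register write-back
    return *(const uint32_t *)*Base;  // load at the updated address
  }
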
2353 | | |
2354 | | //===----------------------------------------------------------------------===// |
2355 | | // LowerOperation implementation |
2356 | | //===----------------------------------------------------------------------===// |
2357 | | |
2358 | | /// Return true if we should reference labels using a PICBase, set the HiOpFlags |
2359 | | /// and LoOpFlags to the target MO flags. |
2360 | | static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, |
2361 | | unsigned &HiOpFlags, unsigned &LoOpFlags, |
2362 | 352 | const GlobalValue *GV = nullptr) { |
2363 | 352 | HiOpFlags = PPCII::MO_HA; |
2364 | 352 | LoOpFlags = PPCII::MO_LO; |
2365 | 352 | |
2366 | 352 | // Don't use the pic base if not in PIC relocation model. |
2367 | 352 | if (IsPIC) {
2368 | 35 | HiOpFlags |= PPCII::MO_PIC_FLAG; |
2369 | 35 | LoOpFlags |= PPCII::MO_PIC_FLAG; |
2370 | 35 | } |
2371 | 352 | |
2372 | 352 | // If this is a reference to a global value that requires a non-lazy-ptr, make |
2373 | 352 | // sure that instruction lowering adds it. |
2374 | 352 | if (GV && Subtarget.hasLazyResolverStub(GV)) {
2375 | 127 | HiOpFlags |= PPCII::MO_NLP_FLAG;
2376 | 127 | LoOpFlags |= PPCII::MO_NLP_FLAG;
2377 | 127 |
2378 | 127 | if (GV->hasHiddenVisibility()) {
2379 | 3 | HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; |
2380 | 3 | LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; |
2381 | 3 | } |
2382 | 127 | } |
2383 | 352 | } |
2384 | | |
2385 | | static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, |
2386 | 334 | SelectionDAG &DAG) { |
2387 | 334 | SDLoc DL(HiPart); |
2388 | 334 | EVT PtrVT = HiPart.getValueType(); |
2389 | 334 | SDValue Zero = DAG.getConstant(0, DL, PtrVT); |
2390 | 334 | |
2391 | 334 | SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero); |
2392 | 334 | SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero); |
2393 | 334 | |
2394 | 334 | // With PIC, the first instruction is actually "GR+hi(&G)". |
2395 | 334 | if (isPIC) |
2396 | 17 | Hi = DAG.getNode(ISD::ADD, DL, PtrVT, |
2397 | 17 | DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi); |
2398 | 334 | |
2399 | 334 | // Generate non-pic code that has direct accesses to the constant pool. |
2400 | 334 | // The address of the global is just (hi(&g)+lo(&g)). |
2401 | 334 | return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo); |
2402 | 334 | } |
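
The Hi/Lo pair built here depends on high-adjusted arithmetic: the low 16 bits are later consumed as a signed displacement, so the high half must absorb the borrow when bit 15 of the address is set. A self-contained check with a hypothetical address (the same identity underlies the (Addr - (signed short)Addr) >> 16 breakdown in SelectAddressRegImm):

  #include <cassert>
  #include <cstdint>

  int main() {
    uint32_t Addr = 0x1234ABCD;              // hypothetical address
    int16_t  Lo = (int16_t)(Addr & 0xFFFF);  // negative: bit 15 is set
    uint32_t Ha = (Addr + 0x8000) >> 16;     // high-adjusted upper half
    assert((Ha << 16) + (uint32_t)(int32_t)Lo == Addr);
    return 0;
  }
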
2403 | | |
2404 | 3.10k | static void setUsesTOCBasePtr(MachineFunction &MF) { |
2405 | 3.10k | PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); |
2406 | 3.10k | FuncInfo->setUsesTOCBasePtr(); |
2407 | 3.10k | } |
2408 | | |
2409 | 3.05k | static void setUsesTOCBasePtr(SelectionDAG &DAG) { |
2410 | 3.05k | setUsesTOCBasePtr(DAG.getMachineFunction()); |
2411 | 3.05k | } |
2412 | | |
2413 | | static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit, |
2414 | 1.83k | SDValue GA) { |
2415 | 1.83k | EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2416 | 1.81k | SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) : |
2417 | 18 | DAG.getNode(PPCISD::GlobalBaseReg, dl, VT); |
2418 | 1.83k | |
2419 | 1.83k | SDValue Ops[] = { GA, Reg }; |
2420 | 1.83k | return DAG.getMemIntrinsicNode( |
2421 | 1.83k | PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT, |
2422 | 1.83k | MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0, false, true, |
2423 | 1.83k | false, 0); |
2424 | 1.83k | } |
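
In C terms, the TOC_ENTRY node built here is a single pointer-sized load out of a table addressed by X2 (or by the PIC base on 32-bit); the global is then reached through the loaded address. A sketch under that reading, with a hypothetical table standing in for the TOC:

  #include <cstdint>

  extern intptr_t toc_table[];           // hypothetical stand-in for the TOC

  intptr_t *address_of_global(unsigned Slot) {
    return (intptr_t *)toc_table[Slot];  // the one load getTOCEntry emits
  }
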
2425 | | |
2426 | | SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, |
2427 | 652 | SelectionDAG &DAG) const { |
2428 | 652 | EVT PtrVT = Op.getValueType(); |
2429 | 652 | ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); |
2430 | 652 | const Constant *C = CP->getConstVal(); |
2431 | 652 | |
2432 | 652 | // 64-bit SVR4 ABI code is always position-independent. |
2433 | 652 | // The actual address of the GlobalValue is stored in the TOC. |
2434 | 652 | if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2435 | 565 | setUsesTOCBasePtr(DAG); |
2436 | 565 | SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0); |
2437 | 565 | return getTOCEntry(DAG, SDLoc(CP), true, GA); |
2438 | 565 | } |
2439 | 87 | |
2440 | 87 | unsigned MOHiFlag, MOLoFlag; |
2441 | 87 | bool IsPIC = isPositionIndependent(); |
2442 | 87 | getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); |
2443 | 87 | |
2444 | 87 | if (IsPIC && Subtarget.isSVR4ABI()) {
2445 | 11 | SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), |
2446 | 11 | PPCII::MO_PIC_FLAG); |
2447 | 11 | return getTOCEntry(DAG, SDLoc(CP), false, GA); |
2448 | 11 | } |
2449 | 76 | |
2450 | 76 | SDValue CPIHi = |
2451 | 76 | DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag); |
2452 | 76 | SDValue CPILo = |
2453 | 76 | DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag); |
2454 | 76 | return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG); |
2455 | 76 | } |
2456 | | |
2457 | | // For 64-bit PowerPC, prefer the more compact relative encodings. |
2458 | | // This trades 32 bits per jump table entry for one or two instructions |
2459 | | // on the jump site. |
2460 | 16 | unsigned PPCTargetLowering::getJumpTableEncoding() const { |
2461 | 16 | if (isJumpTableRelative()) |
2462 | 13 | return MachineJumpTableInfo::EK_LabelDifference32; |
2463 | 3 | |
2464 | 3 | return TargetLowering::getJumpTableEncoding(); |
2465 | 3 | } |
2466 | | |
2467 | 27 | bool PPCTargetLowering::isJumpTableRelative() const { |
2468 | 27 | if (Subtarget.isPPC64()) |
2469 | 21 | return true; |
2470 | 6 | return TargetLowering::isJumpTableRelative(); |
2471 | 6 | } |
2472 | | |
2473 | | SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table, |
2474 | 8 | SelectionDAG &DAG) const { |
2475 | 8 | if (!Subtarget.isPPC64()) |
2476 | 0 | return TargetLowering::getPICJumpTableRelocBase(Table, DAG); |
2477 | 8 | |
2478 | 8 | switch (getTargetMachine().getCodeModel()) { |
2479 | 5 | case CodeModel::Small: |
2480 | 5 | case CodeModel::Medium: |
2481 | 5 | return TargetLowering::getPICJumpTableRelocBase(Table, DAG); |
2482 | 3 | default: |
2483 | 3 | return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(), |
2484 | 3 | getPointerTy(DAG.getDataLayout())); |
2485 | 0 | } |
2486 | 0 | } |
2487 | | |
2488 | | const MCExpr * |
2489 | | PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF, |
2490 | | unsigned JTI, |
2491 | 44 | MCContext &Ctx) const { |
2492 | 44 | if (!Subtarget.isPPC64()) |
2493 | 0 | return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); |
2494 | 44 | |
2495 | 44 | switch (getTargetMachine().getCodeModel()) { |
2496 | 32 | case CodeModel::Small: |
2497 | 32 | case CodeModel::Medium: |
2498 | 32 | return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); |
2499 | 12 | default: |
2500 | 12 | return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx); |
2501 | 0 | } |
2502 | 0 | } |
2503 | | |
2504 | 11 | SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { |
2505 | 11 | EVT PtrVT = Op.getValueType(); |
2506 | 11 | JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); |
2507 | 11 | |
2508 | 11 | // 64-bit SVR4 ABI code is always position-independent. |
2509 | 11 | // The actual address of the GlobalValue is stored in the TOC. |
2510 | 11 | if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2511 | 8 | setUsesTOCBasePtr(DAG); |
2512 | 8 | SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); |
2513 | 8 | return getTOCEntry(DAG, SDLoc(JT), true, GA); |
2514 | 8 | } |
2515 | 3 | |
2516 | 3 | unsigned MOHiFlag, MOLoFlag; |
2517 | 3 | bool IsPIC = isPositionIndependent(); |
2518 | 3 | getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); |
2519 | 3 | |
2520 | 3 | if (IsPIC && Subtarget.isSVR4ABI()) {
2521 | 0 | SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, |
2522 | 0 | PPCII::MO_PIC_FLAG); |
2523 | 0 | return getTOCEntry(DAG, SDLoc(GA), false, GA); |
2524 | 0 | } |
2525 | 3 | |
2526 | 3 | SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); |
2527 | 3 | SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag); |
2528 | 3 | return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG); |
2529 | 3 | } |
2530 | | |
2531 | | SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, |
2532 | 9 | SelectionDAG &DAG) const { |
2533 | 9 | EVT PtrVT = Op.getValueType(); |
2534 | 9 | BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op); |
2535 | 9 | const BlockAddress *BA = BASDN->getBlockAddress(); |
2536 | 9 | |
2537 | 9 | // 64-bit SVR4 ABI code is always position-independent. |
2538 | 9 | // The actual BlockAddress is stored in the TOC. |
2539 | 9 | if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2540 | 6 | setUsesTOCBasePtr(DAG); |
2541 | 6 | SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()); |
2542 | 6 | return getTOCEntry(DAG, SDLoc(BASDN), true, GA); |
2543 | 6 | } |
2544 | 3 | |
2545 | 3 | unsigned MOHiFlag, MOLoFlag; |
2546 | 3 | bool IsPIC = isPositionIndependent(); |
2547 | 3 | getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); |
2548 | 3 | SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag); |
2549 | 3 | SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag); |
2550 | 3 | return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG); |
2551 | 3 | } |
2552 | | |
2553 | | SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, |
2554 | 39 | SelectionDAG &DAG) const { |
2555 | 39 | // FIXME: TLS addresses currently use medium model code sequences, |
2556 | 39 | // which is the most useful form. Eventually support for small and |
2557 | 39 | // large models could be added if users need it, at the cost of |
2558 | 39 | // additional complexity. |
2559 | 39 | GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); |
2560 | 39 | if (DAG.getTarget().Options.EmulatedTLS) |
2561 | 6 | return LowerToTLSEmulatedModel(GA, DAG); |
2562 | 33 | |
2563 | 33 | SDLoc dl(GA); |
2564 | 33 | const GlobalValue *GV = GA->getGlobal(); |
2565 | 33 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
2566 | 33 | bool is64bit = Subtarget.isPPC64(); |
2567 | 33 | const Module *M = DAG.getMachineFunction().getFunction()->getParent(); |
2568 | 33 | PICLevel::Level picLevel = M->getPICLevel(); |
2569 | 33 | |
2570 | 33 | TLSModel::Model Model = getTargetMachine().getTLSModel(GV); |
2571 | 33 | |
2572 | 33 | if (Model == TLSModel::LocalExec) {
2573 | 10 | SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, |
2574 | 10 | PPCII::MO_TPREL_HA); |
2575 | 10 | SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, |
2576 | 10 | PPCII::MO_TPREL_LO); |
2577 | 9 | SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64) |
2578 | 1 | : DAG.getRegister(PPC::R2, MVT::i32); |
2579 | 10 | |
2580 | 10 | SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg); |
2581 | 10 | return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi); |
2582 | 10 | } |
2583 | 23 | |
2584 | 23 | if (Model == TLSModel::InitialExec) {
2585 | 3 | SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); |
2586 | 3 | SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, |
2587 | 3 | PPCII::MO_TLS); |
2588 | 3 | SDValue GOTPtr; |
2589 | 3 | if (is64bit) {
2590 | 2 | setUsesTOCBasePtr(DAG); |
2591 | 2 | SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); |
2592 | 2 | GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, |
2593 | 2 | PtrVT, GOTReg, TGA); |
2594 | 2 | } else |
2595 | 1 | GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT); |
2596 | 3 | SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, |
2597 | 3 | PtrVT, TGA, GOTPtr); |
2598 | 3 | return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS); |
2599 | 3 | } |
2600 | 20 | |
2601 | 20 | if (Model == TLSModel::GeneralDynamic) {
2602 | 12 | SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); |
2603 | 12 | SDValue GOTPtr; |
2604 | 12 | if (is64bit) {
2605 | 10 | setUsesTOCBasePtr(DAG); |
2606 | 10 | SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); |
2607 | 10 | GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT, |
2608 | 10 | GOTReg, TGA); |
2609 | 12 | } else { |
2610 | 2 | if (picLevel == PICLevel::SmallPIC) |
2611 | 0 | GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); |
2612 | 2 | else |
2613 | 2 | GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); |
2614 | 2 | } |
2615 | 12 | return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT, |
2616 | 12 | GOTPtr, TGA, TGA); |
2617 | 12 | } |
2618 | 8 | |
2619 | 8 | if (Model == TLSModel::LocalDynamic) {
2620 | 8 | SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); |
2621 | 8 | SDValue GOTPtr; |
2622 | 8 | if (is64bit) {
2623 | 6 | setUsesTOCBasePtr(DAG); |
2624 | 6 | SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); |
2625 | 6 | GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT, |
2626 | 6 | GOTReg, TGA); |
2627 | 8 | } else { |
2628 | 2 | if (picLevel == PICLevel::SmallPIC) |
2629 | 0 | GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); |
2630 | 2 | else |
2631 | 2 | GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); |
2632 | 2 | } |
2633 | 8 | SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl, |
2634 | 8 | PtrVT, GOTPtr, TGA, TGA); |
2635 | 8 | SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, |
2636 | 8 | PtrVT, TLSAddr, TGA); |
2637 | 8 | return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA); |
2638 | 8 | } |
2639 | 0 |
2640 | 0 | llvm_unreachable("Unknown TLS model!");
2641 | 0 | } |
2642 | | |
2643 | | SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, |
2644 | 1.49k | SelectionDAG &DAG) const { |
2645 | 1.49k | EVT PtrVT = Op.getValueType(); |
2646 | 1.49k | GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); |
2647 | 1.49k | SDLoc DL(GSDN); |
2648 | 1.49k | const GlobalValue *GV = GSDN->getGlobal(); |
2649 | 1.49k | |
2650 | 1.49k | // 64-bit SVR4 ABI code is always position-independent. |
2651 | 1.49k | // The actual address of the GlobalValue is stored in the TOC. |
2652 | 1.49k | if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2653 | 1.23k | setUsesTOCBasePtr(DAG); |
2654 | 1.23k | SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset()); |
2655 | 1.23k | return getTOCEntry(DAG, DL, true, GA); |
2656 | 1.23k | } |
2657 | 259 | |
2658 | 259 | unsigned MOHiFlag, MOLoFlag; |
2659 | 259 | bool IsPIC = isPositionIndependent(); |
2660 | 259 | getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV); |
2661 | 259 | |
2662 | 259 | if (IsPIC && Subtarget.isSVR4ABI()) {
2663 | 7 | SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, |
2664 | 7 | GSDN->getOffset(), |
2665 | 7 | PPCII::MO_PIC_FLAG); |
2666 | 7 | return getTOCEntry(DAG, DL, false, GA); |
2667 | 7 | } |
2668 | 252 | |
2669 | 252 | SDValue GAHi = |
2670 | 252 | DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag); |
2671 | 252 | SDValue GALo = |
2672 | 252 | DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag); |
2673 | 252 | |
2674 | 252 | SDValue Ptr = LowerLabelRef(GAHi, GALo, IsPIC, DAG); |
2675 | 252 | |
2676 | 252 | // If the global reference is actually to a non-lazy-pointer, we have to do an |
2677 | 252 | // extra load to get the address of the global. |
2678 | 252 | if (MOHiFlag & PPCII::MO_NLP_FLAG) |
2679 | 127 | Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); |
2680 | 1.49k | return Ptr; |
2681 | 1.49k | } |
2682 | | |
2683 | 38 | SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { |
2684 | 38 | ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); |
2685 | 38 | SDLoc dl(Op); |
2686 | 38 | |
2687 | 38 | if (Op.getValueType() == MVT::v2i64) {
2688 | 16 | // When the operands themselves are v2i64 values, we need to do something
2689 | 16 | // special because VSX has no underlying comparison operations for these.
2690 | 16 | if (Op.getOperand(0).getValueType() == MVT::v2i64) {
2691 | 12 | // Equality can be handled by casting to the legal type for Altivec
2692 | 12 | // comparisons, everything else needs to be expanded.
2693 | 12 | if (CC == ISD::SETEQ || CC == ISD::SETNE) {
2694 | 8 | return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, |
2695 | 8 | DAG.getSetCC(dl, MVT::v4i32, |
2696 | 8 | DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)), |
2697 | 8 | DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)), |
2698 | 8 | CC)); |
2699 | 8 | } |
2700 | 4 | |
2701 | 4 | return SDValue(); |
2702 | 4 | } |
2703 | 4 | |
2704 | 4 | // We handle most of these in the usual way. |
2705 | 4 | return Op; |
2706 | 4 | } |
2707 | 22 | |
2708 | 22 | // If we're comparing for equality to zero, expose the fact that this is |
2709 | 22 | // implemented as a ctlz/srl pair on ppc, so that the dag combiner can |
2710 | 22 | // fold the new nodes. |
2711 | 22 | if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
2712 | 6 | return V;
2713 | 16 |
2714 | 16 | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
2715 | 14 | // Leave comparisons against 0 and -1 alone for now, since they're usually
2716 | 14 | // optimized. FIXME: revisit this when we can custom lower all setcc
2717 | 14 | // optimizations.
2718 | 14 | if (C->isAllOnesValue() || C->isNullValue())
2719 | 9 | return SDValue(); |
2720 | 7 | } |
2721 | 7 | |
2722 | 7 | // If we have an integer seteq/setne, turn it into a compare against zero |
2723 | 7 | // by xor'ing the rhs with the lhs, which is faster than setting a |
2724 | 7 | // condition register, reading it back out, and masking the correct bit. The |
2725 | 7 | // normal approach here uses sub to do this instead of xor. Using xor exposes |
2726 | 7 | // the result to other bit-twiddling opportunities. |
2727 | 7 | EVT LHSVT = Op.getOperand(0).getValueType(); |
2728 | 7 | if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
2729 | 2 | EVT VT = Op.getValueType(); |
2730 | 2 | SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0), |
2731 | 2 | Op.getOperand(1)); |
2732 | 2 | return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC); |
2733 | 2 | } |
2734 | 5 | return SDValue(); |
2735 | 5 | } |
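
Both rewrites used above are easy to validate on plain integers; a minimal sketch, modeling ctlz the way cntlzw behaves (defined as 32 for a zero input) via the GCC/Clang builtin:

  #include <cassert>
  #include <cstdint>
  #include <initializer_list>

  static unsigned ctlz32(uint32_t V) { return V ? __builtin_clz(V) : 32; }

  int main() {
    uint32_t A = 42, B = 42;
    assert(((A ^ B) == 0) == (A == B));                // the xor rewrite
    for (uint32_t X : {0u, 1u, 0x80000000u})
      assert((ctlz32(X) >> 5) == (X == 0 ? 1u : 0u));  // the ctlz/srl form
    return 0;
  }
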
2736 | | |
2737 | 1 | SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { |
2738 | 1 | SDNode *Node = Op.getNode(); |
2739 | 1 | EVT VT = Node->getValueType(0); |
2740 | 1 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
2741 | 1 | SDValue InChain = Node->getOperand(0); |
2742 | 1 | SDValue VAListPtr = Node->getOperand(1); |
2743 | 1 | const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); |
2744 | 1 | SDLoc dl(Node); |
2745 | 1 | |
2746 | 1 | assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only"); |
2747 | 1 | |
2748 | 1 | // gpr_index |
2749 | 1 | SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, |
2750 | 1 | VAListPtr, MachinePointerInfo(SV), MVT::i8); |
2751 | 1 | InChain = GprIndex.getValue(1); |
2752 | 1 | |
2753 | 1 | if (VT == MVT::i64) {
2754 | 0 | // Check if GprIndex is even |
2755 | 0 | SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex, |
2756 | 0 | DAG.getConstant(1, dl, MVT::i32)); |
2757 | 0 | SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd, |
2758 | 0 | DAG.getConstant(0, dl, MVT::i32), ISD::SETNE); |
2759 | 0 | SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex, |
2760 | 0 | DAG.getConstant(1, dl, MVT::i32)); |
2761 | 0 | // Align GprIndex to be even if it isn't |
2762 | 0 | GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne, |
2763 | 0 | GprIndex); |
2764 | 0 | } |
2765 | 1 | |
2766 | 1 | // fpr index is 1 byte after gpr |
2767 | 1 | SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, |
2768 | 1 | DAG.getConstant(1, dl, MVT::i32)); |
2769 | 1 | |
2770 | 1 | // fpr |
2771 | 1 | SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, |
2772 | 1 | FprPtr, MachinePointerInfo(SV), MVT::i8); |
2773 | 1 | InChain = FprIndex.getValue(1); |
2774 | 1 | |
2775 | 1 | SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, |
2776 | 1 | DAG.getConstant(8, dl, MVT::i32)); |
2777 | 1 | |
2778 | 1 | SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, |
2779 | 1 | DAG.getConstant(4, dl, MVT::i32)); |
2780 | 1 | |
2781 | 1 | // areas |
2782 | 1 | SDValue OverflowArea = |
2783 | 1 | DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo()); |
2784 | 1 | InChain = OverflowArea.getValue(1); |
2785 | 1 | |
2786 | 1 | SDValue RegSaveArea = |
2787 | 1 | DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo()); |
2788 | 1 | InChain = RegSaveArea.getValue(1); |
2789 | 1 | |
2790 | 1 | // select overflow_area if index > 8 |
2791 | 1 | SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
2792 | 1 | DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
2793 | 1 |
2794 | 1 | // adjustment constant gpr_index * 4/8
2795 | 1 | SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
2796 | 1 | VT.isInteger() ? GprIndex : FprIndex,
2797 | 1 | DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
2798 | 1 | MVT::i32)); |
2799 | 1 | |
2800 | 1 | // OurReg = RegSaveArea + RegConstant |
2801 | 1 | SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea, |
2802 | 1 | RegConstant); |
2803 | 1 | |
2804 | 1 | // Floating types are 32 bytes into RegSaveArea |
2805 | 1 | if (VT.isFloatingPoint()) |
2806 | 0 | OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg, |
2807 | 0 | DAG.getConstant(32, dl, MVT::i32)); |
2808 | 1 | |
2809 | 1 | // increase {f,g}pr_index by 1 (or 2 if VT is i64) |
2810 | 1 | SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32, |
2811 | 1 | VT.isInteger() ? GprIndex : FprIndex,
2812 | 1 | DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
2813 | 1 | MVT::i32));
2814 | 1 |
2815 | 1 | InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
2816 | 1 | VT.isInteger() ? VAListPtr : FprPtr,
2817 | 1 | MachinePointerInfo(SV), MVT::i8); |
2818 | 1 | |
2819 | 1 | // determine if we should load from reg_save_area or overflow_area |
2820 | 1 | SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea); |
2821 | 1 | |
2822 | 1 | // increase overflow_area by 4/8 if gpr/fpr > 8 |
2823 | 1 | SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea, |
2824 | 1 | DAG.getConstant(VT.isInteger() ? 4 : 8,
2825 | 1 | dl, MVT::i32)); |
2826 | 1 | |
2827 | 1 | OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea, |
2828 | 1 | OverflowAreaPlusN); |
2829 | 1 | |
2830 | 1 | InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr, |
2831 | 1 | MachinePointerInfo(), MVT::i32); |
2832 | 1 | |
2833 | 1 | return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo()); |
2834 | 1 | } |
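
For the integer case, the DAG built above computes roughly the following C logic (the helper is invented for illustration; 4 bytes per saved GPR, eight GPRs in the register save area):

  #include <cstdint>

  const char *va_arg_int_slot(uint8_t GprIndex, const char *RegSaveArea,
                              const char *OverflowArea) {
    // r3..r10 live in the register save area; later arguments were
    // spilled by the caller into the overflow area.
    return GprIndex < 8 ? RegSaveArea + GprIndex * 4 : OverflowArea;
  }
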
2835 | | |
2836 | 1 | SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { |
2837 | 1 | assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only"); |
2838 | 1 | |
2839 | 1 | // We have to copy the entire va_list struct: |
2840 | 1 | // 2*sizeof(char) + 2 bytes of padding + 2*sizeof(char*) = 12 bytes
2841 | 1 | return DAG.getMemcpy(Op.getOperand(0), Op, |
2842 | 1 | Op.getOperand(1), Op.getOperand(2), |
2843 | 1 | DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true, |
2844 | 1 | false, MachinePointerInfo(), MachinePointerInfo()); |
2845 | 1 | } |
2846 | | |
2847 | | SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, |
2848 | 1 | SelectionDAG &DAG) const { |
2849 | 1 | return Op.getOperand(0); |
2850 | 1 | } |
2851 | | |
2852 | | SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, |
2853 | 1 | SelectionDAG &DAG) const { |
2854 | 1 | SDValue Chain = Op.getOperand(0); |
2855 | 1 | SDValue Trmp = Op.getOperand(1); // trampoline |
2856 | 1 | SDValue FPtr = Op.getOperand(2); // nested function |
2857 | 1 | SDValue Nest = Op.getOperand(3); // 'nest' parameter value |
2858 | 1 | SDLoc dl(Op); |
2859 | 1 | |
2860 | 1 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
2861 | 1 | bool isPPC64 = (PtrVT == MVT::i64); |
2862 | 1 | Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); |
2863 | 1 | |
2864 | 1 | TargetLowering::ArgListTy Args; |
2865 | 1 | TargetLowering::ArgListEntry Entry; |
2866 | 1 | |
2867 | 1 | Entry.Ty = IntPtrTy; |
2868 | 1 | Entry.Node = Trmp; Args.push_back(Entry); |
2869 | 1 | |
2870 | 1 | // TrampSize == (isPPC64 ? 48 : 40); |
2871 | 1 | Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
2872 | 1 | isPPC64 ? MVT::i64 : MVT::i32);
2873 | 1 | Args.push_back(Entry); |
2874 | 1 | |
2875 | 1 | Entry.Node = FPtr; Args.push_back(Entry); |
2876 | 1 | Entry.Node = Nest; Args.push_back(Entry); |
2877 | 1 | |
2878 | 1 | // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) |
2879 | 1 | TargetLowering::CallLoweringInfo CLI(DAG); |
2880 | 1 | CLI.setDebugLoc(dl).setChain(Chain).setLibCallee( |
2881 | 1 | CallingConv::C, Type::getVoidTy(*DAG.getContext()), |
2882 | 1 | DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args)); |
2883 | 1 | |
2884 | 1 | std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); |
2885 | 1 | return CallResult.second; |
2886 | 1 | } |
2887 | | |
2888 | 6 | SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { |
2889 | 6 | MachineFunction &MF = DAG.getMachineFunction(); |
2890 | 6 | PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); |
2891 | 6 | EVT PtrVT = getPointerTy(MF.getDataLayout()); |
2892 | 6 | |
2893 | 6 | SDLoc dl(Op); |
2894 | 6 | |
2895 | 6 | if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
2896 | 6 | // vastart just stores the address of the VarArgsFrameIndex slot into the |
2897 | 6 | // memory location argument. |
2898 | 6 | SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); |
2899 | 6 | const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); |
2900 | 6 | return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), |
2901 | 6 | MachinePointerInfo(SV)); |
2902 | 6 | } |
2903 | 0 |
2904 | 0 | // For the 32-bit SVR4 ABI we follow the layout of the va_list struct. |
2905 | 0 | // We suppose the given va_list is already allocated. |
2906 | 0 | // |
2907 | 0 | // typedef struct { |
2908 | 0 | // char gpr; /* index into the array of 8 GPRs |
2909 | 0 | // * stored in the register save area |
2910 | 0 | // * gpr=0 corresponds to r3, |
2911 | 0 | // * gpr=1 to r4, etc. |
2912 | 0 | // */ |
2913 | 0 | // char fpr; /* index into the array of 8 FPRs |
2914 | 0 | // * stored in the register save area |
2915 | 0 | // * fpr=0 corresponds to f1, |
2916 | 0 | // * fpr=1 to f2, etc. |
2917 | 0 | // */ |
2918 | 0 | // char *overflow_arg_area; |
2919 | 0 | // /* location on stack that holds |
2920 | 0 | // * the next overflow argument |
2921 | 0 | // */ |
2922 | 0 | // char *reg_save_area; |
2923 | 0 | // /* where r3:r10 and f1:f8 (if saved) |
2924 | 0 | // * are stored |
2925 | 0 | // */ |
2926 | 0 | // } va_list[1]; |
2927 | 0 |
2928 | 0 | SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32); |
2929 | 0 | SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32); |
2930 | 0 | SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(), |
2931 | 0 | PtrVT); |
2932 | 0 | SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), |
2933 | 0 | PtrVT); |
2934 | 0 |
2935 | 0 | uint64_t FrameOffset = PtrVT.getSizeInBits()/8; |
2936 | 0 | SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT); |
2937 | 0 |
2938 | 0 | uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1; |
2939 | 0 | SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT); |
2940 | 0 |
2941 | 0 | uint64_t FPROffset = 1; |
2942 | 0 | SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT); |
2943 | 0 |
2944 | 0 | const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); |
2945 | 0 |
2946 | 0 | // Store first byte : number of int regs |
2947 | 0 | SDValue firstStore = |
2948 | 0 | DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1), |
2949 | 0 | MachinePointerInfo(SV), MVT::i8); |
2950 | 0 | uint64_t nextOffset = FPROffset; |
2951 | 0 | SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1), |
2952 | 0 | ConstFPROffset); |
2953 | 0 |
2954 | 0 | // Store second byte : number of float regs |
2955 | 0 | SDValue secondStore = |
2956 | 0 | DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, |
2957 | 0 | MachinePointerInfo(SV, nextOffset), MVT::i8); |
2958 | 0 | nextOffset += StackOffset; |
2959 | 0 | nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset); |
2960 | 0 |
2961 | 0 | // Store second word : arguments given on stack |
2962 | 0 | SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, |
2963 | 0 | MachinePointerInfo(SV, nextOffset)); |
2964 | 0 | nextOffset += FrameOffset; |
2965 | 0 | nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset); |
2966 | 0 |
2967 | 0 | // Store third word : arguments given in registers |
2968 | 0 | return DAG.getStore(thirdStore, dl, FR, nextPtr, |
2969 | 0 | MachinePointerInfo(SV, nextOffset)); |
2970 | 0 | } |
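
The stores above populate the 32-bit SVR4 va_list described in the comment at offsets 0, 1, 4, and 8. A compile-time sketch of that layout (pointers modeled as uint32_t so the asserts hold on any host):

  #include <cstddef>
  #include <cstdint>

  struct VAList32 {
    char     Gpr;              // offset 0: next GPR index
    char     Fpr;              // offset 1: next FPR index
    uint32_t OverflowArgArea;  // offset 4: next stack argument
    uint32_t RegSaveArea;      // offset 8: saved r3:r10 / f1:f8
  };

  static_assert(offsetof(VAList32, Fpr) == 1, "fpr follows gpr");
  static_assert(offsetof(VAList32, OverflowArgArea) == 4, "after padding");
  static_assert(offsetof(VAList32, RegSaveArea) == 8, "third word");
  static_assert(sizeof(VAList32) == 12, "matches LowerVACOPY's 12-byte copy");
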
2971 | | |
2972 | | #include "PPCGenCallingConv.inc" |
2973 | | |
2974 | | // Function whose sole purpose is to kill compiler warnings |
2975 | | // stemming from unused functions included from PPCGenCallingConv.inc. |
2976 | 0 | CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const { |
2977 | 0 | return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
2978 | 0 | } |
2979 | | |
2980 | | bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, |
2981 | | CCValAssign::LocInfo &LocInfo, |
2982 | | ISD::ArgFlagsTy &ArgFlags, |
2983 | 1.64k | CCState &State) { |
2984 | 1.64k | return true; |
2985 | 1.64k | } |
2986 | | |
2987 | | bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, |
2988 | | MVT &LocVT, |
2989 | | CCValAssign::LocInfo &LocInfo, |
2990 | | ISD::ArgFlagsTy &ArgFlags, |
2991 | 99 | CCState &State) { |
2992 | 99 | static const MCPhysReg ArgRegs[] = { |
2993 | 99 | PPC::R3, PPC::R4, PPC::R5, PPC::R6, |
2994 | 99 | PPC::R7, PPC::R8, PPC::R9, PPC::R10, |
2995 | 99 | }; |
2996 | 99 | const unsigned NumArgRegs = array_lengthof(ArgRegs); |
2997 | 99 | |
2998 | 99 | unsigned RegNum = State.getFirstUnallocated(ArgRegs); |
2999 | 99 | |
3000 | 99 | // Skip one register if the first unallocated register has an even register |
3001 | 99 | // number and there are still argument registers available which have not been |
3002 | 99 | // allocated yet. RegNum is actually an index into ArgRegs, which means we |
3003 | 99 | // need to skip a register if RegNum is odd. |
3004 | 99 | if (RegNum != NumArgRegs && RegNum % 2 == 1) {
3005 | 11 | State.AllocateReg(ArgRegs[RegNum]); |
3006 | 11 | } |
3007 | 99 | |
3008 | 99 | // Always return false here, as this function only makes sure that the first |
3009 | 99 | // unallocated register has an odd register number and does not actually |
3010 | 99 | // allocate a register for the current argument. |
3011 | 99 | return false; |
3012 | 99 | } |
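
A tiny standalone check of the skip logic (ArgRegs index 0 is r3, so an i64, which needs an adjacent pair such as r3:r4 or r5:r6, must start at an even index):

  #include <cassert>

  int main() {
    unsigned NumArgRegs = 8;
    unsigned RegNum = 1;               // next free register would be r4
    if (RegNum != NumArgRegs && RegNum % 2 == 1)
      ++RegNum;                        // burn r4; the pair becomes r5:r6
    assert(RegNum == 2);
    return 0;
  }
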
3013 | | |
3014 | | bool |
3015 | | llvm::CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT, |
3016 | | MVT &LocVT, |
3017 | | CCValAssign::LocInfo &LocInfo, |
3018 | | ISD::ArgFlagsTy &ArgFlags, |
3019 | 6 | CCState &State) { |
3020 | 6 | static const MCPhysReg ArgRegs[] = { |
3021 | 6 | PPC::R3, PPC::R4, PPC::R5, PPC::R6, |
3022 | 6 | PPC::R7, PPC::R8, PPC::R9, PPC::R10, |
3023 | 6 | }; |
3024 | 6 | const unsigned NumArgRegs = array_lengthof(ArgRegs); |
3025 | 6 | |
3026 | 6 | unsigned RegNum = State.getFirstUnallocated(ArgRegs); |
3027 | 6 | int RegsLeft = NumArgRegs - RegNum; |
3028 | 6 | |
3029 | 6 | // Skip the remaining registers if there are not enough left for the long
3030 | 6 | // double type (4 GPRs in soft-float mode), putting the long double on the stack.
3031 | 6 | if (RegNum != NumArgRegs && RegsLeft < 4) {
3032 | 4 | for (int i = 0; i < RegsLeft; i++) {
3033 | 3 | State.AllocateReg(ArgRegs[RegNum + i]); |
3034 | 3 | } |
3035 | 1 | } |
3036 | 6 | |
3037 | 6 | return false; |
3038 | 6 | } |
3039 | | |
3040 | | bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, |
3041 | | MVT &LocVT, |
3042 | | CCValAssign::LocInfo &LocInfo, |
3043 | | ISD::ArgFlagsTy &ArgFlags, |
3044 | 51 | CCState &State) { |
3045 | 51 | static const MCPhysReg ArgRegs[] = { |
3046 | 51 | PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, |
3047 | 51 | PPC::F8 |
3048 | 51 | }; |
3049 | 51 | |
3050 | 51 | const unsigned NumArgRegs = array_lengthof(ArgRegs); |
3051 | 51 | |
3052 | 51 | unsigned RegNum = State.getFirstUnallocated(ArgRegs); |
3053 | 51 | |
3054 | 51 | // If there is only one Floating-point register left we need to put both f64 |
3055 | 51 | // values of a split ppc_fp128 value on the stack. |
3056 | 51 | if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
3057 | 0 | State.AllocateReg(ArgRegs[RegNum]); |
3058 | 0 | } |
3059 | 51 | |
3060 | 51 | // Always return false here, as this function only makes sure that the two f64 |
3061 | 51 | // values a ppc_fp128 value is split into are both passed in registers or both |
3062 | 51 | // passed on the stack and does not actually allocate a register for the |
3063 | 51 | // current argument. |
3064 | 51 | return false; |
3065 | 51 | } |
3066 | | |
3067 | | /// FPR - The set of FP registers that should be allocated for arguments, |
3068 | | /// on Darwin. |
3069 | | static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, |
3070 | | PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10, |
3071 | | PPC::F11, PPC::F12, PPC::F13}; |
3072 | | |
3073 | | /// QFPR - The set of QPX registers that should be allocated for arguments. |
3074 | | static const MCPhysReg QFPR[] = { |
3075 | | PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7, |
3076 | | PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13}; |
3077 | | |
3078 | | /// CalculateStackSlotSize - Calculates the size reserved for this argument on |
3079 | | /// the stack. |
3080 | | static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, |
3081 | 18.7k | unsigned PtrByteSize) { |
3082 | 18.7k | unsigned ArgSize = ArgVT.getStoreSize(); |
3083 | 18.7k | if (Flags.isByVal()) |
3084 | 144 | ArgSize = Flags.getByValSize(); |
3085 | 18.7k | |
3086 | 18.7k | // Round up to multiples of the pointer size, except for array members, |
3087 | 18.7k | // which are always packed. |
3088 | 18.7k | if (!Flags.isInConsecutiveRegs()) |
3089 | 16.5k | ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; |
3090 | 18.7k | |
3091 | 18.7k | return ArgSize; |
3092 | 18.7k | } |
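
The rounding above is ordinary round-up-to-a-multiple arithmetic; for example, with 8-byte pointers:

  #include <cassert>

  static unsigned RoundUp(unsigned Size, unsigned PtrByteSize) {
    return ((Size + PtrByteSize - 1) / PtrByteSize) * PtrByteSize;
  }

  int main() {
    assert(RoundUp(1, 8) == 8);    // an i8 still occupies a full slot
    assert(RoundUp(12, 8) == 16);  // a 12-byte aggregate takes two slots
    assert(RoundUp(16, 8) == 16);  // exact multiples are unchanged
    return 0;
  }
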
3093 | | |
3094 | | /// CalculateStackSlotAlignment - Calculates the alignment of this argument |
3095 | | /// on the stack. |
3096 | | static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, |
3097 | | ISD::ArgFlagsTy Flags, |
3098 | 33.3k | unsigned PtrByteSize) { |
3099 | 33.3k | unsigned Align = PtrByteSize; |
3100 | 33.3k | |
3101 | 33.3k | // Altivec parameters are padded to a 16 byte boundary. |
3102 | 33.3k | if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3103 | 33.3k | ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3104 | 33.3k | ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3105 | 26.8k | ArgVT == MVT::v1i128)
3106 | 6.68k | Align = 16;
3107 | 33.3k | // QPX vector types stored in double-precision are padded to a 32 byte
3108 | 33.3k | // boundary.
3109 | 26.6k | else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
3110 | 266 | Align = 32; |
3111 | 33.3k | |
3112 | 33.3k | // ByVal parameters are aligned as requested. |
3113 | 33.3k | if (Flags.isByVal()) {
3114 | 262 | unsigned BVAlign = Flags.getByValAlign(); |
3115 | 262 | if (BVAlign > PtrByteSize) {
3116 | 40 | if (BVAlign % PtrByteSize != 0) |
3117 | 0 | llvm_unreachable( |
3118 | 40 | "ByVal alignment is not a multiple of the pointer size"); |
3119 | 40 | |
3120 | 40 | Align = BVAlign; |
3121 | 40 | } |
3122 | 262 | } |
3123 | 33.3k | |
3124 | 33.3k | // Array members are always packed to their original alignment. |
3125 | 33.3k | if (Flags.isInConsecutiveRegs()) {
3126 | 3.82k | // If the array member was split into multiple registers, the first |
3127 | 3.82k | // needs to be aligned to the size of the full type. (Except for |
3128 | 3.82k | // ppcf128, which is only aligned as its f64 components.) |
3129 | 3.82k | if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3130 | 84 | Align = OrigVT.getStoreSize(); |
3131 | 3.82k | else |
3132 | 3.74k | Align = ArgVT.getStoreSize(); |
3133 | 3.82k | } |
3134 | 33.3k | |
3135 | 33.3k | return Align; |
3136 | 33.3k | } |
3137 | | |
3138 | | /// CalculateStackSlotUsed - Return whether this argument will use its |
3139 | | /// stack slot (instead of being passed in registers). ArgOffset, |
3140 | | /// AvailableFPRs, and AvailableVRs must hold the current argument |
3141 | | /// position, and will be updated to account for this argument. |
3142 | | static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, |
3143 | | ISD::ArgFlagsTy Flags, |
3144 | | unsigned PtrByteSize, |
3145 | | unsigned LinkageSize, |
3146 | | unsigned ParamAreaSize, |
3147 | | unsigned &ArgOffset, |
3148 | | unsigned &AvailableFPRs, |
3149 | 15.4k | unsigned &AvailableVRs, bool HasQPX) { |
3150 | 15.4k | bool UseMemory = false; |
3151 | 15.4k | |
3152 | 15.4k | // Respect alignment of argument on the stack. |
3153 | 15.4k | unsigned Align = |
3154 | 15.4k | CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); |
3155 | 15.4k | ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; |
3156 | 15.4k | // If there's no space left in the argument save area, we must |
3157 | 15.4k | // use memory (this check also catches zero-sized arguments). |
3158 | 15.4k | if (ArgOffset >= LinkageSize + ParamAreaSize) |
3159 | 2.54k | UseMemory = true; |
3160 | 15.4k | |
3161 | 15.4k | // Allocate argument on the stack. |
3162 | 15.4k | ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); |
3163 | 15.4k | if (Flags.isInConsecutiveRegsLast()) |
3164 | 227 | ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; |
3165 | 15.4k | // If we overran the argument save area, we must use memory |
3166 | 15.4k | // (this check catches arguments passed partially in memory) |
3167 | 15.4k | if (ArgOffset > LinkageSize + ParamAreaSize) |
3168 | 2.57k | UseMemory = true; |
3169 | 15.4k | |
3170 | 15.4k | // However, if the argument is actually passed in an FPR or a VR, |
3171 | 15.4k | // we don't use memory after all. |
3172 | 15.4k | if (!Flags.isByVal()) {
3173 | 15.3k | if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
3174 | 15.3k | // QPX registers overlap with the scalar FP registers.
3175 | 11.7k | (HasQPX && (ArgVT == MVT::v4f32 ||
3176 | 218 | ArgVT == MVT::v4f64 ||
3177 | 308 | ArgVT == MVT::v4i1)))
3178 | 3.84k | if (AvailableFPRs > 0) {
3179 | 3.65k | --AvailableFPRs;
3180 | 3.65k | return false;
3181 | 3.65k | }
3182 | 11.7k | if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3183 | 11.7k | ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3184 | 11.7k | ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3185 | 8.47k | ArgVT == MVT::v1i128)
3186 | 3.33k | if (AvailableVRs > 0) {
3187 | 3.02k | --AvailableVRs; |
3188 | 3.02k | return false; |
3189 | 3.02k | } |
3190 | 8.76k | } |
3191 | 8.76k | |
3192 | 8.76k | return UseMemory; |
3193 | 8.76k | } |
3194 | | |
3195 | | /// EnsureStackAlignment - Round stack frame size up from NumBytes to |
3196 | | /// ensure minimum alignment required for target. |
3197 | | static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, |
3198 | 7.49k | unsigned NumBytes) { |
3199 | 7.49k | unsigned TargetAlign = Lowering->getStackAlignment(); |
3200 | 7.49k | unsigned AlignMask = TargetAlign - 1; |
3201 | 7.49k | NumBytes = (NumBytes + AlignMask) & ~AlignMask; |
3202 | 7.49k | return NumBytes; |
3203 | 7.49k | } |
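
The mask form used above is the standard power-of-two round-up: adding AlignMask forces any partial low bits to carry, and the AND then clears them.

  #include <cassert>

  int main() {
    unsigned TargetAlign = 16;             // stack alignment, a power of two
    unsigned AlignMask = TargetAlign - 1;  // 0xF
    assert(((100 + AlignMask) & ~AlignMask) == 112);
    assert(((112 + AlignMask) & ~AlignMask) == 112);
    return 0;
  }
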
3204 | | |
3205 | | SDValue PPCTargetLowering::LowerFormalArguments( |
3206 | | SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |
3207 | | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, |
3208 | 7.49k | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { |
3209 | 7.49k | if (Subtarget.isSVR4ABI()) {
3210 | 7.24k | if (Subtarget.isPPC64()) |
3211 | 6.53k | return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, |
3212 | 6.53k | dl, DAG, InVals); |
3213 | 7.24k | else |
3214 | 709 | return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, |
3215 | 709 | dl, DAG, InVals); |
3216 | 252 | } else { |
3217 | 252 | return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, |
3218 | 252 | dl, DAG, InVals); |
3219 | 252 | } |
3220 | 0 | } |
3221 | | |
3222 | | SDValue PPCTargetLowering::LowerFormalArguments_32SVR4( |
3223 | | SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |
3224 | | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, |
3225 | 709 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { |
3226 | 709 | |
3227 | 709 | // 32-bit SVR4 ABI Stack Frame Layout: |
3228 | 709 | // +-----------------------------------+ |
3229 | 709 | // +--> | Back chain | |
3230 | 709 | // | +-----------------------------------+ |
3231 | 709 | // | | Floating-point register save area | |
3232 | 709 | // | +-----------------------------------+ |
3233 | 709 | // | | General register save area | |
3234 | 709 | // | +-----------------------------------+ |
3235 | 709 | // | | CR save word | |
3236 | 709 | // | +-----------------------------------+ |
3237 | 709 | // | | VRSAVE save word | |
3238 | 709 | // | +-----------------------------------+ |
3239 | 709 | // | | Alignment padding | |
3240 | 709 | // | +-----------------------------------+ |
3241 | 709 | // | | Vector register save area | |
3242 | 709 | // | +-----------------------------------+ |
3243 | 709 | // | | Local variable space | |
3244 | 709 | // | +-----------------------------------+ |
3245 | 709 | // | | Parameter list area | |
3246 | 709 | // | +-----------------------------------+ |
3247 | 709 | // | | LR save word | |
3248 | 709 | // | +-----------------------------------+ |
3249 | 709 | // SP--> +--- | Back chain | |
3250 | 709 | // +-----------------------------------+ |
3251 | 709 | // |
3252 | 709 | // Specifications: |
3253 | 709 | // System V Application Binary Interface PowerPC Processor Supplement |
3254 | 709 | // AltiVec Technology Programming Interface Manual |
3255 | 709 | |
3256 | 709 | MachineFunction &MF = DAG.getMachineFunction(); |
3257 | 709 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
3258 | 709 | PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); |
3259 | 709 | |
3260 | 709 | EVT PtrVT = getPointerTy(MF.getDataLayout()); |
3261 | 709 | // Potential tail calls could cause overwriting of argument stack slots. |
3262 | 709 | bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && |
3263 | 2 | (CallConv == CallingConv::Fast)); |
3264 | 709 | unsigned PtrByteSize = 4; |
3265 | 709 | |
3266 | 709 | // Assign locations to all of the incoming arguments. |
3267 | 709 | SmallVector<CCValAssign, 16> ArgLocs; |
3268 | 709 | PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, |
3269 | 709 | *DAG.getContext()); |
3270 | 709 | |
3271 | 709 | // Reserve space for the linkage area on the stack. |
3272 | 709 | unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); |
3273 | 709 | CCInfo.AllocateStack(LinkageSize, PtrByteSize); |
3274 | 709 | if (useSoftFloat()) |
3275 | 24 | CCInfo.PreAnalyzeFormalArguments(Ins); |
3276 | 709 | |
3277 | 709 | CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4); |
3278 | 709 | CCInfo.clearWasPPCF128(); |
3279 | 709 | |
3280 | 1.85k | for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3281 | 1.14k | CCValAssign &VA = ArgLocs[i]; |
3282 | 1.14k | |
3283 | 1.14k | // Arguments stored in registers. |
3284 | 1.14k | if (VA.isRegLoc()) {
3285 | 1.12k | const TargetRegisterClass *RC; |
3286 | 1.12k | EVT ValVT = VA.getValVT(); |
3287 | 1.12k | |
3288 | 1.12k | switch (ValVT.getSimpleVT().SimpleTy) { |
3289 | 0 | default: |
3290 | 0 | llvm_unreachable("ValVT not supported by formal arguments Lowering"); |
3291 | 778 | case MVT::i1: |
3292 | 778 | case MVT::i32: |
3293 | 778 | RC = &PPC::GPRCRegClass; |
3294 | 778 | break; |
3295 | 162 | case MVT::f32: |
3296 | 162 | if (Subtarget.hasP8Vector()) |
3297 | 0 | RC = &PPC::VSSRCRegClass; |
3298 | 162 | else |
3299 | 162 | RC = &PPC::F4RCRegClass; |
3300 | 162 | break; |
3301 | 166 | case MVT::f64: |
3302 | 166 | if (Subtarget.hasVSX()) |
3303 | 0 | RC = &PPC::VSFRCRegClass; |
3304 | 166 | else |
3305 | 166 | RC = &PPC::F8RCRegClass; |
3306 | 166 | break; |
3307 | 11 | case MVT::v16i8: |
3308 | 11 | case MVT::v8i16: |
3309 | 11 | case MVT::v4i32: |
3310 | 11 | RC = &PPC::VRRCRegClass; |
3311 | 11 | break; |
3312 | 9 | case MVT::v4f32: |
3313 | 9 | RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
3314 | 9 | break; |
3315 | 0 | case MVT::v2f64: |
3316 | 0 | case MVT::v2i64: |
3317 | 0 | RC = &PPC::VRRCRegClass; |
3318 | 0 | break; |
3319 | 0 | case MVT::v4f64: |
3320 | 0 | RC = &PPC::QFRCRegClass; |
3321 | 0 | break; |
3322 | 0 | case MVT::v4i1: |
3323 | 0 | RC = &PPC::QBRCRegClass; |
3324 | 0 | break; |
3325 | 1.12k | } |
3326 | 1.12k | |
3327 | 1.12k | // Transform the arguments stored in physical registers into virtual ones. |
3328 | 1.12k | unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); |
3329 | 1.12k | SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, |
3330 | 1.12k | ValVT == MVT::i1 ? MVT::i32 : ValVT);
3331 | 1.12k | |
3332 | 1.12k | if (ValVT == MVT::i1) |
3333 | 2 | ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue); |
3334 | 1.12k | |
3335 | 1.12k | InVals.push_back(ArgValue); |
3336 | 1.14k | } else { |
3337 | 23 | // Argument stored in memory. |
3338 | 23 | assert(VA.isMemLoc()); |
3339 | 23 | |
3340 | 23 | unsigned ArgSize = VA.getLocVT().getStoreSize(); |
3341 | 23 | int FI = MFI.CreateFixedObject(ArgSize, VA.getLocMemOffset(), |
3342 | 23 | isImmutable); |
3343 | 23 | |
3344 | 23 | // Create load nodes to retrieve arguments from the stack. |
3345 | 23 | SDValue FIN = DAG.getFrameIndex(FI, PtrVT); |
3346 | 23 | InVals.push_back( |
3347 | 23 | DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo())); |
3348 | 23 | } |
3349 | 1.14k | } |
3350 | 709 | |
3351 | 709 | // Assign locations to all of the incoming aggregate by value arguments. |
3352 | 709 | // Aggregates passed by value are stored in the local variable space of the |
3353 | 709 | // caller's stack frame, right above the parameter list area. |
3354 | 709 | SmallVector<CCValAssign, 16> ByValArgLocs; |
3355 | 709 | CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), |
3356 | 709 | ByValArgLocs, *DAG.getContext()); |
3357 | 709 | |
3358 | 709 | // Reserve stack space for the allocations in CCInfo. |
3359 | 709 | CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); |
3360 | 709 | |
3361 | 709 | CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal); |
3362 | 709 | |
3363 | 709 | // Area that is at least reserved in the caller of this function. |
3364 | 709 | unsigned MinReservedArea = CCByValInfo.getNextStackOffset(); |
3365 | 709 | MinReservedArea = std::max(MinReservedArea, LinkageSize); |
3366 | 709 | |
3367 | 709 | // Set the size that is at least reserved in caller of this function. Tail |
3368 | 709 | // call optimized function's reserved stack space needs to be aligned so that |
3369 | 709 | // taking the difference between two stack areas will result in an aligned |
3370 | 709 | // stack. |
3371 | 709 | MinReservedArea = |
3372 | 709 | EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); |
3373 | 709 | FuncInfo->setMinReservedArea(MinReservedArea); |
3374 | 709 | |
3375 | 709 | SmallVector<SDValue, 8> MemOps; |
3376 | 709 | |
3377 | 709 | // If the function takes variable number of arguments, make a frame index for |
3378 | 709 | // the start of the first vararg value... for expansion of llvm.va_start. |
3379 | 709 | if (isVarArg) {
3380 | 1 | static const MCPhysReg GPArgRegs[] = { |
3381 | 1 | PPC::R3, PPC::R4, PPC::R5, PPC::R6, |
3382 | 1 | PPC::R7, PPC::R8, PPC::R9, PPC::R10, |
3383 | 1 | }; |
3384 | 1 | const unsigned NumGPArgRegs = array_lengthof(GPArgRegs); |
3385 | 1 | |
3386 | 1 | static const MCPhysReg FPArgRegs[] = { |
3387 | 1 | PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, |
3388 | 1 | PPC::F8 |
3389 | 1 | }; |
3390 | 1 | unsigned NumFPArgRegs = array_lengthof(FPArgRegs); |
3391 | 1 | |
3392 | 1 | if (useSoftFloat()) |
3393 | 0 | NumFPArgRegs = 0; |
3394 | 1 | |
3395 | 1 | FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs)); |
3396 | 1 | FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs)); |
3397 | 1 | |
3398 | 1 | // Make room for NumGPArgRegs and NumFPArgRegs. |
3399 | 1 | int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 + |
3400 | 1 | NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8; |
3401 | 1 | |
3402 | 1 | FuncInfo->setVarArgsStackOffset( |
3403 | 1 | MFI.CreateFixedObject(PtrVT.getSizeInBits()/8, |
3404 | 1 | CCInfo.getNextStackOffset(), true)); |
3405 | 1 | |
3406 | 1 | FuncInfo->setVarArgsFrameIndex(MFI.CreateStackObject(Depth, 8, false)); |
3407 | 1 | SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); |
3408 | 1 | |
3409 | 1 | // The fixed integer arguments of a variadic function are stored to the |
3410 | 1 | // VarArgsFrameIndex on the stack so that they may be loaded by |
3411 | 1 | // dereferencing the result of va_next. |
3412 | 9 | for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
3413 | 8 | // Get an existing live-in vreg, or add a new one. |
3414 | 8 | unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]); |
3415 | 8 | if (!VReg) |
3416 | 7 | VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass); |
3417 | 8 | |
3418 | 8 | SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); |
3419 | 8 | SDValue Store = |
3420 | 8 | DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); |
3421 | 8 | MemOps.push_back(Store); |
3422 | 8 | // Increment the address by four for the next argument to store |
3423 | 8 | SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT); |
3424 | 8 | FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); |
3425 | 8 | } |
3426 | 1 | |
3427 | 1 | // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6 |
3428 | 1 | // is set. |
3429 | 1 | // The double arguments are stored to the VarArgsFrameIndex |
3430 | 1 | // on the stack. |
3431 | 9 | for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
3432 | 8 | // Get an existing live-in vreg, or add a new one. |
3433 | 8 | unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]); |
3434 | 8 | if (!VReg) |
3435 | 7 | VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass); |
3436 | 8 | |
3437 | 8 | SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64); |
3438 | 8 | SDValue Store = |
3439 | 8 | DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); |
3440 | 8 | MemOps.push_back(Store); |
3441 | 8 | // Increment the address by eight for the next argument to store |
3442 | 8 | SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl, |
3443 | 8 | PtrVT); |
3444 | 8 | FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); |
3445 | 8 | } |
3446 | 1 | } |
3447 | 709 | |
3448 | 709 | if (!MemOps.empty()) |
3449 | 1 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); |
3450 | 709 | |
3451 | 709 | return Chain; |
3452 | 709 | } |
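
Illustration: the varargs block above reserves one save slot per integer and per FP argument register. Below is a minimal standalone sketch of that sizing, with the 32-bit pointer width and f64 width hard-coded as assumptions (the real code queries PtrVT and MVT::f64):

#include <cstdio>

int main() {
  // Assumed constants: 8 GPRs (r3-r10, 4 bytes each on 32-bit SVR4) and
  // 8 FPRs (f1-f8, stored as 8-byte f64), matching the arrays above.
  const unsigned NumGPArgRegs = 8, PtrBytes = 4;
  const unsigned NumFPArgRegs = 8, F64Bytes = 8;
  unsigned Depth = NumGPArgRegs * PtrBytes + NumFPArgRegs * F64Bytes;
  std::printf("vararg register save area: %u bytes\n", Depth); // prints 96
  return 0;
}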
3453 | | |
3454 | | // PPC64 passes i8, i16, and i32 values in i64 registers. Promote |
3455 | | // value to MVT::i64 and then truncate to the correct register size. |
3456 | | SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, |
3457 | | EVT ObjectVT, SelectionDAG &DAG, |
3458 | | SDValue ArgVal, |
3459 | 2.57k | const SDLoc &dl) const { |
3460 | 2.57k | if (Flags.isSExt()) |
3461 | 884 | ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal, |
3462 | 884 | DAG.getValueType(ObjectVT)); |
3463 | 1.69k | else if (Flags.isZExt())
3464 | 572 | ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal, |
3465 | 572 | DAG.getValueType(ObjectVT)); |
3466 | 2.57k | |
3467 | 2.57k | return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal); |
3468 | 2.57k | } |
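
Illustration: a scalar model (an assumption-laden sketch, not the DAG code itself) of what extendArgForPPC64 expresses. A small integer argument arrives in a full 64-bit GPR already sign- or zero-extended per the ABI; the lowering merely asserts that fact (AssertSext/AssertZext) and truncates to the declared type:

#include <cassert>
#include <cstdint>

// Models AssertSext i32 followed by TRUNCATE: the top 33 bits must all
// equal bit 31, which the ABI guarantees for sign-extended arguments.
int32_t takeSExtArg(int64_t Reg) {
  assert(Reg == (int64_t)(int32_t)Reg && "caller must sign-extend");
  return (int32_t)Reg; // ISD::TRUNCATE
}

// Models AssertZext i32 followed by TRUNCATE: the top 32 bits must be zero.
uint32_t takeZExtArg(uint64_t Reg) {
  assert(Reg == (uint64_t)(uint32_t)Reg && "caller must zero-extend");
  return (uint32_t)Reg; // ISD::TRUNCATE
}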
3469 | | |
3470 | | SDValue PPCTargetLowering::LowerFormalArguments_64SVR4( |
3471 | | SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |
3472 | | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, |
3473 | 6.53k | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { |
3474 | 6.53k | // TODO: add description of PPC stack frame format, or at least some docs. |
3475 | 6.53k | // |
3476 | 6.53k | bool isELFv2ABI = Subtarget.isELFv2ABI(); |
3477 | 6.53k | bool isLittleEndian = Subtarget.isLittleEndian(); |
3478 | 6.53k | MachineFunction &MF = DAG.getMachineFunction(); |
3479 | 6.53k | MachineFrameInfo &MFI = MF.getFrameInfo(); |
3480 | 6.53k | PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); |
3481 | 6.53k | |
3482 | 6.53k | assert(!(CallConv == CallingConv::Fast && isVarArg) && |
3483 | 6.53k | "fastcc not supported on varargs functions"); |
3484 | 6.53k | |
3485 | 6.53k | EVT PtrVT = getPointerTy(MF.getDataLayout()); |
3486 | 6.53k | // Potential tail calls could cause overwriting of argument stack slots. |
3487 | 6.53k | bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && |
3488 | 2 | (CallConv == CallingConv::Fast)); |
3489 | 6.53k | unsigned PtrByteSize = 8; |
3490 | 6.53k | unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); |
3491 | 6.53k | |
3492 | 6.53k | static const MCPhysReg GPR[] = { |
3493 | 6.53k | PPC::X3, PPC::X4, PPC::X5, PPC::X6, |
3494 | 6.53k | PPC::X7, PPC::X8, PPC::X9, PPC::X10, |
3495 | 6.53k | }; |
3496 | 6.53k | static const MCPhysReg VR[] = { |
3497 | 6.53k | PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, |
3498 | 6.53k | PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 |
3499 | 6.53k | }; |
3500 | 6.53k | |
3501 | 6.53k | const unsigned Num_GPR_Regs = array_lengthof(GPR); |
3502 | 6.53k | const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
3503 | 6.53k | const unsigned Num_VR_Regs = array_lengthof(VR); |
3504 | 6.53k | const unsigned Num_QFPR_Regs = Num_FPR_Regs; |
3505 | 6.53k | |
3506 | 6.53k | // Do a first pass over the arguments to determine whether the ABI |
3507 | 6.53k | // guarantees that our caller has allocated the parameter save area |
3508 | 6.53k | // on its stack frame. In the ELFv1 ABI, this is always the case; |
3509 | 6.53k | // in the ELFv2 ABI, it is true if this is a vararg function or if |
3510 | 6.53k | // any parameter is located in a stack slot. |
3511 | 6.53k | |
3512 | 2.52k | bool HasParameterArea = !isELFv2ABI || isVarArg; |
3513 | 6.53k | unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize; |
3514 | 6.53k | unsigned NumBytes = LinkageSize; |
3515 | 6.53k | unsigned AvailableFPRs = Num_FPR_Regs; |
3516 | 6.53k | unsigned AvailableVRs = Num_VR_Regs; |
3517 | 20.7k | for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3518 | 14.2k | if (Ins[i].Flags.isNest()) |
3519 | 1 | continue; |
3520 | 14.2k | |
3521 | 14.2k | if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
3522 | 14.2k | PtrByteSize, LinkageSize, ParamAreaSize, |
3523 | 14.2k | NumBytes, AvailableFPRs, AvailableVRs, |
3524 | 14.2k | Subtarget.hasQPX())) |
3525 | 1.31k | HasParameterArea = true; |
3526 | 14.2k | } |
3527 | 6.53k | |
3528 | 6.53k | // Add DAG nodes to load the arguments or copy them out of registers. On |
3529 | 6.53k | // entry to a function on PPC, the arguments start after the linkage area, |
3530 | 6.53k | // although the first ones are often in registers. |
3531 | 6.53k | |
3532 | 6.53k | unsigned ArgOffset = LinkageSize; |
3533 | 6.53k | unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; |
3534 | 6.53k | unsigned &QFPR_idx = FPR_idx; |
3535 | 6.53k | SmallVector<SDValue, 8> MemOps; |
3536 | 6.53k | Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); |
3537 | 6.53k | unsigned CurArgIdx = 0; |
3538 | 20.7k | for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
3539 | 14.2k | SDValue ArgVal; |
3540 | 14.2k | bool needsLoad = false; |
3541 | 14.2k | EVT ObjectVT = Ins[ArgNo].VT; |
3542 | 14.2k | EVT OrigVT = Ins[ArgNo].ArgVT; |
3543 | 14.2k | unsigned ObjSize = ObjectVT.getStoreSize(); |
3544 | 14.2k | unsigned ArgSize = ObjSize; |
3545 | 14.2k | ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; |
3546 | 14.2k | if (Ins[ArgNo].isOrigArg()) {
3547 | 14.2k | std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx); |
3548 | 14.2k | CurArgIdx = Ins[ArgNo].getOrigArgIndex(); |
3549 | 14.2k | } |
3550 | 14.2k | // We re-align the argument offset for each argument, except when using the |
3551 | 14.2k | // fast calling convention, when we need to make sure we do that only when |
3552 | 14.2k | // we'll actually use a stack slot. |
3553 | 14.2k | unsigned CurArgOffset, Align; |
3554 | 12.7k | auto ComputeArgOffset = [&]() { |
3555 | 12.7k | /* Respect alignment of argument on the stack. */ |
3556 | 12.7k | Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize); |
3557 | 12.7k | ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; |
3558 | 12.7k | CurArgOffset = ArgOffset; |
3559 | 12.7k | }; |
3560 | 14.2k | |
3561 | 14.2k | if (CallConv != CallingConv::Fast) {
3562 | 12.1k | ComputeArgOffset(); |
3563 | 12.1k | |
3564 | 12.1k | /* Compute GPR index associated with argument offset. */ |
3565 | 12.1k | GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; |
3566 | 12.1k | GPR_idx = std::min(GPR_idx, Num_GPR_Regs); |
3567 | 12.1k | } |
3568 | 14.2k | |
3569 | 14.2k | // FIXME the codegen can be much improved in some cases. |
3570 | 14.2k | // We do not have to keep everything in memory. |
3571 | 14.2k | if (Flags.isByVal()) {
3572 | 72 | assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit"); |
3573 | 72 | |
3574 | 72 | if (CallConv == CallingConv::Fast) |
3575 | 1 | ComputeArgOffset(); |
3576 | 72 | |
3577 | 72 | // ObjSize is the true size, ArgSize rounded up to multiple of registers. |
3578 | 72 | ObjSize = Flags.getByValSize(); |
3579 | 72 | ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; |
3580 | 72 | // Empty aggregate parameters do not take up registers. Examples: |
3581 | 72 | // struct { } a; |
3582 | 72 | // union { } b; |
3583 | 72 | // int c[0]; |
3584 | 72 | // etc. However, we have to provide a place-holder in InVals, so |
3585 | 72 | // pretend we have an 8-byte item at the current address for that |
3586 | 72 | // purpose. |
3587 | 72 | if (!ObjSize) {
3588 | 2 | int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true); |
3589 | 2 | SDValue FIN = DAG.getFrameIndex(FI, PtrVT); |
3590 | 2 | InVals.push_back(FIN); |
3591 | 2 | continue; |
3592 | 2 | } |
3593 | 70 | |
3594 | 70 | // Create a stack object covering all stack doublewords occupied |
3595 | 70 | // by the argument. If the argument is (fully or partially) on |
3596 | 70 | // the stack, or if the argument is fully in registers but the |
3597 | 70 | // caller has allocated the parameter save area anyway, we can refer
3598 | 70 | // directly to the caller's stack frame. Otherwise, create a |
3599 | 70 | // local copy in our own frame. |
3600 | 70 | int FI; |
3601 | 70 | if (HasParameterArea || |
3602 | 2 | ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize) |
3603 | 68 | FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true); |
3604 | 70 | else |
3605 | 2 | FI = MFI.CreateStackObject(ArgSize, Align, false); |
3606 | 70 | SDValue FIN = DAG.getFrameIndex(FI, PtrVT); |
3607 | 70 | |
3608 | 70 | // Handle aggregates smaller than 8 bytes. |
3609 | 70 | if (ObjSize < PtrByteSize) {
3610 | 29 | // The value of the object is its address, which differs from the |
3611 | 29 | // address of the enclosing doubleword on big-endian systems. |
3612 | 29 | SDValue Arg = FIN; |
3613 | 29 | if (!isLittleEndian) {
3614 | 28 | SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT); |
3615 | 28 | Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff); |
3616 | 28 | } |
3617 | 29 | InVals.push_back(Arg); |
3618 | 29 | |
3619 | 29 | if (GPR_idx != Num_GPR_Regs) {
3620 | 16 | unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); |
3621 | 16 | SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); |
3622 | 16 | SDValue Store; |
3623 | 16 | |
3624 | 16 | if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
3625 | 2 | EVT ObjType = (ObjSize == 1 ? MVT::i8 : |
3626 | 6 | (ObjSize == 2 ? MVT::i16 : MVT::i32));
3627 | 8 | Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg, |
3628 | 8 | MachinePointerInfo(&*FuncArg), ObjType); |
3629 | 16 | } else { |
3630 | 8 | // For sizes that don't fit a truncating store (3, 5, 6, 7), |
3631 | 8 | // store the whole register as-is to the parameter save area |
3632 | 8 | // slot. |
3633 | 8 | Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, |
3634 | 8 | MachinePointerInfo(&*FuncArg)); |
3635 | 8 | } |
3636 | 16 | |
3637 | 16 | MemOps.push_back(Store); |
3638 | 16 | } |
3639 | 29 | // Whether we copied from a register or not, advance the offset |
3640 | 29 | // into the parameter save area by a full doubleword. |
3641 | 29 | ArgOffset += PtrByteSize; |
3642 | 29 | continue; |
3643 | 29 | } |
3644 | 41 | |
3645 | 41 | // The value of the object is its address, which is the address of |
3646 | 41 | // its first stack doubleword. |
3647 | 41 | InVals.push_back(FIN); |
3648 | 41 | |
3649 | 41 | // Store whatever pieces of the object are in registers to memory. |
3650 | 112 | for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
3651 | 84 | if (GPR_idx == Num_GPR_Regs) |
3652 | 13 | break; |
3653 | 71 | |
3654 | 71 | unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); |
3655 | 71 | SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); |
3656 | 71 | SDValue Addr = FIN; |
3657 | 71 | if (j) {
3658 | 40 | SDValue Off = DAG.getConstant(j, dl, PtrVT); |
3659 | 40 | Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off); |
3660 | 40 | } |
3661 | 84 | SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr, |
3662 | 84 | MachinePointerInfo(&*FuncArg, j)); |
3663 | 84 | MemOps.push_back(Store); |
3664 | 84 | ++GPR_idx; |
3665 | 84 | } |
3666 | 72 | ArgOffset += ArgSize; |
3667 | 72 | continue; |
3668 | 72 | } |
3669 | 14.1k | |
3670 | 14.1k | switch (ObjectVT.getSimpleVT().SimpleTy) { |
3671 | 0 | default: llvm_unreachable("Unhandled argument type!");
3672 | 7.41k | case MVT::i1: |
3673 | 7.41k | case MVT::i32: |
3674 | 7.41k | case MVT::i64: |
3675 | 7.41k | if (Flags.isNest()) {
3676 | 1 | // The 'nest' parameter, if any, is passed in R11. |
3677 | 1 | unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass); |
3678 | 1 | ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); |
3679 | 1 | |
3680 | 1 | if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3681 | 0 | ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); |
3682 | 1 | |
3683 | 1 | break; |
3684 | 1 | } |
3685 | 7.41k | |
3686 | 7.41k | // These can be scalar arguments or elements of an integer array type |
3687 | 7.41k | // passed directly. Clang may use those instead of "byval" aggregate |
3688 | 7.41k | // types to avoid forcing arguments to memory unnecessarily. |
3689 | 7.41k | if (GPR_idx != Num_GPR_Regs) {
3690 | 6.81k | unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); |
3691 | 6.81k | ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); |
3692 | 6.81k | |
3693 | 6.81k | if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3694 | 6.81k | // PPC64 passes i8, i16, and i32 values in i64 registers. Promote |
3695 | 6.81k | // value to MVT::i64 and then truncate to the correct register size. |
3696 | 2.55k | ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); |
3697 | 7.41k | } else { |
3698 | 607 | if (CallConv == CallingConv::Fast) |
3699 | 328 | ComputeArgOffset(); |
3700 | 607 | |
3701 | 607 | needsLoad = true; |
3702 | 607 | ArgSize = PtrByteSize; |
3703 | 607 | } |
3704 | 7.41k | if (CallConv != CallingConv::Fast || needsLoad)
3705 | 7.04k | ArgOffset += 8; |
3706 | 7.41k | break; |
3707 | 7.41k | |
3708 | 3.20k | case MVT::f32: |
3709 | 3.20k | case MVT::f64: |
3710 | 3.20k | // These can be scalar arguments or elements of a float array type |
3711 | 3.20k | // passed directly. The latter are used to implement ELFv2 homogenous |
3712 | 3.20k | // float aggregates. |
3713 | 3.20k | if (FPR_idx != Num_FPR_Regs) {
3714 | 3.05k | unsigned VReg; |
3715 | 3.05k | |
3716 | 3.05k | if (ObjectVT == MVT::f32) |
3717 | 1.39k | VReg = MF.addLiveIn(FPR[FPR_idx], |
3718 | 1.39k | Subtarget.hasP8Vector() |
3719 | 341 | ? &PPC::VSSRCRegClass |
3720 | 1.39k | : &PPC::F4RCRegClass); |
3721 | 3.05k | else |
3722 | 1.65k | VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX() |
3723 | 702 | ? &PPC::VSFRCRegClass |
3724 | 1.65k | : &PPC::F8RCRegClass); |
3725 | 3.05k | |
3726 | 3.05k | ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); |
3727 | 3.05k | ++FPR_idx; |
3728 | 3.20k | } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
3729 | 27 | // FIXME: We may want to re-enable this for CallingConv::Fast on the P8 |
3730 | 27 | // once we support fp <-> gpr moves. |
3731 | 27 | |
3732 | 27 | // This can only ever happen in the presence of f32 array types, |
3733 | 27 | // since otherwise we never run out of FPRs before running out |
3734 | 27 | // of GPRs. |
3735 | 27 | unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); |
3736 | 27 | ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); |
3737 | 27 | |
3738 | 27 | if (ObjectVT == MVT::f32) {
3739 | 24 | if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
3740 | 9 | ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal, |
3741 | 9 | DAG.getConstant(32, dl, MVT::i32)); |
3742 | 24 | ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal); |
3743 | 24 | } |
3744 | 27 | |
3745 | 27 | ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal); |
3746 | 153 | } else { |
3747 | 126 | if (CallConv == CallingConv::Fast) |
3748 | 123 | ComputeArgOffset(); |
3749 | 153 | |
3750 | 153 | needsLoad = true; |
3751 | 153 | } |
3752 | 3.20k | |
3753 | 3.20k | // When passing an array of floats, the array occupies consecutive |
3754 | 3.20k | // space in the argument area; only round up to the next doubleword |
3755 | 3.20k | // at the end of the array. Otherwise, each float takes 8 bytes. |
3756 | 3.20k | if (CallConv != CallingConv::Fast || needsLoad) {
3757 | 2.65k | ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
3758 | 2.65k | ArgOffset += ArgSize; |
3759 | 2.65k | if (Flags.isInConsecutiveRegsLast()) |
3760 | 69 | ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; |
3761 | 2.65k | } |
3762 | 3.20k | break; |
3763 | 3.38k | case MVT::v4f32: |
3764 | 3.38k | case MVT::v4i32: |
3765 | 3.38k | case MVT::v8i16: |
3766 | 3.38k | case MVT::v16i8: |
3767 | 3.38k | case MVT::v2f64: |
3768 | 3.38k | case MVT::v2i64: |
3769 | 3.38k | case MVT::v1i128: |
3770 | 3.38k | if (!Subtarget.hasQPX()) {
3771 | 3.29k | // These can be scalar arguments or elements of a vector array type |
3772 | 3.29k | // passed directly. The latter are used to implement ELFv2 homogenous |
3773 | 3.29k | // vector aggregates. |
3774 | 3.29k | if (VR_idx != Num_VR_Regs) {
3775 | 2.98k | unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); |
3776 | 2.98k | ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); |
3777 | 2.98k | ++VR_idx; |
3778 | 3.29k | } else { |
3779 | 312 | if (CallConv == CallingConv::Fast) |
3780 | 164 | ComputeArgOffset(); |
3781 | 312 | |
3782 | 312 | needsLoad = true; |
3783 | 312 | } |
3784 | 3.29k | if (CallConv != CallingConv::Fast || needsLoad)
3785 | 2.80k | ArgOffset += 16; |
3786 | 3.29k | break; |
3787 | 3.29k | } // not QPX |
3788 | 90 | |
3789 | 3.38k | assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 && |
3790 | 90 | "Invalid QPX parameter type"); |
3791 | 90 | /* fall through */ |
3792 | 90 | |
3793 | 221 | case MVT::v4f64: |
3794 | 221 | case MVT::v4i1: |
3795 | 221 | // QPX vectors are treated like their scalar floating-point subregisters |
3796 | 221 | // (except that they're larger). |
3797 | 221 | unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
3798 | 221 | if (QFPR_idx != Num_QFPR_Regs) {
3799 | 221 | const TargetRegisterClass *RC; |
3800 | 221 | switch (ObjectVT.getSimpleVT().SimpleTy) { |
3801 | 77 | case MVT::v4f64: RC = &PPC::QFRCRegClass; break; |
3802 | 90 | case MVT::v4f32: RC = &PPC::QSRCRegClass; break; |
3803 | 54 | default: RC = &PPC::QBRCRegClass; break; |
3804 | 221 | } |
3805 | 221 | |
3806 | 221 | unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC); |
3807 | 221 | ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); |
3808 | 221 | ++QFPR_idx; |
3809 | 221 | } else { |
3810 | 0 | if (CallConv == CallingConv::Fast) |
3811 | 0 | ComputeArgOffset(); |
3812 | 0 | needsLoad = true; |
3813 | 0 | } |
3814 | 221 | if (CallConv != CallingConv::Fast || needsLoad)
3815 | 221 | ArgOffset += Sz; |
3816 | 7.41k | break; |
3817 | 14.1k | } |
3818 | 14.1k | |
3819 | 14.1k | // We need to load the argument to a virtual register if we determined |
3820 | 14.1k | // above that we ran out of physical registers of the appropriate type. |
3821 | 14.1k | if (needsLoad) {
3822 | 1.04k | if (ObjSize < ArgSize && !isLittleEndian)
3823 | 14 | CurArgOffset += ArgSize - ObjSize; |
3824 | 1.04k | int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable); |
3825 | 1.04k | SDValue FIN = DAG.getFrameIndex(FI, PtrVT); |
3826 | 1.04k | ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo()); |
3827 | 1.04k | } |
3828 | 14.2k | |
3829 | 14.2k | InVals.push_back(ArgVal); |
3830 | 14.2k | } |
3831 | 6.53k | |
3832 | 6.53k | // Area that is at least reserved in the caller of this function. |
3833 | 6.53k | unsigned MinReservedArea; |
3834 | 6.53k | if (HasParameterArea) |
3835 | 4.04k | MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize); |
3836 | 6.53k | else |
3837 | 2.48k | MinReservedArea = LinkageSize; |
3838 | 6.53k | |
3839 | 6.53k | // Set the size that is at least reserved in the caller of this function. Tail
3840 | 6.53k | // call optimized functions' reserved stack space needs to be aligned so that |
3841 | 6.53k | // taking the difference between two stack areas will result in an aligned |
3842 | 6.53k | // stack. |
3843 | 6.53k | MinReservedArea = |
3844 | 6.53k | EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); |
3845 | 6.53k | FuncInfo->setMinReservedArea(MinReservedArea); |
3846 | 6.53k | |
3847 | 6.53k | // If the function takes a variable number of arguments, make a frame index
3848 | 6.53k | // for the start of the first vararg value... for expansion of llvm.va_start.
3849 | 6.53k | if (isVarArg) {
3850 | 9 | int Depth = ArgOffset; |
3851 | 9 | |
3852 | 9 | FuncInfo->setVarArgsFrameIndex( |
3853 | 9 | MFI.CreateFixedObject(PtrByteSize, Depth, true)); |
3854 | 9 | SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); |
3855 | 9 | |
3856 | 9 | // If this function is vararg, store any remaining integer argument regs |
3857 | 9 | // to their spots on the stack so that they may be loaded by dereferencing |
3858 | 9 | // the result of va_next. |
3859 | 9 | for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; |
3860 | 72 | GPR_idx < Num_GPR_Regs; ++GPR_idx) {
3861 | 63 | unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); |
3862 | 63 | SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); |
3863 | 63 | SDValue Store = |
3864 | 63 | DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); |
3865 | 63 | MemOps.push_back(Store); |
3866 | 63 | // Increment the address by four for the next argument to store |
3867 | 63 | // Increment the address by eight for the next argument to store
3868 | 63 | FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); |
3869 | 63 | } |
3870 | 9 | } |
3871 | 6.53k | |
3872 | 6.53k | if (!MemOps.empty()) |
3873 | 38 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); |
3874 | 6.53k | |
3875 | 6.53k | return Chain; |
3876 | 6.53k | } |
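
Illustration: two bookkeeping formulas from the loop above, shown standalone with made-up values (the 32-byte linkage size is an assumption valid for ELFv2; ELFv1 uses 48):

#include <cstdio>

int main() {
  const unsigned LinkageSize = 32; // assumption: ELFv2 linkage area
  const unsigned PtrByteSize = 8;
  unsigned ArgOffset = 40;         // e.g. one doubleword past the linkage area

  // ComputeArgOffset: round the running offset up to the parameter's alignment.
  unsigned Align = 16;             // a 16-byte-aligned vector parameter
  ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;      // -> 48

  // GPR index implied by the offset past the linkage area.
  unsigned GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; // -> 2
  std::printf("offset %u, next GPR index %u\n", ArgOffset, GPR_idx);
  return 0;
}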
3877 | | |
3878 | | SDValue PPCTargetLowering::LowerFormalArguments_Darwin( |
3879 | | SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |
3880 | | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, |
3881 | 252 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { |
3882 | 252 | // TODO: add description of PPC stack frame format, or at least some docs. |
3883 | 252 | // |
3884 | 252 | MachineFunction &MF = DAG.getMachineFunction(); |
3885 | 252 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
3886 | 252 | PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); |
3887 | 252 | |
3888 | 252 | EVT PtrVT = getPointerTy(MF.getDataLayout()); |
3889 | 252 | bool isPPC64 = PtrVT == MVT::i64; |
3890 | 252 | // Potential tail calls could cause overwriting of argument stack slots. |
3891 | 252 | bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && |
3892 | 2 | (CallConv == CallingConv::Fast)); |
3893 | 252 | unsigned PtrByteSize = isPPC64 ? 8 : 4;
3894 | 252 | unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); |
3895 | 252 | unsigned ArgOffset = LinkageSize; |
3896 | 252 | // Area that is at least reserved in caller of this function. |
3897 | 252 | unsigned MinReservedArea = ArgOffset; |
3898 | 252 | |
3899 | 252 | static const MCPhysReg GPR_32[] = { // 32-bit registers. |
3900 | 252 | PPC::R3, PPC::R4, PPC::R5, PPC::R6, |
3901 | 252 | PPC::R7, PPC::R8, PPC::R9, PPC::R10, |
3902 | 252 | }; |
3903 | 252 | static const MCPhysReg GPR_64[] = { // 64-bit registers. |
3904 | 252 | PPC::X3, PPC::X4, PPC::X5, PPC::X6, |
3905 | 252 | PPC::X7, PPC::X8, PPC::X9, PPC::X10, |
3906 | 252 | }; |
3907 | 252 | static const MCPhysReg VR[] = { |
3908 | 252 | PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, |
3909 | 252 | PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 |
3910 | 252 | }; |
3911 | 252 | |
3912 | 252 | const unsigned Num_GPR_Regs = array_lengthof(GPR_32); |
3913 | 252 | const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
3914 | 252 | const unsigned Num_VR_Regs = array_lengthof( VR); |
3915 | 252 | |
3916 | 252 | unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; |
3917 | 252 | |
3918 | 252 | const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
3919 | 252 | |
3920 | 252 | // In 32-bit non-varargs functions, the stack space for vectors is after the |
3921 | 252 | // stack space for non-vectors. We do not use this space unless we have |
3922 | 252 | // too many vectors to fit in registers, something that only occurs in |
3923 | 252 | // constructed examples:), but we have to walk the arglist to figure |
3924 | 252 | // that out...for the pathological case, compute VecArgOffset as the |
3925 | 252 | // start of the vector parameter area. Computing VecArgOffset is the |
3926 | 252 | // entire point of the following loop. |
3927 | 252 | unsigned VecArgOffset = ArgOffset; |
3928 | 252 | if (!isVarArg && !isPPC64) {
3929 | 463 | for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; |
3930 | 279 | ++ArgNo) {
3931 | 279 | EVT ObjectVT = Ins[ArgNo].VT; |
3932 | 279 | ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; |
3933 | 279 | |
3934 | 279 | if (Flags.isByVal()) {
3935 | 5 | // ObjSize is the true size, ArgSize rounded up to multiple of regs. |
3936 | 5 | unsigned ObjSize = Flags.getByValSize(); |
3937 | 5 | unsigned ArgSize = |
3938 | 5 | ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; |
3939 | 5 | VecArgOffset += ArgSize; |
3940 | 5 | continue; |
3941 | 5 | } |
3942 | 274 | |
3943 | 274 | switch(ObjectVT.getSimpleVT().SimpleTy) { |
3944 | 0 | default: llvm_unreachable("Unhandled argument type!");
3945 | 208 | case MVT::i1: |
3946 | 208 | case MVT::i32: |
3947 | 208 | case MVT::f32: |
3948 | 208 | VecArgOffset += 4; |
3949 | 208 | break; |
3950 | 64 | case MVT::i64: // PPC64 |
3951 | 64 | case MVT::f64: |
3952 | 64 | // FIXME: We are guaranteed to be !isPPC64 at this point. |
3953 | 64 | // Does MVT::i64 apply? |
3954 | 64 | VecArgOffset += 8; |
3955 | 64 | break; |
3956 | 2 | case MVT::v4f32: |
3957 | 2 | case MVT::v4i32: |
3958 | 2 | case MVT::v8i16: |
3959 | 2 | case MVT::v16i8: |
3960 | 2 | // Nothing to do, we're only looking at Nonvector args here. |
3961 | 2 | break; |
3962 | 279 | } |
3963 | 279 | } |
3964 | 184 | } |
3965 | 252 | // We've found where the vector parameter area in memory is. Skip the |
3966 | 252 | // first 12 parameters; these don't use that memory. |
3967 | 252 | VecArgOffset = ((VecArgOffset+15)/16)*16; |
3968 | 252 | VecArgOffset += 12*16; |
3969 | 252 | |
3970 | 252 | // Add DAG nodes to load the arguments or copy them out of registers. On |
3971 | 252 | // entry to a function on PPC, the arguments start after the linkage area, |
3972 | 252 | // although the first ones are often in registers. |
3973 | 252 | |
3974 | 252 | SmallVector<SDValue, 8> MemOps; |
3975 | 252 | unsigned nAltivecParamsAtEnd = 0; |
3976 | 252 | Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); |
3977 | 252 | unsigned CurArgIdx = 0; |
3978 | 650 | for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
3979 | 398 | SDValue ArgVal; |
3980 | 398 | bool needsLoad = false; |
3981 | 398 | EVT ObjectVT = Ins[ArgNo].VT; |
3982 | 398 | unsigned ObjSize = ObjectVT.getSizeInBits()/8; |
3983 | 398 | unsigned ArgSize = ObjSize; |
3984 | 398 | ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; |
3985 | 398 | if (Ins[ArgNo].isOrigArg()) {
3986 | 398 | std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx); |
3987 | 398 | CurArgIdx = Ins[ArgNo].getOrigArgIndex(); |
3988 | 398 | } |
3989 | 398 | unsigned CurArgOffset = ArgOffset; |
3990 | 398 | |
3991 | 398 | // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. |
3992 | 398 | if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
3993 | 398 | ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
3994 | 4 | if (isVarArg || isPPC64) {
3995 | 2 | MinReservedArea = ((MinReservedArea+15)/16)*16; |
3996 | 2 | MinReservedArea += CalculateStackSlotSize(ObjectVT, |
3997 | 2 | Flags, |
3998 | 2 | PtrByteSize); |
3999 | 4 | } else nAltivecParamsAtEnd++; |
4000 | 4 | } else |
4001 | 398 | // Calculate min reserved area. |
4002 | 394 | MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT, |
4003 | 394 | Flags, |
4004 | 394 | PtrByteSize); |
4005 | 398 | |
4006 | 398 | // FIXME the codegen can be much improved in some cases. |
4007 | 398 | // We do not have to keep everything in memory. |
4008 | 398 | if (Flags.isByVal()) {
4009 | 9 | assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit"); |
4010 | 9 | |
4011 | 9 | // ObjSize is the true size, ArgSize rounded up to multiple of registers. |
4012 | 9 | ObjSize = Flags.getByValSize(); |
4013 | 9 | ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; |
4014 | 9 | // Objects of size 1 and 2 are right justified, everything else is |
4015 | 9 | // left justified. This means the memory address is adjusted forwards. |
4016 | 9 | if (ObjSize==1 || ObjSize==2) {
4017 | 1 | CurArgOffset = CurArgOffset + (4 - ObjSize); |
4018 | 1 | } |
4019 | 9 | // The value of the object is its address. |
4020 | 9 | int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true); |
4021 | 9 | SDValue FIN = DAG.getFrameIndex(FI, PtrVT); |
4022 | 9 | InVals.push_back(FIN); |
4023 | 9 | if (ObjSize==1 || ObjSize==2) {
4024 | 1 | if (GPR_idx != Num_GPR_Regs) {
4025 | 1 | unsigned VReg; |
4026 | 1 | if (isPPC64) |
4027 | 0 | VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); |
4028 | 1 | else |
4029 | 1 | VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); |
4030 | 1 | SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); |
4031 | 1 | EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
4032 | 1 | SDValue Store = |
4033 | 1 | DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, |
4034 | 1 | MachinePointerInfo(&*FuncArg), ObjType); |
4035 | 1 | MemOps.push_back(Store); |
4036 | 1 | ++GPR_idx; |
4037 | 1 | } |
4038 | 1 | |
4039 | 1 | ArgOffset += PtrByteSize; |
4040 | 1 | |
4041 | 1 | continue; |
4042 | 1 | } |
4043 | 23 | for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4044 | 17 | // Store whatever pieces of the object are in registers |
4045 | 17 | // to memory. ArgOffset will be the address of the beginning |
4046 | 17 | // of the object. |
4047 | 17 | if (GPR_idx != Num_GPR_Regs) {
4048 | 15 | unsigned VReg; |
4049 | 15 | if (isPPC64) |
4050 | 6 | VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); |
4051 | 15 | else |
4052 | 9 | VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); |
4053 | 15 | int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true); |
4054 | 15 | SDValue FIN = DAG.getFrameIndex(FI, PtrVT); |
4055 | 15 | SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); |
4056 | 15 | SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, |
4057 | 15 | MachinePointerInfo(&*FuncArg, j)); |
4058 | 15 | MemOps.push_back(Store); |
4059 | 15 | ++GPR_idx; |
4060 | 15 | ArgOffset += PtrByteSize; |
4061 | 17 | } else { |
4062 | 2 | ArgOffset += ArgSize - (ArgOffset-CurArgOffset); |
4063 | 2 | break; |
4064 | 2 | } |
4065 | 17 | } |
4066 | 9 | continue; |
4067 | 9 | } |
4068 | 389 | |
4069 | 389 | switch (ObjectVT.getSimpleVT().SimpleTy) { |
4070 | 0 | default: llvm_unreachable("Unhandled argument type!");
4071 | 236 | case MVT::i1: |
4072 | 236 | case MVT::i32: |
4073 | 236 | if (!isPPC64) {
4074 | 211 | if (GPR_idx != Num_GPR_Regs) {
4075 | 198 | unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); |
4076 | 198 | ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); |
4077 | 198 | |
4078 | 198 | if (ObjectVT == MVT::i1) |
4079 | 0 | ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal); |
4080 | 198 | |
4081 | 198 | ++GPR_idx; |
4082 | 211 | } else { |
4083 | 13 | needsLoad = true; |
4084 | 13 | ArgSize = PtrByteSize; |
4085 | 13 | } |
4086 | 211 | // All int arguments reserve stack space in the Darwin ABI. |
4087 | 211 | ArgOffset += PtrByteSize; |
4088 | 211 | break; |
4089 | 211 | } |
4090 | 25 | LLVM_FALLTHROUGH;
4091 | 92 | case MVT::i64: // PPC64 |
4092 | 92 | if (GPR_idx != Num_GPR_Regs) {
4093 | 82 | unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); |
4094 | 82 | ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); |
4095 | 82 | |
4096 | 82 | if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4097 | 82 | // PPC64 passes i8, i16, and i32 values in i64 registers. Promote |
4098 | 82 | // value to MVT::i64 and then truncate to the correct register size. |
4099 | 17 | ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); |
4100 | 82 | |
4101 | 82 | ++GPR_idx; |
4102 | 92 | } else { |
4103 | 10 | needsLoad = true; |
4104 | 10 | ArgSize = PtrByteSize; |
4105 | 10 | } |
4106 | 92 | // All int arguments reserve stack space in the Darwin ABI. |
4107 | 92 | ArgOffset += 8; |
4108 | 92 | break; |
4109 | 25 | |
4110 | 82 | case MVT::f32: |
4111 | 82 | case MVT::f64: |
4112 | 82 | // Every 4 bytes of argument space consumes one of the GPRs available for |
4113 | 82 | // argument passing. |
4114 | 82 | if (GPR_idx != Num_GPR_Regs) {
4115 | 73 | ++GPR_idx; |
4116 | 73 | if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
4117 | 56 | ++GPR_idx; |
4118 | 73 | } |
4119 | 82 | if (FPR_idx != Num_FPR_Regs) {
4120 | 82 | unsigned VReg; |
4121 | 82 | |
4122 | 82 | if (ObjectVT == MVT::f32) |
4123 | 6 | VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); |
4124 | 82 | else |
4125 | 76 | VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); |
4126 | 82 | |
4127 | 82 | ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); |
4128 | 82 | ++FPR_idx; |
4129 | 0 | } else { |
4130 | 0 | needsLoad = true; |
4131 | 0 | } |
4132 | 82 | |
4133 | 82 | // All FP arguments reserve stack space in the Darwin ABI. |
4134 | 82 | ArgOffset += isPPC64 ? 8 : ObjSize;
4135 | 82 | break; |
4136 | 4 | case MVT::v4f32: |
4137 | 4 | case MVT::v4i32: |
4138 | 4 | case MVT::v8i16: |
4139 | 4 | case MVT::v16i8: |
4140 | 4 | // Note that vector arguments in registers don't reserve stack space, |
4141 | 4 | // except in varargs functions. |
4142 | 4 | if (VR_idx != Num_VR_Regs) {
4143 | 4 | unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); |
4144 | 4 | ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); |
4145 | 4 | if (isVarArg) {
4146 | 0 | while ((ArgOffset % 16) != 0) {
4147 | 0 | ArgOffset += PtrByteSize; |
4148 | 0 | if (GPR_idx != Num_GPR_Regs) |
4149 | 0 | GPR_idx++; |
4150 | 0 | } |
4151 | 0 | ArgOffset += 16; |
4152 | 0 | GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64? |
4153 | 0 | } |
4154 | 4 | ++VR_idx; |
4155 | 0 | } else { |
4156 | 0 | if (!isVarArg && !isPPC64) {
4157 | 0 | // Vectors go after all the nonvectors. |
4158 | 0 | CurArgOffset = VecArgOffset; |
4159 | 0 | VecArgOffset += 16; |
4160 | 0 | } else { |
4161 | 0 | // Vectors are aligned. |
4162 | 0 | ArgOffset = ((ArgOffset+15)/16)*16; |
4163 | 0 | CurArgOffset = ArgOffset; |
4164 | 0 | ArgOffset += 16; |
4165 | 0 | } |
4166 | 0 | needsLoad = true; |
4167 | 0 | } |
4168 | 236 | break; |
4169 | 389 | } |
4170 | 389 | |
4171 | 389 | // We need to load the argument to a virtual register if we determined above |
4172 | 389 | // that we ran out of physical registers of the appropriate type. |
4173 | 389 | if (needsLoad) {
4174 | 23 | int FI = MFI.CreateFixedObject(ObjSize, |
4175 | 23 | CurArgOffset + (ArgSize - ObjSize), |
4176 | 23 | isImmutable); |
4177 | 23 | SDValue FIN = DAG.getFrameIndex(FI, PtrVT); |
4178 | 23 | ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo()); |
4179 | 23 | } |
4180 | 398 | |
4181 | 398 | InVals.push_back(ArgVal); |
4182 | 398 | } |
4183 | 252 | |
4184 | 252 | // Allow for Altivec parameters at the end, if needed. |
4185 | 252 | if (nAltivecParamsAtEnd) {
4186 | 2 | MinReservedArea = ((MinReservedArea+15)/16)*16; |
4187 | 2 | MinReservedArea += 16*nAltivecParamsAtEnd; |
4188 | 2 | } |
4189 | 252 | |
4190 | 252 | // Area that is at least reserved in the caller of this function. |
4191 | 252 | MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize); |
4192 | 252 | |
4193 | 252 | // Set the size that is at least reserved in the caller of this function. Tail
4194 | 252 | // call optimized functions' reserved stack space needs to be aligned so that |
4195 | 252 | // taking the difference between two stack areas will result in an aligned |
4196 | 252 | // stack. |
4197 | 252 | MinReservedArea = |
4198 | 252 | EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); |
4199 | 252 | FuncInfo->setMinReservedArea(MinReservedArea); |
4200 | 252 | |
4201 | 252 | // If the function takes a variable number of arguments, make a frame index
4202 | 252 | // for the start of the first vararg value... for expansion of llvm.va_start.
4203 | 252 | if (isVarArg) {
4204 | 2 | int Depth = ArgOffset; |
4205 | 2 | |
4206 | 2 | FuncInfo->setVarArgsFrameIndex( |
4207 | 2 | MFI.CreateFixedObject(PtrVT.getSizeInBits()/8, |
4208 | 2 | Depth, true)); |
4209 | 2 | SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); |
4210 | 2 | |
4211 | 2 | // If this function is vararg, store any remaining integer argument regs |
4212 | 2 | // to their spots on the stack so that they may be loaded by dereferencing |
4213 | 2 | // the result of va_next. |
4214 | 9 | for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
4215 | 7 | unsigned VReg; |
4216 | 7 | |
4217 | 7 | if (isPPC64) |
4218 | 0 | VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); |
4219 | 7 | else |
4220 | 7 | VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); |
4221 | 7 | |
4222 | 7 | SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); |
4223 | 7 | SDValue Store = |
4224 | 7 | DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); |
4225 | 7 | MemOps.push_back(Store); |
4226 | 7 | // Increment the address by four for the next argument to store |
4227 | 7 | SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT); |
4228 | 7 | FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); |
4229 | 7 | } |
4230 | 2 | } |
4231 | 252 | |
4232 | 252 | if (!MemOps.empty()) |
4233 | 6 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); |
4234 | 252 | |
4235 | 252 | return Chain; |
4236 | 252 | } |
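
Illustration: the Darwin rule above that every 4 bytes of argument space shadows a GPR, extracted from the f32/f64 case into a runnable sketch. All constants here are illustrative assumptions, not values read from the subtarget:

#include <cstdio>

int main() {
  const unsigned Num_GPR_Regs = 8; // r3..r10
  const bool isPPC64 = false;      // assumption: 32-bit Darwin
  const unsigned ObjSize = 8;      // an f64 argument
  unsigned GPR_idx = 0;
  if (GPR_idx != Num_GPR_Regs) {
    ++GPR_idx;                                         // first 4 bytes
    if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
      ++GPR_idx;                                       // second 4 bytes
  }
  // The f64 value itself travels in an FPR; the GPRs are merely consumed.
  std::printf("GPRs shadowed by one f64: %u\n", GPR_idx); // prints 2
  return 0;
}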
4237 | | |
4238 | | /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be |
4239 | | /// adjusted to accommodate the arguments for the tailcall. |
4240 | | static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall, |
4241 | 1.49k | unsigned ParamSize) { |
4242 | 1.49k | |
4243 | 1.49k | if (!isTailCall) return 0;
4244 | 3 | |
4245 | 3 | PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>(); |
4246 | 3 | unsigned CallerMinReservedArea = FI->getMinReservedArea(); |
4247 | 3 | int SPDiff = (int)CallerMinReservedArea - (int)ParamSize; |
4248 | 3 | // Remember only if the new adjustment is bigger.
4249 | 3 | if (SPDiff < FI->getTailCallSPDelta()) |
4250 | 0 | FI->setTailCallSPDelta(SPDiff); |
4251 | 1.49k | |
4252 | 1.49k | return SPDiff; |
4253 | 1.49k | } |
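
Illustration: a worked example of the computation above with made-up sizes. If the caller's reserved argument area is 112 bytes and the tail-called function needs only 96, the stack pointer is off by the 16-byte difference:

#include <cstdio>

int main() {
  const bool isTailCall = true;
  const unsigned CallerMinReservedArea = 112; // e.g. from the caller's info
  const unsigned ParamSize = 96;              // callee's argument bytes
  int SPDiff = isTailCall ? (int)CallerMinReservedArea - (int)ParamSize : 0;
  std::printf("SPDiff = %d\n", SPDiff); // prints 16
  return 0;
}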
4254 | | |
4255 | | static bool isFunctionGlobalAddress(SDValue Callee); |
4256 | | |
4257 | | static bool |
4258 | | callsShareTOCBase(const Function *Caller, SDValue Callee, |
4259 | 1.22k | const TargetMachine &TM) { |
4260 | 1.22k | // If !G, Callee can be an external symbol. |
4261 | 1.22k | GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee); |
4262 | 1.22k | if (!G) |
4263 | 107 | return false; |
4264 | 1.12k | |
4265 | 1.12k | // The medium and large code models are expected to provide a sufficiently |
4266 | 1.12k | // large TOC to provide all data addressing needs of a module with a |
4267 | 1.12k | // single TOC. Since each module will be addressed with a single TOC, we
4268 | 1.12k | // only need to check that caller and callee don't cross DSO boundaries.
4269 | 1.12k | if (CodeModel::Medium == TM.getCodeModel() ||
4270 | 43 | CodeModel::Large == TM.getCodeModel()) |
4271 | 1.07k | return TM.shouldAssumeDSOLocal(*Caller->getParent(), G->getGlobal()); |
4272 | 43 | |
4273 | 43 | // Otherwise we need to ensure callee and caller are in the same section, |
4274 | 43 | // since the linker may allocate multiple TOCs, and we don't know which |
4275 | 43 | // sections will belong to the same TOC base. |
4276 | 43 | |
4277 | 43 | const GlobalValue *GV = G->getGlobal(); |
4278 | 43 | if (!GV->isStrongDefinitionForLinker()) |
4279 | 23 | return false; |
4280 | 20 | |
4281 | 20 | // Any explicitly-specified sections and section prefixes must also match. |
4282 | 20 | // Also, if we're using -ffunction-sections, then each function is always in |
4283 | 20 | // a different section (the same is true for COMDAT functions). |
4284 | 20 | if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
4285 | 18 | GV->getSection() != Caller->getSection()) |
4286 | 4 | return false; |
4287 | 16 | if (const auto *F = dyn_cast<Function>(GV)) {
4288 | 16 | if (F->getSectionPrefix() != Caller->getSectionPrefix()) |
4289 | 0 | return false; |
4290 | 16 | } |
4291 | 16 | |
4292 | 16 | // If the callee might be interposed, then we can't assume the ultimate call |
4293 | 16 | // target will be in the same section. Even in cases where we can assume that |
4294 | 16 | // interposition won't happen, in any case where the linker might insert a |
4295 | 16 | // stub to allow for interposition, we must generate code as though |
4296 | 16 | // interposition might occur. To understand why this matters, consider a |
4297 | 16 | // situation where: a -> b -> c where the arrows indicate calls. b and c are |
4298 | 16 | // in the same section, but a is in a different module (i.e. has a different |
4299 | 16 | // TOC base pointer). If the linker allows for interposition between b and c, |
4300 | 16 | // then it will generate a stub for the call edge between b and c which will |
4301 | 16 | // save the TOC pointer into the designated stack slot allocated by b. If we |
4302 | 16 | // return true here, and therefore allow a tail call between b and c, that |
4303 | 16 | // stack slot won't exist and the b -> c stub will end up saving b's TOC base
4304 | 16 | // pointer into the stack slot allocated by a (where the a -> b stub saved |
4305 | 16 | // a's TOC base pointer). If we're not considering a tail call, but rather, |
4306 | 16 | // whether a nop is needed after the call instruction in b, because the linker |
4307 | 16 | // will insert a stub, it might complain about a missing nop if we omit it |
4308 | 16 | // (although many don't complain in this case). |
4309 | 16 | if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
4310 | 1 | return false; |
4311 | 15 | |
4312 | 15 | return true; |
4313 | 15 | } |
4314 | | |
4315 | | static bool |
4316 | | needStackSlotPassParameters(const PPCSubtarget &Subtarget, |
4317 | 25 | const SmallVectorImpl<ISD::OutputArg> &Outs) { |
4318 | 25 | assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64()); |
4319 | 25 | |
4320 | 25 | const unsigned PtrByteSize = 8; |
4321 | 25 | const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); |
4322 | 25 | |
4323 | 25 | static const MCPhysReg GPR[] = { |
4324 | 25 | PPC::X3, PPC::X4, PPC::X5, PPC::X6, |
4325 | 25 | PPC::X7, PPC::X8, PPC::X9, PPC::X10, |
4326 | 25 | }; |
4327 | 25 | static const MCPhysReg VR[] = { |
4328 | 25 | PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, |
4329 | 25 | PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 |
4330 | 25 | }; |
4331 | 25 | |
4332 | 25 | const unsigned NumGPRs = array_lengthof(GPR); |
4333 | 25 | const unsigned NumFPRs = 13; |
4334 | 25 | const unsigned NumVRs = array_lengthof(VR); |
4335 | 25 | const unsigned ParamAreaSize = NumGPRs * PtrByteSize; |
4336 | 25 | |
4337 | 25 | unsigned NumBytes = LinkageSize; |
4338 | 25 | unsigned AvailableFPRs = NumFPRs; |
4339 | 25 | unsigned AvailableVRs = NumVRs; |
4340 | 25 | |
4341 | 166 | for (const ISD::OutputArg& Param : Outs) { |
4342 | 166 | if (Param.Flags.isNest()) continue;
4343 | 166 | |
4344 | 166 | if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags,
4345 | 166 | PtrByteSize, LinkageSize, ParamAreaSize, |
4346 | 166 | NumBytes, AvailableFPRs, AvailableVRs, |
4347 | 166 | Subtarget.hasQPX())) |
4348 | 9 | return true; |
4349 | 16 | } |
4350 | 16 | return false; |
4351 | 16 | } |
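
Illustration: a deliberately simplified model of the exhaustion check above, assuming integer-only parameters (the real CalculateStackSlotUsed also tracks FPR/VR availability and QPX types). With 8 parameter GPRs, a ninth i64 argument overflows the register portion of the parameter area:

#include <cstdio>

int main() {
  const unsigned NumGPRs = 8, PtrByteSize = 8, LinkageSize = 32;
  const unsigned ParamAreaSize = NumGPRs * PtrByteSize; // 64 bytes
  const unsigned NumParams = 9;                         // nine i64 arguments
  unsigned NumBytes = LinkageSize + NumParams * PtrByteSize;
  bool NeedsStack = NumBytes - LinkageSize > ParamAreaSize;
  std::printf("needs stack slot: %s\n", NeedsStack ? "yes" : "no"); // yes
  return 0;
}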
4352 | | |
4353 | | static bool |
4354 | 64 | hasSameArgumentList(const Function *CallerFn, ImmutableCallSite CS) { |
4355 | 64 | if (CS.arg_size() != CallerFn->arg_size()) |
4356 | 9 | return false; |
4357 | 55 | |
4358 | 55 | ImmutableCallSite::arg_iterator CalleeArgIter = CS.arg_begin(); |
4359 | 55 | ImmutableCallSite::arg_iterator CalleeArgEnd = CS.arg_end(); |
4360 | 55 | Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin(); |
4361 | 55 | |
4362 | 119 | for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4363 | 80 | const Value* CalleeArg = *CalleeArgIter; |
4364 | 80 | const Value* CallerArg = &(*CallerArgIter); |
4365 | 80 | if (CalleeArg == CallerArg) |
4366 | 60 | continue; |
4367 | 20 | |
4368 | 20 | // e.g. @caller([4 x i64] %a, [4 x i64] %b) { |
4369 | 20 | // tail call @callee([4 x i64] undef, [4 x i64] %b) |
4370 | 20 | // } |
4371 | 20 | // 1st argument of callee is undef and has the same type as caller. |
4372 | 20 | if (CalleeArg->getType() == CallerArg->getType() &&
4373 | 12 | isa<UndefValue>(CalleeArg)) |
4374 | 4 | continue; |
4375 | 16 | |
4376 | 16 | return false; |
4377 | 16 | } |
4378 | 55 | |
4379 | 39 | return true; |
4380 | 64 | } |
4381 | | |
4382 | | bool |
4383 | | PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4( |
4384 | | SDValue Callee, |
4385 | | CallingConv::ID CalleeCC, |
4386 | | ImmutableCallSite CS, |
4387 | | bool isVarArg, |
4388 | | const SmallVectorImpl<ISD::OutputArg> &Outs, |
4389 | | const SmallVectorImpl<ISD::InputArg> &Ins, |
4390 | 187 | SelectionDAG& DAG) const { |
4391 | 187 | bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt; |
4392 | 187 | |
4393 | 187 | if (DisableSCO && !TailCallOpt) return false;
4394 | 187 | |
4395 | 187 | // Variadic argument functions are not supported. |
4396 | 187 | if (isVarArg) return false;
4397 | 181 | |
4398 | 181 | MachineFunction &MF = DAG.getMachineFunction(); |
4399 | 181 | CallingConv::ID CallerCC = MF.getFunction()->getCallingConv(); |
4400 | 181 | |
4401 | 181 | // Tail or sibling call optimization (TCO/SCO) requires that the callee and
4402 | 181 | // caller have the same calling convention.
4403 | 181 | if (CallerCC != CalleeCC) return false;
4404 | 158 | |
4405 | 158 | // SCO only supports the C and Fast calling conventions.
4406 | 158 | if (CalleeCC != CallingConv::Fast && CalleeCC != CallingConv::C)
4407 | 0 | return false; |
4408 | 158 | |
4409 | 158 | // A caller that contains any byval parameter is not supported.
4410 | 158 | if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4411 | 0 | return false; |
4412 | 158 | |
4413 | 158 | // A callee that contains any byval parameter is not supported either.
4414 | 158 | // Note: This is a quick workaround, because in some cases, e.g.
4415 | 158 | // caller's stack size > callee's stack size, we are still able to apply |
4416 | 158 | // sibling call optimization. See: https://reviews.llvm.org/D23441#513574 |
4417 | 934 | if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4418 | 5 | return false; |
4419 | 153 | |
4420 | 153 | // No TCO/SCO on indirect calls because the caller has to restore its TOC.
4421 | 153 | if (!isFunctionGlobalAddress(Callee) &&
4422 | 13 | !isa<ExternalSymbolSDNode>(Callee)) |
4423 | 12 | return false; |
4424 | 141 | |
4425 | 141 | // If the caller and callee potentially have different TOC bases then we |
4426 | 141 | // cannot tail call since we need to restore the TOC pointer after the call. |
4427 | 141 | // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977 |
4428 | 141 | if (!callsShareTOCBase(MF.getFunction(), Callee, getTargetMachine()))
4429 | 76 | return false; |
4430 | 65 | |
4431 | 65 | // TCO allows altering callee ABI, so we don't have to check further. |
4432 | 65 | if (CalleeCC == CallingConv::Fast && TailCallOpt)
4433 | 1 | return true; |
4434 | 64 | |
4435 | 64 | if (DisableSCO) return false;
4436 | 64 | |
4437 | 64 | // If the callee uses the same argument list as the caller, then we can
4438 | 64 | // apply SCO in this case. If not, we need to check whether the callee
4439 | 64 | // needs stack slots for passing arguments.
4440 | 64 | if (!hasSameArgumentList(MF.getFunction(), CS) &&
4441 | 64 | needStackSlotPassParameters(Subtarget, Outs)) {
4442 | 9 | return false; |
4443 | 9 | } |
4444 | 55 | |
4445 | 55 | return true; |
4446 | 55 | } |
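
Illustration: a source-level example (not a test from the tree) of a call that the checks above may accept as a sibling call: same calling convention, not variadic, no byval arguments, a direct callee, and, assuming both functions are DSO-local under the medium code model, a shared TOC base:

// Hypothetical functions for illustration only.
long callee(long x);

long caller(long x) {
  return callee(x + 1); // candidate for sibling-call optimization
}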
4447 | | |
4448 | | /// IsEligibleForTailCallOptimization - Check whether the call is eligible |
4449 | | /// for tail call optimization. Targets which want to do tail call |
4450 | | /// optimization should implement this function. |
4451 | | bool |
4452 | | PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, |
4453 | | CallingConv::ID CalleeCC, |
4454 | | bool isVarArg, |
4455 | | const SmallVectorImpl<ISD::InputArg> &Ins, |
4456 | 16 | SelectionDAG& DAG) const { |
4457 | 16 | if (!getTargetMachine().Options.GuaranteedTailCallOpt) |
4458 | 14 | return false; |
4459 | 2 | |
4460 | 2 | // Variable argument functions are not supported. |
4461 | 2 | if (isVarArg)
4462 | 0 | return false; |
4463 | 2 | |
4464 | 2 | MachineFunction &MF = DAG.getMachineFunction(); |
4465 | 2 | CallingConv::ID CallerCC = MF.getFunction()->getCallingConv(); |
4466 | 2 | if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4467 | 2 | // Functions containing by val parameters are not supported. |
4468 | 4 | for (unsigned i = 0; i != Ins.size(); i++) {
4469 | 2 | ISD::ArgFlagsTy Flags = Ins[i].Flags; |
4470 | 2 | if (Flags.isByVal()) return false;
4471 | 2 | } |
4472 | 2 | |
4473 | 2 | // Non-PIC/GOT tail calls are supported. |
4474 | 2 | if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4475 | 1 | return true; |
4476 | 1 | |
4477 | 1 | // At the moment we can only do local tail calls (in same module, hidden |
4478 | 1 | // or protected) if we are generating PIC. |
4479 | 1 | if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4480 | 1 | return G->getGlobal()->hasHiddenVisibility() |
4481 | 1 | || G->getGlobal()->hasProtectedVisibility(); |
4482 | 0 | } |
4483 | 0 | 
4484 | 0 | return false; |
4485 | 0 | } |
4486 | | |
4487 | | /// isBLACompatibleAddress - Return the immediate to use if the specified
4488 | | /// 32-bit value is representable in the immediate field of a BxA instruction. |
4489 | 361 | static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { |
4490 | 361 | ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); |
4491 | 361 | if (!C361 ) return nullptr348 ; |
4492 | 13 | |
4493 | 13 | int Addr = C->getZExtValue(); |
4494 | 13 | if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. |
4495 | 13 | SignExtend32<26>(Addr) != Addr) |
4496 | 0 | return nullptr; // Top 6 bits have to be sext of immediate. |
4497 | 13 | |
4498 | 13 | return DAG |
4499 | 13 | .getConstant( |
4500 | 13 | (int)C->getZExtValue() >> 2, SDLoc(Op), |
4501 | 13 | DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())) |
4502 | 13 | .getNode(); |
4503 | 13 | } |
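
Illustration: the encodability test above restated as a standalone check (SignExtend32From26 is a local stand-in for llvm::SignExtend32<26>). As I read the encoding, BLA's 24-bit LI field is concatenated with two zero bits and sign-extended, so the byte address must be 4-byte aligned and representable in 26 signed bits:

#include <cstdint>
#include <cstdio>

// Local stand-in for llvm::SignExtend32<26>.
static int32_t SignExtend32From26(uint32_t X) {
  return (int32_t)(X << 6) >> 6; // replicate bit 25 into the upper bits
}

int main() {
  int Addr = 0x01FFFFFC; // largest positive 4-byte-aligned 26-bit address
  bool OK = (Addr & 3) == 0 && SignExtend32From26(Addr) == Addr;
  std::printf("0x%x %s encodable; LI field = 0x%x\n", Addr,
              OK ? "is" : "is not", OK ? Addr >> 2 : 0);
  return 0;
}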
4504 | | |
4505 | | namespace { |
4506 | | |
4507 | | struct TailCallArgumentInfo { |
4508 | | SDValue Arg; |
4509 | | SDValue FrameIdxOp; |
4510 | | int FrameIdx = 0; |
4511 | | |
4512 | 64 | TailCallArgumentInfo() = default; |
4513 | | }; |
4514 | | |
4515 | | } // end anonymous namespace |
4516 | | |
4517 | | /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot. |
4518 | | static void StoreTailCallArgumentsToStackSlot( |
4519 | | SelectionDAG &DAG, SDValue Chain, |
4520 | | const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs, |
4521 | 3 | SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) { |
4522 | 3 | for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
4523 | 0 | SDValue Arg = TailCallArgs[i].Arg; |
4524 | 0 | SDValue FIN = TailCallArgs[i].FrameIdxOp; |
4525 | 0 | int FI = TailCallArgs[i].FrameIdx; |
4526 | 0 | // Store relative to framepointer. |
4527 | 0 | MemOpChains.push_back(DAG.getStore( |
4528 | 0 | Chain, dl, Arg, FIN, |
4529 | 0 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI))); |
4530 | 0 | } |
4531 | 3 | } |
4532 | | |
4533 | | /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to |
4534 | | /// the appropriate stack slot for the tail call optimized function call. |
4535 | | static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, |
4536 | | SDValue OldRetAddr, SDValue OldFP, |
4537 | 3 | int SPDiff, const SDLoc &dl) { |
4538 | 3 | if (SPDiff) {
4539 | 1 | // Calculate the new stack slot for the return address. |
4540 | 1 | MachineFunction &MF = DAG.getMachineFunction(); |
4541 | 1 | const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); |
4542 | 1 | const PPCFrameLowering *FL = Subtarget.getFrameLowering(); |
4543 | 1 | bool isPPC64 = Subtarget.isPPC64(); |
4544 | 1 | int SlotSize = isPPC64 ? 8 : 4;
4545 | 1 | int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset(); |
4546 | 1 | int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize, |
4547 | 1 | NewRetAddrLoc, true); |
4548 | 1 | EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4549 | 1 | SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); |
4550 | 1 | Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, |
4551 | 1 | MachinePointerInfo::getFixedStack(MF, NewRetAddr)); |
4552 | 1 | |
4553 | 1 | // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack |
4554 | 1 | // slot as the FP is never overwritten. |
4555 | 1 | if (Subtarget.isDarwinABI()) {
4556 | 0 | int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset(); |
4557 | 0 | int NewFPIdx = MF.getFrameInfo().CreateFixedObject(SlotSize, NewFPLoc, |
4558 | 0 | true); |
4559 | 0 | SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); |
4560 | 0 | Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, |
4561 | 0 | MachinePointerInfo::getFixedStack( |
4562 | 0 | DAG.getMachineFunction(), NewFPIdx)); |
4563 | 0 | } |
4564 | 1 | } |
4565 | 3 | return Chain; |
4566 | 3 | } |
4567 | | |
4568 | | /// CalculateTailCallArgDest - Remember the argument for later processing and
4569 | | /// calculate the position of the argument.
4570 | | static void |
4571 | | CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, |
4572 | | SDValue Arg, int SPDiff, unsigned ArgOffset, |
4573 | 64 | SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) { |
4574 | 64 | int Offset = ArgOffset + SPDiff; |
4575 | 64 | uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8; |
4576 | 64 | int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true); |
4577 | 64 | EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4578 | 64 | SDValue FIN = DAG.getFrameIndex(FI, VT); |
4579 | 64 | TailCallArgumentInfo Info; |
4580 | 64 | Info.Arg = Arg; |
4581 | 64 | Info.FrameIdxOp = FIN; |
4582 | 64 | Info.FrameIdx = FI; |
4583 | 64 | TailCallArguments.push_back(Info); |
4584 | 64 | } |
4585 | | |
4586 | | /// EmitTailCallLoadFPAndRetAddr - Emit loads from the frame pointer and return
4587 | | /// address stack slots. Returns the chain as result and the loaded values in
4588 | | /// LROpOut/FPOpOut. Used when tail calling.
4589 | | SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr( |
4590 | | SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut, |
4591 | 1.55k | SDValue &FPOpOut, const SDLoc &dl) const { |
4592 | 1.55k | if (SPDiff) {
4593 | 1 | // Load the LR and FP stack slot for later adjusting. |
4594 | 1 | EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
4595 | 1 | LROpOut = getReturnAddrFrameIndex(DAG); |
4596 | 1 | LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo()); |
4597 | 1 | Chain = SDValue(LROpOut.getNode(), 1); |
4598 | 1 | |
4599 | 1 | // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack |
4600 | 1 | // slot as the FP is never overwritten. |
4601 | 1 | if (Subtarget.isDarwinABI()) {
4602 | 0 | FPOpOut = getFramePointerFrameIndex(DAG); |
4603 | 0 | FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo()); |
4604 | 0 | Chain = SDValue(FPOpOut.getNode(), 1); |
4605 | 0 | } |
4606 | 1 | } |
4607 | 1.55k | return Chain; |
4608 | 1.55k | } |
4609 | | |
4610 | | /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified |
4611 | | /// by "Src" to address "Dst" of size "Size". Alignment information is |
4612 | | /// specified by the specific parameter attribute. The copy will be passed as |
4613 | | /// a byval function parameter. |
4614 | | /// Sometimes what we are copying is the end of a larger object, the part that |
4615 | | /// does not fit in registers. |
4616 | | static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, |
4617 | | SDValue Chain, ISD::ArgFlagsTy Flags, |
4618 | 48 | SelectionDAG &DAG, const SDLoc &dl) { |
4619 | 48 | SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32); |
4620 | 48 | return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), |
4621 | 48 | false, false, false, MachinePointerInfo(), |
4622 | 48 | MachinePointerInfo()); |
4623 | 48 | } |
4624 | | |
4625 | | /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of |
4626 | | /// tail calls. |
4627 | | static void LowerMemOpCallTo( |
4628 | | SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, |
4629 | | SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, |
4630 | | bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains, |
4631 | 472 | SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) { |
4632 | 472 | EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); |
4633 | 472 | if (!isTailCall) {
4634 | 408 | if (isVector) {
4635 | 88 | SDValue StackPtr; |
4636 | 88 | if (isPPC64) |
4637 | 88 | StackPtr = DAG.getRegister(PPC::X1, MVT::i64); |
4638 | 88 | else |
4639 | 0 | StackPtr = DAG.getRegister(PPC::R1, MVT::i32); |
4640 | 88 | PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, |
4641 | 88 | DAG.getConstant(ArgOffset, dl, PtrVT)); |
4642 | 88 | } |
4643 | 408 | MemOpChains.push_back( |
4644 | 408 | DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo())); |
4645 | 408 | // Calculate and remember argument location. |
4646 | 472 | } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset, |
4647 | 64 | TailCallArguments); |
4648 | 472 | } |
4649 | | |
4650 | | static void |
4651 | | PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, |
4652 | | const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, |
4653 | | SDValue FPOp, |
4654 | 3 | SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) { |
4655 | 3 | // Emit a sequence of copyto/copyfrom virtual registers for arguments that |
4656 | 3 | // might overwrite each other in case of tail call optimization. |
4657 | 3 | SmallVector<SDValue, 8> MemOpChains2; |
4658 | 3 | // Do not flag preceding copytoreg stuff together with the following stuff. |
4659 | 3 | InFlag = SDValue(); |
4660 | 3 | StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments, |
4661 | 3 | MemOpChains2, dl); |
4662 | 3 | if (!MemOpChains2.empty()) |
4663 | 0 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2); |
4664 | 3 | |
4665 | 3 | // Store the return address to the appropriate stack slot. |
4666 | 3 | Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl); |
4667 | 3 | |
4668 | 3 | // Emit callseq_end just before tailcall node. |
4669 | 3 | Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), |
4670 | 3 | DAG.getIntPtrConstant(0, dl, true), InFlag, dl); |
4671 | 3 | InFlag = Chain.getValue(1); |
4672 | 3 | } |
4673 | | |
4674 | | // Is this global address that of a function that can be called by name? (as |
4675 | | // opposed to something that must hold a descriptor for an indirect call). |
4676 | 2.93k | static bool isFunctionGlobalAddress(SDValue Callee) { |
4677 | 2.93k | if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
4678 | 2.43k | if (Callee.getOpcode() == ISD::GlobalTLSAddress || |
4679 | 2.42k | Callee.getOpcode() == ISD::TargetGlobalTLSAddress) |
4680 | 3 | return false; |
4681 | 2.42k | |
4682 | 2.42k | return G->getGlobal()->getValueType()->isFunctionTy(); |
4683 | 2.42k | } |
4684 | 506 | |
4685 | 506 | return false; |
4686 | 506 | } |
4687 | | |
4688 | | static unsigned |
4689 | | PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, |
4690 | | SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall, |
4691 | | bool isPatchPoint, bool hasNest, |
4692 | | SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass, |
4693 | | SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys, |
4694 | 1.55k | ImmutableCallSite CS, const PPCSubtarget &Subtarget) { |
4695 | 1.55k | bool isPPC64 = Subtarget.isPPC64(); |
4696 | 1.55k | bool isSVR4ABI = Subtarget.isSVR4ABI(); |
4697 | 1.55k | bool isELFv2ABI = Subtarget.isELFv2ABI(); |
4698 | 1.55k | |
4699 | 1.55k | EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); |
4700 | 1.55k | NodeTys.push_back(MVT::Other); // Returns a chain |
4701 | 1.55k | NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use. |
4702 | 1.55k | |
4703 | 1.55k | unsigned CallOpc = PPCISD::CALL; |
4704 | 1.55k | |
4705 | 1.55k | bool needIndirectCall = true; |
4706 | 1.55k | if (!isSVR4ABI || !isPPC64)
4707 | 351 | if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
4708 | 9 | // If this is an absolute destination address, use the munged value. |
4709 | 9 | Callee = SDValue(Dest, 0); |
4710 | 9 | needIndirectCall = false; |
4711 | 9 | } |
4712 | 1.55k | |
4713 | 1.55k | // PC-relative references to external symbols should go through $stub, unless |
4714 | 1.55k | // we're building with the leopard linker or later, which automatically |
4715 | 1.55k | // synthesizes these stubs. |
4716 | 1.55k | const TargetMachine &TM = DAG.getTarget(); |
4717 | 1.55k | const Module *Mod = DAG.getMachineFunction().getFunction()->getParent(); |
4718 | 1.55k | const GlobalValue *GV = nullptr; |
4719 | 1.55k | if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) |
4720 | 1.22k | GV = G->getGlobal(); |
4721 | 1.55k | bool Local = TM.shouldAssumeDSOLocal(*Mod, GV); |
4722 | 1.55k | bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64;
4723 | 1.55k | |
4724 | 1.55k | if (isFunctionGlobalAddress(Callee)) {
4725 | 1.22k | GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee); |
4726 | 1.22k | // A call to a TLS address is actually an indirect call to a |
4727 | 1.22k | // thread-specific pointer. |
4728 | 1.22k | unsigned OpFlags = 0; |
4729 | 1.22k | if (UsePlt) |
4730 | 97 | OpFlags = PPCII::MO_PLT; |
4731 | 1.22k | |
4732 | 1.22k | // If the callee is a GlobalAddress/ExternalSymbol node (quite common, |
4733 | 1.22k | // every direct call is) turn it into a TargetGlobalAddress / |
4734 | 1.22k | // TargetExternalSymbol node so that legalize doesn't hack it. |
4735 | 1.22k | Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, |
4736 | 1.22k | Callee.getValueType(), 0, OpFlags); |
4737 | 1.22k | needIndirectCall = false; |
4738 | 1.22k | } |
4739 | 1.55k | |
4740 | 1.55k | if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
4741 | 256 | unsigned char OpFlags = 0; |
4742 | 256 | |
4743 | 256 | if (UsePlt) |
4744 | 120 | OpFlags = PPCII::MO_PLT; |
4745 | 256 | |
4746 | 256 | Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(), |
4747 | 256 | OpFlags); |
4748 | 256 | needIndirectCall = false; |
4749 | 256 | } |
4750 | 1.55k | |
4751 | 1.55k | if (isPatchPoint) {
4752 | 28 | // We'll form an invalid direct call when lowering a patchpoint; the full |
4753 | 28 | // sequence for an indirect call is complicated, and many of the |
4754 | 28 | // instructions introduced might have side effects (and, thus, can't be |
4755 | 28 | // removed later). The call itself will be removed as soon as the |
4756 | 28 | // argument/return lowering is complete, so the fact that it has the wrong |
4757 | 28 | // kind of operands should not really matter. |
4758 | 28 | needIndirectCall = false; |
4759 | 28 | } |
4760 | 1.55k | |
4761 | 1.55k | if (needIndirectCall) {
4762 | 37 | // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair |
4763 | 37 | // to do the call, we can't use PPCISD::CALL. |
4764 | 37 | SDValue MTCTROps[] = {Chain, Callee, InFlag}; |
4765 | 37 | |
4766 | 37 | if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
4767 | 19 | // Function pointers in the 64-bit SVR4 ABI do not point to the function |
4768 | 19 | // entry point, but to the function descriptor (the function entry point |
4769 | 19 | // address is part of the function descriptor though). |
4770 | 19 | // The function descriptor is a three doubleword structure with the |
4771 | 19 | // following fields: function entry point, TOC base address and |
4772 | 19 | // environment pointer. |
4773 | 19 | // Thus for a call through a function pointer, the following actions need |
4774 | 19 | // to be performed: |
4775 | 19 | // 1. Save the TOC of the caller in the TOC save area of its stack |
4776 | 19 | // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()). |
4777 | 19 | // 2. Load the address of the function entry point from the function |
4778 | 19 | // descriptor. |
4779 | 19 | // 3. Load the TOC of the callee from the function descriptor into r2. |
4780 | 19 | // 4. Load the environment pointer from the function descriptor into |
4781 | 19 | // r11. |
4782 | 19 | // 5. Branch to the function entry point address. |
4783 | 19 | // 6. On return of the callee, the TOC of the caller needs to be |
4784 | 19 | // restored (this is done in FinishCall()). |
4785 | 19 | // |
4786 | 19 | // The loads are scheduled at the beginning of the call sequence, and the |
4787 | 19 | // register copies are flagged together to ensure that no other |
4788 | 19 | // operations can be scheduled in between. E.g. without flagging the |
4789 | 19 | // copies together, a TOC access in the caller could be scheduled between |
4790 | 19 | // the assignment of the callee TOC and the branch to the callee, which |
4791 | 19 | // results in the TOC access going through the TOC of the callee instead |
4792 | 19 | // of going through the TOC of the caller, which leads to incorrect code. |
4793 | 19 | |
4794 | 19 | // Load the address of the function entry point from the function |
4795 | 19 | // descriptor. |
4796 | 19 | SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1); |
4797 | 19 | if (LDChain.getValueType() == MVT::Glue) |
4798 | 19 | LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2); |
4799 | 19 | |
4800 | 19 | auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors() |
4801 | 18 | ? (MachineMemOperand::MODereferenceable | |
4802 | 18 | MachineMemOperand::MOInvariant) |
4803 | 1 | : MachineMemOperand::MONone; |
4804 | 19 | |
4805 | 19 | MachinePointerInfo MPI(CS ? CS.getCalledValue() : nullptr);
4806 | 19 | SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI, |
4807 | 19 | /* Alignment = */ 8, MMOFlags); |
4808 | 19 | |
4809 | 19 | // Load environment pointer into r11. |
4810 | 19 | SDValue PtrOff = DAG.getIntPtrConstant(16, dl); |
4811 | 19 | SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff); |
4812 | 19 | SDValue LoadEnvPtr = |
4813 | 19 | DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16), |
4814 | 19 | /* Alignment = */ 8, MMOFlags); |
4815 | 19 | |
4816 | 19 | SDValue TOCOff = DAG.getIntPtrConstant(8, dl); |
4817 | 19 | SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff); |
4818 | 19 | SDValue TOCPtr = |
4819 | 19 | DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8), |
4820 | 19 | /* Alignment = */ 8, MMOFlags); |
4821 | 19 | |
4822 | 19 | setUsesTOCBasePtr(DAG); |
4823 | 19 | SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr, |
4824 | 19 | InFlag); |
4825 | 19 | Chain = TOCVal.getValue(0); |
4826 | 19 | InFlag = TOCVal.getValue(1); |
4827 | 19 | |
4828 | 19 | // If the function call has an explicit 'nest' parameter, it takes the |
4829 | 19 | // place of the environment pointer. |
4830 | 19 | if (!hasNest) {
4831 | 18 | SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, |
4832 | 18 | InFlag); |
4833 | 18 | |
4834 | 18 | Chain = EnvVal.getValue(0); |
4835 | 18 | InFlag = EnvVal.getValue(1); |
4836 | 18 | } |
4837 | 19 | |
4838 | 19 | MTCTROps[0] = Chain; |
4839 | 19 | MTCTROps[1] = LoadFuncPtr; |
4840 | 19 | MTCTROps[2] = InFlag; |
4841 | 19 | } |
4842 | 37 | |
4843 | 37 | Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, |
4844 | 37 | makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
4845 | 37 | InFlag = Chain.getValue(1); |
4846 | 37 | |
4847 | 37 | NodeTys.clear(); |
4848 | 37 | NodeTys.push_back(MVT::Other); |
4849 | 37 | NodeTys.push_back(MVT::Glue); |
4850 | 37 | Ops.push_back(Chain); |
4851 | 37 | CallOpc = PPCISD::BCTRL; |
4852 | 37 | Callee.setNode(nullptr); |
4853 | 37 | // Add use of X11 (holding environment pointer) |
4854 | 37 | if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest)
4855 | 18 | Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); |
4856 | 37 | // Add CTR register as callee so a bctr can be emitted later. |
4857 | 37 | if (isTailCall) |
4858 | 0 | Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
4859 | 37 | } |
4860 | 1.55k | |
4861 | 1.55k | // If this is a direct call, pass the chain and the callee. |
4862 | 1.55k | if (Callee.getNode()) {
4863 | 1.51k | Ops.push_back(Chain); |
4864 | 1.51k | Ops.push_back(Callee); |
4865 | 1.51k | } |
4866 | 1.55k | // If this is a tail call add stack pointer delta. |
4867 | 1.55k | if (isTailCall) |
4868 | 58 | Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32)); |
4869 | 1.55k | |
4870 | 1.55k | // Add argument registers to the end of the list so that they are known live |
4871 | 1.55k | // into the call. |
4872 | 5.36k | for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
4873 | 3.81k | Ops.push_back(DAG.getRegister(RegsToPass[i].first, |
4874 | 3.81k | RegsToPass[i].second.getValueType())); |
4875 | 1.55k | |
4876 | 1.55k | // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live |
4877 | 1.55k | // into the call. |
4878 | 1.55k | if (isSVR4ABI && isPPC64 && !isPatchPoint) {
4879 | 1.17k | setUsesTOCBasePtr(DAG); |
4880 | 1.17k | Ops.push_back(DAG.getRegister(PPC::X2, PtrVT)); |
4881 | 1.17k | } |
4882 | 1.55k | |
4883 | 1.55k | return CallOpc; |
4884 | 1.55k | } |
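
The three loads in the indirect-call path above read the ELFv1 function descriptor at offsets 0, 8 and 16, as the long comment in PrepareCall describes. A sketch of that layout as a plain struct (illustrative names, not from the ABI headers):

    #include <cstddef>
    #include <cstdint>

    // ELFv1 function descriptor as walked by PrepareCall: entry point at +0
    // (moved to CTR), TOC base at +8 (copied to r2), environment pointer at
    // +16 (copied to r11 unless a 'nest' argument takes its place).
    struct FunctionDescriptor {
      uint64_t EntryPoint;
      uint64_t TOCBase;
      uint64_t EnvPointer;
    };

    static_assert(offsetof(FunctionDescriptor, TOCBase) == 8,
                  "TOC base is the second doubleword");
    static_assert(offsetof(FunctionDescriptor, EnvPointer) == 16,
                  "environment pointer is the third doubleword");

    int main() {}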
4885 | | |
4886 | | SDValue PPCTargetLowering::LowerCallResult( |
4887 | | SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, |
4888 | | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, |
4889 | 1.49k | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { |
4890 | 1.49k | SmallVector<CCValAssign, 16> RVLocs; |
4891 | 1.49k | CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, |
4892 | 1.49k | *DAG.getContext()); |
4893 | 1.49k | CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC); |
4894 | 1.49k | |
4895 | 1.49k | // Copy all of the result registers out of their specified physreg. |
4896 | 2.35k | for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4897 | 861 | CCValAssign &VA = RVLocs[i]; |
4898 | 861 | assert(VA.isRegLoc() && "Can only return in registers!"); |
4899 | 861 | |
4900 | 861 | SDValue Val = DAG.getCopyFromReg(Chain, dl, |
4901 | 861 | VA.getLocReg(), VA.getLocVT(), InFlag); |
4902 | 861 | Chain = Val.getValue(1); |
4903 | 861 | InFlag = Val.getValue(2); |
4904 | 861 | |
4905 | 861 | switch (VA.getLocInfo()) { |
4906 | 0 | default: llvm_unreachable("Unknown loc info!");
4907 | 742 | case CCValAssign::Full: break; |
4908 | 55 | case CCValAssign::AExt: |
4909 | 55 | Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); |
4910 | 55 | break; |
4911 | 9 | case CCValAssign::ZExt: |
4912 | 9 | Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val, |
4913 | 9 | DAG.getValueType(VA.getValVT())); |
4914 | 9 | Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); |
4915 | 9 | break; |
4916 | 55 | case CCValAssign::SExt: |
4917 | 55 | Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val, |
4918 | 55 | DAG.getValueType(VA.getValVT())); |
4919 | 55 | Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); |
4920 | 55 | break; |
4921 | 861 | } |
4922 | 861 | |
4923 | 861 | InVals.push_back(Val); |
4924 | 861 | } |
4925 | 1.49k | |
4926 | 1.49k | return Chain; |
4927 | 1.49k | } |
4928 | | |
4929 | | SDValue PPCTargetLowering::FinishCall( |
4930 | | CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg, |
4931 | | bool isPatchPoint, bool hasNest, SelectionDAG &DAG, |
4932 | | SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag, |
4933 | | SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff, |
4934 | | unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins, |
4935 | 1.55k | SmallVectorImpl<SDValue> &InVals, ImmutableCallSite CS) const { |
4936 | 1.55k | std::vector<EVT> NodeTys; |
4937 | 1.55k | SmallVector<SDValue, 8> Ops; |
4938 | 1.55k | unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl, |
4939 | 1.55k | SPDiff, isTailCall, isPatchPoint, hasNest, |
4940 | 1.55k | RegsToPass, Ops, NodeTys, CS, Subtarget); |
4941 | 1.55k | |
4942 | 1.55k | // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls |
4943 | 1.55k | if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
4944 | 57 | Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32)); |
4945 | 1.55k | |
4946 | 1.55k | // When performing tail call optimization the callee pops its arguments off |
4947 | 1.55k | // the stack. Account for this here so these bytes can be pushed back on in |
4948 | 1.55k | // PPCFrameLowering::eliminateCallFramePseudoInstr. |
4949 | 1.55k | int BytesCalleePops = |
4950 | 1.55k | (CallConv == CallingConv::Fast && |
4951 | 1.55k | getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
4952 | 1.55k | |
4953 | 1.55k | // Add a register mask operand representing the call-preserved registers. |
4954 | 1.55k | const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
4955 | 1.55k | const uint32_t *Mask = |
4956 | 1.55k | TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv); |
4957 | 1.55k | assert(Mask && "Missing call preserved mask for calling convention"); |
4958 | 1.55k | Ops.push_back(DAG.getRegisterMask(Mask)); |
4959 | 1.55k | |
4960 | 1.55k | if (InFlag.getNode()) |
4961 | 1.04k | Ops.push_back(InFlag); |
4962 | 1.55k | |
4963 | 1.55k | // Emit tail call. |
4964 | 1.55k | if (isTailCall) {
4965 | 58 | assert(((Callee.getOpcode() == ISD::Register && |
4966 | 58 | cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) || |
4967 | 58 | Callee.getOpcode() == ISD::TargetExternalSymbol || |
4968 | 58 | Callee.getOpcode() == ISD::TargetGlobalAddress || |
4969 | 58 | isa<ConstantSDNode>(Callee)) && |
4970 | 58 | "Expecting an global address, external symbol, absolute value or register"); |
4971 | 58 | |
4972 | 58 | DAG.getMachineFunction().getFrameInfo().setHasTailCall(); |
4973 | 58 | return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops); |
4974 | 58 | } |
4975 | 1.49k | |
4976 | 1.49k | // Add a NOP immediately after the branch instruction when using the 64-bit |
4977 | 1.49k | // SVR4 ABI. At link time, if caller and callee are in a different module and |
4978 | 1.49k | // thus have a different TOC, the call will be replaced with a call to a stub |
4979 | 1.49k | // function which saves the current TOC, loads the TOC of the callee and |
4980 | 1.49k | // branches to the callee. The NOP will be replaced with a load instruction |
4981 | 1.49k | // which restores the TOC of the caller from the TOC save slot of the current |
4982 | 1.49k | // stack frame. If caller and callee belong to the same module (and have the |
4983 | 1.49k | // same TOC), the NOP will remain unchanged. |
4984 | 1.49k | |
4985 | 1.49k | MachineFunction &MF = DAG.getMachineFunction(); |
4986 | 1.49k | if (!isTailCall && Subtarget.isSVR4ABI() && Subtarget.isPPC64() &&
4987 | 1.49k | !isPatchPoint) {
4988 | 1.11k | if (CallOpc == PPCISD::BCTRL) {
4989 | 28 | // This is a call through a function pointer. |
4990 | 28 | // Restore the caller TOC from the save area into R2. |
4991 | 28 | // See PrepareCall() for more information about calls through function |
4992 | 28 | // pointers in the 64-bit SVR4 ABI. |
4993 | 28 | // We are using a target-specific load with r2 hard coded, because the |
4994 | 28 | // result of a target-independent load would never go directly into r2, |
4995 | 28 | // since r2 is a reserved register (which prevents the register allocator |
4996 | 28 | // from allocating it), resulting in an additional register being |
4997 | 28 | // allocated and an unnecessary move instruction being generated. |
4998 | 28 | CallOpc = PPCISD::BCTRL_LOAD_TOC; |
4999 | 28 | |
5000 | 28 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
5001 | 28 | SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT); |
5002 | 28 | unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); |
5003 | 28 | SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); |
5004 | 28 | SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff); |
5005 | 28 | |
5006 | 28 | // The address needs to go after the chain input but before the flag (or |
5007 | 28 | // any other variadic arguments). |
5008 | 28 | Ops.insert(std::next(Ops.begin()), AddTOC); |
5009 | 1.11k | } else if (CallOpc == PPCISD::CALL &&
5010 | 1.08k | !callsShareTOCBase(MF.getFunction(), Callee, DAG.getTarget())) {
5011 | 1.03k | // Otherwise insert NOP for non-local calls. |
5012 | 1.03k | CallOpc = PPCISD::CALL_NOP; |
5013 | 1.03k | } |
5014 | 1.11k | } |
5015 | 1.49k | |
5016 | 1.49k | Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); |
5017 | 1.49k | InFlag = Chain.getValue(1); |
5018 | 1.49k | |
5019 | 1.49k | Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), |
5020 | 1.49k | DAG.getIntPtrConstant(BytesCalleePops, dl, true), |
5021 | 1.49k | InFlag, dl); |
5022 | 1.49k | if (!Ins.empty()) |
5023 | 726 | InFlag = Chain.getValue(1); |
5024 | 1.55k | |
5025 | 1.55k | return LowerCallResult(Chain, InFlag, CallConv, isVarArg, |
5026 | 1.55k | Ins, dl, DAG, InVals); |
5027 | 1.55k | } |
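
The TOC save slot that BCTRL_LOAD_TOC reloads r2 from is the last doubleword of the linkage area. Given the layouts quoted in LowerCall_64SVR4 below ([SP][CR][LR][2 x unused][TOC] in 48 bytes for ELFv1, [SP][CR][LR][TOC] in 32 bytes for ELFv2), a sketch of the offset computation behind getTOCSaveOffset:

    #include <cassert>

    // TOC save slot offset, assuming the linkage-area layouts described in
    // LowerCall_64SVR4, with the TOC doubleword last in either case.
    static unsigned tocSaveOffset(bool IsELFv2) {
      unsigned LinkageSize = IsELFv2 ? 32 : 48;
      return LinkageSize - 8;
    }

    int main() {
      assert(tocSaveOffset(false) == 40); // ELFv1
      assert(tocSaveOffset(true) == 24);  // ELFv2
    }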
5028 | | |
5029 | | SDValue |
5030 | | PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, |
5031 | 1.55k | SmallVectorImpl<SDValue> &InVals) const { |
5032 | 1.55k | SelectionDAG &DAG = CLI.DAG; |
5033 | 1.55k | SDLoc &dl = CLI.DL; |
5034 | 1.55k | SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; |
5035 | 1.55k | SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; |
5036 | 1.55k | SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; |
5037 | 1.55k | SDValue Chain = CLI.Chain; |
5038 | 1.55k | SDValue Callee = CLI.Callee; |
5039 | 1.55k | bool &isTailCall = CLI.IsTailCall; |
5040 | 1.55k | CallingConv::ID CallConv = CLI.CallConv; |
5041 | 1.55k | bool isVarArg = CLI.IsVarArg; |
5042 | 1.55k | bool isPatchPoint = CLI.IsPatchPoint; |
5043 | 1.55k | ImmutableCallSite CS = CLI.CS; |
5044 | 1.55k | |
5045 | 1.55k | if (isTailCall) {
5046 | 204 | if (Subtarget.useLongCalls() && !(CS && CS.isMustTailCall()))
5047 | 1 | isTailCall = false;
5048 | 203 | else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5049 | 187 | isTailCall = |
5050 | 187 | IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS, |
5051 | 187 | isVarArg, Outs, Ins, DAG); |
5052 | 203 | else |
5053 | 16 | isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, |
5054 | 16 | Ins, DAG); |
5055 | 204 | if (isTailCall) {
5056 | 58 | ++NumTailCalls; |
5057 | 58 | if (!getTargetMachine().Options.GuaranteedTailCallOpt) |
5058 | 55 | ++NumSiblingCalls; |
5059 | 58 | |
5060 | 58 | assert(isa<GlobalAddressSDNode>(Callee) && |
5061 | 58 | "Callee should be an llvm::Function object."); |
5062 | 58 | DEBUG( |
5063 | 58 | const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal(); |
5064 | 58 | const unsigned Width = 80 - strlen("TCO caller: ") |
5065 | 58 | - strlen(", callee linkage: 0, 0"); |
5066 | 58 | dbgs() << "TCO caller: " |
5067 | 58 | << left_justify(DAG.getMachineFunction().getName(), Width) |
5068 | 58 | << ", callee linkage: " |
5069 | 58 | << GV->getVisibility() << ", " << GV->getLinkage() << "\n" |
5070 | 58 | ); |
5071 | 58 | } |
5072 | 204 | } |
5073 | 1.55k | |
5074 | 1.55k | if (!isTailCall && CS && CS.isMustTailCall())
5075 | 0 | report_fatal_error("failed to perform tail call elimination on a call " |
5076 | 0 | "site marked musttail"); |
5077 | 1.55k | |
5078 | 1.55k | // When long calls (i.e. indirect calls) are always used, calls are always |
5079 | 1.55k | // made via function pointer. If we have a function name, first translate it |
5080 | 1.55k | // into a pointer. |
5081 | 1.55k | if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5082 | 1 | !isTailCall) |
5083 | 1 | Callee = LowerGlobalAddress(Callee, DAG); |
5084 | 1.55k | |
5085 | 1.55k | if (Subtarget.isSVR4ABI()) {
5086 | 1.43k | if (Subtarget.isPPC64()) |
5087 | 1.20k | return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg, |
5088 | 1.20k | isTailCall, isPatchPoint, Outs, OutVals, Ins, |
5089 | 1.20k | dl, DAG, InVals, CS); |
5090 | 1.43k | else |
5091 | 232 | return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg, |
5092 | 232 | isTailCall, isPatchPoint, Outs, OutVals, Ins, |
5093 | 232 | dl, DAG, InVals, CS); |
5094 | 119 | } |
5095 | 119 | |
5096 | 119 | return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg, |
5097 | 119 | isTailCall, isPatchPoint, Outs, OutVals, Ins, |
5098 | 119 | dl, DAG, InVals, CS); |
5099 | 119 | } |
5100 | | |
5101 | | SDValue PPCTargetLowering::LowerCall_32SVR4( |
5102 | | SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, |
5103 | | bool isTailCall, bool isPatchPoint, |
5104 | | const SmallVectorImpl<ISD::OutputArg> &Outs, |
5105 | | const SmallVectorImpl<SDValue> &OutVals, |
5106 | | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, |
5107 | | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, |
5108 | 232 | ImmutableCallSite CS) const { |
5109 | 232 | // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description |
5110 | 232 | // of the 32-bit SVR4 ABI stack frame layout. |
5111 | 232 | |
5112 | 232 | assert((CallConv == CallingConv::C || |
5113 | 232 | CallConv == CallingConv::Fast) && "Unknown calling convention!"); |
5114 | 232 | |
5115 | 232 | unsigned PtrByteSize = 4; |
5116 | 232 | |
5117 | 232 | MachineFunction &MF = DAG.getMachineFunction(); |
5118 | 232 | |
5119 | 232 | // Mark this function as potentially containing a function that contains a
5120 | 232 | // tail call. As a consequence, the frame pointer will be used for dynamic
5121 | 232 | // alloca and for restoring the caller's stack pointer in this function's
5122 | 232 | // epilog. This is done because, by tail calling, the called function might
5123 | 232 | // overwrite the value in this function's (MF) stack pointer stack slot 0(SP).
5124 | 232 | if (getTargetMachine().Options.GuaranteedTailCallOpt && |
5125 | 1 | CallConv == CallingConv::Fast) |
5126 | 1 | MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); |
5127 | 232 | |
5128 | 232 | // Count how many bytes are to be pushed on the stack, including the linkage |
5129 | 232 | // area, parameter list area and the part of the local variable space which |
5130 | 232 | // contains copies of aggregates which are passed by value. |
5131 | 232 | |
5132 | 232 | // Assign locations to all of the outgoing arguments. |
5133 | 232 | SmallVector<CCValAssign, 16> ArgLocs; |
5134 | 232 | PPCCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); |
5135 | 232 | |
5136 | 232 | // Reserve space for the linkage area on the stack. |
5137 | 232 | CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(), |
5138 | 232 | PtrByteSize); |
5139 | 232 | if (useSoftFloat()) |
5140 | 27 | CCInfo.PreAnalyzeCallOperands(Outs); |
5141 | 232 | |
5142 | 232 | if (isVarArg) {
5143 | 57 | // Handle fixed and variable vector arguments differently. |
5144 | 57 | // Fixed vector arguments go into registers as long as registers are |
5145 | 57 | // available. Variable vector arguments always go into memory. |
5146 | 57 | unsigned NumArgs = Outs.size(); |
5147 | 57 | |
5148 | 157 | for (unsigned i = 0; i != NumArgs; ++i) {
5149 | 100 | MVT ArgVT = Outs[i].VT; |
5150 | 100 | ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; |
5151 | 100 | bool Result; |
5152 | 100 | |
5153 | 100 | if (Outs[i].IsFixed) {
5154 | 17 | Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, |
5155 | 17 | CCInfo); |
5156 | 100 | } else { |
5157 | 83 | Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, |
5158 | 83 | ArgFlags, CCInfo); |
5159 | 83 | } |
5160 | 100 | |
5161 | 100 | if (Result) {
5162 | | #ifndef NDEBUG |
5163 | | errs() << "Call operand #" << i << " has unhandled type " |
5164 | | << EVT(ArgVT).getEVTString() << "\n"; |
5165 | | #endif |
5166 | 0 | llvm_unreachable(nullptr); |
5167 | 0 | } |
5168 | 100 | } |
5169 | 232 | } else { |
5170 | 175 | // All arguments are treated the same. |
5171 | 175 | CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4); |
5172 | 175 | } |
5173 | 232 | CCInfo.clearWasPPCF128(); |
5174 | 232 | |
5175 | 232 | // Assign locations to all of the outgoing aggregate by value arguments. |
5176 | 232 | SmallVector<CCValAssign, 16> ByValArgLocs; |
5177 | 232 | CCState CCByValInfo(CallConv, isVarArg, MF, ByValArgLocs, *DAG.getContext()); |
5178 | 232 | |
5179 | 232 | // Reserve stack space for the allocations in CCInfo. |
5180 | 232 | CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); |
5181 | 232 | |
5182 | 232 | CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal); |
5183 | 232 | |
5184 | 232 | // Size of the linkage area, parameter list area and the part of the local |
5185 | 232 | // space variable where copies of aggregates which are passed by value are |
5186 | 232 | // stored. |
5187 | 232 | unsigned NumBytes = CCByValInfo.getNextStackOffset(); |
5188 | 232 | |
5189 | 232 | // Calculate by how many bytes the stack has to be adjusted in case of tail |
5190 | 232 | // call optimization. |
5191 | 232 | int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); |
5192 | 232 | |
5193 | 232 | // Adjust the stack pointer for the new arguments... |
5194 | 232 | // These operations are automatically eliminated by the prolog/epilog pass |
5195 | 232 | Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); |
5196 | 232 | SDValue CallSeqStart = Chain; |
5197 | 232 | |
5198 | 232 | // Load the return address and frame pointer so they can be moved somewhere else
5199 | 232 | // later. |
5200 | 232 | SDValue LROp, FPOp; |
5201 | 232 | Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); |
5202 | 232 | |
5203 | 232 | // Set up a copy of the stack pointer for use loading and storing any |
5204 | 232 | // arguments that may not fit in the registers available for argument |
5205 | 232 | // passing. |
5206 | 232 | SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32); |
5207 | 232 | |
5208 | 232 | SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; |
5209 | 232 | SmallVector<TailCallArgumentInfo, 8> TailCallArguments; |
5210 | 232 | SmallVector<SDValue, 8> MemOpChains; |
5211 | 232 | |
5212 | 232 | bool seenFloatArg = false; |
5213 | 232 | // Walk the register/memloc assignments, inserting copies/loads. |
5214 | 232 | for (unsigned i = 0, j = 0, e = ArgLocs.size(); |
5215 | 739 | i != e; |
5216 | 507 | ++i) {
5217 | 507 | CCValAssign &VA = ArgLocs[i]; |
5218 | 507 | SDValue Arg = OutVals[i]; |
5219 | 507 | ISD::ArgFlagsTy Flags = Outs[i].Flags; |
5220 | 507 | |
5221 | 507 | if (Flags.isByVal()) {
5222 | 0 | // Argument is an aggregate which is passed by value, thus we need to |
5223 | 0 | // create a copy of it in the local variable space of the current stack |
5224 | 0 | // frame (which is the stack frame of the caller) and pass the address of |
5225 | 0 | // this copy to the callee. |
5226 | 0 | assert((j < ByValArgLocs.size()) && "Index out of bounds!"); |
5227 | 0 | CCValAssign &ByValVA = ByValArgLocs[j++]; |
5228 | 0 | assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!"); |
5229 | 0 |
5230 | 0 | // Memory reserved in the local variable space of the callers stack frame. |
5231 | 0 | unsigned LocMemOffset = ByValVA.getLocMemOffset(); |
5232 | 0 |
5233 | 0 | SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); |
5234 | 0 | PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()), |
5235 | 0 | StackPtr, PtrOff); |
5236 | 0 |
5237 | 0 | // Create a copy of the argument in the local area of the current |
5238 | 0 | // stack frame. |
5239 | 0 | SDValue MemcpyCall = |
5240 | 0 | CreateCopyOfByValArgument(Arg, PtrOff, |
5241 | 0 | CallSeqStart.getNode()->getOperand(0), |
5242 | 0 | Flags, DAG, dl); |
5243 | 0 |
5244 | 0 | // This must go outside the CALLSEQ_START..END. |
5245 | 0 | SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0, |
5246 | 0 | SDLoc(MemcpyCall)); |
5247 | 0 | DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), |
5248 | 0 | NewCallSeqStart.getNode()); |
5249 | 0 | Chain = CallSeqStart = NewCallSeqStart; |
5250 | 0 |
5251 | 0 | // Pass the address of the aggregate copy on the stack either in a |
5252 | 0 | // physical register or in the parameter list area of the current stack |
5253 | 0 | // frame to the callee. |
5254 | 0 | Arg = PtrOff; |
5255 | 0 | } |
5256 | 507 | |
5257 | 507 | if (VA.isRegLoc()) {
5258 | 501 | if (Arg.getValueType() == MVT::i1) |
5259 | 1 | Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg); |
5260 | 501 | |
5261 | 501 | seenFloatArg |= VA.getLocVT().isFloatingPoint(); |
5262 | 501 | // Put argument in a physical register. |
5263 | 501 | RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); |
5264 | 507 | } else { |
5265 | 6 | // Put argument in the parameter list area of the current stack frame. |
5266 | 6 | assert(VA.isMemLoc()); |
5267 | 6 | unsigned LocMemOffset = VA.getLocMemOffset(); |
5268 | 6 | |
5269 | 6 | if (!isTailCall) {
5270 | 6 | SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); |
5271 | 6 | PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()), |
5272 | 6 | StackPtr, PtrOff); |
5273 | 6 | |
5274 | 6 | MemOpChains.push_back( |
5275 | 6 | DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo())); |
5276 | 6 | } else { |
5277 | 0 | // Calculate and remember argument location. |
5278 | 0 | CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset, |
5279 | 0 | TailCallArguments); |
5280 | 0 | } |
5281 | 6 | } |
5282 | 507 | } |
5283 | 232 | |
5284 | 232 | if (!MemOpChains.empty()) |
5285 | 3 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); |
5286 | 232 | |
5287 | 232 | // Build a sequence of copy-to-reg nodes chained together with token chain |
5288 | 232 | // and flag operands which copy the outgoing args into the appropriate regs. |
5289 | 232 | SDValue InFlag; |
5290 | 733 | for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5291 | 501 | Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, |
5292 | 501 | RegsToPass[i].second, InFlag); |
5293 | 501 | InFlag = Chain.getValue(1); |
5294 | 501 | } |
5295 | 232 | |
5296 | 232 | // Set CR bit 6 to true if this is a vararg call with floating args passed in |
5297 | 232 | // registers. |
5298 | 232 | if (isVarArg) {
5299 | 57 | SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); |
5300 | 57 | SDValue Ops[] = { Chain, InFlag }; |
5301 | 57 | |
5302 | 57 | Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
5303 | 57 | dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
5304 | 57 | |
5305 | 57 | InFlag = Chain.getValue(1); |
5306 | 57 | } |
5307 | 232 | |
5308 | 232 | if (isTailCall) |
5309 | 1 | PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp, |
5310 | 1 | TailCallArguments); |
5311 | 232 | |
5312 | 232 | return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, |
5313 | 232 | /* unused except on PPC64 ELFv1 */ false, DAG, |
5314 | 232 | RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, |
5315 | 232 | NumBytes, Ins, InVals, CS); |
5316 | 232 | } |
5317 | | |
5318 | | // Copy an argument into memory, being careful to do this outside the |
5319 | | // call sequence for the call to which the argument belongs. |
5320 | | SDValue PPCTargetLowering::createMemcpyOutsideCallSeq( |
5321 | | SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags, |
5322 | 48 | SelectionDAG &DAG, const SDLoc &dl) const { |
5323 | 48 | SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff, |
5324 | 48 | CallSeqStart.getNode()->getOperand(0), |
5325 | 48 | Flags, DAG, dl); |
5326 | 48 | // The MEMCPY must go outside the CALLSEQ_START..END. |
5327 | 48 | int64_t FrameSize = CallSeqStart.getConstantOperandVal(1); |
5328 | 48 | SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0, |
5329 | 48 | SDLoc(MemcpyCall)); |
5330 | 48 | DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), |
5331 | 48 | NewCallSeqStart.getNode()); |
5332 | 48 | return NewCallSeqStart; |
5333 | 48 | } |
5334 | | |
5335 | | SDValue PPCTargetLowering::LowerCall_64SVR4( |
5336 | | SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, |
5337 | | bool isTailCall, bool isPatchPoint, |
5338 | | const SmallVectorImpl<ISD::OutputArg> &Outs, |
5339 | | const SmallVectorImpl<SDValue> &OutVals, |
5340 | | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, |
5341 | | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, |
5342 | 1.20k | ImmutableCallSite CS) const { |
5343 | 1.20k | bool isELFv2ABI = Subtarget.isELFv2ABI(); |
5344 | 1.20k | bool isLittleEndian = Subtarget.isLittleEndian(); |
5345 | 1.20k | unsigned NumOps = Outs.size(); |
5346 | 1.20k | bool hasNest = false; |
5347 | 1.20k | bool IsSibCall = false; |
5348 | 1.20k | |
5349 | 1.20k | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
5350 | 1.20k | unsigned PtrByteSize = 8; |
5351 | 1.20k | |
5352 | 1.20k | MachineFunction &MF = DAG.getMachineFunction(); |
5353 | 1.20k | |
5354 | 1.20k | if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
5355 | 55 | IsSibCall = true; |
5356 | 1.20k | |
5357 | 1.20k | // Mark this function as potentially containing a function that contains a
5358 | 1.20k | // tail call. As a consequence, the frame pointer will be used for dynamic
5359 | 1.20k | // alloca and for restoring the caller's stack pointer in this function's
5360 | 1.20k | // epilog. This is done because, by tail calling, the called function might
5361 | 1.20k | // overwrite the value in this function's (MF) stack pointer stack slot 0(SP).
5362 | 1.20k | if (getTargetMachine().Options.GuaranteedTailCallOpt && |
5363 | 1 | CallConv == CallingConv::Fast) |
5364 | 1 | MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); |
5365 | 1.20k | |
5366 | 1.20k | assert(!(CallConv == CallingConv::Fast && isVarArg) && |
5367 | 1.20k | "fastcc not supported on varargs functions"); |
5368 | 1.20k | |
5369 | 1.20k | // Count how many bytes are to be pushed on the stack, including the linkage |
5370 | 1.20k | // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes |
5371 | 1.20k | // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage |
5372 | 1.20k | // area is 32 bytes reserved space for [SP][CR][LR][TOC]. |
5373 | 1.20k | unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); |
5374 | 1.20k | unsigned NumBytes = LinkageSize; |
5375 | 1.20k | unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; |
5376 | 1.20k | unsigned &QFPR_idx = FPR_idx; |
5377 | 1.20k | |
5378 | 1.20k | static const MCPhysReg GPR[] = { |
5379 | 1.20k | PPC::X3, PPC::X4, PPC::X5, PPC::X6, |
5380 | 1.20k | PPC::X7, PPC::X8, PPC::X9, PPC::X10, |
5381 | 1.20k | }; |
5382 | 1.20k | static const MCPhysReg VR[] = { |
5383 | 1.20k | PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, |
5384 | 1.20k | PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 |
5385 | 1.20k | }; |
5386 | 1.20k | |
5387 | 1.20k | const unsigned NumGPRs = array_lengthof(GPR); |
5388 | 1.20k | const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
5389 | 1.20k | const unsigned NumVRs = array_lengthof(VR); |
5390 | 1.20k | const unsigned NumQFPRs = NumFPRs; |
5391 | 1.20k | |
5392 | 1.20k | // On ELFv2, we can avoid allocating the parameter area if all the arguments |
5393 | 1.20k | // can be passed to the callee in registers. |
5394 | 1.20k | // For the fast calling convention, there is another check below. |
5395 | 1.20k | // Note: We should keep consistent with LowerFormalArguments_64SVR4() |
5396 | 1.20k | bool HasParameterArea = !isELFv2ABI || isVarArg || CallConv == CallingConv::Fast;
5397 | 1.20k | if (!HasParameterArea) {
5398 | 564 | unsigned ParamAreaSize = NumGPRs * PtrByteSize; |
5399 | 564 | unsigned AvailableFPRs = NumFPRs; |
5400 | 564 | unsigned AvailableVRs = NumVRs; |
5401 | 564 | unsigned NumBytesTmp = NumBytes; |
5402 | 1.63k | for (unsigned i = 0; i != NumOps; ++i) {
5403 | 1.07k | if (Outs[i].Flags.isNest()) continue;
5404 | 1.07k | if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
5405 | 1.07k | PtrByteSize, LinkageSize, ParamAreaSize, |
5406 | 1.07k | NumBytesTmp, AvailableFPRs, AvailableVRs, |
5407 | 1.07k | Subtarget.hasQPX())) |
5408 | 83 | HasParameterArea = true; |
5409 | 1.07k | } |
5410 | 564 | } |
5411 | 1.20k | |
5412 | 1.20k | // When using the fast calling convention, we don't provide backing for |
5413 | 1.20k | // arguments that will be in registers. |
5414 | 1.20k | unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0; |
5415 | 1.20k | |
5416 | 1.20k | // Add up all the space actually used. |
5417 | 4.47k | for (unsigned i = 0; i != NumOps; ++i) {
5418 | 3.27k | ISD::ArgFlagsTy Flags = Outs[i].Flags; |
5419 | 3.27k | EVT ArgVT = Outs[i].VT; |
5420 | 3.27k | EVT OrigVT = Outs[i].ArgVT; |
5421 | 3.27k | |
5422 | 3.27k | if (Flags.isNest()) |
5423 | 2 | continue; |
5424 | 3.26k | |
5425 | 3.26k | if (CallConv == CallingConv::Fast) {
5426 | 1.06k | if (Flags.isByVal()) |
5427 | 1 | NumGPRsUsed += (Flags.getByValSize()+7)/8; |
5428 | 1.06k | else |
5429 | 1.06k | switch (ArgVT.getSimpleVT().SimpleTy) { |
5430 | 0 | default: llvm_unreachable("Unexpected ValueType for argument!");
5431 | 364 | case MVT::i1: |
5432 | 364 | case MVT::i32: |
5433 | 364 | case MVT::i64: |
5434 | 364 | if (++NumGPRsUsed <= NumGPRs) |
5435 | 188 | continue; |
5436 | 176 | break; |
5437 | 352 | case MVT::v4i32: |
5438 | 352 | case MVT::v8i16: |
5439 | 352 | case MVT::v16i8: |
5440 | 352 | case MVT::v2f64: |
5441 | 352 | case MVT::v2i64: |
5442 | 352 | case MVT::v1i128: |
5443 | 352 | if (++NumVRsUsed <= NumVRs) |
5444 | 264 | continue; |
5445 | 88 | break; |
5446 | 0 | case MVT::v4f32: |
5447 | 0 | // When using QPX, this is handled like a FP register, otherwise, it |
5448 | 0 | // is an Altivec register. |
5449 | 0 | if (Subtarget.hasQPX()) {
5450 | 0 | if (++NumFPRsUsed <= NumFPRs) |
5451 | 0 | continue; |
5452 | 0 | } else { |
5453 | 0 | if (++NumVRsUsed <= NumVRs) |
5454 | 0 | continue; |
5455 | 0 | } |
5456 | 0 | break; |
5457 | 352 | case MVT::f32: |
5458 | 352 | case MVT::f64: |
5459 | 352 | case MVT::v4f64: // QPX |
5460 | 352 | case MVT::v4i1: // QPX |
5461 | 352 | if (++NumFPRsUsed <= NumFPRs) |
5462 | 286 | continue; |
5463 | 66 | break; |
5464 | 1.06k | } |
5465 | 1.06k | } |
5466 | 2.53k | |
5467 | 2.53k | /* Respect alignment of argument on the stack. */ |
5468 | 2.53k | unsigned Align = |
5469 | 2.53k | CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); |
5470 | 2.53k | NumBytes = ((NumBytes + Align - 1) / Align) * Align; |
5471 | 2.53k | |
5472 | 2.53k | NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); |
5473 | 2.53k | if (Flags.isInConsecutiveRegsLast()) |
5474 | 90 | NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; |
5475 | 3.27k | } |
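
The rounding in the loop above is the usual round-up-to-multiple idiom, applied once per slot alignment and once more with PtrByteSize for consecutive-register runs. A tiny sketch with illustrative values:

    #include <cassert>

    // Round Bytes up to the next multiple of Align, as done for each stack
    // slot in the loop above.
    static unsigned alignTo(unsigned Bytes, unsigned Align) {
      return ((Bytes + Align - 1) / Align) * Align;
    }

    int main() {
      assert(alignTo(52, 16) == 64); // e.g. a 16-byte-aligned vector slot
      assert(alignTo(52, 8) == 56);  // e.g. PtrByteSize rounding
      assert(alignTo(64, 16) == 64); // already aligned: unchanged
    }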
5476 | 1.20k | |
5477 | 1.20k | unsigned NumBytesActuallyUsed = NumBytes; |
5478 | 1.20k | |
5479 | 1.20k | // In the old ELFv1 ABI, |
5480 | 1.20k | // the prolog code of the callee may store up to 8 GPR argument registers to |
5481 | 1.20k | // the stack, allowing va_start to index over them in memory if it is varargs.
5482 | 1.20k | // Because we cannot tell if this is needed on the caller side, we have to |
5483 | 1.20k | // conservatively assume that it is needed. As such, make sure we have at |
5484 | 1.20k | // least enough stack space for the caller to store the 8 GPRs. |
5485 | 1.20k | // In the ELFv2 ABI, we allocate the parameter area iff a callee |
5486 | 1.20k | // really requires memory operands, e.g. a vararg function. |
5487 | 1.20k | if (HasParameterArea) |
5488 | 667 | NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); |
5489 | 1.20k | else |
5490 | 533 | NumBytes = LinkageSize; |
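
Taken together, the sizing decision above reduces to a small formula; a sketch with illustrative numbers (NumBytes here already includes the linkage area, as in the code):

    #include <algorithm>
    #include <cassert>

    // With a parameter area, reserve at least backing for all 8 GPR argument
    // registers past the linkage area; without one (ELFv2, everything in
    // registers), the linkage area alone suffices.
    static unsigned frameBytes(bool HasParameterArea, unsigned LinkageSize,
                               unsigned NumBytes) {
      return HasParameterArea ? std::max(NumBytes, LinkageSize + 8 * 8)
                              : LinkageSize;
    }

    int main() {
      assert(frameBytes(true, 48, 56) == 112);  // ELFv1: 48 + 64 minimum
      assert(frameBytes(true, 32, 120) == 120); // args exceed the minimum
      assert(frameBytes(false, 32, 32) == 32);  // ELFv2, registers only
    }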
5491 | 1.20k | |
5492 | 1.20k | // Tail call needs the stack to be aligned. |
5493 | 1.20k | if (getTargetMachine().Options.GuaranteedTailCallOpt && |
5494 | 1 | CallConv == CallingConv::Fast) |
5495 | 1 | NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes); |
5496 | 1.20k | |
5497 | 1.20k | int SPDiff = 0; |
5498 | 1.20k | |
5499 | 1.20k | // Calculate by how many bytes the stack has to be adjusted in case of tail |
5500 | 1.20k | // call optimization. |
5501 | 1.20k | if (!IsSibCall) |
5502 | 1.14k | SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); |
5503 | 1.20k | |
5504 | 1.20k | // To protect arguments on the stack from being clobbered in a tail call, |
5505 | 1.20k | // force all the loads to happen before doing any other lowering. |
5506 | 1.20k | if (isTailCall) |
5507 | 56 | Chain = DAG.getStackArgumentTokenFactor(Chain); |
5508 | 1.20k | |
5509 | 1.20k | // Adjust the stack pointer for the new arguments... |
5510 | 1.20k | // These operations are automatically eliminated by the prolog/epilog pass |
5511 | 1.20k | if (!IsSibCall) |
5512 | 1.14k | Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); |
5513 | 1.20k | SDValue CallSeqStart = Chain; |
5514 | 1.20k | |
5515 | 1.20k | // Load the return address and frame pointer so they can be moved somewhere else
5516 | 1.20k | // later. |
5517 | 1.20k | SDValue LROp, FPOp; |
5518 | 1.20k | Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); |
5519 | 1.20k | |
5520 | 1.20k | // Set up a copy of the stack pointer for use loading and storing any |
5521 | 1.20k | // arguments that may not fit in the registers available for argument |
5522 | 1.20k | // passing. |
5523 | 1.20k | SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64); |
5524 | 1.20k | |
5525 | 1.20k | // Figure out which arguments are going to go in registers, and which in |
5526 | 1.20k | // memory. Also, if this is a vararg function, floating point operations |
5527 | 1.20k | // must be stored to our stack, and loaded into integer regs as well, if |
5528 | 1.20k | // any integer regs are available for argument passing. |
5529 | 1.20k | unsigned ArgOffset = LinkageSize; |
5530 | 1.20k | |
5531 | 1.20k | SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; |
5532 | 1.20k | SmallVector<TailCallArgumentInfo, 8> TailCallArguments; |
5533 | 1.20k | |
5534 | 1.20k | SmallVector<SDValue, 8> MemOpChains; |
5535 | 4.47k | for (unsigned i = 0; i != NumOps; ++i) {
5536 | 3.27k | SDValue Arg = OutVals[i]; |
5537 | 3.27k | ISD::ArgFlagsTy Flags = Outs[i].Flags; |
5538 | 3.27k | EVT ArgVT = Outs[i].VT; |
5539 | 3.27k | EVT OrigVT = Outs[i].ArgVT; |
5540 | 3.27k | |
5541 | 3.27k | // PtrOff will be used to store the current argument to the stack if a |
5542 | 3.27k | // register cannot be found for it. |
5543 | 3.27k | SDValue PtrOff; |
5544 | 3.27k | |
5545 | 3.27k | // We re-align the argument offset for each argument, except when using the |
5546 | 3.27k | // fast calling convention, when we need to make sure we do that only when |
5547 | 3.27k | // we'll actually use a stack slot. |
5548 | 2.53k | auto ComputePtrOff = [&]() { |
5549 | 2.53k | /* Respect alignment of argument on the stack. */ |
5550 | 2.53k | unsigned Align = |
5551 | 2.53k | CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); |
5552 | 2.53k | ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; |
5553 | 2.53k | |
5554 | 2.53k | PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType()); |
5555 | 2.53k | |
5556 | 2.53k | PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); |
5557 | 2.53k | }; |
5558 | 3.27k | |
5559 | 3.27k | if (CallConv != CallingConv::Fast) {
5560 | 2.20k | ComputePtrOff(); |
5561 | 2.20k | |
5562 | 2.20k | /* Compute GPR index associated with argument offset. */ |
5563 | 2.20k | GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; |
5564 | 2.20k | GPR_idx = std::min(GPR_idx, NumGPRs); |
5565 | 2.20k | } |
5566 | 3.27k | |
5567 | 3.27k | // Promote integers to 64-bit values. |
5568 | 3.27k | if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
5569 | 235 | // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
5570 | 235 | unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
5571 | 235 | Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); |
5572 | 235 | } |
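
The promotion above widens i32 (and i1) arguments to the 64-bit GPR width, choosing the extension by the signext/zeroext attribute. A standalone sketch of the two cases:

    #include <cassert>
    #include <cstdint>

    // Widen a 32-bit argument to 64 bits the way the lowering above does:
    // SIGN_EXTEND when the argument is marked signext, ZERO_EXTEND otherwise.
    static uint64_t promoteToI64(uint32_t V, bool IsSExt) {
      return IsSExt ? (uint64_t)(int64_t)(int32_t)V : (uint64_t)V;
    }

    int main() {
      assert(promoteToI64(0xFFFFFFFFu, true) == 0xFFFFFFFFFFFFFFFFull);  // -1 sexts
      assert(promoteToI64(0xFFFFFFFFu, false) == 0x00000000FFFFFFFFull); // zext
      assert(promoteToI64(7, true) == 7);  // small values agree either way
    }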
5573 | 3.27k | |
5574 | 3.27k | // FIXME memcpy is used way more than necessary. Correctness first. |
5575 | 3.27k | // Note: "by value" is code for passing a structure by value, not |
5576 | 3.27k | // basic types. |
5577 | 3.27k | if (Flags.isByVal()) {
5578 | 57 | // Note: Size includes alignment padding, so |
5579 | 57 | // struct x { short a; char b; } |
5580 | 57 | // will have Size = 4. With #pragma pack(1), it will have Size = 3. |
5581 | 57 | // These are the proper values we need for right-justifying the |
5582 | 57 | // aggregate in a parameter register. |
5583 | 57 | unsigned Size = Flags.getByValSize(); |
5584 | 57 | |
5585 | 57 | // An empty aggregate parameter takes up no storage and no |
5586 | 57 | // registers. |
5587 | 57 | if (Size == 0) |
5588 | 2 | continue; |
5589 | 55 | |
5590 | 55 | if (CallConv == CallingConv::Fast)
5591 | 1 | ComputePtrOff(); |
5592 | 55 | |
5593 | 55 | // All aggregates smaller than 8 bytes must be passed right-justified. |
5594 | 55 | if (Size==1 || Size==2 || Size==4) {
5595 | 16 | EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
5596 | 16 | if (GPR_idx != NumGPRs) {
5597 | 7 | SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, |
5598 | 7 | MachinePointerInfo(), VT); |
5599 | 7 | MemOpChains.push_back(Load.getValue(1)); |
5600 | 7 | RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); |
5601 | 7 | |
5602 | 7 | ArgOffset += PtrByteSize; |
5603 | 7 | continue; |
5604 | 7 | } |
5605 | 48 | } |
5606 | 48 | |
5607 | 48 | if (GPR_idx == NumGPRs && Size < 8) {
5608 | 13 | SDValue AddPtr = PtrOff; |
5609 | 13 | if (!isLittleEndian) {
5610 | 12 | SDValue Const = DAG.getConstant(PtrByteSize - Size, dl, |
5611 | 12 | PtrOff.getValueType()); |
5612 | 12 | AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); |
5613 | 12 | } |
5614 | 13 | Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, |
5615 | 13 | CallSeqStart, |
5616 | 13 | Flags, DAG, dl); |
5617 | 13 | ArgOffset += PtrByteSize; |
5618 | 13 | continue; |
5619 | 13 | } |
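
On big-endian targets the small byval copy above is shifted to the high end of its doubleword slot, so the value ends up right-justified when the slot is later read as a whole register. A sketch of the offset choice:

    #include <cassert>

    // Offset of a small byval copy within its 8-byte parameter slot: on
    // big-endian the data goes in the last Size bytes (right-justified in a
    // register load); on little-endian no shift is needed.
    static unsigned byvalSlotOffset(bool IsLittleEndian, unsigned Size,
                                    unsigned PtrByteSize = 8) {
      return IsLittleEndian ? 0 : PtrByteSize - Size;
    }

    int main() {
      assert(byvalSlotOffset(false, 3) == 5); // BE: bytes 5..7 of the slot
      assert(byvalSlotOffset(true, 3) == 0);  // LE: starts at the slot base
    }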
5620 | 35 | // Copy entire object into memory. There are cases where gcc-generated |
5621 | 35 | // code assumes it is there, even if it could be put entirely into |
5622 | 35 | // registers. (This is not what the doc says.) |
5623 | 35 | |
5624 | 35 | // FIXME: The above statement is likely due to a misunderstanding of the |
5625 | 35 | // documents. All arguments must be copied into the parameter area BY |
5626 | 35 | // THE CALLEE in the event that the callee takes the address of any |
5627 | 35 | // formal argument. That has not yet been implemented. However, it is |
5628 | 35 | // reasonable to use the stack area as a staging area for the register |
5629 | 35 | // load. |
5630 | 35 | |
5631 | 35 | // Skip this for small aggregates, as we will use the same slot for a |
5632 | 35 | // right-justified copy, below. |
5633 | 35 | if (Size >= 8)
5634 | 27 | Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, |
5635 | 27 | CallSeqStart, |
5636 | 27 | Flags, DAG, dl); |
5637 | 35 | |
5638 | 35 | // When a register is available, pass a small aggregate right-justified. |
5639 | 35 | if (Size < 8 && GPR_idx != NumGPRs) {
5640 | 8 | // The easiest way to get this right-justified in a register |
5641 | 8 | // is to copy the structure into the rightmost portion of a |
5642 | 8 | // local variable slot, then load the whole slot into the |
5643 | 8 | // register. |
5644 | 8 | // FIXME: The memcpy seems to produce pretty awful code for |
5645 | 8 | // small aggregates, particularly for packed ones. |
5646 | 8 | // FIXME: It would be preferable to use the slot in the |
5647 | 8 | // parameter save area instead of a new local variable. |
5648 | 8 | SDValue AddPtr = PtrOff; |
5649 | 8 | if (!isLittleEndian) {
5650 | 8 | SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType()); |
5651 | 8 | AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); |
5652 | 8 | } |
5653 | 8 | Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, |
5654 | 8 | CallSeqStart, |
5655 | 8 | Flags, DAG, dl); |
5656 | 8 | |
5657 | 8 | // Load the slot into the register. |
5658 | 8 | SDValue Load = |
5659 | 8 | DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo()); |
5660 | 8 | MemOpChains.push_back(Load.getValue(1)); |
5661 | 8 | RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); |
5662 | 8 | |
5663 | 8 | // Done with this argument. |
5664 | 8 | ArgOffset += PtrByteSize; |
5665 | 8 | continue; |
5666 | 8 | } |
5667 | 27 | |
5668 | 27 | // For aggregates larger than PtrByteSize, copy the pieces of the |
5669 | 27 | // object that fit into registers from the parameter save area. |
5670 | 87 | for (unsigned j=0; j<Size; j+=PtrByteSize) {
5671 | 76 | SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType()); |
5672 | 76 | SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); |
5673 | 76 | if (GPR_idx != NumGPRs) {
5674 | 60 | SDValue Load = |
5675 | 60 | DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo()); |
5676 | 60 | MemOpChains.push_back(Load.getValue(1)); |
5677 | 60 | RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); |
5678 | 60 | ArgOffset += PtrByteSize; |
5679 | 76 | } else { |
5680 | 16 | ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; |
5681 | 16 | break; |
5682 | 16 | } |
5683 | 76 | } |
5684 | 57 | continue; |
5685 | 57 | } |
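
To make the byval path above concrete: a 3-byte aggregate misses the 1/2/4-byte extending-load fast path, and being smaller than 8 bytes it is memcpy'd into the rightmost bytes of its doubleword slot (offset 8 - 3 = 5 on big-endian) before the whole slot is loaded into one GPR. A minimal caller-side sketch (hypothetical C++, not part of this file):

    struct Small { char a, b, c; };   // Flags.getByValSize() == 3

    void callee(Small S);             // S is passed byval

    void caller() {
      Small S{1, 2, 3};
      callee(S);                      // staged right-justified, then one GPR load
    }
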
5686 | 3.21k | |
5687 | 3.21k | switch (Arg.getSimpleValueType().SimpleTy) { |
5688 | 0 | default: llvm_unreachable("Unexpected ValueType for argument!");
5689 | 1.79k | case MVT::i1: |
5690 | 1.79k | case MVT::i32: |
5691 | 1.79k | case MVT::i64: |
5692 | 1.79k | if (Flags.isNest()) {
5693 | 2 | // The 'nest' parameter, if any, is passed in R11. |
5694 | 2 | RegsToPass.push_back(std::make_pair(PPC::X11, Arg)); |
5695 | 2 | hasNest = true; |
5696 | 2 | break; |
5697 | 2 | } |
5698 | 1.78k | |
5699 | 1.78k | // These can be scalar arguments or elements of an integer array type |
5700 | 1.78k | // passed directly. Clang may use those instead of "byval" aggregate |
5701 | 1.78k | // types to avoid forcing arguments to memory unnecessarily. |
5702 | 1.78k | if (GPR_idx != NumGPRs) {
5703 | 1.47k | RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); |
5704 | 1.78k | } else { |
5705 | 313 | if (CallConv == CallingConv::Fast) |
5706 | 176 | ComputePtrOff(); |
5707 | 313 | |
5708 | 313 | assert(HasParameterArea && |
5709 | 313 | "Parameter area must exist to pass an argument in memory."); |
5710 | 313 | LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, |
5711 | 313 | true, isTailCall, false, MemOpChains, |
5712 | 313 | TailCallArguments, dl); |
5713 | 313 | if (CallConv == CallingConv::Fast) |
5714 | 176 | ArgOffset += PtrByteSize; |
5715 | 313 | } |
5716 | 1.78k | if (CallConv != CallingConv::Fast) |
5717 | 1.42k | ArgOffset += PtrByteSize; |
5718 | 1.78k | break; |
5719 | 973 | case MVT::f32: |
5720 | 973 | case MVT::f64: { |
5721 | 973 | // These can be scalar arguments or elements of a float array type |
5722 | 973 | // passed directly. The latter are used to implement ELFv2 homogenous |
5723 | 973 | // float aggregates. |
5724 | 973 | |
5725 | 973 | // Named arguments go into FPRs first, and once they overflow, the |
5726 | 973 | // remaining arguments go into GPRs and then the parameter save area. |
5727 | 973 | // Unnamed arguments for vararg functions always go to GPRs and |
5728 | 973 | // then the parameter save area. For now, put all arguments to vararg |
5729 | 973 | // routines always in both locations (FPR *and* GPR or stack slot). |
5730 | 965 | bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs; |
5731 | 973 | bool NeededLoad = false; |
5732 | 973 | |
5733 | 973 | // First load the argument into the next available FPR. |
5734 | 973 | if (FPR_idx != NumFPRs) |
5735 | 877 | RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); |
5736 | 973 | |
5737 | 973 | // Next, load the argument into GPR or stack slot if needed. |
5738 | 973 | if (!NeedGPROrStack) |
5739 | 869 | ; |
5740 | 104 | else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) {
5741 | 33 | // FIXME: We may want to re-enable this for CallingConv::Fast on the P8 |
5742 | 33 | // once we support fp <-> gpr moves. |
5743 | 33 | |
5744 | 33 | // In the non-vararg case, this can only ever happen in the |
5745 | 33 | // presence of f32 array types, since otherwise we never run |
5746 | 33 | // out of FPRs before running out of GPRs. |
5747 | 33 | SDValue ArgVal; |
5748 | 33 | |
5749 | 33 | // Double values are always passed in a single GPR. |
5750 | 33 | if (Arg.getValueType() != MVT::f32) {
5751 | 8 | ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg); |
5752 | 8 | |
5753 | 8 | // Non-array float values are extended and passed in a GPR. |
5754 | 33 | } else if (!Flags.isInConsecutiveRegs()) {
5755 | 4 | ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); |
5756 | 4 | ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal); |
5757 | 4 | |
5758 | 4 | // If we have an array of floats, we collect every odd element |
5759 | 4 | // together with its predecessor into one GPR. |
5760 | 25 | } else if (ArgOffset % PtrByteSize != 0) {
5761 | 9 | SDValue Lo, Hi; |
5762 | 9 | Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]); |
5763 | 9 | Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); |
5764 | 9 | if (!isLittleEndian) |
5765 | 0 | std::swap(Lo, Hi); |
5766 | 9 | ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); |
5767 | 9 | |
5768 | 9 | // The final element, if even, goes into the first half of a GPR. |
5769 | 21 | } else if (Flags.isInConsecutiveRegsLast()) {
5770 | 6 | ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); |
5771 | 6 | ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal); |
5772 | 6 | if (!isLittleEndian) |
5773 | 0 | ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal, |
5774 | 0 | DAG.getConstant(32, dl, MVT::i32)); |
5775 | 6 | |
5776 | 6 | // Non-final even elements are skipped; they will be handled |
5777 | 6 | // together with the subsequent argument on the next go-around.
5778 | 6 | } else |
5779 | 6 | ArgVal = SDValue(); |
5780 | 33 | |
5781 | 33 | if (ArgVal.getNode()) |
5782 | 27 | RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal)); |
5783 | 104 | } else { |
5784 | 71 | if (CallConv == CallingConv::Fast) |
5785 | 66 | ComputePtrOff(); |
5786 | 71 | |
5787 | 71 | // Single-precision floating-point values are mapped to the |
5788 | 71 | // second (rightmost) word of the stack doubleword. |
5789 | 71 | if (Arg.getValueType() == MVT::f32 && |
5790 | 71 | !isLittleEndian && !Flags.isInConsecutiveRegs()) {
5791 | 2 | SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType()); |
5792 | 2 | PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); |
5793 | 2 | } |
5794 | 104 | |
5795 | 104 | assert(HasParameterArea && |
5796 | 104 | "Parameter area must exist to pass an argument in memory."); |
5797 | 104 | LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, |
5798 | 104 | true, isTailCall, false, MemOpChains, |
5799 | 104 | TailCallArguments, dl); |
5800 | 104 | |
5801 | 104 | NeededLoad = true; |
5802 | 104 | } |
5803 | 973 | // When passing an array of floats, the array occupies consecutive |
5804 | 973 | // space in the argument area; only round up to the next doubleword |
5805 | 973 | // at the end of the array. Otherwise, each float takes 8 bytes. |
5806 | 973 | if (CallConv != CallingConv::Fast || NeededLoad) {
5807 | 687 | ArgOffset += (Arg.getValueType() == MVT::f32 && |
5808 | 687 | Flags.isInConsecutiveRegs()) ? 4 : 8;
5809 | 687 | if (Flags.isInConsecutiveRegsLast()) |
5810 | 45 | ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; |
5811 | 687 | } |
5812 | 973 | break; |
5813 | 973 | } |
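
The pairing above is easiest to check with a host-side model (a minimal sketch assuming a little-endian target; packF32Pair is a hypothetical helper, not part of this file): the previous f32 element supplies the low word of the shared GPR and the current element the high word, exactly as the BUILD_PAIR is formed.

    #include <cstdint>
    #include <cstring>

    // Models ISD::BUILD_PAIR(Lo, Hi) for two consecutive f32 array elements.
    uint64_t packF32Pair(float Prev /* OutVals[i-1] */, float Cur /* Arg */) {
      uint32_t Lo, Hi;
      std::memcpy(&Lo, &Prev, 4);                   // bitcast f32 -> i32
      std::memcpy(&Hi, &Cur, 4);                    // bitcast f32 -> i32
      return uint64_t(Lo) | (uint64_t(Hi) << 32);   // one GPR doubleword
    }

On big-endian targets the two words are swapped first, matching the std::swap(Lo, Hi) above.
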
5814 | 449 | case MVT::v4f32: |
5815 | 449 | case MVT::v4i32: |
5816 | 449 | case MVT::v8i16: |
5817 | 449 | case MVT::v16i8: |
5818 | 449 | case MVT::v2f64: |
5819 | 449 | case MVT::v2i64: |
5820 | 449 | case MVT::v1i128: |
5821 | 449 | if (!Subtarget.hasQPX()) {
5822 | 449 | // These can be scalar arguments or elements of a vector array type |
5823 | 449 | // passed directly. The latter are used to implement ELFv2 homogenous |
5824 | 449 | // vector aggregates. |
5825 | 449 | |
5826 | 449 | // For a varargs call, named arguments go into VRs or on the stack as |
5827 | 449 | // usual; unnamed arguments always go to the stack or the corresponding |
5828 | 449 | // GPRs when within range. For now, we always put the value in both |
5829 | 449 | // locations (or even all three). |
5830 | 449 | if (isVarArg) {
5831 | 25 | assert(HasParameterArea && |
5832 | 25 | "Parameter area must exist if we have a varargs call."); |
5833 | 25 | // We could elide this store in the case where the object fits |
5834 | 25 | // entirely in R registers. Maybe later. |
5835 | 25 | SDValue Store = |
5836 | 25 | DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); |
5837 | 25 | MemOpChains.push_back(Store); |
5838 | 25 | if (VR_idx != NumVRs) {
5839 | 25 | SDValue Load = |
5840 | 25 | DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo()); |
5841 | 25 | MemOpChains.push_back(Load.getValue(1)); |
5842 | 25 | RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); |
5843 | 25 | } |
5844 | 25 | ArgOffset += 16; |
5845 | 75 | for (unsigned i=0; i<16; i+=PtrByteSize) {
5846 | 50 | if (GPR_idx == NumGPRs) |
5847 | 0 | break; |
5848 | 50 | SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, |
5849 | 50 | DAG.getConstant(i, dl, PtrVT)); |
5850 | 50 | SDValue Load = |
5851 | 50 | DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo()); |
5852 | 50 | MemOpChains.push_back(Load.getValue(1)); |
5853 | 50 | RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); |
5854 | 50 | } |
5855 | 25 | break; |
5856 | 25 | } |
5857 | 424 | |
5858 | 424 | // Non-varargs Altivec params go into VRs or on the stack. |
5859 | 424 | if (VR_idx != NumVRs) {
5860 | 336 | RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); |
5861 | 424 | } else { |
5862 | 88 | if (CallConv == CallingConv::Fast) |
5863 | 88 | ComputePtrOff(); |
5864 | 88 | |
5865 | 88 | assert(HasParameterArea && |
5866 | 88 | "Parameter area must exist to pass an argument in memory."); |
5867 | 88 | LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, |
5868 | 88 | true, isTailCall, true, MemOpChains, |
5869 | 88 | TailCallArguments, dl); |
5870 | 88 | if (CallConv == CallingConv::Fast) |
5871 | 88 | ArgOffset += 16; |
5872 | 88 | } |
5873 | 424 | |
5874 | 424 | if (CallConv != CallingConv::Fast) |
5875 | 72 | ArgOffset += 16; |
5876 | 449 | break; |
5877 | 449 | } // not QPX |
5878 | 0 |
5879 | 449 | assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 && |
5880 | 0 | "Invalid QPX parameter type"); |
5881 | 0 |
5882 | 0 | /* fall through */ |
5883 | 2 | case MVT::v4f64: |
5884 | 2 | case MVT::v4i1: { |
5885 | 2 | bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32; |
5886 | 2 | if (isVarArg) {
5887 | 0 | assert(HasParameterArea && |
5888 | 0 | "Parameter area must exist if we have a varargs call."); |
5889 | 0 | // We could elide this store in the case where the object fits |
5890 | 0 | // entirely in R registers. Maybe later. |
5891 | 0 | SDValue Store = |
5892 | 0 | DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); |
5893 | 0 | MemOpChains.push_back(Store); |
5894 | 0 | if (QFPR_idx != NumQFPRs) {
5895 | 0 | SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl, Store,
5896 | 0 | PtrOff, MachinePointerInfo()); |
5897 | 0 | MemOpChains.push_back(Load.getValue(1)); |
5898 | 0 | RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load)); |
5899 | 0 | } |
5900 | 0 | ArgOffset += (IsF32 ? 16 : 32);
5901 | 0 | for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) {
5902 | 0 | if (GPR_idx == NumGPRs) |
5903 | 0 | break; |
5904 | 0 | SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, |
5905 | 0 | DAG.getConstant(i, dl, PtrVT)); |
5906 | 0 | SDValue Load = |
5907 | 0 | DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo()); |
5908 | 0 | MemOpChains.push_back(Load.getValue(1)); |
5909 | 0 | RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); |
5910 | 0 | } |
5911 | 0 | break; |
5912 | 0 | } |
5913 | 2 | |
5914 | 2 | // Non-varargs QPX params go into registers or on the stack. |
5915 | 2 | if (QFPR_idx != NumQFPRs) {
5916 | 2 | RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg)); |
5917 | 2 | } else { |
5918 | 0 | if (CallConv == CallingConv::Fast) |
5919 | 0 | ComputePtrOff(); |
5920 | 0 |
5921 | 0 | assert(HasParameterArea && |
5922 | 0 | "Parameter area must exist to pass an argument in memory."); |
5923 | 0 | LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, |
5924 | 0 | true, isTailCall, true, MemOpChains, |
5925 | 0 | TailCallArguments, dl); |
5926 | 0 | if (CallConv == CallingConv::Fast) |
5927 | 0 | ArgOffset += (IsF32 ? 16 : 32);
5928 | 0 | } |
5929 | 2 | |
5930 | 2 | if (CallConv != CallingConv::Fast) |
5931 | 2 | ArgOffset += (IsF32 ? 16 : 32);
5932 | 1.79k | break; |
5933 | 1.79k | } |
5934 | 3.27k | } |
5935 | 3.27k | } |
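
As a concrete illustration of the non-QPX vector path above (a hypothetical example using the Clang/GCC vector extension, not part of this file): the elements of an ELFv2 homogeneous vector aggregate each consume one of the twelve argument VRs (v2..v13), and any excess spills to the 16-byte-aligned parameter save area.

    typedef float v4f32 __attribute__((vector_size(16)));

    struct HVA { v4f32 V[4]; };   // homogeneous vector aggregate

    v4f32 firstElem(HVA X);       // X.V[0..3] arrive in v2..v5
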
5936 | 1.20k | |
5937 | 1.20k | assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) && |
5938 | 1.20k | "mismatch in size of parameter area"); |
5939 | 1.20k | (void)NumBytesActuallyUsed; |
5940 | 1.20k | |
5941 | 1.20k | if (!MemOpChains.empty()) |
5942 | 100 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); |
5943 | 1.20k | |
5944 | 1.20k | // Check if this is an indirect call (MTCTR/BCTRL). |
5945 | 1.20k | // See PrepareCall() for more information about calls through function |
5946 | 1.20k | // pointers in the 64-bit SVR4 ABI. |
5947 | 1.20k | if (!isTailCall && !isPatchPoint &&
5948 | 1.11k | !isFunctionGlobalAddress(Callee) && |
5949 | 1.20k | !isa<ExternalSymbolSDNode>(Callee)) {
5950 | 28 | // Load r2 into a virtual register and store it to the TOC save area. |
5951 | 28 | setUsesTOCBasePtr(DAG); |
5952 | 28 | SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); |
5953 | 28 | // TOC save area offset. |
5954 | 28 | unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); |
5955 | 28 | SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); |
5956 | 28 | SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); |
5957 | 28 | Chain = DAG.getStore( |
5958 | 28 | Val.getValue(1), dl, Val, AddPtr, |
5959 | 28 | MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset)); |
5960 | 28 | // In the ELFv2 ABI, R12 must contain the address of an indirect callee. |
5961 | 28 | // This does not mean the MTCTR instruction must use R12; it's easier |
5962 | 28 | // to model this as an extra parameter, so do that. |
5963 | 28 | if (isELFv2ABI && !isPatchPoint)
5964 | 9 | RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee)); |
5965 | 28 | } |
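
The guard above matches exactly the calls whose target is neither a known global nor an external symbol, i.e. calls through a function pointer. A hypothetical caller that takes this path (not part of this file):

    // The callee may live in another module with a different TOC, so r2 is
    // spilled to the TOC save slot before the MTCTR/BCTRL sequence.
    using Callback = int (*)(int);

    int invoke(Callback CB) {
      return CB(42);   // indirect call; TOC save emitted as above
    }
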
5966 | 1.20k | |
5967 | 1.20k | // Build a sequence of copy-to-reg nodes chained together with token chain |
5968 | 1.20k | // and flag operands which copy the outgoing args into the appropriate regs. |
5969 | 1.20k | SDValue InFlag; |
5970 | 4.07k | for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5971 | 2.87k | Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, |
5972 | 2.87k | RegsToPass[i].second, InFlag); |
5973 | 2.87k | InFlag = Chain.getValue(1); |
5974 | 2.87k | } |
5975 | 1.20k | |
5976 | 1.20k | if (isTailCall && !IsSibCall)
5977 | 1 | PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp, |
5978 | 1 | TailCallArguments); |
5979 | 1.20k | |
5980 | 1.20k | return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, hasNest, |
5981 | 1.20k | DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee, |
5982 | 1.20k | SPDiff, NumBytes, Ins, InVals, CS); |
5983 | 1.20k | } |
5984 | | |
5985 | | SDValue PPCTargetLowering::LowerCall_Darwin( |
5986 | | SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, |
5987 | | bool isTailCall, bool isPatchPoint, |
5988 | | const SmallVectorImpl<ISD::OutputArg> &Outs, |
5989 | | const SmallVectorImpl<SDValue> &OutVals, |
5990 | | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, |
5991 | | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, |
5992 | 119 | ImmutableCallSite CS) const { |
5993 | 119 | unsigned NumOps = Outs.size(); |
5994 | 119 | |
5995 | 119 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
5996 | 119 | bool isPPC64 = PtrVT == MVT::i64; |
5997 | 119 | unsigned PtrByteSize = isPPC64 ? 8 : 4;
5998 | 119 | |
5999 | 119 | MachineFunction &MF = DAG.getMachineFunction(); |
6000 | 119 | |
6001 | 119 | // Mark this function as potentially containing a function that contains a |
6002 | 119 | // tail call. As a consequence, the frame pointer will be used for dynamic
6003 | 119 | // stack allocation and for restoring the caller's stack pointer in this
6004 | 119 | // function's epilogue, because by tail calling, the called function might
6005 | 119 | // overwrite the value in this function's (MF) stack pointer stack slot 0(SP).
6006 | 119 | if (getTargetMachine().Options.GuaranteedTailCallOpt && |
6007 | 1 | CallConv == CallingConv::Fast) |
6008 | 1 | MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); |
6009 | 119 | |
6010 | 119 | // Count how many bytes are to be pushed on the stack, including the linkage |
6011 | 119 | // area, and parameter passing area. We start with 24/48 bytes, which is |
6012 | 119 | // prereserved space for [SP][CR][LR][3 x unused]. |
6013 | 119 | unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); |
6014 | 119 | unsigned NumBytes = LinkageSize; |
6015 | 119 | |
6016 | 119 | // Add up all the space actually used. |
6017 | 119 | // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually |
6018 | 119 | // they all go in registers, but we must reserve stack space for them for |
6019 | 119 | // possible use by the caller. In varargs or 64-bit calls, parameters are |
6020 | 119 | // assigned stack space in order, with padding so Altivec parameters are |
6021 | 119 | // 16-byte aligned. |
6022 | 119 | unsigned nAltivecParamsAtEnd = 0; |
6023 | 490 | for (unsigned i = 0; i != NumOps; ++i) {
6024 | 371 | ISD::ArgFlagsTy Flags = Outs[i].Flags; |
6025 | 371 | EVT ArgVT = Outs[i].VT; |
6026 | 371 | // Varargs Altivec parameters are padded to a 16 byte boundary. |
6027 | 371 | if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
6028 | 371 | ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
6029 | 371 | ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
6030 | 0 | if (!isVarArg && !isPPC64) {
6031 | 0 | // Non-varargs Altivec parameters go after all the non-Altivec |
6032 | 0 | // parameters; handle those later so we know how much padding we need. |
6033 | 0 | nAltivecParamsAtEnd++; |
6034 | 0 | continue; |
6035 | 0 | } |
6036 | 0 | // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary. |
6037 | 0 | NumBytes = ((NumBytes+15)/16)*16; |
6038 | 0 | } |
6039 | 371 | NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); |
6040 | 371 | } |
6041 | 119 | |
6042 | 119 | // Allow for Altivec parameters at the end, if needed. |
6043 | 119 | if (nAltivecParamsAtEnd) {
6044 | 0 | NumBytes = ((NumBytes+15)/16)*16; |
6045 | 0 | NumBytes += 16*nAltivecParamsAtEnd; |
6046 | 0 | } |
6047 | 119 | |
6048 | 119 | // The prolog code of the callee may store up to 8 GPR argument registers to |
6049 | 119 | // the stack, allowing va_start to index over them in memory if it is varargs.
6050 | 119 | // Because we cannot tell if this is needed on the caller side, we have to |
6051 | 119 | // conservatively assume that it is needed. As such, make sure we have at |
6052 | 119 | // least enough stack space for the caller to store the 8 GPRs. |
6053 | 119 | NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); |
6054 | 119 | |
6055 | 119 | // Tail call needs the stack to be aligned. |
6056 | 119 | if (getTargetMachine().Options.GuaranteedTailCallOpt && |
6057 | 1 | CallConv == CallingConv::Fast) |
6058 | 1 | NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes); |
6059 | 119 | |
6060 | 119 | // Calculate by how many bytes the stack has to be adjusted in case of tail |
6061 | 119 | // call optimization. |
6062 | 119 | int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); |
6063 | 119 | |
6064 | 119 | // To protect arguments on the stack from being clobbered in a tail call, |
6065 | 119 | // force all the loads to happen before doing any other lowering. |
6066 | 119 | if (isTailCall) |
6067 | 1 | Chain = DAG.getStackArgumentTokenFactor(Chain); |
6068 | 119 | |
6069 | 119 | // Adjust the stack pointer for the new arguments... |
6070 | 119 | // These operations are automatically eliminated by the prolog/epilog pass |
6071 | 119 | Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); |
6072 | 119 | SDValue CallSeqStart = Chain; |
6073 | 119 | |
6074 | 119 | // Load the return address and frame pointer so they can be moved somewhere
6075 | 119 | // else later.
6076 | 119 | SDValue LROp, FPOp; |
6077 | 119 | Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); |
6078 | 119 | |
6079 | 119 | // Set up a copy of the stack pointer for use loading and storing any |
6080 | 119 | // arguments that may not fit in the registers available for argument |
6081 | 119 | // passing. |
6082 | 119 | SDValue StackPtr; |
6083 | 119 | if (isPPC64) |
6084 | 20 | StackPtr = DAG.getRegister(PPC::X1, MVT::i64); |
6085 | 119 | else |
6086 | 99 | StackPtr = DAG.getRegister(PPC::R1, MVT::i32); |
6087 | 119 | |
6088 | 119 | // Figure out which arguments are going to go in registers, and which in |
6089 | 119 | // memory. Also, if this is a vararg function, floating point operations |
6090 | 119 | // must be stored to our stack, and loaded into integer regs as well, if |
6091 | 119 | // any integer regs are available for argument passing. |
6092 | 119 | unsigned ArgOffset = LinkageSize; |
6093 | 119 | unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; |
6094 | 119 | |
6095 | 119 | static const MCPhysReg GPR_32[] = { // 32-bit registers. |
6096 | 119 | PPC::R3, PPC::R4, PPC::R5, PPC::R6, |
6097 | 119 | PPC::R7, PPC::R8, PPC::R9, PPC::R10, |
6098 | 119 | }; |
6099 | 119 | static const MCPhysReg GPR_64[] = { // 64-bit registers. |
6100 | 119 | PPC::X3, PPC::X4, PPC::X5, PPC::X6, |
6101 | 119 | PPC::X7, PPC::X8, PPC::X9, PPC::X10, |
6102 | 119 | }; |
6103 | 119 | static const MCPhysReg VR[] = { |
6104 | 119 | PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, |
6105 | 119 | PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 |
6106 | 119 | }; |
6107 | 119 | const unsigned NumGPRs = array_lengthof(GPR_32); |
6108 | 119 | const unsigned NumFPRs = 13; |
6109 | 119 | const unsigned NumVRs = array_lengthof(VR); |
6110 | 119 | |
6111 | 119 | const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
6112 | 119 | |
6113 | 119 | SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; |
6114 | 119 | SmallVector<TailCallArgumentInfo, 8> TailCallArguments; |
6115 | 119 | |
6116 | 119 | SmallVector<SDValue, 8> MemOpChains; |
6117 | 490 | for (unsigned i = 0; i != NumOps; ++i) {
6118 | 371 | SDValue Arg = OutVals[i]; |
6119 | 371 | ISD::ArgFlagsTy Flags = Outs[i].Flags; |
6120 | 371 | |
6121 | 371 | // PtrOff will be used to store the current argument to the stack if a |
6122 | 371 | // register cannot be found for it. |
6123 | 371 | SDValue PtrOff; |
6124 | 371 | |
6125 | 371 | PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType()); |
6126 | 371 | |
6127 | 371 | PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); |
6128 | 371 | |
6129 | 371 | // On PPC64, promote integers to 64-bit values. |
6130 | 371 | if (isPPC64 && Arg.getValueType() == MVT::i32) {
6131 | 3 | // FIXME: Should this use ANY_EXTEND if neither sext nor zext? |
6132 | 3 | unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6133 | 3 | Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); |
6134 | 3 | } |
6135 | 371 | |
6136 | 371 | // FIXME memcpy is used way more than necessary. Correctness first. |
6137 | 371 | // Note: "by value" is code for passing a structure by value, not |
6138 | 371 | // basic types. |
6139 | 371 | if (Flags.isByVal()) {
6140 | 2 | unsigned Size = Flags.getByValSize(); |
6141 | 2 | // Very small objects are passed right-justified. Everything else is |
6142 | 2 | // passed left-justified. |
6143 | 2 | if (Size==1 || Size==2) {
6144 | 2 | EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
6145 | 2 | if (GPR_idx != NumGPRs) {
6146 | 2 | SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, |
6147 | 2 | MachinePointerInfo(), VT); |
6148 | 2 | MemOpChains.push_back(Load.getValue(1)); |
6149 | 2 | RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); |
6150 | 2 | |
6151 | 2 | ArgOffset += PtrByteSize; |
6152 | 2 | } else { |
6153 | 0 | SDValue Const = DAG.getConstant(PtrByteSize - Size, dl, |
6154 | 0 | PtrOff.getValueType()); |
6155 | 0 | SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); |
6156 | 0 | Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, |
6157 | 0 | CallSeqStart, |
6158 | 0 | Flags, DAG, dl); |
6159 | 0 | ArgOffset += PtrByteSize; |
6160 | 0 | } |
6161 | 2 | continue; |
6162 | 2 | } |
6163 | 0 | // Copy entire object into memory. There are cases where gcc-generated |
6164 | 0 | // code assumes it is there, even if it could be put entirely into |
6165 | 0 | // registers. (This is not what the doc says.) |
6166 | 0 | Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, |
6167 | 0 | CallSeqStart, |
6168 | 0 | Flags, DAG, dl); |
6169 | 0 |
6170 | 0 | // For small aggregates (Darwin only) and aggregates >= PtrByteSize, |
6171 | 0 | // copy the pieces of the object that fit into registers from the |
6172 | 0 | // parameter save area. |
6173 | 0 | for (unsigned j=0; j<Size; j+=PtrByteSize) {
6174 | 0 | SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType()); |
6175 | 0 | SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); |
6176 | 0 | if (GPR_idx != NumGPRs) {
6177 | 0 | SDValue Load = |
6178 | 0 | DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo()); |
6179 | 0 | MemOpChains.push_back(Load.getValue(1)); |
6180 | 0 | RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); |
6181 | 0 | ArgOffset += PtrByteSize; |
6182 | 0 | } else { |
6183 | 0 | ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; |
6184 | 0 | break; |
6185 | 0 | } |
6186 | 0 | } |
6187 | 2 | continue; |
6188 | 2 | } |
6189 | 369 | |
6190 | 369 | switch (Arg.getSimpleValueType().SimpleTy) { |
6191 | 0 | default: llvm_unreachable("Unexpected ValueType for argument!");
6192 | 257 | case MVT::i1: |
6193 | 257 | case MVT::i32: |
6194 | 257 | case MVT::i64: |
6195 | 257 | if (GPR_idx != NumGPRs) {
6196 | 257 | if (Arg.getValueType() == MVT::i1) |
6197 | 1 | Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg); |
6198 | 257 | |
6199 | 257 | RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); |
6200 | 0 | } else { |
6201 | 0 | LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, |
6202 | 0 | isPPC64, isTailCall, false, MemOpChains, |
6203 | 0 | TailCallArguments, dl); |
6204 | 0 | } |
6205 | 257 | ArgOffset += PtrByteSize; |
6206 | 257 | break; |
6207 | 112 | case MVT::f32: |
6208 | 112 | case MVT::f64: |
6209 | 112 | if (FPR_idx != NumFPRs) {
6210 | 112 | RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); |
6211 | 112 | |
6212 | 112 | if (isVarArg) {
6213 | 30 | SDValue Store = |
6214 | 30 | DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); |
6215 | 30 | MemOpChains.push_back(Store); |
6216 | 30 | |
6217 | 30 | // Float varargs are always shadowed in available integer registers |
6218 | 30 | if (GPR_idx != NumGPRs) {
6219 | 30 | SDValue Load = |
6220 | 30 | DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo()); |
6221 | 30 | MemOpChains.push_back(Load.getValue(1)); |
6222 | 30 | RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); |
6223 | 30 | } |
6224 | 30 | if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64) {
6225 | 30 | SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType()); |
6226 | 30 | PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); |
6227 | 30 | SDValue Load = |
6228 | 30 | DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo()); |
6229 | 30 | MemOpChains.push_back(Load.getValue(1)); |
6230 | 30 | RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); |
6231 | 30 | } |
6232 | 112 | } else { |
6233 | 82 | // If we have any FPRs remaining, we may also have GPRs remaining. |
6234 | 82 | // Args passed in FPRs consume either 1 (f32) or 2 (f64) available |
6235 | 82 | // GPRs. |
6236 | 82 | if (GPR_idx != NumGPRs) |
6237 | 68 | ++GPR_idx; |
6238 | 82 | if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
6239 | 68 | !isPPC64) // PPC64 has 64-bit GPR's obviously :) |
6240 | 60 | ++GPR_idx; |
6241 | 82 | } |
6242 | 112 | } else |
6243 | 0 | LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, |
6244 | 0 | isPPC64, isTailCall, false, MemOpChains, |
6245 | 0 | TailCallArguments, dl); |
6246 | 112 | if (isPPC64) |
6247 | 8 | ArgOffset += 8; |
6248 | 112 | else |
6249 | 104 | ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
6250 | 112 | break; |
6251 | 0 | case MVT::v4f32: |
6252 | 0 | case MVT::v4i32: |
6253 | 0 | case MVT::v8i16: |
6254 | 0 | case MVT::v16i8: |
6255 | 0 | if (isVarArg) {
6256 | 0 | // These go aligned on the stack, or in the corresponding R registers |
6257 | 0 | // when within range. The Darwin PPC ABI doc claims they also go in |
6258 | 0 | // V registers; in fact gcc does this only for arguments that are |
6259 | 0 | // prototyped, not for those that match the ... We do it for all |
6260 | 0 | // arguments, seems to work. |
6261 | 0 | while (ArgOffset % 16 != 0) {
6262 | 0 | ArgOffset += PtrByteSize; |
6263 | 0 | if (GPR_idx != NumGPRs) |
6264 | 0 | GPR_idx++; |
6265 | 0 | } |
6266 | 0 | // We could elide this store in the case where the object fits |
6267 | 0 | // entirely in R registers. Maybe later. |
6268 | 0 | PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, |
6269 | 0 | DAG.getConstant(ArgOffset, dl, PtrVT)); |
6270 | 0 | SDValue Store = |
6271 | 0 | DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); |
6272 | 0 | MemOpChains.push_back(Store); |
6273 | 0 | if (VR_idx != NumVRs) {
6274 | 0 | SDValue Load = |
6275 | 0 | DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo()); |
6276 | 0 | MemOpChains.push_back(Load.getValue(1)); |
6277 | 0 | RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); |
6278 | 0 | } |
6279 | 0 | ArgOffset += 16; |
6280 | 0 | for (unsigned i=0; i<16; i+=PtrByteSize) {
6281 | 0 | if (GPR_idx == NumGPRs) |
6282 | 0 | break; |
6283 | 0 | SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, |
6284 | 0 | DAG.getConstant(i, dl, PtrVT)); |
6285 | 0 | SDValue Load = |
6286 | 0 | DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo()); |
6287 | 0 | MemOpChains.push_back(Load.getValue(1)); |
6288 | 0 | RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); |
6289 | 0 | } |
6290 | 0 | break; |
6291 | 0 | } |
6292 | 0 |
6293 | 0 | // Non-varargs Altivec params generally go in registers, but have |
6294 | 0 | // stack space allocated at the end. |
6295 | 0 | if (VR_idx != NumVRs) {
6296 | 0 | // Doesn't have GPR space allocated. |
6297 | 0 | RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); |
6298 | 0 | } else if (nAltivecParamsAtEnd == 0) {
6299 | 0 | // We are emitting Altivec params in order. |
6300 | 0 | LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, |
6301 | 0 | isPPC64, isTailCall, true, MemOpChains, |
6302 | 0 | TailCallArguments, dl); |
6303 | 0 | ArgOffset += 16; |
6304 | 0 | } |
6305 | 257 | break; |
6306 | 371 | } |
6307 | 371 | } |
6308 | 119 | // If all Altivec parameters fit in registers, as they usually do, |
6309 | 119 | // they get stack space following the non-Altivec parameters. We |
6310 | 119 | // don't track this here because nobody below needs it. |
6311 | 119 | // If there are more Altivec parameters than fit in registers emit |
6312 | 119 | // the stores here. |
6313 | 119 | if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
6314 | 0 | unsigned j = 0; |
6315 | 0 | // Offset is aligned; skip 1st 12 params which go in V registers. |
6316 | 0 | ArgOffset = ((ArgOffset+15)/16)*16; |
6317 | 0 | ArgOffset += 12*16; |
6318 | 0 | for (unsigned i = 0; i != NumOps0 ; ++i0 ) { |
6319 | 0 | SDValue Arg = OutVals[i]; |
6320 | 0 | EVT ArgType = Outs[i].VT; |
6321 | 0 | if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
6322 | 0 | ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
6323 | 0 | if (++j > NumVRs) {
6324 | 0 | SDValue PtrOff; |
6325 | 0 | // We are emitting Altivec params in order. |
6326 | 0 | LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, |
6327 | 0 | isPPC64, isTailCall, true, MemOpChains, |
6328 | 0 | TailCallArguments, dl); |
6329 | 0 | ArgOffset += 16; |
6330 | 0 | } |
6331 | 0 | } |
6332 | 0 | } |
6333 | 0 | } |
6334 | 119 | |
6335 | 119 | if (!MemOpChains.empty()) |
6336 | 15 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); |
6337 | 119 | |
6338 | 119 | // On Darwin, R12 must contain the address of an indirect callee. This does |
6339 | 119 | // not mean the MTCTR instruction must use R12; it's easier to model this as |
6340 | 119 | // an extra parameter, so do that. |
6341 | 119 | if (!isTailCall && |
6342 | 118 | !isFunctionGlobalAddress(Callee) && |
6343 | 40 | !isa<ExternalSymbolSDNode>(Callee) && |
6344 | 10 | !isBLACompatibleAddress(Callee, DAG)) |
6345 | 6 | RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
6346 | 6 | PPC::R12), Callee)); |
6347 | 119 | |
6348 | 119 | // Build a sequence of copy-to-reg nodes chained together with token chain |
6349 | 119 | // and flag operands which copy the outgoing args into the appropriate regs. |
6350 | 119 | SDValue InFlag; |
6351 | 556 | for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6352 | 437 | Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, |
6353 | 437 | RegsToPass[i].second, InFlag); |
6354 | 437 | InFlag = Chain.getValue(1); |
6355 | 437 | } |
6356 | 119 | |
6357 | 119 | if (isTailCall) |
6358 | 1 | PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp, |
6359 | 1 | TailCallArguments); |
6360 | 119 | |
6361 | 119 | return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, |
6362 | 119 | /* unused except on PPC64 ELFv1 */ false, DAG, |
6363 | 119 | RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, |
6364 | 119 | NumBytes, Ins, InVals, CS); |
6365 | 119 | } |
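
The float-vararg shadowing in the loop above (store the FPR value, then reload it into the next one or two GPRs) exists because a varargs callee may fetch the value with integer loads through va_arg. A hypothetical snippet that exercises it on 32-bit Darwin (not part of this file):

    extern "C" int printf(const char *, ...);

    void demo(double D) {
      printf("%f\n", D);   // D travels in an FPR and is mirrored into GPRs
    }
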
6366 | | |
6367 | | bool |
6368 | | PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv, |
6369 | | MachineFunction &MF, bool isVarArg, |
6370 | | const SmallVectorImpl<ISD::OutputArg> &Outs, |
6371 | 9.12k | LLVMContext &Context) const { |
6372 | 9.12k | SmallVector<CCValAssign, 16> RVLocs; |
6373 | 9.12k | CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); |
6374 | 9.12k | return CCInfo.CheckReturn(Outs, RetCC_PPC); |
6375 | 9.12k | } |
6376 | | |
6377 | | SDValue |
6378 | | PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, |
6379 | | bool isVarArg, |
6380 | | const SmallVectorImpl<ISD::OutputArg> &Outs, |
6381 | | const SmallVectorImpl<SDValue> &OutVals, |
6382 | 6.95k | const SDLoc &dl, SelectionDAG &DAG) const { |
6383 | 6.95k | SmallVector<CCValAssign, 16> RVLocs; |
6384 | 6.95k | CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, |
6385 | 6.95k | *DAG.getContext()); |
6386 | 6.95k | CCInfo.AnalyzeReturn(Outs, RetCC_PPC); |
6387 | 6.95k | |
6388 | 6.95k | SDValue Flag; |
6389 | 6.95k | SmallVector<SDValue, 4> RetOps(1, Chain); |
6390 | 6.95k | |
6391 | 6.95k | // Copy the result values into the output registers. |
6392 | 12.4k | for (unsigned i = 0; i != RVLocs.size(); ++i) {
6393 | 5.54k | CCValAssign &VA = RVLocs[i]; |
6394 | 5.54k | assert(VA.isRegLoc() && "Can only return in registers!"); |
6395 | 5.54k | |
6396 | 5.54k | SDValue Arg = OutVals[i]; |
6397 | 5.54k | |
6398 | 5.54k | switch (VA.getLocInfo()) { |
6399 | 0 | default: llvm_unreachable("Unknown loc info!");
6400 | 4.27k | case CCValAssign::Full: break; |
6401 | 677 | case CCValAssign::AExt: |
6402 | 677 | Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); |
6403 | 677 | break; |
6404 | 156 | case CCValAssign::ZExt: |
6405 | 156 | Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); |
6406 | 156 | break; |
6407 | 429 | case CCValAssign::SExt: |
6408 | 429 | Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); |
6409 | 429 | break; |
6410 | 5.54k | } |
6411 | 5.54k | |
6412 | 5.54k | Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); |
6413 | 5.54k | Flag = Chain.getValue(1); |
6414 | 5.54k | RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); |
6415 | 5.54k | } |
6416 | 6.95k | |
6417 | 6.95k | const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
6418 | 6.95k | const MCPhysReg *I = |
6419 | 6.95k | TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction()); |
6420 | 6.95k | if (I) {
6421 | 158 | for (; *I; ++I) {
6422 | 155 | |
6423 | 155 | if (PPC::G8RCRegClass.contains(*I)) |
6424 | 56 | RetOps.push_back(DAG.getRegister(*I, MVT::i64)); |
6425 | 99 | else if (PPC::F8RCRegClass.contains(*I))
6426 | 54 | RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64))); |
6427 | 45 | else if (PPC::CRRCRegClass.contains(*I))
6428 | 9 | RetOps.push_back(DAG.getRegister(*I, MVT::i1)); |
6429 | 36 | else if (PPC::VRRCRegClass.contains(*I))
6430 | 36 | RetOps.push_back(DAG.getRegister(*I, MVT::Other)); |
6431 | 36 | else |
6432 | 0 | llvm_unreachable("Unexpected register class in CSRsViaCopy!"); |
6433 | 155 | } |
6434 | 3 | } |
6435 | 6.95k | |
6436 | 6.95k | RetOps[0] = Chain; // Update chain. |
6437 | 6.95k | |
6438 | 6.95k | // Add the flag if we have it. |
6439 | 6.95k | if (Flag.getNode()) |
6440 | 5.11k | RetOps.push_back(Flag); |
6441 | 6.95k | |
6442 | 6.95k | return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps); |
6443 | 6.95k | } |
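
A one-line illustration of the extension cases above (hypothetical, and assuming the front end marks the return value signext, as Clang does for small signed integers): a 'short' return comes back widened through the CCValAssign::SExt case, so the caller can use the full register directly.

    short negate(short X) { return static_cast<short>(-X); }   // sign-extended in r3
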
6444 | | |
6445 | | SDValue |
6446 | | PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, |
6447 | 1 | SelectionDAG &DAG) const { |
6448 | 1 | SDLoc dl(Op); |
6449 | 1 | |
6450 | 1 | // Get the correct type for integers. |
6451 | 1 | EVT IntVT = Op.getValueType(); |
6452 | 1 | |
6453 | 1 | // Get the inputs. |
6454 | 1 | SDValue Chain = Op.getOperand(0); |
6455 | 1 | SDValue FPSIdx = getFramePointerFrameIndex(DAG); |
6456 | 1 | // Build a DYNAREAOFFSET node. |
6457 | 1 | SDValue Ops[2] = {Chain, FPSIdx}; |
6458 | 1 | SDVTList VTs = DAG.getVTList(IntVT); |
6459 | 1 | return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops); |
6460 | 1 | } |
6461 | | |
6462 | | SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, |
6463 | 1 | SelectionDAG &DAG) const { |
6464 | 1 | // When we pop the dynamic allocation we need to restore the SP link. |
6465 | 1 | SDLoc dl(Op); |
6466 | 1 | |
6467 | 1 | // Get the correct type for pointers. |
6468 | 1 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
6469 | 1 | |
6470 | 1 | // Construct the stack pointer operand. |
6471 | 1 | bool isPPC64 = Subtarget.isPPC64(); |
6472 | 1 | unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
6473 | 1 | SDValue StackPtr = DAG.getRegister(SP, PtrVT); |
6474 | 1 | |
6475 | 1 | // Get the operands for the STACKRESTORE. |
6476 | 1 | SDValue Chain = Op.getOperand(0); |
6477 | 1 | SDValue SaveSP = Op.getOperand(1); |
6478 | 1 | |
6479 | 1 | // Load the old link SP. |
6480 | 1 | SDValue LoadLinkSP = |
6481 | 1 | DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo()); |
6482 | 1 | |
6483 | 1 | // Restore the stack pointer. |
6484 | 1 | Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP); |
6485 | 1 | |
6486 | 1 | // Store the old link SP. |
6487 | 1 | return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo()); |
6488 | 1 | } |
6489 | | |
6490 | 5 | SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const { |
6491 | 5 | MachineFunction &MF = DAG.getMachineFunction(); |
6492 | 5 | bool isPPC64 = Subtarget.isPPC64(); |
6493 | 5 | EVT PtrVT = getPointerTy(MF.getDataLayout()); |
6494 | 5 | |
6495 | 5 | // Get the current return address save index.
6497 | 5 | PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); |
6498 | 5 | int RASI = FI->getReturnAddrSaveIndex(); |
6499 | 5 | |
6500 | 5 | // If the return address save index hasn't been defined yet.
6501 | 5 | if (!RASI) {
6502 | 5 | // Find out the fixed offset of the return address save area.
6503 | 5 | int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
6504 | 5 | // Allocate the frame index for the return address save area.
6505 | 5 | RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
6506 | 5 | // Save the result. |
6507 | 5 | FI->setReturnAddrSaveIndex(RASI); |
6508 | 5 | } |
6509 | 5 | return DAG.getFrameIndex(RASI, PtrVT); |
6510 | 5 | } |
6511 | | |
6512 | | SDValue |
6513 | 23 | PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { |
6514 | 23 | MachineFunction &MF = DAG.getMachineFunction(); |
6515 | 23 | bool isPPC64 = Subtarget.isPPC64(); |
6516 | 23 | EVT PtrVT = getPointerTy(MF.getDataLayout()); |
6517 | 23 | |
6518 | 23 | // Get current frame pointer save index. The users of this index will be |
6519 | 23 | // primarily DYNALLOC instructions. |
6520 | 23 | PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); |
6521 | 23 | int FPSI = FI->getFramePointerSaveIndex(); |
6522 | 23 | |
6523 | 23 | // If the frame pointer save index hasn't been defined yet. |
6524 | 23 | if (!FPSI) {
6525 | 21 | // Find out the fixed offset of the frame pointer save area.
6526 | 21 | int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
6527 | 21 | // Allocate the frame index for the frame pointer save area.
6528 | 21 | FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
6529 | 21 | // Save the result. |
6530 | 21 | FI->setFramePointerSaveIndex(FPSI); |
6531 | 21 | } |
6532 | 23 | return DAG.getFrameIndex(FPSI, PtrVT); |
6533 | 23 | } |
6534 | | |
6535 | | SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, |
6536 | 22 | SelectionDAG &DAG) const { |
6537 | 22 | // Get the inputs. |
6538 | 22 | SDValue Chain = Op.getOperand(0); |
6539 | 22 | SDValue Size = Op.getOperand(1); |
6540 | 22 | SDLoc dl(Op); |
6541 | 22 | |
6542 | 22 | // Get the correct type for pointers. |
6543 | 22 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
6544 | 22 | // Negate the size. |
6545 | 22 | SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT, |
6546 | 22 | DAG.getConstant(0, dl, PtrVT), Size); |
6547 | 22 | // Construct a node for the frame pointer save index. |
6548 | 22 | SDValue FPSIdx = getFramePointerFrameIndex(DAG); |
6549 | 22 | // Build a DYNALLOC node. |
6550 | 22 | SDValue Ops[3] = { Chain, NegSize, FPSIdx }; |
6551 | 22 | SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other); |
6552 | 22 | return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops); |
6553 | 22 | } |
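
This is the lowering a dynamic stack allocation reaches; a hypothetical user (not part of this file):

    // N bytes of dynamic stack space: the size is negated because the stack
    // grows downward, and PPCISD::DYNALLOC keeps the saved back chain
    // reachable via the frame-pointer save index.
    void *scratch(unsigned N) {
      return __builtin_alloca(N);
    }
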
6554 | | |
6555 | | SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op, |
6556 | 1 | SelectionDAG &DAG) const { |
6557 | 1 | MachineFunction &MF = DAG.getMachineFunction(); |
6558 | 1 | |
6559 | 1 | bool isPPC64 = Subtarget.isPPC64(); |
6560 | 1 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
6561 | 1 | |
6562 | 1 | int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
6563 | 1 | return DAG.getFrameIndex(FI, PtrVT); |
6564 | 1 | } |
6565 | | |
6566 | | SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, |
6567 | 6 | SelectionDAG &DAG) const { |
6568 | 6 | SDLoc DL(Op); |
6569 | 6 | return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL, |
6570 | 6 | DAG.getVTList(MVT::i32, MVT::Other), |
6571 | 6 | Op.getOperand(0), Op.getOperand(1)); |
6572 | 6 | } |
6573 | | |
6574 | | SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, |
6575 | 5 | SelectionDAG &DAG) const { |
6576 | 5 | SDLoc DL(Op); |
6577 | 5 | return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other, |
6578 | 5 | Op.getOperand(0), Op.getOperand(1)); |
6579 | 5 | } |
6580 | | |
6581 | 102 | SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { |
6582 | 102 | if (Op.getValueType().isVector()) |
6583 | 89 | return LowerVectorLoad(Op, DAG); |
6584 | 13 | |
6585 | 102 | assert(Op.getValueType() == MVT::i1 && |
6586 | 13 | "Custom lowering only for i1 loads"); |
6587 | 13 | |
6588 | 13 | // First, load 8 bits into 32 bits, then truncate to 1 bit. |
6589 | 13 | |
6590 | 13 | SDLoc dl(Op); |
6591 | 13 | LoadSDNode *LD = cast<LoadSDNode>(Op); |
6592 | 13 | |
6593 | 13 | SDValue Chain = LD->getChain(); |
6594 | 13 | SDValue BasePtr = LD->getBasePtr(); |
6595 | 13 | MachineMemOperand *MMO = LD->getMemOperand(); |
6596 | 13 | |
6597 | 13 | SDValue NewLD = |
6598 | 13 | DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain, |
6599 | 13 | BasePtr, MVT::i8, MMO); |
6600 | 13 | SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD); |
6601 | 13 | |
6602 | 13 | SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) }; |
6603 | 13 | return DAG.getMergeValues(Ops, dl); |
6604 | 13 | } |
6605 | | |
6606 | 48 | SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { |
6607 | 48 | if (Op.getOperand(1).getValueType().isVector()) |
6608 | 47 | return LowerVectorStore(Op, DAG); |
6609 | 1 | |
6610 | 48 | assert(Op.getOperand(1).getValueType() == MVT::i1 && |
6611 | 1 | "Custom lowering only for i1 stores"); |
6612 | 1 | |
6613 | 1 | // First, zero extend to 32 bits, then use a truncating store to 8 bits. |
6614 | 1 | |
6615 | 1 | SDLoc dl(Op); |
6616 | 1 | StoreSDNode *ST = cast<StoreSDNode>(Op); |
6617 | 1 | |
6618 | 1 | SDValue Chain = ST->getChain(); |
6619 | 1 | SDValue BasePtr = ST->getBasePtr(); |
6620 | 1 | SDValue Value = ST->getValue(); |
6621 | 1 | MachineMemOperand *MMO = ST->getMemOperand(); |
6622 | 1 | |
6623 | 1 | Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()), |
6624 | 1 | Value); |
6625 | 1 | return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO); |
6626 | 1 | } |
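
Both i1 lowerings round-trip through a full byte: loads extend i8 to the pointer-sized type and truncate to i1, while stores zero-extend the i1 and emit a truncating i8 store. A minimal host-side model (a sketch, not the DAG itself):

    #include <cstdint>

    bool loadI1(const uint8_t *P) { return (*P) & 1; }      // extload, then truncate
    void storeI1(uint8_t *P, bool B) { *P = uint8_t(B); }   // zext, then trunc store
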
6627 | | |
6628 | | // FIXME: Remove this once the ANDI glue bug is fixed: |
6629 | 0 | SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { |
6630 | 0 | assert(Op.getValueType() == MVT::i1 && |
6631 | 0 | "Custom lowering only for i1 results"); |
6632 | 0 |
6633 | 0 | SDLoc DL(Op); |
6634 | 0 | return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1, |
6635 | 0 | Op.getOperand(0)); |
6636 | 0 | } |
6637 | | |
6638 | | /// LowerSELECT_CC - Lower floating-point select_cc's into the fsel instruction
6639 | | /// when possible.
6640 | 257 | SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { |
6641 | 257 | // Not FP? Not a fsel. |
6642 | 257 | if (!Op.getOperand(0).getValueType().isFloatingPoint() || |
6643 | 115 | !Op.getOperand(2).getValueType().isFloatingPoint()) |
6644 | 142 | return Op; |
6645 | 115 | |
6646 | 115 | // We might be able to do better than this under some circumstances, but in |
6647 | 115 | // general, fsel-based lowering of select is a finite-math-only optimization. |
6648 | 115 | // For more information, see section F.3 of the 2.06 ISA specification. |
6649 | 115 | if (!DAG.getTarget().Options.NoInfsFPMath ||
6650 | 19 | !DAG.getTarget().Options.NoNaNsFPMath) |
6651 | 96 | return Op; |
6652 | 19 | // TODO: Propagate flags from the select rather than global settings. |
6653 | 19 | SDNodeFlags Flags; |
6654 | 19 | Flags.setNoInfs(true); |
6655 | 19 | Flags.setNoNaNs(true); |
6656 | 19 | |
6657 | 19 | ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); |
6658 | 19 | |
6659 | 19 | EVT ResVT = Op.getValueType(); |
6660 | 19 | EVT CmpVT = Op.getOperand(0).getValueType(); |
6661 | 19 | SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); |
6662 | 19 | SDValue TV = Op.getOperand(2), FV = Op.getOperand(3); |
6663 | 19 | SDLoc dl(Op); |
6664 | 19 | |
6665 | 19 | // If the RHS of the comparison is a 0.0, we don't need to do the |
6666 | 19 | // subtraction at all. |
6667 | 19 | SDValue Sel1; |
6668 | 19 | if (isFloatingPointZero(RHS)) |
6669 | 9 | switch (CC) { |
6670 | 0 | default: break; // SETUO etc aren't handled by fsel. |
6671 | 0 | case ISD::SETNE: |
6672 | 0 | std::swap(TV, FV); |
6673 | 0 | LLVM_FALLTHROUGH; |
6674 | 2 | case ISD::SETEQ: |
6675 | 2 | if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits |
6676 | 0 | LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); |
6677 | 2 | Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); |
6678 | 2 | if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits |
6679 | 0 | Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); |
6680 | 2 | return DAG.getNode(PPCISD::FSEL, dl, ResVT, |
6681 | 2 | DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV); |
6682 | 5 | case ISD::SETULT: |
6683 | 5 | case ISD::SETLT: |
6684 | 5 | std::swap(TV, FV); // fsel is natively setge, swap operands for setlt |
6685 | 5 | LLVM_FALLTHROUGH; |
6686 | 5 | case ISD::SETOGE: |
6687 | 5 | case ISD::SETGE: |
6688 | 5 | if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits |
6689 | 0 | LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); |
6690 | 5 | return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); |
6691 | 2 | case ISD::SETUGT: |
6692 | 2 | case ISD::SETGT: |
6693 | 2 | std::swap(TV, FV); // fsel is natively setge, swap operands for setlt |
6694 | 2 | LLVM_FALLTHROUGH; |
6695 | 2 | case ISD::SETOLE: |
6696 | 2 | case ISD::SETLE: |
6697 | 2 | if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits |
6698 | 0 | LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); |
6699 | 0 | return DAG.getNode(PPCISD::FSEL, dl, ResVT, |
6700 | 0 | DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV); |
6701 | 10 | } |
6702 | 10 | |
6703 | 10 | SDValue Cmp; |
6704 | 10 | switch (CC) { |
6705 | 0 | default: break; // SETUO etc aren't handled by fsel. |
6706 | 0 | case ISD::SETNE: |
6707 | 0 | std::swap(TV, FV); |
6708 | 0 | LLVM_FALLTHROUGH; |
6709 | 2 | case ISD::SETEQ: |
6710 | 2 | Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags); |
6711 | 2 | if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits |
6712 | 0 | Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); |
6713 | 2 | Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); |
6714 | 2 | if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits |
6715 | 0 | Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); |
6716 | 2 | return DAG.getNode(PPCISD::FSEL, dl, ResVT, |
6717 | 2 | DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV); |
6718 | 2 | case ISD::SETULT: |
6719 | 2 | case ISD::SETLT: |
6720 | 2 | Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags); |
6721 | 2 | if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits |
6722 | 0 | Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); |
6723 | 2 | return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); |
6724 | 2 | case ISD::SETOGE: |
6725 | 2 | case ISD::SETGE: |
6726 | 2 | Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags); |
6727 | 2 | if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits |
6728 | 0 | Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); |
6729 | 2 | return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); |
6730 | 2 | case ISD::SETUGT: |
6731 | 2 | case ISD::SETGT: |
6732 | 2 | Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags); |
6733 | 2 | if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits |
6734 | 0 | Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); |
6735 | 2 | return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); |
6736 | 2 | case ISD::SETOLE: |
6737 | 2 | case ISD::SETLE: |
6738 | 2 | Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags); |
6739 | 2 | if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits |
6740 | 0 | Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); |
6741 | 0 | return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); |
6742 | 0 | } |
6743 | 0 | return Op; |
6744 | 0 | } |
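
fsel computes d = (a >= 0.0) ? b : c, so every case above turns the comparison into a sign test on a subtraction; that is only sound when the subtraction can neither produce a NaN nor be confused by infinities, which is why the no-infs/no-NaNs guard comes first. A minimal C++ model of the instruction and of the SETLT rewrite (hypothetical helpers, not part of this file):

    // Models the PPC fsel instruction.
    static double fsel(double A, double B, double C) {
      return A >= 0.0 ? B : C;
    }

    // x < y ? t : f  ==>  fsel(x - y, f, t): when x - y >= 0, x is not less
    // than y, so the false value is selected -- the operand swap for SETLT.
    double selectLT(double X, double Y, double T, double F) {
      return fsel(X - Y, F, T);
    }
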
6745 | | |
6746 | | void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, |
6747 | | SelectionDAG &DAG, |
6748 | 85 | const SDLoc &dl) const { |
6749 | 85 | assert(Op.getOperand(0).getValueType().isFloatingPoint()); |
6750 | 85 | SDValue Src = Op.getOperand(0); |
6751 | 85 | if (Src.getValueType() == MVT::f32) |
6752 | 51 | Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); |
6753 | 85 | |
6754 | 85 | SDValue Tmp; |
6755 | 85 | switch (Op.getSimpleValueType().SimpleTy) { |
6756 | 0 | default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
6757 | 42 | case MVT::i32: |
6758 | 42 | Tmp = DAG.getNode( |
6759 | 42 | Op.getOpcode() == ISD::FP_TO_SINT |
6760 | 26 | ? PPCISD::FCTIWZ |
6761 | 16 | : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
6762 | 42 | dl, MVT::f64, Src); |
6763 | 42 | break; |
6764 | 43 | case MVT::i64: |
6765 | 43 | assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) && |
6766 | 43 | "i64 FP_TO_UINT is supported only with FPCVT"); |
6767 | 28 | Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : |
6768 | 15 | PPCISD::FCTIDUZ, |
6769 | 43 | dl, MVT::f64, Src); |
6770 | 43 | break; |
6771 | 85 | } |
6772 | 85 | |
6773 | 85 | // Convert the FP value to an int value through memory. |
6774 | 85 | bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
6775 | 27 | (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
6776 | 85 | SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
6777 | 85 | int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex(); |
6778 | 85 | MachinePointerInfo MPI = |
6779 | 85 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); |
6780 | 85 | |
6781 | 85 | // Emit a store to the stack slot. |
6782 | 85 | SDValue Chain; |
6783 | 85 | if (i32Stack) {
6784 | 25 | MachineFunction &MF = DAG.getMachineFunction(); |
6785 | 25 | MachineMemOperand *MMO = |
6786 | 25 | MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4); |
6787 | 25 | SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr }; |
6788 | 25 | Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, |
6789 | 25 | DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO); |
6790 | 25 | } else |
6791 | 60 | Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI); |
6792 | 85 | |
6793 | 85 | // Result is a load from the stack slot. If loading 4 bytes, make sure to |
6794 | 85 | // add in a bias on big endian. |
6795 | 85 | if (Op.getValueType() == MVT::i32 && !i32Stack) {
6796 | 17 | FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, |
6797 | 17 | DAG.getConstant(4, dl, FIPtr.getValueType())); |
6798 | 17 | MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
6799 | 17 | } |
6800 | 85 | |
6801 | 85 | RLI.Chain = Chain; |
6802 | 85 | RLI.Ptr = FIPtr; |
6803 | 85 | RLI.MPI = MPI; |
6804 | 85 | } |
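
The round trip above stores the full 8-byte fctiwz/fctiduz result and reads back only what the integer type needs; on big-endian the 32-bit payload sits in bytes 4..7 of the stored doubleword, hence the +4 bias. A host-side sketch of that layout (hypothetical helper, assuming a big-endian image of the slot, not part of this file):

    #include <cstdint>
    #include <cstring>

    int32_t loadLowWordBE(const uint8_t (&Slot)[8]) {
      int32_t R;
      std::memcpy(&R, Slot + 4, sizeof(R));   // the bias added above
      return R;
    }
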
6805 | | |
6806 | | /// \brief Custom lowers floating point to integer conversions to use |
6807 | | /// the direct move instructions available in ISA 2.07 to avoid the |
6808 | | /// need for load/store combinations. |
6809 | | SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op, |
6810 | | SelectionDAG &DAG, |
6811 | 535 | const SDLoc &dl) const { |
6812 | 535 | assert(Op.getOperand(0).getValueType().isFloatingPoint()); |
6813 | 535 | SDValue Src = Op.getOperand(0); |
6814 | 535 | |
6815 | 535 | if (Src.getValueType() == MVT::f32) |
6816 | 260 | Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); |
6817 | 535 | |
6818 | 535 | SDValue Tmp; |
6819 | 535 | switch (Op.getSimpleValueType().SimpleTy) { |
6820 | 0 | default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
6821 | 345 | case MVT::i32: |
6822 | 345 | Tmp = DAG.getNode( |
6823 | 345 | Op.getOpcode() == ISD::FP_TO_SINT |
6824 | 181 | ? PPCISD::FCTIWZ |
6825 | 164 | : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
6826 | 345 | dl, MVT::f64, Src); |
6827 | 345 | Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp); |
6828 | 345 | break; |
6829 | 190 | case MVT::i64: |
6830 | 190 | assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) && |
6831 | 190 | "i64 FP_TO_UINT is supported only with FPCVT"); |
6832 | 94 | Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : |
6833 | 96 | PPCISD::FCTIDUZ, |
6834 | 190 | dl, MVT::f64, Src); |
6835 | 190 | Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp); |
6836 | 190 | break; |
6837 | 535 | } |
6838 | 535 | return Tmp; |
6839 | 535 | } |
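// Editor's note: with direct moves the fcti*z result stays in a VSR and
// PPCISD::MFVSR moves it straight to a GPR, replacing the stack store/load
// round trip that LowerFP_TO_INTForReuse performs above.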
6840 | | |
6841 | | SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, |
6842 | 616 | const SDLoc &dl) const { |
6843 | 616 | if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
6844 | 535 | return LowerFP_TO_INTDirectMove(Op, DAG, dl); |
6845 | 81 | |
6846 | 81 | ReuseLoadInfo RLI; |
6847 | 81 | LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); |
6848 | 81 | |
6849 | 81 | return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI, |
6850 | 81 | RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges); |
6851 | 81 | } |
6852 | | |
6853 | | // We're trying to insert a regular store, S, and then a load, L. If the |
6854 | | // incoming value, O, is a load, we might just be able to have our load use the |
6855 | | // address used by O. However, we don't know if anything else will store to |
6856 | | // that address before we can load from it. To prevent this situation, we need |
6857 | | // to insert our load, L, into the chain as a peer of O. To do this, we give L |
6858 | | // the same chain operand as O, we create a token factor from the chain results |
6859 | | // of O and L, and we replace all uses of O's chain result with that token |
6860 | | // factor (see spliceIntoChain below for this last part). |
6861 | | bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT, |
6862 | | ReuseLoadInfo &RLI, |
6863 | | SelectionDAG &DAG, |
6864 | 166 | ISD::LoadExtType ET) const { |
6865 | 166 | SDLoc dl(Op); |
6866 | 166 | if (ET == ISD::NON_EXTLOAD && |
6867 | 94 | (Op.getOpcode() == ISD::FP_TO_UINT || |
6868 | 94 | Op.getOpcode() == ISD::FP_TO_SINT) && |
6869 | 4 | isOperationLegalOrCustom(Op.getOpcode(), |
6870 | 166 | Op.getOperand(0).getValueType())) { |
6871 | 4 | |
6872 | 4 | LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); |
6873 | 4 | return true; |
6874 | 4 | } |
6875 | 162 | |
6876 | 162 | LoadSDNode *LD = dyn_cast<LoadSDNode>(Op); |
6877 | 162 | if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
6878 | 20 | LD->isNonTemporal()) |
6879 | 142 | return false; |
6880 | 20 | if (LD->getMemoryVT() != MemVT)
6881 | 0 | return false; |
6882 | 20 | |
6883 | 20 | RLI.Ptr = LD->getBasePtr(); |
6884 | 20 | if (LD->isIndexed() && !LD->getOffset().isUndef()) {
6885 | 0 | assert(LD->getAddressingMode() == ISD::PRE_INC && |
6886 | 0 | "Non-pre-inc AM on PPC?"); |
6887 | 0 | RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr, |
6888 | 0 | LD->getOffset()); |
6889 | 0 | } |
6890 | 20 | |
6891 | 20 | RLI.Chain = LD->getChain(); |
6892 | 20 | RLI.MPI = LD->getPointerInfo(); |
6893 | 20 | RLI.IsDereferenceable = LD->isDereferenceable(); |
6894 | 20 | RLI.IsInvariant = LD->isInvariant(); |
6895 | 20 | RLI.Alignment = LD->getAlignment(); |
6896 | 20 | RLI.AAInfo = LD->getAAInfo(); |
6897 | 20 | RLI.Ranges = LD->getRanges(); |
6898 | 20 | |
6899 | 20 | RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
6900 | 166 | return true; |
6901 | 166 | } |
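// Editor's note: the ResChain index above follows SelectionDAG result
// numbering — a normal load produces (value, chain), so the chain is result
// 1, while a pre-inc (indexed) load produces (value, new base pointer,
// chain), making the chain result 2.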
6902 | | |
6903 | | // Given the head of the old chain, ResChain, insert a token factor containing |
6904 | | // it and NewResChain, and make users of ResChain now be users of that token |
6905 | | // factor. |
6906 | | // TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead. |
6907 | | void PPCTargetLowering::spliceIntoChain(SDValue ResChain, |
6908 | | SDValue NewResChain, |
6909 | 24 | SelectionDAG &DAG) const { |
6910 | 24 | if (!ResChain) |
6911 | 4 | return; |
6912 | 20 | |
6913 | 20 | SDLoc dl(NewResChain); |
6914 | 20 | |
6915 | 20 | SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, |
6916 | 20 | NewResChain, DAG.getUNDEF(MVT::Other)); |
6917 | 20 | assert(TF.getNode() != NewResChain.getNode() && |
6918 | 20 | "A new TF really is required here"); |
6919 | 20 | |
6920 | 20 | DAG.ReplaceAllUsesOfValueWith(ResChain, TF); |
6921 | 20 | DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain); |
6922 | 20 | } |
6923 | | |
6924 | | /// \brief Analyze the profitability of a direct move: prefer a float
6925 | | /// load over an int load plus a direct move when the loaded integer
6926 | | /// value has no integer uses.
6927 | 104 | bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const { |
6928 | 104 | SDNode *Origin = Op.getOperand(0).getNode(); |
6929 | 104 | if (Origin->getOpcode() != ISD::LOAD) |
6930 | 97 | return true; |
6931 | 7 | |
6932 | 7 | // If there is no LXSIBZX/LXSIHZX (e.g. on Power8),
6933 | 7 | // prefer a direct move if the memory access is 1 or 2 bytes.
6934 | 7 | MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand(); |
6935 | 7 | if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
6936 | 2 | return true; |
6937 | 5 | |
6938 | 5 | for (SDNode::use_iterator UI = Origin->use_begin(), |
6939 | 5 | UE = Origin->use_end(); |
6940 | 11 | UI != UE; ++UI) {
6941 | 7 | |
6942 | 7 | // Only look at the users of the loaded value. |
6943 | 7 | if (UI.getUse().get().getResNo() != 0) |
6944 | 2 | continue; |
6945 | 5 | |
6946 | 5 | if (UI->getOpcode() != ISD::SINT_TO_FP &&
6947 | 1 | UI->getOpcode() != ISD::UINT_TO_FP) |
6948 | 1 | return true; |
6949 | 7 | } |
6950 | 5 | |
6951 | 4 | return false; |
6952 | 104 | } |
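// Editor's note: the loop above returns false only when every user of the
// loaded value is an int->FP conversion; in that case a float load can feed
// the conversion directly and the GPR-to-VSR direct move is skipped.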
6953 | | |
6954 | | /// \brief Custom lowers integer to floating point conversions to use |
6955 | | /// the direct move instructions available in ISA 2.07 to avoid the |
6956 | | /// need for load/store combinations. |
6957 | | SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op, |
6958 | | SelectionDAG &DAG, |
6959 | 100 | const SDLoc &dl) const { |
6960 | 100 | assert((Op.getValueType() == MVT::f32 || |
6961 | 100 | Op.getValueType() == MVT::f64) && |
6962 | 100 | "Invalid floating point type as target of conversion"); |
6963 | 100 | assert(Subtarget.hasFPCVT() && |
6964 | 100 | "Int to FP conversions with direct moves require FPCVT"); |
6965 | 100 | SDValue FP; |
6966 | 100 | SDValue Src = Op.getOperand(0); |
6967 | 100 | bool SinglePrec = Op.getValueType() == MVT::f32; |
6968 | 100 | bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32; |
6969 | 100 | bool Signed = Op.getOpcode() == ISD::SINT_TO_FP; |
6970 | 49 | unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) :
6971 | 51 | (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);
6972 | 100 | |
6973 | 100 | if (WordInt) {
6974 | 71 | FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ,
6975 | 71 | dl, MVT::f64, Src);
6976 | 71 | FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
6977 | 71 | }
6978 | 29 | else {
6979 | 29 | FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src);
6980 | 29 | FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
6981 | 29 | }
6982 | 100 | |
6983 | 100 | return FP; |
6984 | 100 | } |
6985 | | |
6986 | | SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, |
6987 | 206 | SelectionDAG &DAG) const { |
6988 | 206 | SDLoc dl(Op); |
6989 | 206 | |
6990 | 206 | if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
6991 | 0 | if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
6992 | 0 | return SDValue(); |
6993 | 0 |
6994 | 0 | SDValue Value = Op.getOperand(0); |
6995 | 0 | // The values are now known to be -1 (false) or 1 (true). To convert this |
6996 | 0 | // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). |
6997 | 0 | // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 |
6998 | 0 | Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); |
6999 | 0 |
7000 | 0 | SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64); |
7001 | 0 |
7002 | 0 | Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); |
7003 | 0 |
7004 | 0 | if (Op.getValueType() != MVT::v4f64) |
7005 | 0 | Value = DAG.getNode(ISD::FP_ROUND, dl, |
7006 | 0 | Op.getValueType(), Value, |
7007 | 0 | DAG.getIntPtrConstant(1, dl)); |
7008 | 0 | return Value; |
7009 | 0 | } |
7010 | 206 | |
7011 | 206 | // Don't handle ppc_fp128 here; let it be lowered to a libcall. |
7012 | 206 | if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
7013 | 0 | return SDValue(); |
7014 | 206 | |
7015 | 206 | if (Op.getOperand(0).getValueType() == MVT::i1)
7016 | 1 | return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0), |
7017 | 1 | DAG.getConstantFP(1.0, dl, Op.getValueType()), |
7018 | 1 | DAG.getConstantFP(0.0, dl, Op.getValueType())); |
7019 | 205 | |
7020 | 205 | // If we have direct moves, we can do the whole conversion and skip the
7021 | 205 | // store/load; however, without FPCVT we can't do most conversions.
7022 | 205 | if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
7023 | 205 | Subtarget.isPPC64() && Subtarget.hasFPCVT())
7024 | 100 | return LowerINT_TO_FPDirectMove(Op, DAG, dl); |
7025 | 105 | |
7026 | 205 | assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && |
7027 | 105 | "UINT_TO_FP is supported only with FPCVT"); |
7028 | 105 | |
7029 | 105 | // If we have FCFIDS, then use it when converting to single-precision. |
7030 | 105 | // Otherwise, convert to double-precision and then round. |
7031 | 82 | unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) |
7032 | 38 | ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
7033 | 38 | : PPCISD::FCFIDS)
7034 | 67 | : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
7035 | 67 | : PPCISD::FCFID); |
7036 | 82 | MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) |
7037 | 38 | ? MVT::f32 |
7038 | 67 | : MVT::f64; |
7039 | 105 | |
7040 | 105 | if (Op.getOperand(0).getValueType() == MVT::i64) {
7041 | 46 | SDValue SINT = Op.getOperand(0); |
7042 | 46 | // When converting to single-precision, we actually need to convert |
7043 | 46 | // to double-precision first and then round to single-precision. |
7044 | 46 | // To avoid double-rounding effects during that operation, we have |
7045 | 46 | // to prepare the input operand. Bits that might be truncated when |
7046 | 46 | // converting to double-precision are replaced by a bit that won't |
7047 | 46 | // be lost at this stage, but is below the single-precision rounding |
7048 | 46 | // position. |
7049 | 46 | // |
7050 | 46 | // However, if -enable-unsafe-fp-math is in effect, accept double |
7051 | 46 | // rounding to avoid the extra overhead. |
7052 | 46 | if (Op.getValueType() == MVT::f32 && |
7053 | 30 | !Subtarget.hasFPCVT() && |
7054 | 46 | !DAG.getTarget().Options.UnsafeFPMath) {
7055 | 7 | |
7056 | 7 | // Twiddle input to make sure the low 11 bits are zero. (If this |
7057 | 7 | // is the case, we are guaranteed the value will fit into the 53 bit |
7058 | 7 | // mantissa of an IEEE double-precision value without rounding.) |
7059 | 7 | // If any of those low 11 bits were not zero originally, make sure |
7060 | 7 | // bit 12 (value 2048) is set instead, so that the final rounding |
7061 | 7 | // to single-precision gets the correct result. |
7062 | 7 | SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64, |
7063 | 7 | SINT, DAG.getConstant(2047, dl, MVT::i64)); |
7064 | 7 | Round = DAG.getNode(ISD::ADD, dl, MVT::i64, |
7065 | 7 | Round, DAG.getConstant(2047, dl, MVT::i64)); |
7066 | 7 | Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT); |
7067 | 7 | Round = DAG.getNode(ISD::AND, dl, MVT::i64, |
7068 | 7 | Round, DAG.getConstant(-2048, dl, MVT::i64)); |
7069 | 7 | |
7070 | 7 | // However, we cannot use that value unconditionally: if the magnitude |
7071 | 7 | // of the input value is small, the bit-twiddling we did above might |
7072 | 7 | // end up visibly changing the output. Fortunately, in that case, we |
7073 | 7 | // don't need to twiddle bits since the original input will convert |
7074 | 7 | // exactly to double-precision floating-point already. Therefore, |
7075 | 7 | // construct a conditional to use the original value if the top 11 |
7076 | 7 | // bits are all sign-bit copies, and use the rounded value computed |
7077 | 7 | // above otherwise. |
7078 | 7 | SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64, |
7079 | 7 | SINT, DAG.getConstant(53, dl, MVT::i32)); |
7080 | 7 | Cond = DAG.getNode(ISD::ADD, dl, MVT::i64, |
7081 | 7 | Cond, DAG.getConstant(1, dl, MVT::i64)); |
7082 | 7 | Cond = DAG.getSetCC(dl, MVT::i32, |
7083 | 7 | Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT); |
7084 | 7 | |
7085 | 7 | SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT); |
7086 | 7 | } |
7087 | 46 | |
7088 | 46 | ReuseLoadInfo RLI; |
7089 | 46 | SDValue Bits; |
7090 | 46 | |
7091 | 46 | MachineFunction &MF = DAG.getMachineFunction(); |
7092 | 46 | if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
7093 | 1 | Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI, |
7094 | 1 | RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges); |
7095 | 1 | spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); |
7096 | 46 | } else if (Subtarget.hasLFIWAX() &&
7097 | 45 | canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
7098 | 1 | MachineMemOperand *MMO = |
7099 | 1 | MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, |
7100 | 1 | RLI.Alignment, RLI.AAInfo, RLI.Ranges); |
7101 | 1 | SDValue Ops[] = { RLI.Chain, RLI.Ptr }; |
7102 | 1 | Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl, |
7103 | 1 | DAG.getVTList(MVT::f64, MVT::Other), |
7104 | 1 | Ops, MVT::i32, MMO); |
7105 | 1 | spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); |
7106 | 45 | } else if (Subtarget.hasFPCVT() &&
7107 | 44 | canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
7108 | 1 | MachineMemOperand *MMO = |
7109 | 1 | MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, |
7110 | 1 | RLI.Alignment, RLI.AAInfo, RLI.Ranges); |
7111 | 1 | SDValue Ops[] = { RLI.Chain, RLI.Ptr }; |
7112 | 1 | Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl, |
7113 | 1 | DAG.getVTList(MVT::f64, MVT::Other), |
7114 | 1 | Ops, MVT::i32, MMO); |
7115 | 1 | spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); |
7116 | 44 | } else if (((Subtarget.hasLFIWAX() &&
7117 | 36 | SINT.getOpcode() == ISD::SIGN_EXTEND) ||
7118 | 42 | (Subtarget.hasFPCVT() &&
7119 | 42 | SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
7120 | 43 | SINT.getOperand(0).getValueType() == MVT::i32) {
7121 | 2 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
7122 | 2 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
7123 | 2 | |
7124 | 2 | int FrameIdx = MFI.CreateStackObject(4, 4, false); |
7125 | 2 | SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); |
7126 | 2 | |
7127 | 2 | SDValue Store = |
7128 | 2 | DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx, |
7129 | 2 | MachinePointerInfo::getFixedStack( |
7130 | 2 | DAG.getMachineFunction(), FrameIdx)); |
7131 | 2 | |
7132 | 2 | assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && |
7133 | 2 | "Expected an i32 store"); |
7134 | 2 | |
7135 | 2 | RLI.Ptr = FIdx; |
7136 | 2 | RLI.Chain = Store; |
7137 | 2 | RLI.MPI = |
7138 | 2 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); |
7139 | 2 | RLI.Alignment = 4; |
7140 | 2 | |
7141 | 2 | MachineMemOperand *MMO = |
7142 | 2 | MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, |
7143 | 2 | RLI.Alignment, RLI.AAInfo, RLI.Ranges); |
7144 | 2 | SDValue Ops[] = { RLI.Chain, RLI.Ptr }; |
7145 | 2 | Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
7146 | 2 | PPCISD::LFIWZX : PPCISD::LFIWAX,
7147 | 2 | dl, DAG.getVTList(MVT::f64, MVT::Other), |
7148 | 2 | Ops, MVT::i32, MMO); |
7149 | 2 | } else |
7150 | 41 | Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); |
7151 | 46 | |
7152 | 46 | SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits); |
7153 | 46 | |
7154 | 46 | if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
7155 | 8 | FP = DAG.getNode(ISD::FP_ROUND, dl, |
7156 | 8 | MVT::f32, FP, DAG.getIntPtrConstant(0, dl)); |
7157 | 46 | return FP; |
7158 | 46 | } |
7159 | 59 | |
7160 | 105 | assert(Op.getOperand(0).getValueType() == MVT::i32 && |
7161 | 59 | "Unhandled INT_TO_FP type in custom expander!"); |
7162 | 59 | // Since we only generate this in 64-bit mode, we can take advantage of |
7163 | 59 | // 64-bit registers. In particular, sign extend the input value into the |
7164 | 59 | // 64-bit register with extsw, store the WHOLE 64-bit value into the stack |
7165 | 59 | // then lfd it and fcfid it. |
7166 | 59 | MachineFunction &MF = DAG.getMachineFunction(); |
7167 | 59 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
7168 | 59 | EVT PtrVT = getPointerTy(MF.getDataLayout()); |
7169 | 59 | |
7170 | 59 | SDValue Ld; |
7171 | 59 | if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
7172 | 48 | ReuseLoadInfo RLI; |
7173 | 48 | bool ReusingLoad; |
7174 | 48 | if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI, |
7175 | 48 | DAG))) { |
7176 | 27 | int FrameIdx = MFI.CreateStackObject(4, 4, false); |
7177 | 27 | SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); |
7178 | 27 | |
7179 | 27 | SDValue Store = |
7180 | 27 | DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, |
7181 | 27 | MachinePointerInfo::getFixedStack( |
7182 | 27 | DAG.getMachineFunction(), FrameIdx)); |
7183 | 27 | |
7184 | 27 | assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && |
7185 | 27 | "Expected an i32 store"); |
7186 | 27 | |
7187 | 27 | RLI.Ptr = FIdx; |
7188 | 27 | RLI.Chain = Store; |
7189 | 27 | RLI.MPI = |
7190 | 27 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); |
7191 | 27 | RLI.Alignment = 4; |
7192 | 27 | } |
7193 | 48 | |
7194 | 48 | MachineMemOperand *MMO = |
7195 | 48 | MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, |
7196 | 48 | RLI.Alignment, RLI.AAInfo, RLI.Ranges); |
7197 | 48 | SDValue Ops[] = { RLI.Chain, RLI.Ptr }; |
7198 | 48 | Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
7199 | 48 | PPCISD::LFIWZX : PPCISD::LFIWAX,
7200 | 48 | dl, DAG.getVTList(MVT::f64, MVT::Other), |
7201 | 48 | Ops, MVT::i32, MMO); |
7202 | 48 | if (ReusingLoad) |
7203 | 21 | spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG); |
7204 | 59 | } else { |
7205 | 11 | assert(Subtarget.isPPC64() && |
7206 | 11 | "i32->FP without LFIWAX supported only on PPC64"); |
7207 | 11 | |
7208 | 11 | int FrameIdx = MFI.CreateStackObject(8, 8, false); |
7209 | 11 | SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); |
7210 | 11 | |
7211 | 11 | SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, |
7212 | 11 | Op.getOperand(0)); |
7213 | 11 | |
7214 | 11 | // STD the extended value into the stack slot. |
7215 | 11 | SDValue Store = DAG.getStore( |
7216 | 11 | DAG.getEntryNode(), dl, Ext64, FIdx, |
7217 | 11 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx)); |
7218 | 11 | |
7219 | 11 | // Load the value as a double. |
7220 | 11 | Ld = DAG.getLoad( |
7221 | 11 | MVT::f64, dl, Store, FIdx, |
7222 | 11 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx)); |
7223 | 11 | } |
7224 | 59 | |
7225 | 59 | // FCFID it and return it. |
7226 | 59 | SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld); |
7227 | 59 | if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
7228 | 9 | FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, |
7229 | 9 | DAG.getIntPtrConstant(0, dl)); |
7230 | 206 | return FP; |
7231 | 206 | } |
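// Editor's sketch (not part of the source): a host-side restatement of the
// bit-twiddling used above to guard the i64 -> f64 -> f32 path against
// double rounding. The helper name and standalone form are illustrative.
#include <cstdint>

static int64_t twiddleForSinglePrec(int64_t SInt) {
  // (SInt & 2047) + 2047 carries into bit 11 exactly when any of the low
  // 11 bits are set.
  int64_t Round = (SInt & 2047) + 2047;
  Round |= SInt;
  Round &= -2048; // clear the low 11 bits
  // Keep the original value when the top 11 bits are all sign-bit copies:
  // such values already convert exactly to double precision.
  uint64_t Cond = (uint64_t)(SInt >> 53) + 1;
  return Cond > 1 ? Round : SInt;
}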
7232 | | |
7233 | | SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, |
7234 | 1 | SelectionDAG &DAG) const { |
7235 | 1 | SDLoc dl(Op); |
7236 | 1 | /* |
7237 | 1 | The rounding mode is in bits 30:31 of FPSR, and has the following |
7238 | 1 | settings: |
7239 | 1 | 00 Round to nearest |
7240 | 1 | 01 Round to 0 |
7241 | 1 | 10 Round to +inf |
7242 | 1 | 11 Round to -inf |
7243 | 1 | |
7244 | 1 | FLT_ROUNDS, on the other hand, expects the following: |
7245 | 1 | -1 Undefined |
7246 | 1 | 0 Round to 0 |
7247 | 1 | 1 Round to nearest |
7248 | 1 | 2 Round to +inf |
7249 | 1 | 3 Round to -inf |
7250 | 1 | |
7251 | 1 | To perform the conversion, we do: |
7252 | 1 | ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1)) |
7253 | 1 | */ |
7254 | 1 | |
7255 | 1 | MachineFunction &MF = DAG.getMachineFunction(); |
7256 | 1 | EVT VT = Op.getValueType(); |
7257 | 1 | EVT PtrVT = getPointerTy(MF.getDataLayout()); |
7258 | 1 | |
7259 | 1 | // Save FP Control Word to register |
7260 | 1 | EVT NodeTys[] = { |
7261 | 1 | MVT::f64, // return register |
7262 | 1 | MVT::Glue // unused in this context |
7263 | 1 | }; |
7264 | 1 | SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None); |
7265 | 1 | |
7266 | 1 | // Save FP register to stack slot |
7267 | 1 | int SSFI = MF.getFrameInfo().CreateStackObject(8, 8, false); |
7268 | 1 | SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); |
7269 | 1 | SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot, |
7270 | 1 | MachinePointerInfo()); |
7271 | 1 | |
7272 | 1 | // Load FP Control Word from low 32 bits of stack slot. |
7273 | 1 | SDValue Four = DAG.getConstant(4, dl, PtrVT); |
7274 | 1 | SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four); |
7275 | 1 | SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo()); |
7276 | 1 | |
7277 | 1 | // Transform as necessary |
7278 | 1 | SDValue CWD1 = |
7279 | 1 | DAG.getNode(ISD::AND, dl, MVT::i32, |
7280 | 1 | CWD, DAG.getConstant(3, dl, MVT::i32)); |
7281 | 1 | SDValue CWD2 = |
7282 | 1 | DAG.getNode(ISD::SRL, dl, MVT::i32, |
7283 | 1 | DAG.getNode(ISD::AND, dl, MVT::i32, |
7284 | 1 | DAG.getNode(ISD::XOR, dl, MVT::i32, |
7285 | 1 | CWD, DAG.getConstant(3, dl, MVT::i32)), |
7286 | 1 | DAG.getConstant(3, dl, MVT::i32)), |
7287 | 1 | DAG.getConstant(1, dl, MVT::i32)); |
7288 | 1 | |
7289 | 1 | SDValue RetVal = |
7290 | 1 | DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2); |
7291 | 1 | |
7292 | 1 | return DAG.getNode((VT.getSizeInBits() < 16 ?
7293 | 1 | ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
7294 | 1 | } |
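// Editor's check (not part of the source): a standalone verification that
// ((FPSCR & 3) ^ ((~FPSCR & 3) >> 1)) maps the PPC rounding-mode field
// {0,1,2,3} = {nearest, toward zero, +inf, -inf} onto the FLT_ROUNDS
// encoding {1,0,2,3}, matching the table in the comment above.
#include <cassert>

static unsigned fpscrToFltRounds(unsigned CWD) {
  return (CWD & 3) ^ ((~CWD & 3) >> 1);
}

int main() {
  assert(fpscrToFltRounds(0) == 1); // round to nearest
  assert(fpscrToFltRounds(1) == 0); // round toward zero
  assert(fpscrToFltRounds(2) == 2); // round toward +inf
  assert(fpscrToFltRounds(3) == 3); // round toward -inf
  return 0;
}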
7295 | | |
7296 | 10 | SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const { |
7297 | 10 | EVT VT = Op.getValueType(); |
7298 | 10 | unsigned BitWidth = VT.getSizeInBits(); |
7299 | 10 | SDLoc dl(Op); |
7300 | 10 | assert(Op.getNumOperands() == 3 && |
7301 | 10 | VT == Op.getOperand(1).getValueType() && |
7302 | 10 | "Unexpected SHL!"); |
7303 | 10 | |
7304 | 10 | // Expand into a bunch of logical ops. Note that these ops |
7305 | 10 | // depend on the PPC behavior for oversized shift amounts. |
7306 | 10 | SDValue Lo = Op.getOperand(0); |
7307 | 10 | SDValue Hi = Op.getOperand(1); |
7308 | 10 | SDValue Amt = Op.getOperand(2); |
7309 | 10 | EVT AmtVT = Amt.getValueType(); |
7310 | 10 | |
7311 | 10 | SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, |
7312 | 10 | DAG.getConstant(BitWidth, dl, AmtVT), Amt); |
7313 | 10 | SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt); |
7314 | 10 | SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1); |
7315 | 10 | SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3); |
7316 | 10 | SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, |
7317 | 10 | DAG.getConstant(-BitWidth, dl, AmtVT)); |
7318 | 10 | SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5); |
7319 | 10 | SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); |
7320 | 10 | SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt); |
7321 | 10 | SDValue OutOps[] = { OutLo, OutHi }; |
7322 | 10 | return DAG.getMergeValues(OutOps, dl); |
7323 | 10 | } |
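// Editor's sketch (not part of the source): a host-side model of the
// expansion above. It assumes the PPC shift semantic the comment relies on —
// a 64-bit shift by an amount in [64,127] yields 0 — which plain C++ '<<'
// leaves undefined, so it is modeled explicitly. Helper names are
// illustrative.
#include <cstdint>

static uint64_t ppcShl(uint64_t V, unsigned Amt) {
  return (Amt & 127) < 64 ? V << (Amt & 63) : 0;
}
static uint64_t ppcSrl(uint64_t V, unsigned Amt) {
  return (Amt & 127) < 64 ? V >> (Amt & 63) : 0;
}

// 128-bit left shift of Hi:Lo by Amt (0 <= Amt < 128), node for node.
static void shl128(uint64_t Lo, uint64_t Hi, unsigned Amt,
                   uint64_t &OutLo, uint64_t &OutHi) {
  uint64_t Tmp4 = ppcShl(Hi, Amt) | ppcSrl(Lo, 64 - Amt); // Tmp2 | Tmp3
  uint64_t Tmp6 = ppcShl(Lo, Amt - 64u);                  // Amt + (-BitWidth)
  OutHi = Tmp4 | Tmp6;
  OutLo = ppcShl(Lo, Amt);
}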
7324 | | |
7325 | 10 | SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const { |
7326 | 10 | EVT VT = Op.getValueType(); |
7327 | 10 | SDLoc dl(Op); |
7328 | 10 | unsigned BitWidth = VT.getSizeInBits(); |
7329 | 10 | assert(Op.getNumOperands() == 3 && |
7330 | 10 | VT == Op.getOperand(1).getValueType() && |
7331 | 10 | "Unexpected SRL!"); |
7332 | 10 | |
7333 | 10 | // Expand into a bunch of logical ops. Note that these ops |
7334 | 10 | // depend on the PPC behavior for oversized shift amounts. |
7335 | 10 | SDValue Lo = Op.getOperand(0); |
7336 | 10 | SDValue Hi = Op.getOperand(1); |
7337 | 10 | SDValue Amt = Op.getOperand(2); |
7338 | 10 | EVT AmtVT = Amt.getValueType(); |
7339 | 10 | |
7340 | 10 | SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, |
7341 | 10 | DAG.getConstant(BitWidth, dl, AmtVT), Amt); |
7342 | 10 | SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); |
7343 | 10 | SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); |
7344 | 10 | SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); |
7345 | 10 | SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, |
7346 | 10 | DAG.getConstant(-BitWidth, dl, AmtVT)); |
7347 | 10 | SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5); |
7348 | 10 | SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); |
7349 | 10 | SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt); |
7350 | 10 | SDValue OutOps[] = { OutLo, OutHi }; |
7351 | 10 | return DAG.getMergeValues(OutOps, dl); |
7352 | 10 | } |
7353 | | |
7354 | 6 | SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { |
7355 | 6 | SDLoc dl(Op); |
7356 | 6 | EVT VT = Op.getValueType(); |
7357 | 6 | unsigned BitWidth = VT.getSizeInBits(); |
7358 | 6 | assert(Op.getNumOperands() == 3 && |
7359 | 6 | VT == Op.getOperand(1).getValueType() && |
7360 | 6 | "Unexpected SRA!"); |
7361 | 6 | |
7362 | 6 | // Expand into a bunch of logical ops, followed by a select_cc. |
7363 | 6 | SDValue Lo = Op.getOperand(0); |
7364 | 6 | SDValue Hi = Op.getOperand(1); |
7365 | 6 | SDValue Amt = Op.getOperand(2); |
7366 | 6 | EVT AmtVT = Amt.getValueType(); |
7367 | 6 | |
7368 | 6 | SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, |
7369 | 6 | DAG.getConstant(BitWidth, dl, AmtVT), Amt); |
7370 | 6 | SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); |
7371 | 6 | SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); |
7372 | 6 | SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); |
7373 | 6 | SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, |
7374 | 6 | DAG.getConstant(-BitWidth, dl, AmtVT)); |
7375 | 6 | SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5); |
7376 | 6 | SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt); |
7377 | 6 | SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT), |
7378 | 6 | Tmp4, Tmp6, ISD::SETLE); |
7379 | 6 | SDValue OutOps[] = { OutLo, OutHi }; |
7380 | 6 | return DAG.getMergeValues(OutOps, dl); |
7381 | 6 | } |
7382 | | |
7383 | | //===----------------------------------------------------------------------===// |
7384 | | // Vector related lowering. |
7385 | | // |
7386 | | |
7387 | | /// BuildSplatI - Build a canonical splati of Val with an element size of |
7388 | | /// SplatSize. Cast the result to VT. |
7389 | | static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, |
7390 | 439 | SelectionDAG &DAG, const SDLoc &dl) { |
7391 | 439 | assert(Val >= -16 && Val <= 15 && "vsplti is out of range!"); |
7392 | 439 | |
7393 | 439 | static const MVT VTys[] = { // canonical VT to use for each size. |
7394 | 439 | MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32 |
7395 | 439 | }; |
7396 | 439 | |
7397 | 439 | EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
7398 | 439 | |
7399 | 439 | // Force vspltis[hw] -1 to vspltisb -1 to canonicalize. |
7400 | 439 | if (Val == -1) |
7401 | 246 | SplatSize = 1; |
7402 | 439 | |
7403 | 439 | EVT CanonicalVT = VTys[SplatSize-1]; |
7404 | 439 | |
7405 | 439 | // Build a canonical splat for this value. |
7406 | 439 | return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT)); |
7407 | 439 | } |
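// Editor's note (hedged): the -1 case above can be canonicalized to a byte
// splat because an all-ones pattern is bit-identical at every element width,
// so vspltisb -1 covers the vspltish/vspltisw forms as well.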
7408 | | |
7409 | | /// BuildIntrinsicOp - Return a unary operator intrinsic node with the |
7410 | | /// specified intrinsic ID. |
7411 | | static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, |
7412 | 108 | const SDLoc &dl, EVT DestVT = MVT::Other) { |
7413 | 108 | if (DestVT == MVT::Other) DestVT = Op.getValueType();
7414 | 108 | return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, |
7415 | 108 | DAG.getConstant(IID, dl, MVT::i32), Op); |
7416 | 108 | } |
7417 | | |
7418 | | /// BuildIntrinsicOp - Return a binary operator intrinsic node with the |
7419 | | /// specified intrinsic ID. |
7420 | | static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS, |
7421 | | SelectionDAG &DAG, const SDLoc &dl, |
7422 | 61 | EVT DestVT = MVT::Other) { |
7423 | 61 | if (DestVT == MVT::Other) DestVT = LHS.getValueType();
7424 | 61 | return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, |
7425 | 61 | DAG.getConstant(IID, dl, MVT::i32), LHS, RHS); |
7426 | 61 | } |
7427 | | |
7428 | | /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the |
7429 | | /// specified intrinsic ID. |
7430 | | static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, |
7431 | | SDValue Op2, SelectionDAG &DAG, const SDLoc &dl, |
7432 | 120 | EVT DestVT = MVT::Other) { |
7433 | 120 | if (DestVT == MVT::Other) DestVT = Op0.getValueType();
7434 | 120 | return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, |
7435 | 120 | DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2); |
7436 | 120 | } |
7437 | | |
7438 | | /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified |
7439 | | /// amount. The result has the specified value type. |
7440 | | static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, |
7441 | 46 | SelectionDAG &DAG, const SDLoc &dl) { |
7442 | 46 | // Force LHS/RHS to be the right type. |
7443 | 46 | LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS); |
7444 | 46 | RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS); |
7445 | 46 | |
7446 | 46 | int Ops[16]; |
7447 | 782 | for (unsigned i = 0; i != 16; ++i)
7448 | 736 | Ops[i] = i + Amt; |
7449 | 46 | SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops); |
7450 | 46 | return DAG.getNode(ISD::BITCAST, dl, VT, T); |
7451 | 46 | } |
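// Editor's sketch (not part of the source): what the mask above encodes. A
// vsldoi by Amt selects 16 consecutive bytes of the 32-byte concatenation
// LHS||RHS starting at byte Amt — exactly Ops[i] = i + Amt.
#include <cstdint>
#include <cstring>

static void vsldoiBytes(const uint8_t LHS[16], const uint8_t RHS[16],
                        unsigned Amt, uint8_t Out[16]) {
  uint8_t Cat[32];
  std::memcpy(Cat, LHS, 16);
  std::memcpy(Cat + 16, RHS, 16);
  for (unsigned i = 0; i != 16; ++i)
    Out[i] = Cat[i + Amt]; // same indexing as the shuffle mask
}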
7452 | | |
7453 | | /// Do we have an efficient pattern in a .td file for this node? |
7454 | | /// |
7455 | | /// \param V - pointer to the BuildVectorSDNode being matched |
7456 | | /// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves? |
7457 | | /// |
7458 | | /// There are some patterns where it is beneficial to keep a BUILD_VECTOR |
7459 | | /// node as a BUILD_VECTOR node rather than expanding it. The patterns where |
7460 | | /// the opposite is true (expansion is beneficial) are: |
7461 | | /// - The node builds a vector out of integers that are not 32 or 64-bits |
7462 | | /// - The node builds a vector out of constants |
7463 | | /// - The node is a "load-and-splat" |
7464 | | /// In all other cases, we will choose to keep the BUILD_VECTOR. |
7465 | | static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, |
7466 | | bool HasDirectMove, |
7467 | 1.30k | bool HasP8Vector) { |
7468 | 1.30k | EVT VecVT = V->getValueType(0); |
7469 | 1.30k | bool RightType = VecVT == MVT::v2f64 || |
7470 | 1.15k | (HasP8Vector && VecVT == MVT::v4f32) ||
7471 | 1.02k | (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
7472 | 1.30k | if (!RightType) |
7473 | 143 | return false; |
7474 | 1.15k | |
7475 | 1.15k | bool IsSplat = true; |
7476 | 1.15k | bool IsLoad = false; |
7477 | 1.15k | SDValue Op0 = V->getOperand(0); |
7478 | 1.15k | |
7479 | 1.15k | // This function is called in a block that confirms the node is not a constant |
7480 | 1.15k | // splat. So a constant BUILD_VECTOR here means the vector is built out of |
7481 | 1.15k | // different constants. |
7482 | 1.15k | if (V->isConstant()) |
7483 | 170 | return false; |
7484 | 3.87k | for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
7485 | 2.90k | if (V->getOperand(i).isUndef()) |
7486 | 13 | return false; |
7487 | 2.88k | // We want to expand nodes that represent load-and-splat even if the |
7488 | 2.88k | // loaded value is a floating point truncation or conversion to int. |
7489 | 2.88k | if (V->getOperand(i).getOpcode() == ISD::LOAD ||
7490 | 2.18k | (V->getOperand(i).getOpcode() == ISD::FP_ROUND && |
7491 | 2.18k | V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) || |
7492 | 1.98k | (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT && |
7493 | 1.98k | V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) || |
7494 | 1.78k | (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT && |
7495 | 314 | V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD)) |
7496 | 1.30k | IsLoad = true; |
7497 | 2.88k | // If the operands are different or the input is not a load and has more |
7498 | 2.88k | // uses than just this BV node, then it isn't a splat. |
7499 | 2.88k | if (V->getOperand(i) != Op0 || |
7500 | 1.49k | (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
7501 | 1.40k | IsSplat = false; |
7502 | 2.90k | } |
7503 | 974 | return !(IsSplat && IsLoad);
7504 | 1.30k | } |
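// Editor's summary: taken together, the checks above keep the BUILD_VECTOR
// only when it has an element type with an efficient .td pattern, is not
// built from differing constants, has no undef operands, and is not a
// load-and-splat; every other shape is expanded.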
7505 | | |
7506 | | // If this is a case we can't handle, return null and let the default |
7507 | | // expansion code take care of it. If we CAN select this case, and if it |
7508 | | // selects to a single instruction, return Op. Otherwise, if we can codegen |
7509 | | // this case more efficiently than a constant pool load, lower it to the |
7510 | | // sequence of ops that should be used. |
7511 | | SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, |
7512 | 2.19k | SelectionDAG &DAG) const { |
7513 | 2.19k | SDLoc dl(Op); |
7514 | 2.19k | BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); |
7515 | 2.19k | assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); |
7516 | 2.19k | |
7517 | 2.19k | if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
7518 | 9 | // We first build an i32 vector, load it into a QPX register, |
7519 | 9 | // then convert it to a floating-point vector and compare it |
7520 | 9 | // to a zero vector to get the boolean result. |
7521 | 9 | MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); |
7522 | 9 | int FrameIdx = MFI.CreateStackObject(16, 16, false); |
7523 | 9 | MachinePointerInfo PtrInfo = |
7524 | 9 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); |
7525 | 9 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
7526 | 9 | SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); |
7527 | 9 | |
7528 | 9 | assert(BVN->getNumOperands() == 4 && |
7529 | 9 | "BUILD_VECTOR for v4i1 does not have 4 operands"); |
7530 | 9 | |
7531 | 9 | bool IsConst = true; |
7532 | 17 | for (unsigned i = 0; i < 4; ++i) {
7533 | 15 | if (BVN->getOperand(i).isUndef()) continue;
7534 | 13 | if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
7535 | 7 | IsConst = false; |
7536 | 7 | break; |
7537 | 7 | } |
7538 | 15 | } |
7539 | 9 | |
7540 | 9 | if (IsConst) {
7541 | 2 | Constant *One = |
7542 | 2 | ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0); |
7543 | 2 | Constant *NegOne = |
7544 | 2 | ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0); |
7545 | 2 | |
7546 | 2 | Constant *CV[4]; |
7547 | 10 | for (unsigned i = 0; i < 4; ++i) {
7548 | 8 | if (BVN->getOperand(i).isUndef()) |
7549 | 2 | CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext())); |
7550 | 6 | else if (isNullConstant(BVN->getOperand(i)))
7551 | 2 | CV[i] = NegOne; |
7552 | 6 | else |
7553 | 4 | CV[i] = One; |
7554 | 8 | } |
7555 | 2 | |
7556 | 2 | Constant *CP = ConstantVector::get(CV); |
7557 | 2 | SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()), |
7558 | 2 | 16 /* alignment */); |
7559 | 2 | |
7560 | 2 | SDValue Ops[] = {DAG.getEntryNode(), CPIdx}; |
7561 | 2 | SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other}); |
7562 | 2 | return DAG.getMemIntrinsicNode( |
7563 | 2 | PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32, |
7564 | 2 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); |
7565 | 2 | } |
7566 | 7 | |
7567 | 7 | SmallVector<SDValue, 4> Stores; |
7568 | 35 | for (unsigned i = 0; i < 4; ++i) {
7569 | 28 | if (BVN->getOperand(i).isUndef()) continue;
7570 | 26 | |
7571 | 26 | unsigned Offset = 4*i; |
7572 | 26 | SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType()); |
7573 | 26 | Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); |
7574 | 26 | |
7575 | 26 | unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize(); |
7576 | 26 | if (StoreSize > 4) {
7577 | 0 | Stores.push_back( |
7578 | 0 | DAG.getTruncStore(DAG.getEntryNode(), dl, BVN->getOperand(i), Idx, |
7579 | 0 | PtrInfo.getWithOffset(Offset), MVT::i32)); |
7580 | 26 | } else { |
7581 | 26 | SDValue StoreValue = BVN->getOperand(i); |
7582 | 26 | if (StoreSize < 4) |
7583 | 18 | StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue); |
7584 | 26 | |
7585 | 26 | Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, StoreValue, Idx, |
7586 | 26 | PtrInfo.getWithOffset(Offset))); |
7587 | 26 | } |
7588 | 28 | } |
7589 | 7 | |
7590 | 7 | SDValue StoreChain; |
7591 | 7 | if (!Stores.empty()) |
7592 | 7 | StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); |
7593 | 7 | else |
7594 | 0 | StoreChain = DAG.getEntryNode(); |
7595 | 9 | |
7596 | 9 | // Now load from v4i32 into the QPX register; this will extend it to |
7597 | 9 | // v4i64 but not yet convert it to a floating point. Nevertheless, this |
7598 | 9 | // is typed as v4f64 because the QPX register integer states are not |
7599 | 9 | // explicitly represented. |
7600 | 9 | |
7601 | 9 | SDValue Ops[] = {StoreChain, |
7602 | 9 | DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32), |
7603 | 9 | FIdx}; |
7604 | 9 | SDVTList VTs = DAG.getVTList({MVT::v4f64, /*chain*/ MVT::Other}); |
7605 | 9 | |
7606 | 9 | SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, |
7607 | 9 | dl, VTs, Ops, MVT::v4i32, PtrInfo); |
7608 | 9 | LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, |
7609 | 9 | DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32), |
7610 | 9 | LoadedVect); |
7611 | 9 | |
7612 | 9 | SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::v4f64); |
7613 | 9 | |
7614 | 9 | return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ); |
7615 | 9 | } |
7616 | 2.18k | |
7617 | 2.18k | // All other QPX vectors are handled by generic code. |
7618 | 2.18k | if (Subtarget.hasQPX())
7619 | 61 | return SDValue(); |
7620 | 2.12k | |
7621 | 2.12k | // Check if this is a splat of a constant value. |
7622 | 2.12k | APInt APSplatBits, APSplatUndef; |
7623 | 2.12k | unsigned SplatBitSize; |
7624 | 2.12k | bool HasAnyUndefs; |
7625 | 2.12k | if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, |
7626 | 2.12k | HasAnyUndefs, 0, !Subtarget.isLittleEndian()) || |
7627 | 2.12k | SplatBitSize > 32) {
7628 | 1.33k | // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be |
7629 | 1.33k | // lowered to VSX instructions under certain conditions. |
7630 | 1.33k | // Without VSX, there is no pattern more efficient than expanding the node. |
7631 | 1.33k | if (Subtarget.hasVSX() && |
7632 | 1.30k | haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(), |
7633 | 1.30k | Subtarget.hasP8Vector())) |
7634 | 874 | return Op; |
7635 | 465 | return SDValue(); |
7636 | 465 | } |
7637 | 787 | |
7638 | 787 | unsigned SplatBits = APSplatBits.getZExtValue(); |
7639 | 787 | unsigned SplatUndef = APSplatUndef.getZExtValue(); |
7640 | 787 | unsigned SplatSize = SplatBitSize / 8; |
7641 | 787 | |
7642 | 787 | // First, handle single instruction cases. |
7643 | 787 | |
7644 | 787 | // All zeros? |
7645 | 787 | if (SplatBits == 0) {
7646 | 257 | // Canonicalize all zero vectors to be v4i32. |
7647 | 257 | if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
7648 | 49 | SDValue Z = DAG.getConstant(0, dl, MVT::v4i32); |
7649 | 49 | Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z); |
7650 | 49 | } |
7651 | 257 | return Op; |
7652 | 257 | } |
7653 | 530 | |
7654 | 530 | // We have XXSPLTIB for constant splats one byte wide |
7655 | 530 | if (Subtarget.hasP9Vector() && SplatSize == 1) {
7656 | 68 | // This is a splat of 1-byte elements with some elements potentially undef. |
7657 | 68 | // Rather than trying to match undef in the SDAG patterns, ensure that all |
7658 | 68 | // elements are the same constant. |
7659 | 68 | if (HasAnyUndefs || ISD::isBuildVectorAllOnes(BVN)) {
7660 | 30 | SmallVector<SDValue, 16> Ops(16, DAG.getConstant(SplatBits, |
7661 | 30 | dl, MVT::i32)); |
7662 | 30 | SDValue NewBV = DAG.getBuildVector(MVT::v16i8, dl, Ops); |
7663 | 30 | if (Op.getValueType() != MVT::v16i8) |
7664 | 8 | return DAG.getBitcast(Op.getValueType(), NewBV); |
7665 | 22 | return NewBV; |
7666 | 22 | } |
7667 | 38 | |
7668 | 38 | // BuildVectorSDNode::isConstantSplat() is actually pretty smart. It'll |
7669 | 38 | // detect that constant splats like v8i16: 0xABAB are really just splats |
7670 | 38 | // of a 1-byte constant. In this case, we need to convert the node to a |
7671 | 38 | // splat of v16i8 and a bitcast. |
7672 | 38 | if (Op.getValueType() != MVT::v16i8)
7673 | 6 | return DAG.getBitcast(Op.getValueType(), |
7674 | 6 | DAG.getConstant(SplatBits, dl, MVT::v16i8)); |
7675 | 32 | |
7676 | 32 | return Op; |
7677 | 32 | } |
7678 | 462 | |
7679 | 462 | // If the sign extended value is in the range [-16,15], use VSPLTI[bhw]. |
7680 | 462 | int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >> |
7681 | 462 | (32-SplatBitSize)); |
7682 | 462 | if (SextVal >= -16 && SextVal <= 15)
7683 | 386 | return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl); |
7684 | 76 | |
7685 | 76 | // Two instruction sequences. |
7686 | 76 | |
7687 | 76 | // If this value is in the range [-32,30] and is even, use: |
7688 | 76 | // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2) |
7689 | 76 | // If this value is in the range [17,31] and is odd, use: |
7690 | 76 | // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16) |
7691 | 76 | // If this value is in the range [-31,-17] and is odd, use: |
7692 | 76 | // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16) |
7693 | 76 | // Note the last two are three-instruction sequences. |
7694 | 76 | if (SextVal >= -32 && SextVal <= 31) {
7695 | 33 | // To avoid having these optimizations undone by constant folding, |
7696 | 33 | // we convert to a pseudo that will be expanded later into one of |
7697 | 33 | // the above forms. |
7698 | 33 | SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32); |
7699 | 4 | EVT VT = (SplatSize == 1 ? MVT::v16i8 :
7700 | 29 | (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
7701 | 33 | SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32); |
7702 | 33 | SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize); |
7703 | 33 | if (VT == Op.getValueType()) |
7704 | 28 | return RetVal; |
7705 | 33 | else |
7706 | 5 | return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal); |
7707 | 43 | } |
7708 | 43 | |
7709 | 43 | // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is |
7710 | 43 | // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important |
7711 | 43 | // for fneg/fabs. |
7712 | 43 | if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
7713 | 2 | // Make -1 and vspltisw -1: |
7714 | 2 | SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl); |
7715 | 2 | |
7716 | 2 | // Make the VSLW intrinsic, computing 0x8000_0000. |
7717 | 2 | SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV, |
7718 | 2 | OnesV, DAG, dl); |
7719 | 2 | |
7720 | 2 | // xor by OnesV to invert it. |
7721 | 2 | Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV); |
7722 | 2 | return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); |
7723 | 2 | } |
7724 | 41 | |
7725 | 41 | // Check to see if this is a wide variety of vsplti*, binop self cases. |
7726 | 41 | static const signed char SplatCsts[] = { |
7727 | 41 | -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, |
7728 | 41 | -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16 |
7729 | 41 | }; |
7730 | 41 | |
7731 | 950 | for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
7732 | 941 | // Indirect through the SplatCsts array so that we favor 'vsplti -1' for |
7733 | 941 | // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1' |
7734 | 941 | int i = SplatCsts[idx]; |
7735 | 941 | |
7736 | 941 | // Figure out what shift amount will be used by altivec if shifted by i in |
7737 | 941 | // this splat size. |
7738 | 941 | unsigned TypeShiftAmt = i & (SplatBitSize-1); |
7739 | 941 | |
7740 | 941 | // vsplti + shl self. |
7741 | 941 | if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
7742 | 8 | SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); |
7743 | 8 | static const unsigned IIDs[] = { // Intrinsic to use for each size. |
7744 | 8 | Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0, |
7745 | 8 | Intrinsic::ppc_altivec_vslw |
7746 | 8 | }; |
7747 | 8 | Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); |
7748 | 8 | return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); |
7749 | 8 | } |
7750 | 933 | |
7751 | 933 | // vsplti + srl self. |
7752 | 933 | if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
7753 | 17 | SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); |
7754 | 17 | static const unsigned IIDs[] = { // Intrinsic to use for each size. |
7755 | 17 | Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0, |
7756 | 17 | Intrinsic::ppc_altivec_vsrw |
7757 | 17 | }; |
7758 | 17 | Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); |
7759 | 17 | return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); |
7760 | 17 | } |
7761 | 916 | |
7762 | 916 | // vsplti + sra self. |
7763 | 916 | if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
7764 | 0 | SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); |
7765 | 0 | static const unsigned IIDs[] = { // Intrinsic to use for each size. |
7766 | 0 | Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0, |
7767 | 0 | Intrinsic::ppc_altivec_vsraw |
7768 | 0 | }; |
7769 | 0 | Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); |
7770 | 0 | return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); |
7771 | 0 | } |
7772 | 916 | |
7773 | 916 | // vsplti + rol self. |
7774 | 916 | if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
7775 | 916 | ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) { |
7776 | 3 | SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); |
7777 | 3 | static const unsigned IIDs[] = { // Intrinsic to use for each size. |
7778 | 3 | Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0, |
7779 | 3 | Intrinsic::ppc_altivec_vrlw |
7780 | 3 | }; |
7781 | 3 | Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); |
7782 | 3 | return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); |
7783 | 3 | } |
7784 | 913 | |
7785 | 913 | // t = vsplti c, result = vsldoi t, t, 1 |
7786 | 913 | if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
7787 | 4 | SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
7788 | 4 | unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
7789 | 4 | return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
7790 | 4 | }
7791 | 909 | // t = vsplti c, result = vsldoi t, t, 2
7792 | 909 | if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
7793 | 0 | SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
7794 | 0 | unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
7795 | 0 | return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
7796 | 0 | }
7797 | 909 | // t = vsplti c, result = vsldoi t, t, 3
7798 | 909 | if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
7799 | 0 | SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
7800 | 0 | unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
7801 | 0 | return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); |
7802 | 0 | } |
7803 | 941 | } |
7804 | 41 | |
7805 | 9 | return SDValue(); |
7806 | 2.19k | } |
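// Editor's check (not part of the source): the per-lane arithmetic behind
// the two- and three-instruction splat sequences described above; each
// operand stays inside the vsplti immediate range [-16,15].
#include <cassert>

int main() {
  for (int V = -32; V <= 30; V += 2)  // even values in [-32,30]
    assert(V / 2 + V / 2 == V);       // vsplti(v/2) + vsplti(v/2)
  for (int V = 17; V <= 31; V += 2)   // odd values in [17,31]
    assert((V - 16) - (-16) == V);    // vsplti(v-16) - vsplti(-16)
  for (int V = -31; V <= -17; V += 2) // odd values in [-31,-17]
    assert((V + 16) + (-16) == V);    // vsplti(v+16) + vsplti(-16)
  return 0;
}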
7807 | | |
7808 | | /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit |
7809 | | /// the specified operations to build the shuffle. |
7810 | | static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, |
7811 | | SDValue RHS, SelectionDAG &DAG, |
7812 | 160 | const SDLoc &dl) { |
7813 | 160 | unsigned OpNum = (PFEntry >> 26) & 0x0F; |
7814 | 160 | unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); |
7815 | 160 | unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); |
7816 | 160 | |
7817 | 160 | enum { |
7818 | 160 | OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> |
7819 | 160 | OP_VMRGHW, |
7820 | 160 | OP_VMRGLW, |
7821 | 160 | OP_VSPLTISW0, |
7822 | 160 | OP_VSPLTISW1, |
7823 | 160 | OP_VSPLTISW2, |
7824 | 160 | OP_VSPLTISW3, |
7825 | 160 | OP_VSLDOI4, |
7826 | 160 | OP_VSLDOI8, |
7827 | 160 | OP_VSLDOI12 |
7828 | 160 | }; |
7829 | 160 | |
7830 | 160 | if (OpNum == OP_COPY) {
7831 | 91 | if (LHSID == (1*9+2)*9+3) return LHS;
7832 | 0 | assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); |
7833 | 30 | return RHS; |
7834 | 30 | } |
7835 | 69 | |
7836 | 69 | SDValue OpLHS, OpRHS; |
7837 | 69 | OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); |
7838 | 69 | OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); |
7839 | 69 | |
7840 | 69 | int ShufIdxs[16]; |
7841 | 69 | switch (OpNum) { |
7842 | 0 | default: 0 llvm_unreachable0 ("Unknown i32 permute!"); |
7843 | 11 | case OP_VMRGHW: |
7844 | 11 | ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3; |
7845 | 11 | ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19; |
7846 | 11 | ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7; |
7847 | 11 | ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23; |
7848 | 11 | break; |
7849 | 12 | case OP_VMRGLW: |
7850 | 12 | ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11; |
7851 | 12 | ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27; |
7852 | 12 | ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15; |
7853 | 12 | ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31; |
7854 | 12 | break; |
7855 | 1 | case OP_VSPLTISW0: |
7856 | 17 | for (unsigned i = 0; i != 16; ++i)
7857 | 16 | ShufIdxs[i] = (i&3)+0;
7858 | 1 | break;
7859 | 0 | case OP_VSPLTISW1:
7860 | 0 | for (unsigned i = 0; i != 16; ++i)
7861 | 0 | ShufIdxs[i] = (i&3)+4;
7862 | 0 | break;
7863 | 3 | case OP_VSPLTISW2:
7864 | 51 | for (unsigned i = 0; i != 16; ++i)
7865 | 48 | ShufIdxs[i] = (i&3)+8;
7866 | 3 | break;
7867 | 0 | case OP_VSPLTISW3:
7868 | 0 | for (unsigned i = 0; i != 16; ++i)
7869 | 0 | ShufIdxs[i] = (i&3)+12;
7870 | 0 | break; |
7871 | 14 | case OP_VSLDOI4: |
7872 | 14 | return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl); |
7873 | 14 | case OP_VSLDOI8: |
7874 | 14 | return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl); |
7875 | 14 | case OP_VSLDOI12: |
7876 | 14 | return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl); |
7877 | 27 | } |
7878 | 27 | EVT VT = OpLHS.getValueType(); |
7879 | 27 | OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS); |
7880 | 27 | OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS); |
7881 | 27 | SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs); |
7882 | 27 | return DAG.getNode(ISD::BITCAST, dl, VT, T); |
7883 | 27 | } |
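// Editor's sketch (not part of the source): the bit layout decoded at the
// top of GeneratePerfectShuffle; the struct and helper are illustrative.
#include <cstdint>

struct PFDecoded {
  unsigned OpNum; // bits 29:26 - which OP_* expansion to emit
  unsigned LHSID; // bits 25:13 - table entry for the left operand
  unsigned RHSID; // bits 12:0  - table entry for the right operand
};

static PFDecoded decodePFEntry(uint32_t PFEntry) {
  return {(PFEntry >> 26) & 0x0F,
          (PFEntry >> 13) & ((1u << 13) - 1),
          (PFEntry >> 0) & ((1u << 13) - 1)};
}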
7884 | | |
7885 | | /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this |
7886 | | /// is a shuffle we can handle in a single instruction, return it. Otherwise, |
7887 | | /// return the code it can be lowered into. Worst case, it can always be |
7888 | | /// lowered into a vperm. |
7889 | | SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, |
7890 | 1.08k | SelectionDAG &DAG) const { |
7891 | 1.08k | SDLoc dl(Op); |
7892 | 1.08k | SDValue V1 = Op.getOperand(0); |
7893 | 1.08k | SDValue V2 = Op.getOperand(1); |
7894 | 1.08k | ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); |
7895 | 1.08k | EVT VT = Op.getValueType(); |
7896 | 1.08k | bool isLittleEndian = Subtarget.isLittleEndian(); |
7897 | 1.08k | |
7898 | 1.08k | unsigned ShiftElts, InsertAtByte; |
7899 | 1.08k | bool Swap = false; |
7900 | 1.08k | if (Subtarget.hasP9Vector() && |
7901 | 439 | PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap, |
7902 | 1.08k | isLittleEndian)) { |
7903 | 134 | if (Swap) |
7904 | 70 | std::swap(V1, V2); |
7905 | 134 | SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); |
7906 | 134 | SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2); |
7907 | 134 | if (ShiftElts) {
7908 | 96 | SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2, |
7909 | 96 | DAG.getConstant(ShiftElts, dl, MVT::i32)); |
7910 | 96 | SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl, |
7911 | 96 | DAG.getConstant(InsertAtByte, dl, MVT::i32)); |
7912 | 96 | return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); |
7913 | 96 | } |
7914 | 38 | SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2, |
7915 | 38 | DAG.getConstant(InsertAtByte, dl, MVT::i32)); |
7916 | 38 | return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); |
7917 | 38 | } |
7918 | 952 | |
7919 | 952 | |
7920 | 952 | if (Subtarget.hasVSX() &&
7921 | 952 | PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
7922 | 60 | if (Swap)
7923 | 14 | std::swap(V1, V2);
7924 | 60 | SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
7925 | 60 | SDValue Conv2 =
7926 | 60 | DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
7927 | 60 | |
7928 | 60 | SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2, |
7929 | 60 | DAG.getConstant(ShiftElts, dl, MVT::i32)); |
7930 | 60 | return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl); |
7931 | 60 | } |
7932 | 892 | |
7933 | 892 | if (Subtarget.hasVSX() &&
7934 | 892 | PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
7935 | 13 | if (Swap)
7936 | 7 | std::swap(V1, V2);
7937 | 13 | SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
7938 | 13 | SDValue Conv2 =
7939 | 13 | DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
7940 | 13 | |
7941 | 13 | SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2, |
7942 | 13 | DAG.getConstant(ShiftElts, dl, MVT::i32)); |
7943 | 13 | return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI); |
7944 | 13 | } |
7945 | 879 | |
7946 | 879 | if (879 Subtarget.hasP9Vector()879 ) { |
7947 | 294 | if (PPC::isXXBRHShuffleMask(SVOp)294 ) { |
7948 | 2 | SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1); |
7949 | 2 | SDValue ReveHWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v8i16, Conv); |
7950 | 2 | return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord); |
7951 | 292 | } else if (292 PPC::isXXBRWShuffleMask(SVOp)292 ) { |
7952 | 2 | SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); |
7953 | 2 | SDValue ReveWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v4i32, Conv); |
7954 | 2 | return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord); |
7955 | 290 | } else if (290 PPC::isXXBRDShuffleMask(SVOp)290 ) { |
7956 | 2 | SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1); |
7957 | 2 | SDValue ReveDWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Conv); |
7958 | 2 | return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord); |
7959 | 288 | } else if (PPC::isXXBRQShuffleMask(SVOp)) {
7960 | 2 | SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1); |
7961 | 2 | SDValue ReveQWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v1i128, Conv); |
7962 | 2 | return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord); |
7963 | 2 | } |
7964 | 871 | } |
7965 | 871 | |
7966 | 871 | if (Subtarget.hasVSX()) {
7967 | 552 | if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
7968 | 87 | int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG); |
7969 | 87 | |
7970 | 87 | // If the source for the shuffle is a scalar_to_vector that came from a |
7971 | 87 | // 32-bit load, it will have used LXVWSX so we don't need to splat again. |
7972 | 87 | if (Subtarget.hasP9Vector() && |
7973 | 40 | ((isLittleEndian && SplatIdx == 3) ||
7974 | 87 | (!isLittleEndian && SplatIdx == 0))) {
7975 | 38 | SDValue Src = V1.getOperand(0); |
7976 | 38 | if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR && |
7977 | 38 | Src.getOperand(0).getOpcode() == ISD::LOAD && |
7978 | 26 | Src.getOperand(0).hasOneUse()) |
7979 | 24 | return V1; |
7980 | 63 | } |
7981 | 63 | SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); |
7982 | 63 | SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv, |
7983 | 63 | DAG.getConstant(SplatIdx, dl, MVT::i32)); |
7984 | 63 | return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat); |
7985 | 63 | } |
7986 | 465 | |
7987 | 465 | // Left shifts of 8 bytes are actually swaps. Convert accordingly. |
7988 | 465 | if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
7989 | 0 | SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1); |
7990 | 0 | SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv); |
7991 | 0 | return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap); |
7992 | 0 | } |
7993 | 784 | } |
7994 | 784 | |
7995 | 784 | if (Subtarget.hasQPX()) {
7996 | 71 | if (VT.getVectorNumElements() != 4) |
7997 | 0 | return SDValue(); |
7998 | 71 | |
7999 | 71 | if (V2.isUndef()) V2 = V1;
8000 | 71 | |
8001 | 71 | int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp); |
8002 | 71 | if (AlignIdx != -1) {
8003 | 0 | return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2, |
8004 | 0 | DAG.getConstant(AlignIdx, dl, MVT::i32)); |
8005 | 71 | } else if (SVOp->isSplat()) {
8006 | 7 | int SplatIdx = SVOp->getSplatIndex(); |
8007 | 7 | if (SplatIdx >= 4) {
8008 | 0 | std::swap(V1, V2); |
8009 | 0 | SplatIdx -= 4; |
8010 | 0 | } |
8011 | 71 | |
8012 | 71 | return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1, |
8013 | 71 | DAG.getConstant(SplatIdx, dl, MVT::i32)); |
8014 | 71 | } |
8015 | 64 | |
8016 | 64 | // Lower this into a qvgpci/qvfperm pair. |
8017 | 64 | |
8018 | 64 | // Compute the qvgpci literal |
8019 | 64 | unsigned idx = 0; |
8020 | 320 | for (unsigned i = 0; i < 4; ++i) {
8021 | 256 | int m = SVOp->getMaskElt(i); |
8022 | 256 | unsigned mm = m >= 0 ? (unsigned) m : i;
8023 | 256 | idx |= mm << (3-i)*3; |
8024 | 256 | } |
8025 | 71 | |
8026 | 71 | SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64, |
8027 | 71 | DAG.getConstant(idx, dl, MVT::i32)); |
8028 | 71 | return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3); |
8029 | 71 | } |
8030 | 713 | |
8031 | 713 | // Cases that are handled by instructions that take permute immediates |
8032 | 713 | // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be |
8033 | 713 | // selected by the instruction selector. |
8034 | 713 | if (V2.isUndef()) {
8035 | 285 | if (PPC::isSplatShuffleMask(SVOp, 1) || |
8036 | 245 | PPC::isSplatShuffleMask(SVOp, 2) || |
8037 | 217 | PPC::isSplatShuffleMask(SVOp, 4) || |
8038 | 199 | PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) || |
8039 | 189 | PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) || |
8040 | 183 | PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 || |
8041 | 140 | PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) || |
8042 | 134 | PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) || |
8043 | 106 | PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) || |
8044 | 96 | PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) || |
8045 | 90 | PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) || |
8046 | 62 | PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) || |
8047 | 52 | (Subtarget.hasP8Altivec() && (
8048 | 49 | PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) || |
8049 | 45 | PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) || |
8050 | 285 | PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
8051 | 247 | return Op; |
8052 | 247 | } |
8053 | 466 | } |
8054 | 466 | |
8055 | 466 | // Altivec has a variety of "shuffle immediates" that take two vector inputs |
8056 | 466 | // and produce a fixed permutation. If any of these match, do not lower to |
8057 | 466 | // VPERM. |
8058 | 466 | unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
8059 | 466 | if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) || |
8060 | 464 | PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) || |
8061 | 462 | PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 || |
8062 | 428 | PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) || |
8063 | 390 | PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) || |
8064 | 326 | PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) || |
8065 | 261 | PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) || |
8066 | 239 | PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) || |
8067 | 175 | PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) || |
8068 | 74 | (Subtarget.hasP8Altivec() && (
8069 | 51 | PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) || |
8070 | 47 | PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) || |
8071 | 51 | PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG)))) |
8072 | 404 | return Op; |
8073 | 62 | |
8074 | 62 | // Check to see if this is a shuffle of 4-byte values. If so, we can use our |
8075 | 62 | // perfect shuffle table to emit an optimal matching sequence. |
8076 | 62 | ArrayRef<int> PermMask = SVOp->getMask(); |
8077 | 62 | |
8078 | 62 | unsigned PFIndexes[4]; |
8079 | 62 | bool isFourElementShuffle = true; |
8080 | 279 | for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
8081 | 217 | unsigned EltNo = 8; // Start out undef. |
8082 | 1.05k | for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
8083 | 848 | if (PermMask[i*4+j] < 0) |
8084 | 54 | continue; // Undef, ignore it. |
8085 | 794 | |
8086 | 794 | unsigned ByteSource = PermMask[i*4+j]; |
8087 | 794 | if ((ByteSource & 3) != j) {
8088 | 12 | isFourElementShuffle = false; |
8089 | 12 | break; |
8090 | 12 | } |
8091 | 782 | |
8092 | 782 | if (EltNo == 8) {
8093 | 198 | EltNo = ByteSource/4; |
8094 | 782 | } else if (EltNo != ByteSource/4) {
8095 | 0 | isFourElementShuffle = false; |
8096 | 0 | break; |
8097 | 0 | } |
8098 | 848 | } |
8099 | 217 | PFIndexes[i] = EltNo; |
8100 | 217 | } |
8101 | 62 | |
8102 | 62 | // If this shuffle can be expressed as a shuffle of 4-byte elements, use the |
8103 | 62 | // perfect shuffle vector to determine if it is cost effective to do this as |
8104 | 62 | // discrete instructions, or whether we should use a vperm. |
8105 | 62 | // For now, we skip this for little endian until such time as we have a |
8106 | 62 | // little-endian perfect shuffle table. |
8107 | 62 | if (isFourElementShuffle && !isLittleEndian) {
8108 | 34 | // Compute the index in the perfect shuffle table. |
8109 | 34 | unsigned PFTableIndex = |
8110 | 34 | PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; |
8111 | 34 | |
8112 | 34 | unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; |
8113 | 34 | unsigned Cost = (PFEntry >> 30); |
8114 | 34 | |
8115 | 34 | // Determining when to avoid vperm is tricky. Many things affect the cost |
8116 | 34 | // of vperm, particularly how many times the perm mask needs to be computed. |
8117 | 34 | // For example, if the perm mask can be hoisted out of a loop or is already |
8118 | 34 | // used (perhaps because there are multiple permutes with the same shuffle |
8119 | 34 | // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of |
8120 | 34 | // the loop requires an extra register. |
8121 | 34 | // |
8122 | 34 | // As a compromise, we only emit discrete instructions if the shuffle can be |
8123 | 34 | // generated in 3 or fewer operations. When we have loop information |
8124 | 34 | // available, if this block is within a loop, we should avoid using vperm |
8125 | 34 | // for 3-operation perms and use a constant pool load instead. |
8126 | 34 | if (Cost < 3) |
8127 | 22 | return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); |
8128 | 40 | } |
8129 | 40 | |
8130 | 40 | // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant |
8131 | 40 | // vector that will get spilled to the constant pool. |
8132 | 40 | if (V2.isUndef()) V2 = V1;
8133 | 40 | |
8134 | 40 | // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except |
8135 | 40 | // that it is in input element units, not in bytes. Convert now. |
8136 | 40 | |
8137 | 40 | // For little endian, the order of the input vectors is reversed, and |
8138 | 40 | // the permutation mask is complemented with respect to 31. This is |
8139 | 40 | // necessary to produce proper semantics with the big-endian-biased vperm |
8140 | 40 | // instruction. |
8141 | 40 | EVT EltVT = V1.getValueType().getVectorElementType(); |
8142 | 40 | unsigned BytesPerElement = EltVT.getSizeInBits()/8; |
8143 | 40 | |
8144 | 40 | SmallVector<SDValue, 16> ResultMask; |
8145 | 680 | for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
8146 | 640 | unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
8147 | 640 | |
8148 | 1.28k | for (unsigned j = 0; j != BytesPerElement; ++j)
8149 | 640 | if (isLittleEndian)
8150 | 304 | ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j), |
8151 | 304 | dl, MVT::i32)); |
8152 | 640 | else |
8153 | 336 | ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl, |
8154 | 336 | MVT::i32)); |
8155 | 640 | } |
8156 | 40 | |
8157 | 40 | SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask); |
8158 | 40 | if (isLittleEndian) |
8159 | 19 | return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), |
8160 | 19 | V2, V1, VPermMask); |
8161 | 40 | else |
8162 | 21 | return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), |
8163 | 21 | V1, V2, VPermMask); |
8164 | 0 | } |
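
An aside on the final VPERM fallback above: the little-endian byte-mask complement can be checked in isolation. A minimal standalone sketch, assuming 4-byte elements; buildVPermMask is a hypothetical helper written for this note, not a function in this file.

    // Builds the vperm byte mask from an element-level shuffle mask. For
    // little endian, each byte index is complemented with respect to 31 to
    // match the big-endian bias of the vperm instruction.
    #include <cstdio>
    #include <vector>

    static std::vector<unsigned> buildVPermMask(const std::vector<int> &PermMask,
                                                unsigned BytesPerElement,
                                                bool IsLittleEndian) {
      std::vector<unsigned> ByteMask;
      for (int Elt : PermMask) {
        unsigned SrcElt = Elt < 0 ? 0 : (unsigned)Elt; // undef lane -> element 0
        for (unsigned j = 0; j != BytesPerElement; ++j)
          ByteMask.push_back(IsLittleEndian
                                 ? 31 - (SrcElt * BytesPerElement + j)
                                 : SrcElt * BytesPerElement + j);
      }
      return ByteMask;
    }

    int main() {
      std::vector<int> PermMask = {0, 4, 1, 5}; // interleave words of V1 and V2
      for (unsigned B : buildVPermMask(PermMask, 4, /*IsLittleEndian=*/true))
        std::printf("%u ", B);
      std::printf("\n");
    }
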
8165 | | |
8166 | | /// getVectorCompareInfo - Given an intrinsic, return false if it is not a |
8167 | | /// vector comparison. If it is, return true and fill in CompareOpc/isDot with
8168 | | /// information about the intrinsic. |
8169 | | static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, |
8170 | 1.35k | bool &isDot, const PPCSubtarget &Subtarget) { |
8171 | 1.35k | unsigned IntrinsicID = |
8172 | 1.35k | cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue(); |
8173 | 1.35k | CompareOpc = -1; |
8174 | 1.35k | isDot = false; |
8175 | 1.35k | switch (IntrinsicID) { |
8176 | 1.33k | default: |
8177 | 1.33k | return false; |
8178 | 1.35k | // Comparison predicates. |
8179 | 1 | case Intrinsic::ppc_altivec_vcmpbfp_p: |
8180 | 1 | CompareOpc = 966; |
8181 | 1 | isDot = true; |
8182 | 1 | break; |
8183 | 1 | case Intrinsic::ppc_altivec_vcmpeqfp_p: |
8184 | 1 | CompareOpc = 198; |
8185 | 1 | isDot = true; |
8186 | 1 | break; |
8187 | 0 | case Intrinsic::ppc_altivec_vcmpequb_p: |
8188 | 0 | CompareOpc = 6; |
8189 | 0 | isDot = true; |
8190 | 0 | break; |
8191 | 2 | case Intrinsic::ppc_altivec_vcmpequh_p: |
8192 | 2 | CompareOpc = 70; |
8193 | 2 | isDot = true; |
8194 | 2 | break; |
8195 | 1 | case Intrinsic::ppc_altivec_vcmpequw_p: |
8196 | 1 | CompareOpc = 134; |
8197 | 1 | isDot = true; |
8198 | 1 | break; |
8199 | 2 | case Intrinsic::ppc_altivec_vcmpequd_p: |
8200 | 2 | if (Subtarget.hasP8Altivec()) {
8201 | 2 | CompareOpc = 199; |
8202 | 2 | isDot = true; |
8203 | 2 | } else |
8204 | 0 | return false; |
8205 | 2 | break; |
8206 | 0 | case Intrinsic::ppc_altivec_vcmpneb_p: |
8207 | 0 | case Intrinsic::ppc_altivec_vcmpneh_p: |
8208 | 0 | case Intrinsic::ppc_altivec_vcmpnew_p: |
8209 | 0 | case Intrinsic::ppc_altivec_vcmpnezb_p: |
8210 | 0 | case Intrinsic::ppc_altivec_vcmpnezh_p: |
8211 | 0 | case Intrinsic::ppc_altivec_vcmpnezw_p: |
8212 | 0 | if (Subtarget.hasP9Altivec()) {
8213 | 0 | switch (IntrinsicID) { |
8214 | 0 | default: |
8215 | 0 | llvm_unreachable("Unknown comparison intrinsic."); |
8216 | 0 | case Intrinsic::ppc_altivec_vcmpneb_p: |
8217 | 0 | CompareOpc = 7; |
8218 | 0 | break; |
8219 | 0 | case Intrinsic::ppc_altivec_vcmpneh_p: |
8220 | 0 | CompareOpc = 71; |
8221 | 0 | break; |
8222 | 0 | case Intrinsic::ppc_altivec_vcmpnew_p: |
8223 | 0 | CompareOpc = 135; |
8224 | 0 | break; |
8225 | 0 | case Intrinsic::ppc_altivec_vcmpnezb_p: |
8226 | 0 | CompareOpc = 263; |
8227 | 0 | break; |
8228 | 0 | case Intrinsic::ppc_altivec_vcmpnezh_p: |
8229 | 0 | CompareOpc = 327; |
8230 | 0 | break; |
8231 | 0 | case Intrinsic::ppc_altivec_vcmpnezw_p: |
8232 | 0 | CompareOpc = 391; |
8233 | 0 | break; |
8234 | 0 | } |
8235 | 0 | isDot = true; |
8236 | 0 | } else |
8237 | 0 | return false; |
8238 | 0 | break; |
8239 | 0 | case Intrinsic::ppc_altivec_vcmpgefp_p: |
8240 | 0 | CompareOpc = 454; |
8241 | 0 | isDot = true; |
8242 | 0 | break; |
8243 | 0 | case Intrinsic::ppc_altivec_vcmpgtfp_p: |
8244 | 0 | CompareOpc = 710; |
8245 | 0 | isDot = true; |
8246 | 0 | break; |
8247 | 0 | case Intrinsic::ppc_altivec_vcmpgtsb_p: |
8248 | 0 | CompareOpc = 774; |
8249 | 0 | isDot = true; |
8250 | 0 | break; |
8251 | 0 | case Intrinsic::ppc_altivec_vcmpgtsh_p: |
8252 | 0 | CompareOpc = 838; |
8253 | 0 | isDot = true; |
8254 | 0 | break; |
8255 | 0 | case Intrinsic::ppc_altivec_vcmpgtsw_p: |
8256 | 0 | CompareOpc = 902; |
8257 | 0 | isDot = true; |
8258 | 0 | break; |
8259 | 2 | case Intrinsic::ppc_altivec_vcmpgtsd_p: |
8260 | 2 | if (Subtarget.hasP8Altivec()) {
8261 | 2 | CompareOpc = 967; |
8262 | 2 | isDot = true; |
8263 | 2 | } else |
8264 | 0 | return false; |
8265 | 2 | break; |
8266 | 0 | case Intrinsic::ppc_altivec_vcmpgtub_p: |
8267 | 0 | CompareOpc = 518; |
8268 | 0 | isDot = true; |
8269 | 0 | break; |
8270 | 0 | case Intrinsic::ppc_altivec_vcmpgtuh_p: |
8271 | 0 | CompareOpc = 582; |
8272 | 0 | isDot = true; |
8273 | 0 | break; |
8274 | 0 | case Intrinsic::ppc_altivec_vcmpgtuw_p: |
8275 | 0 | CompareOpc = 646; |
8276 | 0 | isDot = true; |
8277 | 0 | break; |
8278 | 2 | case Intrinsic::ppc_altivec_vcmpgtud_p: |
8279 | 2 | if (Subtarget.hasP8Altivec()) {
8280 | 2 | CompareOpc = 711; |
8281 | 2 | isDot = true; |
8282 | 2 | } else |
8283 | 0 | return false; |
8284 | 2 | break; |
8285 | 2 | |
8286 | 2 | // VSX predicate comparisons use the same infrastructure |
8287 | 0 | case Intrinsic::ppc_vsx_xvcmpeqdp_p: |
8288 | 0 | case Intrinsic::ppc_vsx_xvcmpgedp_p: |
8289 | 0 | case Intrinsic::ppc_vsx_xvcmpgtdp_p: |
8290 | 0 | case Intrinsic::ppc_vsx_xvcmpeqsp_p: |
8291 | 0 | case Intrinsic::ppc_vsx_xvcmpgesp_p: |
8292 | 0 | case Intrinsic::ppc_vsx_xvcmpgtsp_p: |
8293 | 0 | if (Subtarget.hasVSX()) {
8294 | 0 | switch (IntrinsicID) { |
8295 | 0 | case Intrinsic::ppc_vsx_xvcmpeqdp_p: |
8296 | 0 | CompareOpc = 99; |
8297 | 0 | break; |
8298 | 0 | case Intrinsic::ppc_vsx_xvcmpgedp_p: |
8299 | 0 | CompareOpc = 115; |
8300 | 0 | break; |
8301 | 0 | case Intrinsic::ppc_vsx_xvcmpgtdp_p: |
8302 | 0 | CompareOpc = 107; |
8303 | 0 | break; |
8304 | 0 | case Intrinsic::ppc_vsx_xvcmpeqsp_p: |
8305 | 0 | CompareOpc = 67; |
8306 | 0 | break; |
8307 | 0 | case Intrinsic::ppc_vsx_xvcmpgesp_p: |
8308 | 0 | CompareOpc = 83; |
8309 | 0 | break; |
8310 | 0 | case Intrinsic::ppc_vsx_xvcmpgtsp_p: |
8311 | 0 | CompareOpc = 75; |
8312 | 0 | break; |
8313 | 0 | } |
8314 | 0 | isDot = true; |
8315 | 0 | } else |
8316 | 0 | return false; |
8317 | 0 | break; |
8318 | 0 |
8319 | 0 | // Normal Comparisons. |
8320 | 1 | case Intrinsic::ppc_altivec_vcmpbfp: |
8321 | 1 | CompareOpc = 966; |
8322 | 1 | break; |
8323 | 0 | case Intrinsic::ppc_altivec_vcmpeqfp: |
8324 | 0 | CompareOpc = 198; |
8325 | 0 | break; |
8326 | 0 | case Intrinsic::ppc_altivec_vcmpequb: |
8327 | 0 | CompareOpc = 6; |
8328 | 0 | break; |
8329 | 0 | case Intrinsic::ppc_altivec_vcmpequh: |
8330 | 0 | CompareOpc = 70; |
8331 | 0 | break; |
8332 | 0 | case Intrinsic::ppc_altivec_vcmpequw: |
8333 | 0 | CompareOpc = 134; |
8334 | 0 | break; |
8335 | 2 | case Intrinsic::ppc_altivec_vcmpequd: |
8336 | 2 | if (Subtarget.hasP8Altivec()) |
8337 | 2 | CompareOpc = 199; |
8338 | 2 | else |
8339 | 0 | return false; |
8340 | 2 | break; |
8341 | 6 | case Intrinsic::ppc_altivec_vcmpneb: |
8342 | 6 | case Intrinsic::ppc_altivec_vcmpneh: |
8343 | 6 | case Intrinsic::ppc_altivec_vcmpnew: |
8344 | 6 | case Intrinsic::ppc_altivec_vcmpnezb: |
8345 | 6 | case Intrinsic::ppc_altivec_vcmpnezh: |
8346 | 6 | case Intrinsic::ppc_altivec_vcmpnezw: |
8347 | 6 | if (Subtarget.hasP9Altivec()) |
8348 | 6 | switch (IntrinsicID) { |
8349 | 0 | default: |
8350 | 0 | llvm_unreachable("Unknown comparison intrinsic."); |
8351 | 1 | case Intrinsic::ppc_altivec_vcmpneb: |
8352 | 1 | CompareOpc = 7; |
8353 | 1 | break; |
8354 | 1 | case Intrinsic::ppc_altivec_vcmpneh: |
8355 | 1 | CompareOpc = 71; |
8356 | 1 | break; |
8357 | 1 | case Intrinsic::ppc_altivec_vcmpnew: |
8358 | 1 | CompareOpc = 135; |
8359 | 1 | break; |
8360 | 1 | case Intrinsic::ppc_altivec_vcmpnezb: |
8361 | 1 | CompareOpc = 263; |
8362 | 1 | break; |
8363 | 1 | case Intrinsic::ppc_altivec_vcmpnezh: |
8364 | 1 | CompareOpc = 327; |
8365 | 1 | break; |
8366 | 1 | case Intrinsic::ppc_altivec_vcmpnezw: |
8367 | 1 | CompareOpc = 391; |
8368 | 1 | break; |
8369 | 6 | } |
8370 | 6 | else |
8371 | 0 | return false; |
8372 | 6 | break; |
8373 | 0 | case Intrinsic::ppc_altivec_vcmpgefp: |
8374 | 0 | CompareOpc = 454; |
8375 | 0 | break; |
8376 | 0 | case Intrinsic::ppc_altivec_vcmpgtfp: |
8377 | 0 | CompareOpc = 710; |
8378 | 0 | break; |
8379 | 0 | case Intrinsic::ppc_altivec_vcmpgtsb: |
8380 | 0 | CompareOpc = 774; |
8381 | 0 | break; |
8382 | 0 | case Intrinsic::ppc_altivec_vcmpgtsh: |
8383 | 0 | CompareOpc = 838; |
8384 | 0 | break; |
8385 | 0 | case Intrinsic::ppc_altivec_vcmpgtsw: |
8386 | 0 | CompareOpc = 902; |
8387 | 0 | break; |
8388 | 2 | case Intrinsic::ppc_altivec_vcmpgtsd: |
8389 | 2 | if (Subtarget.hasP8Altivec()) |
8390 | 2 | CompareOpc = 967; |
8391 | 2 | else |
8392 | 0 | return false; |
8393 | 2 | break; |
8394 | 0 | case Intrinsic::ppc_altivec_vcmpgtub: |
8395 | 0 | CompareOpc = 518; |
8396 | 0 | break; |
8397 | 0 | case Intrinsic::ppc_altivec_vcmpgtuh: |
8398 | 0 | CompareOpc = 582; |
8399 | 0 | break; |
8400 | 0 | case Intrinsic::ppc_altivec_vcmpgtuw: |
8401 | 0 | CompareOpc = 646; |
8402 | 0 | break; |
8403 | 2 | case Intrinsic::ppc_altivec_vcmpgtud: |
8404 | 2 | if (Subtarget.hasP8Altivec()) |
8405 | 2 | CompareOpc = 711; |
8406 | 2 | else |
8407 | 0 | return false; |
8408 | 2 | break; |
8409 | 24 | } |
8410 | 24 | return true; |
8411 | 24 | } |
8412 | | |
8413 | | /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom |
8414 | | /// lower, do it, otherwise return null. |
8415 | | SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, |
8416 | 1.37k | SelectionDAG &DAG) const { |
8417 | 1.37k | unsigned IntrinsicID = |
8418 | 1.37k | cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); |
8419 | 1.37k | |
8420 | 1.37k | SDLoc dl(Op); |
8421 | 1.37k | |
8422 | 1.37k | if (IntrinsicID == Intrinsic::thread_pointer) {
8423 | 3 | // Reads the thread pointer register, used for __builtin_thread_pointer. |
8424 | 3 | if (Subtarget.isPPC64()) |
8425 | 2 | return DAG.getRegister(PPC::X13, MVT::i64); |
8426 | 1 | return DAG.getRegister(PPC::R2, MVT::i32); |
8427 | 1 | } |
8428 | 1.36k | |
8429 | 1.36k | // We are looking for absolute values here. |
8430 | 1.36k | // The idea is to try to fit one of two patterns: |
8431 | 1.36k | // max (a, (0-a)) OR max ((0-a), a) |
8432 | 1.36k | if (Subtarget.hasP9Vector() &&
8433 | 160 | (IntrinsicID == Intrinsic::ppc_altivec_vmaxsw || |
8434 | 154 | IntrinsicID == Intrinsic::ppc_altivec_vmaxsh || |
8435 | 1.36k | IntrinsicID == Intrinsic::ppc_altivec_vmaxsb)) {
8436 | 14 | SDValue V1 = Op.getOperand(1); |
8437 | 14 | SDValue V2 = Op.getOperand(2); |
8438 | 14 | if (V1.getSimpleValueType() == V2.getSimpleValueType() && |
8439 | 14 | (V1.getSimpleValueType() == MVT::v4i32 || |
8440 | 8 | V1.getSimpleValueType() == MVT::v8i16 || |
8441 | 14 | V1.getSimpleValueType() == MVT::v16i8)) {
8442 | 14 | if (V1.getOpcode() == ISD::SUB &&
8443 | 8 | ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
8444 | 14 | V1.getOperand(1) == V2) {
8445 | 2 | // Generate the abs instruction with the operands |
8446 | 2 | return DAG.getNode(ISD::ABS, dl, V2.getValueType(),V2); |
8447 | 2 | } |
8448 | 12 | |
8449 | 12 | if (V2.getOpcode() == ISD::SUB &&
8450 | 12 | ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
8451 | 12 | V2.getOperand(1) == V1) {
8452 | 12 | // Generate the abs instruction with the operands |
8453 | 12 | return DAG.getNode(ISD::ABS, dl, V1.getValueType(),V1); |
8454 | 12 | } |
8455 | 1.35k | } |
8456 | 14 | } |
8457 | 1.35k | |
8458 | 1.35k | // If this is a lowered altivec predicate compare, CompareOpc is set to the |
8459 | 1.35k | // opcode number of the comparison. |
8460 | 1.35k | int CompareOpc; |
8461 | 1.35k | bool isDot; |
8462 | 1.35k | if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget)) |
8463 | 1.33k | return SDValue(); // Don't custom lower most intrinsics. |
8464 | 20 | |
8465 | 20 | // If this is a non-dot comparison, make the VCMP node and we are done. |
8466 | 20 | if (!isDot) {
8467 | 13 | SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(), |
8468 | 13 | Op.getOperand(1), Op.getOperand(2), |
8469 | 13 | DAG.getConstant(CompareOpc, dl, MVT::i32)); |
8470 | 13 | return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp); |
8471 | 13 | } |
8472 | 7 | |
8473 | 7 | // Create the PPCISD altivec 'dot' comparison node. |
8474 | 7 | SDValue Ops[] = { |
8475 | 7 | Op.getOperand(2), // LHS |
8476 | 7 | Op.getOperand(3), // RHS |
8477 | 7 | DAG.getConstant(CompareOpc, dl, MVT::i32) |
8478 | 7 | }; |
8479 | 7 | EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue }; |
8480 | 7 | SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops); |
8481 | 7 | |
8482 | 7 | // Now that we have the comparison, emit a copy from the CR to a GPR. |
8483 | 7 | // This is flagged to the above dot comparison. |
8484 | 7 | SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32, |
8485 | 7 | DAG.getRegister(PPC::CR6, MVT::i32), |
8486 | 7 | CompNode.getValue(1)); |
8487 | 7 | |
8488 | 7 | // Unpack the result based on how the target uses it. |
8489 | 7 | unsigned BitNo; // Bit # of CR6. |
8490 | 7 | bool InvertBit; // Invert result? |
8491 | 7 | switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) { |
8492 | 0 | default: // Can't happen, don't crash on invalid number though. |
8493 | 0 | case 0: // Return the value of the EQ bit of CR6. |
8494 | 0 | BitNo = 0; InvertBit = false; |
8495 | 0 | break; |
8496 | 1 | case 1: // Return the inverted value of the EQ bit of CR6. |
8497 | 1 | BitNo = 0; InvertBit = true; |
8498 | 1 | break; |
8499 | 6 | case 2: // Return the value of the LT bit of CR6. |
8500 | 6 | BitNo = 2; InvertBit = false; |
8501 | 6 | break; |
8502 | 0 | case 3: // Return the inverted value of the LT bit of CR6. |
8503 | 0 | BitNo = 2; InvertBit = true; |
8504 | 0 | break; |
8505 | 7 | } |
8506 | 7 | |
8507 | 7 | // Shift the bit into the low position. |
8508 | 7 | Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags, |
8509 | 7 | DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32)); |
8510 | 7 | // Isolate the bit. |
8511 | 7 | Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags, |
8512 | 7 | DAG.getConstant(1, dl, MVT::i32)); |
8513 | 7 | |
8514 | 7 | // If we are supposed to, toggle the bit. |
8515 | 7 | if (InvertBit) |
8516 | 1 | Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags, |
8517 | 1 | DAG.getConstant(1, dl, MVT::i32)); |
8518 | 1.37k | return Flags; |
8519 | 1.37k | } |
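
The CR6 unpacking at the end of LowerINTRINSIC_WO_CHAIN is plain bit arithmetic once MFOCRF has produced an i32. A scalar model, assuming CR6's four bits (LT, GT, EQ, SO) occupy bits 7..4 of that word, which is what the shift amount 8 - (3 - BitNo) implies; extractCR6Bit is a hypothetical name.

    #include <cassert>
    #include <cstdint>

    // Shift the requested CR6 bit into the low position, isolate it, and
    // optionally invert it, mirroring the SRL/AND/XOR sequence above.
    static uint32_t extractCR6Bit(uint32_t Flags, unsigned BitNo, bool InvertBit) {
      uint32_t Bit = (Flags >> (8 - (3 - BitNo))) & 1;
      return InvertBit ? Bit ^ 1 : Bit;
    }

    int main() {
      uint32_t Flags = 1u << 5;                    // only the EQ bit set
      assert(extractCR6Bit(Flags, 0, false) == 1); // EQ
      assert(extractCR6Bit(Flags, 2, false) == 0); // LT
      assert(extractCR6Bit(Flags, 0, true) == 0);  // inverted EQ
    }
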
8520 | | |
8521 | | SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op, |
8522 | 1.76k | SelectionDAG &DAG) const { |
8523 | 1.76k | // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to |
8524 | 1.76k | // the beginning of the argument list. |
8525 | 1.76k | int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
8526 | 1.76k | SDLoc DL(Op); |
8527 | 1.76k | switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) { |
8528 | 23 | case Intrinsic::ppc_cfence: { |
8529 | 23 | assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument."); |
8530 | 23 | assert(Subtarget.isPPC64() && "Only 64-bit is supported for now."); |
8531 | 23 | return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other, |
8532 | 23 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, |
8533 | 23 | Op.getOperand(ArgStart + 1)), |
8534 | 23 | Op.getOperand(0)), |
8535 | 23 | 0); |
8536 | 1.76k | } |
8537 | 1.73k | default: |
8538 | 1.73k | break; |
8539 | 1.73k | } |
8540 | 1.73k | return SDValue(); |
8541 | 1.73k | } |
8542 | | |
8543 | 40 | SDValue PPCTargetLowering::LowerREM(SDValue Op, SelectionDAG &DAG) const { |
8544 | 40 | // Check for a DIV with the same operands as this REM. |
8545 | 52 | for (auto UI : Op.getOperand(1)->uses()) { |
8546 | 52 | if ((Op.getOpcode() == ISD::SREM && UI->getOpcode() == ISD::SDIV) ||
8547 | 46 | (Op.getOpcode() == ISD::UREM && UI->getOpcode() == ISD::UDIV))
8548 | 8 | if (UI->getOperand(0) == Op.getOperand(0) &&
8549 | 4 | UI->getOperand(1) == Op.getOperand(1)) |
8550 | 4 | return SDValue(); |
8551 | 36 | } |
8552 | 36 | return Op; |
8553 | 36 | } |
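
LowerREM keeps the hardware modulo unless a DIV with identical operands is already present, in which case returning SDValue() lets the generic expansion reuse that division. A scalar sketch of what the fallback computes; the precise expansion is chosen by the target-independent legalizer.

    #include <cassert>

    static int remViaDiv(int A, int B) {
      int Quot = A / B;    // already being computed for the SDIV user
      return A - Quot * B; // one multiply-subtract recovers the remainder
    }

    int main() {
      assert(remViaDiv(17, 5) == 17 % 5);
      assert(remViaDiv(-17, 5) == -17 % 5); // C++ division truncates toward zero
    }
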
8554 | | |
8555 | | SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, |
8556 | 0 | SelectionDAG &DAG) const { |
8557 | 0 | SDLoc dl(Op); |
8558 | 0 | // For v2i64 (VSX), we can pattern patch the v2i32 case (using fp <-> int |
8559 | 0 | // instructions), but for smaller types, we need to first extend up to v2i32 |
8560 | 0 | // before going farther.
8561 | 0 | if (Op.getValueType() == MVT::v2i64) {
8562 | 0 | EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
8563 | 0 | if (ExtVT != MVT::v2i32) {
8564 | 0 | Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)); |
8565 | 0 | Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op, |
8566 | 0 | DAG.getValueType(EVT::getVectorVT(*DAG.getContext(), |
8567 | 0 | ExtVT.getVectorElementType(), 4))); |
8568 | 0 | Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op); |
8569 | 0 | Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op, |
8570 | 0 | DAG.getValueType(MVT::v2i32)); |
8571 | 0 | } |
8572 | 0 |
8573 | 0 | return Op; |
8574 | 0 | } |
8575 | 0 |
8576 | 0 | return SDValue(); |
8577 | 0 | } |
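
Each SIGN_EXTEND_INREG node built above behaves, per element, like the classic scalar shift pair: push the narrow value to the top of the word, then arithmetic-shift it back down. A minimal scalar model (the right shift of a negative value is implementation-defined before C++20, though universally arithmetic in practice):

    #include <cassert>
    #include <cstdint>

    static int32_t signExtendInReg(uint32_t V, unsigned FromBits) {
      unsigned Shift = 32 - FromBits;
      return (int32_t)(V << Shift) >> Shift; // replicate the sign bit downward
    }

    int main() {
      assert(signExtendInReg(0xFFu, 8) == -1);  // sign bit set
      assert(signExtendInReg(0x7Fu, 8) == 127); // sign bit clear
    }
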
8578 | | |
8579 | | SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, |
8580 | 39 | SelectionDAG &DAG) const { |
8581 | 39 | SDLoc dl(Op); |
8582 | 39 | // Create a stack slot that is 16-byte aligned. |
8583 | 39 | MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); |
8584 | 39 | int FrameIdx = MFI.CreateStackObject(16, 16, false); |
8585 | 39 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
8586 | 39 | SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); |
8587 | 39 | |
8588 | 39 | // Store the input value into Value#0 of the stack slot. |
8589 | 39 | SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, |
8590 | 39 | MachinePointerInfo()); |
8591 | 39 | // Load it out. |
8592 | 39 | return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo()); |
8593 | 39 | } |
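
LowerSCALAR_TO_VECTOR simply bounces the scalar through a 16-byte-aligned stack slot and reloads it as a vector. The same idea in portable C++, with the caveat that the DAG leaves the other lanes undef while this sketch zero-fills them for determinism:

    #include <cstdio>
    #include <cstring>

    int main() {
      alignas(16) unsigned char Slot[16] = {}; // the CreateStackObject(16, 16) slot
      float Scalar = 3.5f;
      std::memcpy(Slot, &Scalar, sizeof(Scalar)); // store into element #0
      float Vec[4];
      std::memcpy(Vec, Slot, sizeof(Vec));        // reload the full vector
      std::printf("%f %f %f %f\n", Vec[0], Vec[1], Vec[2], Vec[3]);
    }
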
8594 | | |
8595 | | SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, |
8596 | 36 | SelectionDAG &DAG) const { |
8597 | 36 | assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && |
8598 | 36 | "Should only be called for ISD::INSERT_VECTOR_ELT"); |
8599 | 36 | ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2)); |
8600 | 36 | // We have legal lowering for constant indices but not for variable ones. |
8601 | 36 | if (C) |
8602 | 32 | return Op; |
8603 | 4 | return SDValue(); |
8604 | 4 | } |
8605 | | |
8606 | | SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, |
8607 | 8 | SelectionDAG &DAG) const { |
8608 | 8 | SDLoc dl(Op); |
8609 | 8 | SDNode *N = Op.getNode(); |
8610 | 8 | |
8611 | 8 | assert(N->getOperand(0).getValueType() == MVT::v4i1 && |
8612 | 8 | "Unknown extract_vector_elt type"); |
8613 | 8 | |
8614 | 8 | SDValue Value = N->getOperand(0); |
8615 | 8 | |
8616 | 8 | // The first part of this is like the store lowering except that we don't |
8617 | 8 | // need to track the chain. |
8618 | 8 | |
8619 | 8 | // The values are now known to be -1 (false) or 1 (true). To convert this |
8620 | 8 | // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). |
8621 | 8 | // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 |
8622 | 8 | Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); |
8623 | 8 | |
8624 | 8 | // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to |
8625 | 8 | // understand how to form the extending load. |
8626 | 8 | SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64); |
8627 | 8 | |
8628 | 8 | Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); |
8629 | 8 | |
8630 | 8 | // Now convert to an integer and store. |
8631 | 8 | Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, |
8632 | 8 | DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32), |
8633 | 8 | Value); |
8634 | 8 | |
8635 | 8 | MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); |
8636 | 8 | int FrameIdx = MFI.CreateStackObject(16, 16, false); |
8637 | 8 | MachinePointerInfo PtrInfo = |
8638 | 8 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); |
8639 | 8 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
8640 | 8 | SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); |
8641 | 8 | |
8642 | 8 | SDValue StoreChain = DAG.getEntryNode(); |
8643 | 8 | SDValue Ops[] = {StoreChain, |
8644 | 8 | DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32), |
8645 | 8 | Value, FIdx}; |
8646 | 8 | SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other); |
8647 | 8 | |
8648 | 8 | StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, |
8649 | 8 | dl, VTs, Ops, MVT::v4i32, PtrInfo); |
8650 | 8 | |
8651 | 8 | // Extract the value requested. |
8652 | 8 | unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); |
8653 | 8 | SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType()); |
8654 | 8 | Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); |
8655 | 8 | |
8656 | 8 | SDValue IntVal = |
8657 | 8 | DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset)); |
8658 | 8 | |
8659 | 8 | if (!Subtarget.useCRBits()) |
8660 | 0 | return IntVal; |
8661 | 8 | |
8662 | 8 | return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal); |
8663 | 8 | } |
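
The -1/+1 to 0/1 recoding used in LowerEXTRACT_VECTOR_ELT (and again in the v4i1 store path below) is a single fused multiply-add, checkable in scalar form:

    #include <cassert>
    #include <cmath>

    int main() {
      // QPX booleans: -1.0 is false, +1.0 is true; 0.5*V + 0.5 maps them to 0/1.
      assert(std::fma(-1.0, 0.5, 0.5) == 0.0);
      assert(std::fma(1.0, 0.5, 0.5) == 1.0);
    }
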
8664 | | |
8665 | | /// Lowering for QPX v4i1 loads |
8666 | | SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op, |
8667 | 89 | SelectionDAG &DAG) const { |
8668 | 89 | SDLoc dl(Op); |
8669 | 89 | LoadSDNode *LN = cast<LoadSDNode>(Op.getNode()); |
8670 | 89 | SDValue LoadChain = LN->getChain(); |
8671 | 89 | SDValue BasePtr = LN->getBasePtr(); |
8672 | 89 | |
8673 | 89 | if (Op.getValueType() == MVT::v4f64 || |
8674 | 89 | Op.getValueType() == MVT::v4f32) {
8675 | 87 | EVT MemVT = LN->getMemoryVT(); |
8676 | 87 | unsigned Alignment = LN->getAlignment(); |
8677 | 87 | |
8678 | 87 | // If this load is properly aligned, then it is legal. |
8679 | 87 | if (Alignment >= MemVT.getStoreSize()) |
8680 | 87 | return Op; |
8681 | 0 |
8682 | 0 | EVT ScalarVT = Op.getValueType().getScalarType(), |
8683 | 0 | ScalarMemVT = MemVT.getScalarType(); |
8684 | 0 | unsigned Stride = ScalarMemVT.getStoreSize(); |
8685 | 0 |
8686 | 0 | SDValue Vals[4], LoadChains[4]; |
8687 | 0 | for (unsigned Idx = 0; Idx < 4; ++Idx) {
8688 | 0 | SDValue Load; |
8689 | 0 | if (ScalarVT != ScalarMemVT) |
8690 | 0 | Load = DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain, |
8691 | 0 | BasePtr, |
8692 | 0 | LN->getPointerInfo().getWithOffset(Idx * Stride), |
8693 | 0 | ScalarMemVT, MinAlign(Alignment, Idx * Stride), |
8694 | 0 | LN->getMemOperand()->getFlags(), LN->getAAInfo()); |
8695 | 0 | else |
8696 | 0 | Load = DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr, |
8697 | 0 | LN->getPointerInfo().getWithOffset(Idx * Stride), |
8698 | 0 | MinAlign(Alignment, Idx * Stride), |
8699 | 0 | LN->getMemOperand()->getFlags(), LN->getAAInfo()); |
8700 | 0 |
8701 | 0 | if (Idx == 0 && LN->isIndexed()) {
8702 | 0 | assert(LN->getAddressingMode() == ISD::PRE_INC && |
8703 | 0 | "Unknown addressing mode on vector load"); |
8704 | 0 | Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(), |
8705 | 0 | LN->getAddressingMode()); |
8706 | 0 | } |
8707 | 0 |
8708 | 0 | Vals[Idx] = Load; |
8709 | 0 | LoadChains[Idx] = Load.getValue(1); |
8710 | 0 |
8711 | 0 | BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, |
8712 | 0 | DAG.getConstant(Stride, dl, |
8713 | 0 | BasePtr.getValueType())); |
8714 | 0 | } |
8715 | 0 |
8716 | 0 | SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); |
8717 | 0 | SDValue Value = DAG.getBuildVector(Op.getValueType(), dl, Vals); |
8718 | 0 |
8719 | 0 | if (LN->isIndexed()) {
8720 | 0 | SDValue RetOps[] = { Value, Vals[0].getValue(1), TF }; |
8721 | 0 | return DAG.getMergeValues(RetOps, dl); |
8722 | 0 | } |
8723 | 0 |
8724 | 0 | SDValue RetOps[] = { Value, TF }; |
8725 | 0 | return DAG.getMergeValues(RetOps, dl); |
8726 | 0 | } |
8727 | 2 | |
8728 | 89 | assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower"); |
8729 | 2 | assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported"); |
8730 | 2 | |
8731 | 2 | // To lower v4i1 from a byte array, we load the byte elements of the |
8732 | 2 | // vector and then reuse the BUILD_VECTOR logic. |
8733 | 2 | |
8734 | 2 | SDValue VectElmts[4], VectElmtChains[4]; |
8735 | 10 | for (unsigned i = 0; i < 410 ; ++i8 ) { |
8736 | 8 | SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType()); |
8737 | 8 | Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx); |
8738 | 8 | |
8739 | 8 | VectElmts[i] = DAG.getExtLoad( |
8740 | 8 | ISD::EXTLOAD, dl, MVT::i32, LoadChain, Idx, |
8741 | 8 | LN->getPointerInfo().getWithOffset(i), MVT::i8, |
8742 | 8 | /* Alignment = */ 1, LN->getMemOperand()->getFlags(), LN->getAAInfo()); |
8743 | 8 | VectElmtChains[i] = VectElmts[i].getValue(1); |
8744 | 8 | } |
8745 | 89 | |
8746 | 89 | LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains); |
8747 | 89 | SDValue Value = DAG.getBuildVector(MVT::v4i1, dl, VectElmts); |
8748 | 89 | |
8749 | 89 | SDValue RVals[] = { Value, LoadChain }; |
8750 | 89 | return DAG.getMergeValues(RVals, dl); |
8751 | 89 | } |
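
The v4i1 path stores one byte per lane, so the load side above is four extending byte loads glued together by a TokenFactor. A scalar model of the memory layout it assumes; loadV4i1 is a hypothetical helper:

    #include <cassert>
    #include <cstdint>

    static void loadV4i1(const uint8_t *BasePtr, bool Out[4]) {
      for (unsigned i = 0; i < 4; ++i)
        Out[i] = BasePtr[i] != 0; // EXTLOAD of one i8 lane
    }

    int main() {
      uint8_t Bytes[4] = {1, 0, 1, 1};
      bool V[4];
      loadV4i1(Bytes, V);
      assert(V[0] && !V[1] && V[2] && V[3]);
    }
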
8752 | | |
8753 | | /// Lowering for QPX v4i1 stores |
8754 | | SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, |
8755 | 47 | SelectionDAG &DAG) const { |
8756 | 47 | SDLoc dl(Op); |
8757 | 47 | StoreSDNode *SN = cast<StoreSDNode>(Op.getNode()); |
8758 | 47 | SDValue StoreChain = SN->getChain(); |
8759 | 47 | SDValue BasePtr = SN->getBasePtr(); |
8760 | 47 | SDValue Value = SN->getValue(); |
8761 | 47 | |
8762 | 47 | if (Value.getValueType() == MVT::v4f64 || |
8763 | 47 | Value.getValueType() == MVT::v4f32) {
8764 | 45 | EVT MemVT = SN->getMemoryVT(); |
8765 | 45 | unsigned Alignment = SN->getAlignment(); |
8766 | 45 | |
8767 | 45 | // If this store is properly aligned, then it is legal. |
8768 | 45 | if (Alignment >= MemVT.getStoreSize()) |
8769 | 18 | return Op; |
8770 | 27 | |
8771 | 27 | EVT ScalarVT = Value.getValueType().getScalarType(), |
8772 | 27 | ScalarMemVT = MemVT.getScalarType(); |
8773 | 27 | unsigned Stride = ScalarMemVT.getStoreSize(); |
8774 | 27 | |
8775 | 27 | SDValue Stores[4]; |
8776 | 135 | for (unsigned Idx = 0; Idx < 4; ++Idx) {
8777 | 108 | SDValue Ex = DAG.getNode( |
8778 | 108 | ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value, |
8779 | 108 | DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout()))); |
8780 | 108 | SDValue Store; |
8781 | 108 | if (ScalarVT != ScalarMemVT) |
8782 | 0 | Store = |
8783 | 0 | DAG.getTruncStore(StoreChain, dl, Ex, BasePtr, |
8784 | 0 | SN->getPointerInfo().getWithOffset(Idx * Stride), |
8785 | 0 | ScalarMemVT, MinAlign(Alignment, Idx * Stride), |
8786 | 0 | SN->getMemOperand()->getFlags(), SN->getAAInfo()); |
8787 | 108 | else |
8788 | 108 | Store = DAG.getStore(StoreChain, dl, Ex, BasePtr, |
8789 | 108 | SN->getPointerInfo().getWithOffset(Idx * Stride), |
8790 | 108 | MinAlign(Alignment, Idx * Stride), |
8791 | 108 | SN->getMemOperand()->getFlags(), SN->getAAInfo()); |
8792 | 108 | |
8793 | 108 | if (Idx == 0 && SN->isIndexed()) {
8794 | 0 | assert(SN->getAddressingMode() == ISD::PRE_INC && |
8795 | 0 | "Unknown addressing mode on vector store"); |
8796 | 0 | Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(), |
8797 | 0 | SN->getAddressingMode()); |
8798 | 0 | } |
8799 | 108 | |
8800 | 108 | BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, |
8801 | 108 | DAG.getConstant(Stride, dl, |
8802 | 108 | BasePtr.getValueType())); |
8803 | 108 | Stores[Idx] = Store; |
8804 | 108 | } |
8805 | 27 | |
8806 | 27 | SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); |
8807 | 27 | |
8808 | 27 | if (SN->isIndexed()) {
8809 | 0 | SDValue RetOps[] = { TF, Stores[0].getValue(1) }; |
8810 | 0 | return DAG.getMergeValues(RetOps, dl); |
8811 | 0 | } |
8812 | 27 | |
8813 | 27 | return TF; |
8814 | 27 | } |
8815 | 2 | |
8816 | 47 | assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported"); |
8817 | 2 | assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower"); |
8818 | 2 | |
8819 | 2 | // The values are now known to be -1 (false) or 1 (true). To convert this |
8820 | 2 | // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). |
8821 | 2 | // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 |
8822 | 2 | Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); |
8823 | 2 | |
8824 | 2 | // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to |
8825 | 2 | // understand how to form the extending load. |
8826 | 2 | SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64); |
8827 | 2 | |
8828 | 2 | Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); |
8829 | 2 | |
8830 | 2 | // Now convert to an integer and store. |
8831 | 2 | Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, |
8832 | 2 | DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32), |
8833 | 2 | Value); |
8834 | 2 | |
8835 | 2 | MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); |
8836 | 2 | int FrameIdx = MFI.CreateStackObject(16, 16, false); |
8837 | 2 | MachinePointerInfo PtrInfo = |
8838 | 2 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); |
8839 | 2 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
8840 | 2 | SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); |
8841 | 2 | |
8842 | 2 | SDValue Ops[] = {StoreChain, |
8843 | 2 | DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32), |
8844 | 2 | Value, FIdx}; |
8845 | 2 | SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other); |
8846 | 2 | |
8847 | 2 | StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, |
8848 | 2 | dl, VTs, Ops, MVT::v4i32, PtrInfo); |
8849 | 2 | |
8850 | 2 | // Move data into the byte array. |
8851 | 2 | SDValue Loads[4], LoadChains[4]; |
8852 | 10 | for (unsigned i = 0; i < 410 ; ++i8 ) { |
8853 | 8 | unsigned Offset = 4*i; |
8854 | 8 | SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType()); |
8855 | 8 | Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); |
8856 | 8 | |
8857 | 8 | Loads[i] = DAG.getLoad(MVT::i32, dl, StoreChain, Idx, |
8858 | 8 | PtrInfo.getWithOffset(Offset)); |
8859 | 8 | LoadChains[i] = Loads[i].getValue(1); |
8860 | 8 | } |
8861 | 2 | |
8862 | 2 | StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); |
8863 | 2 | |
8864 | 2 | SDValue Stores[4]; |
8865 | 10 | for (unsigned i = 0; i < 410 ; ++i8 ) { |
8866 | 8 | SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType()); |
8867 | 8 | Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx); |
8868 | 8 | |
8869 | 8 | Stores[i] = DAG.getTruncStore( |
8870 | 8 | StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i), |
8871 | 8 | MVT::i8, /* Alignment = */ 1, SN->getMemOperand()->getFlags(), |
8872 | 8 | SN->getAAInfo()); |
8873 | 8 | } |
8874 | 47 | |
8875 | 47 | StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); |
8876 | 47 | |
8877 | 47 | return StoreChain; |
8878 | 47 | } |
8879 | | |
8880 | 17 | SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { |
8881 | 17 | SDLoc dl(Op); |
8882 | 17 | if (Op.getValueType() == MVT::v4i32) {
8883 | 7 | SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); |
8884 | 7 | |
8885 | 7 | SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl); |
8886 | 7 | SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt. |
8887 | 7 | |
8888 | 7 | SDValue RHSSwap = // = vrlw RHS, 16 |
8889 | 7 | BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl); |
8890 | 7 | |
8891 | 7 | // Shrinkify inputs to v8i16. |
8892 | 7 | LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS); |
8893 | 7 | RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS); |
8894 | 7 | RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap); |
8895 | 7 | |
8896 | 7 | // Low parts multiplied together, generating 32-bit results (we ignore the |
8897 | 7 | // top parts). |
8898 | 7 | SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, |
8899 | 7 | LHS, RHS, DAG, dl, MVT::v4i32); |
8900 | 7 | |
8901 | 7 | SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, |
8902 | 7 | LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32); |
8903 | 7 | // Shift the high parts up 16 bits. |
8904 | 7 | HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, |
8905 | 7 | Neg16, DAG, dl); |
8906 | 7 | return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd); |
8907 | 10 | } else if (Op.getValueType() == MVT::v8i16) {
8908 | 5 | SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); |
8909 | 5 | |
8910 | 5 | SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl); |
8911 | 5 | |
8912 | 5 | return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm, |
8913 | 5 | LHS, RHS, Zero, DAG, dl); |
8914 | 5 | } else if (Op.getValueType() == MVT::v16i8) {
8915 | 5 | SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); |
8916 | 5 | bool isLittleEndian = Subtarget.isLittleEndian(); |
8917 | 5 | |
8918 | 5 | // Multiply the even 8-bit parts, producing 16-bit sums. |
8919 | 5 | SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, |
8920 | 5 | LHS, RHS, DAG, dl, MVT::v8i16); |
8921 | 5 | EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts); |
8922 | 5 | |
8923 | 5 | // Multiply the odd 8-bit parts, producing 16-bit sums. |
8924 | 5 | SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, |
8925 | 5 | LHS, RHS, DAG, dl, MVT::v8i16); |
8926 | 5 | OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts); |
8927 | 5 | |
8928 | 5 | // Merge the results together. Because vmuleub and vmuloub are |
8929 | 5 | // instructions with a big-endian bias, we must reverse the |
8930 | 5 | // element numbering and reverse the meaning of "odd" and "even" |
8931 | 5 | // when generating little endian code. |
8932 | 5 | int Ops[16]; |
8933 | 45 | for (unsigned i = 0; i != 845 ; ++i40 ) { |
8934 | 40 | if (isLittleEndian) {
8935 | 16 | Ops[i*2 ] = 2*i; |
8936 | 16 | Ops[i*2+1] = 2*i+16; |
8937 | 40 | } else { |
8938 | 24 | Ops[i*2 ] = 2*i+1; |
8939 | 24 | Ops[i*2+1] = 2*i+1+16; |
8940 | 24 | } |
8941 | 40 | } |
8942 | 5 | if (isLittleEndian) |
8943 | 2 | return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops); |
8944 | 5 | else |
8945 | 3 | return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops); |
8946 | 0 | } else { |
8947 | 0 | llvm_unreachable("Unknown mul to lower!"); |
8948 | 10 | } |
8949 | 0 | } |
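
The v4i32 branch of LowerMUL computes each 32-bit product from 16-bit halves: the low halves multiply directly, and the two cross terms land in the high half. Per lane the arithmetic reduces to the scalar identity below, which wraps modulo 2^32 exactly like the vector sequence:

    #include <cassert>
    #include <cstdint>

    static uint32_t mulVia16BitHalves(uint32_t A, uint32_t B) {
      uint32_t ALo = A & 0xFFFF, AHi = A >> 16;
      uint32_t BLo = B & 0xFFFF, BHi = B >> 16;
      uint32_t LoProd = ALo * BLo;                     // vmulouh
      uint32_t HiProd = (AHi * BLo + ALo * BHi) << 16; // vmsumuhm, then vslw
      return LoProd + HiProd;
    }

    int main() {
      assert(mulVia16BitHalves(123456789u, 987654321u) ==
             123456789u * 987654321u); // both sides wrap modulo 2^32
    }
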
8950 | | |
8951 | | /// LowerOperation - Provide custom lowering hooks for some operations. |
8952 | | /// |
8953 | 10.1k | SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { |
8954 | 10.1k | switch (Op.getOpcode()) { |
8955 | 0 | default: llvm_unreachable("Wasn't expecting to be able to lower this!");
8956 | 652 | case ISD::ConstantPool: return LowerConstantPool(Op, DAG); |
8957 | 9 | case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); |
8958 | 1.49k | case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); |
8959 | 39 | case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); |
8960 | 11 | case ISD::JumpTable: return LowerJumpTable(Op, DAG); |
8961 | 38 | case ISD::SETCC: return LowerSETCC(Op, DAG); |
8962 | 1 | case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); |
8963 | 1 | case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); |
8964 | 6 | case ISD::VASTART: |
8965 | 6 | return LowerVASTART(Op, DAG); |
8966 | 10.1k | |
8967 | 1 | case ISD::VAARG: |
8968 | 1 | return LowerVAARG(Op, DAG); |
8969 | 10.1k | |
8970 | 1 | case ISD::VACOPY: |
8971 | 1 | return LowerVACOPY(Op, DAG); |
8972 | 10.1k | |
8973 | 1 | case ISD::STACKRESTORE: |
8974 | 1 | return LowerSTACKRESTORE(Op, DAG); |
8975 | 10.1k | |
8976 | 22 | case ISD::DYNAMIC_STACKALLOC: |
8977 | 22 | return LowerDYNAMIC_STACKALLOC(Op, DAG); |
8978 | 10.1k | |
8979 | 1 | case ISD::GET_DYNAMIC_AREA_OFFSET: |
8980 | 1 | return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG); |
8981 | 10.1k | |
8982 | 1 | case ISD::EH_DWARF_CFA: |
8983 | 1 | return LowerEH_DWARF_CFA(Op, DAG); |
8984 | 10.1k | |
8985 | 6 | case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); |
8986 | 5 | case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); |
8987 | 10.1k | |
8988 | 102 | case ISD::LOAD: return LowerLOAD(Op, DAG); |
8989 | 48 | case ISD::STORE: return LowerSTORE(Op, DAG); |
8990 | 0 | case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); |
8991 | 257 | case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); |
8992 | 615 | case ISD::FP_TO_UINT: |
8993 | 615 | case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, |
8994 | 615 | SDLoc(Op)); |
8995 | 206 | case ISD::UINT_TO_FP: |
8996 | 206 | case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG); |
8997 | 1 | case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); |
8998 | 206 | |
8999 | 206 | // Lower 64-bit shifts. |
9000 | 10 | case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG); |
9001 | 10 | case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG); |
9002 | 6 | case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG); |
9003 | 206 | |
9004 | 206 | // Vector-related lowering. |
9005 | 2.19k | case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); |
9006 | 1.08k | case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); |
9007 | 1.37k | case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); |
9008 | 39 | case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); |
9009 | 0 | case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); |
9010 | 8 | case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); |
9011 | 36 | case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); |
9012 | 17 | case ISD::MUL: return LowerMUL(Op, DAG); |
9013 | 206 | |
9014 | 206 | // For counter-based loop handling. |
9015 | 0 | case ISD::INTRINSIC_W_CHAIN: return SDValue(); |
9016 | 206 | |
9017 | 206 | // Frame & Return address. |
9018 | 6 | case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); |
9019 | 10 | case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); |
9020 | 206 | |
9021 | 1.76k | case ISD::INTRINSIC_VOID: |
9022 | 1.76k | return LowerINTRINSIC_VOID(Op, DAG); |
9023 | 40 | case ISD::SREM: |
9024 | 40 | case ISD::UREM: |
9025 | 40 | return LowerREM(Op, DAG); |
9026 | 0 | } |
9027 | 0 | } |
9028 | | |
9029 | | void PPCTargetLowering::ReplaceNodeResults(SDNode *N, |
9030 | | SmallVectorImpl<SDValue>&Results, |
9031 | 12 | SelectionDAG &DAG) const { |
9032 | 12 | SDLoc dl(N); |
9033 | 12 | switch (N->getOpcode()) { |
9034 | 0 | default: |
9035 | 0 | llvm_unreachable("Do not know how to custom type legalize this operation!"); |
9036 | 2 | case ISD::READCYCLECOUNTER: { |
9037 | 2 | SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); |
9038 | 2 | SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0)); |
9039 | 2 | |
9040 | 2 | Results.push_back(RTB); |
9041 | 2 | Results.push_back(RTB.getValue(1)); |
9042 | 2 | Results.push_back(RTB.getValue(2)); |
9043 | 2 | break; |
9044 | 12 | } |
9045 | 1 | case ISD::INTRINSIC_W_CHAIN: { |
9046 | 1 | if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != |
9047 | 1 | Intrinsic::ppc_is_decremented_ctr_nonzero) |
9048 | 0 | break; |
9049 | 1 | |
9050 | 1 | assert(N->getValueType(0) == MVT::i1 && |
9051 | 1 | "Unexpected result type for CTR decrement intrinsic"); |
9052 | 1 | EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), |
9053 | 1 | N->getValueType(0)); |
9054 | 1 | SDVTList VTs = DAG.getVTList(SVT, MVT::Other); |
9055 | 1 | SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0), |
9056 | 1 | N->getOperand(1)); |
9057 | 1 | |
9058 | 1 | Results.push_back(NewInt); |
9059 | 1 | Results.push_back(NewInt.getValue(1)); |
9060 | 1 | break; |
9061 | 1 | } |
9062 | 0 | case ISD::VAARG: { |
9063 | 0 | if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
9064 | 0 | return; |
9065 | 0 |
9066 | 0 | EVT VT = N->getValueType(0); |
9067 | 0 |
9068 | 0 | if (VT == MVT::i64) {
9069 | 0 | SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG); |
9070 | 0 |
9071 | 0 | Results.push_back(NewNode); |
9072 | 0 | Results.push_back(NewNode.getValue(1)); |
9073 | 0 | } |
9074 | 0 | return; |
9075 | 0 | } |
9076 | 8 | case ISD::FP_ROUND_INREG: { |
9077 | 8 | assert(N->getValueType(0) == MVT::ppcf128); |
9078 | 8 | assert(N->getOperand(0).getValueType() == MVT::ppcf128); |
9079 | 8 | SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, |
9080 | 8 | MVT::f64, N->getOperand(0), |
9081 | 8 | DAG.getIntPtrConstant(0, dl)); |
9082 | 8 | SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, |
9083 | 8 | MVT::f64, N->getOperand(0), |
9084 | 8 | DAG.getIntPtrConstant(1, dl)); |
9085 | 8 | |
9086 | 8 | // Add the two halves of the long double in round-to-zero mode. |
9087 | 8 | SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi); |
9088 | 8 | |
9089 | 8 | // We know the low half is about to be thrown away, so just use something |
9090 | 8 | // convenient. |
9091 | 8 | Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128, |
9092 | 8 | FPreg, FPreg)); |
9093 | 8 | return; |
9094 | 0 | } |
9095 | 1 | case ISD::FP_TO_SINT: |
9096 | 1 | case ISD::FP_TO_UINT: |
9097 | 1 | // LowerFP_TO_INT() can only handle f32 and f64. |
9098 | 1 | if (N->getOperand(0).getValueType() == MVT::ppcf128) |
9099 | 0 | return; |
9100 | 1 | Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl)); |
9101 | 1 | return; |
9102 | 12 | } |
9103 | 12 | } |
9104 | | |
9105 | | //===----------------------------------------------------------------------===// |
9106 | | // Other Lowering Code |
9107 | | //===----------------------------------------------------------------------===// |
9108 | | |
9109 | 707 | static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) { |
9110 | 707 | Module *M = Builder.GetInsertBlock()->getParent()->getParent(); |
9111 | 707 | Function *Func = Intrinsic::getDeclaration(M, Id); |
9112 | 707 | return Builder.CreateCall(Func, {}); |
9113 | 707 | } |
9114 | | |
9115 | | // The mappings for emitLeading/TrailingFence are taken from
9116 | | // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html |
9117 | | Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder, |
9118 | | Instruction *Inst, |
9119 | 492 | AtomicOrdering Ord) const { |
9120 | 492 | if (Ord == AtomicOrdering::SequentiallyConsistent) |
9121 | 133 | return callIntrinsic(Builder, Intrinsic::ppc_sync); |
9122 | 359 | if (isReleaseOrStronger(Ord))
9123 | 234 | return callIntrinsic(Builder, Intrinsic::ppc_lwsync); |
9124 | 125 | return nullptr; |
9125 | 125 | } |
9126 | | |
9127 | | Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder, |
9128 | | Instruction *Inst, |
9129 | 492 | AtomicOrdering Ord) const { |
9130 | 492 | if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
9131 | 363 | // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and |
9132 | 363 | // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html |
9133 | 363 | // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification. |
9134 | 363 | if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
9135 | 23 | return Builder.CreateCall( |
9136 | 23 | Intrinsic::getDeclaration( |
9137 | 23 | Builder.GetInsertBlock()->getParent()->getParent(), |
9138 | 23 | Intrinsic::ppc_cfence, {Inst->getType()}), |
9139 | 23 | {Inst}); |
9140 | 340 | // FIXME: Can use isync for rmw operation. |
9141 | 340 | return callIntrinsic(Builder, Intrinsic::ppc_lwsync); |
9142 | 340 | } |
9143 | 129 | return nullptr; |
9144 | 129 | } |
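
Condensing the two fence hooks above into a table-style sketch (my reading of the code, not an authoritative restatement of the C++11 mapping they cite): sync leads seq_cst operations, lwsync leads release-or-stronger ones, and lwsync (or the cfence pattern for 64-bit loads) trails acquire-or-stronger operations that carry an atomic load.

    #include <cstdio>

    enum class Ord { Monotonic, Acquire, Release, AcqRel, SeqCst };

    static const char *leadingFence(Ord O) {
      if (O == Ord::SeqCst) return "sync";
      if (O == Ord::Release || O == Ord::AcqRel) return "lwsync";
      return "none";
    }

    // Applies only when the instruction has an atomic load, as checked above.
    static const char *trailingFence(Ord O) {
      if (O == Ord::Acquire || O == Ord::AcqRel || O == Ord::SeqCst)
        return "lwsync"; // cfence for 64-bit loads
      return "none";
    }

    int main() {
      std::printf("seq_cst: leading %s, trailing %s\n",
                  leadingFence(Ord::SeqCst), trailingFence(Ord::SeqCst));
    }
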
9145 | | |
9146 | | MachineBasicBlock * |
9147 | | PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, |
9148 | | unsigned AtomicSize, |
9149 | | unsigned BinOpcode, |
9150 | | unsigned CmpOpcode, |
9151 | 483 | unsigned CmpPred) const { |
9152 | 483 | // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. |
9153 | 483 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
9154 | 483 | |
9155 | 483 | auto LoadMnemonic = PPC::LDARX; |
9156 | 483 | auto StoreMnemonic = PPC::STDCX; |
9157 | 483 | switch (AtomicSize) { |
9158 | 0 | default: |
9159 | 0 | llvm_unreachable("Unexpected size of atomic entity"); |
9160 | 120 | case 1: |
9161 | 120 | LoadMnemonic = PPC::LBARX; |
9162 | 120 | StoreMnemonic = PPC::STBCX; |
9163 | 120 | assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4"); |
9164 | 120 | break; |
9165 | 120 | case 2: |
9166 | 120 | LoadMnemonic = PPC::LHARX; |
9167 | 120 | StoreMnemonic = PPC::STHCX; |
9168 | 120 | assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4"); |
9169 | 120 | break; |
9170 | 120 | case 4: |
9171 | 120 | LoadMnemonic = PPC::LWARX; |
9172 | 120 | StoreMnemonic = PPC::STWCX; |
9173 | 120 | break; |
9174 | 123 | case 8: |
9175 | 123 | LoadMnemonic = PPC::LDARX; |
9176 | 123 | StoreMnemonic = PPC::STDCX; |
9177 | 123 | break; |
9178 | 483 | } |
9179 | 483 | |
9180 | 483 | const BasicBlock *LLVM_BB = BB->getBasicBlock(); |
9181 | 483 | MachineFunction *F = BB->getParent(); |
9182 | 483 | MachineFunction::iterator It = ++BB->getIterator(); |
9183 | 483 | |
9184 | 483 | unsigned dest = MI.getOperand(0).getReg(); |
9185 | 483 | unsigned ptrA = MI.getOperand(1).getReg(); |
9186 | 483 | unsigned ptrB = MI.getOperand(2).getReg(); |
9187 | 483 | unsigned incr = MI.getOperand(3).getReg(); |
9188 | 483 | DebugLoc dl = MI.getDebugLoc(); |
9189 | 483 | |
9190 | 483 | MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); |
9191 | 483 | MachineBasicBlock *loop2MBB = |
9192 | 483 | CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
9193 | 483 | MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); |
9194 | 483 | F->insert(It, loopMBB); |
9195 | 483 | if (CmpOpcode) |
9196 | 180 | F->insert(It, loop2MBB); |
9197 | 483 | F->insert(It, exitMBB); |
9198 | 483 | exitMBB->splice(exitMBB->begin(), BB, |
9199 | 483 | std::next(MachineBasicBlock::iterator(MI)), BB->end()); |
9200 | 483 | exitMBB->transferSuccessorsAndUpdatePHIs(BB); |
9201 | 483 | |
9202 | 483 | MachineRegisterInfo &RegInfo = F->getRegInfo(); |
9203 | 231 | unsigned TmpReg = (!BinOpcode) ? incr : |
9204 | 252 | RegInfo.createVirtualRegister( AtomicSize == 8 ? 252 &PPC::G8RCRegClass65 |
9205 | 252 | : &PPC::GPRCRegClass); |
9206 | 483 | |
9207 | 483 | // thisMBB: |
9208 | 483 | // ... |
9209 | 483 | // fallthrough --> loopMBB |
9210 | 483 | BB->addSuccessor(loopMBB); |
9211 | 483 | |
9212 | 483 | // loopMBB: |
9213 | 483 | // l[wd]arx dest, ptr |
9214 | 483 | // add r0, dest, incr |
9215 | 483 | // st[wd]cx. r0, ptr |
9216 | 483 | // bne- loopMBB |
9217 | 483 | // fallthrough --> exitMBB |
9218 | 483 | |
9219 | 483 | // For max/min... |
9220 | 483 | // loopMBB: |
9221 | 483 | // l[wd]arx dest, ptr |
9222 | 483 | // cmpl?[wd] incr, dest |
9223 | 483 | // bgt exitMBB |
9224 | 483 | // loop2MBB: |
9225 | 483 | // st[wd]cx. dest, ptr |
9226 | 483 | // bne- loopMBB |
9227 | 483 | // fallthrough --> exitMBB |
9228 | 483 | |
9229 | 483 | BB = loopMBB; |
9230 | 483 | BuildMI(BB, dl, TII->get(LoadMnemonic), dest) |
9231 | 483 | .addReg(ptrA).addReg(ptrB); |
9232 | 483 | if (BinOpcode) |
9233 | 252 | BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest); |
9234 | 483 | if (CmpOpcode) {
9235 | 180 | // Signed comparisons of byte or halfword values must be sign-extended. |
9236 | 180 | if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
9237 | 48 | unsigned ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); |
9238 | 48 | BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
9239 | 48 | ExtReg).addReg(dest); |
9240 | 48 | BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0) |
9241 | 48 | .addReg(incr).addReg(ExtReg); |
9242 | 48 | } else |
9243 | 132 | BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0) |
9244 | 132 | .addReg(incr).addReg(dest); |
9245 | 180 | |
9246 | 180 | BuildMI(BB, dl, TII->get(PPC::BCC)) |
9247 | 180 | .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB); |
9248 | 180 | BB->addSuccessor(loop2MBB); |
9249 | 180 | BB->addSuccessor(exitMBB); |
9250 | 180 | BB = loop2MBB; |
9251 | 180 | } |
9252 | 483 | BuildMI(BB, dl, TII->get(StoreMnemonic)) |
9253 | 483 | .addReg(TmpReg).addReg(ptrA).addReg(ptrB); |
9254 | 483 | BuildMI(BB, dl, TII->get(PPC::BCC)) |
9255 | 483 | .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); |
9256 | 483 | BB->addSuccessor(loopMBB); |
9257 | 483 | BB->addSuccessor(exitMBB); |
9258 | 483 | |
9259 | 483 | // exitMBB: |
9260 | 483 | // ... |
9261 | 483 | BB = exitMBB; |
9262 | 483 | return BB; |
9263 | 483 | } |
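 | | // Usage note (editorial): EmitInstrWithCustomInserter below routes the
 | | // atomic pseudos here, e.g. ATOMIC_LOAD_ADD_I32 becomes
 | | // EmitAtomicBinary(MI, BB, 4, PPC::ADD4), yielding the lwarx/add/stwcx.
 | | // retry loop sketched in the block comments above.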
9264 | | |
9265 | | MachineBasicBlock * |
9266 | | PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI, |
9267 | | MachineBasicBlock *BB, |
9268 | | bool is8bit, // operation |
9269 | | unsigned BinOpcode, |
9270 | | unsigned CmpOpcode, |
9271 | 260 | unsigned CmpPred) const { |
9272 | 260 | // If we support part-word atomic mnemonics, just use them |
9273 | 260 | if (Subtarget.hasPartwordAtomics()) |
9274 | 240 | return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode,
9275 | 240 | CmpOpcode, CmpPred); |
9276 | 20 | |
9277 | 20 | // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. |
9278 | 20 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
9279 | 20 | // In 64 bit mode we have to use 64 bits for addresses, even though the |
9280 | 20 | // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address |
9281 | 20 | // registers without caring whether they're 32 or 64, but here we're |
9282 | 20 | // doing actual arithmetic on the addresses. |
9283 | 20 | bool is64bit = Subtarget.isPPC64(); |
9284 | 20 | bool isLittleEndian = Subtarget.isLittleEndian(); |
9285 | 20 | unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
9286 | 20 | |
9287 | 20 | const BasicBlock *LLVM_BB = BB->getBasicBlock(); |
9288 | 20 | MachineFunction *F = BB->getParent(); |
9289 | 20 | MachineFunction::iterator It = ++BB->getIterator(); |
9290 | 20 | |
9291 | 20 | unsigned dest = MI.getOperand(0).getReg(); |
9292 | 20 | unsigned ptrA = MI.getOperand(1).getReg(); |
9293 | 20 | unsigned ptrB = MI.getOperand(2).getReg(); |
9294 | 20 | unsigned incr = MI.getOperand(3).getReg(); |
9295 | 20 | DebugLoc dl = MI.getDebugLoc(); |
9296 | 20 | |
9297 | 20 | MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); |
9298 | 20 | MachineBasicBlock *loop2MBB = |
9299 | 20 | CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
9300 | 20 | MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); |
9301 | 20 | F->insert(It, loopMBB); |
9302 | 20 | if (CmpOpcode) |
9303 | 8 | F->insert(It, loop2MBB); |
9304 | 20 | F->insert(It, exitMBB); |
9305 | 20 | exitMBB->splice(exitMBB->begin(), BB, |
9306 | 20 | std::next(MachineBasicBlock::iterator(MI)), BB->end()); |
9307 | 20 | exitMBB->transferSuccessorsAndUpdatePHIs(BB); |
9308 | 20 | |
9309 | 20 | MachineRegisterInfo &RegInfo = F->getRegInfo(); |
9310 | 18 | const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass |
9311 | 2 | : &PPC::GPRCRegClass; |
9312 | 20 | unsigned PtrReg = RegInfo.createVirtualRegister(RC); |
9313 | 20 | unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); |
9314 | 20 | unsigned ShiftReg = |
9315 | 20 | isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
9316 | 20 | unsigned Incr2Reg = RegInfo.createVirtualRegister(RC); |
9317 | 20 | unsigned MaskReg = RegInfo.createVirtualRegister(RC); |
9318 | 20 | unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); |
9319 | 20 | unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); |
9320 | 20 | unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); |
9321 | 20 | unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC); |
9322 | 20 | unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); |
9323 | 20 | unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); |
9324 | 20 | unsigned Ptr1Reg; |
9325 | 20 | unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);
9326 | 20 | |
9327 | 20 | // thisMBB: |
9328 | 20 | // ... |
9329 | 20 | // fallthrough --> loopMBB |
9330 | 20 | BB->addSuccessor(loopMBB); |
9331 | 20 | |
9332 | 20 | // The 4-byte load must be aligned, while a char or short may be |
9333 | 20 | // anywhere in the word. Hence all this nasty bookkeeping code. |
9334 | 20 | // add ptr1, ptrA, ptrB [copy if ptrA==0] |
9335 | 20 | // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] |
9336 | 20 | // xori shift, shift1, 24 [16] |
9337 | 20 | // rlwinm ptr, ptr1, 0, 0, 29 |
9338 | 20 | // slw incr2, incr, shift |
9339 | 20 | // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] |
9340 | 20 | // slw mask, mask2, shift |
9341 | 20 | // loopMBB: |
9342 | 20 | // lwarx tmpDest, ptr |
9343 | 20 | // add tmp, tmpDest, incr2 |
9344 | 20 | // andc tmp2, tmpDest, mask |
9345 | 20 | // and tmp3, tmp, mask |
9346 | 20 | // or tmp4, tmp3, tmp2 |
9347 | 20 | // stwcx. tmp4, ptr |
9348 | 20 | // bne- loopMBB |
9349 | 20 | // fallthrough --> exitMBB |
9350 | 20 | // srw dest, tmpDest, shift |
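 | | // Worked example (editorial, illustrative): for a byte at byte offset 1
 | | // within its aligned word, rlwinm computes shift1 = (ptr & 3) << 3 = 8;
 | | // on big-endian the xori flips that to shift = 8 ^ 24 = 16, so incr and
 | | // the 0xff mask are both shifted left 16 bits into the byte's lane
 | | // (value bits <23:16>) before the masked merge in the loop.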
9351 | 20 | if (ptrA != ZeroReg) {
9352 | 0 | Ptr1Reg = RegInfo.createVirtualRegister(RC); |
9353 | 0 | BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
9354 | 0 | .addReg(ptrA).addReg(ptrB); |
9355 | 20 | } else { |
9356 | 20 | Ptr1Reg = ptrB; |
9357 | 20 | } |
9358 | 20 | BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) |
9359 | 20 | .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
9360 | 20 | if (!isLittleEndian) |
9361 | 20 | BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
9362 | 20 | .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
9363 | 20 | if (is64bit) |
9364 | 18 | BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) |
9365 | 18 | .addReg(Ptr1Reg).addImm(0).addImm(61); |
9366 | 20 | else |
9367 | 2 | BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) |
9368 | 2 | .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); |
9369 | 20 | BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg) |
9370 | 20 | .addReg(incr).addReg(ShiftReg); |
9371 | 20 | if (is8bit) |
9372 | 10 | BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); |
9373 | 10 | else { |
9374 | 10 | BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); |
9375 | 10 | BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535); |
9376 | 10 | } |
9377 | 20 | BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) |
9378 | 20 | .addReg(Mask2Reg).addReg(ShiftReg); |
9379 | 20 | |
9380 | 20 | BB = loopMBB; |
9381 | 20 | BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) |
9382 | 20 | .addReg(ZeroReg).addReg(PtrReg); |
9383 | 20 | if (BinOpcode) |
9384 | 8 | BuildMI(BB, dl, TII->get(BinOpcode), TmpReg) |
9385 | 8 | .addReg(Incr2Reg).addReg(TmpDestReg); |
9386 | 20 | BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
9387 | 20 | .addReg(TmpDestReg).addReg(MaskReg); |
9388 | 20 | BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
9389 | 20 | .addReg(TmpReg).addReg(MaskReg); |
9390 | 20 | if (CmpOpcode) {
9391 | 8 | // For unsigned comparisons, we can directly compare the shifted values. |
9392 | 8 | // For signed comparisons we shift and sign extend. |
9393 | 8 | unsigned SReg = RegInfo.createVirtualRegister(RC); |
9394 | 8 | BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), SReg)
9395 | 8 | .addReg(TmpDestReg).addReg(MaskReg); |
9396 | 8 | unsigned ValueReg = SReg; |
9397 | 8 | unsigned CmpReg = Incr2Reg; |
9398 | 8 | if (CmpOpcode == PPC::CMPW) {
9399 | 4 | ValueReg = RegInfo.createVirtualRegister(RC); |
9400 | 4 | BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg) |
9401 | 4 | .addReg(SReg).addReg(ShiftReg); |
9402 | 4 | unsigned ValueSReg = RegInfo.createVirtualRegister(RC); |
9403 | 4 | BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
9404 | 4 | .addReg(ValueReg); |
9405 | 4 | ValueReg = ValueSReg; |
9406 | 4 | CmpReg = incr; |
9407 | 4 | } |
9408 | 8 | BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0) |
9409 | 8 | .addReg(CmpReg).addReg(ValueReg); |
9410 | 8 | BuildMI(BB, dl, TII->get(PPC::BCC)) |
9411 | 8 | .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB); |
9412 | 8 | BB->addSuccessor(loop2MBB); |
9413 | 8 | BB->addSuccessor(exitMBB); |
9414 | 8 | BB = loop2MBB; |
9415 | 8 | } |
9416 | 20 | BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
9417 | 260 | .addReg(Tmp3Reg).addReg(Tmp2Reg); |
9418 | 260 | BuildMI(BB, dl, TII->get(PPC::STWCX)) |
9419 | 260 | .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg); |
9420 | 260 | BuildMI(BB, dl, TII->get(PPC::BCC)) |
9421 | 260 | .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); |
9422 | 260 | BB->addSuccessor(loopMBB); |
9423 | 260 | BB->addSuccessor(exitMBB); |
9424 | 260 | |
9425 | 260 | // exitMBB: |
9426 | 260 | // ... |
9427 | 260 | BB = exitMBB; |
9428 | 260 | BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg) |
9429 | 260 | .addReg(ShiftReg); |
9430 | 260 | return BB; |
9431 | 260 | } |
9432 | | |
9433 | | llvm::MachineBasicBlock * |
9434 | | PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, |
9435 | 6 | MachineBasicBlock *MBB) const { |
9436 | 6 | DebugLoc DL = MI.getDebugLoc(); |
9437 | 6 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
9438 | 6 | const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
9439 | 6 | |
9440 | 6 | MachineFunction *MF = MBB->getParent(); |
9441 | 6 | MachineRegisterInfo &MRI = MF->getRegInfo(); |
9442 | 6 | |
9443 | 6 | const BasicBlock *BB = MBB->getBasicBlock(); |
9444 | 6 | MachineFunction::iterator I = ++MBB->getIterator(); |
9445 | 6 | |
9446 | 6 | // Memory Reference |
9447 | 6 | MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin(); |
9448 | 6 | MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end(); |
9449 | 6 | |
9450 | 6 | unsigned DstReg = MI.getOperand(0).getReg(); |
9451 | 6 | const TargetRegisterClass *RC = MRI.getRegClass(DstReg); |
9452 | 6 | assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!"); |
9453 | 6 | unsigned mainDstReg = MRI.createVirtualRegister(RC); |
9454 | 6 | unsigned restoreDstReg = MRI.createVirtualRegister(RC); |
9455 | 6 | |
9456 | 6 | MVT PVT = getPointerTy(MF->getDataLayout()); |
9457 | 6 | assert((PVT == MVT::i64 || PVT == MVT::i32) && |
9458 | 6 | "Invalid Pointer Size!"); |
9459 | 6 | // For v = setjmp(buf), we generate |
9460 | 6 | // |
9461 | 6 | // thisMBB: |
9462 | 6 | // SjLjSetup mainMBB |
9463 | 6 | // bl mainMBB |
9464 | 6 | // v_restore = 1 |
9465 | 6 | // b sinkMBB |
9466 | 6 | // |
9467 | 6 | // mainMBB: |
9468 | 6 | // buf[LabelOffset] = LR |
9469 | 6 | // v_main = 0 |
9470 | 6 | // |
9471 | 6 | // sinkMBB: |
9472 | 6 | // v = phi(main, restore) |
9473 | 6 | // |
9474 | 6 | |
9475 | 6 | MachineBasicBlock *thisMBB = MBB; |
9476 | 6 | MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); |
9477 | 6 | MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); |
9478 | 6 | MF->insert(I, mainMBB); |
9479 | 6 | MF->insert(I, sinkMBB); |
9480 | 6 | |
9481 | 6 | MachineInstrBuilder MIB; |
9482 | 6 | |
9483 | 6 | // Transfer the remainder of BB and its successor edges to sinkMBB. |
9484 | 6 | sinkMBB->splice(sinkMBB->begin(), MBB, |
9485 | 6 | std::next(MachineBasicBlock::iterator(MI)), MBB->end()); |
9486 | 6 | sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); |
9487 | 6 | |
9488 | 6 | // Note that the structure of the jmp_buf used here is not compatible |
9489 | 6 | // with that used by libc, and is not designed to be. Specifically, it |
9490 | 6 | // stores only those 'reserved' registers that LLVM does not otherwise |
9491 | 6 | // understand how to spill. Also, by convention, by the time this |
9492 | 6 | // intrinsic is called, Clang has already stored the frame address in the |
9493 | 6 | // first slot of the buffer and stack address in the third. Following the |
9494 | 6 | // X86 target code, we'll store the jump address in the second slot. We also |
9495 | 6 | // need to save the TOC pointer (R2) to handle jumps between shared |
9496 | 6 | // libraries, and that will be stored in the fourth slot. The thread |
9497 | 6 | // identifier (R13) is not affected. |
9498 | 6 | |
9499 | 6 | // thisMBB: |
9500 | 6 | const int64_t LabelOffset = 1 * PVT.getStoreSize(); |
9501 | 6 | const int64_t TOCOffset = 3 * PVT.getStoreSize(); |
9502 | 6 | const int64_t BPOffset = 4 * PVT.getStoreSize(); |
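 | | // Editorial summary of the buffer layout implied above (0-based,
 | | // PVT-sized slots): slot 0 = frame address (stored by Clang), slot 1 =
 | | // jump IP (LabelOffset), slot 2 = stack pointer (stored by Clang),
 | | // slot 3 = TOC (TOCOffset), slot 4 = base pointer (BPOffset).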
9503 | 6 | |
9504 | 6 | // Prepare the IP in a register.
9505 | 6 | const TargetRegisterClass *PtrRC = getRegClassFor(PVT); |
9506 | 6 | unsigned LabelReg = MRI.createVirtualRegister(PtrRC); |
9507 | 6 | unsigned BufReg = MI.getOperand(1).getReg(); |
9508 | 6 | |
9509 | 6 | if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
9510 | 6 | setUsesTOCBasePtr(*MBB->getParent()); |
9511 | 6 | MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD)) |
9512 | 6 | .addReg(PPC::X2) |
9513 | 6 | .addImm(TOCOffset) |
9514 | 6 | .addReg(BufReg); |
9515 | 6 | MIB.setMemRefs(MMOBegin, MMOEnd); |
9516 | 6 | } |
9517 | 6 | |
9518 | 6 | // Naked functions never have a base pointer, and so we use r1. For all |
9519 | 6 | // other functions, this decision must be delayed until during PEI. |
9520 | 6 | unsigned BaseReg; |
9521 | 6 | if (MF->getFunction()->hasFnAttribute(Attribute::Naked)) |
9522 | 0 | BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
9523 | 6 | else |
9524 | 6 | BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
9525 | 6 | |
9526 | 6 | MIB = BuildMI(*thisMBB, MI, DL, |
9527 | 6 | TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
9528 | 6 | .addReg(BaseReg) |
9529 | 6 | .addImm(BPOffset) |
9530 | 6 | .addReg(BufReg); |
9531 | 6 | MIB.setMemRefs(MMOBegin, MMOEnd); |
9532 | 6 | |
9533 | 6 | // Setup |
9534 | 6 | MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB); |
9535 | 6 | MIB.addRegMask(TRI->getNoPreservedMask()); |
9536 | 6 | |
9537 | 6 | BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1); |
9538 | 6 | |
9539 | 6 | MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup)) |
9540 | 6 | .addMBB(mainMBB); |
9541 | 6 | MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB); |
9542 | 6 | |
9543 | 6 | thisMBB->addSuccessor(mainMBB, BranchProbability::getZero()); |
9544 | 6 | thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne()); |
9545 | 6 | |
9546 | 6 | // mainMBB: |
9547 | 6 | // mainDstReg = 0 |
9548 | 6 | MIB = |
9549 | 6 | BuildMI(mainMBB, DL, |
9550 | 6 | TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
9551 | 6 | |
9552 | 6 | // Store IP |
9553 | 6 | if (Subtarget.isPPC64()) {
9554 | 6 | MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD)) |
9555 | 6 | .addReg(LabelReg) |
9556 | 6 | .addImm(LabelOffset) |
9557 | 6 | .addReg(BufReg); |
9558 | 6 | } else { |
9559 | 0 | MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW)) |
9560 | 0 | .addReg(LabelReg) |
9561 | 0 | .addImm(LabelOffset) |
9562 | 0 | .addReg(BufReg); |
9563 | 0 | } |
9564 | 6 | |
9565 | 6 | MIB.setMemRefs(MMOBegin, MMOEnd); |
9566 | 6 | |
9567 | 6 | BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0); |
9568 | 6 | mainMBB->addSuccessor(sinkMBB); |
9569 | 6 | |
9570 | 6 | // sinkMBB: |
9571 | 6 | BuildMI(*sinkMBB, sinkMBB->begin(), DL, |
9572 | 6 | TII->get(PPC::PHI), DstReg) |
9573 | 6 | .addReg(mainDstReg).addMBB(mainMBB) |
9574 | 6 | .addReg(restoreDstReg).addMBB(thisMBB); |
9575 | 6 | |
9576 | 6 | MI.eraseFromParent(); |
9577 | 6 | return sinkMBB; |
9578 | 6 | } |
9579 | | |
9580 | | MachineBasicBlock * |
9581 | | PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, |
9582 | 5 | MachineBasicBlock *MBB) const { |
9583 | 5 | DebugLoc DL = MI.getDebugLoc(); |
9584 | 5 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
9585 | 5 | |
9586 | 5 | MachineFunction *MF = MBB->getParent(); |
9587 | 5 | MachineRegisterInfo &MRI = MF->getRegInfo(); |
9588 | 5 | |
9589 | 5 | // Memory Reference |
9590 | 5 | MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin(); |
9591 | 5 | MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end(); |
9592 | 5 | |
9593 | 5 | MVT PVT = getPointerTy(MF->getDataLayout()); |
9594 | 5 | assert((PVT == MVT::i64 || PVT == MVT::i32) && |
9595 | 5 | "Invalid Pointer Size!"); |
9596 | 5 | |
9597 | 5 | const TargetRegisterClass *RC = |
9598 | 5 | (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
9599 | 5 | unsigned Tmp = MRI.createVirtualRegister(RC); |
9600 | 5 | // Since FP is only updated here but NOT referenced, it's treated as GPR. |
9601 | 5 | unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
9602 | 5 | unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
9603 | 5 | unsigned BP = |
9604 | 5 | (PVT == MVT::i64) |
9605 | 5 | ? PPC::X30 |
9606 | 0 | : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
9607 | 0 | : PPC::R30); |
9608 | 5 | |
9609 | 5 | MachineInstrBuilder MIB; |
9610 | 5 | |
9611 | 5 | const int64_t LabelOffset = 1 * PVT.getStoreSize(); |
9612 | 5 | const int64_t SPOffset = 2 * PVT.getStoreSize(); |
9613 | 5 | const int64_t TOCOffset = 3 * PVT.getStoreSize(); |
9614 | 5 | const int64_t BPOffset = 4 * PVT.getStoreSize(); |
9615 | 5 | |
9616 | 5 | unsigned BufReg = MI.getOperand(0).getReg(); |
9617 | 5 | |
9618 | 5 | // Reload FP (the jumped-to function may not have had a |
9619 | 5 | // frame pointer, and if so, then its r31 will be restored |
9620 | 5 | // as necessary). |
9621 | 5 | if (PVT == MVT::i64) {
9622 | 5 | MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP) |
9623 | 5 | .addImm(0) |
9624 | 5 | .addReg(BufReg); |
9625 | 5 | } else { |
9626 | 0 | MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP) |
9627 | 0 | .addImm(0) |
9628 | 0 | .addReg(BufReg); |
9629 | 0 | } |
9630 | 5 | MIB.setMemRefs(MMOBegin, MMOEnd); |
9631 | 5 | |
9632 | 5 | // Reload IP |
9633 | 5 | if (PVT == MVT::i64) {
9634 | 5 | MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp) |
9635 | 5 | .addImm(LabelOffset) |
9636 | 5 | .addReg(BufReg); |
9637 | 5 | } else { |
9638 | 0 | MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp) |
9639 | 0 | .addImm(LabelOffset) |
9640 | 0 | .addReg(BufReg); |
9641 | 0 | } |
9642 | 5 | MIB.setMemRefs(MMOBegin, MMOEnd); |
9643 | 5 | |
9644 | 5 | // Reload SP |
9645 | 5 | if (PVT == MVT::i64) {
9646 | 5 | MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP) |
9647 | 5 | .addImm(SPOffset) |
9648 | 5 | .addReg(BufReg); |
9649 | 5 | } else { |
9650 | 0 | MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP) |
9651 | 0 | .addImm(SPOffset) |
9652 | 0 | .addReg(BufReg); |
9653 | 0 | } |
9654 | 5 | MIB.setMemRefs(MMOBegin, MMOEnd); |
9655 | 5 | |
9656 | 5 | // Reload BP |
9657 | 5 | if (PVT == MVT::i64) {
9658 | 5 | MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP) |
9659 | 5 | .addImm(BPOffset) |
9660 | 5 | .addReg(BufReg); |
9661 | 5 | } else { |
9662 | 0 | MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP) |
9663 | 0 | .addImm(BPOffset) |
9664 | 0 | .addReg(BufReg); |
9665 | 0 | } |
9666 | 5 | MIB.setMemRefs(MMOBegin, MMOEnd); |
9667 | 5 | |
9668 | 5 | // Reload TOC |
9669 | 5 | if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
9670 | 5 | setUsesTOCBasePtr(*MBB->getParent()); |
9671 | 5 | MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2) |
9672 | 5 | .addImm(TOCOffset) |
9673 | 5 | .addReg(BufReg); |
9674 | 5 | |
9675 | 5 | MIB.setMemRefs(MMOBegin, MMOEnd); |
9676 | 5 | } |
9677 | 5 | |
9678 | 5 | // Jump |
9679 | 5 | BuildMI(*MBB, MI, DL, |
9680 | 5 | TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
9681 | 5 | BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
9682 | 5 | |
9683 | 5 | MI.eraseFromParent(); |
9684 | 5 | return MBB; |
9685 | 5 | } |
9686 | | |
9687 | | MachineBasicBlock * |
9688 | | PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, |
9689 | 1.74k | MachineBasicBlock *BB) const { |
9690 | 1.74k | if (MI.getOpcode() == TargetOpcode::STACKMAP || |
9691 | 1.74k | MI.getOpcode() == TargetOpcode::PATCHPOINT) {
9692 | 59 | if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() &&
9693 | 59 | MI.getOpcode() == TargetOpcode::PATCHPOINT) {
9694 | 40 | // Call lowering should have added an r2 operand to indicate a dependence |
9695 | 40 | // on the TOC base pointer value. It can't however, because there is no |
9696 | 40 | // way to mark the dependence as implicit there, and so the stackmap code |
9697 | 40 | // will confuse it with a regular operand. Instead, add the dependence |
9698 | 40 | // here. |
9699 | 40 | setUsesTOCBasePtr(*BB->getParent()); |
9700 | 40 | MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true)); |
9701 | 40 | } |
9702 | 59 | |
9703 | 59 | return emitPatchPoint(MI, BB); |
9704 | 59 | } |
9705 | 1.68k | |
9706 | 1.68k | if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
9707 | 1.68k | MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
9708 | 6 | return emitEHSjLjSetJmp(MI, BB);
9709 | 1.68k | } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
9710 | 1.68k | MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
9711 | 5 | return emitEHSjLjLongJmp(MI, BB); |
9712 | 5 | } |
9713 | 1.67k | |
9714 | 1.67k | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
9715 | 1.67k | |
9716 | 1.67k | // To "insert" these instructions we actually have to insert their |
9717 | 1.67k | // control-flow patterns. |
9718 | 1.67k | const BasicBlock *LLVM_BB = BB->getBasicBlock(); |
9719 | 1.67k | MachineFunction::iterator It = ++BB->getIterator(); |
9720 | 1.67k | |
9721 | 1.67k | MachineFunction *F = BB->getParent(); |
9722 | 1.67k | |
9723 | 1.67k | if (MI.getOpcode() == PPC::SELECT_CC_I4 || |
9724 | 1.56k | MI.getOpcode() == PPC::SELECT_CC_I8 || |
9725 | 1.67k | MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8) {
9726 | 703 | SmallVector<MachineOperand, 2> Cond; |
9727 | 703 | if (MI.getOpcode() == PPC::SELECT_CC_I4 || |
9728 | 595 | MI.getOpcode() == PPC::SELECT_CC_I8) |
9729 | 163 | Cond.push_back(MI.getOperand(4)); |
9730 | 703 | else |
9731 | 540 | Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET)); |
9732 | 703 | Cond.push_back(MI.getOperand(1)); |
9733 | 703 | |
9734 | 703 | DebugLoc dl = MI.getDebugLoc(); |
9735 | 703 | TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond, |
9736 | 703 | MI.getOperand(2).getReg(), MI.getOperand(3).getReg()); |
9737 | 1.67k | } else if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
9738 | 972 | MI.getOpcode() == PPC::SELECT_CC_I8 || |
9739 | 972 | MI.getOpcode() == PPC::SELECT_CC_F4 || |
9740 | 937 | MI.getOpcode() == PPC::SELECT_CC_F8 || |
9741 | 923 | MI.getOpcode() == PPC::SELECT_CC_QFRC || |
9742 | 923 | MI.getOpcode() == PPC::SELECT_CC_QSRC || |
9743 | 923 | MI.getOpcode() == PPC::SELECT_CC_QBRC || |
9744 | 923 | MI.getOpcode() == PPC::SELECT_CC_VRRC || |
9745 | 923 | MI.getOpcode() == PPC::SELECT_CC_VSFRC || |
9746 | 905 | MI.getOpcode() == PPC::SELECT_CC_VSSRC || |
9747 | 905 | MI.getOpcode() == PPC::SELECT_CC_VSRC || |
9748 | 905 | MI.getOpcode() == PPC::SELECT_I4 || |
9749 | 905 | MI.getOpcode() == PPC::SELECT_I8 || |
9750 | 905 | MI.getOpcode() == PPC::SELECT_F4 || |
9751 | 884 | MI.getOpcode() == PPC::SELECT_F8 || |
9752 | 870 | MI.getOpcode() == PPC::SELECT_QFRC || |
9753 | 850 | MI.getOpcode() == PPC::SELECT_QSRC || |
9754 | 830 | MI.getOpcode() == PPC::SELECT_QBRC || |
9755 | 810 | MI.getOpcode() == PPC::SELECT_VRRC || |
9756 | 770 | MI.getOpcode() == PPC::SELECT_VSFRC || |
9757 | 744 | MI.getOpcode() == PPC::SELECT_VSSRC || |
9758 | 972 | MI.getOpcode() == PPC::SELECT_VSRC) {
9759 | 229 | // The incoming instruction knows the destination vreg to set, the |
9760 | 229 | // condition code register to branch on, the true/false values to |
9761 | 229 | // select between, and a branch opcode to use. |
9762 | 229 | |
9763 | 229 | // thisMBB: |
9764 | 229 | // ... |
9765 | 229 | // TrueVal = ... |
9766 | 229 | // cmpTY ccX, r1, r2 |
9767 | 229 | // bCC copy1MBB |
9768 | 229 | // fallthrough --> copy0MBB |
9769 | 229 | MachineBasicBlock *thisMBB = BB; |
9770 | 229 | MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); |
9771 | 229 | MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); |
9772 | 229 | DebugLoc dl = MI.getDebugLoc(); |
9773 | 229 | F->insert(It, copy0MBB); |
9774 | 229 | F->insert(It, sinkMBB); |
9775 | 229 | |
9776 | 229 | // Transfer the remainder of BB and its successor edges to sinkMBB. |
9777 | 229 | sinkMBB->splice(sinkMBB->begin(), BB, |
9778 | 229 | std::next(MachineBasicBlock::iterator(MI)), BB->end()); |
9779 | 229 | sinkMBB->transferSuccessorsAndUpdatePHIs(BB); |
9780 | 229 | |
9781 | 229 | // Next, add the true and fallthrough blocks as its successors. |
9782 | 229 | BB->addSuccessor(copy0MBB); |
9783 | 229 | BB->addSuccessor(sinkMBB); |
9784 | 229 | |
9785 | 229 | if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
9786 | 229 | MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
9787 | 194 | MI.getOpcode() == PPC::SELECT_QFRC || |
9788 | 174 | MI.getOpcode() == PPC::SELECT_QSRC || |
9789 | 154 | MI.getOpcode() == PPC::SELECT_QBRC || |
9790 | 134 | MI.getOpcode() == PPC::SELECT_VRRC || |
9791 | 94 | MI.getOpcode() == PPC::SELECT_VSFRC || |
9792 | 68 | MI.getOpcode() == PPC::SELECT_VSSRC || |
9793 | 229 | MI.getOpcode() == PPC::SELECT_VSRC) {
9794 | 162 | BuildMI(BB, dl, TII->get(PPC::BC)) |
9795 | 162 | .addReg(MI.getOperand(1).getReg()) |
9796 | 162 | .addMBB(sinkMBB); |
9797 | 229 | } else { |
9798 | 67 | unsigned SelectPred = MI.getOperand(4).getImm(); |
9799 | 67 | BuildMI(BB, dl, TII->get(PPC::BCC)) |
9800 | 67 | .addImm(SelectPred) |
9801 | 67 | .addReg(MI.getOperand(1).getReg()) |
9802 | 67 | .addMBB(sinkMBB); |
9803 | 67 | } |
9804 | 229 | |
9805 | 229 | // copy0MBB: |
9806 | 229 | // %FalseValue = ... |
9807 | 229 | // # fallthrough to sinkMBB |
9808 | 229 | BB = copy0MBB; |
9809 | 229 | |
9810 | 229 | // Update machine-CFG edges |
9811 | 229 | BB->addSuccessor(sinkMBB); |
9812 | 229 | |
9813 | 229 | // sinkMBB: |
9814 | 229 | // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] |
9815 | 229 | // ... |
9816 | 229 | BB = sinkMBB; |
9817 | 229 | BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg()) |
9818 | 229 | .addReg(MI.getOperand(3).getReg()) |
9819 | 229 | .addMBB(copy0MBB) |
9820 | 229 | .addReg(MI.getOperand(2).getReg()) |
9821 | 229 | .addMBB(thisMBB); |
9822 | 972 | } else if (MI.getOpcode() == PPC::ReadTB) {
9823 | 2 | // To read the 64-bit time-base register on a 32-bit target, we read the |
9824 | 2 | // two halves. Should the counter have wrapped while it was being read, we |
9825 | 2 | // need to try again. |
9826 | 2 | // ... |
9827 | 2 | // readLoop: |
9828 | 2 | // mfspr Rx,TBU # load from TBU |
9829 | 2 | // mfspr Ry,TB # load from TB |
9830 | 2 | // mfspr Rz,TBU # load from TBU |
9831 | 2 | // cmpw crX,Rx,Rz # check if 'old'='new' |
9832 | 2 | // bne readLoop # branch if they're not equal |
9833 | 2 | // ... |
9834 | 2 | |
9835 | 2 | MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB); |
9836 | 2 | MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); |
9837 | 2 | DebugLoc dl = MI.getDebugLoc(); |
9838 | 2 | F->insert(It, readMBB); |
9839 | 2 | F->insert(It, sinkMBB); |
9840 | 2 | |
9841 | 2 | // Transfer the remainder of BB and its successor edges to sinkMBB. |
9842 | 2 | sinkMBB->splice(sinkMBB->begin(), BB, |
9843 | 2 | std::next(MachineBasicBlock::iterator(MI)), BB->end()); |
9844 | 2 | sinkMBB->transferSuccessorsAndUpdatePHIs(BB); |
9845 | 2 | |
9846 | 2 | BB->addSuccessor(readMBB); |
9847 | 2 | BB = readMBB; |
9848 | 2 | |
9849 | 2 | MachineRegisterInfo &RegInfo = F->getRegInfo(); |
9850 | 2 | unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); |
9851 | 2 | unsigned LoReg = MI.getOperand(0).getReg(); |
9852 | 2 | unsigned HiReg = MI.getOperand(1).getReg(); |
9853 | 2 | |
9854 | 2 | BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269); |
9855 | 2 | BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268); |
9856 | 2 | BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269); |
9857 | 2 | |
9858 | 2 | unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); |
9859 | 2 | |
9860 | 2 | BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg) |
9861 | 2 | .addReg(HiReg).addReg(ReadAgainReg); |
9862 | 2 | BuildMI(BB, dl, TII->get(PPC::BCC)) |
9863 | 2 | .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB); |
9864 | 2 | |
9865 | 2 | BB->addSuccessor(readMBB); |
9866 | 2 | BB->addSuccessor(sinkMBB); |
9867 | 743 | } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
9868 | 16 | BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
9869 | 725 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
9870 | 14 | BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
9871 | 711 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
9872 | 13 | BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
9873 | 698 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
9874 | 14 | BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
9875 | 698 | 
9876 | 684 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
9877 | 10 | BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
9878 | 674 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
9879 | 10 | BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
9880 | 664 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
9881 | 10 | BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
9882 | 654 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
9883 | 11 | BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
9884 | 654 | 
9885 | 643 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
9886 | 10 | BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
9887 | 633 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
9888 | 10 | BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
9889 | 623 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
9890 | 10 | BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
9891 | 613 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
9892 | 10 | BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
9893 | 613 | 
9894 | 603 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
9895 | 10 | BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
9896 | 593 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
9897 | 12 | BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
9898 | 581 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
9899 | 10 | BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
9900 | 571 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
9901 | 10 | BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
9902 | 571 | 
9903 | 561 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
9904 | 10 | BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
9905 | 551 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
9906 | 10 | BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
9907 | 541 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
9908 | 10 | BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
9909 | 531 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
9910 | 10 | BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
9911 | 531 | 
9912 | 521 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
9913 | 10 | BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
9914 | 511 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
9915 | 10 | BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
9916 | 501 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
9917 | 10 | BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
9918 | 491 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
9919 | 10 | BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
9920 | 491 | 
9921 | 481 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
9922 | 13 | BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
9923 | 468 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
9924 | 13 | BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
9925 | 455 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
9926 | 11 | BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
9927 | 444 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
9928 | 11 | BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
9929 | 444 | 
9930 | 433 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
9931 | 13 | BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
9932 | 420 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
9933 | 13 | BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
9934 | 407 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
9935 | 11 | BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
9936 | 396 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
9937 | 11 | BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
9938 | 396 | 
9939 | 385 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
9940 | 12 | BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
9941 | 373 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
9942 | 12 | BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
9943 | 361 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
9944 | 11 | BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
9945 | 350 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
9946 | 11 | BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
9947 | 350 | 
9948 | 339 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
9949 | 12 | BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
9950 | 327 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
9951 | 12 | BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
9952 | 315 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
9953 | 11 | BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
9954 | 304 | else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
9955 | 11 | BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
9956 | 304 | 
9957 | 293 | else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
9958 | 14 | BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
9959 | 279 | else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
9960 | 14 | BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
9961 | 265 | else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
9962 | 13 | BB = EmitAtomicBinary(MI, BB, 4, 0);
9963 | 252 | else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
9964 | 14 | BB = EmitAtomicBinary(MI, BB, 8, 0);
9965 | 238 | else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
9966 | 215 | MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
9967 | 190 | (Subtarget.hasPartwordAtomics() &&
9968 | 190 | MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
9969 | 168 | (Subtarget.hasPartwordAtomics() &&
9970 | 238 | MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
9971 | 92 | bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64; |
9972 | 92 | |
9973 | 92 | auto LoadMnemonic = PPC::LDARX; |
9974 | 92 | auto StoreMnemonic = PPC::STDCX; |
9975 | 92 | switch (MI.getOpcode()) { |
9976 | 0 | default: |
9977 | 0 | llvm_unreachable("Compare and swap of unknown size"); |
9978 | 22 | case PPC::ATOMIC_CMP_SWAP_I8: |
9979 | 22 | LoadMnemonic = PPC::LBARX; |
9980 | 22 | StoreMnemonic = PPC::STBCX; |
9981 | 22 | assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
9982 | 22 | break; |
9983 | 22 | case PPC::ATOMIC_CMP_SWAP_I16: |
9984 | 22 | LoadMnemonic = PPC::LHARX; |
9985 | 22 | StoreMnemonic = PPC::STHCX; |
9986 | 22 | assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
9987 | 22 | break; |
9988 | 23 | case PPC::ATOMIC_CMP_SWAP_I32: |
9989 | 23 | LoadMnemonic = PPC::LWARX; |
9990 | 23 | StoreMnemonic = PPC::STWCX; |
9991 | 23 | break; |
9992 | 25 | case PPC::ATOMIC_CMP_SWAP_I64: |
9993 | 25 | LoadMnemonic = PPC::LDARX; |
9994 | 25 | StoreMnemonic = PPC::STDCX; |
9995 | 25 | break; |
9996 | 92 | } |
9997 | 92 | unsigned dest = MI.getOperand(0).getReg(); |
9998 | 92 | unsigned ptrA = MI.getOperand(1).getReg(); |
9999 | 92 | unsigned ptrB = MI.getOperand(2).getReg(); |
10000 | 92 | unsigned oldval = MI.getOperand(3).getReg(); |
10001 | 92 | unsigned newval = MI.getOperand(4).getReg(); |
10002 | 92 | DebugLoc dl = MI.getDebugLoc(); |
10003 | 92 | |
10004 | 92 | MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); |
10005 | 92 | MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); |
10006 | 92 | MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); |
10007 | 92 | MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); |
10008 | 92 | F->insert(It, loop1MBB); |
10009 | 92 | F->insert(It, loop2MBB); |
10010 | 92 | F->insert(It, midMBB); |
10011 | 92 | F->insert(It, exitMBB); |
10012 | 92 | exitMBB->splice(exitMBB->begin(), BB, |
10013 | 92 | std::next(MachineBasicBlock::iterator(MI)), BB->end()); |
10014 | 92 | exitMBB->transferSuccessorsAndUpdatePHIs(BB); |
10015 | 92 | |
10016 | 92 | // thisMBB: |
10017 | 92 | // ... |
10018 | 92 | // fallthrough --> loopMBB |
10019 | 92 | BB->addSuccessor(loop1MBB); |
10020 | 92 | |
10021 | 92 | // loop1MBB: |
10022 | 92 | // l[bhwd]arx dest, ptr |
10023 | 92 | // cmp[wd] dest, oldval |
10024 | 92 | // bne- midMBB |
10025 | 92 | // loop2MBB: |
10026 | 92 | // st[bhwd]cx. newval, ptr |
10027 | 92 | // bne- loopMBB |
10028 | 92 | // b exitBB |
10029 | 92 | // midMBB: |
10030 | 92 | // st[bhwd]cx. dest, ptr |
10031 | 92 | // exitBB: |
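 | | // Editorial note: the store in midMBB writes back the value that was
 | | // just loaded; its purpose is to cancel the outstanding larx
 | | // reservation on the compare-failure path, not to change memory.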
10032 | 92 | BB = loop1MBB; |
10033 | 92 | BuildMI(BB, dl, TII->get(LoadMnemonic), dest) |
10034 | 92 | .addReg(ptrA).addReg(ptrB); |
10035 | 92 | BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
10036 | 92 | .addReg(oldval).addReg(dest); |
10037 | 92 | BuildMI(BB, dl, TII->get(PPC::BCC)) |
10038 | 92 | .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); |
10039 | 92 | BB->addSuccessor(loop2MBB); |
10040 | 92 | BB->addSuccessor(midMBB); |
10041 | 92 | |
10042 | 92 | BB = loop2MBB; |
10043 | 92 | BuildMI(BB, dl, TII->get(StoreMnemonic)) |
10044 | 92 | .addReg(newval).addReg(ptrA).addReg(ptrB); |
10045 | 92 | BuildMI(BB, dl, TII->get(PPC::BCC)) |
10046 | 92 | .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); |
10047 | 92 | BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); |
10048 | 92 | BB->addSuccessor(loop1MBB); |
10049 | 92 | BB->addSuccessor(exitMBB); |
10050 | 92 | |
10051 | 92 | BB = midMBB; |
10052 | 92 | BuildMI(BB, dl, TII->get(StoreMnemonic)) |
10053 | 92 | .addReg(dest).addReg(ptrA).addReg(ptrB); |
10054 | 92 | BB->addSuccessor(exitMBB); |
10055 | 92 | |
10056 | 92 | // exitMBB: |
10057 | 92 | // ... |
10058 | 92 | BB = exitMBB; |
10059 | 238 | } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
10060 | 146 | MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
10061 | 8 | // We must use 64-bit registers for addresses when targeting 64-bit, |
10062 | 8 | // since we're actually doing arithmetic on them. Other registers |
10063 | 8 | // can be 32-bit. |
10064 | 8 | bool is64bit = Subtarget.isPPC64(); |
10065 | 8 | bool isLittleEndian = Subtarget.isLittleEndian(); |
10066 | 8 | bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8; |
10067 | 8 | |
10068 | 8 | unsigned dest = MI.getOperand(0).getReg(); |
10069 | 8 | unsigned ptrA = MI.getOperand(1).getReg(); |
10070 | 8 | unsigned ptrB = MI.getOperand(2).getReg(); |
10071 | 8 | unsigned oldval = MI.getOperand(3).getReg(); |
10072 | 8 | unsigned newval = MI.getOperand(4).getReg(); |
10073 | 8 | DebugLoc dl = MI.getDebugLoc(); |
10074 | 8 | |
10075 | 8 | MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); |
10076 | 8 | MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); |
10077 | 8 | MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); |
10078 | 8 | MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); |
10079 | 8 | F->insert(It, loop1MBB); |
10080 | 8 | F->insert(It, loop2MBB); |
10081 | 8 | F->insert(It, midMBB); |
10082 | 8 | F->insert(It, exitMBB); |
10083 | 8 | exitMBB->splice(exitMBB->begin(), BB, |
10084 | 8 | std::next(MachineBasicBlock::iterator(MI)), BB->end()); |
10085 | 8 | exitMBB->transferSuccessorsAndUpdatePHIs(BB); |
10086 | 8 | |
10087 | 8 | MachineRegisterInfo &RegInfo = F->getRegInfo(); |
10088 | 6 | const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass |
10089 | 2 | : &PPC::GPRCRegClass; |
10090 | 8 | unsigned PtrReg = RegInfo.createVirtualRegister(RC); |
10091 | 8 | unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); |
10092 | 8 | unsigned ShiftReg = |
10093 | 8 | isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
10094 | 8 | unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC); |
10095 | 8 | unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC); |
10096 | 8 | unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC); |
10097 | 8 | unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC); |
10098 | 8 | unsigned MaskReg = RegInfo.createVirtualRegister(RC); |
10099 | 8 | unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); |
10100 | 8 | unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); |
10101 | 8 | unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); |
10102 | 8 | unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); |
10103 | 8 | unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); |
10104 | 8 | unsigned Ptr1Reg; |
10105 | 8 | unsigned TmpReg = RegInfo.createVirtualRegister(RC); |
10106 | 8 | unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
10107 | 8 | // thisMBB: |
10108 | 8 | // ... |
10109 | 8 | // fallthrough --> loopMBB |
10110 | 8 | BB->addSuccessor(loop1MBB); |
10111 | 8 | |
10112 | 8 | // The 4-byte load must be aligned, while a char or short may be |
10113 | 8 | // anywhere in the word. Hence all this nasty bookkeeping code. |
10114 | 8 | // add ptr1, ptrA, ptrB [copy if ptrA==0] |
10115 | 8 | // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] |
10116 | 8 | // xori shift, shift1, 24 [16] |
10117 | 8 | // rlwinm ptr, ptr1, 0, 0, 29 |
10118 | 8 | // slw newval2, newval, shift |
10119 | 8 | // slw oldval2, oldval, shift
10120 | 8 | // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] |
10121 | 8 | // slw mask, mask2, shift |
10122 | 8 | // and newval3, newval2, mask |
10123 | 8 | // and oldval3, oldval2, mask |
10124 | 8 | // loop1MBB: |
10125 | 8 | // lwarx tmpDest, ptr |
10126 | 8 | // and tmp, tmpDest, mask |
10127 | 8 | // cmpw tmp, oldval3 |
10128 | 8 | // bne- midMBB |
10129 | 8 | // loop2MBB: |
10130 | 8 | // andc tmp2, tmpDest, mask |
10131 | 8 | // or tmp4, tmp2, newval3 |
10132 | 8 | // stwcx. tmp4, ptr |
10133 | 8 | // bne- loop1MBB |
10134 | 8 | // b exitBB |
10135 | 8 | // midMBB: |
10136 | 8 | // stwcx. tmpDest, ptr |
10137 | 8 | // exitBB: |
10138 | 8 | // srw dest, tmpDest, shift |
10139 | 8 | if (ptrA != ZeroReg) {
10140 | 0 | Ptr1Reg = RegInfo.createVirtualRegister(RC); |
10141 | 0 | BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
10142 | 0 | .addReg(ptrA).addReg(ptrB); |
10143 | 8 | } else { |
10144 | 8 | Ptr1Reg = ptrB; |
10145 | 8 | } |
10146 | 8 | BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) |
10147 | 8 | .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
10148 | 8 | if (!isLittleEndian) |
10149 | 8 | BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
10150 | 8 | .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
10151 | 8 | if (is64bit) |
10152 | 6 | BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) |
10153 | 6 | .addReg(Ptr1Reg).addImm(0).addImm(61); |
10154 | 8 | else |
10155 | 2 | BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) |
10156 | 2 | .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); |
10157 | 8 | BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg) |
10158 | 8 | .addReg(newval).addReg(ShiftReg); |
10159 | 8 | BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg) |
10160 | 8 | .addReg(oldval).addReg(ShiftReg); |
10161 | 8 | if (is8bit) |
10162 | 4 | BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); |
10163 | 4 | else { |
10164 | 4 | BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); |
10165 | 4 | BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg) |
10166 | 4 | .addReg(Mask3Reg).addImm(65535); |
10167 | 4 | } |
10168 | 8 | BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) |
10169 | 8 | .addReg(Mask2Reg).addReg(ShiftReg); |
10170 | 8 | BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg) |
10171 | 8 | .addReg(NewVal2Reg).addReg(MaskReg); |
10172 | 8 | BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg) |
10173 | 8 | .addReg(OldVal2Reg).addReg(MaskReg); |
10174 | 8 | |
10175 | 8 | BB = loop1MBB; |
10176 | 8 | BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) |
10177 | 8 | .addReg(ZeroReg).addReg(PtrReg); |
10178 | 8 | BuildMI(BB, dl, TII->get(PPC::AND),TmpReg) |
10179 | 8 | .addReg(TmpDestReg).addReg(MaskReg); |
10180 | 8 | BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0) |
10181 | 8 | .addReg(TmpReg).addReg(OldVal3Reg); |
10182 | 8 | BuildMI(BB, dl, TII->get(PPC::BCC)) |
10183 | 8 | .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); |
10184 | 8 | BB->addSuccessor(loop2MBB); |
10185 | 8 | BB->addSuccessor(midMBB); |
10186 | 8 | |
10187 | 8 | BB = loop2MBB; |
10188 | 8 | BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg) |
10189 | 8 | .addReg(TmpDestReg).addReg(MaskReg); |
10190 | 8 | BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg) |
10191 | 8 | .addReg(Tmp2Reg).addReg(NewVal3Reg); |
10192 | 8 | BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg) |
10193 | 8 | .addReg(ZeroReg).addReg(PtrReg); |
10194 | 8 | BuildMI(BB, dl, TII->get(PPC::BCC)) |
10195 | 8 | .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); |
10196 | 8 | BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); |
10197 | 8 | BB->addSuccessor(loop1MBB); |
10198 | 8 | BB->addSuccessor(exitMBB); |
10199 | 8 | |
10200 | 8 | BB = midMBB; |
10201 | 8 | BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg) |
10202 | 8 | .addReg(ZeroReg).addReg(PtrReg); |
10203 | 8 | BB->addSuccessor(exitMBB); |
10204 | 8 | |
10205 | 8 | // exitMBB: |
10206 | 8 | // ... |
10207 | 8 | BB = exitMBB; |
10208 | 8 | BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg) |
10209 | 8 | .addReg(ShiftReg); |
10210 | 146 | } else if (MI.getOpcode() == PPC::FADDrtz) {
10211 | 8 | // This pseudo performs an FADD with rounding mode temporarily forced |
10212 | 8 | // to round-to-zero. We emit this via custom inserter since the FPSCR |
10213 | 8 | // is not modeled at the SelectionDAG level. |
10214 | 8 | unsigned Dest = MI.getOperand(0).getReg(); |
10215 | 8 | unsigned Src1 = MI.getOperand(1).getReg(); |
10216 | 8 | unsigned Src2 = MI.getOperand(2).getReg(); |
10217 | 8 | DebugLoc dl = MI.getDebugLoc(); |
10218 | 8 | |
10219 | 8 | MachineRegisterInfo &RegInfo = F->getRegInfo(); |
10220 | 8 | unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); |
10221 | 8 | |
10222 | 8 | // Save FPSCR value. |
10223 | 8 | BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg); |
10224 | 8 | |
10225 | 8 | // Set rounding mode to round-to-zero. |
10226 | 8 | BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31); |
10227 | 8 | BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30); |
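 | | // Editorial note: FPSCR bits 30-31 form the RN (rounding mode) field;
 | | // setting bit 31 and clearing bit 30 selects RN = 0b01, round toward
 | | // zero.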
10228 | 8 | |
10229 | 8 | // Perform addition. |
10230 | 8 | BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2); |
10231 | 8 | |
10232 | 8 | // Restore FPSCR value. |
10233 | 8 | BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg); |
10234 | 138 | } else if (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
10235 | 129 | MI.getOpcode() == PPC::ANDIo_1_GT_BIT || |
10236 | 113 | MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 || |
10237 | 130 | MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) {
10238 | 130 | unsigned Opcode = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 || |
10239 | 129 | MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) |
10240 | 113 | ? PPC::ANDIo8 |
10241 | 17 | : PPC::ANDIo; |
10242 | 130 | bool isEQ = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT || |
10243 | 129 | MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8); |
10244 | 130 | |
10245 | 130 | MachineRegisterInfo &RegInfo = F->getRegInfo(); |
10246 | 130 | unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ? |
10247 | 17 | &PPC::GPRCRegClass : |
10248 | 113 | &PPC::G8RCRegClass); |
10249 | 130 | |
10250 | 130 | DebugLoc dl = MI.getDebugLoc(); |
10251 | 130 | BuildMI(*BB, MI, dl, TII->get(Opcode), Dest) |
10252 | 130 | .addReg(MI.getOperand(1).getReg()) |
10253 | 130 | .addImm(1); |
10254 | 130 | BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), |
10255 | 130 | MI.getOperand(0).getReg()) |
10256 | 130 | .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
10257 | 0 | } else if (MI.getOpcode() == PPC::TCHECK_RET) {
10258 | 0 | DebugLoc Dl = MI.getDebugLoc(); |
10259 | 0 | MachineRegisterInfo &RegInfo = F->getRegInfo(); |
10260 | 0 | unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); |
10261 | 0 | BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg); |
10262 | 0 | return BB; |
10263 | 0 | } else { |
10264 | 0 | llvm_unreachable("Unexpected instr type to insert"); |
10265 | 972 | } |
10266 | 1.67k | |
10267 | 1.67k | MI.eraseFromParent(); // The pseudo instruction is gone now. |
10268 | 1.67k | return BB; |
10269 | 1.67k | } |
10270 | | |
10271 | | //===----------------------------------------------------------------------===// |
10272 | | // Target Optimization Hooks |
10273 | | //===----------------------------------------------------------------------===// |
10274 | | |
10275 | 39 | static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) { |
10276 | 39 | // For the estimates, convergence is quadratic, so we essentially double the |
10277 | 39 | // number of digits correct after every iteration. For both FRE and FRSQRTE, |
10278 | 39 | // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(), |
10279 | 39 | // this is 2^-14. IEEE float has 23 digits and double has 52 digits. |
10280 | 39 | int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
10281 | 39 | if (VT.getScalarType() == MVT::f64) |
10282 | 18 | RefinementSteps++; |
10283 | 39 | return RefinementSteps; |
10284 | 39 | } |
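 | | // Worked example (editorial): with hasRecipPrec() the estimate is good
 | | // to 2^-14, so one Newton-Raphson step squares that to about 2^-28,
 | | // covering f32's 23 fraction bits; the extra step added for f64 reaches
 | | // about 2^-56, covering its 52 bits -- hence the 1 and 1+1 step counts.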
10285 | | |
10286 | | SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, |
10287 | | int Enabled, int &RefinementSteps, |
10288 | | bool &UseOneConstNR, |
10289 | 23 | bool Reciprocal) const { |
10290 | 23 | EVT VT = Operand.getValueType(); |
10291 | 23 | if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
10292 | 16 | (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
10293 | 8 | (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
10294 | 6 | (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
10295 | 6 | (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
10296 | 23 | (VT == MVT::v4f64 && Subtarget.hasQPX())) {
10297 | 23 | if (RefinementSteps == ReciprocalEstimate::Unspecified) |
10298 | 21 | RefinementSteps = getEstimateRefinementSteps(VT, Subtarget); |
10299 | 23 | |
10300 | 23 | UseOneConstNR = true; |
10301 | 23 | return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand); |
10302 | 23 | } |
10303 | 0 | return SDValue(); |
10304 | 0 | } |
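 | | // Editorial note: FRSQRTE only seeds the value; the DAG combiner then
 | | // applies RefinementSteps Newton-Raphson iterations, conceptually
 | | // x1 = x0 * (1.5 - 0.5 * a * x0 * x0) for rsqrt(a). UseOneConstNR picks
 | | // the single-constant formulation of that iteration.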
10305 | | |
10306 | | SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG, |
10307 | | int Enabled, |
10308 | 22 | int &RefinementSteps) const { |
10309 | 22 | EVT VT = Operand.getValueType(); |
10310 | 22 | if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
10311 | 14 | (VT == MVT::f64 && Subtarget.hasFRE()) ||
10312 | 7 | (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
10313 | 6 | (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
10314 | 6 | (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
10315 | 22 | (VT == MVT::v4f64 && Subtarget.hasQPX())) {
10316 | 18 | if (RefinementSteps == ReciprocalEstimate::Unspecified) |
10317 | 18 | RefinementSteps = getEstimateRefinementSteps(VT, Subtarget); |
10318 | 18 | return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand); |
10319 | 18 | } |
10320 | 4 | return SDValue(); |
10321 | 4 | } |
10322 | | |
10323 | 6 | unsigned PPCTargetLowering::combineRepeatedFPDivisors() const { |
10324 | 6 | // Note: This functionality is used only when unsafe-fp-math is enabled, and |
10325 | 6 | // on cores with reciprocal estimates (which are used when unsafe-fp-math is |
10326 | 6 | // enabled for division), this functionality is redundant with the default |
10327 | 6 | // combiner logic (once the division -> reciprocal/multiply transformation |
10328 | 6 | // has taken place). As a result, this matters more for older cores than for |
10329 | 6 | // newer ones. |
10330 | 6 | |
10331 | 6 | // Combine multiple FDIVs with the same divisor into multiple FMULs by the |
10332 | 6 | // reciprocal if there are two or more FDIVs (for embedded cores with only |
10333 | 6 | // one FP pipeline) or three or more FDIVs (for generic OOO cores).
10334 | 6 | switch (Subtarget.getDarwinDirective()) { |
10335 | 6 | default: |
10336 | 6 | return 3; |
10337 | 0 | case PPC::DIR_440: |
10338 | 0 | case PPC::DIR_A2: |
10339 | 0 | case PPC::DIR_E500mc: |
10340 | 0 | case PPC::DIR_E5500: |
10341 | 0 | return 2; |
10342 | 0 | } |
10343 | 0 | } |
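 | | // Example (editorial): with the default threshold of 3, a/d + b/d + c/d
 | | // is rewritten (under unsafe-fp-math, per the note above) to
 | | // t = 1.0/d; a*t + b*t + c*t, trading three divides for one divide plus
 | | // multiplies.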
10344 | | |
10345 | | // isConsecutiveLSLoc needs to work even if all adds have not yet been |
10346 | | // collapsed, and so we need to look through chains of them. |
10347 | | static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base, |
10348 | 12.1k | int64_t& Offset, SelectionDAG &DAG) { |
10349 | 12.1k | if (DAG.isBaseWithConstantOffset(Loc)) {
10350 | 6.27k | Base = Loc.getOperand(0); |
10351 | 6.27k | Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue(); |
10352 | 6.27k | |
10353 | 6.27k | // The base might itself be a base plus an offset, and if so, accumulate |
10354 | 6.27k | // that as well. |
10355 | 6.27k | getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG); |
10356 | 6.27k | } |
10357 | 12.1k | } |
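 | | // Example (editorial): for Loc = ((X + 16) + 8) the recursion leaves
 | | // Base = X and accumulates Offset += 24 across the two adds.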
10358 | | |
10359 | | static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, |
10360 | | unsigned Bytes, int Dist, |
10361 | 3.08k | SelectionDAG &DAG) { |
10362 | 3.08k | if (VT.getSizeInBits() / 8 != Bytes) |
10363 | 162 | return false; |
10364 | 2.92k | |
10365 | 2.92k | SDValue BaseLoc = Base->getBasePtr(); |
10366 | 2.92k | if (Loc.getOpcode() == ISD::FrameIndex) {
10367 | 0 | if (BaseLoc.getOpcode() != ISD::FrameIndex) |
10368 | 0 | return false; |
10369 | 0 | const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); |
10370 | 0 | int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); |
10371 | 0 | int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); |
10372 | 0 | int FS = MFI.getObjectSize(FI); |
10373 | 0 | int BFS = MFI.getObjectSize(BFI); |
10374 | 0 | if (FS != BFS || FS != (int)Bytes) return false; |
10375 | 0 | return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes); |
10376 | 0 | } |
10377 | 2.92k | |
10378 | 2.92k | SDValue Base1 = Loc, Base2 = BaseLoc; |
10379 | 2.92k | int64_t Offset1 = 0, Offset2 = 0; |
10380 | 2.92k | getBaseWithConstantOffset(Loc, Base1, Offset1, DAG); |
10381 | 2.92k | getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG); |
10382 | 2.92k | if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes)) |
10383 | 226 | return true; |
10384 | 2.69k | |
10385 | 2.69k | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
10386 | 2.69k | const GlobalValue *GV1 = nullptr; |
10387 | 2.69k | const GlobalValue *GV2 = nullptr; |
10388 | 2.69k | Offset1 = 0; |
10389 | 2.69k | Offset2 = 0; |
10390 | 2.69k | bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); |
10391 | 2.69k | bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); |
10392 | 2.69k | if (isGA1 && isGA2 && GV1 == GV2) |
10393 | 12 | return Offset1 == (Offset2 + Dist*Bytes); |
10394 | 2.68k | return false; |
10395 | 2.68k | } |
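// Worked example (illustrative): for two v4f32 accesses Bytes is 16, so a
// candidate at base+16 is consecutive (Dist = 1) to a load at base+0, and
// one at base-16 matches Dist = -1; both reduce to the check
//   Offset1 == Offset2 + Dist * Bytes.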
10396 | | |
10397 | | // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does |
10398 | | // not enforce equality of the chain operands. |
10399 | | static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, |
10400 | | unsigned Bytes, int Dist, |
10401 | 3.08k | SelectionDAG &DAG) { |
10402 | 3.08k | if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) { |
10403 | 2.13k | EVT VT = LS->getMemoryVT(); |
10404 | 2.13k | SDValue Loc = LS->getBasePtr(); |
10405 | 2.13k | return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG); |
10406 | 2.13k | } |
10407 | 955 | |
10408 | 955 | if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) { |
10409 | 952 | EVT VT; |
10410 | 952 | switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { |
10411 | 0 | default: return false; |
10412 | 754 | case Intrinsic::ppc_qpx_qvlfd: |
10413 | 754 | case Intrinsic::ppc_qpx_qvlfda: |
10414 | 754 | VT = MVT::v4f64; |
10415 | 754 | break; |
10416 | 4 | case Intrinsic::ppc_qpx_qvlfs: |
10417 | 4 | case Intrinsic::ppc_qpx_qvlfsa: |
10418 | 4 | VT = MVT::v4f32; |
10419 | 4 | break; |
10420 | 0 | case Intrinsic::ppc_qpx_qvlfcd: |
10421 | 0 | case Intrinsic::ppc_qpx_qvlfcda: |
10422 | 0 | VT = MVT::v2f64; |
10423 | 0 | break; |
10424 | 0 | case Intrinsic::ppc_qpx_qvlfcs: |
10425 | 0 | case Intrinsic::ppc_qpx_qvlfcsa: |
10426 | 0 | VT = MVT::v2f32; |
10427 | 0 | break; |
10428 | 194 | case Intrinsic::ppc_qpx_qvlfiwa: |
10429 | 194 | case Intrinsic::ppc_qpx_qvlfiwz: |
10430 | 194 | case Intrinsic::ppc_altivec_lvx: |
10431 | 194 | case Intrinsic::ppc_altivec_lvxl: |
10432 | 194 | case Intrinsic::ppc_vsx_lxvw4x: |
10433 | 194 | case Intrinsic::ppc_vsx_lxvw4x_be: |
10434 | 194 | VT = MVT::v4i32; |
10435 | 194 | break; |
10436 | 0 | case Intrinsic::ppc_vsx_lxvd2x: |
10437 | 0 | case Intrinsic::ppc_vsx_lxvd2x_be: |
10438 | 0 | VT = MVT::v2f64; |
10439 | 0 | break; |
10440 | 0 | case Intrinsic::ppc_altivec_lvebx: |
10441 | 0 | VT = MVT::i8; |
10442 | 0 | break; |
10443 | 0 | case Intrinsic::ppc_altivec_lvehx: |
10444 | 0 | VT = MVT::i16; |
10445 | 0 | break; |
10446 | 0 | case Intrinsic::ppc_altivec_lvewx: |
10447 | 0 | VT = MVT::i32; |
10448 | 0 | break; |
10449 | 952 | } |
10450 | 952 | |
10451 | 952 | return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG); |
10452 | 952 | } |
10453 | 3 | |
10454 | 3 | if (N->getOpcode() == ISD::INTRINSIC_VOID) { |
10455 | 1 | EVT VT; |
10456 | 1 | switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { |
10457 | 0 | default: return false; |
10458 | 0 | case Intrinsic::ppc_qpx_qvstfd: |
10459 | 0 | case Intrinsic::ppc_qpx_qvstfda: |
10460 | 0 | VT = MVT::v4f64; |
10461 | 0 | break; |
10462 | 0 | case Intrinsic::ppc_qpx_qvstfs: |
10463 | 0 | case Intrinsic::ppc_qpx_qvstfsa: |
10464 | 0 | VT = MVT::v4f32; |
10465 | 0 | break; |
10466 | 0 | case Intrinsic::ppc_qpx_qvstfcd: |
10467 | 0 | case Intrinsic::ppc_qpx_qvstfcda: |
10468 | 0 | VT = MVT::v2f64; |
10469 | 0 | break; |
10470 | 0 | case Intrinsic::ppc_qpx_qvstfcs: |
10471 | 0 | case Intrinsic::ppc_qpx_qvstfcsa: |
10472 | 0 | VT = MVT::v2f32; |
10473 | 0 | break; |
10474 | 1 | case Intrinsic::ppc_qpx_qvstfiw: |
10475 | 1 | case Intrinsic::ppc_qpx_qvstfiwa: |
10476 | 1 | case Intrinsic::ppc_altivec_stvx: |
10477 | 1 | case Intrinsic::ppc_altivec_stvxl: |
10478 | 1 | case Intrinsic::ppc_vsx_stxvw4x: |
10479 | 1 | VT = MVT::v4i32; |
10480 | 1 | break; |
10481 | 0 | case Intrinsic::ppc_vsx_stxvd2x: |
10482 | 0 | VT = MVT::v2f64; |
10483 | 0 | break; |
10484 | 0 | case Intrinsic::ppc_vsx_stxvw4x_be: |
10485 | 0 | VT = MVT::v4i32; |
10486 | 0 | break; |
10487 | 0 | case Intrinsic::ppc_vsx_stxvd2x_be: |
10488 | 0 | VT = MVT::v2f64; |
10489 | 0 | break; |
10490 | 0 | case Intrinsic::ppc_altivec_stvebx: |
10491 | 0 | VT = MVT::i8; |
10492 | 0 | break; |
10493 | 0 | case Intrinsic::ppc_altivec_stvehx: |
10494 | 0 | VT = MVT::i16; |
10495 | 0 | break; |
10496 | 0 | case Intrinsic::ppc_altivec_stvewx: |
10497 | 0 | VT = MVT::i32; |
10498 | 0 | break; |
10499 | 1 | } |
10500 | 1 | |
10501 | 1 | return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG); |
10502 | 1 | } |
10503 | 2 | |
10504 | 2 | return false; |
10505 | 2 | } |
10506 | | |
10507 | | // Return true if there is a nearby consecutive load to the one provided |
10508 | | // (regardless of alignment). We search up and down the chain, looking through |
10509 | | // token factors and other loads (but nothing else). As a result, a true result |
10510 | | // indicates that it is safe to create a new consecutive load adjacent to the |
10511 | | // load provided. |
10512 | 108 | static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) { |
10513 | 108 | SDValue Chain = LD->getChain(); |
10514 | 108 | EVT VT = LD->getMemoryVT(); |
10515 | 108 | |
10516 | 108 | SmallSet<SDNode *, 16> LoadRoots; |
10517 | 108 | SmallVector<SDNode *, 8> Queue(1, Chain.getNode()); |
10518 | 108 | SmallSet<SDNode *, 16> Visited; |
10519 | 108 | |
10520 | 108 | // First, search up the chain, branching to follow all token-factor operands. |
10521 | 108 | // If we find a consecutive load, then we're done, otherwise, record all |
10522 | 108 | // nodes just above the top-level loads and token factors. |
10523 | 740 | while (!Queue.empty()) { |
10524 | 633 | SDNode *ChainNext = Queue.pop_back_val(); |
10525 | 633 | if (!Visited.insert(ChainNext).second) |
10526 | 0 | continue; |
10527 | 633 | |
10528 | 633 | if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) { |
10529 | 386 | if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) |
10530 | 1 | return true; |
10531 | 385 | |
10532 | 385 | if (!Visited.count(ChainLD->getChain().getNode())) |
10533 | 21 | Queue.push_back(ChainLD->getChain().getNode()); |
10534 | 633 | } else if (ChainNext->getOpcode() == ISD::TokenFactor) { |
10535 | 140 | for (const SDUse &O : ChainNext->ops()) |
10536 | 504 | if (!Visited.count(O.getNode())) |
10537 | 504 | Queue.push_back(O.getNode()); |
10538 | 140 | } else |
10539 | 107 | LoadRoots.insert(ChainNext); |
10540 | 633 | } |
10541 | 108 | |
10542 | 108 | // Second, search down the chain, starting from the top-level nodes recorded |
10543 | 108 | // in the first phase. These top-level nodes are the nodes just above all |
10544 | 108 | // loads and token factors. Starting with their uses, recursively look through |
10545 | 108 | // all loads (just the chain uses) and token factors to find a consecutive |
10546 | 108 | // load. |
10547 | 107 | Visited.clear(); |
10548 | 107 | Queue.clear(); |
10549 | 107 | |
10550 | 107 | for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(), |
10551 | 165 | IE = LoadRoots.end(); I != IE; ++I) { |
10552 | 107 | Queue.push_back(*I); |
10553 | 2.47k | |
10554 | 2.47k | while (!Queue.empty()) { |
10555 | 2.41k | SDNode *LoadRoot = Queue.pop_back_val(); |
10556 | 2.41k | if (!Visited.insert(LoadRoot).second) |
10557 | 8 | continue; |
10558 | 2.40k | |
10559 | 2.40k | if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot)) |
10560 | 1.82k | if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) |
10561 | 49 | return true; |
10562 | 2.35k | |
10563 | 2.35k | for (SDNode::use_iterator UI = LoadRoot->use_begin(), |
10564 | 8.95k | UE = LoadRoot->use_end(); UI != UE; ++UI) |
10565 | 6.59k | if (((isa<MemSDNode>(*UI) && |
10566 | 2.28k | cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) || |
10567 | 6.59k | UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI)) |
10568 | 2.74k | Queue.push_back(*UI); |
10569 | 2.41k | } |
10570 | 107 | } |
10571 | 107 | |
10572 | 58 | return false; |
10573 | 108 | } |
10574 | | |
10575 | | /// This function is called when we have proved that a SETCC node can be replaced |
10576 | | /// by subtraction (and other supporting instructions) so that the result of |
10577 | | /// comparison is kept in a GPR instead of CR. This function is purely for |
10578 | | /// codegen purposes and has some flags to guide the codegen process. |
10579 | | static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, |
10580 | 54 | bool Swap, SDLoc &DL, SelectionDAG &DAG) { |
10581 | 54 | assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected."); |
10582 | 54 | |
10583 | 54 | // Zero extend the operands to the largest legal integer. Originally, they |
10584 | 54 | // must be of a strictly smaller size. |
10585 | 54 | auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0), |
10586 | 54 | DAG.getConstant(Size, DL, MVT::i32)); |
10587 | 54 | auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1), |
10588 | 54 | DAG.getConstant(Size, DL, MVT::i32)); |
10589 | 54 | |
10590 | 54 | // Swap if needed. Depends on the condition code. |
10591 | 54 | if (Swap) |
10592 | 27 | std::swap(Op0, Op1); |
10593 | 54 | |
10594 | 54 | // Subtract extended integers. |
10595 | 54 | auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1); |
10596 | 54 | |
10597 | 54 | // Move the sign bit to the least significant position and zero out the rest. |
10598 | 54 | // Now the least significant bit carries the result of original comparison. |
10599 | 54 | auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode, |
10600 | 54 | DAG.getConstant(Size - 1, DL, MVT::i32)); |
10601 | 54 | auto Final = Shifted; |
10602 | 54 | |
10603 | 54 | // Complement the result if needed. Based on the condition code. |
10604 | 54 | if (Complement) |
10605 | 26 | Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted, |
10606 | 26 | DAG.getConstant(1, DL, MVT::i64)); |
10607 | 54 | |
10608 | 54 | return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final); |
10609 | 54 | } |
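// Standalone sketch of the bit trick above (helper is ours, not part of the
// lowering): with both operands zero-extended, the sign bit of the 64-bit
// difference is set exactly when the first operand is smaller, so SETULT on
// 32-bit values becomes a subtract plus a shift by Size - 1 (63 here).
static inline uint64_t setultViaSub(uint32_t A, uint32_t B) {
  uint64_t Sub = (uint64_t)A - (uint64_t)B; // borrow sets bit 63 iff A < B
  return Sub >> 63;                         // 1 when A < B, else 0
}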
10610 | | |
10611 | | SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N, |
10612 | 238 | DAGCombinerInfo &DCI) const { |
10613 | 238 | assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected."); |
10614 | 238 | |
10615 | 238 | SelectionDAG &DAG = DCI.DAG; |
10616 | 238 | SDLoc DL(N); |
10617 | 238 | |
10618 | 238 | // Size of integers being compared has a critical role in the following |
10619 | 238 | // analysis, so we prefer to do this when all types are legal. |
10620 | 238 | if (!DCI.isAfterLegalizeVectorOps()) |
10621 | 125 | return SDValue(); |
10622 | 113 | |
10623 | 113 | // If all users of SETCC extend its value to a legal integer type |
10624 | 113 | // then we replace SETCC with a subtraction |
10625 | 113 | for (SDNode::use_iterator UI = N->use_begin(), |
10626 | 170 | UE = N->use_end(); UI != UE; ++UI) { |
10627 | 113 | if (UI->getOpcode() != ISD::ZERO_EXTEND) |
10628 | 56 | return SDValue(); |
10629 | 113 | } |
10630 | 113 | |
10631 | 57 | ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); |
10632 | 57 | auto OpSize = N->getOperand(0).getValueSizeInBits(); |
10633 | 57 | |
10634 | 57 | unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits(); |
10635 | 57 | |
10636 | 57 | if (OpSize < Size) { |
10637 | 54 | switch (CC) { |
10638 | 0 | default: break; |
10639 | 14 | case ISD::SETULT: |
10640 | 14 | return generateEquivalentSub(N, Size, false, false, DL, DAG); |
10641 | 13 | case ISD::SETULE: |
10642 | 13 | return generateEquivalentSub(N, Size, true, true, DL, DAG); |
10643 | 14 | case ISD::SETUGT: |
10644 | 14 | return generateEquivalentSub(N, Size, false, true, DL, DAG); |
10645 | 13 | case ISD::SETUGE: |
10646 | 13 | return generateEquivalentSub(N, Size, true, false, DL, DAG); |
10647 | 3 | } |
10648 | 3 | } |
10649 | 3 | |
10650 | 3 | return SDValue(); |
10651 | 3 | } |
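// The Complement/Swap flags passed above follow from standard identities
// (illustrative note):
//   a <= b  <=>  !(b < a)   -> SETULE: swap and complement
//   a >  b  <=>    b < a    -> SETUGT: swap only
//   a >= b  <=>  !(a < b)   -> SETUGE: complement only
// while SETULT reads the subtraction's sign bit directly.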
10652 | | |
10653 | | SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, |
10654 | 10.5k | DAGCombinerInfo &DCI) const { |
10655 | 10.5k | SelectionDAG &DAG = DCI.DAG; |
10656 | 10.5k | SDLoc dl(N); |
10657 | 10.5k | |
10658 | 10.5k | assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits"); |
10659 | 10.5k | // If we're tracking CR bits, we need to be careful that we don't have: |
10660 | 10.5k | // trunc(binary-ops(zext(x), zext(y))) |
10661 | 10.5k | // or |
10662 | 10.5k | // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...) |
10663 | 10.5k | // such that we're unnecessarily moving things into GPRs when it would be |
10664 | 10.5k | // better to keep them in CR bits. |
10665 | 10.5k | |
10666 | 10.5k | // Note that trunc here can be an actual i1 trunc, or can be the effective |
10667 | 10.5k | // truncation that comes from a setcc or select_cc. |
10668 | 10.5k | if (N->getOpcode() == ISD::TRUNCATE && |
10669 | 5.97k | N->getValueType(0) != MVT::i1) |
10670 | 5.63k | return SDValue(); |
10671 | 4.90k | |
10672 | 4.90k | if (N->getOperand(0).getValueType() != MVT::i32 && |
10673 | 3.54k | N->getOperand(0).getValueType() != MVT::i64) |
10674 | 2.81k | return SDValue(); |
10675 | 2.09k | |
10676 | 2.09k | if (N->getOpcode() == ISD::SETCC || |
10677 | 2.09k | N->getOpcode() == ISD::SELECT_CC) { |
10678 | 1.76k | // If we're looking at a comparison, then we need to make sure that the |
10679 | 1.76k | // high bits (all except for the first) don't matter the result. |
10680 | 1.76k | ISD::CondCode CC = |
10681 | 1.76k | cast<CondCodeSDNode>(N->getOperand( |
10682 | 1.76k | N->getOpcode() == ISD::SETCC ? 2 : 4))->get(); |
10683 | 1.76k | unsigned OpBits = N->getOperand(0).getValueSizeInBits(); |
10684 | 1.76k | |
10685 | 1.76k | if (ISD::isSignedIntSetCC(CC)) { |
10686 | 398 | if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits || |
10687 | 0 | DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits) |
10688 | 398 | return SDValue(); |
10689 | 1.36k | } else if (ISD::isUnsignedIntSetCC(CC)) { |
10690 | 343 | if (!DAG.MaskedValueIsZero(N->getOperand(0), |
10691 | 343 | APInt::getHighBitsSet(OpBits, OpBits-1)) || |
10692 | 10 | !DAG.MaskedValueIsZero(N->getOperand(1), |
10693 | 10 | APInt::getHighBitsSet(OpBits, OpBits-1))) |
10694 | 333 | return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI) |
10695 | 333 | : SDValue()); |
10696 | 1.02k | } else { |
10697 | 1.02k | // This is neither a signed nor an unsigned comparison, just make sure |
10698 | 1.02k | // that the high bits are equal. |
10699 | 1.02k | KnownBits Op1Known, Op2Known; |
10700 | 1.02k | DAG.computeKnownBits(N->getOperand(0), Op1Known); |
10701 | 1.02k | DAG.computeKnownBits(N->getOperand(1), Op2Known); |
10702 | 1.02k | |
10703 | 1.02k | // We don't really care about what is known about the first bit (if |
10704 | 1.02k | // anything), so clear it in all masks prior to comparing them. |
10705 | 1.02k | Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0); |
10706 | 1.02k | Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0); |
10707 | 1.02k | |
10708 | 1.02k | if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One) |
10709 | 614 | return SDValue(); |
10710 | 750 | } |
10711 | 1.76k | } |
10712 | 750 | |
10713 | 750 | // We now know that the higher-order bits are irrelevant, we just need to |
10714 | 750 | // make sure that all of the intermediate operations are bit operations, and |
10715 | 750 | // all inputs are extensions. |
10716 | 750 | if (N->getOperand(0).getOpcode() != ISD::AND && |
10717 | 738 | N->getOperand(0).getOpcode() != ISD::OR && |
10718 | 738 | N->getOperand(0).getOpcode() != ISD::XOR && |
10719 | 738 | N->getOperand(0).getOpcode() != ISD::SELECT && |
10720 | 738 | N->getOperand(0).getOpcode() != ISD::SELECT_CC && |
10721 | 738 | N->getOperand(0).getOpcode() != ISD::TRUNCATE && |
10722 | 560 | N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND && |
10723 | 559 | N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND && |
10724 | 559 | N->getOperand(0).getOpcode() != ISD::ANY_EXTEND) |
10725 | 559 | return SDValue(); |
10726 | 191 | |
10727 | 191 | if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) && |
10728 | 189 | N->getOperand(1).getOpcode() != ISD::AND && |
10729 | 179 | N->getOperand(1).getOpcode() != ISD::OR && |
10730 | 179 | N->getOperand(1).getOpcode() != ISD::XOR && |
10731 | 179 | N->getOperand(1).getOpcode() != ISD::SELECT && |
10732 | 179 | N->getOperand(1).getOpcode() != ISD::SELECT_CC && |
10733 | 179 | N->getOperand(1).getOpcode() != ISD::TRUNCATE && |
10734 | 1 | N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND && |
10735 | 1 | N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND && |
10736 | 1 | N->getOperand(1).getOpcode() != ISD::ANY_EXTEND) |
10737 | 1 | return SDValue(); |
10738 | 190 | |
10739 | 190 | SmallVector<SDValue, 4> Inputs; |
10740 | 190 | SmallVector<SDValue, 8> BinOps, PromOps; |
10741 | 190 | SmallPtrSet<SDNode *, 16> Visited; |
10742 | 190 | |
10743 | 566 | for (unsigned i = 0; i < 2; ++i) { |
10744 | 378 | if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND || |
10745 | 378 | N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND || |
10746 | 378 | N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) && |
10747 | 0 | N->getOperand(i).getOperand(0).getValueType() == MVT::i1) || |
10748 | 378 | isa<ConstantSDNode>(N->getOperand(i))) |
10749 | 0 | Inputs.push_back(N->getOperand(i)); |
10750 | 378 | else |
10751 | 378 | BinOps.push_back(N->getOperand(i)); |
10752 | 378 | |
10753 | 378 | if (N->getOpcode() == ISD::TRUNCATE) |
10754 | 2 | break; |
10755 | 378 | } |
10756 | 190 | |
10757 | 190 | // Visit all inputs, collect all binary operations (and, or, xor and |
10758 | 190 | // select) that are all fed by extensions. |
10759 | 196 | while (!BinOps.empty()) { |
10760 | 196 | SDValue BinOp = BinOps.back(); |
10761 | 196 | BinOps.pop_back(); |
10762 | 196 | |
10763 | 196 | if (!Visited.insert(BinOp.getNode()).second) |
10764 | 0 | continue; |
10765 | 196 | |
10766 | 196 | PromOps.push_back(BinOp); |
10767 | 196 | |
10768 | 208 | for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { |
10769 | 202 | // The condition of the select is not promoted. |
10770 | 202 | if (BinOp.getOpcode() == ISD::SELECT && i == 0) |
10771 | 0 | continue; |
10772 | 202 | if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) |
10773 | 0 | continue; |
10774 | 202 | |
10775 | 202 | if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || |
10776 | 202 | BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || |
10777 | 202 | BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) && |
10778 | 0 | BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) || |
10779 | 202 | isa<ConstantSDNode>(BinOp.getOperand(i))) { |
10780 | 6 | Inputs.push_back(BinOp.getOperand(i)); |
10781 | 202 | } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || |
10782 | 196 | BinOp.getOperand(i).getOpcode() == ISD::OR || |
10783 | 196 | BinOp.getOperand(i).getOpcode() == ISD::XOR || |
10784 | 196 | BinOp.getOperand(i).getOpcode() == ISD::SELECT || |
10785 | 196 | BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC || |
10786 | 196 | BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || |
10787 | 190 | BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || |
10788 | 190 | BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || |
10789 | 196 | BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) { |
10790 | 6 | BinOps.push_back(BinOp.getOperand(i)); |
10791 | 196 | } else { |
10792 | 190 | // We have an input that is not an extension or another binary |
10793 | 190 | // operation; we'll abort this transformation. |
10794 | 190 | return SDValue(); |
10795 | 190 | } |
10796 | 202 | } |
10797 | 196 | } |
10798 | 190 | |
10799 | 190 | // Make sure that this is a self-contained cluster of operations (which |
10800 | 190 | // is not quite the same thing as saying that everything has only one |
10801 | 190 | // use). |
10802 | 0 | for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { |
10803 | 0 | if (isa<ConstantSDNode>(Inputs[i])) |
10804 | 0 | continue; |
10805 | 0 | |
10806 | 0 | for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), |
10807 | 0 | UE = Inputs[i].getNode()->use_end(); |
10808 | 0 | UI != UE; ++UI) { |
10809 | 0 | SDNode *User = *UI; |
10810 | 0 | if (User != N && !Visited.count(User)) |
10811 | 0 | return SDValue(); |
10812 | 0 | |
10813 | 0 | // Make sure that we're not going to promote the non-output-value |
10814 | 0 | // operand(s) or SELECT or SELECT_CC. |
10815 | 0 | // FIXME: Although we could sometimes handle this, and it does occur in |
10816 | 0 | // practice that one of the condition inputs to the select is also one of |
10817 | 0 | // the outputs, we currently can't deal with this. |
10818 | 0 | if (User->getOpcode() == ISD::SELECT) { |
10819 | 0 | if (User->getOperand(0) == Inputs[i]) |
10820 | 0 | return SDValue(); |
10821 | 0 | } else if (User->getOpcode() == ISD::SELECT_CC) { |
10822 | 0 | if (User->getOperand(0) == Inputs[i] || |
10823 | 0 | User->getOperand(1) == Inputs[i]) |
10824 | 0 | return SDValue(); |
10825 | 0 | } |
10826 | 0 | } |
10827 | 0 | } |
10828 | 0 | |
10829 | 0 | for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { |
10830 | 0 | for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), |
10831 | 0 | UE = PromOps[i].getNode()->use_end(); |
10832 | 0 | UI != UE; ++UI) { |
10833 | 0 | SDNode *User = *UI; |
10834 | 0 | if (User != N && !Visited.count(User)) |
10835 | 0 | return SDValue(); |
10836 | 0 | |
10837 | 0 | // Make sure that we're not going to promote the non-output-value |
10838 | 0 | // operand(s) or SELECT or SELECT_CC. |
10839 | 0 | // FIXME: Although we could sometimes handle this, and it does occur in |
10840 | 0 | // practice that one of the condition inputs to the select is also one of |
10841 | 0 | // the outputs, we currently can't deal with this. |
10842 | 0 | if (User->getOpcode() == ISD::SELECT) { |
10843 | 0 | if (User->getOperand(0) == PromOps[i]) |
10844 | 0 | return SDValue(); |
10845 | 0 | } else if (User->getOpcode() == ISD::SELECT_CC) { |
10846 | 0 | if (User->getOperand(0) == PromOps[i] || |
10847 | 0 | User->getOperand(1) == PromOps[i]) |
10848 | 0 | return SDValue(); |
10849 | 0 | } |
10850 | 0 | } |
10851 | 0 | } |
10852 | 0 | |
10853 | 0 | // Replace all inputs with the extension operand. |
10854 | 0 | for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { |
10855 | 0 | // Constants may have users outside the cluster of to-be-promoted nodes, |
10856 | 0 | // and so we need to replace those as we do the promotions. |
10857 | 0 | if (isa<ConstantSDNode>(Inputs[i])) |
10858 | 0 | continue; |
10859 | 0 | else |
10860 | 0 | DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0)); |
10861 | 0 | } |
10862 | 0 | |
10863 | 0 | std::list<HandleSDNode> PromOpHandles; |
10864 | 0 | for (auto &PromOp : PromOps) |
10865 | 0 | PromOpHandles.emplace_back(PromOp); |
10866 | 0 | |
10867 | 0 | // Replace all operations (these are all the same, but have a different |
10868 | 0 | // (i1) return type). DAG.getNode will validate that the types of |
10869 | 0 | // a binary operator match, so go through the list in reverse so that |
10870 | 0 | // we've likely promoted both operands first. Any intermediate truncations or |
10871 | 0 | // extensions disappear. |
10872 | 0 | while (!PromOpHandles.empty()) { |
10873 | 0 | SDValue PromOp = PromOpHandles.back().getValue(); |
10874 | 0 | PromOpHandles.pop_back(); |
10875 | 0 | |
10876 | 0 | if (PromOp.getOpcode() == ISD::TRUNCATE || |
10877 | 0 | PromOp.getOpcode() == ISD::SIGN_EXTEND || |
10878 | 0 | PromOp.getOpcode() == ISD::ZERO_EXTEND || |
10879 | 0 | PromOp.getOpcode() == ISD::ANY_EXTEND) { |
10880 | 0 | if (!isa<ConstantSDNode>(PromOp.getOperand(0)) && |
10881 | 0 | PromOp.getOperand(0).getValueType() != MVT::i1) { |
10882 | 0 | // The operand is not yet ready (see comment below). |
10883 | 0 | PromOpHandles.emplace_front(PromOp); |
10884 | 0 | continue; |
10885 | 0 | } |
10886 | 0 | |
10887 | 0 | SDValue RepValue = PromOp.getOperand(0); |
10888 | 0 | if (isa<ConstantSDNode>(RepValue)) |
10889 | 0 | RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue); |
10890 | 0 | |
10891 | 0 | DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue); |
10892 | 0 | continue; |
10893 | 0 | } |
10894 | 0 | |
10895 | 0 | unsigned C; |
10896 | 0 | switch (PromOp.getOpcode()) { |
10897 | 0 | default: C = 0; break; |
10898 | 0 | case ISD::SELECT: C = 1; break; |
10899 | 0 | case ISD::SELECT_CC: C = 2; break; |
10900 | 0 | } |
10901 | 0 | |
10902 | 0 | if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) && |
10903 | 0 | PromOp.getOperand(C).getValueType() != MVT::i1) || |
10904 | 0 | (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) && |
10905 | 0 | PromOp.getOperand(C+1).getValueType() != MVT::i1)) { |
10906 | 0 | // The to-be-promoted operands of this node have not yet been |
10907 | 0 | // promoted (this should be rare because we're going through the |
10908 | 0 | // list backward, but if one of the operands has several users in |
10909 | 0 | // this cluster of to-be-promoted nodes, it is possible). |
10910 | 0 | PromOpHandles.emplace_front(PromOp); |
10911 | 0 | continue; |
10912 | 0 | } |
10913 | 0 | |
10914 | 0 | SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(), |
10915 | 0 | PromOp.getNode()->op_end()); |
10916 | 0 | |
10917 | 0 | // If there are any constant inputs, make sure they're replaced now. |
10918 | 0 | for (unsigned i = 0; i < 2; ++i) |
10919 | 0 | if (isa<ConstantSDNode>(Ops[C+i])) |
10920 | 0 | Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]); |
10921 | 0 | |
10922 | 0 | DAG.ReplaceAllUsesOfValueWith(PromOp, |
10923 | 0 | DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops)); |
10924 | 0 | } |
10925 | 0 | |
10926 | 0 | // Now we're left with the initial truncation itself. |
10927 | 0 | if (N->getOpcode() == ISD::TRUNCATE) |
10928 | 0 | return N->getOperand(0); |
10929 | 0 | |
10930 | 0 | // Otherwise, this is a comparison. The operands to be compared have just |
10931 | 0 | // changed type (to i1), but everything else is the same. |
10932 | 0 | return SDValue(N, 0); |
10933 | 0 | } |
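// Example of the pattern this combine targets (illustrative): in
//   (trunc (xor (zext i1 %a), (zext i1 %b)))
// only bit 0 ever matters, so the whole cluster can be rewritten as an i1
// xor of %a and %b and stay in CR bits instead of round-tripping via GPRs.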
10934 | | |
10935 | | SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, |
10936 | 4.43k | DAGCombinerInfo &DCI) const { |
10937 | 4.43k | SelectionDAG &DAG = DCI.DAG; |
10938 | 4.43k | SDLoc dl(N); |
10939 | 4.43k | |
10940 | 4.43k | // If we're tracking CR bits, we need to be careful that we don't have: |
10941 | 4.43k | // zext(binary-ops(trunc(x), trunc(y))) |
10942 | 4.43k | // or |
10943 | 4.43k | // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...) |
10944 | 4.43k | // such that we're unnecessarily moving things into CR bits that can more |
10945 | 4.43k | // efficiently stay in GPRs. Note that if we're not certain that the high |
10946 | 4.43k | // bits are set as required by the final extension, we still may need to do |
10947 | 4.43k | // some masking to get the proper behavior. |
10948 | 4.43k | |
10949 | 4.43k | // This same functionality is important on PPC64 when dealing with |
10950 | 4.43k | // 32-to-64-bit extensions; these occur often when 32-bit values are used as |
10951 | 4.43k | // the return values of functions. Because it is so similar, it is handled |
10952 | 4.43k | // here as well. |
10953 | 4.43k | |
10954 | 4.43k | if (N->getValueType(0) != MVT::i32 && |
10955 | 3.76k | N->getValueType(0) != MVT::i64) |
10956 | 246 | return SDValue(); |
10957 | 4.19k | |
10958 | 4.19k | if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) || |
10959 | 2.80k | (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64()))) |
10960 | 774 | return SDValue(); |
10961 | 3.41k | |
10962 | 3.41k | if (N->getOperand(0).getOpcode() != ISD::AND && |
10963 | 3.35k | N->getOperand(0).getOpcode() != ISD::OR && |
10964 | 3.31k | N->getOperand(0).getOpcode() != ISD::XOR && |
10965 | 3.29k | N->getOperand(0).getOpcode() != ISD::SELECT && |
10966 | 3.17k | N->getOperand(0).getOpcode() != ISD::SELECT_CC) |
10967 | 3.13k | return SDValue(); |
10968 | 286 | |
10969 | 286 | SmallVector<SDValue, 4> Inputs; |
10970 | 286 | SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps; |
10971 | 286 | SmallPtrSet<SDNode *, 16> Visited; |
10972 | 286 | |
10973 | 286 | // Visit all inputs, collect all binary operations (and, or, xor and |
10974 | 286 | // select) that are all fed by truncations. |
10975 | 472 | while (!BinOps.empty()) { |
10976 | 330 | SDValue BinOp = BinOps.back(); |
10977 | 330 | BinOps.pop_back(); |
10978 | 330 | |
10979 | 330 | if (!Visited.insert(BinOp.getNode()).second) |
10980 | 0 | continue; |
10981 | 330 | |
10982 | 330 | PromOps.push_back(BinOp); |
10983 | 330 | |
10984 | 958 | for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { |
10985 | 772 | // The condition of the select is not promoted. |
10986 | 772 | if (BinOp.getOpcode() == ISD::SELECT && i == 0) |
10987 | 139 | continue; |
10988 | 633 | if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) |
10989 | 111 | continue; |
10990 | 522 | |
10991 | 522 | if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || |
10992 | 522 | isa<ConstantSDNode>(BinOp.getOperand(i))) { |
10993 | 324 | Inputs.push_back(BinOp.getOperand(i)); |
10994 | 522 | } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || |
10995 | 184 | BinOp.getOperand(i).getOpcode() == ISD::OR || |
10996 | 172 | BinOp.getOperand(i).getOpcode() == ISD::XOR || |
10997 | 172 | BinOp.getOperand(i).getOpcode() == ISD::SELECT || |
10998 | 198 | BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) { |
10999 | 54 | BinOps.push_back(BinOp.getOperand(i)); |
11000 | 198 | } else { |
11001 | 144 | // We have an input that is not a truncation or another binary |
11002 | 144 | // operation; we'll abort this transformation. |
11003 | 144 | return SDValue(); |
11004 | 144 | } |
11005 | 772 | } |
11006 | 330 | } |
11007 | 286 | |
11008 | 286 | // The operands of a select that must be truncated when the select is |
11009 | 286 | // promoted because the operand is actually part of the to-be-promoted set. |
11010 | 142 | DenseMap<SDNode *, EVT> SelectTruncOp[2]; |
11011 | 142 | |
11012 | 142 | // Make sure that this is a self-contained cluster of operations (which |
11013 | 142 | // is not quite the same thing as saying that everything has only one |
11014 | 142 | // use). |
11015 | 435 | for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { |
11016 | 306 | if (isa<ConstantSDNode>(Inputs[i])) |
11017 | 212 | continue; |
11018 | 94 | |
11019 | 94 | for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), |
11020 | 94 | UE = Inputs[i].getNode()->use_end(); |
11021 | 195 | UI != UE; ++UI) { |
11022 | 114 | SDNode *User = *UI; |
11023 | 114 | if (User != N && !Visited.count(User)) |
11024 | 13 | return SDValue(); |
11025 | 101 | |
11026 | 101 | // If we're going to promote the non-output-value operand(s) or SELECT or |
11027 | 101 | // SELECT_CC, record them for truncation. |
11028 | 101 | if (User->getOpcode() == ISD::SELECT) { |
11029 | 60 | if (User->getOperand(0) == Inputs[i]) |
11030 | 0 | SelectTruncOp[0].insert(std::make_pair(User, |
11031 | 0 | User->getOperand(0).getValueType())); |
11032 | 101 | } else if (User->getOpcode() == ISD::SELECT_CC) { |
11033 | 16 | if (User->getOperand(0) == Inputs[i]) |
11034 | 12 | SelectTruncOp[0].insert(std::make_pair(User, |
11035 | 12 | User->getOperand(0).getValueType())); |
11036 | 16 | if (User->getOperand(1) == Inputs[i]) |
11037 | 2 | SelectTruncOp[1].insert(std::make_pair(User, |
11038 | 2 | User->getOperand(1).getValueType())); |
11039 | 41 | } |
11040 | 114 | } |
11041 | 306 | } |
11042 | 142 | |
11043 | 277 | for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { |
11044 | 153 | for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), |
11045 | 153 | UE = PromOps[i].getNode()->use_end(); |
11046 | 306 | UI != UE; ++UI) { |
11047 | 158 | SDNode *User = *UI; |
11048 | 158 | if (User != N && !Visited.count(User)) |
11049 | 5 | return SDValue(); |
11050 | 153 | |
11051 | 153 | // If we're going to promote the non-output-value operand(s) or SELECT or |
11052 | 153 | // SELECT_CC, record them for truncation. |
11053 | 153 | if (User->getOpcode() == ISD::SELECT) { |
11054 | 0 | if (User->getOperand(0) == PromOps[i]) |
11055 | 0 | SelectTruncOp[0].insert(std::make_pair(User, |
11056 | 0 | User->getOperand(0).getValueType())); |
11057 | 153 | } else if (User->getOpcode() == ISD::SELECT_CC) { |
11058 | 0 | if (User->getOperand(0) == PromOps[i]) |
11059 | 0 | SelectTruncOp[0].insert(std::make_pair(User, |
11060 | 0 | User->getOperand(0).getValueType())); |
11061 | 0 | if (User->getOperand(1) == PromOps[i]) |
11062 | 0 | SelectTruncOp[1].insert(std::make_pair(User, |
11063 | 0 | User->getOperand(1).getValueType())); |
11064 | 153 | } |
11065 | 158 | } |
11066 | 153 | } |
11067 | 129 | |
11068 | 124 | unsigned PromBits = N->getOperand(0).getValueSizeInBits(); |
11069 | 124 | bool ReallyNeedsExt = false; |
11070 | 124 | if (N->getOpcode() != ISD::ANY_EXTEND) { |
11071 | 51 | // If all of the inputs are not already sign/zero extended, then |
11072 | 51 | // we'll still need to do that at the end. |
11073 | 166 | for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { |
11074 | 121 | if (isa<ConstantSDNode>(Inputs[i])) |
11075 | 61 | continue; |
11076 | 60 | |
11077 | 60 | unsigned OpBits = |
11078 | 60 | Inputs[i].getOperand(0).getValueSizeInBits(); |
11079 | 60 | assert(PromBits < OpBits && "Truncation not to a smaller bit count?"); |
11080 | 60 | |
11081 | 60 | if ((N->getOpcode() == ISD::ZERO_EXTEND && |
11082 | 14 | !DAG.MaskedValueIsZero(Inputs[i].getOperand(0), |
11083 | 14 | APInt::getHighBitsSet(OpBits, |
11084 | 14 | OpBits-PromBits))) || |
11085 | 54 | (N->getOpcode() == ISD::SIGN_EXTEND && |
11086 | 46 | DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) < |
11087 | 60 | (OpBits-(PromBits-1)))) { |
11088 | 6 | ReallyNeedsExt = true; |
11089 | 6 | break; |
11090 | 6 | } |
11091 | 121 | } |
11092 | 51 | } |
11093 | 124 | |
11094 | 124 | // Replace all inputs, either with the truncation operand, or a |
11095 | 124 | // truncation or extension to the final output type. |
11096 | 396 | for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { |
11097 | 272 | // Constant inputs need to be replaced with the to-be-promoted nodes that |
11098 | 272 | // use them because they might have users outside of the cluster of |
11099 | 272 | // promoted nodes. |
11100 | 272 | if (isa<ConstantSDNode>(Inputs[i])) |
11101 | 195 | continue; |
11102 | 77 | |
11103 | 77 | SDValue InSrc = Inputs[i].getOperand(0); |
11104 | 77 | if (Inputs[i].getValueType() == N->getValueType(0)) |
11105 | 0 | DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc); |
11106 | 77 | else if (N->getOpcode() == ISD::SIGN_EXTEND) |
11107 | 46 | DAG.ReplaceAllUsesOfValueWith(Inputs[i], |
11108 | 46 | DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0))); |
11109 | 31 | else if (N->getOpcode() == ISD::ZERO_EXTEND) |
11110 | 15 | DAG.ReplaceAllUsesOfValueWith(Inputs[i], |
11111 | 15 | DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0))); |
11112 | 31 | else |
11113 | 16 | DAG.ReplaceAllUsesOfValueWith(Inputs[i], |
11114 | 16 | DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0))); |
11115 | 272 | } |
11116 | 124 | |
11117 | 124 | std::list<HandleSDNode> PromOpHandles; |
11118 | 124 | for (auto &PromOp : PromOps) |
11119 | 148 | PromOpHandles.emplace_back(PromOp); |
11120 | 124 | |
11121 | 124 | // Replace all operations (these are all the same, but have a different |
11122 | 124 | // (promoted) return type). DAG.getNode will validate that the types of |
11123 | 124 | // a binary operator match, so go through the list in reverse so that |
11124 | 124 | // we've likely promoted both operands first. |
11125 | 272 | while (!PromOpHandles.empty()) { |
11126 | 148 | SDValue PromOp = PromOpHandles.back().getValue(); |
11127 | 148 | PromOpHandles.pop_back(); |
11128 | 148 | |
11129 | 148 | unsigned C; |
11130 | 148 | switch (PromOp.getOpcode()) { |
11131 | 25 | default: C = 0; break; |
11132 | 110 | case ISD::SELECT: C = 1; break; |
11133 | 13 | case ISD::SELECT_CC: C = 2; break; |
11134 | 148 | } |
11135 | 148 | |
11136 | 148 | if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) && |
11137 | 55 | PromOp.getOperand(C).getValueType() != N->getValueType(0)) || |
11138 | 148 | (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) && |
11139 | 148 | PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) { |
11140 | 0 | // The to-be-promoted operands of this node have not yet been |
11141 | 0 | // promoted (this should be rare because we're going through the |
11142 | 0 | // list backward, but if one of the operands has several users in |
11143 | 0 | // this cluster of to-be-promoted nodes, it is possible). |
11144 | 0 | PromOpHandles.emplace_front(PromOp); |
11145 | 0 | continue; |
11146 | 0 | } |
11147 | 148 | |
11148 | 148 | // For SELECT and SELECT_CC nodes, we do a similar check for any |
11149 | 148 | // to-be-promoted comparison inputs. |
11150 | 148 | if (PromOp.getOpcode() == ISD::SELECT || |
11151 | 148 | PromOp.getOpcode() == ISD::SELECT_CC) { |
11152 | 123 | if ((SelectTruncOp[0].count(PromOp.getNode()) && |
11153 | 4 | PromOp.getOperand(0).getValueType() != N->getValueType(0)) || |
11154 | 123 | (SelectTruncOp[1].count(PromOp.getNode()) && |
11155 | 123 | PromOp.getOperand(1).getValueType() != N->getValueType(0))) { |
11156 | 0 | PromOpHandles.emplace_front(PromOp); |
11157 | 0 | continue; |
11158 | 0 | } |
11159 | 148 | } |
11160 | 148 | |
11161 | 148 | SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(), |
11162 | 148 | PromOp.getNode()->op_end()); |
11163 | 148 | |
11164 | 148 | // If this node has constant inputs, then they'll need to be promoted here. |
11165 | 444 | for (unsigned i = 0; i < 2; ++i) { |
11166 | 296 | if (!isa<ConstantSDNode>(Ops[C+i])) |
11167 | 101 | continue; |
11168 | 195 | if (Ops[C+i].getValueType() == N->getValueType(0)) |
11169 | 0 | continue; |
11170 | 195 | |
11171 | 195 | if (N->getOpcode() == ISD::SIGN_EXTEND) |
11172 | 0 | Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); |
11173 | 195 | else if (N->getOpcode() == ISD::ZERO_EXTEND) |
11174 | 65 | Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); |
11175 | 195 | else |
11176 | 130 | Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); |
11177 | 296 | } |
11178 | 148 | |
11179 | 148 | // If we've promoted the comparison inputs of a SELECT or SELECT_CC, |
11180 | 148 | // truncate them again to the original value type. |
11181 | 148 | if (PromOp.getOpcode() == ISD::SELECT || |
11182 | 148 | PromOp.getOpcode() == ISD::SELECT_CC) { |
11183 | 123 | auto SI0 = SelectTruncOp[0].find(PromOp.getNode()); |
11184 | 123 | if (SI0 != SelectTruncOp[0].end()) |
11185 | 4 | Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]); |
11186 | 123 | auto SI1 = SelectTruncOp[1].find(PromOp.getNode()); |
11187 | 123 | if (SI1 != SelectTruncOp[1].end()) |
11188 | 1 | Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]); |
11189 | 123 | } |
11190 | 148 | |
11191 | 148 | DAG.ReplaceAllUsesOfValueWith(PromOp, |
11192 | 148 | DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops)); |
11193 | 148 | } |
11194 | 124 | |
11195 | 124 | // Now we're left with the initial extension itself. |
11196 | 124 | if (!ReallyNeedsExt) |
11197 | 118 | return N->getOperand(0); |
11198 | 6 | |
11199 | 6 | // To zero extend, just mask off everything except for the first bit (in the |
11200 | 6 | // i1 case). |
11201 | 6 | if (N->getOpcode() == ISD::ZERO_EXTEND) |
11202 | 6 | return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0), |
11203 | 6 | DAG.getConstant(APInt::getLowBitsSet( |
11204 | 6 | N->getValueSizeInBits(0), PromBits), |
11205 | 6 | dl, N->getValueType(0))); |
11206 | 0 | |
11207 | 6 | assert(N->getOpcode() == ISD::SIGN_EXTEND && |
11208 | 0 | "Invalid extension type"); |
11209 | 0 | EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout()); |
11210 | 0 | SDValue ShiftCst = |
11211 | 0 | DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy); |
11212 | 0 | return DAG.getNode( |
11213 | 0 | ISD::SRA, dl, N->getValueType(0), |
11214 | 0 | DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst), |
11215 | 0 | ShiftCst); |
11216 | 0 | } |
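// Standalone sketch of the SHL/SRA fallback above (helper is ours): when the
// input is not known to be correctly sign-extended, shifting left and then
// arithmetic-shifting right by ValueBits - PromBits re-extends bit
// PromBits-1, e.g. for PromBits = 32 in a 64-bit register:
static inline int64_t resignExtend32(int64_t V) {
  // shl 32 then sra 32; the unsigned cast keeps the left shift well-defined.
  return (int64_t)((uint64_t)V << 32) >> 32;
}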
11217 | | |
11218 | | /// \brief Reduces the number of fp-to-int conversions when building a vector. |
11219 | | /// |
11220 | | /// If this vector is built out of floating to integer conversions, |
11221 | | /// transform it to a vector built out of floating point values followed by a |
11222 | | /// single floating to integer conversion of the vector. |
11223 | | /// Namely (build_vector (fptosi $A), (fptosi $B), ...) |
11224 | | /// becomes (fptosi (build_vector ($A, $B, ...))) |
11225 | | SDValue PPCTargetLowering:: |
11226 | | combineElementTruncationToVectorTruncation(SDNode *N, |
11227 | 178 | DAGCombinerInfo &DCI) const { |
11228 | 178 | assert(N->getOpcode() == ISD::BUILD_VECTOR && |
11229 | 178 | "Should be called with a BUILD_VECTOR node"); |
11230 | 178 | |
11231 | 178 | SelectionDAG &DAG = DCI.DAG; |
11232 | 178 | SDLoc dl(N); |
11233 | 178 | |
11234 | 178 | SDValue FirstInput = N->getOperand(0); |
11235 | 178 | assert(FirstInput.getOpcode() == PPCISD::MFVSR && |
11236 | 178 | "The input operand must be an fp-to-int conversion."); |
11237 | 178 | |
11238 | 178 | // This combine happens after legalization so the fp_to_[su]i nodes are |
11239 | 178 | // already converted to PPCISD nodes. |
11240 | 178 | unsigned FirstConversion = FirstInput.getOperand(0).getOpcode(); |
11241 | 178 | if (FirstConversion == PPCISD::FCTIDZ || |
11242 | 133 | FirstConversion == PPCISD::FCTIDUZ || |
11243 | 88 | FirstConversion == PPCISD::FCTIWZ || |
11244 | 178 | FirstConversion == PPCISD::FCTIWUZ) { |
11245 | 178 | bool IsSplat = true; |
11246 | 178 | bool Is32Bit = FirstConversion == PPCISD::FCTIWZ || |
11247 | 134 | FirstConversion == PPCISD::FCTIWUZ; |
11248 | 178 | EVT SrcVT = FirstInput.getOperand(0).getValueType(); |
11249 | 178 | SmallVector<SDValue, 4> Ops; |
11250 | 178 | EVT TargetVT = N->getValueType(0); |
11251 | 710 | for (int i = 0, e = N->getNumOperands(); i < e; ++i) { |
11252 | 532 | if (N->getOperand(i).getOpcode() != PPCISD::MFVSR) |
11253 | 0 | return SDValue(); |
11254 | 532 | unsigned NextConversion = N->getOperand(i).getOperand(0).getOpcode(); |
11255 | 532 | if (NextConversion != FirstConversion) |
11256 | 0 | return SDValue(); |
11257 | 532 | if (N->getOperand(i) != FirstInput) |
11258 | 290 | IsSplat = false; |
11259 | 532 | } |
11260 | 178 | |
11261 | 178 | // If this is a splat, we leave it as-is since there will be only a single |
11262 | 178 | // fp-to-int conversion followed by a splat of the integer. This is better |
11263 | 178 | // for 32-bit and smaller ints and neutral for 64-bit ints. |
11264 | 178 | if (IsSplat) |
11265 | 32 | return SDValue(); |
11266 | 146 | |
11267 | 146 | // Now that we know we have the right type of node, get its operands |
11268 | 582 | for (int i = 0, e = N->getNumOperands(); i < e; ++i) { |
11269 | 436 | SDValue In = N->getOperand(i).getOperand(0); |
11270 | 436 | // For 32-bit values, we need to add an FP_ROUND node. |
11271 | 436 | if (Is32Bit) { |
11272 | 288 | if (In.isUndef()) |
11273 | 0 | Ops.push_back(DAG.getUNDEF(SrcVT)); |
11274 | 288 | else { |
11275 | 288 | SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl, |
11276 | 288 | MVT::f32, In.getOperand(0), |
11277 | 288 | DAG.getIntPtrConstant(1, dl)); |
11278 | 288 | Ops.push_back(Trunc); |
11279 | 288 | } |
11280 | 288 | } else |
11281 | 148 | Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0)); |
11282 | 436 | } |
11283 | 146 | |
11284 | 146 | unsigned Opcode; |
11285 | 146 | if (FirstConversion == PPCISD::FCTIDZ || |
11286 | 109 | FirstConversion == PPCISD::FCTIWZ) |
11287 | 73 | Opcode = ISD::FP_TO_SINT; |
11288 | 146 | else |
11289 | 73 | Opcode = ISD::FP_TO_UINT; |
11290 | 146 | |
11291 | 146 | EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32; |
11292 | 178 | SDValue BV = DAG.getBuildVector(NewVT, dl, Ops); |
11293 | 178 | return DAG.getNode(Opcode, dl, TargetVT, BV); |
11294 | 178 | } |
11295 | 0 | return SDValue(); |
11296 | 0 | } |
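// Example of the rewrite (illustrative): on a VSX target,
//   (v4i32 build_vector (mfvsr (fctiwz %a)), ..., (mfvsr (fctiwz %d)))
// becomes
//   (fp_to_sint (v4f32 build_vector (fp_round %a), ..., (fp_round %d))),
// a single vector conversion instead of four scalar ones plus four moves.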
11297 | | |
11298 | | /// \brief Reduce the number of loads when building a vector. |
11299 | | /// |
11300 | | /// Building a vector out of multiple loads can be converted to a load |
11301 | | /// of the vector type if the loads are consecutive. If the loads are |
11302 | | /// consecutive but in descending order, a shuffle is added at the end |
11303 | | /// to reorder the vector. |
11304 | 2.04k | static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) { |
11305 | 2.04k | assert(N->getOpcode() == ISD::BUILD_VECTOR && |
11306 | 2.04k | "Should be called with a BUILD_VECTOR node"); |
11307 | 2.04k | |
11308 | 2.04k | SDLoc dl(N); |
11309 | 2.04k | bool InputsAreConsecutiveLoads = true; |
11310 | 2.04k | bool InputsAreReverseConsecutive = true; |
11311 | 2.04k | unsigned ElemSize = N->getValueType(0).getScalarSizeInBits() / 8; |
11312 | 2.04k | SDValue FirstInput = N->getOperand(0); |
11313 | 2.04k | bool IsRoundOfExtLoad = false; |
11314 | 2.04k | |
11315 | 2.04k | if (FirstInput.getOpcode() == ISD::FP_ROUND && |
11316 | 2.04k | FirstInput.getOperand(0).getOpcode() == ISD::LOAD98 ) { |
11317 | 48 | LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0)); |
11318 | 48 | IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD; |
11319 | 48 | } |
11320 | 2.04k | // Not a build vector of (possibly fp_rounded) loads. |
11321 | 2.04k | if (!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) |
11322 | 1.68k | return SDValue(); |
11323 | 360 | |
11324 | 536 | for (int i = 1, e = N->getNumOperands(); i < e; ++i) { |
11325 | 440 | // If any inputs are fp_round(extload), they all must be. |
11326 | 440 | if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND) |
11327 | 0 | return SDValue(); |
11328 | 440 | |
11329 | 440 | SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) : |
11330 | 400 | N->getOperand(i); |
11331 | 440 | if (NextInput.getOpcode() != ISD::LOAD) |
11332 | 0 | return SDValue(); |
11333 | 440 | |
11334 | 440 | SDValue PreviousInput = |
11335 | 440 | IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1); |
11336 | 440 | LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput); |
11337 | 440 | LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput); |
11338 | 440 | |
11339 | 440 | // If any inputs are fp_round(extload), they all must be. |
11340 | 440 | if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD) |
11341 | 0 | return SDValue(); |
11342 | 440 | |
11343 | 440 | if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG)) |
11344 | 368 | InputsAreConsecutiveLoads = false; |
11345 | 440 | if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG)) |
11346 | 336 | InputsAreReverseConsecutive = false; |
11347 | 440 | |
11348 | 440 | // Exit early if the loads are neither consecutive nor reverse consecutive. |
11349 | 440 | if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive) |
11350 | 264 | return SDValue(); |
11351 | 440 | } |
11352 | 360 | |
11353 | 96 | assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) && |
11354 | 96 | "The loads cannot be both consecutive and reverse consecutive."); |
11355 | 96 | |
11356 | 96 | SDValue FirstLoadOp = |
11357 | 96 | IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput; |
11358 | 96 | SDValue LastLoadOp = |
11359 | 8 | IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) : |
11360 | 88 | N->getOperand(N->getNumOperands()-1); |
11361 | 96 | |
11362 | 96 | LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp); |
11363 | 96 | LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp); |
11364 | 96 | if (InputsAreConsecutiveLoads) { |
11365 | 40 | assert(LD1 && "Input needs to be a LoadSDNode."); |
11366 | 40 | return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(), |
11367 | 40 | LD1->getBasePtr(), LD1->getPointerInfo(), |
11368 | 40 | LD1->getAlignment()); |
11369 | 40 | } |
11370 | 56 | if (InputsAreReverseConsecutive) { |
11371 | 56 | assert(LDL && "Input needs to be a LoadSDNode."); |
11372 | 56 | SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(), |
11373 | 56 | LDL->getBasePtr(), LDL->getPointerInfo(), |
11374 | 56 | LDL->getAlignment()); |
11375 | 56 | SmallVector<int, 16> Ops; |
11376 | 216 | for (int i = N->getNumOperands() - 1; i >= 0; i--) |
11377 | 160 | Ops.push_back(i); |
11378 | 56 | |
11379 | 56 | return DAG.getVectorShuffle(N->getValueType(0), dl, Load, |
11380 | 56 | DAG.getUNDEF(N->getValueType(0)), Ops); |
11381 | 56 | } |
11382 | 0 | return SDValue(); |
11383 | 0 | } |
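// Example of the reverse-consecutive case (illustrative): for
//   (v4i32 build_vector %d, %c, %b, %a)
// where %a..%d load from base, base+4, base+8 and base+12, the code above
// issues one vector load at %a's address, and the loop builds the mask
// <3,2,1,0> so the shuffle restores the requested element order.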
11384 | | |
11385 | | // This function adds the required vector_shuffle needed to get |
11386 | | // the elements of the vector extract in the correct position |
11387 | | // as specified by the CorrectElems encoding. |
11388 | | static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, |
11389 | | SDValue Input, uint64_t Elems, |
11390 | 10 | uint64_t CorrectElems) { |
11391 | 10 | SDLoc dl(N); |
11392 | 10 | |
11393 | 10 | unsigned NumElems = Input.getValueType().getVectorNumElements(); |
11394 | 10 | SmallVector<int, 16> ShuffleMask(NumElems, -1); |
11395 | 10 | |
11396 | 10 | // Knowing the element indices being extracted from the original |
11397 | 10 | // vector and the order in which they're being inserted, just put |
11398 | 10 | // them at element indices required for the instruction. |
11399 | 38 | for (unsigned i = 0; i < N->getNumOperands(); i++) { |
11400 | 28 | if (DAG.getDataLayout().isLittleEndian()) |
11401 | 14 | ShuffleMask[CorrectElems & 0xF] = Elems & 0xF; |
11402 | 28 | else |
11403 | 14 | ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4; |
11404 | 28 | CorrectElems = CorrectElems >> 8; |
11405 | 28 | Elems = Elems >> 8; |
11406 | 28 | } |
11407 | 10 | |
11408 | 10 | SDValue Shuffle = |
11409 | 10 | DAG.getVectorShuffle(Input.getValueType(), dl, Input, |
11410 | 10 | DAG.getUNDEF(Input.getValueType()), ShuffleMask); |
11411 | 10 | |
11412 | 10 | EVT Ty = N->getValueType(0); |
11413 | 10 | SDValue BV = DAG.getNode(PPCISD::SExtVElems, dl, Ty, Shuffle); |
11414 | 10 | return BV; |
11415 | 10 | } |
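// Example (illustrative): on LE, extending halfwords to words reads
// elements <0,2,4,6> (the 0x10325476 encoding below); if the build_vector
// extracted <1,3,5,7> instead, the shuffle built here first moves those
// elements into the slots the vector-extend instruction actually reads.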
11416 | | |
11417 | | // Look for build vector patterns where input operands come from sign |
11418 | | // extended vector_extract elements of specific indices. If the correct indices |
11419 | | // aren't used, add a vector shuffle to fix up the indices and create a new |
11420 | | // PPCISD:SExtVElems node which selects the vector sign extend instructions |
11421 | | // during instruction selection. |
11422 | 806 | static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) { |
11423 | 806 | // This array encodes the indices that the vector sign extend instructions |
11424 | 806 | // extract from when extending from one type to another for both BE and LE. |
11425 | 806 | // The right nibble of each byte corresponds to the LE indices, |
11426 | 806 | // and the left nibble of each byte corresponds to the BE indices. |
11427 | 806 | // For example: 0x3074B8FC byte->word |
11428 | 806 | // For LE: the allowed indices are: 0x0,0x4,0x8,0xC |
11429 | 806 | // For BE: the allowed indices are: 0x3,0x7,0xB,0xF |
11430 | 806 | // For example: 0x000070F8 byte->double word |
11431 | 806 | // For LE: the allowed indices are: 0x0,0x8 |
11432 | 806 | // For BE: the allowed indices are: 0x7,0xF |
11433 | 806 | uint64_t TargetElems[] = { |
11434 | 806 | 0x3074B8FC, // b->w |
11435 | 806 | 0x000070F8, // b->d |
11436 | 806 | 0x10325476, // h->w |
11437 | 806 | 0x00003074, // h->d |
11438 | 806 | 0x00001032, // w->d |
11439 | 806 | }; |
11440 | 806 | |
11441 | 806 | uint64_t Elems = 0; |
11442 | 806 | int Index; |
11443 | 806 | SDValue Input; |
11444 | 806 | |
11445 | 862 | auto isSExtOfVecExtract = [&](SDValue Op) -> bool { |
11446 | 862 | if (!Op) |
11447 | 0 | return false; |
11448 | 862 | if (Op.getOpcode() != ISD::SIGN_EXTEND) |
11449 | 762 | return false; |
11450 | 100 | |
11451 | 100 | SDValue Extract = Op.getOperand(0); |
11452 | 100 | if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT) |
11453 | 16 | return false; |
11454 | 84 | |
11455 | 84 | ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1)); |
11456 | 84 | if (!ExtOp) |
11457 | 0 | return false; |
11458 | 84 | |
11459 | 84 | Index = ExtOp->getZExtValue(); |
11460 | 84 | if (Input && Input != Extract.getOperand(0)) |
11461 | 4 | return false; |
11462 | 80 | |
11463 | 80 | if (!Input) |
11464 | 28 | Input = Extract.getOperand(0); |
11465 | 80 | |
11466 | 80 | Elems = Elems << 8; |
11467 | 80 | Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4; |
11468 | 862 | Elems |= Index; |
11469 | 862 | |
11470 | 862 | return true; |
11471 | 862 | }; |
11472 | 806 | |
11473 | 806 | // If the build vector operands aren't sign extended vector extracts of |
11474 | 806 | // the same input vector, then return. |
11475 | 886 | for (unsigned i = 0; i < N->getNumOperands()886 ; i++80 ) { |
11476 | 862 | if (!isSExtOfVecExtract(N->getOperand(i))862 ) { |
11477 | 782 | return SDValue(); |
11478 | 782 | } |
11479 | 862 | } |
11480 | 806 | |
11481 | 806 | // If the vector extract indices are not correct, add the appropriate |
11482 | 806 | // vector_shuffle. |
11483 | 24 | int TgtElemArrayIdx; |
11484 | 24 | int InputSize = Input.getValueType().getScalarSizeInBits(); |
11485 | 24 | int OutputSize = N->getValueType(0).getScalarSizeInBits(); |
11486 | 24 | if (InputSize + OutputSize == 40) |
11487 | 4 | TgtElemArrayIdx = 0; |
11488 | 20 | else if (InputSize + OutputSize == 72) |
11489 | 4 | TgtElemArrayIdx = 1; |
11490 | 16 | else if (InputSize + OutputSize == 48) |
11491 | 4 | TgtElemArrayIdx = 2; |
11492 | 12 | else if (InputSize + OutputSize == 80) |
11493 | 4 | TgtElemArrayIdx = 3; |
11494 | 8 | else if (InputSize + OutputSize == 96) |
11495 | 6 | TgtElemArrayIdx = 4; |
11496 | 8 | else |
11497 | 2 | return SDValue(); |
11498 | 22 | |
11499 | 22 | uint64_t CorrectElems = TargetElems[TgtElemArrayIdx]; |
11500 | 22 | CorrectElems = DAG.getDataLayout().isLittleEndian() |
11501 | 11 | ? CorrectElems & 0x0F0F0F0F0F0F0F0F |
11502 | 11 | : CorrectElems & 0xF0F0F0F0F0F0F0F0; |
11503 | 22 | if (Elems != CorrectElems) { |
11504 | 10 | return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems); |
11505 | 10 | } |
11506 | 12 | |
11507 | 12 | // Regular lowering will catch cases where a shuffle is not needed. |
11508 | 12 | return SDValue(); |
11509 | 12 | } |
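// Sketch of how one TargetElems entry decodes (helper is ours, purely
// illustrative): one byte per build_vector operand, with the right nibble
// holding the LE index and the left nibble the BE index, matching the
// encoding loop in isSExtOfVecExtract above.
static inline int decodeTargetElemIndex(uint64_t Enc, unsigned Byte,
                                        bool IsLittleEndian) {
  uint8_t B = (Enc >> (8 * Byte)) & 0xFF;
  return IsLittleEndian ? (B & 0xF) : ((B & 0xF0) >> 4);
}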
11510 | | |
11511 | | SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N, |
11512 | 2.59k | DAGCombinerInfo &DCI) const { |
11513 | 2.59k | assert(N->getOpcode() == ISD::BUILD_VECTOR && |
11514 | 2.59k | "Should be called with a BUILD_VECTOR node"); |
11515 | 2.59k | |
11516 | 2.59k | SelectionDAG &DAG = DCI.DAG; |
11517 | 2.59k | SDLoc dl(N); |
11518 | 2.59k | |
11519 | 2.59k | if (!Subtarget.hasVSX()) |
11520 | 400 | return SDValue(); |
11521 | 2.19k | |
11522 | 2.19k | // The target independent DAG combiner will leave a build_vector of |
11523 | 2.19k | // float-to-int conversions intact. We can generate MUCH better code for |
11524 | 2.19k | // a float-to-int conversion of a vector of floats. |
11525 | 2.19k | SDValue FirstInput = N->getOperand(0); |
11526 | 2.19k | if (FirstInput.getOpcode() == PPCISD::MFVSR) {
11527 | 178 | SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI); |
11528 | 178 | if (Reduced) |
11529 | 146 | return Reduced; |
11530 | 2.04k | } |
11531 | 2.04k | |
11532 | 2.04k | // If we're building a vector out of consecutive loads, just load that |
11533 | 2.04k | // vector type. |
11534 | 2.04k | SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG); |
11535 | 2.04k | if (Reduced) |
11536 | 96 | return Reduced; |
11537 | 1.95k | |
11538 | 1.95k | // If we're building a vector out of extended elements from another vector |
11539 | 1.95k | // we have P9 vector integer extend instructions. |
11540 | 1.95k | if (Subtarget.hasP9Altivec()) {
11541 | 806 | Reduced = combineBVOfVecSExt(N, DAG); |
11542 | 806 | if (Reduced) |
11543 | 10 | return Reduced; |
11544 | 1.94k | } |
11545 | 1.94k | |
11546 | 1.94k | |
11547 | 1.94k | if (N->getValueType(0) != MVT::v2f64)
11548 | 1.78k | return SDValue(); |
11549 | 153 | |
11550 | 153 | // Looking for: |
11551 | 153 | // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1)) |
11552 | 153 | if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
11553 | 134 | FirstInput.getOpcode() != ISD::UINT_TO_FP) |
11554 | 130 | return SDValue(); |
11555 | 23 | if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
11556 | 4 | N->getOperand(1).getOpcode() != ISD::UINT_TO_FP) |
11557 | 0 | return SDValue(); |
11558 | 23 | if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
11559 | 0 | return SDValue(); |
11560 | 23 | |
11561 | 23 | SDValue Ext1 = FirstInput.getOperand(0); |
11562 | 23 | SDValue Ext2 = N->getOperand(1).getOperand(0); |
11563 | 23 | if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
11564 | 5 | Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT) |
11565 | 18 | return SDValue(); |
11566 | 5 | |
11567 | 5 | ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1)); |
11568 | 5 | ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1)); |
11569 | 5 | if (!Ext1Op || !Ext2Op)
11570 | 0 | return SDValue(); |
11571 | 5 | if (Ext1.getValueType() != MVT::i32 ||
11572 | 5 | Ext2.getValueType() != MVT::i32)
11573 | 0 | if (Ext1.getOperand(0) != Ext2.getOperand(0))
11574 | 0 | return SDValue(); |
11575 | 5 | |
11576 | 5 | int FirstElem = Ext1Op->getZExtValue(); |
11577 | 5 | int SecondElem = Ext2Op->getZExtValue(); |
11578 | 5 | int SubvecIdx; |
11579 | 5 | if (FirstElem == 0 && SecondElem == 1)
11580 | 5 | SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
11581 | 0 | else if (FirstElem == 2 && SecondElem == 3)
11582 | 0 | SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
11583 | 0 | else |
11584 | 0 | return SDValue(); |
11585 | 5 | |
11586 | 5 | SDValue SrcVec = Ext1.getOperand(0); |
11587 | 5 | auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ? |
11588 | 5 | PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
11589 | 2.59k | return DAG.getNode(NodeType, dl, MVT::v2f64, |
11590 | 2.59k | SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl)); |
11591 | 2.59k | } |
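 | | 
 | | // Illustrative sketch (assumed example, not from the original source):
 | | //   (v2f64 build_vector (sint_to_fp (extractelt v4i32 %v, 0)),
 | | //                       (sint_to_fp (extractelt v4i32 %v, 1)))
 | | // matches the pattern above; on little endian SubvecIdx is 1, giving
 | | //   (SINT_VEC_TO_FP %v, 1)
 | | // which can be selected to a single vector conversion (e.g. xvcvsxwdp)
 | | // instead of two scalar conversions plus a rebuild of the vector.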
11592 | | |
11593 | | SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N, |
11594 | 353 | DAGCombinerInfo &DCI) const { |
11595 | 353 | assert((N->getOpcode() == ISD::SINT_TO_FP || |
11596 | 353 | N->getOpcode() == ISD::UINT_TO_FP) && |
11597 | 353 | "Need an int -> FP conversion node here"); |
11598 | 353 | |
11599 | 353 | if (useSoftFloat() || !Subtarget.has64BitSupport())
11600 | 17 | return SDValue(); |
11601 | 336 | |
11602 | 336 | SelectionDAG &DAG = DCI.DAG; |
11603 | 336 | SDLoc dl(N); |
11604 | 336 | SDValue Op(N, 0); |
11605 | 336 | |
11606 | 336 | SDValue FirstOperand(Op.getOperand(0)); |
11607 | 336 | bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD && |
11608 | 59 | (FirstOperand.getValueType() == MVT::i8 || |
11609 | 59 | FirstOperand.getValueType() == MVT::i16); |
11610 | 336 | if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
11611 | 32 | bool Signed = N->getOpcode() == ISD::SINT_TO_FP; |
11612 | 32 | bool DstDouble = Op.getValueType() == MVT::f64; |
11613 | 32 | unsigned ConvOp = Signed ? |
11614 | 16 | (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
11615 | 16 | (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
11616 | 32 | SDValue WidthConst = |
11617 | 32 | DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
11618 | 32 | dl, false); |
11619 | 32 | LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode()); |
11620 | 32 | SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst }; |
11621 | 32 | SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl, |
11622 | 32 | DAG.getVTList(MVT::f64, MVT::Other), |
11623 | 32 | Ops, MVT::i8, LDN->getMemOperand()); |
11624 | 32 | |
11625 | 32 | // For signed conversion, we need to sign-extend the value in the VSR |
11626 | 32 | if (Signed) {
11627 | 16 | SDValue ExtOps[] = { Ld, WidthConst }; |
11628 | 16 | SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps); |
11629 | 16 | return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
11630 | 16 | } else |
11631 | 16 | return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
11632 | 304 | } |
11633 | 304 | |
11634 | 304 | // Don't handle ppc_fp128 here or i1 conversions. |
11635 | 304 | if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
11636 | 52 | return SDValue(); |
11637 | 252 | if (Op.getOperand(0).getValueType() == MVT::i1)
11638 | 0 | return SDValue(); |
11639 | 252 | |
11640 | 252 | // For i32 intermediate values, unfortunately, the conversion functions |
11641 | 252 | // leave the upper 32 bits of the value undefined. Within the set of
11642 | 252 | // scalar instructions, we have no method for zero- or sign-extending the |
11643 | 252 | // value. Thus, we cannot handle i32 intermediate values here. |
11644 | 252 | if (Op.getOperand(0).getValueType() == MVT::i32)
11645 | 131 | return SDValue(); |
11646 | 121 | |
11647 | 252 | assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && |
11648 | 121 | "UINT_TO_FP is supported only with FPCVT"); |
11649 | 121 | |
11650 | 121 | // If we have FCFIDS, then use it when converting to single-precision. |
11651 | 121 | // Otherwise, convert to double-precision and then round. |
11652 | 103 | unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) |
11653 | 54 | ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
11654 | 54 | : PPCISD::FCFIDS)
11655 | 67 | : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
11656 | 67 | : PPCISD::FCFID); |
11657 | 103 | MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) |
11658 | 54 | ? MVT::f32 |
11659 | 67 | : MVT::f64; |
11660 | 121 | |
11661 | 121 | // If we're converting from a float to an int and back to a float again,
11662 | 121 | // then we don't need the store/load pair at all. |
11663 | 121 | if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT && |
11664 | 3 | Subtarget.hasFPCVT()) || |
11665 | 121 | (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
11666 | 16 | SDValue Src = Op.getOperand(0).getOperand(0); |
11667 | 16 | if (Src.getValueType() == MVT::f32) {
11668 | 5 | Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); |
11669 | 5 | DCI.AddToWorklist(Src.getNode()); |
11670 | 16 | } else if (Src.getValueType() != MVT::f64) {
11671 | 1 | // Make sure that we don't pick up a ppc_fp128 source value. |
11672 | 1 | return SDValue(); |
11673 | 1 | } |
11674 | 15 | |
11675 | 15 | unsigned FCTOp = |
11676 | 12 | Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ : |
11677 | 3 | PPCISD::FCTIDUZ; |
11678 | 15 | |
11679 | 15 | SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src); |
11680 | 15 | SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp); |
11681 | 15 | |
11682 | 15 | if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
11683 | 3 | FP = DAG.getNode(ISD::FP_ROUND, dl, |
11684 | 3 | MVT::f32, FP, DAG.getIntPtrConstant(0, dl)); |
11685 | 3 | DCI.AddToWorklist(FP.getNode()); |
11686 | 3 | } |
11687 | 16 | |
11688 | 16 | return FP; |
11689 | 16 | } |
11690 | 105 | |
11691 | 105 | return SDValue(); |
11692 | 105 | } |
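 | | 
 | | // For example (a hedged sketch; the function is illustrative only), a
 | | // float->int->float round trip such as
 | | //   double roundTowardZero(double D) { return (double)(long long)D; }
 | | // takes the FP_TO_SINT path above and lowers to fctidz followed by fcfid,
 | | // with no store/load pair to carry the integer through memory.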
11693 | | |
11694 | | // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for |
11695 | | // builtins) into loads with swaps. |
11696 | | SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N, |
11697 | 402 | DAGCombinerInfo &DCI) const { |
11698 | 402 | SelectionDAG &DAG = DCI.DAG; |
11699 | 402 | SDLoc dl(N); |
11700 | 402 | SDValue Chain; |
11701 | 402 | SDValue Base; |
11702 | 402 | MachineMemOperand *MMO; |
11703 | 402 | |
11704 | 402 | switch (N->getOpcode()) { |
11705 | 0 | default: |
11706 | 0 | llvm_unreachable("Unexpected opcode for little endian VSX load"); |
11707 | 388 | case ISD::LOAD: { |
11708 | 388 | LoadSDNode *LD = cast<LoadSDNode>(N); |
11709 | 388 | Chain = LD->getChain(); |
11710 | 388 | Base = LD->getBasePtr(); |
11711 | 388 | MMO = LD->getMemOperand(); |
11712 | 388 | // If the MMO suggests this isn't a load of a full vector, leave |
11713 | 388 | // things alone. For a built-in, we have to make the change for |
11714 | 388 | // correctness, so if there is a size problem that will be a bug. |
11715 | 388 | if (MMO->getSize() < 16) |
11716 | 0 | return SDValue(); |
11717 | 388 | break; |
11718 | 388 | } |
11719 | 14 | case ISD::INTRINSIC_W_CHAIN: { |
11720 | 14 | MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N); |
11721 | 14 | Chain = Intrin->getChain(); |
11722 | 14 | // Similarly to the store case below, Intrin->getBasePtr() doesn't get |
11723 | 14 | // us what we want. Get operand 2 instead. |
11724 | 14 | Base = Intrin->getOperand(2); |
11725 | 14 | MMO = Intrin->getMemOperand(); |
11726 | 14 | break; |
11727 | 402 | } |
11728 | 402 | } |
11729 | 402 | |
11730 | 402 | MVT VecTy = N->getValueType(0).getSimpleVT(); |
11731 | 402 | |
11732 | 402 | // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is |
11733 | 402 | // aligned and the type is a vector with elements up to 4 bytes |
11734 | 402 | if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16)
11735 | 402 | && VecTy.getScalarSizeInBits() <= 32) {
11736 | 151 | return SDValue(); |
11737 | 151 | } |
11738 | 251 | |
11739 | 251 | SDValue LoadOps[] = { Chain, Base }; |
11740 | 251 | SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl, |
11741 | 251 | DAG.getVTList(MVT::v2f64, MVT::Other), |
11742 | 251 | LoadOps, MVT::v2f64, MMO); |
11743 | 251 | |
11744 | 251 | DCI.AddToWorklist(Load.getNode()); |
11745 | 251 | Chain = Load.getValue(1); |
11746 | 251 | SDValue Swap = DAG.getNode( |
11747 | 251 | PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load); |
11748 | 251 | DCI.AddToWorklist(Swap.getNode()); |
11749 | 251 | |
11750 | 251 | // Add a bitcast if the resulting load type doesn't match v2f64. |
11751 | 251 | if (VecTy != MVT::v2f64) {
11752 | 83 | SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap); |
11753 | 83 | DCI.AddToWorklist(N.getNode()); |
11754 | 83 | // Package {bitcast value, swap's chain} to match Load's shape. |
11755 | 83 | return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other), |
11756 | 83 | N, Swap.getValue(1)); |
11757 | 83 | } |
11758 | 168 | |
11759 | 168 | return Swap; |
11760 | 168 | } |
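 | | 
 | | // A hedged sketch of the rewrite (not from the original file): on a
 | | // little-endian POWER8 subtarget, a v2f64 load that reaches this point
 | | // becomes
 | | //   (v2f64 (PPCISD::XXSWAPD (PPCISD::LXVD2X $ptr)))
 | | // since lxvd2x loads the doublewords in big-endian element order and the
 | | // xxswapd restores little-endian lane numbering; a later pass (e.g.
 | | // PPCVSXSwapRemoval) may cancel swap pairs it can prove redundant.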
11761 | | |
11762 | | // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for |
11763 | | // builtins) into stores with swaps. |
11764 | | SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N, |
11765 | 264 | DAGCombinerInfo &DCI) const { |
11766 | 264 | SelectionDAG &DAG = DCI.DAG; |
11767 | 264 | SDLoc dl(N); |
11768 | 264 | SDValue Chain; |
11769 | 264 | SDValue Base; |
11770 | 264 | unsigned SrcOpnd; |
11771 | 264 | MachineMemOperand *MMO; |
11772 | 264 | |
11773 | 264 | switch (N->getOpcode()) { |
11774 | 0 | default: |
11775 | 0 | llvm_unreachable("Unexpected opcode for little endian VSX store"); |
11776 | 252 | case ISD::STORE: { |
11777 | 252 | StoreSDNode *ST = cast<StoreSDNode>(N); |
11778 | 252 | Chain = ST->getChain(); |
11779 | 252 | Base = ST->getBasePtr(); |
11780 | 252 | MMO = ST->getMemOperand(); |
11781 | 252 | SrcOpnd = 1; |
11782 | 252 | // If the MMO suggests this isn't a store of a full vector, leave |
11783 | 252 | // things alone. For a built-in, we have to make the change for |
11784 | 252 | // correctness, so if there is a size problem that will be a bug. |
11785 | 252 | if (MMO->getSize() < 16) |
11786 | 0 | return SDValue(); |
11787 | 252 | break; |
11788 | 252 | } |
11789 | 12 | case ISD::INTRINSIC_VOID: { |
11790 | 12 | MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N); |
11791 | 12 | Chain = Intrin->getChain(); |
11792 | 12 | // Intrin->getBasePtr() oddly does not get what we want. |
11793 | 12 | Base = Intrin->getOperand(3); |
11794 | 12 | MMO = Intrin->getMemOperand(); |
11795 | 12 | SrcOpnd = 2; |
11796 | 12 | break; |
11797 | 264 | } |
11798 | 264 | } |
11799 | 264 | |
11800 | 264 | SDValue Src = N->getOperand(SrcOpnd); |
11801 | 264 | MVT VecTy = Src.getValueType().getSimpleVT(); |
11802 | 264 | |
11803 | 264 | // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the store is
11804 | 264 | // aligned and the type is a vector with elements up to 4 bytes
11805 | 264 | if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16)
11806 | 264 | && VecTy.getScalarSizeInBits() <= 32) {
11807 | 145 | return SDValue(); |
11808 | 145 | } |
11809 | 119 | |
11810 | 119 | // All stores are done as v2f64 and possible bit cast. |
11811 | 119 | if (VecTy != MVT::v2f64) {
11812 | 71 | Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src); |
11813 | 71 | DCI.AddToWorklist(Src.getNode()); |
11814 | 71 | } |
11815 | 264 | |
11816 | 264 | SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl, |
11817 | 264 | DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src); |
11818 | 264 | DCI.AddToWorklist(Swap.getNode()); |
11819 | 264 | Chain = Swap.getValue(1); |
11820 | 264 | SDValue StoreOps[] = { Chain, Swap, Base }; |
11821 | 264 | SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl, |
11822 | 264 | DAG.getVTList(MVT::Other), |
11823 | 264 | StoreOps, VecTy, MMO); |
11824 | 264 | DCI.AddToWorklist(Store.getNode()); |
11825 | 264 | return Store; |
11826 | 264 | } |
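 | | 
 | | // The store-side mirror of the load sketch above: a v2f64 store becomes
 | | //   (PPCISD::STXVD2X (PPCISD::XXSWAPD $val), $ptr)
 | | // so that stxvd2x, which writes doublewords in big-endian order, puts the
 | | // lanes back in the memory layout the program expects.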
11827 | | |
11828 | | SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, |
11829 | 83.6k | DAGCombinerInfo &DCI) const { |
11830 | 83.6k | SelectionDAG &DAG = DCI.DAG; |
11831 | 83.6k | SDLoc dl(N); |
11832 | 83.6k | switch (N->getOpcode()) { |
11833 | 27.3k | default: break; |
11834 | 1.40k | case ISD::SHL: |
11835 | 1.40k | return combineSHL(N, DCI); |
11836 | 400 | case ISD::SRA: |
11837 | 400 | return combineSRA(N, DCI); |
11838 | 569 | case ISD::SRL: |
11839 | 569 | return combineSRL(N, DCI); |
11840 | 60 | case PPCISD::SHL: |
11841 | 60 | if (isNullConstant(N->getOperand(0))) // 0 << V -> 0. |
11842 | 2 | return N->getOperand(0); |
11843 | 58 | break; |
11844 | 57 | case PPCISD::SRL: |
11845 | 57 | if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0. |
11846 | 1 | return N->getOperand(0); |
11847 | 56 | break; |
11848 | 21 | case PPCISD::SRA: |
11849 | 21 | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
11850 | 0 | if (C->isNullValue() || // 0 >>s V -> 0. |
11851 | 0 | C->isAllOnesValue()) // -1 >>s V -> -1. |
11852 | 0 | return N->getOperand(0); |
11853 | 21 | } |
11854 | 21 | break; |
11855 | 4.43k | case ISD::SIGN_EXTEND: |
11856 | 4.43k | case ISD::ZERO_EXTEND: |
11857 | 4.43k | case ISD::ANY_EXTEND: |
11858 | 4.43k | return DAGCombineExtBoolTrunc(N, DCI); |
11859 | 10.5k | case ISD::TRUNCATE: |
11860 | 10.5k | case ISD::SETCC: |
11861 | 10.5k | case ISD::SELECT_CC: |
11862 | 10.5k | return DAGCombineTruncBoolExt(N, DCI); |
11863 | 353 | case ISD::SINT_TO_FP: |
11864 | 353 | case ISD::UINT_TO_FP: |
11865 | 353 | return combineFPToIntToFP(N, DCI); |
11866 | 14.6k | case ISD::STORE: { |
11867 | 14.6k | EVT Op1VT = N->getOperand(1).getValueType(); |
11868 | 14.6k | bool ValidTypeForStoreFltAsInt = (Op1VT == MVT::i32) || |
11869 | 8.79k | (Subtarget.hasP9Vector() && (Op1VT == MVT::i8 || Op1VT == MVT::i16));
11870 | 14.6k | |
11871 | 14.6k | // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)). |
11872 | 14.6k | if (Subtarget.hasSTFIWX() && !cast<StoreSDNode>(N)->isTruncatingStore() &&
11873 | 8.44k | N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
11874 | 19 | ValidTypeForStoreFltAsInt &&
11875 | 14.6k | N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
11876 | 18 | SDValue Val = N->getOperand(1).getOperand(0); |
11877 | 18 | if (Val.getValueType() == MVT::f32) {
11878 | 11 | Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val); |
11879 | 11 | DCI.AddToWorklist(Val.getNode()); |
11880 | 11 | } |
11881 | 18 | Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val); |
11882 | 18 | DCI.AddToWorklist(Val.getNode()); |
11883 | 18 | |
11884 | 18 | if (Op1VT == MVT::i32) {
11885 | 10 | SDValue Ops[] = { |
11886 | 10 | N->getOperand(0), Val, N->getOperand(2), |
11887 | 10 | DAG.getValueType(N->getOperand(1).getValueType()) |
11888 | 10 | }; |
11889 | 10 | |
11890 | 10 | Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, |
11891 | 10 | DAG.getVTList(MVT::Other), Ops, |
11892 | 10 | cast<StoreSDNode>(N)->getMemoryVT(), |
11893 | 10 | cast<StoreSDNode>(N)->getMemOperand()); |
11894 | 18 | } else { |
11895 | 8 | unsigned WidthInBytes = |
11896 | 8 | N->getOperand(1).getValueType() == MVT::i8 ? 1 : 2;
11897 | 8 | SDValue WidthConst = DAG.getIntPtrConstant(WidthInBytes, dl, false); |
11898 | 8 | |
11899 | 8 | SDValue Ops[] = { |
11900 | 8 | N->getOperand(0), Val, N->getOperand(2), WidthConst, |
11901 | 8 | DAG.getValueType(N->getOperand(1).getValueType()) |
11902 | 8 | }; |
11903 | 8 | Val = DAG.getMemIntrinsicNode(PPCISD::STXSIX, dl, |
11904 | 8 | DAG.getVTList(MVT::Other), Ops, |
11905 | 8 | cast<StoreSDNode>(N)->getMemoryVT(), |
11906 | 8 | cast<StoreSDNode>(N)->getMemOperand()); |
11907 | 8 | } |
11908 | 18 | |
11909 | 18 | DCI.AddToWorklist(Val.getNode()); |
11910 | 18 | return Val; |
11911 | 18 | } |
11912 | 14.6k | |
11913 | 14.6k | // Turn STORE (BSWAP) -> sthbrx/stwbrx. |
11914 | 14.6k | if (cast<StoreSDNode>(N)->isUnindexed() &&
11915 | 14.6k | N->getOperand(1).getOpcode() == ISD::BSWAP &&
11916 | 26 | N->getOperand(1).getNode()->hasOneUse() &&
11917 | 26 | (N->getOperand(1).getValueType() == MVT::i32 ||
11918 | 15 | N->getOperand(1).getValueType() == MVT::i16 ||
11919 | 10 | (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
11920 | 14.6k | N->getOperand(1).getValueType() == MVT::i64))) {
11921 | 21 | SDValue BSwapOp = N->getOperand(1).getOperand(0); |
11922 | 21 | // Do an any-extend to 32-bits if this is a half-word input. |
11923 | 21 | if (BSwapOp.getValueType() == MVT::i16) |
11924 | 5 | BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp); |
11925 | 21 | |
11926 | 21 | // If the type of the BSWAP operand is wider than the stored memory width,
11927 | 21 | // it needs to be shifted right before STBRX.
11928 | 21 | EVT mVT = cast<StoreSDNode>(N)->getMemoryVT(); |
11929 | 21 | if (Op1VT.bitsGT(mVT)) {
11930 | 7 | int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits(); |
11931 | 7 | BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp, |
11932 | 7 | DAG.getConstant(Shift, dl, MVT::i32)); |
11933 | 7 | // Need to truncate if this is a bswap of i64 stored as i32/i16. |
11934 | 7 | if (Op1VT == MVT::i64) |
11935 | 4 | BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp); |
11936 | 7 | } |
11937 | 21 | |
11938 | 21 | SDValue Ops[] = { |
11939 | 21 | N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT) |
11940 | 21 | }; |
11941 | 21 | return |
11942 | 21 | DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other), |
11943 | 21 | Ops, cast<StoreSDNode>(N)->getMemoryVT(), |
11944 | 21 | cast<StoreSDNode>(N)->getMemOperand()); |
11945 | 21 | } |
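 | | 
 | | // For example (hedged sketch): 'void f(unsigned *p, unsigned x)
 | | // { *p = __builtin_bswap32(x); }' reaches the combine above and is
 | | // selected to a single stwbrx rather than a byte-reverse followed by a
 | | // plain store.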
11946 | 14.6k | |
11947 | 14.6k | // For little endian, VSX stores require generating xxswapd/stxvd2x.
11948 | 14.6k | // Not needed on ISA 3.0 based CPUs since we have a non-permuting store. |
11949 | 14.6k | EVT VT = N->getOperand(1).getValueType(); |
11950 | 14.6k | if (VT.isSimple()) {
11951 | 14.5k | MVT StoreVT = VT.getSimpleVT(); |
11952 | 14.5k | if (Subtarget.needsSwapsForVSXMemOps() && |
11953 | 944 | (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
11954 | 944 | StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
11955 | 252 | return expandVSXStoreForLE(N, DCI); |
11956 | 14.3k | } |
11957 | 14.3k | break; |
11958 | 14.3k | } |
11959 | 13.7k | case ISD::LOAD: { |
11960 | 13.7k | LoadSDNode *LD = cast<LoadSDNode>(N); |
11961 | 13.7k | EVT VT = LD->getValueType(0); |
11962 | 13.7k | |
11963 | 13.7k | // For little endian, VSX loads require generating lxvd2x/xxswapd. |
11964 | 13.7k | // Not needed on ISA 3.0 based CPUs since we have a non-permuting load. |
11965 | 13.7k | if (VT.isSimple()) {
11966 | 13.7k | MVT LoadVT = VT.getSimpleVT(); |
11967 | 13.7k | if (Subtarget.needsSwapsForVSXMemOps() && |
11968 | 1.59k | (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
11969 | 1.59k | LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
11970 | 388 | return expandVSXLoadForLE(N, DCI); |
11971 | 13.3k | } |
11972 | 13.3k | |
11973 | 13.3k | // We sometimes end up with a 64-bit integer load, from which we extract |
11974 | 13.3k | // two single-precision floating-point numbers. This happens with |
11975 | 13.3k | // std::complex<float>, and other similar structures, because of the way we |
11976 | 13.3k | // canonicalize structure copies. However, if we lack direct moves, |
11977 | 13.3k | // then the final bitcasts from the extracted integer values to the |
11978 | 13.3k | // floating-point numbers turn into store/load pairs. Even with direct moves, |
11979 | 13.3k | // just loading the two floating-point numbers is likely better. |
11980 | 13.3k | auto ReplaceTwoFloatLoad = [&]() {
11981 | 13.3k | if (VT != MVT::i64) |
11982 | 10.8k | return false; |
11983 | 2.47k | |
11984 | 2.47k | if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
11985 | 1.96k | LD->isVolatile()) |
11986 | 522 | return false; |
11987 | 1.95k | |
11988 | 1.95k | // We're looking for a sequence like this: |
11989 | 1.95k | // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64 |
11990 | 1.95k | // t16: i64 = srl t13, Constant:i32<32> |
11991 | 1.95k | // t17: i32 = truncate t16 |
11992 | 1.95k | // t18: f32 = bitcast t17 |
11993 | 1.95k | // t19: i32 = truncate t13 |
11994 | 1.95k | // t20: f32 = bitcast t19 |
11995 | 1.95k | |
11996 | 1.95k | if (!LD->hasNUsesOfValue(2, 0))
11997 | 1.79k | return false; |
11998 | 155 | |
11999 | 155 | auto UI = LD->use_begin(); |
12000 | 190 | while (UI.getUse().getResNo() != 0) ++UI;
12001 | 155 | SDNode *Trunc = *UI++;
12002 | 161 | while (UI.getUse().getResNo() != 0) ++UI;
12003 | 155 | SDNode *RightShift = *UI; |
12004 | 155 | if (Trunc->getOpcode() != ISD::TRUNCATE) |
12005 | 153 | std::swap(Trunc, RightShift); |
12006 | 155 | |
12007 | 155 | if (Trunc->getOpcode() != ISD::TRUNCATE || |
12008 | 2 | Trunc->getValueType(0) != MVT::i32 || |
12009 | 2 | !Trunc->hasOneUse()) |
12010 | 153 | return false; |
12011 | 2 | if (RightShift->getOpcode() != ISD::SRL ||
12012 | 2 | !isa<ConstantSDNode>(RightShift->getOperand(1)) || |
12013 | 2 | RightShift->getConstantOperandVal(1) != 32 || |
12014 | 2 | !RightShift->hasOneUse()) |
12015 | 0 | return false; |
12016 | 2 | |
12017 | 2 | SDNode *Trunc2 = *RightShift->use_begin(); |
12018 | 2 | if (Trunc2->getOpcode() != ISD::TRUNCATE || |
12019 | 2 | Trunc2->getValueType(0) != MVT::i32 || |
12020 | 2 | !Trunc2->hasOneUse()) |
12021 | 0 | return false; |
12022 | 2 | |
12023 | 2 | SDNode *Bitcast = *Trunc->use_begin(); |
12024 | 2 | SDNode *Bitcast2 = *Trunc2->use_begin(); |
12025 | 2 | |
12026 | 2 | if (Bitcast->getOpcode() != ISD::BITCAST || |
12027 | 2 | Bitcast->getValueType(0) != MVT::f32) |
12028 | 0 | return false; |
12029 | 2 | if (Bitcast2->getOpcode() != ISD::BITCAST ||
12030 | 2 | Bitcast2->getValueType(0) != MVT::f32) |
12031 | 0 | return false; |
12032 | 2 | |
12033 | 2 | if (Subtarget.isLittleEndian())
12034 | 0 | std::swap(Bitcast, Bitcast2); |
12035 | 2 | |
12036 | 2 | // Bitcast has the second float (in memory-layout order) and Bitcast2 |
12037 | 2 | // has the first one. |
12038 | 2 | |
12039 | 2 | SDValue BasePtr = LD->getBasePtr(); |
12040 | 2 | if (LD->isIndexed()2 ) { |
12041 | 0 | assert(LD->getAddressingMode() == ISD::PRE_INC && |
12042 | 0 | "Non-pre-inc AM on PPC?"); |
12043 | 0 | BasePtr = |
12044 | 0 | DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, |
12045 | 0 | LD->getOffset()); |
12046 | 0 | } |
12047 | 2 | |
12048 | 2 | auto MMOFlags = |
12049 | 2 | LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile; |
12050 | 2 | SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr, |
12051 | 2 | LD->getPointerInfo(), LD->getAlignment(), |
12052 | 2 | MMOFlags, LD->getAAInfo()); |
12053 | 2 | SDValue AddPtr = |
12054 | 2 | DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), |
12055 | 2 | BasePtr, DAG.getIntPtrConstant(4, dl)); |
12056 | 2 | SDValue FloatLoad2 = DAG.getLoad( |
12057 | 2 | MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr, |
12058 | 2 | LD->getPointerInfo().getWithOffset(4), |
12059 | 2 | MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo()); |
12060 | 2 | |
12061 | 2 | if (LD->isIndexed()) {
12062 | 0 | // Note that DAGCombine should re-form any pre-increment load(s) from |
12063 | 0 | // what is produced here if that makes sense. |
12064 | 0 | DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr); |
12065 | 0 | } |
12066 | 2 | |
12067 | 2 | DCI.CombineTo(Bitcast2, FloatLoad); |
12068 | 2 | DCI.CombineTo(Bitcast, FloatLoad2); |
12069 | 2 | |
12070 | 2 | DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
12071 | 13.3k | SDValue(FloatLoad2.getNode(), 1)); |
12072 | 13.3k | return true; |
12073 | 13.3k | }; |
12074 | 13.3k | |
12075 | 13.3k | if (ReplaceTwoFloatLoad()) |
12076 | 2 | return SDValue(N, 0); |
12077 | 13.3k | |
12078 | 13.3k | EVT MemVT = LD->getMemoryVT(); |
12079 | 13.3k | Type *Ty = MemVT.getTypeForEVT(*DAG.getContext()); |
12080 | 13.3k | unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); |
12081 | 13.3k | Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext()); |
12082 | 13.3k | unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy); |
12083 | 13.3k | if (LD->isUnindexed() && VT.isVector() &&
12084 | 2.74k | ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
12085 | 2.74k | // P8 and later hardware should just use LOAD.
12086 | 2.74k | !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
12087 | 1.35k | VT == MVT::v4i32 || VT == MVT::v4f32)) ||
12088 | 1.69k | (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
12089 | 2.74k | LD->getAlignment() >= ScalarABIAlignment)) &&
12090 | 13.3k | LD->getAlignment() < ABIAlignment) {
12091 | 108 | // This is a type-legal unaligned Altivec or QPX load. |
12092 | 108 | SDValue Chain = LD->getChain(); |
12093 | 108 | SDValue Ptr = LD->getBasePtr(); |
12094 | 108 | bool isLittleEndian = Subtarget.isLittleEndian(); |
12095 | 108 | |
12096 | 108 | // This implements the loading of unaligned vectors as described in |
12097 | 108 | // the venerable Apple Velocity Engine overview. Specifically: |
12098 | 108 | // https://developer.apple.com/hardwaredrivers/ve/alignment.html |
12099 | 108 | // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html |
12100 | 108 | // |
12101 | 108 | // The general idea is to expand a sequence of one or more unaligned |
12102 | 108 | // loads into an alignment-based permutation-control instruction (lvsl |
12103 | 108 | // or lvsr), a series of regular vector loads (which always truncate |
12104 | 108 | // their input address to an aligned address), and a series of |
12105 | 108 | // permutations. The results of these permutations are the requested |
12106 | 108 | // loaded values. The trick is that the last "extra" load is not taken |
12107 | 108 | // from the address you might suspect (sizeof(vector) bytes after the |
12108 | 108 | // last requested load), but rather sizeof(vector) - 1 bytes after the |
12109 | 108 | // last requested vector. The point of this is to avoid a page fault if |
12110 | 108 | // the base address happened to be aligned. This works because if the |
12111 | 108 | // base address is aligned, then adding less than a full vector length |
12112 | 108 | // will cause the last vector in the sequence to be (re)loaded. |
12113 | 108 | // Otherwise, the next vector will be fetched as you might suspect was |
12114 | 108 | // necessary. |
12115 | 108 | |
12116 | 108 | // We might be able to reuse the permutation generation from |
12117 | 108 | // a different base address offset from this one by an aligned amount. |
12118 | 108 | // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this |
12119 | 108 | // optimization later. |
12120 | 108 | Intrinsic::ID Intr, IntrLD, IntrPerm; |
12121 | 108 | MVT PermCntlTy, PermTy, LDTy; |
12122 | 108 | if (Subtarget.hasAltivec()) {
12123 | 1 | Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr : |
12124 | 57 | Intrinsic::ppc_altivec_lvsl; |
12125 | 58 | IntrLD = Intrinsic::ppc_altivec_lvx; |
12126 | 58 | IntrPerm = Intrinsic::ppc_altivec_vperm; |
12127 | 58 | PermCntlTy = MVT::v16i8; |
12128 | 58 | PermTy = MVT::v4i32; |
12129 | 58 | LDTy = MVT::v4i32; |
12130 | 108 | } else { |
12131 | 46 | Intr = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld : |
12132 | 4 | Intrinsic::ppc_qpx_qvlpcls; |
12133 | 46 | IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd : |
12134 | 4 | Intrinsic::ppc_qpx_qvlfs; |
12135 | 50 | IntrPerm = Intrinsic::ppc_qpx_qvfperm; |
12136 | 50 | PermCntlTy = MVT::v4f64; |
12137 | 50 | PermTy = MVT::v4f64; |
12138 | 50 | LDTy = MemVT.getSimpleVT(); |
12139 | 50 | } |
12140 | 108 | |
12141 | 108 | SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy); |
12142 | 108 | |
12143 | 108 | // Create the new MMO for the new base load. It is like the original MMO, |
12144 | 108 | // but represents an area in memory almost twice the vector size centered |
12145 | 108 | // on the original address. If the address is unaligned, we might start |
12146 | 108 | // reading up to (sizeof(vector)-1) bytes below the address of the |
12147 | 108 | // original unaligned load. |
12148 | 108 | MachineFunction &MF = DAG.getMachineFunction(); |
12149 | 108 | MachineMemOperand *BaseMMO = |
12150 | 108 | MF.getMachineMemOperand(LD->getMemOperand(), |
12151 | 108 | -(long)MemVT.getStoreSize()+1, |
12152 | 108 | 2*MemVT.getStoreSize()-1); |
12153 | 108 | |
12154 | 108 | // Create the new base load. |
12155 | 108 | SDValue LDXIntID = |
12156 | 108 | DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout())); |
12157 | 108 | SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr }; |
12158 | 108 | SDValue BaseLoad = |
12159 | 108 | DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, |
12160 | 108 | DAG.getVTList(PermTy, MVT::Other), |
12161 | 108 | BaseLoadOps, LDTy, BaseMMO); |
12162 | 108 | |
12163 | 108 | // Note that the value of IncOffset (which is provided to the next |
12164 | 108 | // load's pointer info offset value, and thus used to calculate the |
12165 | 108 | // alignment), and the value of IncValue (which is actually used to |
12166 | 108 | // increment the pointer value) are different! This is because we |
12167 | 108 | // require the next load to appear to be aligned, even though it |
12168 | 108 | // is actually offset from the base pointer by a lesser amount. |
12169 | 108 | int IncOffset = VT.getSizeInBits() / 8; |
12170 | 108 | int IncValue = IncOffset; |
12171 | 108 | |
12172 | 108 | // Walk (both up and down) the chain looking for another load at the real |
12173 | 108 | // (aligned) offset (the alignment of the other load does not matter in |
12174 | 108 | // this case). If found, then do not use the offset reduction trick, as |
12175 | 108 | // that will prevent the loads from being later combined (as they would |
12176 | 108 | // otherwise be duplicates). |
12177 | 108 | if (!findConsecutiveLoad(LD, DAG)) |
12178 | 58 | --IncValue; |
12179 | 108 | |
12180 | 108 | SDValue Increment = |
12181 | 108 | DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout())); |
12182 | 108 | Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); |
12183 | 108 | |
12184 | 108 | MachineMemOperand *ExtraMMO = |
12185 | 108 | MF.getMachineMemOperand(LD->getMemOperand(), |
12186 | 108 | 1, 2*MemVT.getStoreSize()-1); |
12187 | 108 | SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr }; |
12188 | 108 | SDValue ExtraLoad = |
12189 | 108 | DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, |
12190 | 108 | DAG.getVTList(PermTy, MVT::Other), |
12191 | 108 | ExtraLoadOps, LDTy, ExtraMMO); |
12192 | 108 | |
12193 | 108 | SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, |
12194 | 108 | BaseLoad.getValue(1), ExtraLoad.getValue(1)); |
12195 | 108 | |
12196 | 108 | // Because vperm has a big-endian bias, we must reverse the order |
12197 | 108 | // of the input vectors and complement the permute control vector |
12198 | 108 | // when generating little endian code. We have already handled the |
12199 | 108 | // latter by using lvsr instead of lvsl, so just reverse BaseLoad |
12200 | 108 | // and ExtraLoad here. |
12201 | 108 | SDValue Perm; |
12202 | 108 | if (isLittleEndian) |
12203 | 1 | Perm = BuildIntrinsicOp(IntrPerm, |
12204 | 1 | ExtraLoad, BaseLoad, PermCntl, DAG, dl); |
12205 | 108 | else |
12206 | 107 | Perm = BuildIntrinsicOp(IntrPerm, |
12207 | 107 | BaseLoad, ExtraLoad, PermCntl, DAG, dl); |
12208 | 108 | |
12209 | 108 | if (VT != PermTy) |
12210 | 40 | Perm = Subtarget.hasAltivec() ? |
12211 | 36 | DAG.getNode(ISD::BITCAST, dl, VT, Perm) : |
12212 | 4 | DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX |
12213 | 4 | DAG.getTargetConstant(1, dl, MVT::i64)); |
12214 | 108 | // second argument is 1 because this rounding |
12215 | 108 | // is always exact. |
12216 | 108 | |
12217 | 108 | // The output of the permutation is our loaded result, the TokenFactor is |
12218 | 108 | // our new chain. |
12219 | 108 | DCI.CombineTo(N, Perm, TF); |
12220 | 108 | return SDValue(N, 0); |
12221 | 108 | } |
12222 | 13.2k | } |
12223 | 13.2k | break; |
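 | | 
 | | // A hedged sketch of the code produced by the unaligned Altivec load
 | | // expansion above (register names illustrative):
 | | //   lvsl  vCtl, 0, rPtr          ; permute control from low address bits
 | | //   lvx   vLo,  0, rPtr          ; aligned load covering the start
 | | //   lvx   vHi,  rOff, rPtr       ; rOff = 15, per the page-fault note
 | | //   vperm vRes, vLo, vHi, vCtl   ; merge into the requested value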
12224 | 1.88k | case ISD::INTRINSIC_WO_CHAIN: { |
12225 | 1.88k | bool isLittleEndian = Subtarget.isLittleEndian(); |
12226 | 1.88k | unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); |
12227 | 368 | Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr |
12228 | 1.51k | : Intrinsic::ppc_altivec_lvsl); |
12229 | 1.88k | if ((IID == Intr || |
12230 | 1.75k | IID == Intrinsic::ppc_qpx_qvlpcld || |
12231 | 1.62k | IID == Intrinsic::ppc_qpx_qvlpcls) && |
12232 | 1.88k | N->getOperand(1)->getOpcode() == ISD::ADD260 ) { |
12233 | 187 | SDValue Add = N->getOperand(1); |
12234 | 187 | |
12235 | 187 | int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ? |
12236 | 187 | 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;
12237 | 187 | |
12238 | 187 | if (DAG.MaskedValueIsZero(Add->getOperand(1), |
12239 | 187 | APInt::getAllOnesValue(Bits /* alignment */) |
12240 | 187 | .zext(Add.getScalarValueSizeInBits()))) { |
12241 | 177 | SDNode *BasePtr = Add->getOperand(0).getNode(); |
12242 | 177 | for (SDNode::use_iterator UI = BasePtr->use_begin(), |
12243 | 177 | UE = BasePtr->use_end(); |
12244 | 3.38k | UI != UE; ++UI) {
12245 | 3.21k | if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
12246 | 3.21k | cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) {
12247 | 10 | // We've found another LVSL/LVSR, and this address is an aligned |
12248 | 10 | // multiple of that one. The results will be the same, so use the |
12249 | 10 | // one we've just found instead. |
12250 | 10 | |
12251 | 10 | return SDValue(*UI, 0); |
12252 | 10 | } |
12253 | 3.21k | } |
12254 | 177 | } |
12255 | 187 | |
12256 | 177 | if (isa<ConstantSDNode>(Add->getOperand(1))) {
12257 | 167 | SDNode *BasePtr = Add->getOperand(0).getNode(); |
12258 | 167 | for (SDNode::use_iterator UI = BasePtr->use_begin(), |
12259 | 468 | UE = BasePtr->use_end(); UI != UE; ++UI) {
12260 | 468 | if (UI->getOpcode() == ISD::ADD && |
12261 | 468 | isa<ConstantSDNode>(UI->getOperand(1)) && |
12262 | 468 | (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() - |
12263 | 468 | cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) % |
12264 | 468 | (1ULL << Bits) == 0) { |
12265 | 168 | SDNode *OtherAdd = *UI; |
12266 | 168 | for (SDNode::use_iterator VI = OtherAdd->use_begin(), |
12267 | 252 | VE = OtherAdd->use_end(); VI != VE; ++VI) {
12268 | 251 | if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
12269 | 251 | cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
12270 | 167 | return SDValue(*VI, 0); |
12271 | 167 | } |
12272 | 251 | } |
12273 | 168 | } |
12274 | 468 | } |
12275 | 167 | } |
12276 | 187 | } |
12277 | 1.88k | } |
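 | | 
 | | // This reuse is sound because lvsl/lvsr derive the permute control purely
 | | // from the low-order address bits: for example, lvsl(p) and lvsl(p + 16)
 | | // yield the same control vector, so either node's result can stand in
 | | // for the other.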
12278 | 1.88k | |
12279 | 1.70k | break; |
12280 | 790 | case ISD::INTRINSIC_W_CHAIN: |
12281 | 790 | // For little endian, VSX loads require generating lxvd2x/xxswapd. |
12282 | 790 | // Not needed on ISA 3.0 based CPUs since we have a non-permuting load. |
12283 | 790 | if (Subtarget.needsSwapsForVSXMemOps()) {
12284 | 19 | switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { |
12285 | 5 | default: |
12286 | 5 | break; |
12287 | 14 | case Intrinsic::ppc_vsx_lxvw4x: |
12288 | 14 | case Intrinsic::ppc_vsx_lxvd2x: |
12289 | 14 | return expandVSXLoadForLE(N, DCI); |
12290 | 776 | } |
12291 | 776 | } |
12292 | 776 | break; |
12293 | 2.12k | case ISD::INTRINSIC_VOID: |
12294 | 2.12k | // For little endian, VSX stores require generating xxswapd/stxvd2x. |
12295 | 2.12k | // Not needed on ISA 3.0 based CPUs since we have a non-permuting store. |
12296 | 2.12k | if (Subtarget.needsSwapsForVSXMemOps()) {
12297 | 1.71k | switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { |
12298 | 1.70k | default: |
12299 | 1.70k | break; |
12300 | 12 | case Intrinsic::ppc_vsx_stxvw4x: |
12301 | 12 | case Intrinsic::ppc_vsx_stxvd2x: |
12302 | 12 | return expandVSXStoreForLE(N, DCI); |
12303 | 2.11k | } |
12304 | 2.11k | } |
12305 | 2.11k | break; |
12306 | 37 | case ISD::BSWAP: |
12307 | 37 | // Turn BSWAP (LOAD) -> lhbrx/lwbrx. |
12308 | 37 | if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && |
12309 | 32 | N->getOperand(0).hasOneUse() && |
12310 | 32 | (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
12311 | 14 | (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
12312 | 37 | N->getValueType(0) == MVT::i64))) {
12313 | 29 | SDValue Load = N->getOperand(0); |
12314 | 29 | LoadSDNode *LD = cast<LoadSDNode>(Load); |
12315 | 29 | // Create the byte-swapping load. |
12316 | 29 | SDValue Ops[] = { |
12317 | 29 | LD->getChain(), // Chain |
12318 | 29 | LD->getBasePtr(), // Ptr |
12319 | 29 | DAG.getValueType(N->getValueType(0)) // VT |
12320 | 29 | }; |
12321 | 29 | SDValue BSLoad = |
12322 | 29 | DAG.getMemIntrinsicNode(PPCISD::LBRX, dl, |
12323 | 29 | DAG.getVTList(N->getValueType(0) == MVT::i64 ? |
12324 | 29 | MVT::i64 : MVT::i32, MVT::Other),
12325 | 29 | Ops, LD->getMemoryVT(), LD->getMemOperand()); |
12326 | 29 | |
12327 | 29 | // If this is an i16 load, insert the truncate. |
12328 | 29 | SDValue ResVal = BSLoad; |
12329 | 29 | if (N->getValueType(0) == MVT::i16) |
12330 | 7 | ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad); |
12331 | 29 | |
12332 | 29 | // First, combine the bswap away. This makes the value produced by the |
12333 | 29 | // load dead. |
12334 | 29 | DCI.CombineTo(N, ResVal); |
12335 | 29 | |
12336 | 29 | // Next, combine the load away, we give it a bogus result value but a real |
12337 | 29 | // chain result. The result value is dead because the bswap is dead. |
12338 | 29 | DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1)); |
12339 | 29 | |
12340 | 29 | // Return N so it doesn't get rechecked! |
12341 | 29 | return SDValue(N, 0); |
12342 | 29 | } |
12343 | 8 | break; |
12344 | 13 | case PPCISD::VCMP: |
12345 | 13 | // If a VCMPo node already exists with exactly the same operands as this |
12346 | 13 | // node, use its result instead of this node (VCMPo computes both a CR6 and |
12347 | 13 | // a normal output). |
12348 | 13 | // |
12349 | 13 | if (!N->getOperand(0).hasOneUse() && |
12350 | 1 | !N->getOperand(1).hasOneUse() && |
12351 | 13 | !N->getOperand(2).hasOneUse()) {
12352 | 1 | |
12353 | 1 | // Scan all of the users of the LHS, looking for VCMPo's that match. |
12354 | 1 | SDNode *VCMPoNode = nullptr; |
12355 | 1 | |
12356 | 1 | SDNode *LHSN = N->getOperand(0).getNode(); |
12357 | 1 | for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end(); |
12358 | 1 | UI != E; ++UI)
12359 | 1 | if (UI->getOpcode() == PPCISD::VCMPo &&
12360 | 1 | UI->getOperand(1) == N->getOperand(1) && |
12361 | 1 | UI->getOperand(2) == N->getOperand(2) && |
12362 | 1 | UI->getOperand(0) == N->getOperand(0)) {
12363 | 1 | VCMPoNode = *UI; |
12364 | 1 | break; |
12365 | 1 | } |
12366 | 1 | |
12367 | 1 | // If there is no VCMPo node, or if the flag value has a single use, don't |
12368 | 1 | // transform this. |
12369 | 1 | if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
12370 | 0 | break; |
12371 | 1 | |
12372 | 1 | // Look at the (necessarily single) use of the flag value. If it has a |
12373 | 1 | // chain, this transformation is more complex. Note that multiple things |
12374 | 1 | // could use the value result, which we should ignore. |
12375 | 1 | SDNode *FlagUser = nullptr; |
12376 | 1 | for (SDNode::use_iterator UI = VCMPoNode->use_begin(); |
12377 | 2 | FlagUser == nullptr; ++UI) {
12378 | 1 | assert(UI != VCMPoNode->use_end() && "Didn't find user!"); |
12379 | 1 | SDNode *User = *UI; |
12380 | 2 | for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
12381 | 2 | if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
12382 | 1 | FlagUser = User; |
12383 | 1 | break; |
12384 | 1 | } |
12385 | 2 | } |
12386 | 1 | } |
12387 | 1 | |
12388 | 1 | // If the user is a MFOCRF instruction, we know this is safe. |
12389 | 1 | // Otherwise we give up for right now. |
12390 | 1 | if (FlagUser->getOpcode() == PPCISD::MFOCRF) |
12391 | 1 | return SDValue(VCMPoNode, 0); |
12392 | 12 | } |
12393 | 12 | break; |
12394 | 973 | case ISD::BRCOND: { |
12395 | 973 | SDValue Cond = N->getOperand(1); |
12396 | 973 | SDValue Target = N->getOperand(2); |
12397 | 973 | |
12398 | 973 | if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN && |
12399 | 112 | cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() == |
12400 | 973 | Intrinsic::ppc_is_decremented_ctr_nonzero) { |
12401 | 112 | |
12402 | 112 | // We now need to make the intrinsic dead (it cannot be instruction |
12403 | 112 | // selected). |
12404 | 112 | DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0)); |
12405 | 112 | assert(Cond.getNode()->hasOneUse() && |
12406 | 112 | "Counter decrement has more than one use"); |
12407 | 112 | |
12408 | 112 | return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other, |
12409 | 112 | N->getOperand(0), Target); |
12410 | 112 | } |
12411 | 861 | } |
12412 | 861 | break; |
12413 | 1.63k | case ISD::BR_CC: { |
12414 | 1.63k | // If this is a branch on an altivec predicate comparison, lower this so |
12415 | 1.63k | // that we don't have to do a MFOCRF: instead, branch directly on CR6. This |
12416 | 1.63k | // lowering is done pre-legalize, because the legalizer lowers the predicate |
12417 | 1.63k | // compare down to code that is difficult to reassemble. |
12418 | 1.63k | ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); |
12419 | 1.63k | SDValue LHS = N->getOperand(2), RHS = N->getOperand(3); |
12420 | 1.63k | |
12421 | 1.63k | // Sometimes the promoted value of the intrinsic is ANDed with some non-zero
12422 | 1.63k | // value. If so, pass-through the AND to get to the intrinsic. |
12423 | 1.63k | if (LHS.getOpcode() == ISD::AND && |
12424 | 187 | LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN && |
12425 | 0 | cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() == |
12426 | 0 | Intrinsic::ppc_is_decremented_ctr_nonzero && |
12427 | 0 | isa<ConstantSDNode>(LHS.getOperand(1)) && |
12428 | 0 | !isNullConstant(LHS.getOperand(1))) |
12429 | 0 | LHS = LHS.getOperand(0); |
12430 | 1.63k | |
12431 | 1.63k | if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN && |
12432 | 7 | cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() == |
12433 | 7 | Intrinsic::ppc_is_decremented_ctr_nonzero && |
12434 | 1.63k | isa<ConstantSDNode>(RHS)) {
12435 | 7 | assert((CC == ISD::SETEQ || CC == ISD::SETNE) && |
12436 | 7 | "Counter decrement comparison is not EQ or NE"); |
12437 | 7 | |
12438 | 7 | unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue(); |
12439 | 0 | bool isBDNZ = (CC == ISD::SETEQ && Val) || |
12440 | 7 | (CC == ISD::SETNE && !Val);
12441 | 7 | |
12442 | 7 | // We now need to make the intrinsic dead (it cannot be instruction |
12443 | 7 | // selected). |
12444 | 7 | DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0)); |
12445 | 7 | assert(LHS.getNode()->hasOneUse() && |
12446 | 7 | "Counter decrement has more than one use"); |
12447 | 7 | |
12448 | 7 | return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
12449 | 7 | N->getOperand(0), N->getOperand(4)); |
12450 | 7 | } |
12451 | 1.62k | |
12452 | 1.62k | int CompareOpc; |
12453 | 1.62k | bool isDot; |
12454 | 1.62k | |
12455 | 1.62k | if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN && |
12456 | 1.62k | isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
12457 | 1.62k | getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
12458 | 4 | assert(isDot && "Can't compare against a vector result!"); |
12459 | 4 | |
12460 | 4 | // If this is a comparison against something other than 0/1, then we know |
12461 | 4 | // that the condition is never/always true. |
12462 | 4 | unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue(); |
12463 | 4 | if (Val != 0 && Val != 1) {
12464 | 0 | if (CC == ISD::SETEQ) // Cond never true, remove branch. |
12465 | 0 | return N->getOperand(0); |
12466 | 0 | // Always !=, turn it into an unconditional branch. |
12467 | 0 | return DAG.getNode(ISD::BR, dl, MVT::Other, |
12468 | 0 | N->getOperand(0), N->getOperand(4)); |
12469 | 0 | } |
12470 | 4 | |
12471 | 4 | bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0); |
12472 | 4 | |
12473 | 4 | // Create the PPCISD altivec 'dot' comparison node. |
12474 | 4 | SDValue Ops[] = { |
12475 | 4 | LHS.getOperand(2), // LHS of compare |
12476 | 4 | LHS.getOperand(3), // RHS of compare |
12477 | 4 | DAG.getConstant(CompareOpc, dl, MVT::i32) |
12478 | 4 | }; |
12479 | 4 | EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue }; |
12480 | 4 | SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops); |
12481 | 4 | |
12482 | 4 | // Unpack the result based on how the target uses it. |
12483 | 4 | PPC::Predicate CompOpc; |
12484 | 4 | switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) { |
12485 | 0 | default: // Can't happen, don't crash on invalid number though. |
12486 | 1 | case 0: // Branch on the value of the EQ bit of CR6. |
12487 | 1 | CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
12488 | 1 | break;
12489 | 1 | case 1: // Branch on the inverted value of the EQ bit of CR6.
12490 | 1 | CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
12491 | 1 | break;
12492 | 2 | case 2: // Branch on the value of the LT bit of CR6.
12493 | 2 | CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
12494 | 2 | break;
12495 | 0 | case 3: // Branch on the inverted value of the LT bit of CR6.
12496 | 0 | CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
12497 | 0 | break; |
12498 | 4 | } |
12499 | 4 | |
12500 | 4 | return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0), |
12501 | 4 | DAG.getConstant(CompOpc, dl, MVT::i32), |
12502 | 4 | DAG.getRegister(PPC::CR6, MVT::i32), |
12503 | 4 | N->getOperand(4), CompNode.getValue(1)); |
12504 | 4 | } |
12505 | 1.62k | break; |
12506 | 1.62k | } |
12507 | 2.59k | case ISD::BUILD_VECTOR: |
12508 | 2.59k | return DAGCombineBuildVector(N, DCI); |
12509 | 62.2k | } |
12510 | 62.2k | |
12511 | 62.2k | return SDValue(); |
12512 | 62.2k | } |
12513 | | |
12514 | | SDValue |
12515 | | PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, |
12516 | | SelectionDAG &DAG, |
12517 | 11 | std::vector<SDNode *> *Created) const { |
12518 | 11 | // fold (sdiv X, pow2) |
12519 | 11 | EVT VT = N->getValueType(0); |
12520 | 11 | if (VT == MVT::i64 && !Subtarget.isPPC64())
12521 | 2 | return SDValue();
12522 | 9 | if ((VT != MVT::i32 && VT != MVT::i64) ||
12523 | 9 | !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
12524 | 0 | return SDValue(); |
12525 | 9 | |
12526 | 9 | SDLoc DL(N); |
12527 | 9 | SDValue N0 = N->getOperand(0); |
12528 | 9 | |
12529 | 9 | bool IsNegPow2 = (-Divisor).isPowerOf2(); |
12530 | 9 | unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
12531 | 9 | SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT); |
12532 | 9 | |
12533 | 9 | SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt); |
12534 | 9 | if (Created) |
12535 | 9 | Created->push_back(Op.getNode()); |
12536 | 9 | |
12537 | 9 | if (IsNegPow2) {
12538 | 3 | Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op); |
12539 | 3 | if (Created) |
12540 | 3 | Created->push_back(Op.getNode()); |
12541 | 3 | } |
12542 | 11 | |
12543 | 11 | return Op; |
12544 | 11 | } |
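 | | 
 | | // Worked example (a hedged sketch): 'int f(int x) { return x / 8; }'
 | | // takes the path above with Lg2 == 3; SRA_ADDZE is then selected to
 | | //   srawi r3, r3, 3
 | | //   addze r3, r3
 | | // where the carry produced by srawi rounds negative dividends toward
 | | // zero, avoiding both a branch and a full divide.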
12545 | | |
12546 | | //===----------------------------------------------------------------------===// |
12547 | | // Inline Assembly Support |
12548 | | //===----------------------------------------------------------------------===// |
12549 | | |
12550 | | void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, |
12551 | | KnownBits &Known, |
12552 | | const APInt &DemandedElts, |
12553 | | const SelectionDAG &DAG, |
12554 | 5.25k | unsigned Depth) const { |
12555 | 5.25k | Known.resetAll(); |
12556 | 5.25k | switch (Op.getOpcode()) { |
12557 | 5.06k | default: break; |
12558 | 44 | case PPCISD::LBRX: { |
12559 | 44 | // lhbrx is known to have the top bits cleared out. |
12560 | 44 | if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16) |
12561 | 4 | Known.Zero = 0xFFFF0000; |
12562 | 44 | break; |
12563 | 5.25k | } |
12564 | 143 | case ISD::INTRINSIC_WO_CHAIN: { |
12565 | 143 | switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) { |
12566 | 143 | default: break; |
12567 | 0 | case Intrinsic::ppc_altivec_vcmpbfp_p: |
12568 | 0 | case Intrinsic::ppc_altivec_vcmpeqfp_p: |
12569 | 0 | case Intrinsic::ppc_altivec_vcmpequb_p: |
12570 | 0 | case Intrinsic::ppc_altivec_vcmpequh_p: |
12571 | 0 | case Intrinsic::ppc_altivec_vcmpequw_p: |
12572 | 0 | case Intrinsic::ppc_altivec_vcmpequd_p: |
12573 | 0 | case Intrinsic::ppc_altivec_vcmpgefp_p: |
12574 | 0 | case Intrinsic::ppc_altivec_vcmpgtfp_p: |
12575 | 0 | case Intrinsic::ppc_altivec_vcmpgtsb_p: |
12576 | 0 | case Intrinsic::ppc_altivec_vcmpgtsh_p: |
12577 | 0 | case Intrinsic::ppc_altivec_vcmpgtsw_p: |
12578 | 0 | case Intrinsic::ppc_altivec_vcmpgtsd_p: |
12579 | 0 | case Intrinsic::ppc_altivec_vcmpgtub_p: |
12580 | 0 | case Intrinsic::ppc_altivec_vcmpgtuh_p: |
12581 | 0 | case Intrinsic::ppc_altivec_vcmpgtuw_p: |
12582 | 0 | case Intrinsic::ppc_altivec_vcmpgtud_p: |
12583 | 0 | Known.Zero = ~1U; // All bits but the low one are known to be zero. |
12584 | 0 | break; |
12585 | 5.25k | } |
12586 | 5.25k | } |
12587 | 5.25k | } |
12588 | 5.25k | } |
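 | | 
 | | // For example (an assumed illustration), after an i16 LBRX the combiner
 | | // knows bits 16-31 are zero, so the masking that a zero-extension of a
 | | // byte-reversed halfword load would otherwise require (an 'and' with
 | | // 0xFFFF) can be dropped as redundant.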
12589 | | |
12590 | 1.39k | unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { |
12591 | 1.39k | switch (Subtarget.getDarwinDirective()) { |
12592 | 364 | default: break; |
12593 | 1.03k | case PPC::DIR_970: |
12594 | 1.03k | case PPC::DIR_PWR4: |
12595 | 1.03k | case PPC::DIR_PWR5: |
12596 | 1.03k | case PPC::DIR_PWR5X: |
12597 | 1.03k | case PPC::DIR_PWR6: |
12598 | 1.03k | case PPC::DIR_PWR6X: |
12599 | 1.03k | case PPC::DIR_PWR7: |
12600 | 1.03k | case PPC::DIR_PWR8: |
12601 | 1.03k | case PPC::DIR_PWR9: { |
12602 | 1.03k | if (!ML) |
12603 | 0 | break; |
12604 | 1.03k | |
12605 | 1.03k | const PPCInstrInfo *TII = Subtarget.getInstrInfo(); |
12606 | 1.03k | |
12607 | 1.03k | // For small loops (between 5 and 8 instructions), align to a 32-byte |
12608 | 1.03k | // boundary so that the entire loop fits in one instruction-cache line. |
12609 | 1.03k | uint64_t LoopSize = 0; |
12610 | 3.21k | for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
12611 | 7.29k | for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
12612 | 5.63k | LoopSize += TII->getInstSizeInBytes(*J); |
12613 | 5.63k | if (LoopSize > 32) |
12614 | 523 | break; |
12615 | 2.18k | } |
12616 | 1.03k | |
12617 | 1.03k | if (LoopSize > 16 && LoopSize <= 32)
12618 | 568 | return 5; |
12619 | 466 | |
12620 | 466 | break; |
12621 | 466 | } |
12622 | 830 | } |
12623 | 830 | |
12624 | 830 | return TargetLowering::getPrefLoopAlignment(ML); |
12625 | 830 | } |
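 | | 
 | | // The return value is a log2 alignment: 5 requests 2^5 = 32 bytes, so a
 | | // loop body of 17-32 bytes (roughly five to eight 4-byte instructions)
 | | // lands in a single 32-byte instruction-fetch group on these cores.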
12626 | | |
12627 | | /// getConstraintType - Given a constraint, return the type of |
12628 | | /// constraint it is for this target. |
12629 | | PPCTargetLowering::ConstraintType |
12630 | 8.08k | PPCTargetLowering::getConstraintType(StringRef Constraint) const { |
12631 | 8.08k | if (Constraint.size() == 1) {
12632 | 1.82k | switch (Constraint[0]) { |
12633 | 602 | default: break; |
12634 | 1.21k | case 'b': |
12635 | 1.21k | case 'r': |
12636 | 1.21k | case 'f': |
12637 | 1.21k | case 'd': |
12638 | 1.21k | case 'v': |
12639 | 1.21k | case 'y': |
12640 | 1.21k | return C_RegisterClass; |
12641 | 4 | case 'Z': |
12642 | 4 | // FIXME: While Z does indicate a memory constraint, it specifically |
12643 | 4 | // indicates an r+r address (used in conjunction with the 'y' modifier |
12644 | 4 | // in the replacement string). Currently, we're forcing the base |
12645 | 4 | // register to be r0 in the asm printer (which is interpreted as zero) |
12646 | 4 | // and forming the complete address in the second register. This is |
12647 | 4 | // suboptimal. |
12648 | 4 | return C_Memory; |
12649 | 8.08k | } |
12650 | 6.26k | } else if (Constraint == "wc") { // individual CR bits.
12651 | 156 | return C_RegisterClass;
12652 | 6.10k | } else if (Constraint == "wa" || Constraint == "wd" ||
12653 | 6.10k | Constraint == "wf" || Constraint == "ws") {
12654 | 12 | return C_RegisterClass; // VSX registers. |
12655 | 12 | } |
12656 | 6.69k | return TargetLowering::getConstraintType(Constraint); |
12657 | 6.69k | } |
12658 | | |
12659 | | /// Examine constraint type and operand type and determine a weight value. |
12660 | | /// This object must already have been set up with the operand type |
12661 | | /// and the current alternative constraint selected. |
12662 | | TargetLowering::ConstraintWeight |
12663 | | PPCTargetLowering::getSingleConstraintMatchWeight( |
12664 | 486 | AsmOperandInfo &info, const char *constraint) const { |
12665 | 486 | ConstraintWeight weight = CW_Invalid; |
12666 | 486 | Value *CallOperandVal = info.CallOperandVal; |
12667 | 486 | // If we don't have a value, we can't do a match, |
12668 | 486 | // but allow it at the lowest weight. |
12669 | 486 | if (!CallOperandVal) |
12670 | 204 | return CW_Default; |
12671 | 282 | Type *type = CallOperandVal->getType(); |
12672 | 282 | |
12673 | 282 | // Look at the constraint type. |
12674 | 282 | if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
12675 | 0 | return CW_Register; // an individual CR bit. |
12676 | 282 | else if ((StringRef(constraint) == "wa" ||
12677 | 282 | StringRef(constraint) == "wd" || |
12678 | 282 | StringRef(constraint) == "wf") && |
12679 | 0 | type->isVectorTy()) |
12680 | 0 | return CW_Register; |
12681 | 282 | else if (StringRef(constraint) == "ws" && type->isDoubleTy())
12682 | 0 | return CW_Register; |
12683 | 282 | |
12684 | 282 | switch (*constraint) { |
12685 | 282 | default: |
12686 | 282 | weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); |
12687 | 282 | break; |
12688 | 0 | case 'b': |
12689 | 0 | if (type->isIntegerTy()) |
12690 | 0 | weight = CW_Register; |
12691 | 0 | break; |
12692 | 0 | case 'f': |
12693 | 0 | if (type->isFloatTy()) |
12694 | 0 | weight = CW_Register; |
12695 | 0 | break; |
12696 | 0 | case 'd': |
12697 | 0 | if (type->isDoubleTy()) |
12698 | 0 | weight = CW_Register; |
12699 | 0 | break; |
12700 | 0 | case 'v': |
12701 | 0 | if (type->isVectorTy()) |
12702 | 0 | weight = CW_Register; |
12703 | 0 | break; |
12704 | 0 | case 'y': |
12705 | 0 | weight = CW_Register; |
12706 | 0 | break; |
12707 | 0 | case 'Z': |
12708 | 0 | weight = CW_Memory; |
12709 | 0 | break; |
12710 | 282 | } |
12711 | 282 | return weight; |
12712 | 282 | } |
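
A sketch of an operand this weighting would score: with the 'f' constraint, a 32-bit float operand satisfies type->isFloatTy() above and weighs CW_Register (hypothetical example, PowerPC target assumed):

float square(float x) {
  asm("fmuls %0, %0, %0" : "+f"(x)); // 'f' + float operand -> CW_Register
  return x;
}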
12713 | | |
12714 | | std::pair<unsigned, const TargetRegisterClass *> |
12715 | | PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, |
12716 | | StringRef Constraint, |
12717 | 2.77k | MVT VT) const { |
12718 | 2.77k | if (Constraint.size() == 1) {
12719 | 290 | // GCC RS6000 Constraint Letters |
12720 | 290 | switch (Constraint[0]) { |
12721 | 7 | case 'b': // R1-R31 |
12722 | 7 | if (VT == MVT::i64 && Subtarget.isPPC64())
12723 | 5 | return std::make_pair(0U, &PPC::G8RC_NOX0RegClass); |
12724 | 2 | return std::make_pair(0U, &PPC::GPRC_NOR0RegClass); |
12725 | 270 | case 'r': // R0-R31 |
12726 | 270 | if (VT == MVT::i64 && Subtarget.isPPC64())
12727 | 27 | return std::make_pair(0U, &PPC::G8RCRegClass); |
12728 | 243 | return std::make_pair(0U, &PPC::GPRCRegClass); |
12729 | 243 | // 'd' and 'f' constraints are both defined to be "the floating point |
12730 | 243 | // registers", where one is for 32-bit and the other for 64-bit. We don't
12731 | 243 | // need to distinguish them here, so just give them the same reg classes.
12732 | 13 | case 'd': |
12733 | 13 | case 'f': |
12734 | 13 | if (VT == MVT::f32 || VT == MVT::i32)
12735 | 0 | return std::make_pair(0U, &PPC::F4RCRegClass); |
12736 | 13 | if (VT == MVT::f64 || VT == MVT::i64)
12737 | 13 | return std::make_pair(0U, &PPC::F8RCRegClass); |
12738 | 0 | if (VT == MVT::v4f64 && Subtarget.hasQPX())
12739 | 0 | return std::make_pair(0U, &PPC::QFRCRegClass); |
12740 | 0 | if (VT == MVT::v4f32 && Subtarget.hasQPX())
12741 | 0 | return std::make_pair(0U, &PPC::QSRCRegClass); |
12742 | 0 | break; |
12743 | 0 | case 'v': |
12744 | 0 | if (VT == MVT::v4f64 && Subtarget.hasQPX())
12745 | 0 | return std::make_pair(0U, &PPC::QFRCRegClass); |
12746 | 0 | if (VT == MVT::v4f32 && Subtarget.hasQPX())
12747 | 0 | return std::make_pair(0U, &PPC::QSRCRegClass); |
12748 | 0 | if (Subtarget.hasAltivec())
12749 | 0 | return std::make_pair(0U, &PPC::VRRCRegClass);
      |   | break; // don't fall through to 'y' and hand back a CR register class
12750 | 0 | case 'y': // crrc |
12751 | 0 | return std::make_pair(0U, &PPC::CRRCRegClass); |
12752 | 2.77k | } |
12753 | 2.48k | } else if (Constraint == "wc" && Subtarget.useCRBits()) {
12754 | 36 | // An individual CR bit. |
12755 | 36 | return std::make_pair(0U, &PPC::CRBITRCRegClass); |
12756 | 2.45k | } else if ((Constraint == "wa" || Constraint == "wd" ||
12757 | 2.45k | Constraint == "wf") && Subtarget.hasVSX()) {
12758 | 0 | return std::make_pair(0U, &PPC::VSRCRegClass); |
12759 | 2.45k | } else if (Constraint == "ws" && Subtarget.hasVSX()) {
12760 | 0 | if (VT == MVT::f32 && Subtarget.hasP8Vector())
12761 | 0 | return std::make_pair(0U, &PPC::VSSRCRegClass); |
12762 | 0 | else |
12763 | 0 | return std::make_pair(0U, &PPC::VSFRCRegClass); |
12764 | 2.45k | } |
12765 | 2.45k | |
12766 | 2.45k | std::pair<unsigned, const TargetRegisterClass *> R = |
12767 | 2.45k | TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); |
12768 | 2.45k | |
12769 | 2.45k | // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers |
12770 | 2.45k | // (which we call X[0-9]+). If a 64-bit value has been requested, and a |
12771 | 2.45k | // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent |
12772 | 2.45k | // register. |
12773 | 2.45k | // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use |
12774 | 2.45k | // the AsmName field from *RegisterInfo.td, then this would not be necessary. |
12775 | 2.45k | if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
12776 | 76 | PPC::GPRCRegClass.contains(R.first)) |
12777 | 76 | return std::make_pair(TRI->getMatchingSuperReg(R.first, |
12778 | 76 | PPC::sub_32, &PPC::G8RCRegClass), |
12779 | 76 | &PPC::G8RCRegClass); |
12780 | 2.37k | |
12781 | 2.37k | // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same. |
12782 | 2.37k | if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
12783 | 16 | R.first = PPC::CR0; |
12784 | 16 | R.second = &PPC::CRRCRegClass; |
12785 | 16 | } |
12786 | 2.77k | |
12787 | 2.77k | return R; |
12788 | 2.77k | } |
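
A sketch of the 'upgrade' path above, assuming a ppc64 target: each 64-bit "r" operand first resolves to a 32-bit GPR name and is then promoted to the matching X register, and a "cc" clobber is accepted as an alias for cr0:

long add64(long a, long b) {
  long r;
  asm("add %0, %1, %2" : "=r"(r) : "r"(a), "r"(b) : "cc");
  return r;
}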
12789 | | |
12790 | | /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops |
12791 | | /// vector. If it is invalid, don't add anything to Ops. |
12792 | | void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, |
12793 | | std::string &Constraint, |
12794 | | std::vector<SDValue>&Ops, |
12795 | 78 | SelectionDAG &DAG) const { |
12796 | 78 | SDValue Result; |
12797 | 78 | |
12798 | 78 | // Only support length 1 constraints. |
12799 | 78 | if (Constraint.length() > 1) return;
12800 | 78 | |
12801 | 78 | char Letter = Constraint[0]; |
12802 | 78 | switch (Letter) { |
12803 | 54 | default: break; |
12804 | 24 | case 'I': |
12805 | 24 | case 'J': |
12806 | 24 | case 'K': |
12807 | 24 | case 'L': |
12808 | 24 | case 'M': |
12809 | 24 | case 'N': |
12810 | 24 | case 'O': |
12811 | 24 | case 'P': { |
12812 | 24 | ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op); |
12813 | 24 | if (!CST) return; // Must be an immediate to match.
12814 | 20 | SDLoc dl(Op); |
12815 | 20 | int64_t Value = CST->getSExtValue(); |
12816 | 20 | EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative |
12817 | 20 | // numbers are printed as such. |
12818 | 20 | switch (Letter) { |
12819 | 0 | default: llvm_unreachable("Unknown constraint letter!");
12820 | 6 | case 'I': // "I" is a signed 16-bit constant. |
12821 | 6 | if (isInt<16>(Value)) |
12822 | 6 | Result = DAG.getTargetConstant(Value, dl, TCVT); |
12823 | 6 | break; |
12824 | 0 | case 'J': // "J" is a constant with only the high-order 16 bits nonzero. |
12825 | 0 | if (isShiftedUInt<16, 16>(Value)) |
12826 | 0 | Result = DAG.getTargetConstant(Value, dl, TCVT); |
12827 | 0 | break; |
12828 | 0 | case 'L': // "L" is a signed 16-bit constant shifted left 16 bits. |
12829 | 0 | if (isShiftedInt<16, 16>(Value)) |
12830 | 0 | Result = DAG.getTargetConstant(Value, dl, TCVT); |
12831 | 0 | break; |
12832 | 0 | case 'K': // "K" is a constant with only the low-order 16 bits nonzero. |
12833 | 0 | if (isUInt<16>(Value)) |
12834 | 0 | Result = DAG.getTargetConstant(Value, dl, TCVT); |
12835 | 0 | break; |
12836 | 0 | case 'M': // "M" is a constant that is greater than 31. |
12837 | 0 | if (Value > 31) |
12838 | 0 | Result = DAG.getTargetConstant(Value, dl, TCVT); |
12839 | 0 | break; |
12840 | 0 | case 'N': // "N" is a positive constant that is an exact power of two. |
12841 | 0 | if (Value > 0 && isPowerOf2_64(Value))
12842 | 0 | Result = DAG.getTargetConstant(Value, dl, TCVT); |
12843 | 0 | break; |
12844 | 14 | case 'O': // "O" is the constant zero. |
12845 | 14 | if (Value == 0) |
12846 | 14 | Result = DAG.getTargetConstant(Value, dl, TCVT); |
12847 | 14 | break; |
12848 | 0 | case 'P': // "P" is a constant whose negation is a signed 16-bit constant. |
12849 | 0 | if (isInt<16>(-Value)) |
12850 | 0 | Result = DAG.getTargetConstant(Value, dl, TCVT); |
12851 | 0 | break; |
12852 | 20 | } |
12853 | 20 | break; |
12854 | 20 | } |
12855 | 74 | } |
12856 | 74 | |
12857 | 74 | if (Result.getNode()) {
12858 | 20 | Ops.push_back(Result); |
12859 | 20 | return; |
12860 | 20 | } |
12861 | 54 | |
12862 | 54 | // Handle standard constraint letters. |
12863 | 54 | TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); |
12864 | 54 | } |
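
A sketch of an immediate constraint this routine validates (PowerPC target assumed): "I" only matches a signed 16-bit compile-time constant, so 42 below becomes a target constant, while a value outside [-32768, 32767] would leave Ops empty and be rejected:

int add_const(int x) {
  asm("addi %0, %0, %1" : "+r"(x) : "I"(42));
  return x;
}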
12865 | | |
12866 | | // isLegalAddressingMode - Return true if the addressing mode represented |
12867 | | // by AM is legal for this target, for a load/store of the specified type. |
12868 | | bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL, |
12869 | | const AddrMode &AM, Type *Ty, |
12870 | 44.7k | unsigned AS, Instruction *I) const { |
12871 | 44.7k | // PPC does not allow r+i addressing modes for vectors! |
12872 | 44.7k | if (Ty->isVectorTy() && AM.BaseOffs != 0)
12873 | 2.89k | return false; |
12874 | 41.8k | |
12875 | 41.8k | // PPC allows a sign-extended 16-bit immediate field. |
12876 | 41.8k | if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
12877 | 28 | return false; |
12878 | 41.7k | |
12879 | 41.7k | // No global is ever allowed as a base. |
12880 | 41.7k | if (AM.BaseGV)
12881 | 2.32k | return false; |
12882 | 39.4k | |
12883 | 39.4k | // PPC only supports r+r:
12884 | 39.4k | switch (AM.Scale) { |
12885 | 11.3k | case 0: // "r+i" or just "i", depending on HasBaseReg. |
12886 | 11.3k | break; |
12887 | 23.3k | case 1: |
12888 | 23.3k | if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
12889 | 2.71k | return false; |
12890 | 20.6k | // Otherwise we have r+r or r+i. |
12891 | 20.6k | break; |
12892 | 226 | case 2: |
12893 | 226 | if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
12894 | 102 | return false; |
12895 | 124 | // Allow 2*r as r+r. |
12896 | 124 | break; |
12897 | 4.53k | default: |
12898 | 4.53k | // No other scales are supported. |
12899 | 4.53k | return false; |
12900 | 32.1k | } |
12901 | 32.1k | |
12902 | 32.1k | return true; |
12903 | 32.1k | } |
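
A worked enumeration of the rules above in terms of the AddrMode fields (BaseGV, BaseOffs, HasBaseReg, Scale); the shorthand on the left is only illustrative:

// reg + 16       (Scale 0, BaseOffs 16)          -> legal ("r+i")
// reg + reg      (Scale 1, no offset)            -> legal ("r+r")
// reg + reg + 8  (Scale 1, HasBaseReg, offset)   -> illegal ("r+r+i")
// 2*reg          (Scale 2, no base, no offset)   -> legal (folded as "r+r")
// 2*reg + reg    (Scale 2, HasBaseReg)           -> illegal
// &global + reg  (BaseGV set)                    -> illegal
// reg + 0x12345  (offset outside signed 16 bits) -> illegal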
12904 | | |
12905 | | SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, |
12906 | 6 | SelectionDAG &DAG) const { |
12907 | 6 | MachineFunction &MF = DAG.getMachineFunction(); |
12908 | 6 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
12909 | 6 | MFI.setReturnAddressIsTaken(true); |
12910 | 6 | |
12911 | 6 | if (verifyReturnAddressArgumentIsConstant(Op, DAG)) |
12912 | 0 | return SDValue(); |
12913 | 6 | |
12914 | 6 | SDLoc dl(Op); |
12915 | 6 | unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); |
12916 | 6 | |
12917 | 6 | // Make sure the function does not optimize away the store of the RA to |
12918 | 6 | // the stack. |
12919 | 6 | PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); |
12920 | 6 | FuncInfo->setLRStoreRequired(); |
12921 | 6 | bool isPPC64 = Subtarget.isPPC64(); |
12922 | 6 | auto PtrVT = getPointerTy(MF.getDataLayout()); |
12923 | 6 | |
12924 | 6 | if (Depth > 0) {
12925 | 2 | SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); |
12926 | 2 | SDValue Offset = |
12927 | 2 | DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl, |
12928 | 2 | isPPC64 ? MVT::i64 : MVT::i32);
12929 | 2 | return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), |
12930 | 2 | DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset), |
12931 | 2 | MachinePointerInfo()); |
12932 | 2 | } |
12933 | 4 | |
12934 | 4 | // Just load the return address off the stack. |
12935 | 4 | SDValue RetAddrFI = getReturnAddrFrameIndex(DAG); |
12936 | 4 | return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI, |
12937 | 4 | MachinePointerInfo()); |
12938 | 4 | } |
12939 | | |
12940 | | SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, |
12941 | 12 | SelectionDAG &DAG) const { |
12942 | 12 | SDLoc dl(Op); |
12943 | 12 | unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); |
12944 | 12 | |
12945 | 12 | MachineFunction &MF = DAG.getMachineFunction(); |
12946 | 12 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
12947 | 12 | MFI.setFrameAddressIsTaken(true); |
12948 | 12 | |
12949 | 12 | EVT PtrVT = getPointerTy(MF.getDataLayout()); |
12950 | 12 | bool isPPC64 = PtrVT == MVT::i64; |
12951 | 12 | |
12952 | 12 | // Naked functions never have a frame pointer, and so we use r1. For all |
12953 | 12 | // other functions, this decision must be deferred until PEI.
12954 | 12 | unsigned FrameReg; |
12955 | 12 | if (MF.getFunction()->hasFnAttribute(Attribute::Naked)) |
12956 | 1 | FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
12957 | 12 | else |
12958 | 11 | FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
12959 | 12 | |
12960 | 12 | SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, |
12961 | 12 | PtrVT); |
12962 | 16 | while (Depth--) |
12963 | 4 | FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(), |
12964 | 4 | FrameAddr, MachinePointerInfo()); |
12965 | 12 | return FrameAddr; |
12966 | 12 | } |
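
These two lowerings back the corresponding GCC builtins: depth 0 reads the LR save slot or the frame register directly, and a positive depth walks saved frames as in the loop above. A sketch, assuming a PowerPC target:

void *caller_pc(void)     { return __builtin_return_address(0); }
void *current_frame(void) { return __builtin_frame_address(0); }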
12967 | | |
12968 | | // FIXME? Maybe this could be a TableGen attribute on some registers and |
12969 | | // this table could be generated automatically from RegInfo. |
12970 | | unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT, |
12971 | 19 | SelectionDAG &DAG) const { |
12972 | 19 | bool isPPC64 = Subtarget.isPPC64(); |
12973 | 19 | bool isDarwinABI = Subtarget.isDarwinABI(); |
12974 | 19 | |
12975 | 19 | if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
12976 | 19 | (!isPPC64 && VT != MVT::i32))
12977 | 0 | report_fatal_error("Invalid register global variable type"); |
12978 | 19 | |
12979 | 19 | bool is64Bit = isPPC64 && VT == MVT::i64;
12980 | 19 | unsigned Reg = StringSwitch<unsigned>(RegName) |
12981 | 19 | .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
12982 | 19 | .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2)
12983 | 19 | .Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
12984 | 15 | (is64Bit ? PPC::X13 : PPC::R13))
12985 | 19 | .Default(0); |
12986 | 19 | |
12987 | 19 | if (Reg) |
12988 | 11 | return Reg; |
12989 | 8 | report_fatal_error("Invalid register name global variable"); |
12990 | 8 | } |
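
One way this hook is reached is a named-register global variable, which clang lowers to llvm.read_register with the name resolved here. A sketch, assuming a ppc64 Linux target, where r13 is the reserved thread pointer:

register unsigned long ThreadPointer asm("r13");

unsigned long tls_base(void) { return ThreadPointer; }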
12991 | | |
12992 | | bool |
12993 | 1.33k | PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { |
12994 | 1.33k | // The PowerPC target isn't yet aware of offsets. |
12995 | 1.33k | return false; |
12996 | 1.33k | } |
12997 | | |
12998 | | bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, |
12999 | | const CallInst &I, |
13000 | 1.35k | unsigned Intrinsic) const { |
13001 | 1.35k | switch (Intrinsic) { |
13002 | 22 | case Intrinsic::ppc_qpx_qvlfd: |
13003 | 22 | case Intrinsic::ppc_qpx_qvlfs: |
13004 | 22 | case Intrinsic::ppc_qpx_qvlfcd: |
13005 | 22 | case Intrinsic::ppc_qpx_qvlfcs: |
13006 | 22 | case Intrinsic::ppc_qpx_qvlfiwa: |
13007 | 22 | case Intrinsic::ppc_qpx_qvlfiwz: |
13008 | 22 | case Intrinsic::ppc_altivec_lvx: |
13009 | 22 | case Intrinsic::ppc_altivec_lvxl: |
13010 | 22 | case Intrinsic::ppc_altivec_lvebx: |
13011 | 22 | case Intrinsic::ppc_altivec_lvehx: |
13012 | 22 | case Intrinsic::ppc_altivec_lvewx: |
13013 | 22 | case Intrinsic::ppc_vsx_lxvd2x: |
13014 | 22 | case Intrinsic::ppc_vsx_lxvw4x: { |
13015 | 22 | EVT VT; |
13016 | 22 | switch (Intrinsic) { |
13017 | 0 | case Intrinsic::ppc_altivec_lvebx: |
13018 | 0 | VT = MVT::i8; |
13019 | 0 | break; |
13020 | 0 | case Intrinsic::ppc_altivec_lvehx: |
13021 | 0 | VT = MVT::i16; |
13022 | 0 | break; |
13023 | 0 | case Intrinsic::ppc_altivec_lvewx: |
13024 | 0 | VT = MVT::i32; |
13025 | 0 | break; |
13026 | 9 | case Intrinsic::ppc_vsx_lxvd2x: |
13027 | 9 | VT = MVT::v2f64; |
13028 | 9 | break; |
13029 | 0 | case Intrinsic::ppc_qpx_qvlfd: |
13030 | 0 | VT = MVT::v4f64; |
13031 | 0 | break; |
13032 | 0 | case Intrinsic::ppc_qpx_qvlfs: |
13033 | 0 | VT = MVT::v4f32; |
13034 | 0 | break; |
13035 | 0 | case Intrinsic::ppc_qpx_qvlfcd: |
13036 | 0 | VT = MVT::v2f64; |
13037 | 0 | break; |
13038 | 0 | case Intrinsic::ppc_qpx_qvlfcs: |
13039 | 0 | VT = MVT::v2f32; |
13040 | 0 | break; |
13041 | 13 | default: |
13042 | 13 | VT = MVT::v4i32; |
13043 | 13 | break; |
13044 | 22 | } |
13045 | 22 | |
13046 | 22 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
13047 | 22 | Info.memVT = VT; |
13048 | 22 | Info.ptrVal = I.getArgOperand(0); |
13049 | 22 | Info.offset = -VT.getStoreSize()+1; |
13050 | 22 | Info.size = 2*VT.getStoreSize()-1; |
13051 | 22 | Info.align = 1; |
13052 | 22 | Info.vol = false; |
13053 | 22 | Info.readMem = true; |
13054 | 22 | Info.writeMem = false; |
13055 | 22 | return true; |
13056 | 22 | } |
13057 | 0 | case Intrinsic::ppc_qpx_qvlfda: |
13058 | 0 | case Intrinsic::ppc_qpx_qvlfsa: |
13059 | 0 | case Intrinsic::ppc_qpx_qvlfcda: |
13060 | 0 | case Intrinsic::ppc_qpx_qvlfcsa: |
13061 | 0 | case Intrinsic::ppc_qpx_qvlfiwaa: |
13062 | 0 | case Intrinsic::ppc_qpx_qvlfiwza: { |
13063 | 0 | EVT VT; |
13064 | 0 | switch (Intrinsic) { |
13065 | 0 | case Intrinsic::ppc_qpx_qvlfda: |
13066 | 0 | VT = MVT::v4f64; |
13067 | 0 | break; |
13068 | 0 | case Intrinsic::ppc_qpx_qvlfsa: |
13069 | 0 | VT = MVT::v4f32; |
13070 | 0 | break; |
13071 | 0 | case Intrinsic::ppc_qpx_qvlfcda: |
13072 | 0 | VT = MVT::v2f64; |
13073 | 0 | break; |
13074 | 0 | case Intrinsic::ppc_qpx_qvlfcsa: |
13075 | 0 | VT = MVT::v2f32; |
13076 | 0 | break; |
13077 | 0 | default: |
13078 | 0 | VT = MVT::v4i32; |
13079 | 0 | break; |
13080 | 0 | } |
13081 | 0 |
13082 | 0 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
13083 | 0 | Info.memVT = VT; |
13084 | 0 | Info.ptrVal = I.getArgOperand(0); |
13085 | 0 | Info.offset = 0; |
13086 | 0 | Info.size = VT.getStoreSize(); |
13087 | 0 | Info.align = 1; |
13088 | 0 | Info.vol = false; |
13089 | 0 | Info.readMem = true; |
13090 | 0 | Info.writeMem = false; |
13091 | 0 | return true; |
13092 | 0 | } |
13093 | 20 | case Intrinsic::ppc_qpx_qvstfd: |
13094 | 20 | case Intrinsic::ppc_qpx_qvstfs: |
13095 | 20 | case Intrinsic::ppc_qpx_qvstfcd: |
13096 | 20 | case Intrinsic::ppc_qpx_qvstfcs: |
13097 | 20 | case Intrinsic::ppc_qpx_qvstfiw: |
13098 | 20 | case Intrinsic::ppc_altivec_stvx: |
13099 | 20 | case Intrinsic::ppc_altivec_stvxl: |
13100 | 20 | case Intrinsic::ppc_altivec_stvebx: |
13101 | 20 | case Intrinsic::ppc_altivec_stvehx: |
13102 | 20 | case Intrinsic::ppc_altivec_stvewx: |
13103 | 20 | case Intrinsic::ppc_vsx_stxvd2x: |
13104 | 20 | case Intrinsic::ppc_vsx_stxvw4x: { |
13105 | 20 | EVT VT; |
13106 | 20 | switch (Intrinsic) { |
13107 | 0 | case Intrinsic::ppc_altivec_stvebx: |
13108 | 0 | VT = MVT::i8; |
13109 | 0 | break; |
13110 | 0 | case Intrinsic::ppc_altivec_stvehx: |
13111 | 0 | VT = MVT::i16; |
13112 | 0 | break; |
13113 | 0 | case Intrinsic::ppc_altivec_stvewx: |
13114 | 0 | VT = MVT::i32; |
13115 | 0 | break; |
13116 | 9 | case Intrinsic::ppc_vsx_stxvd2x: |
13117 | 9 | VT = MVT::v2f64; |
13118 | 9 | break; |
13119 | 0 | case Intrinsic::ppc_qpx_qvstfd: |
13120 | 0 | VT = MVT::v4f64; |
13121 | 0 | break; |
13122 | 0 | case Intrinsic::ppc_qpx_qvstfs: |
13123 | 0 | VT = MVT::v4f32; |
13124 | 0 | break; |
13125 | 0 | case Intrinsic::ppc_qpx_qvstfcd: |
13126 | 0 | VT = MVT::v2f64; |
13127 | 0 | break; |
13128 | 0 | case Intrinsic::ppc_qpx_qvstfcs: |
13129 | 0 | VT = MVT::v2f32; |
13130 | 0 | break; |
13131 | 11 | default: |
13132 | 11 | VT = MVT::v4i32; |
13133 | 11 | break; |
13134 | 20 | } |
13135 | 20 | |
13136 | 20 | Info.opc = ISD::INTRINSIC_VOID; |
13137 | 20 | Info.memVT = VT; |
13138 | 20 | Info.ptrVal = I.getArgOperand(1); |
13139 | 20 | Info.offset = -VT.getStoreSize()+1; |
13140 | 20 | Info.size = 2*VT.getStoreSize()-1; |
13141 | 20 | Info.align = 1; |
13142 | 20 | Info.vol = false; |
13143 | 20 | Info.readMem = false; |
13144 | 20 | Info.writeMem = true; |
13145 | 20 | return true; |
13146 | 20 | } |
13147 | 0 | case Intrinsic::ppc_qpx_qvstfda: |
13148 | 0 | case Intrinsic::ppc_qpx_qvstfsa: |
13149 | 0 | case Intrinsic::ppc_qpx_qvstfcda: |
13150 | 0 | case Intrinsic::ppc_qpx_qvstfcsa: |
13151 | 0 | case Intrinsic::ppc_qpx_qvstfiwa: { |
13152 | 0 | EVT VT; |
13153 | 0 | switch (Intrinsic) { |
13154 | 0 | case Intrinsic::ppc_qpx_qvstfda: |
13155 | 0 | VT = MVT::v4f64; |
13156 | 0 | break; |
13157 | 0 | case Intrinsic::ppc_qpx_qvstfsa: |
13158 | 0 | VT = MVT::v4f32; |
13159 | 0 | break; |
13160 | 0 | case Intrinsic::ppc_qpx_qvstfcda: |
13161 | 0 | VT = MVT::v2f64; |
13162 | 0 | break; |
13163 | 0 | case Intrinsic::ppc_qpx_qvstfcsa: |
13164 | 0 | VT = MVT::v2f32; |
13165 | 0 | break; |
13166 | 0 | default: |
13167 | 0 | VT = MVT::v4i32; |
13168 | 0 | break; |
13169 | 0 | } |
13170 | 0 |
13171 | 0 | Info.opc = ISD::INTRINSIC_VOID; |
13172 | 0 | Info.memVT = VT; |
13173 | 0 | Info.ptrVal = I.getArgOperand(1); |
13174 | 0 | Info.offset = 0; |
13175 | 0 | Info.size = VT.getStoreSize(); |
13176 | 0 | Info.align = 1; |
13177 | 0 | Info.vol = false; |
13178 | 0 | Info.readMem = false; |
13179 | 0 | Info.writeMem = true; |
13180 | 0 | return true; |
13181 | 0 | } |
13182 | 1.30k | default: |
13183 | 1.30k | break; |
13184 | 1.30k | } |
13185 | 1.30k | |
13186 | 1.30k | return false; |
13187 | 1.30k | } |
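
Worked example for the offset/size arithmetic in the lvx/stvx cases above, taking VT = v4i32 (store size 16): those instructions ignore the low four address bits, so the access may touch any byte in [p - 15, p + 15], which is exactly the window recorded:

// Info.offset = -VT.getStoreSize() + 1 = -16 + 1 = -15
// Info.size   = 2 * VT.getStoreSize() - 1 = 32 - 1 = 31   (bytes)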
13188 | | |
13189 | | /// getOptimalMemOpType - Returns the target specific optimal type for load |
13190 | | /// and store operations as a result of memset, memcpy, and memmove |
13191 | | /// lowering. If DstAlign is zero that means it's safe to destination |
13192 | | /// lowering. If DstAlign is zero, it is safe because the destination
13193 | | /// alignment can satisfy any constraint. Similarly, if SrcAlign is zero it
13194 | | /// probably because the source does not need to be loaded. If 'IsMemset' is |
13195 | | /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that |
13196 | | /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy |
13197 | | /// source is constant so it does not need to be loaded. |
13198 | | /// It returns EVT::Other if the type should be determined using generic |
13199 | | /// target-independent logic. |
13200 | | EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, |
13201 | | unsigned DstAlign, unsigned SrcAlign, |
13202 | | bool IsMemset, bool ZeroMemset, |
13203 | | bool MemcpyStrSrc, |
13204 | 131 | MachineFunction &MF) const { |
13205 | 131 | if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
13206 | 54 | const Function *F = MF.getFunction(); |
13207 | 54 | // When expanding a memset, require at least two QPX instructions to cover |
13208 | 54 | // the cost of loading the value to be stored from the constant pool. |
13209 | 54 | if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
13210 | 54 | (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
13211 | 54 | !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
13212 | 2 | return MVT::v4f64; |
13213 | 2 | } |
13214 | 52 | |
13215 | 52 | // We should use Altivec/VSX loads and stores when available. For unaligned |
13216 | 52 | // addresses, unaligned VSX loads are only fast starting with the P8. |
13217 | 52 | if (Subtarget.hasAltivec() && Size >= 16 &&
13218 | 33 | (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
13219 | 22 | ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
13220 | 30 | return MVT::v4i32; |
13221 | 99 | } |
13222 | 99 | |
13223 | 99 | if (Subtarget.isPPC64()) {
13224 | 97 | return MVT::i64; |
13225 | 97 | } |
13226 | 2 | |
13227 | 2 | return MVT::i32; |
13228 | 2 | } |
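
A sketch of a copy the hook above would steer to v4i32 chunks on an Altivec-capable target (both operands 16-byte aligned, size >= 16); an unaligned copy before POWER8 would instead fall through to i64 chunks. Hypothetical example:

#include <cstring>

struct alignas(16) Block { char bytes[64]; };

void copy_block(Block *dst, const Block *src) {
  std::memcpy(dst, src, sizeof(Block)); // eligible for 16-byte vector ops
}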
13229 | | |
13230 | | /// \brief Returns true if it is beneficial to convert a load of a constant |
13231 | | /// to just the constant itself. |
13232 | | bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, |
13233 | 0 | Type *Ty) const { |
13234 | 0 | assert(Ty->isIntegerTy()); |
13235 | 0 |
|
13236 | 0 | unsigned BitSize = Ty->getPrimitiveSizeInBits(); |
13237 | 0 | return !(BitSize == 0 || BitSize > 64); |
13238 | 0 | } |
13239 | | |
13241 | 1.63k | if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
13241 | 1.63k | if (!Ty1->isIntegerTy() || 1.63k !Ty2->isIntegerTy()1.63k ) |
13242 | 1 | return false; |
13243 | 1.63k | unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); |
13244 | 1.63k | unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); |
13245 | 729 | return NumBits1 == 64 && NumBits2 == 32; |
13246 | 1.63k | } |
13247 | | |
13248 | 2.99k | bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { |
13249 | 2.99k | if (!VT1.isInteger() || !VT2.isInteger())
13250 | 0 | return false; |
13251 | 2.99k | unsigned NumBits1 = VT1.getSizeInBits(); |
13252 | 2.99k | unsigned NumBits2 = VT2.getSizeInBits(); |
13253 | 440 | return NumBits1 == 64 && NumBits2 == 32; |
13254 | 2.99k | } |
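
Both overloads encode the same fact: on PPC64, truncating i64 to i32 is free because the narrow value is just the low 32 bits of the same GPR, e.g.:

int low_word(long x) {
  return (int)x; // no instruction needed; reads the low half of the register
}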
13255 | | |
13256 | 2.57k | bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { |
13257 | 2.57k | // Generally speaking, zexts are not free, but they are free when they can be |
13258 | 2.57k | // folded with other operations. |
13259 | 2.57k | if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
13260 | 171 | EVT MemVT = LD->getMemoryVT(); |
13261 | 171 | if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
13262 | 156 | (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
13263 | 48 | (LD->getExtensionType() == ISD::NON_EXTLOAD || |
13264 | 0 | LD->getExtensionType() == ISD::ZEXTLOAD)) |
13265 | 48 | return true; |
13266 | 2.52k | } |
13267 | 2.52k | |
13268 | 2.52k | // FIXME: Add other cases... |
13269 | 2.52k | // - 32-bit shifts with a zext to i64 |
13270 | 2.52k | // - zext after ctlz, bswap, etc. |
13271 | 2.52k | // - zext after and by a constant mask |
13272 | 2.52k | |
13273 | 2.52k | return TargetLowering::isZExtFree(Val, VT2); |
13274 | 2.52k | } |
13275 | | |
13276 | 540 | bool PPCTargetLowering::isFPExtFree(EVT VT) const { |
13277 | 540 | assert(VT.isFloatingPoint()); |
13278 | 540 | return true; |
13279 | 540 | } |
13280 | | |
13281 | 6.05k | bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const { |
13282 | 148 | return isInt<16>(Imm) || isUInt<16>(Imm); |
13283 | 6.05k | } |
13284 | | |
13285 | 3.04k | bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const { |
13286 | 159 | return isInt<16>(Imm) || isUInt<16>(Imm); |
13287 | 3.04k | } |
13288 | | |
13289 | | bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, |
13290 | | unsigned, |
13291 | | unsigned, |
13292 | 972 | bool *Fast) const { |
13293 | 972 | if (DisablePPCUnaligned) |
13294 | 12 | return false; |
13295 | 960 | |
13296 | 960 | // PowerPC supports unaligned memory access for simple non-vector types. |
13297 | 960 | // Although accessing unaligned addresses is not as efficient as accessing |
13298 | 960 | // aligned addresses, it is generally more efficient than manual expansion, |
13299 | 960 | // and generally only traps for software emulation when crossing page |
13300 | 960 | // boundaries. |
13301 | 960 | |
13302 | 960 | if (!VT.isSimple())
13303 | 0 | return false; |
13304 | 960 | |
13305 | 960 | if (VT.getSimpleVT().isVector()) {
13306 | 601 | if (Subtarget.hasVSX()) {
13307 | 581 | if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
13308 | 581 | VT != MVT::v4f32 && VT != MVT::v4i32)
13309 | 0 | return false; |
13310 | 20 | } else { |
13311 | 20 | return false; |
13312 | 20 | } |
13313 | 940 | } |
13314 | 940 | |
13315 | 940 | if (VT == MVT::ppcf128)
13316 | 0 | return false; |
13317 | 940 | |
13318 | 940 | if (Fast)
13319 | 32 | *Fast = true; |
13320 | 972 | |
13321 | 972 | return true; |
13322 | 972 | } |
13323 | | |
13324 | 1.30k | bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { |
13325 | 1.30k | VT = VT.getScalarType(); |
13326 | 1.30k | |
13327 | 1.30k | if (!VT.isSimple()) |
13328 | 0 | return false; |
13329 | 1.30k | |
13330 | 1.30k | switch (VT.getSimpleVT().SimpleTy) { |
13331 | 1.28k | case MVT::f32: |
13332 | 1.28k | case MVT::f64: |
13333 | 1.28k | return true; |
13334 | 15 | default: |
13335 | 15 | break; |
13336 | 15 | } |
13337 | 15 | |
13338 | 15 | return false; |
13339 | 15 | } |
13340 | | |
13341 | | const MCPhysReg * |
13342 | 59 | PPCTargetLowering::getScratchRegisters(CallingConv::ID) const { |
13343 | 59 | // LR is a callee-save register, but we must treat it as clobbered by any call |
13344 | 59 | // site. Hence we include LR in the scratch registers, which are in turn added |
13345 | 59 | // as implicit-defs for stackmaps and patchpoints. The same reasoning applies |
13346 | 59 | // to CTR, which is used by any indirect call. |
13347 | 59 | static const MCPhysReg ScratchRegs[] = { |
13348 | 59 | PPC::X12, PPC::LR8, PPC::CTR8, 0 |
13349 | 59 | }; |
13350 | 59 | |
13351 | 59 | return ScratchRegs; |
13352 | 59 | } |
13353 | | |
13354 | | unsigned PPCTargetLowering::getExceptionPointerRegister( |
13355 | 66 | const Constant *PersonalityFn) const { |
13356 | 66 | return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
13357 | 66 | } |
13358 | | |
13359 | | unsigned PPCTargetLowering::getExceptionSelectorRegister( |
13360 | 33 | const Constant *PersonalityFn) const { |
13361 | 33 | return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
13362 | 33 | } |
13363 | | |
13364 | | bool |
13365 | | PPCTargetLowering::shouldExpandBuildVectorWithShuffles( |
13366 | 238 | EVT VT , unsigned DefinedValues) const { |
13367 | 238 | if (VT == MVT::v2i64) |
13368 | 73 | return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves |
13369 | 165 | |
13370 | 165 | if (Subtarget.hasVSX() || Subtarget.hasQPX())
13371 | 143 | return true; |
13372 | 22 | |
13373 | 22 | return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues); |
13374 | 22 | } |
13375 | | |
13376 | 74.6k | Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const { |
13377 | 74.6k | if (DisableILPPref || Subtarget.enableMachineScheduler())
13378 | 56.5k | return TargetLowering::getSchedulingPreference(N); |
13379 | 18.0k | |
13380 | 18.0k | return Sched::ILP; |
13381 | 18.0k | } |
13382 | | |
13383 | | // Create a fast isel object. |
13384 | | FastISel * |
13385 | | PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo, |
13386 | 596 | const TargetLibraryInfo *LibInfo) const { |
13387 | 596 | return PPC::createFastISel(FuncInfo, LibInfo); |
13388 | 596 | } |
13389 | | |
13390 | 3 | void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const { |
13391 | 3 | if (Subtarget.isDarwinABI()) return;
13392 | 3 | if (!Subtarget.isPPC64()) return;
13393 | 3 | |
13394 | 3 | // Update IsSplitCSR in PPCFunctionInfo |
13395 | 3 | PPCFunctionInfo *PFI = Entry->getParent()->getInfo<PPCFunctionInfo>(); |
13396 | 3 | PFI->setIsSplitCSR(true); |
13397 | 3 | } |
13398 | | |
13399 | | void PPCTargetLowering::insertCopiesSplitCSR( |
13400 | | MachineBasicBlock *Entry, |
13401 | 3 | const SmallVectorImpl<MachineBasicBlock *> &Exits) const { |
13402 | 3 | const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
13403 | 3 | const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent()); |
13404 | 3 | if (!IStart) |
13405 | 0 | return; |
13406 | 3 | |
13407 | 3 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
13408 | 3 | MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); |
13409 | 3 | MachineBasicBlock::iterator MBBI = Entry->begin(); |
13410 | 158 | for (const MCPhysReg *I = IStart; *I; ++I) {
13411 | 155 | const TargetRegisterClass *RC = nullptr; |
13412 | 155 | if (PPC::G8RCRegClass.contains(*I)) |
13413 | 56 | RC = &PPC::G8RCRegClass; |
13414 | 99 | else if (PPC::F8RCRegClass.contains(*I))
13415 | 54 | RC = &PPC::F8RCRegClass; |
13416 | 45 | else if (PPC::CRRCRegClass.contains(*I))
13417 | 9 | RC = &PPC::CRRCRegClass; |
13418 | 36 | else if (PPC::VRRCRegClass.contains(*I))
13419 | 36 | RC = &PPC::VRRCRegClass; |
13420 | 36 | else |
13421 | 0 | llvm_unreachable("Unexpected register class in CSRsViaCopy!"); |
13422 | 155 | |
13423 | 155 | unsigned NewVR = MRI->createVirtualRegister(RC); |
13424 | 155 | // Create copy from CSR to a virtual register. |
13425 | 155 | // FIXME: this currently does not emit CFI pseudo-instructions, it works |
13426 | 155 | // fine for CXX_FAST_TLS since the C++-style TLS access functions should be |
13427 | 155 | // nounwind. If we want to generalize this later, we may need to emit |
13428 | 155 | // CFI pseudo-instructions. |
13429 | 155 | assert(Entry->getParent()->getFunction()->hasFnAttribute( |
13430 | 155 | Attribute::NoUnwind) && |
13431 | 155 | "Function should be nounwind in insertCopiesSplitCSR!"); |
13432 | 155 | Entry->addLiveIn(*I); |
13433 | 155 | BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) |
13434 | 155 | .addReg(*I); |
13435 | 155 | |
13436 | 155 | // Insert the copy-back instructions right before the terminator |
13437 | 155 | for (auto *Exit : Exits) |
13438 | 155 | BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(), |
13439 | 155 | TII->get(TargetOpcode::COPY), *I) |
13440 | 155 | .addReg(NewVR); |
13441 | 155 | } |
13442 | 3 | } |
13443 | | |
13444 | | // Override to enable LOAD_STACK_GUARD lowering on Linux. |
13445 | 12 | bool PPCTargetLowering::useLoadStackGuardNode() const { |
13446 | 12 | if (!Subtarget.isTargetLinux()) |
13447 | 6 | return TargetLowering::useLoadStackGuardNode(); |
13448 | 6 | return true; |
13449 | 6 | } |
13450 | | |
13451 | | // Override to disable global variable loading on Linux. |
13452 | 4 | void PPCTargetLowering::insertSSPDeclarations(Module &M) const { |
13453 | 4 | if (!Subtarget.isTargetLinux()) |
13454 | 2 | return TargetLowering::insertSSPDeclarations(M); |
13455 | 2 | } |
13456 | | |
13457 | 364 | bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { |
13458 | 364 | if (!VT.isSimple() || !Subtarget.hasVSX())
13459 | 210 | return false; |
13460 | 154 | |
13461 | 154 | switch(VT.getSimpleVT().SimpleTy) { |
13462 | 0 | default: |
13463 | 0 | // For FP types that are currently not supported by PPC backend, return |
13464 | 0 | // false. Examples: f16, f80. |
13465 | 0 | return false; |
13466 | 154 | case MVT::f32: |
13467 | 154 | case MVT::f64: |
13468 | 154 | case MVT::ppcf128: |
13469 | 154 | return Imm.isPosZero(); |
13470 | 0 | } |
13471 | 0 | } |
13472 | | |
13473 | | // For vector shift operation op, fold |
13474 | | // (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y) |
13475 | | static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, |
13476 | 2.37k | SelectionDAG &DAG) { |
13477 | 2.37k | SDValue N0 = N->getOperand(0); |
13478 | 2.37k | SDValue N1 = N->getOperand(1); |
13479 | 2.37k | EVT VT = N0.getValueType(); |
13480 | 2.37k | unsigned OpSizeInBits = VT.getScalarSizeInBits(); |
13481 | 2.37k | unsigned Opcode = N->getOpcode(); |
13482 | 2.37k | unsigned TargetOpcode; |
13483 | 2.37k | |
13484 | 2.37k | switch (Opcode) { |
13485 | 0 | default: |
13486 | 0 | llvm_unreachable("Unexpected shift operation"); |
13487 | 1.40k | case ISD::SHL: |
13488 | 1.40k | TargetOpcode = PPCISD::SHL; |
13489 | 1.40k | break; |
13490 | 569 | case ISD::SRL: |
13491 | 569 | TargetOpcode = PPCISD::SRL; |
13492 | 569 | break; |
13493 | 400 | case ISD::SRA: |
13494 | 400 | TargetOpcode = PPCISD::SRA; |
13495 | 400 | break; |
13496 | 2.37k | } |
13497 | 2.37k | |
13498 | 2.37k | if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
13499 | 144 | N1->getOpcode() == ISD::AND) |
13500 | 12 | if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
13501 | 12 | if (Mask->getZExtValue() == OpSizeInBits - 1)
13502 | 12 | return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0)); |
13503 | 2.36k | |
13504 | 2.36k | return SDValue(); |
13505 | 2.36k | } |
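
A sketch of the source-level pattern this fold removes, written with clang's vector extensions and assuming an Altivec/VSX target: masking the shift amount with numbits-1 is redundant because the PPC vector shift instructions already take the amount modulo the element width.

typedef int v4si __attribute__((vector_size(16)));

v4si shl_mod(v4si x, v4si y) {
  return x << (y & 31); // the (y & 31) mask is stripped by the combine
}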
13506 | | |
13507 | 1.40k | SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const { |
13508 | 1.40k | if (auto Value = stripModuloOnShift(*this, N, DCI.DAG)) |
13509 | 4 | return Value; |
13510 | 1.40k | |
13511 | 1.40k | return SDValue(); |
13512 | 1.40k | } |
13513 | | |
13514 | 400 | SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const { |
13515 | 400 | if (auto Value = stripModuloOnShift(*this, N, DCI.DAG)) |
13516 | 4 | return Value; |
13517 | 396 | |
13518 | 396 | return SDValue(); |
13519 | 396 | } |
13520 | | |
13521 | 569 | SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const { |
13522 | 569 | if (auto Value = stripModuloOnShift(*this, N, DCI.DAG)) |
13523 | 4 | return Value; |
13524 | 565 | |
13525 | 565 | return SDValue(); |
13526 | 565 | } |