/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- HexagonISelDAGToDAG.cpp - A dag to dag inst selector for Hexagon --===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This file defines an instruction selector for the Hexagon target. |
11 | | // |
12 | | //===----------------------------------------------------------------------===// |
13 | | |
14 | | #include "Hexagon.h" |
15 | | #include "HexagonISelLowering.h" |
16 | | #include "HexagonMachineFunctionInfo.h" |
17 | | #include "HexagonTargetMachine.h" |
18 | | #include "llvm/CodeGen/FunctionLoweringInfo.h" |
19 | | #include "llvm/CodeGen/MachineInstrBuilder.h" |
20 | | #include "llvm/CodeGen/SelectionDAGISel.h" |
21 | | #include "llvm/IR/Intrinsics.h" |
22 | | #include "llvm/Support/CommandLine.h" |
23 | | #include "llvm/Support/Debug.h" |
24 | | using namespace llvm; |
25 | | |
26 | | #define DEBUG_TYPE "hexagon-isel" |
27 | | |
28 | | static |
29 | | cl::opt<bool> |
30 | | EnableAddressRebalancing("isel-rebalance-addr", cl::Hidden, cl::init(true), |
31 | | cl::desc("Rebalance address calculation trees to improve " |
32 | | "instruction selection")); |
33 | | |
34 | | // Rebalance only if this allows e.g. combining a GA with an offset or |
35 | | // factoring out a shift. |
36 | | static |
37 | | cl::opt<bool> |
38 | | RebalanceOnlyForOptimizations("rebalance-only-opt", cl::Hidden, cl::init(false), |
39 | | cl::desc("Rebalance address tree only if this allows optimizations")); |
40 | | |
41 | | static |
42 | | cl::opt<bool> |
43 | | RebalanceOnlyImbalancedTrees("rebalance-only-imbal", cl::Hidden, |
44 | | cl::init(false), cl::desc("Rebalance address tree only if it is imbalanced")); |
45 | | |
46 | | //===----------------------------------------------------------------------===// |
47 | | // Instruction Selector Implementation |
48 | | //===----------------------------------------------------------------------===// |
49 | | |
50 | | //===--------------------------------------------------------------------===// |
51 | | /// HexagonDAGToDAGISel - Hexagon specific code to select Hexagon machine |
52 | | /// instructions for SelectionDAG operations. |
53 | | /// |
54 | | namespace { |
55 | | class HexagonDAGToDAGISel : public SelectionDAGISel { |
56 | | const HexagonSubtarget *HST; |
57 | | const HexagonInstrInfo *HII; |
58 | | const HexagonRegisterInfo *HRI; |
59 | | public: |
60 | | explicit HexagonDAGToDAGISel(HexagonTargetMachine &tm, |
61 | | CodeGenOpt::Level OptLevel) |
62 | | : SelectionDAGISel(tm, OptLevel), HST(nullptr), HII(nullptr), |
63 | 441 | HRI(nullptr) {} |
64 | | |
65 | 2.40k | bool runOnMachineFunction(MachineFunction &MF) override { |
66 | 2.40k | // Reset the subtarget each time through. |
67 | 2.40k | HST = &MF.getSubtarget<HexagonSubtarget>(); |
68 | 2.40k | HII = HST->getInstrInfo(); |
69 | 2.40k | HRI = HST->getRegisterInfo(); |
70 | 2.40k | SelectionDAGISel::runOnMachineFunction(MF); |
71 | 2.40k | return true; |
72 | 2.40k | } |
73 | | |
74 | 8.82k | bool ComplexPatternFuncMutatesDAG() const override { |
75 | 8.82k | return true; |
76 | 8.82k | } |
77 | | void PreprocessISelDAG() override; |
78 | | void EmitFunctionEntryCode() override; |
79 | | |
80 | | void Select(SDNode *N) override; |
81 | | |
82 | | // Complex Pattern Selectors. |
83 | | inline bool SelectAddrGA(SDValue &N, SDValue &R); |
84 | | inline bool SelectAddrGP(SDValue &N, SDValue &R); |
85 | | bool SelectGlobalAddress(SDValue &N, SDValue &R, bool UseGP); |
86 | | bool SelectAddrFI(SDValue &N, SDValue &R); |
87 | | bool DetectUseSxtw(SDValue &N, SDValue &R); |
88 | | |
89 | 0 | StringRef getPassName() const override { |
90 | 0 | return "Hexagon DAG->DAG Pattern Instruction Selection"; |
91 | 0 | } |
92 | | |
93 | | // Generate a machine instruction node corresponding to the circ/brev |
94 | | // load intrinsic. |
95 | | MachineSDNode *LoadInstrForLoadIntrinsic(SDNode *IntN); |
96 | | // Given the circ/brev load intrinsic and the already generated machine |
97 | | // instruction, generate the appropriate store (that is a part of the |
98 | | // intrinsic's functionality). |
99 | | SDNode *StoreInstrForLoadIntrinsic(MachineSDNode *LoadN, SDNode *IntN); |
100 | | |
101 | | void SelectFrameIndex(SDNode *N); |
102 | | /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for |
103 | | /// inline asm expressions. |
104 | | bool SelectInlineAsmMemoryOperand(const SDValue &Op, |
105 | | unsigned ConstraintID, |
106 | | std::vector<SDValue> &OutOps) override; |
107 | | bool tryLoadOfLoadIntrinsic(LoadSDNode *N); |
108 | | void SelectLoad(SDNode *N); |
109 | | void SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl); |
110 | | void SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl); |
111 | | void SelectStore(SDNode *N); |
112 | | void SelectSHL(SDNode *N); |
113 | | void SelectZeroExtend(SDNode *N); |
114 | | void SelectIntrinsicWChain(SDNode *N); |
115 | | void SelectIntrinsicWOChain(SDNode *N); |
116 | | void SelectConstant(SDNode *N); |
117 | | void SelectConstantFP(SDNode *N); |
118 | | void SelectBitcast(SDNode *N); |
119 | | |
120 | | // Include the pieces autogenerated from the target description. |
121 | | #include "HexagonGenDAGISel.inc" |
122 | | |
123 | | private: |
124 | | bool keepsLowBits(const SDValue &Val, unsigned NumBits, SDValue &Src); |
125 | | bool isOrEquivalentToAdd(const SDNode *N) const; |
126 | | bool isAlignedMemNode(const MemSDNode *N) const; |
127 | | bool isSmallStackStore(const StoreSDNode *N) const; |
128 | | bool isPositiveHalfWord(const SDNode *N) const; |
129 | | |
130 | | // DAG preprocessing functions. |
131 | | void ppSimplifyOrSelect0(std::vector<SDNode*> &&Nodes); |
132 | | void ppAddrReorderAddShl(std::vector<SDNode*> &&Nodes); |
133 | | void ppAddrRewriteAndSrl(std::vector<SDNode*> &&Nodes); |
134 | | void ppHoistZextI1(std::vector<SDNode*> &&Nodes); |
135 | | |
136 | | SmallDenseMap<SDNode *,int> RootWeights; |
137 | | SmallDenseMap<SDNode *,int> RootHeights; |
138 | | SmallDenseMap<const Value *,int> GAUsesInFunction; |
139 | | int getWeight(SDNode *N); |
140 | | int getHeight(SDNode *N); |
141 | | SDValue getMultiplierForSHL(SDNode *N); |
142 | | SDValue factorOutPowerOf2(SDValue V, unsigned Power); |
143 | | unsigned getUsesInFunction(const Value *V); |
144 | | SDValue balanceSubTree(SDNode *N, bool Factorize = false); |
145 | | void rebalanceAddressTrees(); |
146 | | }; // end HexagonDAGToDAGISel |
147 | | } // end anonymous namespace |
148 | | |
149 | | |
150 | | /// createHexagonISelDag - This pass converts a legalized DAG into a |
151 | | /// Hexagon-specific DAG, ready for instruction scheduling. |
152 | | /// |
153 | | namespace llvm { |
154 | | FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM, |
155 | 441 | CodeGenOpt::Level OptLevel) { |
156 | 441 | return new HexagonDAGToDAGISel(TM, OptLevel); |
157 | 441 | } |
158 | | } |
159 | | |
160 | | // Intrinsics that return a a predicate. |
161 | 0 | static bool doesIntrinsicReturnPredicate(unsigned ID) { |
162 | 0 | switch (ID) { |
163 | 0 | default: |
164 | 0 | return false; |
165 | 0 | case Intrinsic::hexagon_C2_cmpeq: |
166 | 0 | case Intrinsic::hexagon_C2_cmpgt: |
167 | 0 | case Intrinsic::hexagon_C2_cmpgtu: |
168 | 0 | case Intrinsic::hexagon_C2_cmpgtup: |
169 | 0 | case Intrinsic::hexagon_C2_cmpgtp: |
170 | 0 | case Intrinsic::hexagon_C2_cmpeqp: |
171 | 0 | case Intrinsic::hexagon_C2_bitsset: |
172 | 0 | case Intrinsic::hexagon_C2_bitsclr: |
173 | 0 | case Intrinsic::hexagon_C2_cmpeqi: |
174 | 0 | case Intrinsic::hexagon_C2_cmpgti: |
175 | 0 | case Intrinsic::hexagon_C2_cmpgtui: |
176 | 0 | case Intrinsic::hexagon_C2_cmpgei: |
177 | 0 | case Intrinsic::hexagon_C2_cmpgeui: |
178 | 0 | case Intrinsic::hexagon_C2_cmplt: |
179 | 0 | case Intrinsic::hexagon_C2_cmpltu: |
180 | 0 | case Intrinsic::hexagon_C2_bitsclri: |
181 | 0 | case Intrinsic::hexagon_C2_and: |
182 | 0 | case Intrinsic::hexagon_C2_or: |
183 | 0 | case Intrinsic::hexagon_C2_xor: |
184 | 0 | case Intrinsic::hexagon_C2_andn: |
185 | 0 | case Intrinsic::hexagon_C2_not: |
186 | 0 | case Intrinsic::hexagon_C2_orn: |
187 | 0 | case Intrinsic::hexagon_C2_pxfer_map: |
188 | 0 | case Intrinsic::hexagon_C2_any8: |
189 | 0 | case Intrinsic::hexagon_C2_all8: |
190 | 0 | case Intrinsic::hexagon_A2_vcmpbeq: |
191 | 0 | case Intrinsic::hexagon_A2_vcmpbgtu: |
192 | 0 | case Intrinsic::hexagon_A2_vcmpheq: |
193 | 0 | case Intrinsic::hexagon_A2_vcmphgt: |
194 | 0 | case Intrinsic::hexagon_A2_vcmphgtu: |
195 | 0 | case Intrinsic::hexagon_A2_vcmpweq: |
196 | 0 | case Intrinsic::hexagon_A2_vcmpwgt: |
197 | 0 | case Intrinsic::hexagon_A2_vcmpwgtu: |
198 | 0 | case Intrinsic::hexagon_C2_tfrrp: |
199 | 0 | case Intrinsic::hexagon_S2_tstbit_i: |
200 | 0 | case Intrinsic::hexagon_S2_tstbit_r: |
201 | 0 | return true; |
202 | 0 | } |
203 | 0 | } |
204 | | |
205 | 53 | void HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl) { |
206 | 53 | SDValue Chain = LD->getChain(); |
207 | 53 | SDValue Base = LD->getBasePtr(); |
208 | 53 | SDValue Offset = LD->getOffset(); |
209 | 53 | int32_t Inc = cast<ConstantSDNode>(Offset.getNode())->getSExtValue(); |
210 | 53 | EVT LoadedVT = LD->getMemoryVT(); |
211 | 53 | unsigned Opcode = 0; |
212 | 53 | |
213 | 53 | // Check for zero extended loads. Treat any-extend loads as zero extended |
214 | 53 | // loads. |
215 | 53 | ISD::LoadExtType ExtType = LD->getExtensionType(); |
216 | 51 | bool IsZeroExt = (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD); |
217 | 53 | bool IsValidInc = HII->isValidAutoIncImm(LoadedVT, Inc); |
218 | 53 | |
219 | 53 | assert(LoadedVT.isSimple()); |
220 | 53 | switch (LoadedVT.getSimpleVT().SimpleTy) { |
221 | 2 | case MVT::i8: |
222 | 2 | if (IsZeroExt) |
223 | 2 | Opcode = IsValidInc ? 2 Hexagon::L2_loadrub_pi2 : Hexagon::L2_loadrub_io0 ; |
224 | 2 | else |
225 | 0 | Opcode = IsValidInc ? 0 Hexagon::L2_loadrb_pi0 : Hexagon::L2_loadrb_io0 ; |
226 | 2 | break; |
227 | 2 | case MVT::i16: |
228 | 2 | if (IsZeroExt) |
229 | 0 | Opcode = IsValidInc ? 0 Hexagon::L2_loadruh_pi0 : Hexagon::L2_loadruh_io0 ; |
230 | 2 | else |
231 | 2 | Opcode = IsValidInc ? 2 Hexagon::L2_loadrh_pi2 : Hexagon::L2_loadrh_io0 ; |
232 | 2 | break; |
233 | 21 | case MVT::i32: |
234 | 21 | Opcode = IsValidInc ? Hexagon::L2_loadri_pi21 : Hexagon::L2_loadri_io0 ; |
235 | 21 | break; |
236 | 24 | case MVT::i64: |
237 | 24 | Opcode = IsValidInc ? Hexagon::L2_loadrd_pi24 : Hexagon::L2_loadrd_io0 ; |
238 | 24 | break; |
239 | 53 | // 64B |
240 | 4 | case MVT::v64i8: |
241 | 4 | case MVT::v32i16: |
242 | 4 | case MVT::v16i32: |
243 | 4 | case MVT::v8i64: |
244 | 4 | case MVT::v128i8: |
245 | 4 | case MVT::v64i16: |
246 | 4 | case MVT::v32i32: |
247 | 4 | case MVT::v16i64: |
248 | 4 | if (isAlignedMemNode(LD)4 ) { |
249 | 2 | if (LD->isNonTemporal()) |
250 | 0 | Opcode = IsValidInc ? 0 Hexagon::V6_vL32b_nt_pi0 : Hexagon::V6_vL32b_nt_ai0 ; |
251 | 2 | else |
252 | 2 | Opcode = IsValidInc ? 2 Hexagon::V6_vL32b_pi2 : Hexagon::V6_vL32b_ai0 ; |
253 | 4 | } else { |
254 | 2 | Opcode = IsValidInc ? Hexagon::V6_vL32Ub_pi2 : Hexagon::V6_vL32Ub_ai0 ; |
255 | 2 | } |
256 | 4 | break; |
257 | 0 | default: |
258 | 0 | llvm_unreachable("Unexpected memory type in indexed load"); |
259 | 53 | } |
260 | 53 | |
261 | 53 | SDValue IncV = CurDAG->getTargetConstant(Inc, dl, MVT::i32); |
262 | 53 | MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); |
263 | 53 | MemOp[0] = LD->getMemOperand(); |
264 | 53 | |
265 | 53 | auto getExt64 = [this,ExtType] (MachineSDNode *N, const SDLoc &dl) |
266 | 24 | -> MachineSDNode* { |
267 | 24 | if (ExtType == ISD::ZEXTLOAD || 24 ExtType == ISD::EXTLOAD24 ) { |
268 | 0 | SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32); |
269 | 0 | return CurDAG->getMachineNode(Hexagon::A4_combineir, dl, MVT::i64, |
270 | 0 | Zero, SDValue(N, 0)); |
271 | 0 | } |
272 | 24 | if (24 ExtType == ISD::SEXTLOAD24 ) |
273 | 0 | return CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64, |
274 | 0 | SDValue(N, 0)); |
275 | 24 | return N; |
276 | 24 | }; |
277 | 53 | |
278 | 53 | // Loaded value Next address Chain |
279 | 53 | SDValue From[3] = { SDValue(LD,0), SDValue(LD,1), SDValue(LD,2) }; |
280 | 53 | SDValue To[3]; |
281 | 53 | |
282 | 53 | EVT ValueVT = LD->getValueType(0); |
283 | 53 | if (ValueVT == MVT::i64 && 53 ExtType != ISD::NON_EXTLOAD24 ) { |
284 | 0 | // A load extending to i64 will actually produce i32, which will then |
285 | 0 | // need to be extended to i64. |
286 | 0 | assert(LoadedVT.getSizeInBits() <= 32); |
287 | 0 | ValueVT = MVT::i32; |
288 | 0 | } |
289 | 53 | |
290 | 53 | if (IsValidInc53 ) { |
291 | 53 | MachineSDNode *L = CurDAG->getMachineNode(Opcode, dl, ValueVT, |
292 | 53 | MVT::i32, MVT::Other, Base, |
293 | 53 | IncV, Chain); |
294 | 53 | L->setMemRefs(MemOp, MemOp+1); |
295 | 53 | To[1] = SDValue(L, 1); // Next address. |
296 | 53 | To[2] = SDValue(L, 2); // Chain. |
297 | 53 | // Handle special case for extension to i64. |
298 | 53 | if (LD->getValueType(0) == MVT::i64) |
299 | 24 | L = getExt64(L, dl); |
300 | 53 | To[0] = SDValue(L, 0); // Loaded (extended) value. |
301 | 0 | } else { |
302 | 0 | SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32); |
303 | 0 | MachineSDNode *L = CurDAG->getMachineNode(Opcode, dl, ValueVT, MVT::Other, |
304 | 0 | Base, Zero, Chain); |
305 | 0 | L->setMemRefs(MemOp, MemOp+1); |
306 | 0 | To[2] = SDValue(L, 1); // Chain. |
307 | 0 | MachineSDNode *A = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32, |
308 | 0 | Base, IncV); |
309 | 0 | To[1] = SDValue(A, 0); // Next address. |
310 | 0 | // Handle special case for extension to i64. |
311 | 0 | if (LD->getValueType(0) == MVT::i64) |
312 | 0 | L = getExt64(L, dl); |
313 | 0 | To[0] = SDValue(L, 0); // Loaded (extended) value. |
314 | 0 | } |
315 | 53 | ReplaceUses(From, To, 3); |
316 | 53 | CurDAG->RemoveDeadNode(LD); |
317 | 53 | } |
318 | | |
319 | | |
320 | 109 | MachineSDNode *HexagonDAGToDAGISel::LoadInstrForLoadIntrinsic(SDNode *IntN) { |
321 | 109 | if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN) |
322 | 0 | return nullptr; |
323 | 109 | |
324 | 109 | SDLoc dl(IntN); |
325 | 109 | unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue(); |
326 | 109 | |
327 | 109 | static std::map<unsigned,unsigned> LoadPciMap = { |
328 | 109 | { Intrinsic::hexagon_circ_ldb, Hexagon::L2_loadrb_pci }, |
329 | 109 | { Intrinsic::hexagon_circ_ldub, Hexagon::L2_loadrub_pci }, |
330 | 109 | { Intrinsic::hexagon_circ_ldh, Hexagon::L2_loadrh_pci }, |
331 | 109 | { Intrinsic::hexagon_circ_lduh, Hexagon::L2_loadruh_pci }, |
332 | 109 | { Intrinsic::hexagon_circ_ldw, Hexagon::L2_loadri_pci }, |
333 | 109 | { Intrinsic::hexagon_circ_ldd, Hexagon::L2_loadrd_pci }, |
334 | 109 | }; |
335 | 109 | auto FLC = LoadPciMap.find(IntNo); |
336 | 109 | if (FLC != LoadPciMap.end()109 ) { |
337 | 24 | SDNode *Mod = CurDAG->getMachineNode(Hexagon::A2_tfrrcr, dl, MVT::i32, |
338 | 24 | IntN->getOperand(4)); |
339 | 24 | EVT ValTy = (IntNo == Intrinsic::hexagon_circ_ldd) ? MVT::i6417 : MVT::i327 ; |
340 | 24 | EVT RTys[] = { ValTy, MVT::i32, MVT::Other }; |
341 | 24 | // Operands: { Base, Increment, Modifier, Chain } |
342 | 24 | auto Inc = cast<ConstantSDNode>(IntN->getOperand(5)); |
343 | 24 | SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), dl, MVT::i32); |
344 | 24 | MachineSDNode *Res = CurDAG->getMachineNode(FLC->second, dl, RTys, |
345 | 24 | { IntN->getOperand(2), I, SDValue(Mod,0), IntN->getOperand(0) }); |
346 | 24 | return Res; |
347 | 24 | } |
348 | 85 | |
349 | 85 | static std::map<unsigned,unsigned> LoadPbrMap = { |
350 | 85 | { Intrinsic::hexagon_brev_ldb, Hexagon::L2_loadrb_pbr }, |
351 | 85 | { Intrinsic::hexagon_brev_ldub, Hexagon::L2_loadrub_pbr }, |
352 | 85 | { Intrinsic::hexagon_brev_ldh, Hexagon::L2_loadrh_pbr }, |
353 | 85 | { Intrinsic::hexagon_brev_lduh, Hexagon::L2_loadruh_pbr }, |
354 | 85 | { Intrinsic::hexagon_brev_ldw, Hexagon::L2_loadri_pbr }, |
355 | 85 | { Intrinsic::hexagon_brev_ldd, Hexagon::L2_loadrd_pbr }, |
356 | 85 | }; |
357 | 85 | auto FLB = LoadPbrMap.find(IntNo); |
358 | 85 | if (FLB != LoadPbrMap.end()85 ) { |
359 | 12 | SDNode *Mod = CurDAG->getMachineNode(Hexagon::A2_tfrrcr, dl, MVT::i32, |
360 | 12 | IntN->getOperand(4)); |
361 | 12 | EVT ValTy = (IntNo == Intrinsic::hexagon_brev_ldd) ? MVT::i642 : MVT::i3210 ; |
362 | 12 | EVT RTys[] = { ValTy, MVT::i32, MVT::Other }; |
363 | 12 | // Operands: { Base, Modifier, Chain } |
364 | 12 | MachineSDNode *Res = CurDAG->getMachineNode(FLB->second, dl, RTys, |
365 | 12 | { IntN->getOperand(2), SDValue(Mod,0), IntN->getOperand(0) }); |
366 | 12 | return Res; |
367 | 12 | } |
368 | 73 | |
369 | 73 | return nullptr; |
370 | 73 | } |
371 | | |
372 | | SDNode *HexagonDAGToDAGISel::StoreInstrForLoadIntrinsic(MachineSDNode *LoadN, |
373 | 36 | SDNode *IntN) { |
374 | 36 | // The "LoadN" is just a machine load instruction. The intrinsic also |
375 | 36 | // involves storing it. Generate an appropriate store to the location |
376 | 36 | // given in the intrinsic's operand(3). |
377 | 36 | uint64_t F = HII->get(LoadN->getMachineOpcode()).TSFlags; |
378 | 36 | unsigned SizeBits = (F >> HexagonII::MemAccessSizePos) & |
379 | 36 | HexagonII::MemAccesSizeMask; |
380 | 36 | unsigned Size = 1U << (SizeBits-1); |
381 | 36 | |
382 | 36 | SDLoc dl(IntN); |
383 | 36 | MachinePointerInfo PI; |
384 | 36 | SDValue TS; |
385 | 36 | SDValue Loc = IntN->getOperand(3); |
386 | 36 | |
387 | 36 | if (Size >= 4) |
388 | 24 | TS = CurDAG->getStore(SDValue(LoadN, 2), dl, SDValue(LoadN, 0), Loc, PI, |
389 | 24 | Size); |
390 | 36 | else |
391 | 12 | TS = CurDAG->getTruncStore(SDValue(LoadN, 2), dl, SDValue(LoadN, 0), Loc, |
392 | 12 | PI, MVT::getIntegerVT(Size * 8), Size); |
393 | 36 | |
394 | 36 | SDNode *StoreN; |
395 | 36 | { |
396 | 36 | HandleSDNode Handle(TS); |
397 | 36 | SelectStore(TS.getNode()); |
398 | 36 | StoreN = Handle.getValue().getNode(); |
399 | 36 | } |
400 | 36 | |
401 | 36 | // Load's results are { Loaded value, Updated pointer, Chain } |
402 | 36 | ReplaceUses(SDValue(IntN, 0), SDValue(LoadN, 1)); |
403 | 36 | ReplaceUses(SDValue(IntN, 1), SDValue(StoreN, 0)); |
404 | 36 | return StoreN; |
405 | 36 | } |
406 | | |
407 | 1.91k | bool HexagonDAGToDAGISel::tryLoadOfLoadIntrinsic(LoadSDNode *N) { |
408 | 1.91k | // The intrinsics for load circ/brev perform two operations: |
409 | 1.91k | // 1. Load a value V from the specified location, using the addressing |
410 | 1.91k | // mode corresponding to the intrinsic. |
411 | 1.91k | // 2. Store V into a specified location. This location is typically a |
412 | 1.91k | // local, temporary object. |
413 | 1.91k | // In many cases, the program using these intrinsics will immediately |
414 | 1.91k | // load V again from the local object. In those cases, when certain |
415 | 1.91k | // conditions are met, the last load can be removed. |
416 | 1.91k | // This function identifies and optimizes this pattern. If the pattern |
417 | 1.91k | // cannot be optimized, it returns nullptr, which will cause the load |
418 | 1.91k | // to be selected separately from the intrinsic (which will be handled |
419 | 1.91k | // in SelectIntrinsicWChain). |
420 | 1.91k | |
421 | 1.91k | SDValue Ch = N->getOperand(0); |
422 | 1.91k | SDValue Loc = N->getOperand(1); |
423 | 1.91k | |
424 | 1.91k | // Assume that the load and the intrinsic are connected directly with a |
425 | 1.91k | // chain: |
426 | 1.91k | // t1: i32,ch = int.load ..., ..., ..., Loc, ... // <-- C |
427 | 1.91k | // t2: i32,ch = load t1:1, Loc, ... |
428 | 1.91k | SDNode *C = Ch.getNode(); |
429 | 1.91k | |
430 | 1.91k | if (C->getOpcode() != ISD::INTRINSIC_W_CHAIN) |
431 | 1.86k | return false; |
432 | 42 | |
433 | 42 | // The second load can only be eliminated if its extension type matches |
434 | 42 | // that of the load instruction corresponding to the intrinsic. The user |
435 | 42 | // can provide an address of an unsigned variable to store the result of |
436 | 42 | // a sign-extending intrinsic into (or the other way around). |
437 | 42 | ISD::LoadExtType IntExt; |
438 | 42 | switch (cast<ConstantSDNode>(C->getOperand(1))->getZExtValue()) { |
439 | 6 | case Intrinsic::hexagon_brev_ldub: |
440 | 6 | case Intrinsic::hexagon_brev_lduh: |
441 | 6 | case Intrinsic::hexagon_circ_ldub: |
442 | 6 | case Intrinsic::hexagon_circ_lduh: |
443 | 6 | IntExt = ISD::ZEXTLOAD; |
444 | 6 | break; |
445 | 30 | case Intrinsic::hexagon_brev_ldw: |
446 | 30 | case Intrinsic::hexagon_brev_ldd: |
447 | 30 | case Intrinsic::hexagon_circ_ldw: |
448 | 30 | case Intrinsic::hexagon_circ_ldd: |
449 | 30 | IntExt = ISD::NON_EXTLOAD; |
450 | 30 | break; |
451 | 6 | default: |
452 | 6 | IntExt = ISD::SEXTLOAD; |
453 | 6 | break; |
454 | 42 | } |
455 | 42 | if (42 N->getExtensionType() != IntExt42 ) |
456 | 0 | return false; |
457 | 42 | |
458 | 42 | // Make sure the target location for the loaded value in the load intrinsic |
459 | 42 | // is the location from which LD (or N) is loading. |
460 | 42 | if (42 C->getNumOperands() < 4 || 42 Loc.getNode() != C->getOperand(3).getNode()42 ) |
461 | 8 | return false; |
462 | 34 | |
463 | 34 | if (MachineSDNode *34 L34 = LoadInstrForLoadIntrinsic(C)) { |
464 | 34 | SDNode *S = StoreInstrForLoadIntrinsic(L, C); |
465 | 34 | SDValue F[] = { SDValue(N,0), SDValue(N,1), SDValue(C,0), SDValue(C,1) }; |
466 | 34 | SDValue T[] = { SDValue(L,0), SDValue(S,0), SDValue(L,1), SDValue(S,0) }; |
467 | 34 | ReplaceUses(F, T, array_lengthof(T)); |
468 | 34 | // This transformation will leave the intrinsic dead. If it remains in |
469 | 34 | // the DAG, the selection code will see it again, but without the load, |
470 | 34 | // and it will generate a store that is normally required for it. |
471 | 34 | CurDAG->RemoveDeadNode(C); |
472 | 34 | return true; |
473 | 34 | } |
474 | 0 |
|
475 | 0 | return false; |
476 | 0 | } |
477 | | |
478 | 1.96k | void HexagonDAGToDAGISel::SelectLoad(SDNode *N) { |
479 | 1.96k | SDLoc dl(N); |
480 | 1.96k | LoadSDNode *LD = cast<LoadSDNode>(N); |
481 | 1.96k | ISD::MemIndexedMode AM = LD->getAddressingMode(); |
482 | 1.96k | |
483 | 1.96k | // Handle indexed loads. |
484 | 1.96k | if (AM != ISD::UNINDEXED1.96k ) { |
485 | 53 | SelectIndexedLoad(LD, dl); |
486 | 53 | return; |
487 | 53 | } |
488 | 1.91k | |
489 | 1.91k | // Handle patterns using circ/brev load intrinsics. |
490 | 1.91k | if (1.91k tryLoadOfLoadIntrinsic(LD)1.91k ) |
491 | 34 | return; |
492 | 1.87k | |
493 | 1.87k | SelectCode(LD); |
494 | 1.87k | } |
495 | | |
496 | 38 | void HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl) { |
497 | 38 | SDValue Chain = ST->getChain(); |
498 | 38 | SDValue Base = ST->getBasePtr(); |
499 | 38 | SDValue Offset = ST->getOffset(); |
500 | 38 | SDValue Value = ST->getValue(); |
501 | 38 | // Get the constant value. |
502 | 38 | int32_t Inc = cast<ConstantSDNode>(Offset.getNode())->getSExtValue(); |
503 | 38 | EVT StoredVT = ST->getMemoryVT(); |
504 | 38 | EVT ValueVT = Value.getValueType(); |
505 | 38 | |
506 | 38 | bool IsValidInc = HII->isValidAutoIncImm(StoredVT, Inc); |
507 | 38 | unsigned Opcode = 0; |
508 | 38 | |
509 | 38 | assert(StoredVT.isSimple()); |
510 | 38 | switch (StoredVT.getSimpleVT().SimpleTy) { |
511 | 11 | case MVT::i8: |
512 | 11 | Opcode = IsValidInc ? Hexagon::S2_storerb_pi11 : Hexagon::S2_storerb_io0 ; |
513 | 11 | break; |
514 | 2 | case MVT::i16: |
515 | 2 | Opcode = IsValidInc ? Hexagon::S2_storerh_pi2 : Hexagon::S2_storerh_io0 ; |
516 | 2 | break; |
517 | 19 | case MVT::i32: |
518 | 19 | Opcode = IsValidInc ? Hexagon::S2_storeri_pi19 : Hexagon::S2_storeri_io0 ; |
519 | 19 | break; |
520 | 2 | case MVT::i64: |
521 | 2 | Opcode = IsValidInc ? Hexagon::S2_storerd_pi2 : Hexagon::S2_storerd_io0 ; |
522 | 2 | break; |
523 | 4 | case MVT::v64i8: |
524 | 4 | case MVT::v32i16: |
525 | 4 | case MVT::v16i32: |
526 | 4 | case MVT::v8i64: |
527 | 4 | case MVT::v128i8: |
528 | 4 | case MVT::v64i16: |
529 | 4 | case MVT::v32i32: |
530 | 4 | case MVT::v16i64: |
531 | 4 | if (isAlignedMemNode(ST)4 ) { |
532 | 2 | if (ST->isNonTemporal()) |
533 | 0 | Opcode = IsValidInc ? 0 Hexagon::V6_vS32b_nt_pi0 : Hexagon::V6_vS32b_nt_ai0 ; |
534 | 2 | else |
535 | 2 | Opcode = IsValidInc ? 2 Hexagon::V6_vS32b_pi2 : Hexagon::V6_vS32b_ai0 ; |
536 | 4 | } else { |
537 | 2 | Opcode = IsValidInc ? Hexagon::V6_vS32Ub_pi2 : Hexagon::V6_vS32Ub_ai0 ; |
538 | 2 | } |
539 | 4 | break; |
540 | 0 | default: |
541 | 0 | llvm_unreachable("Unexpected memory type in indexed store"); |
542 | 38 | } |
543 | 38 | |
544 | 38 | if (38 ST->isTruncatingStore() && 38 ValueVT.getSizeInBits() == 6413 ) { |
545 | 0 | assert(StoredVT.getSizeInBits() < 64 && "Not a truncating store"); |
546 | 0 | Value = CurDAG->getTargetExtractSubreg(Hexagon::isub_lo, |
547 | 0 | dl, MVT::i32, Value); |
548 | 0 | } |
549 | 38 | |
550 | 38 | SDValue IncV = CurDAG->getTargetConstant(Inc, dl, MVT::i32); |
551 | 38 | MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); |
552 | 38 | MemOp[0] = ST->getMemOperand(); |
553 | 38 | |
554 | 38 | // Next address Chain |
555 | 38 | SDValue From[2] = { SDValue(ST,0), SDValue(ST,1) }; |
556 | 38 | SDValue To[2]; |
557 | 38 | |
558 | 38 | if (IsValidInc38 ) { |
559 | 38 | // Build post increment store. |
560 | 38 | SDValue Ops[] = { Base, IncV, Value, Chain }; |
561 | 38 | MachineSDNode *S = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Other, |
562 | 38 | Ops); |
563 | 38 | S->setMemRefs(MemOp, MemOp + 1); |
564 | 38 | To[0] = SDValue(S, 0); |
565 | 38 | To[1] = SDValue(S, 1); |
566 | 38 | } else { |
567 | 0 | SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32); |
568 | 0 | SDValue Ops[] = { Base, Zero, Value, Chain }; |
569 | 0 | MachineSDNode *S = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops); |
570 | 0 | S->setMemRefs(MemOp, MemOp + 1); |
571 | 0 | To[1] = SDValue(S, 0); |
572 | 0 | MachineSDNode *A = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32, |
573 | 0 | Base, IncV); |
574 | 0 | To[0] = SDValue(A, 0); |
575 | 0 | } |
576 | 38 | |
577 | 38 | ReplaceUses(From, To, 2); |
578 | 38 | CurDAG->RemoveDeadNode(ST); |
579 | 38 | } |
580 | | |
581 | 1.87k | void HexagonDAGToDAGISel::SelectStore(SDNode *N) { |
582 | 1.87k | SDLoc dl(N); |
583 | 1.87k | StoreSDNode *ST = cast<StoreSDNode>(N); |
584 | 1.87k | ISD::MemIndexedMode AM = ST->getAddressingMode(); |
585 | 1.87k | |
586 | 1.87k | // Handle indexed stores. |
587 | 1.87k | if (AM != ISD::UNINDEXED1.87k ) { |
588 | 38 | SelectIndexedStore(ST, dl); |
589 | 38 | return; |
590 | 38 | } |
591 | 1.83k | |
592 | 1.83k | SelectCode(ST); |
593 | 1.83k | } |
594 | | |
595 | 50 | void HexagonDAGToDAGISel::SelectSHL(SDNode *N) { |
596 | 50 | SDLoc dl(N); |
597 | 50 | SDValue Shl_0 = N->getOperand(0); |
598 | 50 | SDValue Shl_1 = N->getOperand(1); |
599 | 50 | |
600 | 50 | auto Default = [this,N] () -> void { SelectCode(N); }; |
601 | 50 | |
602 | 50 | if (N->getValueType(0) != MVT::i32 || 50 Shl_1.getOpcode() != ISD::Constant49 ) |
603 | 24 | return Default(); |
604 | 26 | |
605 | 26 | // RHS is const. |
606 | 26 | int32_t ShlConst = cast<ConstantSDNode>(Shl_1)->getSExtValue(); |
607 | 26 | |
608 | 26 | if (Shl_0.getOpcode() == ISD::MUL26 ) { |
609 | 1 | SDValue Mul_0 = Shl_0.getOperand(0); // Val |
610 | 1 | SDValue Mul_1 = Shl_0.getOperand(1); // Const |
611 | 1 | // RHS of mul is const. |
612 | 1 | if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Mul_1)) { |
613 | 0 | int32_t ValConst = C->getSExtValue() << ShlConst; |
614 | 0 | if (isInt<9>(ValConst)0 ) { |
615 | 0 | SDValue Val = CurDAG->getTargetConstant(ValConst, dl, MVT::i32); |
616 | 0 | SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl, |
617 | 0 | MVT::i32, Mul_0, Val); |
618 | 0 | ReplaceNode(N, Result); |
619 | 0 | return; |
620 | 0 | } |
621 | 1 | } |
622 | 1 | return Default(); |
623 | 1 | } |
624 | 25 | |
625 | 25 | if (25 Shl_0.getOpcode() == ISD::SUB25 ) { |
626 | 1 | SDValue Sub_0 = Shl_0.getOperand(0); // Const 0 |
627 | 1 | SDValue Sub_1 = Shl_0.getOperand(1); // Val |
628 | 1 | if (ConstantSDNode *C11 = dyn_cast<ConstantSDNode>(Sub_0)) { |
629 | 0 | if (C1->getSExtValue() != 0 || 0 Sub_1.getOpcode() != ISD::SHL0 ) |
630 | 0 | return Default(); |
631 | 0 | SDValue Shl2_0 = Sub_1.getOperand(0); // Val |
632 | 0 | SDValue Shl2_1 = Sub_1.getOperand(1); // Const |
633 | 0 | if (ConstantSDNode *C20 = dyn_cast<ConstantSDNode>(Shl2_1)) { |
634 | 0 | int32_t ValConst = 1 << (ShlConst + C2->getSExtValue()); |
635 | 0 | if (isInt<9>(-ValConst)0 ) { |
636 | 0 | SDValue Val = CurDAG->getTargetConstant(-ValConst, dl, MVT::i32); |
637 | 0 | SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl, |
638 | 0 | MVT::i32, Shl2_0, Val); |
639 | 0 | ReplaceNode(N, Result); |
640 | 0 | return; |
641 | 0 | } |
642 | 25 | } |
643 | 0 | } |
644 | 1 | } |
645 | 25 | |
646 | 25 | return Default(); |
647 | 25 | } |
648 | | |
649 | | |
650 | | // |
651 | | // If there is an zero_extend followed an intrinsic in DAG (this means - the |
652 | | // result of the intrinsic is predicate); convert the zero_extend to |
653 | | // transfer instruction. |
654 | | // |
655 | | // Zero extend -> transfer is lowered here. Otherwise, zero_extend will be |
656 | | // converted into a MUX as predicate registers defined as 1 bit in the |
657 | | // compiler. Architecture defines them as 8-bit registers. |
658 | | // We want to preserve all the lower 8-bits and, not just 1 LSB bit. |
659 | | // |
660 | 79 | void HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) { |
661 | 79 | SDLoc dl(N); |
662 | 79 | |
663 | 79 | SDValue Op0 = N->getOperand(0); |
664 | 79 | EVT OpVT = Op0.getValueType(); |
665 | 79 | unsigned OpBW = OpVT.getSizeInBits(); |
666 | 79 | |
667 | 79 | // Special handling for zero-extending a vector of booleans. |
668 | 79 | if (OpVT.isVector() && 79 OpVT.getVectorElementType() == MVT::i15 && OpBW <= 640 ) { |
669 | 0 | SDNode *Mask = CurDAG->getMachineNode(Hexagon::C2_mask, dl, MVT::i64, Op0); |
670 | 0 | unsigned NE = OpVT.getVectorNumElements(); |
671 | 0 | EVT ExVT = N->getValueType(0); |
672 | 0 | unsigned ES = ExVT.getScalarSizeInBits(); |
673 | 0 | uint64_t MV = 0, Bit = 1; |
674 | 0 | for (unsigned i = 0; i < NE0 ; ++i0 ) { |
675 | 0 | MV |= Bit; |
676 | 0 | Bit <<= ES; |
677 | 0 | } |
678 | 0 | SDValue Ones = CurDAG->getTargetConstant(MV, dl, MVT::i64); |
679 | 0 | SDNode *OnesReg = CurDAG->getMachineNode(Hexagon::CONST64, dl, |
680 | 0 | MVT::i64, Ones); |
681 | 0 | if (ExVT.getSizeInBits() == 320 ) { |
682 | 0 | SDNode *And = CurDAG->getMachineNode(Hexagon::A2_andp, dl, MVT::i64, |
683 | 0 | SDValue(Mask,0), SDValue(OnesReg,0)); |
684 | 0 | SDValue SubR = CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32); |
685 | 0 | ReplaceNode(N, CurDAG->getMachineNode(Hexagon::EXTRACT_SUBREG, dl, ExVT, |
686 | 0 | SDValue(And, 0), SubR)); |
687 | 0 | return; |
688 | 0 | } |
689 | 0 | ReplaceNode(N, |
690 | 0 | CurDAG->getMachineNode(Hexagon::A2_andp, dl, ExVT, |
691 | 0 | SDValue(Mask, 0), SDValue(OnesReg, 0))); |
692 | 0 | return; |
693 | 0 | } |
694 | 79 | |
695 | 79 | SDNode *Int = N->getOperand(0).getNode(); |
696 | 79 | if ((Int->getOpcode() == ISD::INTRINSIC_WO_CHAIN)79 ) { |
697 | 0 | unsigned ID = cast<ConstantSDNode>(Int->getOperand(0))->getZExtValue(); |
698 | 0 | if (doesIntrinsicReturnPredicate(ID)0 ) { |
699 | 0 | // Now we need to differentiate target data types. |
700 | 0 | if (N->getValueType(0) == MVT::i640 ) { |
701 | 0 | // Convert the zero_extend to Rs = Pd followed by A2_combinew(0,Rs). |
702 | 0 | SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32); |
703 | 0 | SDNode *Result_1 = CurDAG->getMachineNode(Hexagon::C2_tfrpr, dl, |
704 | 0 | MVT::i32, SDValue(Int, 0)); |
705 | 0 | SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, |
706 | 0 | MVT::i32, TargetConst0); |
707 | 0 | SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::A2_combinew, dl, |
708 | 0 | MVT::i64, MVT::Other, |
709 | 0 | SDValue(Result_2, 0), |
710 | 0 | SDValue(Result_1, 0)); |
711 | 0 | ReplaceNode(N, Result_3); |
712 | 0 | return; |
713 | 0 | } |
714 | 0 | if (0 N->getValueType(0) == MVT::i320 ) { |
715 | 0 | // Convert the zero_extend to Rs = Pd |
716 | 0 | SDNode* RsPd = CurDAG->getMachineNode(Hexagon::C2_tfrpr, dl, |
717 | 0 | MVT::i32, SDValue(Int, 0)); |
718 | 0 | ReplaceNode(N, RsPd); |
719 | 0 | return; |
720 | 0 | } |
721 | 0 | llvm_unreachable0 ("Unexpected value type"); |
722 | 0 | } |
723 | 0 | } |
724 | 79 | SelectCode(N); |
725 | 79 | } |
726 | | |
727 | | |
728 | | // |
729 | | // Handling intrinsics for circular load and bitreverse load. |
730 | | // |
731 | 75 | void HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) { |
732 | 75 | if (MachineSDNode *L75 = LoadInstrForLoadIntrinsic(N)) { |
733 | 2 | StoreInstrForLoadIntrinsic(L, N); |
734 | 2 | CurDAG->RemoveDeadNode(N); |
735 | 2 | return; |
736 | 2 | } |
737 | 73 | SelectCode(N); |
738 | 73 | } |
739 | | |
740 | 2.42k | void HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) { |
741 | 2.42k | unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); |
742 | 2.42k | unsigned Bits; |
743 | 2.42k | switch (IID) { |
744 | 3 | case Intrinsic::hexagon_S2_vsplatrb: |
745 | 3 | Bits = 8; |
746 | 3 | break; |
747 | 2 | case Intrinsic::hexagon_S2_vsplatrh: |
748 | 2 | Bits = 16; |
749 | 2 | break; |
750 | 2.41k | default: |
751 | 2.41k | SelectCode(N); |
752 | 2.41k | return; |
753 | 5 | } |
754 | 5 | |
755 | 5 | SDValue V = N->getOperand(1); |
756 | 5 | SDValue U; |
757 | 5 | if (keepsLowBits(V, Bits, U)5 ) { |
758 | 1 | SDValue R = CurDAG->getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), |
759 | 1 | N->getOperand(0), U); |
760 | 1 | ReplaceNode(N, R.getNode()); |
761 | 1 | SelectCode(R.getNode()); |
762 | 1 | return; |
763 | 1 | } |
764 | 4 | SelectCode(N); |
765 | 4 | } |
766 | | |
767 | | // |
768 | | // Map floating point constant values. |
769 | | // |
770 | 8 | void HexagonDAGToDAGISel::SelectConstantFP(SDNode *N) { |
771 | 8 | SDLoc dl(N); |
772 | 8 | ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N); |
773 | 8 | APInt A = CN->getValueAPF().bitcastToAPInt(); |
774 | 8 | if (N->getValueType(0) == MVT::f328 ) { |
775 | 8 | SDValue V = CurDAG->getTargetConstant(A.getZExtValue(), dl, MVT::i32); |
776 | 8 | ReplaceNode(N, CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::f32, V)); |
777 | 8 | return; |
778 | 8 | } |
779 | 0 | if (0 N->getValueType(0) == MVT::f640 ) { |
780 | 0 | SDValue V = CurDAG->getTargetConstant(A.getZExtValue(), dl, MVT::i64); |
781 | 0 | ReplaceNode(N, CurDAG->getMachineNode(Hexagon::CONST64, dl, MVT::f64, V)); |
782 | 0 | return; |
783 | 0 | } |
784 | 0 |
|
785 | 0 | SelectCode(N); |
786 | 0 | } |
787 | | |
788 | | // |
789 | | // Map boolean values. |
790 | | // |
791 | 577 | void HexagonDAGToDAGISel::SelectConstant(SDNode *N) { |
792 | 577 | if (N->getValueType(0) == MVT::i1577 ) { |
793 | 14 | assert(!(cast<ConstantSDNode>(N)->getZExtValue() >> 1)); |
794 | 14 | unsigned Opc = (cast<ConstantSDNode>(N)->getSExtValue() != 0) |
795 | 6 | ? Hexagon::PS_true |
796 | 8 | : Hexagon::PS_false; |
797 | 14 | ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i1)); |
798 | 14 | return; |
799 | 14 | } |
800 | 563 | |
801 | 563 | SelectCode(N); |
802 | 563 | } |
803 | | |
804 | | |
805 | 92 | void HexagonDAGToDAGISel::SelectFrameIndex(SDNode *N) { |
806 | 92 | MachineFrameInfo &MFI = MF->getFrameInfo(); |
807 | 92 | const HexagonFrameLowering *HFI = HST->getFrameLowering(); |
808 | 92 | int FX = cast<FrameIndexSDNode>(N)->getIndex(); |
809 | 92 | unsigned StkA = HFI->getStackAlignment(); |
810 | 92 | unsigned MaxA = MFI.getMaxAlignment(); |
811 | 92 | SDValue FI = CurDAG->getTargetFrameIndex(FX, MVT::i32); |
812 | 92 | SDLoc DL(N); |
813 | 92 | SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); |
814 | 92 | SDNode *R = nullptr; |
815 | 92 | |
816 | 92 | // Use PS_fi when: |
817 | 92 | // - the object is fixed, or |
818 | 92 | // - there are no objects with higher-than-default alignment, or |
819 | 92 | // - there are no dynamically allocated objects. |
820 | 92 | // Otherwise, use PS_fia. |
821 | 92 | if (FX < 0 || 92 MaxA <= StkA92 || !MFI.hasVarSizedObjects()25 ) { |
822 | 91 | R = CurDAG->getMachineNode(Hexagon::PS_fi, DL, MVT::i32, FI, Zero); |
823 | 92 | } else { |
824 | 1 | auto &HMFI = *MF->getInfo<HexagonMachineFunctionInfo>(); |
825 | 1 | unsigned AR = HMFI.getStackAlignBaseVReg(); |
826 | 1 | SDValue CH = CurDAG->getEntryNode(); |
827 | 1 | SDValue Ops[] = { CurDAG->getCopyFromReg(CH, DL, AR, MVT::i32), FI, Zero }; |
828 | 1 | R = CurDAG->getMachineNode(Hexagon::PS_fia, DL, MVT::i32, Ops); |
829 | 1 | } |
830 | 92 | |
831 | 92 | ReplaceNode(N, R); |
832 | 92 | } |
833 | | |
834 | | |
835 | 229 | void HexagonDAGToDAGISel::SelectBitcast(SDNode *N) { |
836 | 229 | EVT SVT = N->getOperand(0).getValueType(); |
837 | 229 | EVT DVT = N->getValueType(0); |
838 | 229 | if (!SVT.isVector() || 229 !DVT.isVector()128 || |
839 | 79 | SVT.getVectorElementType() == MVT::i1 || |
840 | 79 | DVT.getVectorElementType() == MVT::i1 || |
841 | 229 | SVT.getSizeInBits() != DVT.getSizeInBits()12 ) { |
842 | 217 | SelectCode(N); |
843 | 217 | return; |
844 | 217 | } |
845 | 12 | |
846 | 12 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(N,0), N->getOperand(0)); |
847 | 12 | CurDAG->RemoveDeadNode(N); |
848 | 12 | } |
849 | | |
850 | | |
851 | 46.4k | void HexagonDAGToDAGISel::Select(SDNode *N) { |
852 | 46.4k | if (N->isMachineOpcode()) |
853 | 50 | return N->setNodeId(-1); // Already selected. |
854 | 46.4k | |
855 | 46.4k | switch (N->getOpcode()) { |
856 | 577 | case ISD::Constant: return SelectConstant(N); |
857 | 8 | case ISD::ConstantFP: return SelectConstantFP(N); |
858 | 92 | case ISD::FrameIndex: return SelectFrameIndex(N); |
859 | 229 | case ISD::BITCAST: return SelectBitcast(N); |
860 | 50 | case ISD::SHL: return SelectSHL(N); |
861 | 1.96k | case ISD::LOAD: return SelectLoad(N); |
862 | 1.83k | case ISD::STORE: return SelectStore(N); |
863 | 79 | case ISD::ZERO_EXTEND: return SelectZeroExtend(N); |
864 | 75 | case ISD::INTRINSIC_W_CHAIN: return SelectIntrinsicWChain(N); |
865 | 2.42k | case ISD::INTRINSIC_WO_CHAIN: return SelectIntrinsicWOChain(N); |
866 | 39.1k | } |
867 | 39.1k | |
868 | 39.1k | SelectCode(N); |
869 | 39.1k | } |
870 | | |
871 | | bool HexagonDAGToDAGISel:: |
872 | | SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, |
873 | 5 | std::vector<SDValue> &OutOps) { |
874 | 5 | SDValue Inp = Op, Res; |
875 | 5 | |
876 | 5 | switch (ConstraintID) { |
877 | 0 | default: |
878 | 0 | return true; |
879 | 5 | case InlineAsm::Constraint_i: |
880 | 5 | case InlineAsm::Constraint_o: // Offsetable. |
881 | 5 | case InlineAsm::Constraint_v: // Not offsetable. |
882 | 5 | case InlineAsm::Constraint_m: // Memory. |
883 | 5 | if (SelectAddrFI(Inp, Res)) |
884 | 3 | OutOps.push_back(Res); |
885 | 5 | else |
886 | 2 | OutOps.push_back(Inp); |
887 | 5 | break; |
888 | 5 | } |
889 | 5 | |
890 | 5 | OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); |
891 | 5 | return false; |
892 | 5 | } |
893 | | |
894 | | |
895 | 11 | static bool isMemOPCandidate(SDNode *I, SDNode *U) { |
896 | 11 | // I is an operand of U. Check if U is an arithmetic (binary) operation |
897 | 11 | // usable in a memop, where the other operand is a loaded value, and the |
898 | 11 | // result of U is stored in the same location. |
899 | 11 | |
900 | 11 | if (!U->hasOneUse()) |
901 | 5 | return false; |
902 | 6 | unsigned Opc = U->getOpcode(); |
903 | 6 | switch (Opc) { |
904 | 5 | case ISD::ADD: |
905 | 5 | case ISD::SUB: |
906 | 5 | case ISD::AND: |
907 | 5 | case ISD::OR: |
908 | 5 | break; |
909 | 1 | default: |
910 | 1 | return false; |
911 | 5 | } |
912 | 5 | |
913 | 5 | SDValue S0 = U->getOperand(0); |
914 | 5 | SDValue S1 = U->getOperand(1); |
915 | 5 | SDValue SY = (S0.getNode() == I) ? S13 : S02 ; |
916 | 5 | |
917 | 5 | SDNode *UUse = *U->use_begin(); |
918 | 5 | if (UUse->getNumValues() != 1) |
919 | 3 | return false; |
920 | 2 | |
921 | 2 | // Check if one of the inputs to U is a load instruction and the output |
922 | 2 | // is used by a store instruction. If so and they also have the same |
923 | 2 | // base pointer, then don't preoprocess this node sequence as it |
924 | 2 | // can be matched to a memop. |
925 | 2 | SDNode *SYNode = SY.getNode(); |
926 | 2 | if (UUse->getOpcode() == ISD::STORE && 2 SYNode->getOpcode() == ISD::LOAD1 ) { |
927 | 0 | SDValue LDBasePtr = cast<MemSDNode>(SYNode)->getBasePtr(); |
928 | 0 | SDValue STBasePtr = cast<MemSDNode>(UUse)->getBasePtr(); |
929 | 0 | if (LDBasePtr == STBasePtr) |
930 | 0 | return true; |
931 | 2 | } |
932 | 2 | return false; |
933 | 2 | } |
934 | | |
935 | | |
936 | | // Transform: (or (select c x 0) z) -> (select c (or x z) z) |
937 | | // (or (select c 0 y) z) -> (select c z (or y z)) |
938 | 3.38k | void HexagonDAGToDAGISel::ppSimplifyOrSelect0(std::vector<SDNode*> &&Nodes) { |
939 | 3.38k | SelectionDAG &DAG = *CurDAG; |
940 | 3.38k | |
941 | 55.6k | for (auto I : Nodes) { |
942 | 55.6k | if (I->getOpcode() != ISD::OR) |
943 | 54.4k | continue; |
944 | 1.22k | |
945 | 1.22k | auto IsZero = [] (const SDValue &V) -> bool 1.22k { |
946 | 6 | if (ConstantSDNode *SC = dyn_cast<ConstantSDNode>(V.getNode())) |
947 | 3 | return SC->isNullValue(); |
948 | 3 | return false; |
949 | 3 | }; |
950 | 1.22k | auto IsSelect0 = [IsZero] (const SDValue &Op) -> bool { |
951 | 1.22k | if (Op.getOpcode() != ISD::SELECT) |
952 | 1.22k | return false; |
953 | 3 | return IsZero(Op.getOperand(1)) || 3 IsZero(Op.getOperand(2))3 ; |
954 | 1.22k | }; |
955 | 1.22k | |
956 | 1.22k | SDValue N0 = I->getOperand(0), N1 = I->getOperand(1); |
957 | 1.22k | EVT VT = I->getValueType(0); |
958 | 1.22k | bool SelN0 = IsSelect0(N0); |
959 | 1.22k | SDValue SOp = SelN0 ? N00 : N11.22k ; |
960 | 1.22k | SDValue VOp = SelN0 ? N10 : N01.22k ; |
961 | 1.22k | |
962 | 1.22k | if (SOp.getOpcode() == ISD::SELECT && 1.22k SOp.getNode()->hasOneUse()0 ) { |
963 | 0 | SDValue SC = SOp.getOperand(0); |
964 | 0 | SDValue SX = SOp.getOperand(1); |
965 | 0 | SDValue SY = SOp.getOperand(2); |
966 | 0 | SDLoc DLS = SOp; |
967 | 0 | if (IsZero(SY)0 ) { |
968 | 0 | SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SX, VOp); |
969 | 0 | SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, NewOr, VOp); |
970 | 0 | DAG.ReplaceAllUsesWith(I, NewSel.getNode()); |
971 | 0 | } else if (0 IsZero(SX)0 ) { |
972 | 0 | SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SY, VOp); |
973 | 0 | SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, VOp, NewOr); |
974 | 0 | DAG.ReplaceAllUsesWith(I, NewSel.getNode()); |
975 | 0 | } |
976 | 0 | } |
977 | 55.6k | } |
978 | 3.38k | } |
979 | | |
980 | | // Transform: (store ch val (add x (add (shl y c) e))) |
981 | | // to: (store ch val (add x (shl (add y d) c))), |
982 | | // where e = (shl d c) for some integer d. |
983 | | // The purpose of this is to enable generation of loads/stores with |
984 | | // shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift |
985 | | // value c must be 0, 1 or 2. |
986 | 3.38k | void HexagonDAGToDAGISel::ppAddrReorderAddShl(std::vector<SDNode*> &&Nodes) { |
987 | 3.38k | SelectionDAG &DAG = *CurDAG; |
988 | 3.38k | |
989 | 55.6k | for (auto I : Nodes) { |
990 | 55.6k | if (I->getOpcode() != ISD::STORE) |
991 | 53.7k | continue; |
992 | 1.83k | |
993 | 1.83k | // I matched: (store ch val Off) |
994 | 1.83k | SDValue Off = I->getOperand(2); |
995 | 1.83k | // Off needs to match: (add x (add (shl y c) (shl d c)))) |
996 | 1.83k | if (Off.getOpcode() != ISD::ADD) |
997 | 1.47k | continue; |
998 | 367 | // Off matched: (add x T0) |
999 | 367 | SDValue T0 = Off.getOperand(1); |
1000 | 367 | // T0 needs to match: (add T1 T2): |
1001 | 367 | if (T0.getOpcode() != ISD::ADD) |
1002 | 366 | continue; |
1003 | 1 | // T0 matched: (add T1 T2) |
1004 | 1 | SDValue T1 = T0.getOperand(0); |
1005 | 1 | SDValue T2 = T0.getOperand(1); |
1006 | 1 | // T1 needs to match: (shl y c) |
1007 | 1 | if (T1.getOpcode() != ISD::SHL) |
1008 | 0 | continue; |
1009 | 1 | SDValue C = T1.getOperand(1); |
1010 | 1 | ConstantSDNode *CN = dyn_cast<ConstantSDNode>(C.getNode()); |
1011 | 1 | if (CN == nullptr) |
1012 | 0 | continue; |
1013 | 1 | unsigned CV = CN->getZExtValue(); |
1014 | 1 | if (CV > 2) |
1015 | 0 | continue; |
1016 | 1 | // T2 needs to match e, where e = (shl d c) for some d. |
1017 | 1 | ConstantSDNode *EN = dyn_cast<ConstantSDNode>(T2.getNode()); |
1018 | 1 | if (EN == nullptr) |
1019 | 0 | continue; |
1020 | 1 | unsigned EV = EN->getZExtValue(); |
1021 | 1 | if (EV % (1 << CV) != 0) |
1022 | 0 | continue; |
1023 | 1 | unsigned DV = EV / (1 << CV); |
1024 | 1 | |
1025 | 1 | // Replace T0 with: (shl (add y d) c) |
1026 | 1 | SDLoc DL = SDLoc(I); |
1027 | 1 | EVT VT = T0.getValueType(); |
1028 | 1 | SDValue D = DAG.getConstant(DV, DL, VT); |
1029 | 1 | // NewAdd = (add y d) |
1030 | 1 | SDValue NewAdd = DAG.getNode(ISD::ADD, DL, VT, T1.getOperand(0), D); |
1031 | 1 | // NewShl = (shl NewAdd c) |
1032 | 1 | SDValue NewShl = DAG.getNode(ISD::SHL, DL, VT, NewAdd, C); |
1033 | 1 | ReplaceNode(T0.getNode(), NewShl.getNode()); |
1034 | 1 | } |
1035 | 3.38k | } |
1036 | | |
1037 | | // Transform: (load ch (add x (and (srl y c) Mask))) |
1038 | | // to: (load ch (add x (shl (srl y d) d-c))) |
1039 | | // where |
1040 | | // Mask = 00..0 111..1 0.0 |
1041 | | // | | +-- d-c 0s, and d-c is 0, 1 or 2. |
1042 | | // | +-------- 1s |
1043 | | // +-------------- at most c 0s |
1044 | | // Motivating example: |
1045 | | // DAG combiner optimizes (add x (shl (srl y 5) 2)) |
1046 | | // to (add x (and (srl y 3) 1FFFFFFC)) |
1047 | | // which results in a constant-extended and(##...,lsr). This transformation |
1048 | | // undoes this simplification for cases where the shl can be folded into |
1049 | | // an addressing mode. |
1050 | 3.38k | void HexagonDAGToDAGISel::ppAddrRewriteAndSrl(std::vector<SDNode*> &&Nodes) { |
1051 | 3.38k | SelectionDAG &DAG = *CurDAG; |
1052 | 3.38k | |
1053 | 55.6k | for (SDNode *N : Nodes) { |
1054 | 55.6k | unsigned Opc = N->getOpcode(); |
1055 | 55.6k | if (Opc != ISD::LOAD && 55.6k Opc != ISD::STORE53.4k ) |
1056 | 51.6k | continue; |
1057 | 3.99k | SDValue Addr = Opc == ISD::LOAD ? 3.99k N->getOperand(1)2.15k : N->getOperand(2)1.83k ; |
1058 | 3.99k | // Addr must match: (add x T0) |
1059 | 3.99k | if (Addr.getOpcode() != ISD::ADD) |
1060 | 3.18k | continue; |
1061 | 808 | SDValue T0 = Addr.getOperand(1); |
1062 | 808 | // T0 must match: (and T1 Mask) |
1063 | 808 | if (T0.getOpcode() != ISD::AND) |
1064 | 805 | continue; |
1065 | 3 | |
1066 | 3 | // We have an AND. |
1067 | 3 | // |
1068 | 3 | // Check the first operand. It must be: (srl y c). |
1069 | 3 | SDValue S = T0.getOperand(0); |
1070 | 3 | if (S.getOpcode() != ISD::SRL) |
1071 | 0 | continue; |
1072 | 3 | ConstantSDNode *SN = dyn_cast<ConstantSDNode>(S.getOperand(1).getNode()); |
1073 | 3 | if (SN == nullptr) |
1074 | 0 | continue; |
1075 | 3 | if (3 SN->getAPIntValue().getBitWidth() != 323 ) |
1076 | 0 | continue; |
1077 | 3 | uint32_t CV = SN->getZExtValue(); |
1078 | 3 | |
1079 | 3 | // Check the second operand: the supposed mask. |
1080 | 3 | ConstantSDNode *MN = dyn_cast<ConstantSDNode>(T0.getOperand(1).getNode()); |
1081 | 3 | if (MN == nullptr) |
1082 | 0 | continue; |
1083 | 3 | if (3 MN->getAPIntValue().getBitWidth() != 323 ) |
1084 | 0 | continue; |
1085 | 3 | uint32_t Mask = MN->getZExtValue(); |
1086 | 3 | // Examine the mask. |
1087 | 3 | uint32_t TZ = countTrailingZeros(Mask); |
1088 | 3 | uint32_t M1 = countTrailingOnes(Mask >> TZ); |
1089 | 3 | uint32_t LZ = countLeadingZeros(Mask); |
1090 | 3 | // Trailing zeros + middle ones + leading zeros must equal the width. |
1091 | 3 | if (TZ + M1 + LZ != 32) |
1092 | 0 | continue; |
1093 | 3 | // The number of trailing zeros will be encoded in the addressing mode. |
1094 | 3 | if (3 TZ > 23 ) |
1095 | 0 | continue; |
1096 | 3 | // The number of leading zeros must be at most c. |
1097 | 3 | if (3 LZ > CV3 ) |
1098 | 0 | continue; |
1099 | 3 | |
1100 | 3 | // All looks good. |
1101 | 3 | SDValue Y = S.getOperand(0); |
1102 | 3 | EVT VT = Addr.getValueType(); |
1103 | 3 | SDLoc dl(S); |
1104 | 3 | // TZ = D-C, so D = TZ+C. |
1105 | 3 | SDValue D = DAG.getConstant(TZ+CV, dl, VT); |
1106 | 3 | SDValue DC = DAG.getConstant(TZ, dl, VT); |
1107 | 3 | SDValue NewSrl = DAG.getNode(ISD::SRL, dl, VT, Y, D); |
1108 | 3 | SDValue NewShl = DAG.getNode(ISD::SHL, dl, VT, NewSrl, DC); |
1109 | 3 | ReplaceNode(T0.getNode(), NewShl.getNode()); |
1110 | 3 | } |
1111 | 3.38k | } |
1112 | | |
1113 | | // Transform: (op ... (zext i1 c) ...) -> (select c (op ... 0 ...) |
1114 | | // (op ... 1 ...)) |
1115 | 3.38k | void HexagonDAGToDAGISel::ppHoistZextI1(std::vector<SDNode*> &&Nodes) { |
1116 | 3.38k | SelectionDAG &DAG = *CurDAG; |
1117 | 3.38k | |
1118 | 55.6k | for (SDNode *N : Nodes) { |
1119 | 55.6k | unsigned Opc = N->getOpcode(); |
1120 | 55.6k | if (Opc != ISD::ZERO_EXTEND) |
1121 | 55.5k | continue; |
1122 | 102 | SDValue OpI1 = N->getOperand(0); |
1123 | 102 | EVT OpVT = OpI1.getValueType(); |
1124 | 102 | if (!OpVT.isSimple() || 102 OpVT.getSimpleVT() != MVT::i1102 ) |
1125 | 44 | continue; |
1126 | 121 | for (auto I = N->use_begin(), E = N->use_end(); 58 I != E121 ; ++I63 ) { |
1127 | 63 | SDNode *U = *I; |
1128 | 63 | if (U->getNumValues() != 1) |
1129 | 47 | continue; |
1130 | 16 | EVT UVT = U->getValueType(0); |
1131 | 16 | if (!UVT.isSimple() || 16 !UVT.isInteger()16 || UVT.getSimpleVT() == MVT::i112 ) |
1132 | 5 | continue; |
1133 | 11 | if (11 isMemOPCandidate(N, U)11 ) |
1134 | 0 | continue; |
1135 | 11 | |
1136 | 11 | // Potentially simplifiable operation. |
1137 | 11 | unsigned I1N = I.getOperandNo(); |
1138 | 11 | SmallVector<SDValue,2> Ops(U->getNumOperands()); |
1139 | 39 | for (unsigned i = 0, n = U->getNumOperands(); i != n39 ; ++i28 ) |
1140 | 28 | Ops[i] = U->getOperand(i); |
1141 | 11 | EVT BVT = Ops[I1N].getValueType(); |
1142 | 11 | |
1143 | 11 | SDLoc dl(U); |
1144 | 11 | SDValue C0 = DAG.getConstant(0, dl, BVT); |
1145 | 11 | SDValue C1 = DAG.getConstant(1, dl, BVT); |
1146 | 11 | SDValue If0, If1; |
1147 | 11 | |
1148 | 11 | if (isa<MachineSDNode>(U)11 ) { |
1149 | 0 | unsigned UseOpc = U->getMachineOpcode(); |
1150 | 0 | Ops[I1N] = C0; |
1151 | 0 | If0 = SDValue(DAG.getMachineNode(UseOpc, dl, UVT, Ops), 0); |
1152 | 0 | Ops[I1N] = C1; |
1153 | 0 | If1 = SDValue(DAG.getMachineNode(UseOpc, dl, UVT, Ops), 0); |
1154 | 11 | } else { |
1155 | 11 | unsigned UseOpc = U->getOpcode(); |
1156 | 11 | Ops[I1N] = C0; |
1157 | 11 | If0 = DAG.getNode(UseOpc, dl, UVT, Ops); |
1158 | 11 | Ops[I1N] = C1; |
1159 | 11 | If1 = DAG.getNode(UseOpc, dl, UVT, Ops); |
1160 | 11 | } |
1161 | 63 | SDValue Sel = DAG.getNode(ISD::SELECT, dl, UVT, OpI1, If1, If0); |
1162 | 63 | DAG.ReplaceAllUsesWith(U, Sel.getNode()); |
1163 | 63 | } |
1164 | 55.6k | } |
1165 | 3.38k | } |
1166 | | |
1167 | 3.38k | void HexagonDAGToDAGISel::PreprocessISelDAG() { |
1168 | 3.38k | // Repack all nodes before calling each preprocessing function, |
1169 | 3.38k | // because each of them can modify the set of nodes. |
1170 | 13.5k | auto getNodes = [this] () -> std::vector<SDNode*> { |
1171 | 13.5k | std::vector<SDNode*> T; |
1172 | 13.5k | T.reserve(CurDAG->allnodes_size()); |
1173 | 13.5k | for (SDNode &N : CurDAG->allnodes()) |
1174 | 222k | T.push_back(&N); |
1175 | 13.5k | return T; |
1176 | 13.5k | }; |
1177 | 3.38k | |
1178 | 3.38k | // Transform: (or (select c x 0) z) -> (select c (or x z) z) |
1179 | 3.38k | // (or (select c 0 y) z) -> (select c z (or y z)) |
1180 | 3.38k | ppSimplifyOrSelect0(getNodes()); |
1181 | 3.38k | |
1182 | 3.38k | // Transform: (store ch val (add x (add (shl y c) e))) |
1183 | 3.38k | // to: (store ch val (add x (shl (add y d) c))), |
1184 | 3.38k | // where e = (shl d c) for some integer d. |
1185 | 3.38k | // The purpose of this is to enable generation of loads/stores with |
1186 | 3.38k | // shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift |
1187 | 3.38k | // value c must be 0, 1 or 2. |
1188 | 3.38k | ppAddrReorderAddShl(getNodes()); |
1189 | 3.38k | |
1190 | 3.38k | // Transform: (load ch (add x (and (srl y c) Mask))) |
1191 | 3.38k | // to: (load ch (add x (shl (srl y d) d-c))) |
1192 | 3.38k | // where |
1193 | 3.38k | // Mask = 00..0 111..1 0.0 |
1194 | 3.38k | // | | +-- d-c 0s, and d-c is 0, 1 or 2. |
1195 | 3.38k | // | +-------- 1s |
1196 | 3.38k | // +-------------- at most c 0s |
1197 | 3.38k | // Motivating example: |
1198 | 3.38k | // DAG combiner optimizes (add x (shl (srl y 5) 2)) |
1199 | 3.38k | // to (add x (and (srl y 3) 1FFFFFFC)) |
1200 | 3.38k | // which results in a constant-extended and(##...,lsr). This transformation |
1201 | 3.38k | // undoes this simplification for cases where the shl can be folded into |
1202 | 3.38k | // an addressing mode. |
1203 | 3.38k | ppAddrRewriteAndSrl(getNodes()); |
1204 | 3.38k | |
1205 | 3.38k | // Transform: (op ... (zext i1 c) ...) -> (select c (op ... 0 ...) |
1206 | 3.38k | // (op ... 1 ...)) |
1207 | 3.38k | ppHoistZextI1(getNodes()); |
1208 | 3.38k | |
1209 | 3.38k | DEBUG_WITH_TYPE("isel", { |
1210 | 3.38k | dbgs() << "Preprocessed (Hexagon) selection DAG:"; |
1211 | 3.38k | CurDAG->dump(); |
1212 | 3.38k | }); |
1213 | 3.38k | |
1214 | 3.38k | if (EnableAddressRebalancing3.38k ) { |
1215 | 3.38k | rebalanceAddressTrees(); |
1216 | 3.38k | |
1217 | 3.38k | DEBUG_WITH_TYPE("isel", { |
1218 | 3.38k | dbgs() << "Address tree balanced selection DAG:"; |
1219 | 3.38k | CurDAG->dump(); |
1220 | 3.38k | }); |
1221 | 3.38k | } |
1222 | 3.38k | } |
1223 | | |
1224 | 2.40k | void HexagonDAGToDAGISel::EmitFunctionEntryCode() { |
1225 | 2.40k | auto &HST = static_cast<const HexagonSubtarget&>(MF->getSubtarget()); |
1226 | 2.40k | auto &HFI = *HST.getFrameLowering(); |
1227 | 2.40k | if (!HFI.needsAligna(*MF)) |
1228 | 2.40k | return; |
1229 | 1 | |
1230 | 1 | MachineFrameInfo &MFI = MF->getFrameInfo(); |
1231 | 1 | MachineBasicBlock *EntryBB = &MF->front(); |
1232 | 1 | unsigned AR = FuncInfo->CreateReg(MVT::i32); |
1233 | 1 | unsigned MaxA = MFI.getMaxAlignment(); |
1234 | 1 | BuildMI(EntryBB, DebugLoc(), HII->get(Hexagon::PS_aligna), AR) |
1235 | 1 | .addImm(MaxA); |
1236 | 1 | MF->getInfo<HexagonMachineFunctionInfo>()->setStackAlignBaseVReg(AR); |
1237 | 1 | } |
1238 | | |
1239 | | // Match a frame index that can be used in an addressing mode. |
1240 | 2.11k | bool HexagonDAGToDAGISel::SelectAddrFI(SDValue &N, SDValue &R) { |
1241 | 2.11k | if (N.getOpcode() != ISD::FrameIndex) |
1242 | 724 | return false; |
1243 | 1.39k | auto &HFI = *HST->getFrameLowering(); |
1244 | 1.39k | MachineFrameInfo &MFI = MF->getFrameInfo(); |
1245 | 1.39k | int FX = cast<FrameIndexSDNode>(N)->getIndex(); |
1246 | 1.39k | if (!MFI.isFixedObjectIndex(FX) && 1.39k HFI.needsAligna(*MF)1.38k ) |
1247 | 0 | return false; |
1248 | 1.39k | R = CurDAG->getTargetFrameIndex(FX, MVT::i32); |
1249 | 1.39k | return true; |
1250 | 1.39k | } |
1251 | | |
1252 | 3.43k | inline bool HexagonDAGToDAGISel::SelectAddrGA(SDValue &N, SDValue &R) { |
1253 | 3.43k | return SelectGlobalAddress(N, R, false); |
1254 | 3.43k | } |
1255 | | |
1256 | 3.17k | inline bool HexagonDAGToDAGISel::SelectAddrGP(SDValue &N, SDValue &R) { |
1257 | 3.17k | return SelectGlobalAddress(N, R, true); |
1258 | 3.17k | } |
1259 | | |
1260 | | bool HexagonDAGToDAGISel::SelectGlobalAddress(SDValue &N, SDValue &R, |
1261 | 6.60k | bool UseGP) { |
1262 | 6.60k | switch (N.getOpcode()) { |
1263 | 1.20k | case ISD::ADD: { |
1264 | 1.20k | SDValue N0 = N.getOperand(0); |
1265 | 1.20k | SDValue N1 = N.getOperand(1); |
1266 | 1.20k | unsigned GAOpc = N0.getOpcode(); |
1267 | 1.20k | if (UseGP && 1.20k GAOpc != HexagonISD::CONST32_GP589 ) |
1268 | 589 | return false; |
1269 | 612 | if (612 !UseGP && 612 GAOpc != HexagonISD::CONST32612 ) |
1270 | 557 | return false; |
1271 | 55 | if (ConstantSDNode *55 Const55 = dyn_cast<ConstantSDNode>(N1)) { |
1272 | 54 | SDValue Addr = N0.getOperand(0); |
1273 | 54 | if (GlobalAddressSDNode *GA54 = dyn_cast<GlobalAddressSDNode>(Addr)) { |
1274 | 54 | if (GA->getOpcode() == ISD::TargetGlobalAddress54 ) { |
1275 | 54 | uint64_t NewOff = GA->getOffset() + (uint64_t)Const->getSExtValue(); |
1276 | 54 | R = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(Const), |
1277 | 54 | N.getValueType(), NewOff); |
1278 | 54 | return true; |
1279 | 54 | } |
1280 | 1 | } |
1281 | 54 | } |
1282 | 1 | break; |
1283 | 1 | } |
1284 | 374 | case HexagonISD::CONST32: |
1285 | 374 | // The operand(0) of CONST32 is TargetGlobalAddress, which is what we |
1286 | 374 | // want in the instruction. |
1287 | 374 | if (!UseGP) |
1288 | 245 | R = N.getOperand(0); |
1289 | 374 | return !UseGP; |
1290 | 60 | case HexagonISD::CONST32_GP: |
1291 | 60 | if (UseGP) |
1292 | 59 | R = N.getOperand(0); |
1293 | 60 | return UseGP; |
1294 | 4.97k | default: |
1295 | 4.97k | return false; |
1296 | 1 | } |
1297 | 1 | |
1298 | 1 | return false; |
1299 | 1 | } |
1300 | | |
1301 | 110 | bool HexagonDAGToDAGISel::DetectUseSxtw(SDValue &N, SDValue &R) { |
1302 | 110 | // This (complex pattern) function is meant to detect a sign-extension |
1303 | 110 | // i32->i64 on a per-operand basis. This would allow writing single |
1304 | 110 | // patterns that would cover a number of combinations of different ways |
1305 | 110 | // a sign-extensions could be written. For example: |
1306 | 110 | // (mul (DetectUseSxtw x) (DetectUseSxtw y)) -> (M2_dpmpyss_s0 x y) |
1307 | 110 | // could match either one of these: |
1308 | 110 | // (mul (sext x) (sext_inreg y)) |
1309 | 110 | // (mul (sext-load *p) (sext_inreg y)) |
1310 | 110 | // (mul (sext_inreg x) (sext y)) |
1311 | 110 | // etc. |
1312 | 110 | // |
1313 | 110 | // The returned value will have type i64 and its low word will |
1314 | 110 | // contain the value being extended. The high bits are not specified. |
1315 | 110 | // The returned type is i64 because the original type of N was i64, |
1316 | 110 | // but the users of this function should only use the low-word of the |
1317 | 110 | // result, e.g. |
1318 | 110 | // (mul sxtw:x, sxtw:y) -> (M2_dpmpyss_s0 (LoReg sxtw:x), (LoReg sxtw:y)) |
1319 | 110 | |
1320 | 110 | if (N.getValueType() != MVT::i64) |
1321 | 0 | return false; |
1322 | 110 | EVT SrcVT; |
1323 | 110 | unsigned Opc = N.getOpcode(); |
1324 | 110 | switch (Opc) { |
1325 | 19 | case ISD::SIGN_EXTEND: |
1326 | 19 | case ISD::SIGN_EXTEND_INREG: { |
1327 | 19 | // sext_inreg has the source type as a separate operand. |
1328 | 19 | EVT T = Opc == ISD::SIGN_EXTEND |
1329 | 10 | ? N.getOperand(0).getValueType() |
1330 | 9 | : cast<VTSDNode>(N.getOperand(1))->getVT(); |
1331 | 19 | if (T.getSizeInBits() != 32) |
1332 | 0 | return false; |
1333 | 19 | R = N.getOperand(0); |
1334 | 19 | break; |
1335 | 19 | } |
1336 | 2 | case ISD::LOAD: { |
1337 | 2 | LoadSDNode *L = cast<LoadSDNode>(N); |
1338 | 2 | if (L->getExtensionType() != ISD::SEXTLOAD) |
1339 | 1 | return false; |
1340 | 1 | // All extending loads extend to i32, so even if the value in |
1341 | 1 | // memory is shorter than 32 bits, it will be i32 after the load. |
1342 | 1 | if (1 L->getMemoryVT().getSizeInBits() > 321 ) |
1343 | 0 | return false; |
1344 | 1 | R = N; |
1345 | 1 | break; |
1346 | 1 | } |
1347 | 89 | default: |
1348 | 89 | return false; |
1349 | 20 | } |
1350 | 20 | EVT RT = R.getValueType(); |
1351 | 20 | if (RT == MVT::i64) |
1352 | 10 | return true; |
1353 | 20 | assert(RT == MVT::i32); |
1354 | 10 | // This is only to produce a value of type i64. Do not rely on the |
1355 | 10 | // high bits produced by this. |
1356 | 10 | const SDLoc &dl(N); |
1357 | 10 | SDValue Ops[] = { |
1358 | 10 | CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, dl, MVT::i32), |
1359 | 10 | R, CurDAG->getTargetConstant(Hexagon::isub_hi, dl, MVT::i32), |
1360 | 10 | R, CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32) |
1361 | 10 | }; |
1362 | 10 | SDNode *T = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, |
1363 | 10 | MVT::i64, Ops); |
1364 | 10 | R = SDValue(T, 0); |
1365 | 10 | return true; |
1366 | 10 | } |
1367 | | |
1368 | | bool HexagonDAGToDAGISel::keepsLowBits(const SDValue &Val, unsigned NumBits, |
1369 | 5 | SDValue &Src) { |
1370 | 5 | unsigned Opc = Val.getOpcode(); |
1371 | 5 | switch (Opc) { |
1372 | 0 | case ISD::SIGN_EXTEND: |
1373 | 0 | case ISD::ZERO_EXTEND: |
1374 | 0 | case ISD::ANY_EXTEND: { |
1375 | 0 | const SDValue &Op0 = Val.getOperand(0); |
1376 | 0 | EVT T = Op0.getValueType(); |
1377 | 0 | if (T.isInteger() && 0 T.getSizeInBits() == NumBits0 ) { |
1378 | 0 | Src = Op0; |
1379 | 0 | return true; |
1380 | 0 | } |
1381 | 0 | break; |
1382 | 0 | } |
1383 | 0 | case ISD::SIGN_EXTEND_INREG: |
1384 | 0 | case ISD::AssertSext: |
1385 | 0 | case ISD::AssertZext: |
1386 | 0 | if (Val.getOperand(0).getValueType().isInteger()0 ) { |
1387 | 0 | VTSDNode *T = cast<VTSDNode>(Val.getOperand(1)); |
1388 | 0 | if (T->getVT().getSizeInBits() == NumBits0 ) { |
1389 | 0 | Src = Val.getOperand(0); |
1390 | 0 | return true; |
1391 | 0 | } |
1392 | 0 | } |
1393 | 0 | break; |
1394 | 1 | case ISD::AND: { |
1395 | 1 | // Check if this is an AND with NumBits of lower bits set to 1. |
1396 | 1 | uint64_t Mask = (1 << NumBits) - 1; |
1397 | 1 | if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Val.getOperand(0))) { |
1398 | 0 | if (C->getZExtValue() == Mask0 ) { |
1399 | 0 | Src = Val.getOperand(1); |
1400 | 0 | return true; |
1401 | 0 | } |
1402 | 1 | } |
1403 | 1 | if (ConstantSDNode *1 C1 = dyn_cast<ConstantSDNode>(Val.getOperand(1))) { |
1404 | 1 | if (C->getZExtValue() == Mask1 ) { |
1405 | 1 | Src = Val.getOperand(0); |
1406 | 1 | return true; |
1407 | 1 | } |
1408 | 0 | } |
1409 | 0 | break; |
1410 | 0 | } |
1411 | 0 | case ISD::OR: |
1412 | 0 | case ISD::XOR: { |
1413 | 0 | // OR/XOR with the lower NumBits bits set to 0. |
1414 | 0 | uint64_t Mask = (1 << NumBits) - 1; |
1415 | 0 | if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(Val.getOperand(0))) { |
1416 | 0 | if ((C->getZExtValue() & Mask) == 00 ) { |
1417 | 0 | Src = Val.getOperand(1); |
1418 | 0 | return true; |
1419 | 0 | } |
1420 | 0 | } |
1421 | 0 | if (ConstantSDNode *0 C0 = dyn_cast<ConstantSDNode>(Val.getOperand(1))) { |
1422 | 0 | if ((C->getZExtValue() & Mask) == 00 ) { |
1423 | 0 | Src = Val.getOperand(0); |
1424 | 0 | return true; |
1425 | 0 | } |
1426 | 0 | } |
1427 | 0 | } |
1428 | 4 | default: |
1429 | 4 | break; |
1430 | 4 | } |
1431 | 4 | return false; |
1432 | 4 | } |
1433 | | |
1434 | | |
1435 | 1.36k | bool HexagonDAGToDAGISel::isOrEquivalentToAdd(const SDNode *N) const { |
1436 | 1.36k | assert(N->getOpcode() == ISD::OR); |
1437 | 1.36k | auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); |
1438 | 1.36k | assert(C); |
1439 | 1.36k | |
1440 | 1.36k | // Detect when "or" is used to add an offset to a stack object. |
1441 | 1.36k | if (auto *FN1.36k = dyn_cast<FrameIndexSDNode>(N->getOperand(0))) { |
1442 | 1.35k | MachineFrameInfo &MFI = MF->getFrameInfo(); |
1443 | 1.35k | unsigned A = MFI.getObjectAlignment(FN->getIndex()); |
1444 | 1.35k | assert(isPowerOf2_32(A)); |
1445 | 1.35k | int32_t Off = C->getSExtValue(); |
1446 | 1.35k | // If the alleged offset fits in the zero bits guaranteed by |
1447 | 1.35k | // the alignment, then this or is really an add. |
1448 | 1.35k | return (Off >= 0) && (((A-1) & Off) == unsigned(Off)); |
1449 | 1.35k | } |
1450 | 10 | return false; |
1451 | 10 | } |
1452 | | |
1453 | 4.70k | bool HexagonDAGToDAGISel::isAlignedMemNode(const MemSDNode *N) const { |
1454 | 4.70k | return N->getAlignment() >= N->getMemoryVT().getStoreSize(); |
1455 | 4.70k | } |
1456 | | |
1457 | 60 | bool HexagonDAGToDAGISel::isSmallStackStore(const StoreSDNode *N) const { |
1458 | 60 | unsigned StackSize = MF->getFrameInfo().estimateStackSize(*MF); |
1459 | 60 | switch (N->getMemoryVT().getStoreSize()) { |
1460 | 10 | case 1: |
1461 | 10 | return StackSize <= 56; // 1*2^6 - 8 |
1462 | 4 | case 2: |
1463 | 4 | return StackSize <= 120; // 2*2^6 - 8 |
1464 | 46 | case 4: |
1465 | 46 | return StackSize <= 248; // 4*2^6 - 8 |
1466 | 0 | default: |
1467 | 0 | return false; |
1468 | 0 | } |
1469 | 0 | } |
1470 | | |
1471 | | // Return true when the given node fits in a positive half word. |
1472 | 2 | bool HexagonDAGToDAGISel::isPositiveHalfWord(const SDNode *N) const { |
1473 | 2 | if (const ConstantSDNode *CN2 = dyn_cast<const ConstantSDNode>(N)) { |
1474 | 1 | int64_t V = CN->getSExtValue(); |
1475 | 0 | return V > 0 && isInt<16>(V); |
1476 | 1 | } |
1477 | 1 | if (1 N->getOpcode() == ISD::SIGN_EXTEND_INREG1 ) { |
1478 | 1 | const VTSDNode *VN = dyn_cast<const VTSDNode>(N->getOperand(1)); |
1479 | 1 | return VN->getVT().getSizeInBits() <= 16; |
1480 | 1 | } |
1481 | 0 | return false; |
1482 | 0 | } |
1483 | | |
1484 | | //////////////////////////////////////////////////////////////////////////////// |
1485 | | // Rebalancing of address calculation trees |
1486 | | |
1487 | 7.50k | static bool isOpcodeHandled(const SDNode *N) { |
1488 | 7.50k | switch (N->getOpcode()) { |
1489 | 511 | case ISD::ADD: |
1490 | 511 | case ISD::MUL: |
1491 | 511 | return true; |
1492 | 472 | case ISD::SHL: |
1493 | 472 | // We only handle constant shifts because these can be easily flattened |
1494 | 472 | // into multiplications by 2^Op1. |
1495 | 472 | return isa<ConstantSDNode>(N->getOperand(1).getNode()); |
1496 | 6.52k | default: |
1497 | 6.52k | return false; |
1498 | 0 | } |
1499 | 0 | } |
1500 | | |
1501 | | /// \brief Return the weight of an SDNode |
1502 | 1.44k | int HexagonDAGToDAGISel::getWeight(SDNode *N) { |
1503 | 1.44k | if (!isOpcodeHandled(N)) |
1504 | 1.27k | return 1; |
1505 | 1.44k | assert(RootWeights.count(N) && "Cannot get weight of unseen root!"); |
1506 | 174 | assert(RootWeights[N] != -1 && "Cannot get weight of unvisited root!"); |
1507 | 174 | assert(RootWeights[N] != -2 && "Cannot get weight of RAWU'd root!"); |
1508 | 174 | return RootWeights[N]; |
1509 | 174 | } |
1510 | | |
1511 | 1.43k | int HexagonDAGToDAGISel::getHeight(SDNode *N) { |
1512 | 1.43k | if (!isOpcodeHandled(N)) |
1513 | 1.27k | return 0; |
1514 | 1.43k | assert(RootWeights.count(N) && RootWeights[N] >= 0 && |
1515 | 169 | "Cannot query height of unvisited/RAUW'd node!"); |
1516 | 169 | return RootHeights[N]; |
1517 | 169 | } |
1518 | | |
1519 | | namespace { |
1520 | | struct WeightedLeaf { |
1521 | | SDValue Value; |
1522 | | int Weight; |
1523 | | int InsertionOrder; |
1524 | | |
1525 | 71 | WeightedLeaf() : Value(SDValue()) { } |
1526 | | |
1527 | | WeightedLeaf(SDValue Value, int Weight, int InsertionOrder) : |
1528 | 59 | Value(Value), Weight(Weight), InsertionOrder(InsertionOrder) { |
1529 | 59 | assert(Weight >= 0 && "Weight must be >= 0"); |
1530 | 59 | } |
1531 | | |
1532 | 44 | static bool Compare(const WeightedLeaf &A, const WeightedLeaf &B) { |
1533 | 44 | assert(A.Value.getNode() && B.Value.getNode()); |
1534 | 44 | return A.Weight == B.Weight ? |
1535 | 15 | (A.InsertionOrder > B.InsertionOrder) : |
1536 | 29 | (A.Weight > B.Weight); |
1537 | 44 | } |
1538 | | }; |
1539 | | |
1540 | | /// A specialized priority queue for WeigthedLeaves. It automatically folds |
1541 | | /// constants and allows removal of non-top elements while maintaining the |
1542 | | /// priority order. |
1543 | | class LeafPrioQueue { |
1544 | | SmallVector<WeightedLeaf, 8> Q; |
1545 | | bool HaveConst; |
1546 | | WeightedLeaf ConstElt; |
1547 | | unsigned Opcode; |
1548 | | |
1549 | | public: |
1550 | 0 | bool empty() { |
1551 | 0 | return (!HaveConst && Q.empty()); |
1552 | 0 | } |
1553 | | |
1554 | 34 | size_t size() { |
1555 | 34 | return Q.size() + HaveConst; |
1556 | 34 | } |
1557 | | |
1558 | 12 | bool hasConst() { |
1559 | 12 | return HaveConst; |
1560 | 12 | } |
1561 | | |
1562 | 24 | const WeightedLeaf &top() { |
1563 | 24 | if (HaveConst) |
1564 | 0 | return ConstElt; |
1565 | 24 | return Q.front(); |
1566 | 24 | } |
1567 | | |
1568 | 51 | WeightedLeaf pop() { |
1569 | 51 | if (HaveConst51 ) { |
1570 | 7 | HaveConst = false; |
1571 | 7 | return ConstElt; |
1572 | 7 | } |
1573 | 44 | std::pop_heap(Q.begin(), Q.end(), WeightedLeaf::Compare); |
1574 | 44 | return Q.pop_back_val(); |
1575 | 44 | } |
1576 | | |
1577 | 64 | void push(WeightedLeaf L, bool SeparateConst=true) { |
1578 | 64 | if (!HaveConst && 64 SeparateConst62 && isa<ConstantSDNode>(L.Value)55 ) { |
1579 | 7 | if (Opcode == ISD::MUL && |
1580 | 0 | cast<ConstantSDNode>(L.Value)->getSExtValue() == 1) |
1581 | 0 | return; |
1582 | 7 | if (7 Opcode == ISD::ADD && |
1583 | 7 | cast<ConstantSDNode>(L.Value)->getSExtValue() == 0) |
1584 | 0 | return; |
1585 | 7 | |
1586 | 7 | HaveConst = true; |
1587 | 7 | ConstElt = L; |
1588 | 64 | } else { |
1589 | 57 | Q.push_back(L); |
1590 | 57 | std::push_heap(Q.begin(), Q.end(), WeightedLeaf::Compare); |
1591 | 57 | } |
1592 | 64 | } |
1593 | | |
1594 | | /// Push L to the bottom of the queue regardless of its weight. If L is |
1595 | | /// constant, it will not be folded with other constants in the queue. |
1596 | 7 | void pushToBottom(WeightedLeaf L) { |
1597 | 7 | L.Weight = 1000; |
1598 | 7 | push(L, false); |
1599 | 7 | } |
1600 | | |
1601 | | /// Search for a SHL(x, [<=MaxAmount]) subtree in the queue, return the one of |
1602 | | /// lowest weight and remove it from the queue. |
1603 | | WeightedLeaf findSHL(uint64_t MaxAmount); |
1604 | | |
1605 | | WeightedLeaf findMULbyConst(); |
1606 | | |
1607 | | LeafPrioQueue(unsigned Opcode) : |
1608 | 12 | HaveConst(false), Opcode(Opcode) { } |
1609 | | }; |
1610 | | } // end anonymous namespace |
1611 | | |
1612 | 1 | WeightedLeaf LeafPrioQueue::findSHL(uint64_t MaxAmount) { |
1613 | 1 | int ResultPos; |
1614 | 1 | WeightedLeaf Result; |
1615 | 1 | |
1616 | 2 | for (int Pos = 0, End = Q.size(); Pos != End2 ; ++Pos1 ) { |
1617 | 1 | const WeightedLeaf &L = Q[Pos]; |
1618 | 1 | const SDValue &Val = L.Value; |
1619 | 1 | if (Val.getOpcode() != ISD::SHL || |
1620 | 1 | !isa<ConstantSDNode>(Val.getOperand(1)) || |
1621 | 1 | Val.getConstantOperandVal(1) > MaxAmount) |
1622 | 0 | continue; |
1623 | 1 | if (1 !Result.Value.getNode() || 1 Result.Weight > L.Weight0 || |
1624 | 0 | (Result.Weight == L.Weight && 0 Result.InsertionOrder > L.InsertionOrder0 )) |
1625 | 1 | { |
1626 | 1 | Result = L; |
1627 | 1 | ResultPos = Pos; |
1628 | 1 | } |
1629 | 1 | } |
1630 | 1 | |
1631 | 1 | if (Result.Value.getNode()1 ) { |
1632 | 1 | Q.erase(&Q[ResultPos]); |
1633 | 1 | std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare); |
1634 | 1 | } |
1635 | 1 | |
1636 | 1 | return Result; |
1637 | 1 | } |
1638 | | |
1639 | 22 | WeightedLeaf LeafPrioQueue::findMULbyConst() { |
1640 | 22 | int ResultPos; |
1641 | 22 | WeightedLeaf Result; |
1642 | 22 | |
1643 | 55 | for (int Pos = 0, End = Q.size(); Pos != End55 ; ++Pos33 ) { |
1644 | 33 | const WeightedLeaf &L = Q[Pos]; |
1645 | 33 | const SDValue &Val = L.Value; |
1646 | 33 | if (Val.getOpcode() != ISD::MUL || |
1647 | 0 | !isa<ConstantSDNode>(Val.getOperand(1)) || |
1648 | 0 | Val.getConstantOperandVal(1) > 127) |
1649 | 33 | continue; |
1650 | 0 | if (0 !Result.Value.getNode() || 0 Result.Weight > L.Weight0 || |
1651 | 0 | (Result.Weight == L.Weight && 0 Result.InsertionOrder > L.InsertionOrder0 )) |
1652 | 0 | { |
1653 | 0 | Result = L; |
1654 | 0 | ResultPos = Pos; |
1655 | 0 | } |
1656 | 33 | } |
1657 | 22 | |
1658 | 22 | if (Result.Value.getNode()22 ) { |
1659 | 0 | Q.erase(&Q[ResultPos]); |
1660 | 0 | std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare); |
1661 | 0 | } |
1662 | 22 | |
1663 | 22 | return Result; |
1664 | 22 | } |
1665 | | |
1666 | 0 | SDValue HexagonDAGToDAGISel::getMultiplierForSHL(SDNode *N) { |
1667 | 0 | uint64_t MulFactor = 1ull << N->getConstantOperandVal(1); |
1668 | 0 | return CurDAG->getConstant(MulFactor, SDLoc(N), |
1669 | 0 | N->getOperand(1).getValueType()); |
1670 | 0 | } |
1671 | | |
1672 | | /// @returns the value x for which 2^x is a factor of Val |
1673 | 4 | static unsigned getPowerOf2Factor(SDValue Val) { |
1674 | 4 | if (Val.getOpcode() == ISD::MUL4 ) { |
1675 | 1 | unsigned MaxFactor = 0; |
1676 | 3 | for (int i = 0; i < 23 ; ++i2 ) { |
1677 | 2 | ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(i)); |
1678 | 2 | if (!C) |
1679 | 1 | continue; |
1680 | 1 | const APInt &CInt = C->getAPIntValue(); |
1681 | 1 | if (CInt.getBoolValue()) |
1682 | 1 | MaxFactor = CInt.countTrailingZeros(); |
1683 | 2 | } |
1684 | 1 | return MaxFactor; |
1685 | 1 | } |
1686 | 3 | if (3 Val.getOpcode() == ISD::SHL3 ) { |
1687 | 3 | if (!isa<ConstantSDNode>(Val.getOperand(1).getNode())) |
1688 | 0 | return 0; |
1689 | 3 | return (unsigned) Val.getConstantOperandVal(1); |
1690 | 3 | } |
1691 | 0 |
|
1692 | 0 | return 0; |
1693 | 0 | } |
1694 | | |
1695 | | /// @returns true if V>>Amount will eliminate V's operation on its child |
1696 | 2 | static bool willShiftRightEliminate(SDValue V, unsigned Amount) { |
1697 | 2 | if (V.getOpcode() == ISD::MUL2 ) { |
1698 | 1 | SDValue Ops[] = { V.getOperand(0), V.getOperand(1) }; |
1699 | 2 | for (int i = 0; i < 22 ; ++i1 ) |
1700 | 2 | if (2 isa<ConstantSDNode>(Ops[i].getNode()) && |
1701 | 2 | V.getConstantOperandVal(i) % (1ULL << Amount) == 01 ) { |
1702 | 1 | uint64_t NewConst = V.getConstantOperandVal(i) >> Amount; |
1703 | 1 | return (NewConst == 1); |
1704 | 1 | } |
1705 | 2 | } else if (1 V.getOpcode() == ISD::SHL1 ) { |
1706 | 1 | return (Amount == V.getConstantOperandVal(1)); |
1707 | 1 | } |
1708 | 0 |
|
1709 | 0 | return false; |
1710 | 0 | } |
1711 | | |
1712 | 2 | SDValue HexagonDAGToDAGISel::factorOutPowerOf2(SDValue V, unsigned Power) { |
1713 | 2 | SDValue Ops[] = { V.getOperand(0), V.getOperand(1) }; |
1714 | 2 | if (V.getOpcode() == ISD::MUL2 ) { |
1715 | 2 | for (int i=0; i < 22 ; ++i1 ) { |
1716 | 2 | if (isa<ConstantSDNode>(Ops[i].getNode()) && |
1717 | 2 | V.getConstantOperandVal(i) % ((uint64_t)1 << Power) == 01 ) { |
1718 | 1 | uint64_t NewConst = V.getConstantOperandVal(i) >> Power; |
1719 | 1 | if (NewConst == 1) |
1720 | 0 | return Ops[!i]; |
1721 | 1 | Ops[i] = CurDAG->getConstant(NewConst, |
1722 | 1 | SDLoc(V), V.getValueType()); |
1723 | 1 | break; |
1724 | 1 | } |
1725 | 2 | } |
1726 | 2 | } else if (1 V.getOpcode() == ISD::SHL1 ) { |
1727 | 1 | uint64_t ShiftAmount = V.getConstantOperandVal(1); |
1728 | 1 | if (ShiftAmount == Power) |
1729 | 1 | return Ops[0]; |
1730 | 0 | Ops[1] = CurDAG->getConstant(ShiftAmount - Power, |
1731 | 0 | SDLoc(V), V.getValueType()); |
1732 | 0 | } |
1733 | 2 | |
1734 | 1 | return CurDAG->getNode(V.getOpcode(), SDLoc(V), V.getValueType(), Ops); |
1735 | 2 | } |
1736 | | |
1737 | 27 | static bool isTargetConstant(const SDValue &V) { |
1738 | 27 | return V.getOpcode() == HexagonISD::CONST32 || |
1739 | 26 | V.getOpcode() == HexagonISD::CONST32_GP; |
1740 | 27 | } |
1741 | | |
1742 | 0 | unsigned HexagonDAGToDAGISel::getUsesInFunction(const Value *V) { |
1743 | 0 | if (GAUsesInFunction.count(V)) |
1744 | 0 | return GAUsesInFunction[V]; |
1745 | 0 |
|
1746 | 0 | unsigned Result = 0; |
1747 | 0 | const Function *CurF = CurDAG->getMachineFunction().getFunction(); |
1748 | 0 | for (const User *U : V->users()) { |
1749 | 0 | if (isa<Instruction>(U) && |
1750 | 0 | cast<Instruction>(U)->getParent()->getParent() == CurF) |
1751 | 0 | ++Result; |
1752 | 0 | } |
1753 | 0 |
|
1754 | 0 | GAUsesInFunction[V] = Result; |
1755 | 0 |
|
1756 | 0 | return Result; |
1757 | 0 | } |
1758 | | |
1759 | | /// Note - After calling this, N may be dead. It may have been replaced by a |
1760 | | /// new node, so always use the returned value in place of N. |
1761 | | /// |
1762 | | /// @returns The SDValue taking the place of N (which could be N if it is |
1763 | | /// unchanged) |
1764 | 718 | SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) { |
1765 | 718 | assert(RootWeights.count(N) && "Cannot balance non-root node."); |
1766 | 718 | assert(RootWeights[N] != -2 && "This node was RAUW'd!"); |
1767 | 718 | assert(!TopLevel || N->getOpcode() == ISD::ADD); |
1768 | 718 | |
1769 | 718 | // Return early if this node was already visited |
1770 | 718 | if (RootWeights[N] != -1) |
1771 | 0 | return SDValue(N, 0); |
1772 | 718 | |
1773 | 718 | assert(isOpcodeHandled(N)); |
1774 | 718 | |
1775 | 718 | SDValue Op0 = N->getOperand(0); |
1776 | 718 | SDValue Op1 = N->getOperand(1); |
1777 | 718 | |
1778 | 718 | // Return early if the operands will remain unchanged or are all roots |
1779 | 718 | if ((!isOpcodeHandled(Op0.getNode()) || 718 RootWeights.count(Op0.getNode())110 ) && |
1780 | 718 | (!isOpcodeHandled(Op1.getNode()) || 710 RootWeights.count(Op1.getNode())71 )) { |
1781 | 706 | SDNode *Op0N = Op0.getNode(); |
1782 | 706 | int Weight; |
1783 | 706 | if (isOpcodeHandled(Op0N) && 706 RootWeights[Op0N] == -1102 ) { |
1784 | 40 | Weight = getWeight(balanceSubTree(Op0N).getNode()); |
1785 | 40 | // Weight = calculateWeight(Op0N); |
1786 | 40 | } else |
1787 | 666 | Weight = getWeight(Op0N); |
1788 | 706 | |
1789 | 706 | SDNode *Op1N = N->getOperand(1).getNode(); // Op1 may have been RAUWd |
1790 | 706 | if (isOpcodeHandled(Op1N) && 706 RootWeights[Op1N] == -167 ) { |
1791 | 60 | Weight += getWeight(balanceSubTree(Op1N).getNode()); |
1792 | 60 | // Weight += calculateWeight(Op1N); |
1793 | 60 | } else |
1794 | 646 | Weight += getWeight(Op1N); |
1795 | 706 | |
1796 | 706 | RootWeights[N] = Weight; |
1797 | 706 | RootHeights[N] = std::max(getHeight(N->getOperand(0).getNode()), |
1798 | 706 | getHeight(N->getOperand(1).getNode())) + 1; |
1799 | 706 | |
1800 | 706 | DEBUG(dbgs() << "--> No need to balance root (Weight=" << Weight |
1801 | 706 | << " Height=" << RootHeights[N] << "): "); |
1802 | 706 | DEBUG(N->dump()); |
1803 | 706 | |
1804 | 706 | return SDValue(N, 0); |
1805 | 706 | } |
1806 | 12 | |
1807 | 12 | DEBUG12 (dbgs() << "** Balancing root node: "); |
1808 | 12 | DEBUG(N->dump()); |
1809 | 12 | |
1810 | 12 | unsigned NOpcode = N->getOpcode(); |
1811 | 12 | |
1812 | 12 | LeafPrioQueue Leaves(NOpcode); |
1813 | 12 | SmallVector<SDValue, 4> Worklist; |
1814 | 12 | Worklist.push_back(SDValue(N, 0)); |
1815 | 12 | |
1816 | 12 | // SHL nodes will be converted to MUL nodes |
1817 | 12 | if (NOpcode == ISD::SHL) |
1818 | 0 | NOpcode = ISD::MUL; |
1819 | 12 | |
1820 | 12 | bool CanFactorize = false; |
1821 | 12 | WeightedLeaf Mul1, Mul2; |
1822 | 12 | unsigned MaxPowerOf2 = 0; |
1823 | 12 | WeightedLeaf GA; |
1824 | 12 | |
1825 | 12 | // Do not try to factor out a shift if there is already a shift at the tip of |
1826 | 12 | // the tree. |
1827 | 12 | bool HaveTopLevelShift = false; |
1828 | 12 | if (TopLevel && |
1829 | 12 | ((isOpcodeHandled(Op0.getNode()) && 12 Op0.getOpcode() == ISD::SHL8 && |
1830 | 0 | Op0.getConstantOperandVal(1) < 4) || |
1831 | 12 | (isOpcodeHandled(Op1.getNode()) && 12 Op1.getOpcode() == ISD::SHL4 && |
1832 | 0 | Op1.getConstantOperandVal(1) < 4))) |
1833 | 0 | HaveTopLevelShift = true; |
1834 | 12 | |
1835 | 12 | // Flatten the subtree into an ordered list of leaves; at the same time |
1836 | 12 | // determine whether the tree is already balanced. |
1837 | 12 | int InsertionOrder = 0; |
1838 | 12 | SmallDenseMap<SDValue, int> NodeHeights; |
1839 | 12 | bool Imbalanced = false; |
1840 | 12 | int CurrentWeight = 0; |
1841 | 96 | while (!Worklist.empty()96 ) { |
1842 | 84 | SDValue Child = Worklist.pop_back_val(); |
1843 | 84 | |
1844 | 84 | if (Child.getNode() != N && 84 RootWeights.count(Child.getNode())60 ) { |
1845 | 9 | // CASE 1: Child is a root note |
1846 | 9 | |
1847 | 9 | int Weight = RootWeights[Child.getNode()]; |
1848 | 9 | if (Weight == -19 ) { |
1849 | 5 | Child = balanceSubTree(Child.getNode()); |
1850 | 5 | // calculateWeight(Child.getNode()); |
1851 | 5 | Weight = getWeight(Child.getNode()); |
1852 | 9 | } else if (4 Weight == -24 ) { |
1853 | 0 | // Whoops, this node was RAUWd by one of the balanceSubTree calls we |
1854 | 0 | // made. Our worklist isn't up to date anymore. |
1855 | 0 | // Restart the whole process. |
1856 | 0 | DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n"); |
1857 | 4 | return balanceSubTree(N, TopLevel); |
1858 | 4 | } |
1859 | 9 | |
1860 | 9 | NodeHeights[Child] = 1; |
1861 | 9 | CurrentWeight += Weight; |
1862 | 9 | |
1863 | 9 | unsigned PowerOf2; |
1864 | 9 | if (TopLevel && 9 !CanFactorize9 && !HaveTopLevelShift9 && |
1865 | 9 | (Child.getOpcode() == ISD::MUL || 9 Child.getOpcode() == ISD::SHL8 ) && |
1866 | 9 | Child.hasOneUse()7 && (PowerOf2 = getPowerOf2Factor(Child))4 ) { |
1867 | 4 | // Try to identify two factorizable MUL/SHL children greedily. Leave |
1868 | 4 | // them out of the priority queue for now so we can deal with them |
1869 | 4 | // after. |
1870 | 4 | if (!Mul1.Value.getNode()4 ) { |
1871 | 3 | Mul1 = WeightedLeaf(Child, Weight, InsertionOrder++); |
1872 | 3 | MaxPowerOf2 = PowerOf2; |
1873 | 4 | } else { |
1874 | 1 | Mul2 = WeightedLeaf(Child, Weight, InsertionOrder++); |
1875 | 1 | MaxPowerOf2 = std::min(MaxPowerOf2, PowerOf2); |
1876 | 1 | |
1877 | 1 | // Our addressing modes can only shift by a maximum of 3 |
1878 | 1 | if (MaxPowerOf2 > 3) |
1879 | 0 | MaxPowerOf2 = 3; |
1880 | 1 | |
1881 | 1 | CanFactorize = true; |
1882 | 1 | } |
1883 | 4 | } else |
1884 | 5 | Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++)); |
1885 | 84 | } else if (75 !isOpcodeHandled(Child.getNode())75 ) { |
1886 | 27 | // CASE 2: Child is an unhandled kind of node (e.g. constant) |
1887 | 27 | int Weight = getWeight(Child.getNode()); |
1888 | 27 | |
1889 | 27 | NodeHeights[Child] = getHeight(Child.getNode()); |
1890 | 27 | CurrentWeight += Weight; |
1891 | 27 | |
1892 | 27 | if (isTargetConstant(Child) && 27 !GA.Value.getNode()1 ) |
1893 | 1 | GA = WeightedLeaf(Child, Weight, InsertionOrder++); |
1894 | 27 | else |
1895 | 26 | Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++)); |
1896 | 75 | } else { |
1897 | 48 | // CASE 3: Child is a subtree of same opcode |
1898 | 48 | // Visit children first, then flatten. |
1899 | 48 | unsigned ChildOpcode = Child.getOpcode(); |
1900 | 48 | assert(ChildOpcode == NOpcode || |
1901 | 48 | (NOpcode == ISD::MUL && ChildOpcode == ISD::SHL)); |
1902 | 48 | |
1903 | 48 | // Convert SHL to MUL |
1904 | 48 | SDValue Op1; |
1905 | 48 | if (ChildOpcode == ISD::SHL) |
1906 | 0 | Op1 = getMultiplierForSHL(Child.getNode()); |
1907 | 48 | else |
1908 | 48 | Op1 = Child->getOperand(1); |
1909 | 48 | |
1910 | 48 | if (!NodeHeights.count(Op1) || 48 !NodeHeights.count(Child->getOperand(0))24 ) { |
1911 | 24 | assert(!NodeHeights.count(Child) && "Parent visited before children?"); |
1912 | 24 | // Visit children first, then re-visit this node |
1913 | 24 | Worklist.push_back(Child); |
1914 | 24 | Worklist.push_back(Op1); |
1915 | 24 | Worklist.push_back(Child->getOperand(0)); |
1916 | 48 | } else { |
1917 | 24 | // Back at this node after visiting the children |
1918 | 24 | if (std::abs(NodeHeights[Op1] - NodeHeights[Child->getOperand(0)]) > 1) |
1919 | 6 | Imbalanced = true; |
1920 | 24 | |
1921 | 24 | NodeHeights[Child] = std::max(NodeHeights[Op1], |
1922 | 24 | NodeHeights[Child->getOperand(0)]) + 1; |
1923 | 24 | } |
1924 | 75 | } |
1925 | 84 | } |
1926 | 12 | |
1927 | 12 | DEBUG12 (dbgs() << "--> Current height=" << NodeHeights[SDValue(N, 0)] |
1928 | 12 | << " weight=" << CurrentWeight << " imbalanced=" |
1929 | 12 | << Imbalanced << "\n"); |
1930 | 12 | |
1931 | 12 | // Transform MUL(x, C * 2^Y) + SHL(z, Y) -> SHL(ADD(MUL(x, C), z), Y) |
1932 | 12 | // This factors out a shift in order to match memw(a<<Y+b). |
1933 | 12 | if (CanFactorize && 12 (willShiftRightEliminate(Mul1.Value, MaxPowerOf2) || |
1934 | 12 | willShiftRightEliminate(Mul2.Value, MaxPowerOf2)1 )) { |
1935 | 1 | DEBUG(dbgs() << "--> Found common factor for two MUL children!\n"); |
1936 | 1 | int Weight = Mul1.Weight + Mul2.Weight; |
1937 | 1 | int Height = std::max(NodeHeights[Mul1.Value], NodeHeights[Mul2.Value]) + 1; |
1938 | 1 | SDValue Mul1Factored = factorOutPowerOf2(Mul1.Value, MaxPowerOf2); |
1939 | 1 | SDValue Mul2Factored = factorOutPowerOf2(Mul2.Value, MaxPowerOf2); |
1940 | 1 | SDValue Sum = CurDAG->getNode(ISD::ADD, SDLoc(N), Mul1.Value.getValueType(), |
1941 | 1 | Mul1Factored, Mul2Factored); |
1942 | 1 | SDValue Const = CurDAG->getConstant(MaxPowerOf2, SDLoc(N), |
1943 | 1 | Mul1.Value.getValueType()); |
1944 | 1 | SDValue New = CurDAG->getNode(ISD::SHL, SDLoc(N), Mul1.Value.getValueType(), |
1945 | 1 | Sum, Const); |
1946 | 1 | NodeHeights[New] = Height; |
1947 | 1 | Leaves.push(WeightedLeaf(New, Weight, Mul1.InsertionOrder)); |
1948 | 12 | } else if (11 Mul1.Value.getNode()11 ) { |
1949 | 2 | // We failed to factorize two MULs, so now the Muls are left outside the |
1950 | 2 | // queue... add them back. |
1951 | 2 | Leaves.push(Mul1); |
1952 | 2 | if (Mul2.Value.getNode()) |
1953 | 0 | Leaves.push(Mul2); |
1954 | 11 | CanFactorize = false; |
1955 | 11 | } |
1956 | 12 | |
1957 | 12 | // Combine GA + Constant -> GA+Offset, but only if GA is not used elsewhere |
1958 | 12 | // and the root node itself is not used more than twice. This reduces the |
1959 | 12 | // amount of additional constant extenders introduced by this optimization. |
1960 | 12 | bool CombinedGA = false; |
1961 | 12 | if (NOpcode == ISD::ADD && 12 GA.Value.getNode()12 && Leaves.hasConst()1 && |
1962 | 12 | GA.Value.hasOneUse()0 && N->use_size() < 30 ) { |
1963 | 0 | GlobalAddressSDNode *GANode = |
1964 | 0 | cast<GlobalAddressSDNode>(GA.Value.getOperand(0)); |
1965 | 0 | ConstantSDNode *Offset = cast<ConstantSDNode>(Leaves.top().Value); |
1966 | 0 |
|
1967 | 0 | if (getUsesInFunction(GANode->getGlobal()) == 1 && 0 Offset->hasOneUse()0 && |
1968 | 0 | getTargetLowering()->isOffsetFoldingLegal(GANode)0 ) { |
1969 | 0 | DEBUG(dbgs() << "--> Combining GA and offset (" << Offset->getSExtValue() |
1970 | 0 | << "): "); |
1971 | 0 | DEBUG(GANode->dump()); |
1972 | 0 |
|
1973 | 0 | SDValue NewTGA = |
1974 | 0 | CurDAG->getTargetGlobalAddress(GANode->getGlobal(), SDLoc(GA.Value), |
1975 | 0 | GANode->getValueType(0), |
1976 | 0 | GANode->getOffset() + (uint64_t)Offset->getSExtValue()); |
1977 | 0 | GA.Value = CurDAG->getNode(GA.Value.getOpcode(), SDLoc(GA.Value), |
1978 | 0 | GA.Value.getValueType(), NewTGA); |
1979 | 0 | GA.Weight += Leaves.top().Weight; |
1980 | 0 |
|
1981 | 0 | NodeHeights[GA.Value] = getHeight(GA.Value.getNode()); |
1982 | 0 | CombinedGA = true; |
1983 | 0 |
|
1984 | 0 | Leaves.pop(); // Remove the offset constant from the queue |
1985 | 0 | } |
1986 | 0 | } |
1987 | 12 | |
1988 | 12 | if ((RebalanceOnlyForOptimizations && 12 !CanFactorize0 && !CombinedGA0 ) || |
1989 | 12 | (RebalanceOnlyImbalancedTrees && 12 !Imbalanced0 )) { |
1990 | 0 | RootWeights[N] = CurrentWeight; |
1991 | 0 | RootHeights[N] = NodeHeights[SDValue(N, 0)]; |
1992 | 0 |
|
1993 | 0 | return SDValue(N, 0); |
1994 | 0 | } |
1995 | 12 | |
1996 | 12 | // Combine GA + SHL(x, C<=31) so we will match Rx=add(#u8,asl(Rx,#U5)) |
1997 | 12 | if (12 NOpcode == ISD::ADD && 12 GA.Value.getNode()12 ) { |
1998 | 1 | WeightedLeaf SHL = Leaves.findSHL(31); |
1999 | 1 | if (SHL.Value.getNode()1 ) { |
2000 | 1 | int Height = std::max(NodeHeights[GA.Value], NodeHeights[SHL.Value]) + 1; |
2001 | 1 | GA.Value = CurDAG->getNode(ISD::ADD, SDLoc(GA.Value), |
2002 | 1 | GA.Value.getValueType(), |
2003 | 1 | GA.Value, SHL.Value); |
2004 | 1 | GA.Weight = SHL.Weight; // Specifically ignore the GA weight here |
2005 | 1 | NodeHeights[GA.Value] = Height; |
2006 | 1 | } |
2007 | 1 | } |
2008 | 12 | |
2009 | 12 | if (GA.Value.getNode()) |
2010 | 1 | Leaves.push(GA); |
2011 | 12 | |
2012 | 12 | // If this is the top level and we haven't factored out a shift, we should try |
2013 | 12 | // to move a constant to the bottom to match addressing modes like memw(rX+C) |
2014 | 12 | if (TopLevel && 12 !CanFactorize12 && Leaves.hasConst()11 ) { |
2015 | 7 | DEBUG(dbgs() << "--> Pushing constant to tip of tree."); |
2016 | 7 | Leaves.pushToBottom(Leaves.pop()); |
2017 | 7 | } |
2018 | 12 | |
2019 | 12 | const DataLayout &DL = CurDAG->getDataLayout(); |
2020 | 12 | const TargetLowering &TLI = *getTargetLowering(); |
2021 | 12 | |
2022 | 12 | // Rebuild the tree using Huffman's algorithm |
2023 | 34 | while (Leaves.size() > 134 ) { |
2024 | 22 | WeightedLeaf L0 = Leaves.pop(); |
2025 | 22 | |
2026 | 22 | // See whether we can grab a MUL to form an add(Rx,mpyi(Ry,#u6)), |
2027 | 22 | // otherwise just get the next leaf |
2028 | 22 | WeightedLeaf L1 = Leaves.findMULbyConst(); |
2029 | 22 | if (!L1.Value.getNode()) |
2030 | 22 | L1 = Leaves.pop(); |
2031 | 22 | |
2032 | 22 | assert(L0.Weight <= L1.Weight && "Priority queue is broken!"); |
2033 | 22 | |
2034 | 22 | SDValue V0 = L0.Value; |
2035 | 22 | int V0Weight = L0.Weight; |
2036 | 22 | SDValue V1 = L1.Value; |
2037 | 22 | int V1Weight = L1.Weight; |
2038 | 22 | |
2039 | 22 | // Make sure that none of these nodes have been RAUW'd |
2040 | 22 | if ((RootWeights.count(V0.getNode()) && 22 RootWeights[V0.getNode()] == -22 ) || |
2041 | 22 | (RootWeights.count(V1.getNode()) && 22 RootWeights[V1.getNode()] == -25 )) { |
2042 | 0 | DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n"); |
2043 | 0 | return balanceSubTree(N, TopLevel); |
2044 | 0 | } |
2045 | 22 | |
2046 | 22 | ConstantSDNode *V0C = dyn_cast<ConstantSDNode>(V0); |
2047 | 22 | ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(V1); |
2048 | 22 | EVT VT = N->getValueType(0); |
2049 | 22 | SDValue NewNode; |
2050 | 22 | |
2051 | 22 | if (V0C && 22 !V1C0 ) { |
2052 | 0 | std::swap(V0, V1); |
2053 | 0 | std::swap(V0C, V1C); |
2054 | 0 | } |
2055 | 22 | |
2056 | 22 | // Calculate height of this node |
2057 | 22 | assert(NodeHeights.count(V0) && NodeHeights.count(V1) && |
2058 | 22 | "Children must have been visited before re-combining them!"); |
2059 | 22 | int Height = std::max(NodeHeights[V0], NodeHeights[V1]) + 1; |
2060 | 22 | |
2061 | 22 | // Rebuild this node (and restore SHL from MUL if needed) |
2062 | 22 | if (V1C && 22 NOpcode == ISD::MUL7 && V1C->getAPIntValue().isPowerOf2()0 ) |
2063 | 0 | NewNode = CurDAG->getNode( |
2064 | 0 | ISD::SHL, SDLoc(V0), VT, V0, |
2065 | 0 | CurDAG->getConstant( |
2066 | 0 | V1C->getAPIntValue().logBase2(), SDLoc(N), |
2067 | 0 | TLI.getScalarShiftAmountTy(DL, V0.getValueType()))); |
2068 | 22 | else |
2069 | 22 | NewNode = CurDAG->getNode(NOpcode, SDLoc(N), VT, V0, V1); |
2070 | 22 | |
2071 | 22 | NodeHeights[NewNode] = Height; |
2072 | 22 | |
2073 | 22 | int Weight = V0Weight + V1Weight; |
2074 | 22 | Leaves.push(WeightedLeaf(NewNode, Weight, L0.InsertionOrder)); |
2075 | 22 | |
2076 | 22 | DEBUG(dbgs() << "--> Built new node (Weight=" << Weight << ",Height=" |
2077 | 22 | << Height << "):\n"); |
2078 | 22 | DEBUG(NewNode.dump()); |
2079 | 22 | } |
2080 | 12 | |
2081 | 12 | assert(Leaves.size() == 1); |
2082 | 12 | SDValue NewRoot = Leaves.top().Value; |
2083 | 12 | |
2084 | 12 | assert(NodeHeights.count(NewRoot)); |
2085 | 12 | int Height = NodeHeights[NewRoot]; |
2086 | 12 | |
2087 | 12 | // Restore SHL if we earlier converted it to a MUL |
2088 | 12 | if (NewRoot.getOpcode() == ISD::MUL12 ) { |
2089 | 0 | ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(NewRoot.getOperand(1)); |
2090 | 0 | if (V1C && 0 V1C->getAPIntValue().isPowerOf2()0 ) { |
2091 | 0 | EVT VT = NewRoot.getValueType(); |
2092 | 0 | SDValue V0 = NewRoot.getOperand(0); |
2093 | 0 | NewRoot = CurDAG->getNode( |
2094 | 0 | ISD::SHL, SDLoc(NewRoot), VT, V0, |
2095 | 0 | CurDAG->getConstant( |
2096 | 0 | V1C->getAPIntValue().logBase2(), SDLoc(NewRoot), |
2097 | 0 | TLI.getScalarShiftAmountTy(DL, V0.getValueType()))); |
2098 | 0 | } |
2099 | 0 | } |
2100 | 12 | |
2101 | 12 | if (N != NewRoot.getNode()12 ) { |
2102 | 6 | DEBUG(dbgs() << "--> Root is now: "); |
2103 | 6 | DEBUG(NewRoot.dump()); |
2104 | 6 | |
2105 | 6 | // Replace all uses of old root by new root |
2106 | 6 | CurDAG->ReplaceAllUsesWith(N, NewRoot.getNode()); |
2107 | 6 | // Mark that we have RAUW'd N |
2108 | 6 | RootWeights[N] = -2; |
2109 | 12 | } else { |
2110 | 6 | DEBUG(dbgs() << "--> Root unchanged.\n"); |
2111 | 6 | } |
2112 | 12 | |
2113 | 12 | RootWeights[NewRoot.getNode()] = Leaves.top().Weight; |
2114 | 12 | RootHeights[NewRoot.getNode()] = Height; |
2115 | 12 | |
2116 | 12 | return NewRoot; |
2117 | 718 | } |
2118 | | |
2119 | 3.38k | void HexagonDAGToDAGISel::rebalanceAddressTrees() { |
2120 | 59.0k | for (auto I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E59.0k ;) { |
2121 | 55.6k | SDNode *N = &*I++; |
2122 | 55.6k | if (N->getOpcode() != ISD::LOAD && 55.6k N->getOpcode() != ISD::STORE53.5k ) |
2123 | 51.6k | continue; |
2124 | 3.99k | |
2125 | 3.99k | SDValue BasePtr = cast<MemSDNode>(N)->getBasePtr(); |
2126 | 3.99k | if (BasePtr.getOpcode() != ISD::ADD) |
2127 | 3.18k | continue; |
2128 | 808 | |
2129 | 808 | // We've already processed this node |
2130 | 808 | if (808 RootWeights.count(BasePtr.getNode())808 ) |
2131 | 195 | continue; |
2132 | 613 | |
2133 | 613 | DEBUG613 (dbgs() << "** Rebalancing address calculation in node: "); |
2134 | 613 | DEBUG(N->dump()); |
2135 | 613 | |
2136 | 613 | // FindRoots |
2137 | 613 | SmallVector<SDNode *, 4> Worklist; |
2138 | 613 | |
2139 | 613 | Worklist.push_back(BasePtr.getOperand(0).getNode()); |
2140 | 613 | Worklist.push_back(BasePtr.getOperand(1).getNode()); |
2141 | 613 | |
2142 | 2.29k | while (!Worklist.empty()2.29k ) { |
2143 | 1.68k | SDNode *N = Worklist.pop_back_val(); |
2144 | 1.68k | unsigned Opcode = N->getOpcode(); |
2145 | 1.68k | |
2146 | 1.68k | if (!isOpcodeHandled(N)) |
2147 | 1.45k | continue; |
2148 | 230 | |
2149 | 230 | Worklist.push_back(N->getOperand(0).getNode()); |
2150 | 230 | Worklist.push_back(N->getOperand(1).getNode()); |
2151 | 230 | |
2152 | 230 | // Not a root if it has only one use and same opcode as its parent |
2153 | 230 | if (N->hasOneUse() && 230 Opcode == N->use_begin()->getOpcode()131 ) |
2154 | 12 | continue; |
2155 | 218 | |
2156 | 218 | // This root node has already been processed |
2157 | 218 | if (218 RootWeights.count(N)218 ) |
2158 | 113 | continue; |
2159 | 105 | |
2160 | 105 | RootWeights[N] = -1; |
2161 | 105 | } |
2162 | 613 | |
2163 | 613 | // Balance node itself |
2164 | 613 | RootWeights[BasePtr.getNode()] = -1; |
2165 | 613 | SDValue NewBasePtr = balanceSubTree(BasePtr.getNode(), /*TopLevel=*/ true); |
2166 | 613 | |
2167 | 613 | if (N->getOpcode() == ISD::LOAD) |
2168 | 417 | N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), |
2169 | 417 | NewBasePtr, N->getOperand(2)); |
2170 | 613 | else |
2171 | 196 | N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), |
2172 | 196 | NewBasePtr, N->getOperand(3)); |
2173 | 613 | |
2174 | 613 | DEBUG(dbgs() << "--> Final node: "); |
2175 | 613 | DEBUG(N->dump()); |
2176 | 55.6k | } |
2177 | 3.38k | |
2178 | 3.38k | CurDAG->RemoveDeadNodes(); |
2179 | 3.38k | GAUsesInFunction.clear(); |
2180 | 3.38k | RootHeights.clear(); |
2181 | 3.38k | RootWeights.clear(); |
2182 | 3.38k | } |
2183 | | |