/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- HexagonBitSimplify.cpp ---------------------------------------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | |
10 | | #include "BitTracker.h" |
11 | | #include "HexagonBitTracker.h" |
12 | | #include "HexagonInstrInfo.h" |
13 | | #include "HexagonRegisterInfo.h" |
14 | | #include "HexagonSubtarget.h" |
15 | | #include "llvm/ADT/BitVector.h" |
16 | | #include "llvm/ADT/DenseMap.h" |
17 | | #include "llvm/ADT/GraphTraits.h" |
18 | | #include "llvm/ADT/STLExtras.h" |
19 | | #include "llvm/ADT/SmallVector.h" |
20 | | #include "llvm/ADT/StringRef.h" |
21 | | #include "llvm/CodeGen/MachineBasicBlock.h" |
22 | | #include "llvm/CodeGen/MachineDominators.h" |
23 | | #include "llvm/CodeGen/MachineFunction.h" |
24 | | #include "llvm/CodeGen/MachineFunctionPass.h" |
25 | | #include "llvm/CodeGen/MachineInstr.h" |
26 | | #include "llvm/CodeGen/MachineInstrBuilder.h" |
27 | | #include "llvm/CodeGen/MachineOperand.h" |
28 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
29 | | #include "llvm/IR/DebugLoc.h" |
30 | | #include "llvm/MC/MCInstrDesc.h" |
31 | | #include "llvm/Pass.h" |
32 | | #include "llvm/Support/CommandLine.h" |
33 | | #include "llvm/Support/Compiler.h" |
34 | | #include "llvm/Support/Debug.h" |
35 | | #include "llvm/Support/ErrorHandling.h" |
36 | | #include "llvm/Support/MathExtras.h" |
37 | | #include "llvm/Support/raw_ostream.h" |
38 | | #include "llvm/Target/TargetRegisterInfo.h" |
39 | | #include <algorithm> |
40 | | #include <cassert> |
41 | | #include <cstdint> |
42 | | #include <iterator> |
43 | | #include <limits> |
44 | | #include <utility> |
45 | | #include <vector> |
46 | | |
47 | | #define DEBUG_TYPE "hexbit" |
48 | | |
49 | | using namespace llvm; |
50 | | |
51 | | static cl::opt<bool> PreserveTiedOps("hexbit-keep-tied", cl::Hidden, |
52 | | cl::init(true), cl::desc("Preserve subregisters in tied operands")); |
53 | | static cl::opt<bool> GenExtract("hexbit-extract", cl::Hidden, |
54 | | cl::init(true), cl::desc("Generate extract instructions")); |
55 | | static cl::opt<bool> GenBitSplit("hexbit-bitsplit", cl::Hidden, |
56 | | cl::init(true), cl::desc("Generate bitsplit instructions")); |
57 | | |
58 | | static cl::opt<unsigned> MaxExtract("hexbit-max-extract", cl::Hidden, |
59 | | cl::init(std::numeric_limits<unsigned>::max())); |
60 | | static unsigned CountExtract = 0; |
61 | | static cl::opt<unsigned> MaxBitSplit("hexbit-max-bitsplit", cl::Hidden, |
62 | | cl::init(std::numeric_limits<unsigned>::max())); |
63 | | static unsigned CountBitSplit = 0; |
64 | | |
65 | | namespace llvm { |
66 | | |
67 | | void initializeHexagonBitSimplifyPass(PassRegistry& Registry); |
68 | | FunctionPass *createHexagonBitSimplify(); |
69 | | |
70 | | } // end namespace llvm |
71 | | |
72 | | namespace { |
73 | | |
74 | | // Set of virtual registers, based on BitVector. |
75 | | struct RegisterSet : private BitVector { |
76 | 24.1k | RegisterSet() = default; |
77 | 0 | explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {} |
78 | 12.6k | RegisterSet(const RegisterSet &RS) = default; |
79 | | |
80 | | using BitVector::clear; |
81 | | using BitVector::count; |
82 | | |
83 | 27.4k | unsigned find_first() const { |
84 | 27.4k | int First = BitVector::find_first(); |
85 | 27.4k | if (First < 0) |
86 | 4.54k | return 0; |
87 | 22.8k | return x2v(First); |
88 | 22.8k | } |
89 | | |
90 | 959k | unsigned find_next(unsigned Prev) const { |
91 | 959k | int Next = BitVector::find_next(v2x(Prev)); |
92 | 959k | if (Next < 0) |
93 | 13.7k | return 0; |
94 | 945k | return x2v(Next); |
95 | 945k | } |
96 | | |
97 | 76.2k | RegisterSet &insert(unsigned R) { |
98 | 76.2k | unsigned Idx = v2x(R); |
99 | 76.2k | ensure(Idx); |
100 | 76.2k | return static_cast<RegisterSet&>(BitVector::set(Idx)); |
101 | 76.2k | } |
102 | 0 | RegisterSet &remove(unsigned R) { |
103 | 0 | unsigned Idx = v2x(R); |
104 | 0 | if (Idx >= size()) |
105 | 0 | return *this; |
106 | 0 | return static_cast<RegisterSet&>(BitVector::reset(Idx)); |
107 | 0 | } |
108 | | |
109 | 36.7k | RegisterSet &insert(const RegisterSet &Rs) { |
110 | 36.7k | return static_cast<RegisterSet&>(BitVector::operator|=(Rs)); |
111 | 36.7k | } |
112 | 3 | RegisterSet &remove(const RegisterSet &Rs) { |
113 | 3 | return static_cast<RegisterSet&>(BitVector::reset(Rs)); |
114 | 3 | } |
115 | | |
116 | 849k | reference operator[](unsigned R) { |
117 | 849k | unsigned Idx = v2x(R); |
118 | 849k | ensure(Idx); |
119 | 849k | return BitVector::operator[](Idx); |
120 | 849k | } |
121 | 0 | bool operator[](unsigned R) const { |
122 | 0 | unsigned Idx = v2x(R); |
123 | 0 | assert(Idx < size()); |
124 | 0 | return BitVector::operator[](Idx); |
125 | 0 | } |
126 | 4.36k | bool has(unsigned R) const { |
127 | 4.36k | unsigned Idx = v2x(R); |
128 | 4.36k | if (Idx >= size()) |
129 | 3.87k | return false; |
130 | 489 | return BitVector::test(Idx); |
131 | 489 | } |
132 | | |
133 | 0 | bool empty() const { |
134 | 0 | return !BitVector::any(); |
135 | 0 | } |
136 | 0 | bool includes(const RegisterSet &Rs) const { |
137 | 0 | // A.BitVector::test(B) <=> A-B != {} |
138 | 0 | return !Rs.BitVector::test(*this); |
139 | 0 | } |
140 | 12 | bool intersects(const RegisterSet &Rs) const { |
141 | 12 | return BitVector::anyCommon(Rs); |
142 | 12 | } |
143 | | |
144 | | private: |
145 | 925k | void ensure(unsigned Idx) { |
146 | 925k | if (size() <= Idx) |
147 | 51.0k | resize(std::max(Idx+1, 32U)); |
148 | 925k | } |
149 | | |
150 | 1.88M | static inline unsigned v2x(unsigned v) { |
151 | 1.88M | return TargetRegisterInfo::virtReg2Index(v); |
152 | 1.88M | } |
153 | | |
154 | 968k | static inline unsigned x2v(unsigned x) { |
155 | 968k | return TargetRegisterInfo::index2VirtReg(x); |
156 | 968k | } |
157 | | }; |
158 | | |
159 | | struct PrintRegSet { |
160 | | PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI) |
161 | 0 | : RS(S), TRI(RI) {} |
162 | | |
163 | | friend raw_ostream &operator<< (raw_ostream &OS, |
164 | | const PrintRegSet &P); |
165 | | |
166 | | private: |
167 | | const RegisterSet &RS; |
168 | | const TargetRegisterInfo *TRI; |
169 | | }; |
170 | | |
171 | | raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) |
172 | | LLVM_ATTRIBUTE_UNUSED; |
173 | 0 | raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) { |
174 | 0 | OS << '{'; |
175 | 0 | for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R)) |
176 | 0 | OS << ' ' << PrintReg(R, P.TRI); |
177 | 0 | OS << " }"; |
178 | 0 | return OS; |
179 | 0 | } |
180 | | |
181 | | class Transformation; |
182 | | |
183 | | class HexagonBitSimplify : public MachineFunctionPass { |
184 | | public: |
185 | | static char ID; |
186 | | |
187 | 399 | HexagonBitSimplify() : MachineFunctionPass(ID) { |
188 | 399 | initializeHexagonBitSimplifyPass(*PassRegistry::getPassRegistry()); |
189 | 399 | } |
190 | | |
191 | 0 | StringRef getPassName() const override { |
192 | 0 | return "Hexagon bit simplification"; |
193 | 0 | } |
194 | | |
195 | 395 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
196 | 395 | AU.addRequired<MachineDominatorTree>(); |
197 | 395 | AU.addPreserved<MachineDominatorTree>(); |
198 | 395 | MachineFunctionPass::getAnalysisUsage(AU); |
199 | 395 | } |
200 | | |
201 | | bool runOnMachineFunction(MachineFunction &MF) override; |
202 | | |
203 | | static void getInstrDefs(const MachineInstr &MI, RegisterSet &Defs); |
204 | | static void getInstrUses(const MachineInstr &MI, RegisterSet &Uses); |
205 | | static bool isEqual(const BitTracker::RegisterCell &RC1, uint16_t B1, |
206 | | const BitTracker::RegisterCell &RC2, uint16_t B2, uint16_t W); |
207 | | static bool isZero(const BitTracker::RegisterCell &RC, uint16_t B, |
208 | | uint16_t W); |
209 | | static bool getConst(const BitTracker::RegisterCell &RC, uint16_t B, |
210 | | uint16_t W, uint64_t &U); |
211 | | static bool replaceReg(unsigned OldR, unsigned NewR, |
212 | | MachineRegisterInfo &MRI); |
213 | | static bool getSubregMask(const BitTracker::RegisterRef &RR, |
214 | | unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI); |
215 | | static bool replaceRegWithSub(unsigned OldR, unsigned NewR, |
216 | | unsigned NewSR, MachineRegisterInfo &MRI); |
217 | | static bool replaceSubWithSub(unsigned OldR, unsigned OldSR, |
218 | | unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI); |
219 | | static bool parseRegSequence(const MachineInstr &I, |
220 | | BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH, |
221 | | const MachineRegisterInfo &MRI); |
222 | | |
223 | | static bool getUsedBitsInStore(unsigned Opc, BitVector &Bits, |
224 | | uint16_t Begin); |
225 | | static bool getUsedBits(unsigned Opc, unsigned OpN, BitVector &Bits, |
226 | | uint16_t Begin, const HexagonInstrInfo &HII); |
227 | | |
228 | | static const TargetRegisterClass *getFinalVRegClass( |
229 | | const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI); |
230 | | static bool isTransparentCopy(const BitTracker::RegisterRef &RD, |
231 | | const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI); |
232 | | |
233 | | private: |
234 | | MachineDominatorTree *MDT = nullptr; |
235 | | |
236 | | bool visitBlock(MachineBasicBlock &B, Transformation &T, RegisterSet &AVs); |
237 | | static bool hasTiedUse(unsigned Reg, MachineRegisterInfo &MRI, |
238 | | unsigned NewSub = Hexagon::NoSubRegister); |
239 | | }; |
240 | | |
241 | | using HBS = HexagonBitSimplify; |
242 | | |
243 | | // The purpose of this class is to provide a common facility to traverse |
244 | | // the function top-down or bottom-up via the dominator tree, and keep |
245 | | // track of the available registers. |
246 | | class Transformation { |
247 | | public: |
248 | | bool TopDown; |
249 | | |
250 | 4.20k | Transformation(bool TD) : TopDown(TD) {} |
251 | 4.20k | virtual ~Transformation() = default; |
252 | | |
253 | | virtual bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) = 0; |
254 | | }; |
255 | | |
256 | | } // end anonymous namespace |
257 | | |
258 | | char HexagonBitSimplify::ID = 0; |
259 | | |
260 | 399 | INITIALIZE_PASS_BEGIN399 (HexagonBitSimplify, "hexbit",
|
261 | 399 | "Hexagon bit simplification", false, false) |
262 | 399 | INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) |
263 | 399 | INITIALIZE_PASS_END(HexagonBitSimplify, "hexbit", |
264 | | "Hexagon bit simplification", false, false) |
265 | | |
266 | | bool HexagonBitSimplify::visitBlock(MachineBasicBlock &B, Transformation &T, |
267 | 9.01k | RegisterSet &AVs) { |
268 | 9.01k | bool Changed = false; |
269 | 9.01k | |
270 | 9.01k | if (T.TopDown) |
271 | 7.20k | Changed = T.processBlock(B, AVs); |
272 | 9.01k | |
273 | 9.01k | RegisterSet Defs; |
274 | 9.01k | for (auto &I : B) |
275 | 70.8k | getInstrDefs(I, Defs); |
276 | 9.01k | RegisterSet NewAVs = AVs; |
277 | 9.01k | NewAVs.insert(Defs); |
278 | 9.01k | |
279 | 9.01k | for (auto *DTN : children<MachineDomTreeNode*>(MDT->getNode(&B))) |
280 | 4.81k | Changed |= visitBlock(*(DTN->getBlock()), T, NewAVs); |
281 | 9.01k | |
282 | 9.01k | if (!T.TopDown) |
283 | 1.80k | Changed |= T.processBlock(B, AVs); |
284 | 9.01k | |
285 | 9.01k | return Changed; |
286 | 9.01k | } |
287 | | |
288 | | // |
289 | | // Utility functions: |
290 | | // |
291 | | void HexagonBitSimplify::getInstrDefs(const MachineInstr &MI, |
292 | 112k | RegisterSet &Defs) { |
293 | 333k | for (auto &Op : MI.operands()) { |
294 | 333k | if (!Op.isReg() || 333k !Op.isDef()239k ) |
295 | 225k | continue; |
296 | 107k | unsigned R = Op.getReg(); |
297 | 107k | if (!TargetRegisterInfo::isVirtualRegister(R)) |
298 | 36.0k | continue; |
299 | 71.8k | Defs.insert(R); |
300 | 71.8k | } |
301 | 112k | } |
302 | | |
303 | | void HexagonBitSimplify::getInstrUses(const MachineInstr &MI, |
304 | 9 | RegisterSet &Uses) { |
305 | 21 | for (auto &Op : MI.operands()) { |
306 | 21 | if (!Op.isReg() || 21 !Op.isUse()18 ) |
307 | 12 | continue; |
308 | 9 | unsigned R = Op.getReg(); |
309 | 9 | if (!TargetRegisterInfo::isVirtualRegister(R)) |
310 | 1 | continue; |
311 | 8 | Uses.insert(R); |
312 | 8 | } |
313 | 9 | } |
314 | | |
315 | | // Check if all the bits in range [B, E) in both cells are equal. |
316 | | bool HexagonBitSimplify::isEqual(const BitTracker::RegisterCell &RC1, |
317 | | uint16_t B1, const BitTracker::RegisterCell &RC2, uint16_t B2, |
318 | 716k | uint16_t W) { |
319 | 757k | for (uint16_t i = 0; i < W757k ; ++i40.9k ) { |
320 | 757k | // If RC1[i] is "bottom", it cannot be proven equal to RC2[i]. |
321 | 757k | if (RC1[B1+i].Type == BitTracker::BitValue::Ref && 757k RC1[B1+i].RefI.Reg == 0748k ) |
322 | 0 | return false; |
323 | 757k | // Same for RC2[i]. |
324 | 757k | if (757k RC2[B2+i].Type == BitTracker::BitValue::Ref && 757k RC2[B2+i].RefI.Reg == 0697k ) |
325 | 0 | return false; |
326 | 757k | if (757k RC1[B1+i] != RC2[B2+i]757k ) |
327 | 716k | return false; |
328 | 757k | } |
329 | 369 | return true; |
330 | 716k | } |
331 | | |
332 | | bool HexagonBitSimplify::isZero(const BitTracker::RegisterCell &RC, |
333 | 54 | uint16_t B, uint16_t W) { |
334 | 54 | assert(B < RC.width() && B+W <= RC.width()); |
335 | 294 | for (uint16_t i = B; i < B+W294 ; ++i240 ) |
336 | 279 | if (279 !RC[i].is(0)279 ) |
337 | 39 | return false; |
338 | 15 | return true; |
339 | 54 | } |
340 | | |
341 | | bool HexagonBitSimplify::getConst(const BitTracker::RegisterCell &RC, |
342 | 8.10k | uint16_t B, uint16_t W, uint64_t &U) { |
343 | 8.10k | assert(B < RC.width() && B+W <= RC.width()); |
344 | 8.10k | int64_t T = 0; |
345 | 44.2k | for (uint16_t i = B+W; i > B44.2k ; --i36.1k ) { |
346 | 44.1k | const BitTracker::BitValue &BV = RC[i-1]; |
347 | 44.1k | T <<= 1; |
348 | 44.1k | if (BV.is(1)) |
349 | 817 | T |= 1; |
350 | 43.3k | else if (43.3k !BV.is(0)43.3k ) |
351 | 8.00k | return false; |
352 | 44.1k | } |
353 | 100 | U = T; |
354 | 100 | return true; |
355 | 8.10k | } |
356 | | |
357 | | bool HexagonBitSimplify::replaceReg(unsigned OldR, unsigned NewR, |
358 | 592 | MachineRegisterInfo &MRI) { |
359 | 592 | if (!TargetRegisterInfo::isVirtualRegister(OldR) || |
360 | 592 | !TargetRegisterInfo::isVirtualRegister(NewR)) |
361 | 0 | return false; |
362 | 592 | auto Begin = MRI.use_begin(OldR), End = MRI.use_end(); |
363 | 592 | decltype(End) NextI; |
364 | 1.23k | for (auto I = Begin; I != End1.23k ; I = NextI647 ) { |
365 | 647 | NextI = std::next(I); |
366 | 647 | I->setReg(NewR); |
367 | 647 | } |
368 | 592 | return Begin != End; |
369 | 592 | } |
370 | | |
371 | | bool HexagonBitSimplify::replaceRegWithSub(unsigned OldR, unsigned NewR, |
372 | 285 | unsigned NewSR, MachineRegisterInfo &MRI) { |
373 | 285 | if (!TargetRegisterInfo::isVirtualRegister(OldR) || |
374 | 285 | !TargetRegisterInfo::isVirtualRegister(NewR)) |
375 | 0 | return false; |
376 | 285 | if (285 hasTiedUse(OldR, MRI, NewSR)285 ) |
377 | 13 | return false; |
378 | 272 | auto Begin = MRI.use_begin(OldR), End = MRI.use_end(); |
379 | 272 | decltype(End) NextI; |
380 | 625 | for (auto I = Begin; I != End625 ; I = NextI353 ) { |
381 | 353 | NextI = std::next(I); |
382 | 353 | I->setReg(NewR); |
383 | 353 | I->setSubReg(NewSR); |
384 | 353 | } |
385 | 285 | return Begin != End; |
386 | 285 | } |
387 | | |
388 | | bool HexagonBitSimplify::replaceSubWithSub(unsigned OldR, unsigned OldSR, |
389 | 576 | unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI) { |
390 | 576 | if (!TargetRegisterInfo::isVirtualRegister(OldR) || |
391 | 576 | !TargetRegisterInfo::isVirtualRegister(NewR)) |
392 | 0 | return false; |
393 | 576 | if (576 OldSR != NewSR && 576 hasTiedUse(OldR, MRI, NewSR)484 ) |
394 | 0 | return false; |
395 | 576 | auto Begin = MRI.use_begin(OldR), End = MRI.use_end(); |
396 | 576 | decltype(End) NextI; |
397 | 1.04k | for (auto I = Begin; I != End1.04k ; I = NextI469 ) { |
398 | 469 | NextI = std::next(I); |
399 | 469 | if (I->getSubReg() != OldSR) |
400 | 346 | continue; |
401 | 123 | I->setReg(NewR); |
402 | 123 | I->setSubReg(NewSR); |
403 | 123 | } |
404 | 576 | return Begin != End; |
405 | 576 | } |
406 | | |
407 | | // For a register ref (pair Reg:Sub), set Begin to the position of the LSB |
408 | | // of Sub in Reg, and set Width to the size of Sub in bits. Return true, |
409 | | // if this succeeded, otherwise return false. |
410 | | bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR, |
411 | 22.9k | unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI) { |
412 | 22.9k | const TargetRegisterClass *RC = MRI.getRegClass(RR.Reg); |
413 | 22.9k | if (RR.Sub == 022.9k ) { |
414 | 21.8k | Begin = 0; |
415 | 21.8k | Width = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC); |
416 | 21.8k | return true; |
417 | 21.8k | } |
418 | 1.09k | |
419 | 1.09k | Begin = 0; |
420 | 1.09k | |
421 | 1.09k | switch (RC->getID()) { |
422 | 1.09k | case Hexagon::DoubleRegsRegClassID: |
423 | 1.09k | case Hexagon::HvxWRRegClassID: |
424 | 1.09k | Width = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC) / 2; |
425 | 1.09k | if (RR.Sub == Hexagon::isub_hi || 1.09k RR.Sub == Hexagon::vsub_hi967 ) |
426 | 174 | Begin = Width; |
427 | 1.09k | break; |
428 | 0 | default: |
429 | 0 | return false; |
430 | 1.09k | } |
431 | 1.09k | return true; |
432 | 1.09k | } |
433 | | |
434 | | |
435 | | // For a REG_SEQUENCE, set SL to the low subregister and SH to the high |
436 | | // subregister. |
437 | | bool HexagonBitSimplify::parseRegSequence(const MachineInstr &I, |
438 | | BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH, |
439 | 209 | const MachineRegisterInfo &MRI) { |
440 | 209 | assert(I.getOpcode() == TargetOpcode::REG_SEQUENCE); |
441 | 209 | unsigned Sub1 = I.getOperand(2).getImm(), Sub2 = I.getOperand(4).getImm(); |
442 | 209 | auto &DstRC = *MRI.getRegClass(I.getOperand(0).getReg()); |
443 | 209 | auto &HRI = static_cast<const HexagonRegisterInfo&>( |
444 | 209 | *MRI.getTargetRegisterInfo()); |
445 | 209 | unsigned SubLo = HRI.getHexagonSubRegIndex(DstRC, Hexagon::ps_sub_lo); |
446 | 209 | unsigned SubHi = HRI.getHexagonSubRegIndex(DstRC, Hexagon::ps_sub_hi); |
447 | 209 | assert((Sub1 == SubLo && Sub2 == SubHi) || (Sub1 == SubHi && Sub2 == SubLo)); |
448 | 209 | if (Sub1 == SubLo && 209 Sub2 == SubHi197 ) { |
449 | 197 | SL = I.getOperand(1); |
450 | 197 | SH = I.getOperand(3); |
451 | 197 | return true; |
452 | 197 | } |
453 | 12 | if (12 Sub1 == SubHi && 12 Sub2 == SubLo12 ) { |
454 | 12 | SH = I.getOperand(1); |
455 | 12 | SL = I.getOperand(3); |
456 | 12 | return true; |
457 | 12 | } |
458 | 0 | return false; |
459 | 0 | } |
460 | | |
461 | | // All stores (except 64-bit stores) take a 32-bit register as the source |
462 | | // of the value to be stored. If the instruction stores into a location |
463 | | // that is shorter than 32 bits, some bits of the source register are not |
464 | | // used. For each store instruction, calculate the set of used bits in |
465 | | // the source register, and set appropriate bits in Bits. Return true if |
466 | | // the bits are calculated, false otherwise. |
467 | | bool HexagonBitSimplify::getUsedBitsInStore(unsigned Opc, BitVector &Bits, |
468 | 770 | uint16_t Begin) { |
469 | 770 | using namespace Hexagon; |
470 | 770 | |
471 | 770 | switch (Opc) { |
472 | 770 | // Store byte |
473 | 10 | case S2_storerb_io: // memb(Rs32+#s11:0)=Rt32 |
474 | 10 | case S2_storerbnew_io: // memb(Rs32+#s11:0)=Nt8.new |
475 | 10 | case S2_pstorerbt_io: // if (Pv4) memb(Rs32+#u6:0)=Rt32 |
476 | 10 | case S2_pstorerbf_io: // if (!Pv4) memb(Rs32+#u6:0)=Rt32 |
477 | 10 | case S4_pstorerbtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Rt32 |
478 | 10 | case S4_pstorerbfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Rt32 |
479 | 10 | case S2_pstorerbnewt_io: // if (Pv4) memb(Rs32+#u6:0)=Nt8.new |
480 | 10 | case S2_pstorerbnewf_io: // if (!Pv4) memb(Rs32+#u6:0)=Nt8.new |
481 | 10 | case S4_pstorerbnewtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Nt8.new |
482 | 10 | case S4_pstorerbnewfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Nt8.new |
483 | 10 | case S2_storerb_pi: // memb(Rx32++#s4:0)=Rt32 |
484 | 10 | case S2_storerbnew_pi: // memb(Rx32++#s4:0)=Nt8.new |
485 | 10 | case S2_pstorerbt_pi: // if (Pv4) memb(Rx32++#s4:0)=Rt32 |
486 | 10 | case S2_pstorerbf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Rt32 |
487 | 10 | case S2_pstorerbtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Rt32 |
488 | 10 | case S2_pstorerbfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Rt32 |
489 | 10 | case S2_pstorerbnewt_pi: // if (Pv4) memb(Rx32++#s4:0)=Nt8.new |
490 | 10 | case S2_pstorerbnewf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Nt8.new |
491 | 10 | case S2_pstorerbnewtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Nt8.new |
492 | 10 | case S2_pstorerbnewfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Nt8.new |
493 | 10 | case S4_storerb_ap: // memb(Re32=#U6)=Rt32 |
494 | 10 | case S4_storerbnew_ap: // memb(Re32=#U6)=Nt8.new |
495 | 10 | case S2_storerb_pr: // memb(Rx32++Mu2)=Rt32 |
496 | 10 | case S2_storerbnew_pr: // memb(Rx32++Mu2)=Nt8.new |
497 | 10 | case S4_storerb_ur: // memb(Ru32<<#u2+#U6)=Rt32 |
498 | 10 | case S4_storerbnew_ur: // memb(Ru32<<#u2+#U6)=Nt8.new |
499 | 10 | case S2_storerb_pbr: // memb(Rx32++Mu2:brev)=Rt32 |
500 | 10 | case S2_storerbnew_pbr: // memb(Rx32++Mu2:brev)=Nt8.new |
501 | 10 | case S2_storerb_pci: // memb(Rx32++#s4:0:circ(Mu2))=Rt32 |
502 | 10 | case S2_storerbnew_pci: // memb(Rx32++#s4:0:circ(Mu2))=Nt8.new |
503 | 10 | case S2_storerb_pcr: // memb(Rx32++I:circ(Mu2))=Rt32 |
504 | 10 | case S2_storerbnew_pcr: // memb(Rx32++I:circ(Mu2))=Nt8.new |
505 | 10 | case S4_storerb_rr: // memb(Rs32+Ru32<<#u2)=Rt32 |
506 | 10 | case S4_storerbnew_rr: // memb(Rs32+Ru32<<#u2)=Nt8.new |
507 | 10 | case S4_pstorerbt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Rt32 |
508 | 10 | case S4_pstorerbf_rr: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Rt32 |
509 | 10 | case S4_pstorerbtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32 |
510 | 10 | case S4_pstorerbfnew_rr: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32 |
511 | 10 | case S4_pstorerbnewt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new |
512 | 10 | case S4_pstorerbnewf_rr: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new |
513 | 10 | case S4_pstorerbnewtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new |
514 | 10 | case S4_pstorerbnewfnew_rr: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new |
515 | 10 | case S2_storerbgp: // memb(gp+#u16:0)=Rt32 |
516 | 10 | case S2_storerbnewgp: // memb(gp+#u16:0)=Nt8.new |
517 | 10 | case S4_pstorerbt_abs: // if (Pv4) memb(#u6)=Rt32 |
518 | 10 | case S4_pstorerbf_abs: // if (!Pv4) memb(#u6)=Rt32 |
519 | 10 | case S4_pstorerbtnew_abs: // if (Pv4.new) memb(#u6)=Rt32 |
520 | 10 | case S4_pstorerbfnew_abs: // if (!Pv4.new) memb(#u6)=Rt32 |
521 | 10 | case S4_pstorerbnewt_abs: // if (Pv4) memb(#u6)=Nt8.new |
522 | 10 | case S4_pstorerbnewf_abs: // if (!Pv4) memb(#u6)=Nt8.new |
523 | 10 | case S4_pstorerbnewtnew_abs: // if (Pv4.new) memb(#u6)=Nt8.new |
524 | 10 | case S4_pstorerbnewfnew_abs: // if (!Pv4.new) memb(#u6)=Nt8.new |
525 | 10 | Bits.set(Begin, Begin+8); |
526 | 10 | return true; |
527 | 10 | |
528 | 10 | // Store low half |
529 | 31 | case S2_storerh_io: // memh(Rs32+#s11:1)=Rt32 |
530 | 31 | case S2_storerhnew_io: // memh(Rs32+#s11:1)=Nt8.new |
531 | 31 | case S2_pstorerht_io: // if (Pv4) memh(Rs32+#u6:1)=Rt32 |
532 | 31 | case S2_pstorerhf_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt32 |
533 | 31 | case S4_pstorerhtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt32 |
534 | 31 | case S4_pstorerhfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt32 |
535 | 31 | case S2_pstorerhnewt_io: // if (Pv4) memh(Rs32+#u6:1)=Nt8.new |
536 | 31 | case S2_pstorerhnewf_io: // if (!Pv4) memh(Rs32+#u6:1)=Nt8.new |
537 | 31 | case S4_pstorerhnewtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Nt8.new |
538 | 31 | case S4_pstorerhnewfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Nt8.new |
539 | 31 | case S2_storerh_pi: // memh(Rx32++#s4:1)=Rt32 |
540 | 31 | case S2_storerhnew_pi: // memh(Rx32++#s4:1)=Nt8.new |
541 | 31 | case S2_pstorerht_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt32 |
542 | 31 | case S2_pstorerhf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt32 |
543 | 31 | case S2_pstorerhtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt32 |
544 | 31 | case S2_pstorerhfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt32 |
545 | 31 | case S2_pstorerhnewt_pi: // if (Pv4) memh(Rx32++#s4:1)=Nt8.new |
546 | 31 | case S2_pstorerhnewf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Nt8.new |
547 | 31 | case S2_pstorerhnewtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Nt8.new |
548 | 31 | case S2_pstorerhnewfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Nt8.new |
549 | 31 | case S4_storerh_ap: // memh(Re32=#U6)=Rt32 |
550 | 31 | case S4_storerhnew_ap: // memh(Re32=#U6)=Nt8.new |
551 | 31 | case S2_storerh_pr: // memh(Rx32++Mu2)=Rt32 |
552 | 31 | case S2_storerhnew_pr: // memh(Rx32++Mu2)=Nt8.new |
553 | 31 | case S4_storerh_ur: // memh(Ru32<<#u2+#U6)=Rt32 |
554 | 31 | case S4_storerhnew_ur: // memh(Ru32<<#u2+#U6)=Nt8.new |
555 | 31 | case S2_storerh_pbr: // memh(Rx32++Mu2:brev)=Rt32 |
556 | 31 | case S2_storerhnew_pbr: // memh(Rx32++Mu2:brev)=Nt8.new |
557 | 31 | case S2_storerh_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt32 |
558 | 31 | case S2_storerhnew_pci: // memh(Rx32++#s4:1:circ(Mu2))=Nt8.new |
559 | 31 | case S2_storerh_pcr: // memh(Rx32++I:circ(Mu2))=Rt32 |
560 | 31 | case S2_storerhnew_pcr: // memh(Rx32++I:circ(Mu2))=Nt8.new |
561 | 31 | case S4_storerh_rr: // memh(Rs32+Ru32<<#u2)=Rt32 |
562 | 31 | case S4_pstorerht_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt32 |
563 | 31 | case S4_pstorerhf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt32 |
564 | 31 | case S4_pstorerhtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32 |
565 | 31 | case S4_pstorerhfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32 |
566 | 31 | case S4_storerhnew_rr: // memh(Rs32+Ru32<<#u2)=Nt8.new |
567 | 31 | case S4_pstorerhnewt_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new |
568 | 31 | case S4_pstorerhnewf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new |
569 | 31 | case S4_pstorerhnewtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new |
570 | 31 | case S4_pstorerhnewfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new |
571 | 31 | case S2_storerhgp: // memh(gp+#u16:1)=Rt32 |
572 | 31 | case S2_storerhnewgp: // memh(gp+#u16:1)=Nt8.new |
573 | 31 | case S4_pstorerht_abs: // if (Pv4) memh(#u6)=Rt32 |
574 | 31 | case S4_pstorerhf_abs: // if (!Pv4) memh(#u6)=Rt32 |
575 | 31 | case S4_pstorerhtnew_abs: // if (Pv4.new) memh(#u6)=Rt32 |
576 | 31 | case S4_pstorerhfnew_abs: // if (!Pv4.new) memh(#u6)=Rt32 |
577 | 31 | case S4_pstorerhnewt_abs: // if (Pv4) memh(#u6)=Nt8.new |
578 | 31 | case S4_pstorerhnewf_abs: // if (!Pv4) memh(#u6)=Nt8.new |
579 | 31 | case S4_pstorerhnewtnew_abs: // if (Pv4.new) memh(#u6)=Nt8.new |
580 | 31 | case S4_pstorerhnewfnew_abs: // if (!Pv4.new) memh(#u6)=Nt8.new |
581 | 31 | Bits.set(Begin, Begin+16); |
582 | 31 | return true; |
583 | 31 | |
584 | 31 | // Store high half |
585 | 0 | case S2_storerf_io: // memh(Rs32+#s11:1)=Rt.H32 |
586 | 0 | case S2_pstorerft_io: // if (Pv4) memh(Rs32+#u6:1)=Rt.H32 |
587 | 0 | case S2_pstorerff_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt.H32 |
588 | 0 | case S4_pstorerftnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt.H32 |
589 | 0 | case S4_pstorerffnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt.H32 |
590 | 0 | case S2_storerf_pi: // memh(Rx32++#s4:1)=Rt.H32 |
591 | 0 | case S2_pstorerft_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt.H32 |
592 | 0 | case S2_pstorerff_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt.H32 |
593 | 0 | case S2_pstorerftnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt.H32 |
594 | 0 | case S2_pstorerffnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt.H32 |
595 | 0 | case S4_storerf_ap: // memh(Re32=#U6)=Rt.H32 |
596 | 0 | case S2_storerf_pr: // memh(Rx32++Mu2)=Rt.H32 |
597 | 0 | case S4_storerf_ur: // memh(Ru32<<#u2+#U6)=Rt.H32 |
598 | 0 | case S2_storerf_pbr: // memh(Rx32++Mu2:brev)=Rt.H32 |
599 | 0 | case S2_storerf_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt.H32 |
600 | 0 | case S2_storerf_pcr: // memh(Rx32++I:circ(Mu2))=Rt.H32 |
601 | 0 | case S4_storerf_rr: // memh(Rs32+Ru32<<#u2)=Rt.H32 |
602 | 0 | case S4_pstorerft_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32 |
603 | 0 | case S4_pstorerff_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32 |
604 | 0 | case S4_pstorerftnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32 |
605 | 0 | case S4_pstorerffnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32 |
606 | 0 | case S2_storerfgp: // memh(gp+#u16:1)=Rt.H32 |
607 | 0 | case S4_pstorerft_abs: // if (Pv4) memh(#u6)=Rt.H32 |
608 | 0 | case S4_pstorerff_abs: // if (!Pv4) memh(#u6)=Rt.H32 |
609 | 0 | case S4_pstorerftnew_abs: // if (Pv4.new) memh(#u6)=Rt.H32 |
610 | 0 | case S4_pstorerffnew_abs: // if (!Pv4.new) memh(#u6)=Rt.H32 |
611 | 0 | Bits.set(Begin+16, Begin+32); |
612 | 0 | return true; |
613 | 729 | } |
614 | 729 | |
615 | 729 | return false; |
616 | 729 | } |
617 | | |
618 | | // For an instruction with opcode Opc, calculate the set of bits that it |
619 | | // uses in a register in operand OpN. This only calculates the set of used |
620 | | // bits for cases where it does not depend on any operands (as is the case |
621 | | // in shifts, for example). For concrete instructions from a program, the |
622 | | // operand may be a subregister of a larger register, while Bits would |
623 | | // correspond to the larger register in its entirety. Because of that, |
624 | | // the parameter Begin can be used to indicate which bit of Bits should be |
625 | | // considered the LSB of of the operand. |
626 | | bool HexagonBitSimplify::getUsedBits(unsigned Opc, unsigned OpN, |
627 | 3.32k | BitVector &Bits, uint16_t Begin, const HexagonInstrInfo &HII) { |
628 | 3.32k | using namespace Hexagon; |
629 | 3.32k | |
630 | 3.32k | const MCInstrDesc &D = HII.get(Opc); |
631 | 3.32k | if (D.mayStore()3.32k ) { |
632 | 1.08k | if (OpN == D.getNumOperands()-1) |
633 | 770 | return getUsedBitsInStore(Opc, Bits, Begin); |
634 | 319 | return false; |
635 | 319 | } |
636 | 2.23k | |
637 | 2.23k | switch (Opc) { |
638 | 2.23k | // One register source. Used bits: R1[0-7]. |
639 | 20 | case A2_sxtb: |
640 | 20 | case A2_zxtb: |
641 | 20 | case A4_cmpbeqi: |
642 | 20 | case A4_cmpbgti: |
643 | 20 | case A4_cmpbgtui: |
644 | 20 | if (OpN == 120 ) { |
645 | 20 | Bits.set(Begin, Begin+8); |
646 | 20 | return true; |
647 | 20 | } |
648 | 0 | break; |
649 | 0 |
|
650 | 0 | // One register source. Used bits: R1[0-15]. |
651 | 45 | case A2_aslh: |
652 | 45 | case A2_sxth: |
653 | 45 | case A2_zxth: |
654 | 45 | case A4_cmpheqi: |
655 | 45 | case A4_cmphgti: |
656 | 45 | case A4_cmphgtui: |
657 | 45 | if (OpN == 145 ) { |
658 | 45 | Bits.set(Begin, Begin+16); |
659 | 45 | return true; |
660 | 45 | } |
661 | 0 | break; |
662 | 0 |
|
663 | 0 | // One register source. Used bits: R1[16-31]. |
664 | 4 | case A2_asrh: |
665 | 4 | if (OpN == 14 ) { |
666 | 4 | Bits.set(Begin+16, Begin+32); |
667 | 4 | return true; |
668 | 4 | } |
669 | 0 | break; |
670 | 0 |
|
671 | 0 | // Two register sources. Used bits: R1[0-7], R2[0-7]. |
672 | 0 | case A4_cmpbeq: |
673 | 0 | case A4_cmpbgt: |
674 | 0 | case A4_cmpbgtu: |
675 | 0 | if (OpN == 10 ) { |
676 | 0 | Bits.set(Begin, Begin+8); |
677 | 0 | return true; |
678 | 0 | } |
679 | 0 | break; |
680 | 0 |
|
681 | 0 | // Two register sources. Used bits: R1[0-15], R2[0-15]. |
682 | 22 | case A4_cmpheq: |
683 | 22 | case A4_cmphgt: |
684 | 22 | case A4_cmphgtu: |
685 | 22 | case A2_addh_h16_ll: |
686 | 22 | case A2_addh_h16_sat_ll: |
687 | 22 | case A2_addh_l16_ll: |
688 | 22 | case A2_addh_l16_sat_ll: |
689 | 22 | case A2_combine_ll: |
690 | 22 | case A2_subh_h16_ll: |
691 | 22 | case A2_subh_h16_sat_ll: |
692 | 22 | case A2_subh_l16_ll: |
693 | 22 | case A2_subh_l16_sat_ll: |
694 | 22 | case M2_mpy_acc_ll_s0: |
695 | 22 | case M2_mpy_acc_ll_s1: |
696 | 22 | case M2_mpy_acc_sat_ll_s0: |
697 | 22 | case M2_mpy_acc_sat_ll_s1: |
698 | 22 | case M2_mpy_ll_s0: |
699 | 22 | case M2_mpy_ll_s1: |
700 | 22 | case M2_mpy_nac_ll_s0: |
701 | 22 | case M2_mpy_nac_ll_s1: |
702 | 22 | case M2_mpy_nac_sat_ll_s0: |
703 | 22 | case M2_mpy_nac_sat_ll_s1: |
704 | 22 | case M2_mpy_rnd_ll_s0: |
705 | 22 | case M2_mpy_rnd_ll_s1: |
706 | 22 | case M2_mpy_sat_ll_s0: |
707 | 22 | case M2_mpy_sat_ll_s1: |
708 | 22 | case M2_mpy_sat_rnd_ll_s0: |
709 | 22 | case M2_mpy_sat_rnd_ll_s1: |
710 | 22 | case M2_mpyd_acc_ll_s0: |
711 | 22 | case M2_mpyd_acc_ll_s1: |
712 | 22 | case M2_mpyd_ll_s0: |
713 | 22 | case M2_mpyd_ll_s1: |
714 | 22 | case M2_mpyd_nac_ll_s0: |
715 | 22 | case M2_mpyd_nac_ll_s1: |
716 | 22 | case M2_mpyd_rnd_ll_s0: |
717 | 22 | case M2_mpyd_rnd_ll_s1: |
718 | 22 | case M2_mpyu_acc_ll_s0: |
719 | 22 | case M2_mpyu_acc_ll_s1: |
720 | 22 | case M2_mpyu_ll_s0: |
721 | 22 | case M2_mpyu_ll_s1: |
722 | 22 | case M2_mpyu_nac_ll_s0: |
723 | 22 | case M2_mpyu_nac_ll_s1: |
724 | 22 | case M2_mpyud_acc_ll_s0: |
725 | 22 | case M2_mpyud_acc_ll_s1: |
726 | 22 | case M2_mpyud_ll_s0: |
727 | 22 | case M2_mpyud_ll_s1: |
728 | 22 | case M2_mpyud_nac_ll_s0: |
729 | 22 | case M2_mpyud_nac_ll_s1: |
730 | 22 | if (OpN == 1 || 22 OpN == 210 ) { |
731 | 21 | Bits.set(Begin, Begin+16); |
732 | 21 | return true; |
733 | 21 | } |
734 | 1 | break; |
735 | 1 | |
736 | 1 | // Two register sources. Used bits: R1[0-15], R2[16-31]. |
737 | 0 | case A2_addh_h16_lh: |
738 | 0 | case A2_addh_h16_sat_lh: |
739 | 0 | case A2_combine_lh: |
740 | 0 | case A2_subh_h16_lh: |
741 | 0 | case A2_subh_h16_sat_lh: |
742 | 0 | case M2_mpy_acc_lh_s0: |
743 | 0 | case M2_mpy_acc_lh_s1: |
744 | 0 | case M2_mpy_acc_sat_lh_s0: |
745 | 0 | case M2_mpy_acc_sat_lh_s1: |
746 | 0 | case M2_mpy_lh_s0: |
747 | 0 | case M2_mpy_lh_s1: |
748 | 0 | case M2_mpy_nac_lh_s0: |
749 | 0 | case M2_mpy_nac_lh_s1: |
750 | 0 | case M2_mpy_nac_sat_lh_s0: |
751 | 0 | case M2_mpy_nac_sat_lh_s1: |
752 | 0 | case M2_mpy_rnd_lh_s0: |
753 | 0 | case M2_mpy_rnd_lh_s1: |
754 | 0 | case M2_mpy_sat_lh_s0: |
755 | 0 | case M2_mpy_sat_lh_s1: |
756 | 0 | case M2_mpy_sat_rnd_lh_s0: |
757 | 0 | case M2_mpy_sat_rnd_lh_s1: |
758 | 0 | case M2_mpyd_acc_lh_s0: |
759 | 0 | case M2_mpyd_acc_lh_s1: |
760 | 0 | case M2_mpyd_lh_s0: |
761 | 0 | case M2_mpyd_lh_s1: |
762 | 0 | case M2_mpyd_nac_lh_s0: |
763 | 0 | case M2_mpyd_nac_lh_s1: |
764 | 0 | case M2_mpyd_rnd_lh_s0: |
765 | 0 | case M2_mpyd_rnd_lh_s1: |
766 | 0 | case M2_mpyu_acc_lh_s0: |
767 | 0 | case M2_mpyu_acc_lh_s1: |
768 | 0 | case M2_mpyu_lh_s0: |
769 | 0 | case M2_mpyu_lh_s1: |
770 | 0 | case M2_mpyu_nac_lh_s0: |
771 | 0 | case M2_mpyu_nac_lh_s1: |
772 | 0 | case M2_mpyud_acc_lh_s0: |
773 | 0 | case M2_mpyud_acc_lh_s1: |
774 | 0 | case M2_mpyud_lh_s0: |
775 | 0 | case M2_mpyud_lh_s1: |
776 | 0 | case M2_mpyud_nac_lh_s0: |
777 | 0 | case M2_mpyud_nac_lh_s1: |
778 | 0 | // These four are actually LH. |
779 | 0 | case A2_addh_l16_hl: |
780 | 0 | case A2_addh_l16_sat_hl: |
781 | 0 | case A2_subh_l16_hl: |
782 | 0 | case A2_subh_l16_sat_hl: |
783 | 0 | if (OpN == 10 ) { |
784 | 0 | Bits.set(Begin, Begin+16); |
785 | 0 | return true; |
786 | 0 | } |
787 | 0 | if (0 OpN == 20 ) { |
788 | 0 | Bits.set(Begin+16, Begin+32); |
789 | 0 | return true; |
790 | 0 | } |
791 | 0 | break; |
792 | 0 |
|
793 | 0 | // Two register sources, used bits: R1[16-31], R2[0-15]. |
794 | 0 | case A2_addh_h16_hl: |
795 | 0 | case A2_addh_h16_sat_hl: |
796 | 0 | case A2_combine_hl: |
797 | 0 | case A2_subh_h16_hl: |
798 | 0 | case A2_subh_h16_sat_hl: |
799 | 0 | case M2_mpy_acc_hl_s0: |
800 | 0 | case M2_mpy_acc_hl_s1: |
801 | 0 | case M2_mpy_acc_sat_hl_s0: |
802 | 0 | case M2_mpy_acc_sat_hl_s1: |
803 | 0 | case M2_mpy_hl_s0: |
804 | 0 | case M2_mpy_hl_s1: |
805 | 0 | case M2_mpy_nac_hl_s0: |
806 | 0 | case M2_mpy_nac_hl_s1: |
807 | 0 | case M2_mpy_nac_sat_hl_s0: |
808 | 0 | case M2_mpy_nac_sat_hl_s1: |
809 | 0 | case M2_mpy_rnd_hl_s0: |
810 | 0 | case M2_mpy_rnd_hl_s1: |
811 | 0 | case M2_mpy_sat_hl_s0: |
812 | 0 | case M2_mpy_sat_hl_s1: |
813 | 0 | case M2_mpy_sat_rnd_hl_s0: |
814 | 0 | case M2_mpy_sat_rnd_hl_s1: |
815 | 0 | case M2_mpyd_acc_hl_s0: |
816 | 0 | case M2_mpyd_acc_hl_s1: |
817 | 0 | case M2_mpyd_hl_s0: |
818 | 0 | case M2_mpyd_hl_s1: |
819 | 0 | case M2_mpyd_nac_hl_s0: |
820 | 0 | case M2_mpyd_nac_hl_s1: |
821 | 0 | case M2_mpyd_rnd_hl_s0: |
822 | 0 | case M2_mpyd_rnd_hl_s1: |
823 | 0 | case M2_mpyu_acc_hl_s0: |
824 | 0 | case M2_mpyu_acc_hl_s1: |
825 | 0 | case M2_mpyu_hl_s0: |
826 | 0 | case M2_mpyu_hl_s1: |
827 | 0 | case M2_mpyu_nac_hl_s0: |
828 | 0 | case M2_mpyu_nac_hl_s1: |
829 | 0 | case M2_mpyud_acc_hl_s0: |
830 | 0 | case M2_mpyud_acc_hl_s1: |
831 | 0 | case M2_mpyud_hl_s0: |
832 | 0 | case M2_mpyud_hl_s1: |
833 | 0 | case M2_mpyud_nac_hl_s0: |
834 | 0 | case M2_mpyud_nac_hl_s1: |
835 | 0 | if (OpN == 10 ) { |
836 | 0 | Bits.set(Begin+16, Begin+32); |
837 | 0 | return true; |
838 | 0 | } |
839 | 0 | if (0 OpN == 20 ) { |
840 | 0 | Bits.set(Begin, Begin+16); |
841 | 0 | return true; |
842 | 0 | } |
843 | 0 | break; |
844 | 0 |
|
845 | 0 | // Two register sources, used bits: R1[16-31], R2[16-31]. |
846 | 0 | case A2_addh_h16_hh: |
847 | 0 | case A2_addh_h16_sat_hh: |
848 | 0 | case A2_combine_hh: |
849 | 0 | case A2_subh_h16_hh: |
850 | 0 | case A2_subh_h16_sat_hh: |
851 | 0 | case M2_mpy_acc_hh_s0: |
852 | 0 | case M2_mpy_acc_hh_s1: |
853 | 0 | case M2_mpy_acc_sat_hh_s0: |
854 | 0 | case M2_mpy_acc_sat_hh_s1: |
855 | 0 | case M2_mpy_hh_s0: |
856 | 0 | case M2_mpy_hh_s1: |
857 | 0 | case M2_mpy_nac_hh_s0: |
858 | 0 | case M2_mpy_nac_hh_s1: |
859 | 0 | case M2_mpy_nac_sat_hh_s0: |
860 | 0 | case M2_mpy_nac_sat_hh_s1: |
861 | 0 | case M2_mpy_rnd_hh_s0: |
862 | 0 | case M2_mpy_rnd_hh_s1: |
863 | 0 | case M2_mpy_sat_hh_s0: |
864 | 0 | case M2_mpy_sat_hh_s1: |
865 | 0 | case M2_mpy_sat_rnd_hh_s0: |
866 | 0 | case M2_mpy_sat_rnd_hh_s1: |
867 | 0 | case M2_mpyd_acc_hh_s0: |
868 | 0 | case M2_mpyd_acc_hh_s1: |
869 | 0 | case M2_mpyd_hh_s0: |
870 | 0 | case M2_mpyd_hh_s1: |
871 | 0 | case M2_mpyd_nac_hh_s0: |
872 | 0 | case M2_mpyd_nac_hh_s1: |
873 | 0 | case M2_mpyd_rnd_hh_s0: |
874 | 0 | case M2_mpyd_rnd_hh_s1: |
875 | 0 | case M2_mpyu_acc_hh_s0: |
876 | 0 | case M2_mpyu_acc_hh_s1: |
877 | 0 | case M2_mpyu_hh_s0: |
878 | 0 | case M2_mpyu_hh_s1: |
879 | 0 | case M2_mpyu_nac_hh_s0: |
880 | 0 | case M2_mpyu_nac_hh_s1: |
881 | 0 | case M2_mpyud_acc_hh_s0: |
882 | 0 | case M2_mpyud_acc_hh_s1: |
883 | 0 | case M2_mpyud_hh_s0: |
884 | 0 | case M2_mpyud_hh_s1: |
885 | 0 | case M2_mpyud_nac_hh_s0: |
886 | 0 | case M2_mpyud_nac_hh_s1: |
887 | 0 | if (OpN == 1 || 0 OpN == 20 ) { |
888 | 0 | Bits.set(Begin+16, Begin+32); |
889 | 0 | return true; |
890 | 0 | } |
891 | 0 | break; |
892 | 2.14k | } |
893 | 2.14k | |
894 | 2.14k | return false; |
895 | 2.14k | } |
896 | | |
897 | | // Calculate the register class that matches Reg:Sub. For example, if |
898 | | // vreg1 is a double register, then vreg1:isub_hi would match the "int" |
899 | | // register class. |
900 | | const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass( |
901 | 1.11M | const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI) { |
902 | 1.11M | if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) |
903 | 0 | return nullptr; |
904 | 1.11M | auto *RC = MRI.getRegClass(RR.Reg); |
905 | 1.11M | if (RR.Sub == 0) |
906 | 985k | return RC; |
907 | 126k | auto &HRI = static_cast<const HexagonRegisterInfo&>( |
908 | 126k | *MRI.getTargetRegisterInfo()); |
909 | 126k | |
910 | 125k | auto VerifySR = [&HRI] (const TargetRegisterClass *RC, unsigned Sub) -> void { |
911 | 125k | (void)HRI; |
912 | 125k | assert(Sub == HRI.getHexagonSubRegIndex(*RC, Hexagon::ps_sub_lo) || |
913 | 125k | Sub == HRI.getHexagonSubRegIndex(*RC, Hexagon::ps_sub_hi)); |
914 | 125k | }; |
915 | 126k | |
916 | 126k | switch (RC->getID()) { |
917 | 8.45k | case Hexagon::DoubleRegsRegClassID: |
918 | 8.45k | VerifySR(RC, RR.Sub); |
919 | 8.45k | return &Hexagon::IntRegsRegClass; |
920 | 117k | case Hexagon::HvxWRRegClassID: |
921 | 117k | VerifySR(RC, RR.Sub); |
922 | 117k | return &Hexagon::HvxVRRegClass; |
923 | 142 | } |
924 | 142 | return nullptr; |
925 | 142 | } |
926 | | |
927 | | // Check if RD could be replaced with RS at any possible use of RD. |
928 | | // For example a predicate register cannot be replaced with a integer |
929 | | // register, but a 64-bit register with a subregister can be replaced |
930 | | // with a 32-bit register. |
931 | | bool HexagonBitSimplify::isTransparentCopy(const BitTracker::RegisterRef &RD, |
932 | 545k | const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI) { |
933 | 545k | if (!TargetRegisterInfo::isVirtualRegister(RD.Reg) || |
934 | 544k | !TargetRegisterInfo::isVirtualRegister(RS.Reg)) |
935 | 2.15k | return false; |
936 | 543k | // Return false if one (or both) classes are nullptr. |
937 | 543k | auto *DRC = getFinalVRegClass(RD, MRI); |
938 | 543k | if (!DRC) |
939 | 0 | return false; |
940 | 543k | |
941 | 543k | return DRC == getFinalVRegClass(RS, MRI); |
942 | 543k | } |
943 | | |
944 | | bool HexagonBitSimplify::hasTiedUse(unsigned Reg, MachineRegisterInfo &MRI, |
945 | 769 | unsigned NewSub) { |
946 | 769 | if (!PreserveTiedOps) |
947 | 0 | return false; |
948 | 769 | return llvm::any_of(MRI.use_operands(Reg), |
949 | 761 | [NewSub] (const MachineOperand &Op) -> bool { |
950 | 461 | return Op.getSubReg() != NewSub && Op.isTied(); |
951 | 761 | }); |
952 | 769 | } |
953 | | |
954 | | namespace { |
955 | | |
956 | | class DeadCodeElimination { |
957 | | public: |
958 | | DeadCodeElimination(MachineFunction &mf, MachineDominatorTree &mdt) |
959 | | : MF(mf), HII(*MF.getSubtarget<HexagonSubtarget>().getInstrInfo()), |
960 | 2.71k | MDT(mdt), MRI(mf.getRegInfo()) {} |
961 | | |
962 | 2.71k | bool run() { |
963 | 2.71k | return runOnNode(MDT.getRootNode()); |
964 | 2.71k | } |
965 | | |
966 | | private: |
967 | | bool isDead(unsigned R) const; |
968 | | bool runOnNode(MachineDomTreeNode *N); |
969 | | |
970 | | MachineFunction &MF; |
971 | | const HexagonInstrInfo &HII; |
972 | | MachineDominatorTree &MDT; |
973 | | MachineRegisterInfo &MRI; |
974 | | }; |
975 | | |
976 | | } // end anonymous namespace |
977 | | |
978 | 28.4k | bool DeadCodeElimination::isDead(unsigned R) const { |
979 | 28.4k | for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E28.4k ; ++I6 ) { |
980 | 27.3k | MachineInstr *UseI = I->getParent(); |
981 | 27.3k | if (UseI->isDebugValue()) |
982 | 3 | continue; |
983 | 27.3k | if (27.3k UseI->isPHI()27.3k ) { |
984 | 1.74k | assert(!UseI->getOperand(0).getSubReg()); |
985 | 1.74k | unsigned DR = UseI->getOperand(0).getReg(); |
986 | 1.74k | if (DR == R) |
987 | 3 | continue; |
988 | 27.3k | } |
989 | 27.3k | return false; |
990 | 27.3k | } |
991 | 1.13k | return true; |
992 | 28.4k | } |
993 | | |
994 | 6.12k | bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) { |
995 | 6.12k | bool Changed = false; |
996 | 6.12k | |
997 | 6.12k | for (auto *DTN : children<MachineDomTreeNode*>(N)) |
998 | 3.40k | Changed |= runOnNode(DTN); |
999 | 6.12k | |
1000 | 6.12k | MachineBasicBlock *B = N->getBlock(); |
1001 | 6.12k | std::vector<MachineInstr*> Instrs; |
1002 | 56.1k | for (auto I = B->rbegin(), E = B->rend(); I != E56.1k ; ++I49.9k ) |
1003 | 49.9k | Instrs.push_back(&*I); |
1004 | 6.12k | |
1005 | 49.9k | for (auto MI : Instrs) { |
1006 | 49.9k | unsigned Opc = MI->getOpcode(); |
1007 | 49.9k | // Do not touch lifetime markers. This is why the target-independent DCE |
1008 | 49.9k | // cannot be used. |
1009 | 49.9k | if (Opc == TargetOpcode::LIFETIME_START || |
1010 | 49.8k | Opc == TargetOpcode::LIFETIME_END) |
1011 | 180 | continue; |
1012 | 49.8k | bool Store = false; |
1013 | 49.8k | if (MI->isInlineAsm()) |
1014 | 42 | continue; |
1015 | 49.7k | // Delete PHIs if possible. |
1016 | 49.7k | if (49.7k !MI->isPHI() && 49.7k !MI->isSafeToMove(nullptr, Store)48.2k ) |
1017 | 17.2k | continue; |
1018 | 32.4k | |
1019 | 32.4k | bool AllDead = true; |
1020 | 32.4k | SmallVector<unsigned,2> Regs; |
1021 | 36.8k | for (auto &Op : MI->operands()) { |
1022 | 36.8k | if (!Op.isReg() || 36.8k !Op.isDef()33.6k ) |
1023 | 4.36k | continue; |
1024 | 32.4k | unsigned R = Op.getReg(); |
1025 | 32.4k | if (!TargetRegisterInfo::isVirtualRegister(R) || 32.4k !isDead(R)28.4k ) { |
1026 | 31.3k | AllDead = false; |
1027 | 31.3k | break; |
1028 | 31.3k | } |
1029 | 1.13k | Regs.push_back(R); |
1030 | 1.13k | } |
1031 | 32.4k | if (!AllDead) |
1032 | 31.3k | continue; |
1033 | 1.13k | |
1034 | 1.13k | B->erase(MI); |
1035 | 2.26k | for (unsigned i = 0, n = Regs.size(); i != n2.26k ; ++i1.13k ) |
1036 | 1.13k | MRI.markUsesInDebugValueAsUndef(Regs[i]); |
1037 | 49.9k | Changed = true; |
1038 | 49.9k | } |
1039 | 6.12k | |
1040 | 6.12k | return Changed; |
1041 | 6.12k | } |
1042 | | |
1043 | | namespace { |
1044 | | |
1045 | | // Eliminate redundant instructions |
1046 | | // |
1047 | | // This transformation will identify instructions where the output register |
1048 | | // is the same as one of its input registers. This only works on instructions |
1049 | | // that define a single register (unlike post-increment loads, for example). |
1050 | | // The equality check is actually more detailed: the code calculates which |
1051 | | // bits of the output are used, and only compares these bits with the input |
1052 | | // registers. |
1053 | | // If the output matches an input, the instruction is replaced with COPY. |
1054 | | // The copies will be removed by another transformation. |
1055 | | class RedundantInstrElimination : public Transformation { |
1056 | | public: |
1057 | | RedundantInstrElimination(BitTracker &bt, const HexagonInstrInfo &hii, |
1058 | | const HexagonRegisterInfo &hri, MachineRegisterInfo &mri) |
1059 | 840 | : Transformation(true), HII(hii), HRI(hri), MRI(mri), BT(bt) {} |
1060 | | |
1061 | | bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; |
1062 | | |
1063 | | private: |
1064 | | bool isLossyShiftLeft(const MachineInstr &MI, unsigned OpN, |
1065 | | unsigned &LostB, unsigned &LostE); |
1066 | | bool isLossyShiftRight(const MachineInstr &MI, unsigned OpN, |
1067 | | unsigned &LostB, unsigned &LostE); |
1068 | | bool computeUsedBits(unsigned Reg, BitVector &Bits); |
1069 | | bool computeUsedBits(const MachineInstr &MI, unsigned OpN, BitVector &Bits, |
1070 | | uint16_t Begin); |
1071 | | bool usedBitsEqual(BitTracker::RegisterRef RD, BitTracker::RegisterRef RS); |
1072 | | |
1073 | | const HexagonInstrInfo &HII; |
1074 | | const HexagonRegisterInfo &HRI; |
1075 | | MachineRegisterInfo &MRI; |
1076 | | BitTracker &BT; |
1077 | | }; |
1078 | | |
1079 | | } // end anonymous namespace |
1080 | | |
1081 | | // Check if the instruction is a lossy shift left, where the input being |
1082 | | // shifted is the operand OpN of MI. If true, [LostB, LostE) is the range |
1083 | | // of bit indices that are lost. |
1084 | | bool RedundantInstrElimination::isLossyShiftLeft(const MachineInstr &MI, |
1085 | 3.32k | unsigned OpN, unsigned &LostB, unsigned &LostE) { |
1086 | 3.32k | using namespace Hexagon; |
1087 | 3.32k | |
1088 | 3.32k | unsigned Opc = MI.getOpcode(); |
1089 | 3.32k | unsigned ImN, RegN, Width; |
1090 | 3.32k | switch (Opc) { |
1091 | 0 | case S2_asl_i_p: |
1092 | 0 | ImN = 2; |
1093 | 0 | RegN = 1; |
1094 | 0 | Width = 64; |
1095 | 0 | break; |
1096 | 0 | case S2_asl_i_p_acc: |
1097 | 0 | case S2_asl_i_p_and: |
1098 | 0 | case S2_asl_i_p_nac: |
1099 | 0 | case S2_asl_i_p_or: |
1100 | 0 | case S2_asl_i_p_xacc: |
1101 | 0 | ImN = 3; |
1102 | 0 | RegN = 2; |
1103 | 0 | Width = 64; |
1104 | 0 | break; |
1105 | 6 | case S2_asl_i_r: |
1106 | 6 | ImN = 2; |
1107 | 6 | RegN = 1; |
1108 | 6 | Width = 32; |
1109 | 6 | break; |
1110 | 144 | case S2_addasl_rrri: |
1111 | 144 | case S4_andi_asl_ri: |
1112 | 144 | case S4_ori_asl_ri: |
1113 | 144 | case S4_addi_asl_ri: |
1114 | 144 | case S4_subi_asl_ri: |
1115 | 144 | case S2_asl_i_r_acc: |
1116 | 144 | case S2_asl_i_r_and: |
1117 | 144 | case S2_asl_i_r_nac: |
1118 | 144 | case S2_asl_i_r_or: |
1119 | 144 | case S2_asl_i_r_sat: |
1120 | 144 | case S2_asl_i_r_xacc: |
1121 | 144 | ImN = 3; |
1122 | 144 | RegN = 2; |
1123 | 144 | Width = 32; |
1124 | 144 | break; |
1125 | 3.17k | default: |
1126 | 3.17k | return false; |
1127 | 150 | } |
1128 | 150 | |
1129 | 150 | if (150 RegN != OpN150 ) |
1130 | 58 | return false; |
1131 | 92 | |
1132 | 150 | assert(MI.getOperand(ImN).isImm()); |
1133 | 92 | unsigned S = MI.getOperand(ImN).getImm(); |
1134 | 92 | if (S == 0) |
1135 | 0 | return false; |
1136 | 92 | LostB = Width-S; |
1137 | 92 | LostE = Width; |
1138 | 92 | return true; |
1139 | 92 | } |
1140 | | |
1141 | | // Check if the instruction is a lossy shift right, where the input being |
1142 | | // shifted is the operand OpN of MI. If true, [LostB, LostE) is the range |
1143 | | // of bit indices that are lost. |
1144 | | bool RedundantInstrElimination::isLossyShiftRight(const MachineInstr &MI, |
1145 | 3.22k | unsigned OpN, unsigned &LostB, unsigned &LostE) { |
1146 | 3.22k | using namespace Hexagon; |
1147 | 3.22k | |
1148 | 3.22k | unsigned Opc = MI.getOpcode(); |
1149 | 3.22k | unsigned ImN, RegN; |
1150 | 3.22k | switch (Opc) { |
1151 | 14 | case S2_asr_i_p: |
1152 | 14 | case S2_lsr_i_p: |
1153 | 14 | ImN = 2; |
1154 | 14 | RegN = 1; |
1155 | 14 | break; |
1156 | 0 | case S2_asr_i_p_acc: |
1157 | 0 | case S2_asr_i_p_and: |
1158 | 0 | case S2_asr_i_p_nac: |
1159 | 0 | case S2_asr_i_p_or: |
1160 | 0 | case S2_lsr_i_p_acc: |
1161 | 0 | case S2_lsr_i_p_and: |
1162 | 0 | case S2_lsr_i_p_nac: |
1163 | 0 | case S2_lsr_i_p_or: |
1164 | 0 | case S2_lsr_i_p_xacc: |
1165 | 0 | ImN = 3; |
1166 | 0 | RegN = 2; |
1167 | 0 | break; |
1168 | 22 | case S2_asr_i_r: |
1169 | 22 | case S2_lsr_i_r: |
1170 | 22 | ImN = 2; |
1171 | 22 | RegN = 1; |
1172 | 22 | break; |
1173 | 10 | case S4_andi_lsr_ri: |
1174 | 10 | case S4_ori_lsr_ri: |
1175 | 10 | case S4_addi_lsr_ri: |
1176 | 10 | case S4_subi_lsr_ri: |
1177 | 10 | case S2_asr_i_r_acc: |
1178 | 10 | case S2_asr_i_r_and: |
1179 | 10 | case S2_asr_i_r_nac: |
1180 | 10 | case S2_asr_i_r_or: |
1181 | 10 | case S2_lsr_i_r_acc: |
1182 | 10 | case S2_lsr_i_r_and: |
1183 | 10 | case S2_lsr_i_r_nac: |
1184 | 10 | case S2_lsr_i_r_or: |
1185 | 10 | case S2_lsr_i_r_xacc: |
1186 | 10 | ImN = 3; |
1187 | 10 | RegN = 2; |
1188 | 10 | break; |
1189 | 10 | |
1190 | 3.18k | default: |
1191 | 3.18k | return false; |
1192 | 46 | } |
1193 | 46 | |
1194 | 46 | if (46 RegN != OpN46 ) |
1195 | 6 | return false; |
1196 | 40 | |
1197 | 46 | assert(MI.getOperand(ImN).isImm()); |
1198 | 40 | unsigned S = MI.getOperand(ImN).getImm(); |
1199 | 40 | LostB = 0; |
1200 | 40 | LostE = S; |
1201 | 40 | return true; |
1202 | 40 | } |
1203 | | |
1204 | | // Calculate the bit vector that corresponds to the used bits of register Reg. |
1205 | | // The vector Bits has the same size, as the size of Reg in bits. If the cal- |
1206 | | // culation fails (i.e. the used bits are unknown), it returns false. Other- |
1207 | | // wise, it returns true and sets the corresponding bits in Bits. |
1208 | 3.69k | bool RedundantInstrElimination::computeUsedBits(unsigned Reg, BitVector &Bits) { |
1209 | 3.69k | BitVector Used(Bits.size()); |
1210 | 3.69k | RegisterSet Visited; |
1211 | 3.69k | std::vector<unsigned> Pending; |
1212 | 3.69k | Pending.push_back(Reg); |
1213 | 3.69k | |
1214 | 4.60k | for (unsigned i = 0; i < Pending.size()4.60k ; ++i902 ) { |
1215 | 4.36k | unsigned R = Pending[i]; |
1216 | 4.36k | if (Visited.has(R)) |
1217 | 0 | continue; |
1218 | 4.36k | Visited.insert(R); |
1219 | 5.49k | for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E5.49k ; ++I1.13k ) { |
1220 | 4.59k | BitTracker::RegisterRef UR = *I; |
1221 | 4.59k | unsigned B, W; |
1222 | 4.59k | if (!HBS::getSubregMask(UR, B, W, MRI)) |
1223 | 0 | return false; |
1224 | 4.59k | MachineInstr &UseI = *I->getParent(); |
1225 | 4.59k | if (UseI.isPHI() || 4.59k UseI.isCopy()4.10k ) { |
1226 | 1.27k | unsigned DefR = UseI.getOperand(0).getReg(); |
1227 | 1.27k | if (!TargetRegisterInfo::isVirtualRegister(DefR)) |
1228 | 406 | return false; |
1229 | 869 | Pending.push_back(DefR); |
1230 | 4.59k | } else { |
1231 | 3.32k | if (!computeUsedBits(UseI, I.getOperandNo(), Used, B)) |
1232 | 3.05k | return false; |
1233 | 3.32k | } |
1234 | 4.59k | } |
1235 | 4.36k | } |
1236 | 235 | Bits |= Used; |
1237 | 235 | return true; |
1238 | 3.69k | } |
1239 | | |
1240 | | // Calculate the bits used by instruction MI in a register in operand OpN. |
1241 | | // Return true/false if the calculation succeeds/fails. If is succeeds, set |
1242 | | // used bits in Bits. This function does not reset any bits in Bits, so |
1243 | | // subsequent calls over different instructions will result in the union |
1244 | | // of the used bits in all these instructions. |
1245 | | // The register in question may be used with a sub-register, whereas Bits |
1246 | | // holds the bits for the entire register. To keep track of that, the |
1247 | | // argument Begin indicates where in Bits is the lowest-significant bit |
1248 | | // of the register used in operand OpN. For example, in instruction: |
1249 | | // vreg1 = S2_lsr_i_r vreg2:isub_hi, 10 |
1250 | | // the operand 1 is a 32-bit register, which happens to be a subregister |
1251 | | // of the 64-bit register vreg2, and that subregister starts at position 32. |
1252 | | // In this case Begin=32, since Bits[32] would be the lowest-significant bit |
1253 | | // of vreg2:isub_hi. |
1254 | | bool RedundantInstrElimination::computeUsedBits(const MachineInstr &MI, |
1255 | 3.32k | unsigned OpN, BitVector &Bits, uint16_t Begin) { |
1256 | 3.32k | unsigned Opc = MI.getOpcode(); |
1257 | 3.32k | BitVector T(Bits.size()); |
1258 | 3.32k | bool GotBits = HBS::getUsedBits(Opc, OpN, T, Begin, HII); |
1259 | 3.32k | // Even if we don't have bits yet, we could still provide some information |
1260 | 3.32k | // if the instruction is a lossy shift: the lost bits will be marked as |
1261 | 3.32k | // not used. |
1262 | 3.32k | unsigned LB, LE; |
1263 | 3.32k | if (isLossyShiftLeft(MI, OpN, LB, LE) || 3.32k isLossyShiftRight(MI, OpN, LB, LE)3.22k ) { |
1264 | 132 | assert(MI.getOperand(OpN).isReg()); |
1265 | 132 | BitTracker::RegisterRef RR = MI.getOperand(OpN); |
1266 | 132 | const TargetRegisterClass *RC = HBS::getFinalVRegClass(RR, MRI); |
1267 | 132 | uint16_t Width = HRI.getRegSizeInBits(*RC); |
1268 | 132 | |
1269 | 132 | if (!GotBits) |
1270 | 132 | T.set(Begin, Begin+Width); |
1271 | 132 | assert(LB <= LE && LB < Width && LE <= Width); |
1272 | 132 | T.reset(Begin+LB, Begin+LE); |
1273 | 132 | GotBits = true; |
1274 | 132 | } |
1275 | 3.32k | if (GotBits) |
1276 | 263 | Bits |= T; |
1277 | 3.32k | return GotBits; |
1278 | 3.32k | } |
1279 | | |
1280 | | // Calculates the used bits in RD ("defined register"), and checks if these |
1281 | | // bits in RS ("used register") and RD are identical. |
1282 | | bool RedundantInstrElimination::usedBitsEqual(BitTracker::RegisterRef RD, |
1283 | 3.69k | BitTracker::RegisterRef RS) { |
1284 | 3.69k | const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg); |
1285 | 3.69k | const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg); |
1286 | 3.69k | |
1287 | 3.69k | unsigned DB, DW; |
1288 | 3.69k | if (!HBS::getSubregMask(RD, DB, DW, MRI)) |
1289 | 0 | return false; |
1290 | 3.69k | unsigned SB, SW; |
1291 | 3.69k | if (!HBS::getSubregMask(RS, SB, SW, MRI)) |
1292 | 0 | return false; |
1293 | 3.69k | if (3.69k SW != DW3.69k ) |
1294 | 0 | return false; |
1295 | 3.69k | |
1296 | 3.69k | BitVector Used(DC.width()); |
1297 | 3.69k | if (!computeUsedBits(RD.Reg, Used)) |
1298 | 3.46k | return false; |
1299 | 235 | |
1300 | 2.38k | for (unsigned i = 0; 235 i != DW2.38k ; ++i2.14k ) |
1301 | 2.33k | if (2.33k Used[i+DB] && 2.33k DC[DB+i] != SC[SB+i]656 ) |
1302 | 188 | return false; |
1303 | 47 | return true; |
1304 | 3.69k | } |
1305 | | |
1306 | | bool RedundantInstrElimination::processBlock(MachineBasicBlock &B, |
1307 | 1.80k | const RegisterSet&) { |
1308 | 1.80k | if (!BT.reached(&B)) |
1309 | 7 | return false; |
1310 | 1.79k | bool Changed = false; |
1311 | 1.79k | |
1312 | 16.0k | for (auto I = B.begin(), E = B.end(), NextI = I; I != E16.0k ; ++I14.2k ) { |
1313 | 14.2k | NextI = std::next(I); |
1314 | 14.2k | MachineInstr *MI = &*I; |
1315 | 14.2k | |
1316 | 14.2k | if (MI->getOpcode() == TargetOpcode::COPY) |
1317 | 2.72k | continue; |
1318 | 11.5k | if (11.5k MI->hasUnmodeledSideEffects() || 11.5k MI->isInlineAsm()10.8k ) |
1319 | 678 | continue; |
1320 | 10.8k | unsigned NumD = MI->getDesc().getNumDefs(); |
1321 | 10.8k | if (NumD != 1) |
1322 | 4.30k | continue; |
1323 | 6.51k | |
1324 | 6.51k | BitTracker::RegisterRef RD = MI->getOperand(0); |
1325 | 6.51k | if (!BT.has(RD.Reg)) |
1326 | 0 | continue; |
1327 | 6.51k | const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg); |
1328 | 0 | auto At = MI->isPHI() ? B.getFirstNonPHI() |
1329 | 6.51k | : MachineBasicBlock::iterator(MI); |
1330 | 6.51k | |
1331 | 6.51k | // Find a source operand that is equal to the result. |
1332 | 12.2k | for (auto &Op : MI->uses()) { |
1333 | 12.2k | if (!Op.isReg()) |
1334 | 5.28k | continue; |
1335 | 6.92k | BitTracker::RegisterRef RS = Op; |
1336 | 6.92k | if (!BT.has(RS.Reg)) |
1337 | 172 | continue; |
1338 | 6.75k | if (6.75k !HBS::isTransparentCopy(RD, RS, MRI)6.75k ) |
1339 | 3.05k | continue; |
1340 | 3.69k | |
1341 | 3.69k | unsigned BN, BW; |
1342 | 3.69k | if (!HBS::getSubregMask(RS, BN, BW, MRI)) |
1343 | 0 | continue; |
1344 | 3.69k | |
1345 | 3.69k | const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg); |
1346 | 3.69k | if (!usedBitsEqual(RD, RS) && 3.69k !HBS::isEqual(DC, 0, SC, BN, BW)3.65k ) |
1347 | 3.62k | continue; |
1348 | 69 | |
1349 | 69 | // If found, replace the instruction with a COPY. |
1350 | 69 | const DebugLoc &DL = MI->getDebugLoc(); |
1351 | 69 | const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI); |
1352 | 69 | unsigned NewR = MRI.createVirtualRegister(FRC); |
1353 | 69 | MachineInstr *CopyI = |
1354 | 69 | BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR) |
1355 | 69 | .addReg(RS.Reg, 0, RS.Sub); |
1356 | 69 | HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI); |
1357 | 69 | // This pass can create copies between registers that don't have the |
1358 | 69 | // exact same values. Updating the tracker has to involve updating |
1359 | 69 | // all dependent cells. Example: |
1360 | 69 | // vreg1 = inst vreg2 ; vreg1 != vreg2, but used bits are equal |
1361 | 69 | // |
1362 | 69 | // vreg3 = copy vreg2 ; <- inserted |
1363 | 69 | // ... = vreg3 ; <- replaced from vreg2 |
1364 | 69 | // Indirectly, we can create a "copy" between vreg1 and vreg2 even |
1365 | 69 | // though their exact values do not match. |
1366 | 69 | BT.visit(*CopyI); |
1367 | 69 | Changed = true; |
1368 | 69 | break; |
1369 | 69 | } |
1370 | 14.2k | } |
1371 | 1.80k | |
1372 | 1.80k | return Changed; |
1373 | 1.80k | } |
1374 | | |
1375 | | namespace { |
1376 | | |
1377 | | // Recognize instructions that produce constant values known at compile-time. |
1378 | | // Replace them with register definitions that load these constants directly. |
1379 | | class ConstGeneration : public Transformation { |
1380 | | public: |
1381 | | ConstGeneration(BitTracker &bt, const HexagonInstrInfo &hii, |
1382 | | MachineRegisterInfo &mri) |
1383 | 840 | : Transformation(true), HII(hii), MRI(mri), BT(bt) {} |
1384 | | |
1385 | | bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; |
1386 | | static bool isTfrConst(const MachineInstr &MI); |
1387 | | |
1388 | | private: |
1389 | | unsigned genTfrConst(const TargetRegisterClass *RC, int64_t C, |
1390 | | MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL); |
1391 | | |
1392 | | const HexagonInstrInfo &HII; |
1393 | | MachineRegisterInfo &MRI; |
1394 | | BitTracker &BT; |
1395 | | }; |
1396 | | |
1397 | | } // end anonymous namespace |
1398 | | |
1399 | 25.4k | bool ConstGeneration::isTfrConst(const MachineInstr &MI) { |
1400 | 25.4k | unsigned Opc = MI.getOpcode(); |
1401 | 25.4k | switch (Opc) { |
1402 | 1.76k | case Hexagon::A2_combineii: |
1403 | 1.76k | case Hexagon::A4_combineii: |
1404 | 1.76k | case Hexagon::A2_tfrsi: |
1405 | 1.76k | case Hexagon::A2_tfrpi: |
1406 | 1.76k | case Hexagon::PS_true: |
1407 | 1.76k | case Hexagon::PS_false: |
1408 | 1.76k | case Hexagon::CONST32: |
1409 | 1.76k | case Hexagon::CONST64: |
1410 | 1.76k | return true; |
1411 | 23.6k | } |
1412 | 23.6k | return false; |
1413 | 23.6k | } |
1414 | | |
1415 | | // Generate a transfer-immediate instruction that is appropriate for the |
1416 | | // register class and the actual value being transferred. |
1417 | | unsigned ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C, |
1418 | 93 | MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL) { |
1419 | 93 | unsigned Reg = MRI.createVirtualRegister(RC); |
1420 | 93 | if (RC == &Hexagon::IntRegsRegClass93 ) { |
1421 | 50 | BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), Reg) |
1422 | 50 | .addImm(int32_t(C)); |
1423 | 50 | return Reg; |
1424 | 50 | } |
1425 | 43 | |
1426 | 43 | if (43 RC == &Hexagon::DoubleRegsRegClass43 ) { |
1427 | 43 | if (isInt<8>(C)43 ) { |
1428 | 32 | BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrpi), Reg) |
1429 | 32 | .addImm(C); |
1430 | 32 | return Reg; |
1431 | 32 | } |
1432 | 11 | |
1433 | 11 | unsigned Lo = Lo_32(C), Hi = Hi_32(C); |
1434 | 11 | if (isInt<8>(Lo) || 11 isInt<8>(Hi)0 ) { |
1435 | 11 | unsigned Opc = isInt<8>(Lo) ? Hexagon::A2_combineii |
1436 | 0 | : Hexagon::A4_combineii; |
1437 | 11 | BuildMI(B, At, DL, HII.get(Opc), Reg) |
1438 | 11 | .addImm(int32_t(Hi)) |
1439 | 11 | .addImm(int32_t(Lo)); |
1440 | 11 | return Reg; |
1441 | 11 | } |
1442 | 0 |
|
1443 | 0 | BuildMI(B, At, DL, HII.get(Hexagon::CONST64), Reg) |
1444 | 0 | .addImm(C); |
1445 | 0 | return Reg; |
1446 | 0 | } |
1447 | 0 |
|
1448 | 0 | if (0 RC == &Hexagon::PredRegsRegClass0 ) { |
1449 | 0 | unsigned Opc; |
1450 | 0 | if (C == 0) |
1451 | 0 | Opc = Hexagon::PS_false; |
1452 | 0 | else if (0 (C & 0xFF) == 0xFF0 ) |
1453 | 0 | Opc = Hexagon::PS_true; |
1454 | 0 | else |
1455 | 0 | return 0; |
1456 | 0 | BuildMI(B, At, DL, HII.get(Opc), Reg); |
1457 | 0 | return Reg; |
1458 | 0 | } |
1459 | 0 |
|
1460 | 0 | return 0; |
1461 | 0 | } |
1462 | | |
1463 | 1.80k | bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) { |
1464 | 1.80k | if (!BT.reached(&B)) |
1465 | 7 | return false; |
1466 | 1.79k | bool Changed = false; |
1467 | 1.79k | RegisterSet Defs; |
1468 | 1.79k | |
1469 | 15.9k | for (auto I = B.begin(), E = B.end(); I != E15.9k ; ++I14.1k ) { |
1470 | 14.1k | if (isTfrConst(*I)) |
1471 | 838 | continue; |
1472 | 13.2k | Defs.clear(); |
1473 | 13.2k | HBS::getInstrDefs(*I, Defs); |
1474 | 13.2k | if (Defs.count() != 1) |
1475 | 5.30k | continue; |
1476 | 7.99k | unsigned DR = Defs.find_first(); |
1477 | 7.99k | if (!TargetRegisterInfo::isVirtualRegister(DR)) |
1478 | 0 | continue; |
1479 | 7.99k | uint64_t U; |
1480 | 7.99k | const BitTracker::RegisterCell &DRC = BT.lookup(DR); |
1481 | 7.99k | if (HBS::getConst(DRC, 0, DRC.width(), U)7.99k ) { |
1482 | 93 | int64_t C = U; |
1483 | 93 | DebugLoc DL = I->getDebugLoc(); |
1484 | 93 | auto At = I->isPHI() ? B.getFirstNonPHI()1 : I92 ; |
1485 | 93 | unsigned ImmReg = genTfrConst(MRI.getRegClass(DR), C, B, At, DL); |
1486 | 93 | if (ImmReg93 ) { |
1487 | 93 | HBS::replaceReg(DR, ImmReg, MRI); |
1488 | 93 | BT.put(ImmReg, DRC); |
1489 | 93 | Changed = true; |
1490 | 93 | } |
1491 | 93 | } |
1492 | 14.1k | } |
1493 | 1.80k | return Changed; |
1494 | 1.80k | } |
1495 | | |
1496 | | namespace { |
1497 | | |
1498 | | // Identify pairs of available registers which hold identical values. |
1499 | | // In such cases, only one of them needs to be calculated, the other one |
1500 | | // will be defined as a copy of the first. |
1501 | | class CopyGeneration : public Transformation { |
1502 | | public: |
1503 | | CopyGeneration(BitTracker &bt, const HexagonInstrInfo &hii, |
1504 | | const HexagonRegisterInfo &hri, MachineRegisterInfo &mri) |
1505 | 840 | : Transformation(true), HII(hii), HRI(hri), MRI(mri), BT(bt) {} |
1506 | | |
1507 | | bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; |
1508 | | |
1509 | | private: |
1510 | | bool findMatch(const BitTracker::RegisterRef &Inp, |
1511 | | BitTracker::RegisterRef &Out, const RegisterSet &AVs); |
1512 | | |
1513 | | const HexagonInstrInfo &HII; |
1514 | | const HexagonRegisterInfo &HRI; |
1515 | | MachineRegisterInfo &MRI; |
1516 | | BitTracker &BT; |
1517 | | RegisterSet Forbidden; |
1518 | | }; |
1519 | | |
1520 | | // Eliminate register copies RD = RS, by replacing the uses of RD with |
1521 | | // with uses of RS. |
1522 | | class CopyPropagation : public Transformation { |
1523 | | public: |
1524 | | CopyPropagation(const HexagonRegisterInfo &hri, MachineRegisterInfo &mri) |
1525 | 840 | : Transformation(false), HRI(hri), MRI(mri) {} |
1526 | | |
1527 | | bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; |
1528 | | |
1529 | | static bool isCopyReg(unsigned Opc, bool NoConv); |
1530 | | |
1531 | | private: |
1532 | | bool propagateRegCopy(MachineInstr &MI); |
1533 | | |
1534 | | const HexagonRegisterInfo &HRI; |
1535 | | MachineRegisterInfo &MRI; |
1536 | | }; |
1537 | | |
1538 | | } // end anonymous namespace |
1539 | | |
1540 | | /// Check if there is a register in AVs that is identical to Inp. If so, |
1541 | | /// set Out to the found register. The output may be a pair Reg:Sub. |
1542 | | bool CopyGeneration::findMatch(const BitTracker::RegisterRef &Inp, |
1543 | 6.85k | BitTracker::RegisterRef &Out, const RegisterSet &AVs) { |
1544 | 6.85k | if (!BT.has(Inp.Reg)) |
1545 | 0 | return false; |
1546 | 6.85k | const BitTracker::RegisterCell &InpRC = BT.lookup(Inp.Reg); |
1547 | 6.85k | auto *FRC = HBS::getFinalVRegClass(Inp, MRI); |
1548 | 6.85k | unsigned B, W; |
1549 | 6.85k | if (!HBS::getSubregMask(Inp, B, W, MRI)) |
1550 | 0 | return false; |
1551 | 6.85k | |
1552 | 855k | for (unsigned R = AVs.find_first(); 6.85k R855k ; R = AVs.find_next(R)849k ) { |
1553 | 849k | if (!BT.has(R) || 849k Forbidden[R]849k ) |
1554 | 1.95k | continue; |
1555 | 847k | const BitTracker::RegisterCell &RC = BT.lookup(R); |
1556 | 847k | unsigned RW = RC.width(); |
1557 | 847k | if (W == RW847k ) { |
1558 | 639k | if (FRC != MRI.getRegClass(R)) |
1559 | 103k | continue; |
1560 | 536k | if (536k !HBS::isTransparentCopy(R, Inp, MRI)536k ) |
1561 | 0 | continue; |
1562 | 536k | if (536k !HBS::isEqual(InpRC, B, RC, 0, W)536k ) |
1563 | 535k | continue; |
1564 | 97 | Out.Reg = R; |
1565 | 97 | Out.Sub = 0; |
1566 | 97 | return true; |
1567 | 97 | } |
1568 | 208k | // Check if there is a super-register, whose part (with a subregister) |
1569 | 208k | // is equal to the input. |
1570 | 208k | // Only do double registers for now. |
1571 | 208k | if (208k W*2 != RW208k ) |
1572 | 176k | continue; |
1573 | 31.9k | if (31.9k MRI.getRegClass(R) != &Hexagon::DoubleRegsRegClass31.9k ) |
1574 | 22.7k | continue; |
1575 | 9.22k | |
1576 | 9.22k | if (9.22k HBS::isEqual(InpRC, B, RC, 0, W)9.22k ) |
1577 | 13 | Out.Sub = Hexagon::isub_lo; |
1578 | 9.20k | else if (9.20k HBS::isEqual(InpRC, B, RC, W, W)9.20k ) |
1579 | 20 | Out.Sub = Hexagon::isub_hi; |
1580 | 9.20k | else |
1581 | 9.18k | continue; |
1582 | 33 | Out.Reg = R; |
1583 | 33 | if (HBS::isTransparentCopy(Out, Inp, MRI)) |
1584 | 33 | return true; |
1585 | 849k | } |
1586 | 6.72k | return false; |
1587 | 6.85k | } |
1588 | | |
1589 | | bool CopyGeneration::processBlock(MachineBasicBlock &B, |
1590 | 1.80k | const RegisterSet &AVs) { |
1591 | 1.80k | if (!BT.reached(&B)) |
1592 | 7 | return false; |
1593 | 1.79k | RegisterSet AVB(AVs); |
1594 | 1.79k | bool Changed = false; |
1595 | 1.79k | RegisterSet Defs; |
1596 | 1.79k | |
1597 | 16.0k | for (auto I = B.begin(), E = B.end(), NextI = I; I != E; |
1598 | 14.2k | ++I, AVB.insert(Defs)14.2k ) { |
1599 | 14.2k | NextI = std::next(I); |
1600 | 14.2k | Defs.clear(); |
1601 | 14.2k | HBS::getInstrDefs(*I, Defs); |
1602 | 14.2k | |
1603 | 14.2k | unsigned Opc = I->getOpcode(); |
1604 | 14.2k | if (CopyPropagation::isCopyReg(Opc, false) || |
1605 | 11.3k | ConstGeneration::isTfrConst(*I)) |
1606 | 3.90k | continue; |
1607 | 10.3k | |
1608 | 10.3k | DebugLoc DL = I->getDebugLoc(); |
1609 | 10.3k | auto At = I->isPHI() ? B.getFirstNonPHI()447 : I9.94k ; |
1610 | 10.3k | |
1611 | 16.4k | for (unsigned R = Defs.find_first(); R16.4k ; R = Defs.find_next(R)6.04k ) { |
1612 | 6.04k | BitTracker::RegisterRef MR; |
1613 | 6.04k | auto *FRC = HBS::getFinalVRegClass(R, MRI); |
1614 | 6.04k | |
1615 | 6.04k | if (findMatch(R, MR, AVB)6.04k ) { |
1616 | 48 | unsigned NewR = MRI.createVirtualRegister(FRC); |
1617 | 48 | BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR) |
1618 | 48 | .addReg(MR.Reg, 0, MR.Sub); |
1619 | 48 | BT.put(BitTracker::RegisterRef(NewR), BT.get(MR)); |
1620 | 48 | HBS::replaceReg(R, NewR, MRI); |
1621 | 48 | Forbidden.insert(R); |
1622 | 48 | continue; |
1623 | 48 | } |
1624 | 5.99k | |
1625 | 5.99k | if (5.99k FRC == &Hexagon::DoubleRegsRegClass || |
1626 | 5.99k | FRC == &Hexagon::HvxWRRegClass5.49k ) { |
1627 | 767 | // Try to generate REG_SEQUENCE. |
1628 | 767 | unsigned SubLo = HRI.getHexagonSubRegIndex(*FRC, Hexagon::ps_sub_lo); |
1629 | 767 | unsigned SubHi = HRI.getHexagonSubRegIndex(*FRC, Hexagon::ps_sub_hi); |
1630 | 767 | BitTracker::RegisterRef TL = { R, SubLo }; |
1631 | 767 | BitTracker::RegisterRef TH = { R, SubHi }; |
1632 | 767 | BitTracker::RegisterRef ML, MH; |
1633 | 767 | if (findMatch(TL, ML, AVB) && 767 findMatch(TH, MH, AVB)44 ) { |
1634 | 38 | auto *FRC = HBS::getFinalVRegClass(R, MRI); |
1635 | 38 | unsigned NewR = MRI.createVirtualRegister(FRC); |
1636 | 38 | BuildMI(B, At, DL, HII.get(TargetOpcode::REG_SEQUENCE), NewR) |
1637 | 38 | .addReg(ML.Reg, 0, ML.Sub) |
1638 | 38 | .addImm(SubLo) |
1639 | 38 | .addReg(MH.Reg, 0, MH.Sub) |
1640 | 38 | .addImm(SubHi); |
1641 | 38 | BT.put(BitTracker::RegisterRef(NewR), BT.get(R)); |
1642 | 38 | HBS::replaceReg(R, NewR, MRI); |
1643 | 38 | Forbidden.insert(R); |
1644 | 38 | } |
1645 | 767 | } |
1646 | 6.04k | } |
1647 | 14.2k | } |
1648 | 1.80k | |
1649 | 1.80k | return Changed; |
1650 | 1.80k | } |
1651 | | |
1652 | 28.6k | bool CopyPropagation::isCopyReg(unsigned Opc, bool NoConv) { |
1653 | 28.6k | switch (Opc) { |
1654 | 6.03k | case TargetOpcode::COPY: |
1655 | 6.03k | case TargetOpcode::REG_SEQUENCE: |
1656 | 6.03k | case Hexagon::A4_combineir: |
1657 | 6.03k | case Hexagon::A4_combineri: |
1658 | 6.03k | return true; |
1659 | 66 | case Hexagon::A2_tfr: |
1660 | 66 | case Hexagon::A2_tfrp: |
1661 | 66 | case Hexagon::A2_combinew: |
1662 | 66 | case Hexagon::V6_vcombine: |
1663 | 66 | return NoConv; |
1664 | 22.5k | default: |
1665 | 22.5k | break; |
1666 | 22.5k | } |
1667 | 22.5k | return false; |
1668 | 22.5k | } |
1669 | | |
1670 | 3.09k | bool CopyPropagation::propagateRegCopy(MachineInstr &MI) { |
1671 | 3.09k | bool Changed = false; |
1672 | 3.09k | unsigned Opc = MI.getOpcode(); |
1673 | 3.09k | BitTracker::RegisterRef RD = MI.getOperand(0); |
1674 | 3.09k | assert(MI.getOperand(0).getSubReg() == 0); |
1675 | 3.09k | |
1676 | 3.09k | switch (Opc) { |
1677 | 2.84k | case TargetOpcode::COPY: |
1678 | 2.84k | case Hexagon::A2_tfr: |
1679 | 2.84k | case Hexagon::A2_tfrp: { |
1680 | 2.84k | BitTracker::RegisterRef RS = MI.getOperand(1); |
1681 | 2.84k | if (!HBS::isTransparentCopy(RD, RS, MRI)) |
1682 | 2.17k | break; |
1683 | 664 | if (664 RS.Sub != 0664 ) |
1684 | 281 | Changed = HBS::replaceRegWithSub(RD.Reg, RS.Reg, RS.Sub, MRI); |
1685 | 664 | else |
1686 | 383 | Changed = HBS::replaceReg(RD.Reg, RS.Reg, MRI); |
1687 | 664 | break; |
1688 | 664 | } |
1689 | 209 | case TargetOpcode::REG_SEQUENCE: { |
1690 | 209 | BitTracker::RegisterRef SL, SH; |
1691 | 209 | if (HBS::parseRegSequence(MI, SL, SH, MRI)209 ) { |
1692 | 209 | const TargetRegisterClass &RC = *MRI.getRegClass(RD.Reg); |
1693 | 209 | unsigned SubLo = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo); |
1694 | 209 | unsigned SubHi = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_hi); |
1695 | 209 | Changed = HBS::replaceSubWithSub(RD.Reg, SubLo, SL.Reg, SL.Sub, MRI); |
1696 | 209 | Changed |= HBS::replaceSubWithSub(RD.Reg, SubHi, SH.Reg, SH.Sub, MRI); |
1697 | 209 | } |
1698 | 209 | break; |
1699 | 664 | } |
1700 | 33 | case Hexagon::A2_combinew: |
1701 | 33 | case Hexagon::V6_vcombine: { |
1702 | 33 | const TargetRegisterClass &RC = *MRI.getRegClass(RD.Reg); |
1703 | 33 | unsigned SubLo = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo); |
1704 | 33 | unsigned SubHi = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_hi); |
1705 | 33 | BitTracker::RegisterRef RH = MI.getOperand(1), RL = MI.getOperand(2); |
1706 | 33 | Changed = HBS::replaceSubWithSub(RD.Reg, SubLo, RL.Reg, RL.Sub, MRI); |
1707 | 33 | Changed |= HBS::replaceSubWithSub(RD.Reg, SubHi, RH.Reg, RH.Sub, MRI); |
1708 | 33 | break; |
1709 | 33 | } |
1710 | 11 | case Hexagon::A4_combineir: |
1711 | 11 | case Hexagon::A4_combineri: { |
1712 | 11 | unsigned SrcX = (Opc == Hexagon::A4_combineir) ? 211 : 10 ; |
1713 | 11 | unsigned Sub = (Opc == Hexagon::A4_combineir) ? Hexagon::isub_lo |
1714 | 0 | : Hexagon::isub_hi; |
1715 | 11 | BitTracker::RegisterRef RS = MI.getOperand(SrcX); |
1716 | 11 | Changed = HBS::replaceSubWithSub(RD.Reg, Sub, RS.Reg, RS.Sub, MRI); |
1717 | 11 | break; |
1718 | 3.09k | } |
1719 | 3.09k | } |
1720 | 3.09k | return Changed; |
1721 | 3.09k | } |
1722 | | |
1723 | 1.80k | bool CopyPropagation::processBlock(MachineBasicBlock &B, const RegisterSet&) { |
1724 | 1.80k | std::vector<MachineInstr*> Instrs; |
1725 | 16.1k | for (auto I = B.rbegin(), E = B.rend(); I != E16.1k ; ++I14.3k ) |
1726 | 14.3k | Instrs.push_back(&*I); |
1727 | 1.80k | |
1728 | 1.80k | bool Changed = false; |
1729 | 14.3k | for (auto I : Instrs) { |
1730 | 14.3k | unsigned Opc = I->getOpcode(); |
1731 | 14.3k | if (!CopyPropagation::isCopyReg(Opc, true)) |
1732 | 11.3k | continue; |
1733 | 3.09k | Changed |= propagateRegCopy(*I); |
1734 | 3.09k | } |
1735 | 1.80k | |
1736 | 1.80k | return Changed; |
1737 | 1.80k | } |
1738 | | |
1739 | | namespace { |
1740 | | |
1741 | | // Recognize patterns that can be simplified and replace them with the |
1742 | | // simpler forms. |
1743 | | // This is by no means complete |
1744 | | class BitSimplification : public Transformation { |
1745 | | public: |
1746 | | BitSimplification(BitTracker &bt, const MachineDominatorTree &mdt, |
1747 | | const HexagonInstrInfo &hii, const HexagonRegisterInfo &hri, |
1748 | | MachineRegisterInfo &mri, MachineFunction &mf) |
1749 | | : Transformation(true), MDT(mdt), HII(hii), HRI(hri), MRI(mri), |
1750 | 840 | MF(mf), BT(bt) {} |
1751 | | |
1752 | | bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; |
1753 | | |
1754 | | private: |
1755 | | struct RegHalf : public BitTracker::RegisterRef { |
1756 | | bool Low; // Low/High halfword. |
1757 | | }; |
1758 | | |
1759 | | bool matchHalf(unsigned SelfR, const BitTracker::RegisterCell &RC, |
1760 | | unsigned B, RegHalf &RH); |
1761 | | bool validateReg(BitTracker::RegisterRef R, unsigned Opc, unsigned OpNum); |
1762 | | |
1763 | | bool matchPackhl(unsigned SelfR, const BitTracker::RegisterCell &RC, |
1764 | | BitTracker::RegisterRef &Rs, BitTracker::RegisterRef &Rt); |
1765 | | unsigned getCombineOpcode(bool HLow, bool LLow); |
1766 | | |
1767 | | bool genStoreUpperHalf(MachineInstr *MI); |
1768 | | bool genStoreImmediate(MachineInstr *MI); |
1769 | | bool genPackhl(MachineInstr *MI, BitTracker::RegisterRef RD, |
1770 | | const BitTracker::RegisterCell &RC); |
1771 | | bool genExtractHalf(MachineInstr *MI, BitTracker::RegisterRef RD, |
1772 | | const BitTracker::RegisterCell &RC); |
1773 | | bool genCombineHalf(MachineInstr *MI, BitTracker::RegisterRef RD, |
1774 | | const BitTracker::RegisterCell &RC); |
1775 | | bool genExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD, |
1776 | | const BitTracker::RegisterCell &RC); |
1777 | | bool genBitSplit(MachineInstr *MI, BitTracker::RegisterRef RD, |
1778 | | const BitTracker::RegisterCell &RC, const RegisterSet &AVs); |
1779 | | bool simplifyTstbit(MachineInstr *MI, BitTracker::RegisterRef RD, |
1780 | | const BitTracker::RegisterCell &RC); |
1781 | | bool simplifyExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD, |
1782 | | const BitTracker::RegisterCell &RC, const RegisterSet &AVs); |
1783 | | |
1784 | | // Cache of created instructions to avoid creating duplicates. |
1785 | | // XXX Currently only used by genBitSplit. |
1786 | | std::vector<MachineInstr*> NewMIs; |
1787 | | |
1788 | | const MachineDominatorTree &MDT; |
1789 | | const HexagonInstrInfo &HII; |
1790 | | const HexagonRegisterInfo &HRI; |
1791 | | MachineRegisterInfo &MRI; |
1792 | | MachineFunction &MF; |
1793 | | BitTracker &BT; |
1794 | | }; |
1795 | | |
1796 | | } // end anonymous namespace |
1797 | | |
1798 | | // Check if the bits [B..B+16) in register cell RC form a valid halfword, |
1799 | | // i.e. [0..16), [16..32), etc. of some register. If so, return true and |
1800 | | // set the information about the found register in RH. |
1801 | | bool BitSimplification::matchHalf(unsigned SelfR, |
1802 | 8.03k | const BitTracker::RegisterCell &RC, unsigned B, RegHalf &RH) { |
1803 | 8.03k | // XXX This could be searching in the set of available registers, in case |
1804 | 8.03k | // the match is not exact. |
1805 | 8.03k | |
1806 | 8.03k | // Match 16-bit chunks, where the RC[B..B+15] references exactly one |
1807 | 8.03k | // register and all the bits B..B+15 match between RC and the register. |
1808 | 8.03k | // This is meant to match "v1[0-15]", where v1 = { [0]:0 [1-15]:v1... }, |
1809 | 8.03k | // and RC = { [0]:0 [1-15]:v1[1-15]... }. |
1810 | 8.03k | bool Low = false; |
1811 | 8.03k | unsigned I = B; |
1812 | 29.5k | while (I < B+16 && 29.5k RC[I].num()28.2k ) |
1813 | 21.4k | I++; |
1814 | 8.03k | if (I == B+16) |
1815 | 1.25k | return false; |
1816 | 6.77k | |
1817 | 6.77k | unsigned Reg = RC[I].RefI.Reg; |
1818 | 6.77k | unsigned P = RC[I].RefI.Pos; // The RefI.Pos will be advanced by I-B. |
1819 | 6.77k | if (P < I-B) |
1820 | 34 | return false; |
1821 | 6.74k | unsigned Pos = P - (I-B); |
1822 | 6.74k | |
1823 | 6.74k | if (Reg == 0 || 6.74k Reg == SelfR6.74k ) // Don't match "self". |
1824 | 5.39k | return false; |
1825 | 1.34k | if (1.34k !TargetRegisterInfo::isVirtualRegister(Reg)1.34k ) |
1826 | 0 | return false; |
1827 | 1.34k | if (1.34k !BT.has(Reg)1.34k ) |
1828 | 0 | return false; |
1829 | 1.34k | |
1830 | 1.34k | const BitTracker::RegisterCell &SC = BT.lookup(Reg); |
1831 | 1.34k | if (Pos+16 > SC.width()) |
1832 | 39 | return false; |
1833 | 1.31k | |
1834 | 9.71k | for (unsigned i = 0; 1.31k i < 169.71k ; ++i8.40k ) { |
1835 | 9.46k | const BitTracker::BitValue &RV = RC[i+B]; |
1836 | 9.46k | if (RV.Type == BitTracker::BitValue::Ref9.46k ) { |
1837 | 9.13k | if (RV.RefI.Reg != Reg) |
1838 | 763 | return false; |
1839 | 8.36k | if (8.36k RV.RefI.Pos != i+Pos8.36k ) |
1840 | 38 | return false; |
1841 | 8.32k | continue; |
1842 | 8.32k | } |
1843 | 332 | if (332 RC[i+B] != SC[i+Pos]332 ) |
1844 | 252 | return false; |
1845 | 9.46k | } |
1846 | 1.31k | |
1847 | 257 | unsigned Sub = 0; |
1848 | 257 | switch (Pos) { |
1849 | 128 | case 0: |
1850 | 128 | Sub = Hexagon::isub_lo; |
1851 | 128 | Low = true; |
1852 | 128 | break; |
1853 | 39 | case 16: |
1854 | 39 | Sub = Hexagon::isub_lo; |
1855 | 39 | Low = false; |
1856 | 39 | break; |
1857 | 18 | case 32: |
1858 | 18 | Sub = Hexagon::isub_hi; |
1859 | 18 | Low = true; |
1860 | 18 | break; |
1861 | 12 | case 48: |
1862 | 12 | Sub = Hexagon::isub_hi; |
1863 | 12 | Low = false; |
1864 | 12 | break; |
1865 | 60 | default: |
1866 | 60 | return false; |
1867 | 197 | } |
1868 | 197 | |
1869 | 197 | RH.Reg = Reg; |
1870 | 197 | RH.Sub = Sub; |
1871 | 197 | RH.Low = Low; |
1872 | 197 | // If the subregister is not valid with the register, set it to 0. |
1873 | 197 | if (!HBS::getFinalVRegClass(RH, MRI)) |
1874 | 142 | RH.Sub = 0; |
1875 | 8.03k | |
1876 | 8.03k | return true; |
1877 | 8.03k | } |
1878 | | |
1879 | | bool BitSimplification::validateReg(BitTracker::RegisterRef R, unsigned Opc, |
1880 | 242 | unsigned OpNum) { |
1881 | 242 | auto *OpRC = HII.getRegClass(HII.get(Opc), OpNum, &HRI, MF); |
1882 | 242 | auto *RRC = HBS::getFinalVRegClass(R, MRI); |
1883 | 242 | return OpRC->hasSubClassEq(RRC); |
1884 | 242 | } |
1885 | | |
1886 | | // Check if RC matches the pattern of a S2_packhl. If so, return true and |
1887 | | // set the inputs Rs and Rt. |
1888 | | bool BitSimplification::matchPackhl(unsigned SelfR, |
1889 | | const BitTracker::RegisterCell &RC, BitTracker::RegisterRef &Rs, |
1890 | 512 | BitTracker::RegisterRef &Rt) { |
1891 | 512 | RegHalf L1, H1, L2, H2; |
1892 | 512 | |
1893 | 512 | if (!matchHalf(SelfR, RC, 0, L2) || 512 !matchHalf(SelfR, RC, 16, L1)29 ) |
1894 | 505 | return false; |
1895 | 7 | if (7 !matchHalf(SelfR, RC, 32, H2) || 7 !matchHalf(SelfR, RC, 48, H1)3 ) |
1896 | 7 | return false; |
1897 | 0 |
|
1898 | 0 | // Rs = H1.L1, Rt = H2.L2 |
1899 | 0 | if (0 H1.Reg != L1.Reg || 0 H1.Sub != L1.Sub0 || H1.Low0 || !L1.Low0 ) |
1900 | 0 | return false; |
1901 | 0 | if (0 H2.Reg != L2.Reg || 0 H2.Sub != L2.Sub0 || H2.Low0 || !L2.Low0 ) |
1902 | 0 | return false; |
1903 | 0 |
|
1904 | 0 | Rs = H1; |
1905 | 0 | Rt = H2; |
1906 | 0 | return true; |
1907 | 0 | } |
1908 | | |
1909 | 13 | unsigned BitSimplification::getCombineOpcode(bool HLow, bool LLow) { |
1910 | 13 | return HLow ? LLow ? 13 Hexagon::A2_combine_ll13 |
1911 | 0 | : Hexagon::A2_combine_lh |
1912 | 0 | : LLow ? 0 Hexagon::A2_combine_hl0 |
1913 | 0 | : Hexagon::A2_combine_hh; |
1914 | 13 | } |
1915 | | |
1916 | | // If MI stores the upper halfword of a register (potentially obtained via |
1917 | | // shifts or extracts), replace it with a storerf instruction. This could |
1918 | | // cause the "extraction" code to become dead. |
1919 | 1.92k | bool BitSimplification::genStoreUpperHalf(MachineInstr *MI) { |
1920 | 1.92k | unsigned Opc = MI->getOpcode(); |
1921 | 1.92k | if (Opc != Hexagon::S2_storerh_io) |
1922 | 1.88k | return false; |
1923 | 41 | |
1924 | 41 | MachineOperand &ValOp = MI->getOperand(2); |
1925 | 41 | BitTracker::RegisterRef RS = ValOp; |
1926 | 41 | if (!BT.has(RS.Reg)) |
1927 | 0 | return false; |
1928 | 41 | const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg); |
1929 | 41 | RegHalf H; |
1930 | 41 | if (!matchHalf(0, RC, 0, H)) |
1931 | 3 | return false; |
1932 | 38 | if (38 H.Low38 ) |
1933 | 28 | return false; |
1934 | 10 | MI->setDesc(HII.get(Hexagon::S2_storerf_io)); |
1935 | 10 | ValOp.setReg(H.Reg); |
1936 | 10 | ValOp.setSubReg(H.Sub); |
1937 | 10 | return true; |
1938 | 10 | } |
1939 | | |
1940 | | // If MI stores a value known at compile-time, and the value is within a range |
1941 | | // that avoids using constant-extenders, replace it with a store-immediate. |
1942 | 1.91k | bool BitSimplification::genStoreImmediate(MachineInstr *MI) { |
1943 | 1.91k | unsigned Opc = MI->getOpcode(); |
1944 | 1.91k | unsigned Align = 0; |
1945 | 1.91k | switch (Opc) { |
1946 | 240 | case Hexagon::S2_storeri_io: |
1947 | 240 | Align++; |
1948 | 240 | LLVM_FALLTHROUGH; |
1949 | 271 | case Hexagon::S2_storerh_io: |
1950 | 271 | Align++; |
1951 | 271 | LLVM_FALLTHROUGH; |
1952 | 800 | case Hexagon::S2_storerb_io: |
1953 | 800 | break; |
1954 | 1.11k | default: |
1955 | 1.11k | return false; |
1956 | 800 | } |
1957 | 800 | |
1958 | 800 | // Avoid stores to frame-indices (due to an unknown offset). |
1959 | 800 | if (800 !MI->getOperand(0).isReg()800 ) |
1960 | 647 | return false; |
1961 | 153 | MachineOperand &OffOp = MI->getOperand(1); |
1962 | 153 | if (!OffOp.isImm()) |
1963 | 0 | return false; |
1964 | 153 | |
1965 | 153 | int64_t Off = OffOp.getImm(); |
1966 | 153 | // Offset is u6:a. Sadly, there is no isShiftedUInt(n,x). |
1967 | 153 | if (!isUIntN(6+Align, Off) || 153 (Off & ((1<<Align)-1))113 ) |
1968 | 40 | return false; |
1969 | 113 | // Source register: |
1970 | 113 | BitTracker::RegisterRef RS = MI->getOperand(2); |
1971 | 113 | if (!BT.has(RS.Reg)) |
1972 | 0 | return false; |
1973 | 113 | const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg); |
1974 | 113 | uint64_t U; |
1975 | 113 | if (!HBS::getConst(RC, 0, RC.width(), U)) |
1976 | 106 | return false; |
1977 | 7 | |
1978 | 7 | // Only consider 8-bit values to avoid constant-extenders. |
1979 | 7 | int V; |
1980 | 7 | switch (Opc) { |
1981 | 0 | case Hexagon::S2_storerb_io: |
1982 | 0 | V = int8_t(U); |
1983 | 0 | break; |
1984 | 1 | case Hexagon::S2_storerh_io: |
1985 | 1 | V = int16_t(U); |
1986 | 1 | break; |
1987 | 6 | case Hexagon::S2_storeri_io: |
1988 | 6 | V = int32_t(U); |
1989 | 6 | break; |
1990 | 7 | } |
1991 | 7 | if (7 !isInt<8>(V)7 ) |
1992 | 1 | return false; |
1993 | 6 | |
1994 | 6 | MI->RemoveOperand(2); |
1995 | 6 | switch (Opc) { |
1996 | 0 | case Hexagon::S2_storerb_io: |
1997 | 0 | MI->setDesc(HII.get(Hexagon::S4_storeirb_io)); |
1998 | 0 | break; |
1999 | 0 | case Hexagon::S2_storerh_io: |
2000 | 0 | MI->setDesc(HII.get(Hexagon::S4_storeirh_io)); |
2001 | 0 | break; |
2002 | 6 | case Hexagon::S2_storeri_io: |
2003 | 6 | MI->setDesc(HII.get(Hexagon::S4_storeiri_io)); |
2004 | 6 | break; |
2005 | 6 | } |
2006 | 6 | MI->addOperand(MachineOperand::CreateImm(V)); |
2007 | 6 | return true; |
2008 | 6 | } |
2009 | | |
2010 | | // If MI is equivalent o S2_packhl, generate the S2_packhl. MI could be the |
2011 | | // last instruction in a sequence that results in something equivalent to |
2012 | | // the pack-halfwords. The intent is to cause the entire sequence to become |
2013 | | // dead. |
2014 | | bool BitSimplification::genPackhl(MachineInstr *MI, |
2015 | 518 | BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) { |
2016 | 518 | unsigned Opc = MI->getOpcode(); |
2017 | 518 | if (Opc == Hexagon::S2_packhl) |
2018 | 6 | return false; |
2019 | 512 | BitTracker::RegisterRef Rs, Rt; |
2020 | 512 | if (!matchPackhl(RD.Reg, RC, Rs, Rt)) |
2021 | 512 | return false; |
2022 | 0 | if (0 !validateReg(Rs, Hexagon::S2_packhl, 1) || |
2023 | 0 | !validateReg(Rt, Hexagon::S2_packhl, 2)) |
2024 | 0 | return false; |
2025 | 0 |
|
2026 | 0 | MachineBasicBlock &B = *MI->getParent(); |
2027 | 0 | unsigned NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass); |
2028 | 0 | DebugLoc DL = MI->getDebugLoc(); |
2029 | 0 | auto At = MI->isPHI() ? B.getFirstNonPHI() |
2030 | 0 | : MachineBasicBlock::iterator(MI); |
2031 | 518 | BuildMI(B, At, DL, HII.get(Hexagon::S2_packhl), NewR) |
2032 | 518 | .addReg(Rs.Reg, 0, Rs.Sub) |
2033 | 518 | .addReg(Rt.Reg, 0, Rt.Sub); |
2034 | 518 | HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI); |
2035 | 518 | BT.put(BitTracker::RegisterRef(NewR), RC); |
2036 | 518 | return true; |
2037 | 518 | } |
2038 | | |
2039 | | // If MI produces halfword of the input in the low half of the output, |
2040 | | // replace it with zero-extend or extractu. |
2041 | | bool BitSimplification::genExtractHalf(MachineInstr *MI, |
2042 | 3.69k | BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) { |
2043 | 3.69k | RegHalf L; |
2044 | 3.69k | // Check for halfword in low 16 bits, zeros elsewhere. |
2045 | 3.69k | if (!matchHalf(RD.Reg, RC, 0, L) || 3.69k !HBS::isZero(RC, 16, 16)54 ) |
2046 | 3.67k | return false; |
2047 | 15 | |
2048 | 15 | unsigned Opc = MI->getOpcode(); |
2049 | 15 | MachineBasicBlock &B = *MI->getParent(); |
2050 | 15 | DebugLoc DL = MI->getDebugLoc(); |
2051 | 15 | |
2052 | 15 | // Prefer zxth, since zxth can go in any slot, while extractu only in |
2053 | 15 | // slots 2 and 3. |
2054 | 15 | unsigned NewR = 0; |
2055 | 0 | auto At = MI->isPHI() ? B.getFirstNonPHI() |
2056 | 15 | : MachineBasicBlock::iterator(MI); |
2057 | 15 | if (L.Low && 15 Opc != Hexagon::A2_zxth7 ) { |
2058 | 0 | if (validateReg(L, Hexagon::A2_zxth, 1)0 ) { |
2059 | 0 | NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); |
2060 | 0 | BuildMI(B, At, DL, HII.get(Hexagon::A2_zxth), NewR) |
2061 | 0 | .addReg(L.Reg, 0, L.Sub); |
2062 | 0 | } |
2063 | 15 | } else if (15 !L.Low && 15 Opc != Hexagon::S2_lsr_i_r8 ) { |
2064 | 1 | if (validateReg(L, Hexagon::S2_lsr_i_r, 1)1 ) { |
2065 | 1 | NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); |
2066 | 1 | BuildMI(B, MI, DL, HII.get(Hexagon::S2_lsr_i_r), NewR) |
2067 | 1 | .addReg(L.Reg, 0, L.Sub) |
2068 | 1 | .addImm(16); |
2069 | 1 | } |
2070 | 15 | } |
2071 | 15 | if (NewR == 0) |
2072 | 14 | return false; |
2073 | 1 | HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI); |
2074 | 1 | BT.put(BitTracker::RegisterRef(NewR), RC); |
2075 | 1 | return true; |
2076 | 1 | } |
2077 | | |
2078 | | // If MI is equivalent to a combine(.L/.H, .L/.H) replace with with the |
2079 | | // combine. |
2080 | | bool BitSimplification::genCombineHalf(MachineInstr *MI, |
2081 | 3.69k | BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) { |
2082 | 3.69k | RegHalf L, H; |
2083 | 3.69k | // Check for combine h/l |
2084 | 3.69k | if (!matchHalf(RD.Reg, RC, 0, L) || 3.69k !matchHalf(RD.Reg, RC, 16, H)53 ) |
2085 | 3.68k | return false; |
2086 | 13 | // Do nothing if this is just a reg copy. |
2087 | 13 | if (13 L.Reg == H.Reg && 13 L.Sub == H.Sub0 && !H.Low0 && L.Low0 ) |
2088 | 0 | return false; |
2089 | 13 | |
2090 | 13 | unsigned Opc = MI->getOpcode(); |
2091 | 13 | unsigned COpc = getCombineOpcode(H.Low, L.Low); |
2092 | 13 | if (COpc == Opc) |
2093 | 2 | return false; |
2094 | 11 | if (11 !validateReg(H, COpc, 1) || 11 !validateReg(L, COpc, 2)11 ) |
2095 | 0 | return false; |
2096 | 11 | |
2097 | 11 | MachineBasicBlock &B = *MI->getParent(); |
2098 | 11 | DebugLoc DL = MI->getDebugLoc(); |
2099 | 11 | unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); |
2100 | 0 | auto At = MI->isPHI() ? B.getFirstNonPHI() |
2101 | 11 | : MachineBasicBlock::iterator(MI); |
2102 | 3.69k | BuildMI(B, At, DL, HII.get(COpc), NewR) |
2103 | 3.69k | .addReg(H.Reg, 0, H.Sub) |
2104 | 3.69k | .addReg(L.Reg, 0, L.Sub); |
2105 | 3.69k | HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI); |
2106 | 3.69k | BT.put(BitTracker::RegisterRef(NewR), RC); |
2107 | 3.69k | return true; |
2108 | 3.69k | } |
2109 | | |
2110 | | // If MI resets high bits of a register and keeps the lower ones, replace it |
2111 | | // with zero-extend byte/half, and-immediate, or extractu, as appropriate. |
2112 | | bool BitSimplification::genExtractLow(MachineInstr *MI, |
2113 | 3.68k | BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) { |
2114 | 3.68k | unsigned Opc = MI->getOpcode(); |
2115 | 3.68k | switch (Opc) { |
2116 | 38 | case Hexagon::A2_zxtb: |
2117 | 38 | case Hexagon::A2_zxth: |
2118 | 38 | case Hexagon::S2_extractu: |
2119 | 38 | return false; |
2120 | 3.64k | } |
2121 | 3.64k | if (3.64k Opc == Hexagon::A2_andir && 3.64k MI->getOperand(2).isImm()23 ) { |
2122 | 23 | int32_t Imm = MI->getOperand(2).getImm(); |
2123 | 23 | if (isInt<10>(Imm)) |
2124 | 16 | return false; |
2125 | 3.62k | } |
2126 | 3.62k | |
2127 | 3.62k | if (3.62k MI->hasUnmodeledSideEffects() || 3.62k MI->isInlineAsm()3.21k ) |
2128 | 411 | return false; |
2129 | 3.21k | unsigned W = RC.width(); |
2130 | 38.7k | while (W > 0 && 38.7k RC[W-1].is(0)38.4k ) |
2131 | 35.5k | W--; |
2132 | 3.21k | if (W == 0 || 3.21k W == RC.width()2.96k ) |
2133 | 2.00k | return false; |
2134 | 1.21k | unsigned NewOpc = (W == 8) ? 1.21k Hexagon::A2_zxtb755 |
2135 | 456 | : (W == 16) ? 456 Hexagon::A2_zxth113 |
2136 | 343 | : (W < 10) ? 343 Hexagon::A2_andir214 |
2137 | 129 | : Hexagon::S2_extractu; |
2138 | 1.21k | MachineBasicBlock &B = *MI->getParent(); |
2139 | 1.21k | DebugLoc DL = MI->getDebugLoc(); |
2140 | 1.21k | |
2141 | 2.10k | for (auto &Op : MI->uses()) { |
2142 | 2.10k | if (!Op.isReg()) |
2143 | 1.70k | continue; |
2144 | 407 | BitTracker::RegisterRef RS = Op; |
2145 | 407 | if (!BT.has(RS.Reg)) |
2146 | 10 | continue; |
2147 | 397 | const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg); |
2148 | 397 | unsigned BN, BW; |
2149 | 397 | if (!HBS::getSubregMask(RS, BN, BW, MRI)) |
2150 | 0 | continue; |
2151 | 397 | if (397 BW < W || 397 !HBS::isEqual(RC, 0, SC, BN, W)397 ) |
2152 | 338 | continue; |
2153 | 59 | if (59 !validateReg(RS, NewOpc, 1)59 ) |
2154 | 59 | continue; |
2155 | 0 |
|
2156 | 0 | unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); |
2157 | 0 | auto At = MI->isPHI() ? B.getFirstNonPHI() |
2158 | 0 | : MachineBasicBlock::iterator(MI); |
2159 | 0 | auto MIB = BuildMI(B, At, DL, HII.get(NewOpc), NewR) |
2160 | 0 | .addReg(RS.Reg, 0, RS.Sub); |
2161 | 0 | if (NewOpc == Hexagon::A2_andir) |
2162 | 0 | MIB.addImm((1 << W) - 1); |
2163 | 0 | else if (0 NewOpc == Hexagon::S2_extractu0 ) |
2164 | 0 | MIB.addImm(W).addImm(0); |
2165 | 2.10k | HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI); |
2166 | 2.10k | BT.put(BitTracker::RegisterRef(NewR), RC); |
2167 | 2.10k | return true; |
2168 | 2.10k | } |
2169 | 1.21k | return false; |
2170 | 1.21k | } |
2171 | | |
2172 | | bool BitSimplification::genBitSplit(MachineInstr *MI, |
2173 | | BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC, |
2174 | 3.72k | const RegisterSet &AVs) { |
2175 | 3.72k | if (!GenBitSplit) |
2176 | 0 | return false; |
2177 | 3.72k | if (3.72k MaxBitSplit.getNumOccurrences()3.72k ) { |
2178 | 0 | if (CountBitSplit >= MaxBitSplit) |
2179 | 0 | return false; |
2180 | 3.72k | } |
2181 | 3.72k | |
2182 | 3.72k | unsigned Opc = MI->getOpcode(); |
2183 | 3.72k | switch (Opc) { |
2184 | 0 | case Hexagon::A4_bitsplit: |
2185 | 0 | case Hexagon::A4_bitspliti: |
2186 | 0 | return false; |
2187 | 3.72k | } |
2188 | 3.72k | |
2189 | 3.72k | unsigned W = RC.width(); |
2190 | 3.72k | if (W != 32) |
2191 | 0 | return false; |
2192 | 3.72k | |
2193 | 3.72k | auto ctlz = [] (const BitTracker::RegisterCell &C) -> unsigned 3.72k { |
2194 | 95.8k | unsigned Z = C.width(); |
2195 | 1.94M | while (Z > 0 && 1.94M C[Z-1].is(0)1.94M ) |
2196 | 1.84M | --Z; |
2197 | 95.8k | return C.width() - Z; |
2198 | 95.8k | }; |
2199 | 3.72k | |
2200 | 3.72k | // Count the number of leading zeros in the target RC. |
2201 | 3.72k | unsigned Z = ctlz(RC); |
2202 | 3.72k | if (Z == 0 || 3.72k Z == W1.53k ) |
2203 | 2.43k | return false; |
2204 | 1.28k | |
2205 | 1.28k | // A simplistic analysis: assume the source register (the one being split) |
2206 | 1.28k | // is fully unknown, and that all its bits are self-references. |
2207 | 1.28k | const BitTracker::BitValue &B0 = RC[0]; |
2208 | 1.28k | if (B0.Type != BitTracker::BitValue::Ref) |
2209 | 289 | return false; |
2210 | 997 | |
2211 | 997 | unsigned SrcR = B0.RefI.Reg; |
2212 | 997 | unsigned SrcSR = 0; |
2213 | 997 | unsigned Pos = B0.RefI.Pos; |
2214 | 997 | |
2215 | 997 | // All the non-zero bits should be consecutive bits from the same register. |
2216 | 8.36k | for (unsigned i = 1; i < W-Z8.36k ; ++i7.37k ) { |
2217 | 7.43k | const BitTracker::BitValue &V = RC[i]; |
2218 | 7.43k | if (V.Type != BitTracker::BitValue::Ref) |
2219 | 10 | return false; |
2220 | 7.42k | if (7.42k V.RefI.Reg != SrcR || 7.42k V.RefI.Pos != Pos+i7.37k ) |
2221 | 56 | return false; |
2222 | 7.43k | } |
2223 | 997 | |
2224 | 997 | // Now, find the other bitfield among AVs. |
2225 | 96.6k | for (unsigned S = AVs.find_first(); 931 S96.6k ; S = AVs.find_next(S)95.7k ) { |
2226 | 95.7k | // The number of leading zeros here should be the number of trailing |
2227 | 95.7k | // non-zeros in RC. |
2228 | 95.7k | if (!BT.has(S)) |
2229 | 0 | continue; |
2230 | 95.7k | const BitTracker::RegisterCell &SC = BT.lookup(S); |
2231 | 95.7k | if (SC.width() != W || 95.7k ctlz(SC) != W-Z92.0k ) |
2232 | 95.6k | continue; |
2233 | 108 | // The Z lower bits should now match SrcR. |
2234 | 108 | const BitTracker::BitValue &S0 = SC[0]; |
2235 | 108 | if (S0.Type != BitTracker::BitValue::Ref || 108 S0.RefI.Reg != SrcR107 ) |
2236 | 106 | continue; |
2237 | 2 | unsigned P = S0.RefI.Pos; |
2238 | 2 | |
2239 | 2 | if (Pos <= P && 2 (Pos + W-Z) != P1 ) |
2240 | 0 | continue; |
2241 | 2 | if (2 P < Pos && 2 (P + Z) != Pos1 ) |
2242 | 0 | continue; |
2243 | 2 | // The starting bitfield position must be at a subregister boundary. |
2244 | 2 | if (2 std::min(P, Pos) != 0 && 2 std::min(P, Pos) != 320 ) |
2245 | 0 | continue; |
2246 | 2 | |
2247 | 2 | unsigned I; |
2248 | 33 | for (I = 1; I < Z33 ; ++I31 ) { |
2249 | 31 | const BitTracker::BitValue &V = SC[I]; |
2250 | 31 | if (V.Type != BitTracker::BitValue::Ref) |
2251 | 0 | break; |
2252 | 31 | if (31 V.RefI.Reg != SrcR || 31 V.RefI.Pos != P+I31 ) |
2253 | 0 | break; |
2254 | 31 | } |
2255 | 2 | if (I != Z) |
2256 | 0 | continue; |
2257 | 2 | |
2258 | 2 | // Generate bitsplit where S is defined. |
2259 | 2 | if (2 MaxBitSplit.getNumOccurrences()2 ) |
2260 | 0 | CountBitSplit++; |
2261 | 2 | MachineInstr *DefS = MRI.getVRegDef(S); |
2262 | 2 | assert(DefS != nullptr); |
2263 | 2 | DebugLoc DL = DefS->getDebugLoc(); |
2264 | 2 | MachineBasicBlock &B = *DefS->getParent(); |
2265 | 0 | auto At = DefS->isPHI() ? B.getFirstNonPHI() |
2266 | 2 | : MachineBasicBlock::iterator(DefS); |
2267 | 2 | if (MRI.getRegClass(SrcR)->getID() == Hexagon::DoubleRegsRegClassID) |
2268 | 0 | SrcSR = (std::min(Pos, P) == 32) ? 0 Hexagon::isub_hi0 : Hexagon::isub_lo0 ; |
2269 | 2 | if (!validateReg({SrcR,SrcSR}, Hexagon::A4_bitspliti, 1)) |
2270 | 0 | continue; |
2271 | 2 | unsigned ImmOp = Pos <= P ? 2 W-Z1 : Z1 ; |
2272 | 2 | |
2273 | 2 | // Find an existing bitsplit instruction if one already exists. |
2274 | 2 | unsigned NewR = 0; |
2275 | 0 | for (MachineInstr *In : NewMIs) { |
2276 | 0 | if (In->getOpcode() != Hexagon::A4_bitspliti) |
2277 | 0 | continue; |
2278 | 0 | MachineOperand &Op1 = In->getOperand(1); |
2279 | 0 | if (Op1.getReg() != SrcR || 0 Op1.getSubReg() != SrcSR0 ) |
2280 | 0 | continue; |
2281 | 0 | if (0 In->getOperand(2).getImm() != ImmOp0 ) |
2282 | 0 | continue; |
2283 | 0 | // Check if the target register is available here. |
2284 | 0 | MachineOperand &Op0 = In->getOperand(0); |
2285 | 0 | MachineInstr *DefI = MRI.getVRegDef(Op0.getReg()); |
2286 | 0 | assert(DefI != nullptr); |
2287 | 0 | if (!MDT.dominates(DefI, &*At)) |
2288 | 0 | continue; |
2289 | 0 |
|
2290 | 0 | // Found one that can be reused. |
2291 | 0 | assert(Op0.getSubReg() == 0); |
2292 | 0 | NewR = Op0.getReg(); |
2293 | 0 | break; |
2294 | 0 | } |
2295 | 2 | if (!NewR2 ) { |
2296 | 2 | NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass); |
2297 | 2 | auto NewBS = BuildMI(B, At, DL, HII.get(Hexagon::A4_bitspliti), NewR) |
2298 | 2 | .addReg(SrcR, 0, SrcSR) |
2299 | 2 | .addImm(ImmOp); |
2300 | 2 | NewMIs.push_back(NewBS); |
2301 | 2 | } |
2302 | 2 | if (Pos <= P2 ) { |
2303 | 1 | HBS::replaceRegWithSub(RD.Reg, NewR, Hexagon::isub_lo, MRI); |
2304 | 1 | HBS::replaceRegWithSub(S, NewR, Hexagon::isub_hi, MRI); |
2305 | 2 | } else { |
2306 | 1 | HBS::replaceRegWithSub(S, NewR, Hexagon::isub_lo, MRI); |
2307 | 1 | HBS::replaceRegWithSub(RD.Reg, NewR, Hexagon::isub_hi, MRI); |
2308 | 1 | } |
2309 | 95.7k | return true; |
2310 | 95.7k | } |
2311 | 931 | |
2312 | 929 | return false; |
2313 | 3.72k | } |
2314 | | |
2315 | | // Check for tstbit simplification opportunity, where the bit being checked |
2316 | | // can be tracked back to another register. For example: |
2317 | | // vreg2 = S2_lsr_i_r vreg1, 5 |
2318 | | // vreg3 = S2_tstbit_i vreg2, 0 |
2319 | | // => |
2320 | | // vreg3 = S2_tstbit_i vreg1, 5 |
2321 | | bool BitSimplification::simplifyTstbit(MachineInstr *MI, |
2322 | 760 | BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) { |
2323 | 760 | unsigned Opc = MI->getOpcode(); |
2324 | 760 | if (Opc != Hexagon::S2_tstbit_i) |
2325 | 757 | return false; |
2326 | 3 | |
2327 | 3 | unsigned BN = MI->getOperand(2).getImm(); |
2328 | 3 | BitTracker::RegisterRef RS = MI->getOperand(1); |
2329 | 3 | unsigned F, W; |
2330 | 3 | DebugLoc DL = MI->getDebugLoc(); |
2331 | 3 | if (!BT.has(RS.Reg) || 3 !HBS::getSubregMask(RS, F, W, MRI)3 ) |
2332 | 0 | return false; |
2333 | 3 | MachineBasicBlock &B = *MI->getParent(); |
2334 | 0 | auto At = MI->isPHI() ? B.getFirstNonPHI() |
2335 | 3 | : MachineBasicBlock::iterator(MI); |
2336 | 3 | |
2337 | 3 | const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg); |
2338 | 3 | const BitTracker::BitValue &V = SC[F+BN]; |
2339 | 3 | if (V.Type == BitTracker::BitValue::Ref && 3 V.RefI.Reg != RS.Reg3 ) { |
2340 | 0 | const TargetRegisterClass *TC = MRI.getRegClass(V.RefI.Reg); |
2341 | 0 | // Need to map V.RefI.Reg to a 32-bit register, i.e. if it is |
2342 | 0 | // a double register, need to use a subregister and adjust bit |
2343 | 0 | // number. |
2344 | 0 | unsigned P = std::numeric_limits<unsigned>::max(); |
2345 | 0 | BitTracker::RegisterRef RR(V.RefI.Reg, 0); |
2346 | 0 | if (TC == &Hexagon::DoubleRegsRegClass0 ) { |
2347 | 0 | P = V.RefI.Pos; |
2348 | 0 | RR.Sub = Hexagon::isub_lo; |
2349 | 0 | if (P >= 320 ) { |
2350 | 0 | P -= 32; |
2351 | 0 | RR.Sub = Hexagon::isub_hi; |
2352 | 0 | } |
2353 | 0 | } else if (0 TC == &Hexagon::IntRegsRegClass0 ) { |
2354 | 0 | P = V.RefI.Pos; |
2355 | 0 | } |
2356 | 0 | if (P != std::numeric_limits<unsigned>::max()0 ) { |
2357 | 0 | unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass); |
2358 | 0 | BuildMI(B, At, DL, HII.get(Hexagon::S2_tstbit_i), NewR) |
2359 | 0 | .addReg(RR.Reg, 0, RR.Sub) |
2360 | 0 | .addImm(P); |
2361 | 0 | HBS::replaceReg(RD.Reg, NewR, MRI); |
2362 | 0 | BT.put(NewR, RC); |
2363 | 0 | return true; |
2364 | 0 | } |
2365 | 3 | } else if (3 V.is(0) || 3 V.is(1)3 ) { |
2366 | 0 | unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass); |
2367 | 0 | unsigned NewOpc = V.is(0) ? Hexagon::PS_false0 : Hexagon::PS_true0 ; |
2368 | 3 | BuildMI(B, At, DL, HII.get(NewOpc), NewR); |
2369 | 3 | HBS::replaceReg(RD.Reg, NewR, MRI); |
2370 | 3 | return true; |
2371 | 3 | } |
2372 | 3 | |
2373 | 3 | return false; |
2374 | 3 | } |
2375 | | |
2376 | | // Detect whether RD is a bitfield extract (sign- or zero-extended) of |
2377 | | // some register from the AVs set. Create a new corresponding instruction |
2378 | | // at the location of MI. The intent is to recognize situations where |
2379 | | // a sequence of instructions performs an operation that is equivalent to |
2380 | | // an extract operation, such as a shift left followed by a shift right. |
2381 | | bool BitSimplification::simplifyExtractLow(MachineInstr *MI, |
2382 | | BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC, |
2383 | 4.23k | const RegisterSet &AVs) { |
2384 | 4.23k | if (!GenExtract) |
2385 | 5 | return false; |
2386 | 4.23k | if (4.23k MaxExtract.getNumOccurrences()4.23k ) { |
2387 | 0 | if (CountExtract >= MaxExtract) |
2388 | 0 | return false; |
2389 | 0 | CountExtract++; |
2390 | 0 | } |
2391 | 4.23k | |
2392 | 4.23k | unsigned W = RC.width(); |
2393 | 4.23k | unsigned RW = W; |
2394 | 4.23k | unsigned Len; |
2395 | 4.23k | bool Signed; |
2396 | 4.23k | |
2397 | 4.23k | // The code is mostly class-independent, except for the part that generates |
2398 | 4.23k | // the extract instruction, and establishes the source register (in case it |
2399 | 4.23k | // needs to use a subregister). |
2400 | 4.23k | const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI); |
2401 | 4.23k | if (FRC != &Hexagon::IntRegsRegClass && 4.23k FRC != &Hexagon::DoubleRegsRegClass518 ) |
2402 | 0 | return false; |
2403 | 4.23k | assert(RD.Sub == 0); |
2404 | 4.23k | |
2405 | 4.23k | // Observation: |
2406 | 4.23k | // If the cell has a form of 00..0xx..x with k zeros and n remaining |
2407 | 4.23k | // bits, this could be an extractu of the n bits, but it could also be |
2408 | 4.23k | // an extractu of a longer field which happens to have 0s in the top |
2409 | 4.23k | // bit positions. |
2410 | 4.23k | // The same logic applies to sign-extended fields. |
2411 | 4.23k | // |
2412 | 4.23k | // Do not check for the extended extracts, since it would expand the |
2413 | 4.23k | // search space quite a bit. The search may be expensive as it is. |
2414 | 4.23k | |
2415 | 4.23k | const BitTracker::BitValue &TopV = RC[W-1]; |
2416 | 4.23k | |
2417 | 4.23k | // Eliminate candidates that have self-referential bits, since they |
2418 | 4.23k | // cannot be extracts from other registers. Also, skip registers that |
2419 | 4.23k | // have compile-time constant values. |
2420 | 4.23k | bool IsConst = true; |
2421 | 43.3k | for (unsigned I = 0; I != W43.3k ; ++I39.1k ) { |
2422 | 42.3k | const BitTracker::BitValue &V = RC[I]; |
2423 | 42.3k | if (V.Type == BitTracker::BitValue::Ref && 42.3k V.RefI.Reg == RD.Reg12.4k ) |
2424 | 3.19k | return false; |
2425 | 39.1k | IsConst = IsConst && 39.1k (V.is(0) || 24.8k V.is(1)3.68k ); |
2426 | 42.3k | } |
2427 | 1.04k | if (1.04k IsConst1.04k ) |
2428 | 636 | return false; |
2429 | 404 | |
2430 | 404 | if (404 TopV.is(0) || 404 TopV.is(1)159 ) { |
2431 | 247 | bool S = TopV.is(1); |
2432 | 5.54k | for (--W; W > 0 && 5.54k RC[W-1].is(S)5.54k ; --W5.29k ) |
2433 | 5.29k | ; |
2434 | 247 | Len = W; |
2435 | 247 | Signed = S; |
2436 | 247 | // The sign bit must be a part of the field being extended. |
2437 | 247 | if (Signed) |
2438 | 2 | ++Len; |
2439 | 404 | } else { |
2440 | 157 | // This could still be a sign-extended extract. |
2441 | 157 | assert(TopV.Type == BitTracker::BitValue::Ref); |
2442 | 157 | if (TopV.RefI.Reg == RD.Reg || 157 TopV.RefI.Pos == W-1157 ) |
2443 | 62 | return false; |
2444 | 1.00k | for (--W; 95 W > 0 && 1.00k RC[W-1] == TopV1.00k ; --W914 ) |
2445 | 914 | ; |
2446 | 157 | // The top bits of RC are copies of TopV. One occurrence of TopV will |
2447 | 157 | // be a part of the field. |
2448 | 157 | Len = W + 1; |
2449 | 157 | Signed = true; |
2450 | 157 | } |
2451 | 404 | |
2452 | 404 | // This would be just a copy. It should be handled elsewhere. |
2453 | 342 | if (342 Len == RW342 ) |
2454 | 55 | return false; |
2455 | 287 | |
2456 | 287 | DEBUG287 ({ |
2457 | 287 | dbgs() << __func__ << " on reg: " << PrintReg(RD.Reg, &HRI, RD.Sub) |
2458 | 287 | << ", MI: " << *MI; |
2459 | 287 | dbgs() << "Cell: " << RC << '\n'; |
2460 | 287 | dbgs() << "Expected bitfield size: " << Len << " bits, " |
2461 | 287 | << (Signed ? "sign" : "zero") << "-extended\n"; |
2462 | 287 | }); |
2463 | 287 | |
2464 | 287 | bool Changed = false; |
2465 | 287 | |
2466 | 8.53k | for (unsigned R = AVs.find_first(); R != 08.53k ; R = AVs.find_next(R)8.24k ) { |
2467 | 8.27k | if (!BT.has(R)) |
2468 | 0 | continue; |
2469 | 8.27k | const BitTracker::RegisterCell &SC = BT.lookup(R); |
2470 | 8.27k | unsigned SW = SC.width(); |
2471 | 8.27k | |
2472 | 8.27k | // The source can be longer than the destination, as long as its size is |
2473 | 8.27k | // a multiple of the size of the destination. Also, we would need to be |
2474 | 8.27k | // able to refer to the subregister in the source that would be of the |
2475 | 8.27k | // same size as the destination, but only check the sizes here. |
2476 | 8.27k | if (SW < RW || 8.27k (SW % RW) != 07.91k ) |
2477 | 353 | continue; |
2478 | 7.91k | |
2479 | 7.91k | // The field can start at any offset in SC as long as it contains Len |
2480 | 7.91k | // bits and does not cross subregister boundary (if the source register |
2481 | 7.91k | // is longer than the destination). |
2482 | 7.91k | unsigned Off = 0; |
2483 | 174k | while (Off <= SW-Len174k ) { |
2484 | 166k | unsigned OE = (Off+Len)/RW; |
2485 | 166k | if (OE != Off/RW166k ) { |
2486 | 8.24k | // The assumption here is that if the source (R) is longer than the |
2487 | 8.24k | // destination, then the destination is a sequence of words of |
2488 | 8.24k | // size RW, and each such word in R can be accessed via a subregister. |
2489 | 8.24k | // |
2490 | 8.24k | // If the beginning and the end of the field cross the subregister |
2491 | 8.24k | // boundary, advance to the next subregister. |
2492 | 8.24k | Off = OE*RW; |
2493 | 8.24k | continue; |
2494 | 8.24k | } |
2495 | 158k | if (158k HBS::isEqual(RC, 0, SC, Off, Len)158k ) |
2496 | 158 | break; |
2497 | 158k | ++Off; |
2498 | 158k | } |
2499 | 7.91k | |
2500 | 7.91k | if (Off > SW-Len) |
2501 | 7.76k | continue; |
2502 | 158 | |
2503 | 158 | // Found match. |
2504 | 158 | unsigned ExtOpc = 0; |
2505 | 158 | if (Off == 0158 ) { |
2506 | 118 | if (Len == 8) |
2507 | 81 | ExtOpc = Signed ? 81 Hexagon::A2_sxtb11 : Hexagon::A2_zxtb70 ; |
2508 | 37 | else if (37 Len == 1637 ) |
2509 | 23 | ExtOpc = Signed ? 23 Hexagon::A2_sxth13 : Hexagon::A2_zxth10 ; |
2510 | 14 | else if (14 Len < 10 && 14 !Signed9 ) |
2511 | 9 | ExtOpc = Hexagon::A2_andir; |
2512 | 118 | } |
2513 | 158 | if (ExtOpc == 0158 ) { |
2514 | 45 | ExtOpc = |
2515 | 9 | Signed ? (RW == 32 ? 9 Hexagon::S4_extract9 : Hexagon::S4_extractp0 ) |
2516 | 36 | : (RW == 32 ? 36 Hexagon::S2_extractu28 : Hexagon::S2_extractup8 ); |
2517 | 45 | } |
2518 | 158 | unsigned SR = 0; |
2519 | 158 | // This only recognizes isub_lo and isub_hi. |
2520 | 158 | if (RW != SW && 158 RW*2 != SW11 ) |
2521 | 0 | continue; |
2522 | 158 | if (158 RW != SW158 ) |
2523 | 11 | SR = (Off/RW == 0) ? 11 Hexagon::isub_lo3 : Hexagon::isub_hi8 ; |
2524 | 158 | Off = Off % RW; |
2525 | 158 | |
2526 | 158 | if (!validateReg({R,SR}, ExtOpc, 1)) |
2527 | 60 | continue; |
2528 | 98 | |
2529 | 98 | // Don't generate the same instruction as the one being optimized. |
2530 | 98 | if (98 MI->getOpcode() == ExtOpc98 ) { |
2531 | 74 | // All possible ExtOpc's have the source in operand(1). |
2532 | 74 | const MachineOperand &SrcOp = MI->getOperand(1); |
2533 | 74 | if (SrcOp.getReg() == R) |
2534 | 72 | continue; |
2535 | 26 | } |
2536 | 26 | |
2537 | 26 | DebugLoc DL = MI->getDebugLoc(); |
2538 | 26 | MachineBasicBlock &B = *MI->getParent(); |
2539 | 26 | unsigned NewR = MRI.createVirtualRegister(FRC); |
2540 | 0 | auto At = MI->isPHI() ? B.getFirstNonPHI() |
2541 | 26 | : MachineBasicBlock::iterator(MI); |
2542 | 26 | auto MIB = BuildMI(B, At, DL, HII.get(ExtOpc), NewR) |
2543 | 26 | .addReg(R, 0, SR); |
2544 | 26 | switch (ExtOpc) { |
2545 | 3 | case Hexagon::A2_sxtb: |
2546 | 3 | case Hexagon::A2_zxtb: |
2547 | 3 | case Hexagon::A2_sxth: |
2548 | 3 | case Hexagon::A2_zxth: |
2549 | 3 | break; |
2550 | 1 | case Hexagon::A2_andir: |
2551 | 1 | MIB.addImm((1u << Len) - 1); |
2552 | 1 | break; |
2553 | 22 | case Hexagon::S4_extract: |
2554 | 22 | case Hexagon::S2_extractu: |
2555 | 22 | case Hexagon::S4_extractp: |
2556 | 22 | case Hexagon::S2_extractup: |
2557 | 22 | MIB.addImm(Len) |
2558 | 22 | .addImm(Off); |
2559 | 22 | break; |
2560 | 0 | default: |
2561 | 0 | llvm_unreachable("Unexpected opcode"); |
2562 | 26 | } |
2563 | 26 | |
2564 | 26 | HBS::replaceReg(RD.Reg, NewR, MRI); |
2565 | 26 | BT.put(BitTracker::RegisterRef(NewR), RC); |
2566 | 26 | Changed = true; |
2567 | 26 | break; |
2568 | 26 | } |
2569 | 287 | |
2570 | 287 | return Changed; |
2571 | 4.23k | } |
2572 | | |
2573 | | bool BitSimplification::processBlock(MachineBasicBlock &B, |
2574 | 1.80k | const RegisterSet &AVs) { |
2575 | 1.80k | if (!BT.reached(&B)) |
2576 | 7 | return false; |
2577 | 1.79k | bool Changed = false; |
2578 | 1.79k | RegisterSet AVB = AVs; |
2579 | 1.79k | RegisterSet Defs; |
2580 | 1.79k | |
2581 | 15.2k | for (auto I = B.begin(), E = B.end(); I != E15.2k ; ++I, AVB.insert(Defs)13.4k ) { |
2582 | 13.4k | MachineInstr *MI = &*I; |
2583 | 13.4k | Defs.clear(); |
2584 | 13.4k | HBS::getInstrDefs(*MI, Defs); |
2585 | 13.4k | |
2586 | 13.4k | unsigned Opc = MI->getOpcode(); |
2587 | 13.4k | if (Opc == TargetOpcode::COPY || 13.4k Opc == TargetOpcode::REG_SEQUENCE11.2k ) |
2588 | 2.33k | continue; |
2589 | 11.0k | |
2590 | 11.0k | if (11.0k MI->mayStore()11.0k ) { |
2591 | 1.92k | bool T = genStoreUpperHalf(MI); |
2592 | 1.91k | T = T || genStoreImmediate(MI); |
2593 | 1.92k | Changed |= T; |
2594 | 1.92k | continue; |
2595 | 1.92k | } |
2596 | 9.17k | |
2597 | 9.17k | if (9.17k Defs.count() != 19.17k ) |
2598 | 2.72k | continue; |
2599 | 6.45k | const MachineOperand &Op0 = MI->getOperand(0); |
2600 | 6.45k | if (!Op0.isReg() || 6.45k !Op0.isDef()6.45k ) |
2601 | 2 | continue; |
2602 | 6.45k | BitTracker::RegisterRef RD = Op0; |
2603 | 6.45k | if (!BT.has(RD.Reg)) |
2604 | 0 | continue; |
2605 | 6.45k | const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI); |
2606 | 6.45k | const BitTracker::RegisterCell &RC = BT.lookup(RD.Reg); |
2607 | 6.45k | |
2608 | 6.45k | if (FRC->getID() == Hexagon::DoubleRegsRegClassID6.45k ) { |
2609 | 518 | bool T = genPackhl(MI, RD, RC); |
2610 | 518 | T = T || simplifyExtractLow(MI, RD, RC, AVB); |
2611 | 518 | Changed |= T; |
2612 | 518 | continue; |
2613 | 518 | } |
2614 | 5.93k | |
2615 | 5.93k | if (5.93k FRC->getID() == Hexagon::IntRegsRegClassID5.93k ) { |
2616 | 3.72k | bool T = genBitSplit(MI, RD, RC, AVB); |
2617 | 3.71k | T = T || simplifyExtractLow(MI, RD, RC, AVB); |
2618 | 3.69k | T = T || genExtractHalf(MI, RD, RC); |
2619 | 3.69k | T = T || genCombineHalf(MI, RD, RC); |
2620 | 3.68k | T = T || genExtractLow(MI, RD, RC); |
2621 | 3.72k | Changed |= T; |
2622 | 3.72k | continue; |
2623 | 3.72k | } |
2624 | 2.21k | |
2625 | 2.21k | if (2.21k FRC->getID() == Hexagon::PredRegsRegClassID2.21k ) { |
2626 | 760 | bool T = simplifyTstbit(MI, RD, RC); |
2627 | 760 | Changed |= T; |
2628 | 760 | continue; |
2629 | 760 | } |
2630 | 13.4k | } |
2631 | 1.80k | return Changed; |
2632 | 1.80k | } |
2633 | | |
2634 | 840 | bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) { |
2635 | 840 | if (skipFunction(*MF.getFunction())) |
2636 | 0 | return false; |
2637 | 840 | |
2638 | 840 | auto &HST = MF.getSubtarget<HexagonSubtarget>(); |
2639 | 840 | auto &HRI = *HST.getRegisterInfo(); |
2640 | 840 | auto &HII = *HST.getInstrInfo(); |
2641 | 840 | |
2642 | 840 | MDT = &getAnalysis<MachineDominatorTree>(); |
2643 | 840 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
2644 | 840 | bool Changed; |
2645 | 840 | |
2646 | 840 | Changed = DeadCodeElimination(MF, *MDT).run(); |
2647 | 840 | |
2648 | 840 | const HexagonEvaluator HE(HRI, MRI, HII, MF); |
2649 | 840 | BitTracker BT(HE, MF); |
2650 | 840 | DEBUG(BT.trace(true)); |
2651 | 840 | BT.run(); |
2652 | 840 | |
2653 | 840 | MachineBasicBlock &Entry = MF.front(); |
2654 | 840 | |
2655 | 840 | RegisterSet AIG; // Available registers for IG. |
2656 | 840 | ConstGeneration ImmG(BT, HII, MRI); |
2657 | 840 | Changed |= visitBlock(Entry, ImmG, AIG); |
2658 | 840 | |
2659 | 840 | RegisterSet ARE; // Available registers for RIE. |
2660 | 840 | RedundantInstrElimination RIE(BT, HII, HRI, MRI); |
2661 | 840 | bool Ried = visitBlock(Entry, RIE, ARE); |
2662 | 840 | if (Ried840 ) { |
2663 | 44 | Changed = true; |
2664 | 44 | BT.run(); |
2665 | 44 | } |
2666 | 840 | |
2667 | 840 | RegisterSet ACG; // Available registers for CG. |
2668 | 840 | CopyGeneration CopyG(BT, HII, HRI, MRI); |
2669 | 840 | Changed |= visitBlock(Entry, CopyG, ACG); |
2670 | 840 | |
2671 | 840 | RegisterSet ACP; // Available registers for CP. |
2672 | 840 | CopyPropagation CopyP(HRI, MRI); |
2673 | 840 | Changed |= visitBlock(Entry, CopyP, ACP); |
2674 | 840 | |
2675 | 683 | Changed = DeadCodeElimination(MF, *MDT).run() || Changed; |
2676 | 840 | |
2677 | 840 | BT.run(); |
2678 | 840 | RegisterSet ABS; // Available registers for BS. |
2679 | 840 | BitSimplification BitS(BT, *MDT, HII, HRI, MRI, MF); |
2680 | 840 | Changed |= visitBlock(Entry, BitS, ABS); |
2681 | 840 | |
2682 | 812 | Changed = DeadCodeElimination(MF, *MDT).run() || Changed; |
2683 | 840 | |
2684 | 840 | if (Changed840 ) { |
2685 | 196 | for (auto &B : MF) |
2686 | 718 | for (auto &I : B) |
2687 | 7.83k | I.clearKillInfo(); |
2688 | 196 | DeadCodeElimination(MF, *MDT).run(); |
2689 | 196 | } |
2690 | 840 | return Changed; |
2691 | 840 | } |
2692 | | |
2693 | | // Recognize loops where the code at the end of the loop matches the code |
2694 | | // before the entry of the loop, and the matching code is such that is can |
2695 | | // be simplified. This pass relies on the bit simplification above and only |
2696 | | // prepares code in a way that can be handled by the bit simplifcation. |
2697 | | // |
2698 | | // This is the motivating testcase (and explanation): |
2699 | | // |
2700 | | // { |
2701 | | // loop0(.LBB0_2, r1) // %for.body.preheader |
2702 | | // r5:4 = memd(r0++#8) |
2703 | | // } |
2704 | | // { |
2705 | | // r3 = lsr(r4, #16) |
2706 | | // r7:6 = combine(r5, r5) |
2707 | | // } |
2708 | | // { |
2709 | | // r3 = insert(r5, #16, #16) |
2710 | | // r7:6 = vlsrw(r7:6, #16) |
2711 | | // } |
2712 | | // .LBB0_2: |
2713 | | // { |
2714 | | // memh(r2+#4) = r5 |
2715 | | // memh(r2+#6) = r6 # R6 is really R5.H |
2716 | | // } |
2717 | | // { |
2718 | | // r2 = add(r2, #8) |
2719 | | // memh(r2+#0) = r4 |
2720 | | // memh(r2+#2) = r3 # R3 is really R4.H |
2721 | | // } |
2722 | | // { |
2723 | | // r5:4 = memd(r0++#8) |
2724 | | // } |
2725 | | // { # "Shuffling" code that sets up R3 and R6 |
2726 | | // r3 = lsr(r4, #16) # so that their halves can be stored in the |
2727 | | // r7:6 = combine(r5, r5) # next iteration. This could be folded into |
2728 | | // } # the stores if the code was at the beginning |
2729 | | // { # of the loop iteration. Since the same code |
2730 | | // r3 = insert(r5, #16, #16) # precedes the loop, it can actually be moved |
2731 | | // r7:6 = vlsrw(r7:6, #16) # there. |
2732 | | // }:endloop0 |
2733 | | // |
2734 | | // |
2735 | | // The outcome: |
2736 | | // |
2737 | | // { |
2738 | | // loop0(.LBB0_2, r1) |
2739 | | // r5:4 = memd(r0++#8) |
2740 | | // } |
2741 | | // .LBB0_2: |
2742 | | // { |
2743 | | // memh(r2+#4) = r5 |
2744 | | // memh(r2+#6) = r5.h |
2745 | | // } |
2746 | | // { |
2747 | | // r2 = add(r2, #8) |
2748 | | // memh(r2+#0) = r4 |
2749 | | // memh(r2+#2) = r4.h |
2750 | | // } |
2751 | | // { |
2752 | | // r5:4 = memd(r0++#8) |
2753 | | // }:endloop0 |
2754 | | |
2755 | | namespace llvm { |
2756 | | |
2757 | | FunctionPass *createHexagonLoopRescheduling(); |
2758 | | void initializeHexagonLoopReschedulingPass(PassRegistry&); |
2759 | | |
2760 | | } // end namespace llvm |
2761 | | |
2762 | | namespace { |
2763 | | |
2764 | | class HexagonLoopRescheduling : public MachineFunctionPass { |
2765 | | public: |
2766 | | static char ID; |
2767 | | |
2768 | 405 | HexagonLoopRescheduling() : MachineFunctionPass(ID) { |
2769 | 405 | initializeHexagonLoopReschedulingPass(*PassRegistry::getPassRegistry()); |
2770 | 405 | } |
2771 | | |
2772 | | bool runOnMachineFunction(MachineFunction &MF) override; |
2773 | | |
2774 | | private: |
2775 | | const HexagonInstrInfo *HII = nullptr; |
2776 | | const HexagonRegisterInfo *HRI = nullptr; |
2777 | | MachineRegisterInfo *MRI = nullptr; |
2778 | | BitTracker *BTP = nullptr; |
2779 | | |
2780 | | struct LoopCand { |
2781 | | LoopCand(MachineBasicBlock *lb, MachineBasicBlock *pb, |
2782 | 158 | MachineBasicBlock *eb) : LB(lb), PB(pb), EB(eb) {} |
2783 | | |
2784 | | MachineBasicBlock *LB, *PB, *EB; |
2785 | | }; |
2786 | | using InstrList = std::vector<MachineInstr *>; |
2787 | | struct InstrGroup { |
2788 | | BitTracker::RegisterRef Inp, Out; |
2789 | | InstrList Ins; |
2790 | | }; |
2791 | | struct PhiInfo { |
2792 | | PhiInfo(MachineInstr &P, MachineBasicBlock &B); |
2793 | | |
2794 | | unsigned DefR; |
2795 | | BitTracker::RegisterRef LR, PR; // Loop Register, Preheader Register |
2796 | | MachineBasicBlock *LB, *PB; // Loop Block, Preheader Block |
2797 | | }; |
2798 | | |
2799 | | static unsigned getDefReg(const MachineInstr *MI); |
2800 | | bool isConst(unsigned Reg) const; |
2801 | | bool isBitShuffle(const MachineInstr *MI, unsigned DefR) const; |
2802 | | bool isStoreInput(const MachineInstr *MI, unsigned DefR) const; |
2803 | | bool isShuffleOf(unsigned OutR, unsigned InpR) const; |
2804 | | bool isSameShuffle(unsigned OutR1, unsigned InpR1, unsigned OutR2, |
2805 | | unsigned &InpR2) const; |
2806 | | void moveGroup(InstrGroup &G, MachineBasicBlock &LB, MachineBasicBlock &PB, |
2807 | | MachineBasicBlock::iterator At, unsigned OldPhiR, unsigned NewPredR); |
2808 | | bool processLoop(LoopCand &C); |
2809 | | }; |
2810 | | |
2811 | | } // end anonymous namespace |
2812 | | |
2813 | | char HexagonLoopRescheduling::ID = 0; |
2814 | | |
2815 | | INITIALIZE_PASS(HexagonLoopRescheduling, "hexagon-loop-resched", |
2816 | | "Hexagon Loop Rescheduling", false, false) |
2817 | | |
2818 | | HexagonLoopRescheduling::PhiInfo::PhiInfo(MachineInstr &P, |
2819 | 91 | MachineBasicBlock &B) { |
2820 | 91 | DefR = HexagonLoopRescheduling::getDefReg(&P); |
2821 | 91 | LB = &B; |
2822 | 91 | PB = nullptr; |
2823 | 273 | for (unsigned i = 1, n = P.getNumOperands(); i < n273 ; i += 2182 ) { |
2824 | 182 | const MachineOperand &OpB = P.getOperand(i+1); |
2825 | 182 | if (OpB.getMBB() == &B182 ) { |
2826 | 91 | LR = P.getOperand(i); |
2827 | 91 | continue; |
2828 | 91 | } |
2829 | 91 | PB = OpB.getMBB(); |
2830 | 91 | PR = P.getOperand(i); |
2831 | 91 | } |
2832 | 91 | } |
2833 | | |
2834 | 386 | unsigned HexagonLoopRescheduling::getDefReg(const MachineInstr *MI) { |
2835 | 386 | RegisterSet Defs; |
2836 | 386 | HBS::getInstrDefs(*MI, Defs); |
2837 | 386 | if (Defs.count() != 1) |
2838 | 0 | return 0; |
2839 | 386 | return Defs.find_first(); |
2840 | 386 | } |
2841 | | |
2842 | 282 | bool HexagonLoopRescheduling::isConst(unsigned Reg) const { |
2843 | 282 | if (!BTP->has(Reg)) |
2844 | 0 | return false; |
2845 | 282 | const BitTracker::RegisterCell &RC = BTP->lookup(Reg); |
2846 | 424 | for (unsigned i = 0, w = RC.width(); i < w424 ; ++i142 ) { |
2847 | 424 | const BitTracker::BitValue &V = RC[i]; |
2848 | 424 | if (!V.is(0) && 424 !V.is(1)294 ) |
2849 | 282 | return false; |
2850 | 424 | } |
2851 | 0 | return true; |
2852 | 282 | } |
2853 | | |
2854 | | bool HexagonLoopRescheduling::isBitShuffle(const MachineInstr *MI, |
2855 | 1.05k | unsigned DefR) const { |
2856 | 1.05k | unsigned Opc = MI->getOpcode(); |
2857 | 1.05k | switch (Opc) { |
2858 | 71 | case TargetOpcode::COPY: |
2859 | 71 | case Hexagon::S2_lsr_i_r: |
2860 | 71 | case Hexagon::S2_asr_i_r: |
2861 | 71 | case Hexagon::S2_asl_i_r: |
2862 | 71 | case Hexagon::S2_lsr_i_p: |
2863 | 71 | case Hexagon::S2_asr_i_p: |
2864 | 71 | case Hexagon::S2_asl_i_p: |
2865 | 71 | case Hexagon::S2_insert: |
2866 | 71 | case Hexagon::A2_or: |
2867 | 71 | case Hexagon::A2_orp: |
2868 | 71 | case Hexagon::A2_and: |
2869 | 71 | case Hexagon::A2_andp: |
2870 | 71 | case Hexagon::A2_combinew: |
2871 | 71 | case Hexagon::A4_combineri: |
2872 | 71 | case Hexagon::A4_combineir: |
2873 | 71 | case Hexagon::A2_combineii: |
2874 | 71 | case Hexagon::A4_combineii: |
2875 | 71 | case Hexagon::A2_combine_ll: |
2876 | 71 | case Hexagon::A2_combine_lh: |
2877 | 71 | case Hexagon::A2_combine_hl: |
2878 | 71 | case Hexagon::A2_combine_hh: |
2879 | 71 | return true; |
2880 | 979 | } |
2881 | 979 | return false; |
2882 | 979 | } |
2883 | | |
2884 | | bool HexagonLoopRescheduling::isStoreInput(const MachineInstr *MI, |
2885 | 480 | unsigned InpR) const { |
2886 | 1.09k | for (unsigned i = 0, n = MI->getNumOperands(); i < n1.09k ; ++i618 ) { |
2887 | 1.09k | const MachineOperand &Op = MI->getOperand(i); |
2888 | 1.09k | if (!Op.isReg()) |
2889 | 19 | continue; |
2890 | 1.07k | if (1.07k Op.getReg() == InpR1.07k ) |
2891 | 480 | return i == n-1; |
2892 | 1.09k | } |
2893 | 0 | return false; |
2894 | 480 | } |
2895 | | |
2896 | 4 | bool HexagonLoopRescheduling::isShuffleOf(unsigned OutR, unsigned InpR) const { |
2897 | 4 | if (!BTP->has(OutR) || 4 !BTP->has(InpR)4 ) |
2898 | 0 | return false; |
2899 | 4 | const BitTracker::RegisterCell &OutC = BTP->lookup(OutR); |
2900 | 132 | for (unsigned i = 0, w = OutC.width(); i < w132 ; ++i128 ) { |
2901 | 128 | const BitTracker::BitValue &V = OutC[i]; |
2902 | 128 | if (V.Type != BitTracker::BitValue::Ref) |
2903 | 16 | continue; |
2904 | 112 | if (112 V.RefI.Reg != InpR112 ) |
2905 | 0 | return false; |
2906 | 128 | } |
2907 | 4 | return true; |
2908 | 4 | } |
2909 | | |
2910 | | bool HexagonLoopRescheduling::isSameShuffle(unsigned OutR1, unsigned InpR1, |
2911 | 4 | unsigned OutR2, unsigned &InpR2) const { |
2912 | 4 | if (!BTP->has(OutR1) || 4 !BTP->has(InpR1)4 || !BTP->has(OutR2)4 ) |
2913 | 0 | return false; |
2914 | 4 | const BitTracker::RegisterCell &OutC1 = BTP->lookup(OutR1); |
2915 | 4 | const BitTracker::RegisterCell &OutC2 = BTP->lookup(OutR2); |
2916 | 4 | unsigned W = OutC1.width(); |
2917 | 4 | unsigned MatchR = 0; |
2918 | 4 | if (W != OutC2.width()) |
2919 | 0 | return false; |
2920 | 132 | for (unsigned i = 0; 4 i < W132 ; ++i128 ) { |
2921 | 128 | const BitTracker::BitValue &V1 = OutC1[i], &V2 = OutC2[i]; |
2922 | 128 | if (V1.Type != V2.Type || 128 V1.Type == BitTracker::BitValue::One128 ) |
2923 | 0 | return false; |
2924 | 128 | if (128 V1.Type != BitTracker::BitValue::Ref128 ) |
2925 | 16 | continue; |
2926 | 112 | if (112 V1.RefI.Pos != V2.RefI.Pos112 ) |
2927 | 0 | return false; |
2928 | 112 | if (112 V1.RefI.Reg != InpR1112 ) |
2929 | 0 | return false; |
2930 | 112 | if (112 V2.RefI.Reg == 0 || 112 V2.RefI.Reg == OutR2112 ) |
2931 | 0 | return false; |
2932 | 112 | if (112 !MatchR112 ) |
2933 | 4 | MatchR = V2.RefI.Reg; |
2934 | 108 | else if (108 V2.RefI.Reg != MatchR108 ) |
2935 | 0 | return false; |
2936 | 128 | } |
2937 | 4 | InpR2 = MatchR; |
2938 | 4 | return true; |
2939 | 4 | } |
2940 | | |
2941 | | void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB, |
2942 | | MachineBasicBlock &PB, MachineBasicBlock::iterator At, unsigned OldPhiR, |
2943 | 4 | unsigned NewPredR) { |
2944 | 4 | DenseMap<unsigned,unsigned> RegMap; |
2945 | 4 | |
2946 | 4 | const TargetRegisterClass *PhiRC = MRI->getRegClass(NewPredR); |
2947 | 4 | unsigned PhiR = MRI->createVirtualRegister(PhiRC); |
2948 | 4 | BuildMI(LB, At, At->getDebugLoc(), HII->get(TargetOpcode::PHI), PhiR) |
2949 | 4 | .addReg(NewPredR) |
2950 | 4 | .addMBB(&PB) |
2951 | 4 | .addReg(G.Inp.Reg) |
2952 | 4 | .addMBB(&LB); |
2953 | 4 | RegMap.insert(std::make_pair(G.Inp.Reg, PhiR)); |
2954 | 4 | |
2955 | 11 | for (unsigned i = G.Ins.size(); i > 011 ; --i7 ) { |
2956 | 7 | const MachineInstr *SI = G.Ins[i-1]; |
2957 | 7 | unsigned DR = getDefReg(SI); |
2958 | 7 | const TargetRegisterClass *RC = MRI->getRegClass(DR); |
2959 | 7 | unsigned NewDR = MRI->createVirtualRegister(RC); |
2960 | 7 | DebugLoc DL = SI->getDebugLoc(); |
2961 | 7 | |
2962 | 7 | auto MIB = BuildMI(LB, At, DL, HII->get(SI->getOpcode()), NewDR); |
2963 | 24 | for (unsigned j = 0, m = SI->getNumOperands(); j < m24 ; ++j17 ) { |
2964 | 17 | const MachineOperand &Op = SI->getOperand(j); |
2965 | 17 | if (!Op.isReg()17 ) { |
2966 | 3 | MIB.add(Op); |
2967 | 3 | continue; |
2968 | 3 | } |
2969 | 14 | if (14 !Op.isUse()14 ) |
2970 | 7 | continue; |
2971 | 7 | unsigned UseR = RegMap[Op.getReg()]; |
2972 | 7 | MIB.addReg(UseR, 0, Op.getSubReg()); |
2973 | 7 | } |
2974 | 7 | RegMap.insert(std::make_pair(DR, NewDR)); |
2975 | 7 | } |
2976 | 4 | |
2977 | 4 | HBS::replaceReg(OldPhiR, RegMap[G.Out.Reg], *MRI); |
2978 | 4 | } |
2979 | | |
2980 | 158 | bool HexagonLoopRescheduling::processLoop(LoopCand &C) { |
2981 | 158 | DEBUG(dbgs() << "Processing loop in BB#" << C.LB->getNumber() << "\n"); |
2982 | 158 | std::vector<PhiInfo> Phis; |
2983 | 440 | for (auto &I : *C.LB) { |
2984 | 440 | if (!I.isPHI()) |
2985 | 158 | break; |
2986 | 282 | unsigned PR = getDefReg(&I); |
2987 | 282 | if (isConst(PR)) |
2988 | 0 | continue; |
2989 | 282 | bool BadUse = false, GoodUse = false; |
2990 | 769 | for (auto UI = MRI->use_begin(PR), UE = MRI->use_end(); UI != UE769 ; ++UI487 ) { |
2991 | 488 | MachineInstr *UseI = UI->getParent(); |
2992 | 488 | if (UseI->getParent() != C.LB488 ) { |
2993 | 1 | BadUse = true; |
2994 | 1 | break; |
2995 | 1 | } |
2996 | 487 | if (487 isBitShuffle(UseI, PR) || 487 isStoreInput(UseI, PR)480 ) |
2997 | 98 | GoodUse = true; |
2998 | 488 | } |
2999 | 282 | if (BadUse || 282 !GoodUse281 ) |
3000 | 191 | continue; |
3001 | 91 | |
3002 | 91 | Phis.push_back(PhiInfo(I, *C.LB)); |
3003 | 91 | } |
3004 | 158 | |
3005 | 158 | DEBUG({ |
3006 | 158 | dbgs() << "Phis: {"; |
3007 | 158 | for (auto &I : Phis) { |
3008 | 158 | dbgs() << ' ' << PrintReg(I.DefR, HRI) << "=phi(" |
3009 | 158 | << PrintReg(I.PR.Reg, HRI, I.PR.Sub) << ":b" << I.PB->getNumber() |
3010 | 158 | << ',' << PrintReg(I.LR.Reg, HRI, I.LR.Sub) << ":b" |
3011 | 158 | << I.LB->getNumber() << ')'; |
3012 | 158 | } |
3013 | 158 | dbgs() << " }\n"; |
3014 | 158 | }); |
3015 | 158 | |
3016 | 158 | if (Phis.empty()) |
3017 | 82 | return false; |
3018 | 76 | |
3019 | 76 | bool Changed = false; |
3020 | 76 | InstrList ShufIns; |
3021 | 76 | |
3022 | 76 | // Go backwards in the block: for each bit shuffling instruction, check |
3023 | 76 | // if that instruction could potentially be moved to the front of the loop: |
3024 | 76 | // the output of the loop cannot be used in a non-shuffling instruction |
3025 | 76 | // in this loop. |
3026 | 883 | for (auto I = C.LB->rbegin(), E = C.LB->rend(); I != E883 ; ++I807 ) { |
3027 | 883 | if (I->isTerminator()) |
3028 | 152 | continue; |
3029 | 731 | if (731 I->isPHI()731 ) |
3030 | 76 | break; |
3031 | 655 | |
3032 | 655 | RegisterSet Defs; |
3033 | 655 | HBS::getInstrDefs(*I, Defs); |
3034 | 655 | if (Defs.count() != 1) |
3035 | 92 | continue; |
3036 | 563 | unsigned DefR = Defs.find_first(); |
3037 | 563 | if (!TargetRegisterInfo::isVirtualRegister(DefR)) |
3038 | 0 | continue; |
3039 | 563 | if (563 !isBitShuffle(&*I, DefR)563 ) |
3040 | 499 | continue; |
3041 | 64 | |
3042 | 64 | bool BadUse = false; |
3043 | 73 | for (auto UI = MRI->use_begin(DefR), UE = MRI->use_end(); UI != UE73 ; ++UI9 ) { |
3044 | 64 | MachineInstr *UseI = UI->getParent(); |
3045 | 64 | if (UseI->getParent() == C.LB64 ) { |
3046 | 61 | if (UseI->isPHI()61 ) { |
3047 | 4 | // If the use is in a phi node in this loop, then it should be |
3048 | 4 | // the value corresponding to the back edge. |
3049 | 4 | unsigned Idx = UI.getOperandNo(); |
3050 | 4 | if (UseI->getOperand(Idx+1).getMBB() != C.LB) |
3051 | 0 | BadUse = true; |
3052 | 61 | } else { |
3053 | 57 | auto F = find(ShufIns, UseI); |
3054 | 57 | if (F == ShufIns.end()) |
3055 | 54 | BadUse = true; |
3056 | 57 | } |
3057 | 64 | } else { |
3058 | 3 | // There is a use outside of the loop, but there is no epilog block |
3059 | 3 | // suitable for a copy-out. |
3060 | 3 | if (C.EB == nullptr) |
3061 | 1 | BadUse = true; |
3062 | 3 | } |
3063 | 64 | if (BadUse) |
3064 | 55 | break; |
3065 | 64 | } |
3066 | 64 | |
3067 | 64 | if (BadUse) |
3068 | 55 | continue; |
3069 | 9 | ShufIns.push_back(&*I); |
3070 | 9 | } |
3071 | 76 | |
3072 | 76 | // Partition the list of shuffling instructions into instruction groups, |
3073 | 76 | // where each group has to be moved as a whole (i.e. a group is a chain of |
3074 | 76 | // dependent instructions). A group produces a single live output register, |
3075 | 76 | // which is meant to be the input of the loop phi node (although this is |
3076 | 76 | // not checked here yet). It also uses a single register as its input, |
3077 | 76 | // which is some value produced in the loop body. After moving the group |
3078 | 76 | // to the beginning of the loop, that input register would need to be |
3079 | 76 | // the loop-carried register (through a phi node) instead of the (currently |
3080 | 76 | // loop-carried) output register. |
3081 | 76 | using InstrGroupList = std::vector<InstrGroup>; |
3082 | 76 | InstrGroupList Groups; |
3083 | 76 | |
3084 | 85 | for (unsigned i = 0, n = ShufIns.size(); i < n85 ; ++i9 ) { |
3085 | 9 | MachineInstr *SI = ShufIns[i]; |
3086 | 9 | if (SI == nullptr) |
3087 | 3 | continue; |
3088 | 6 | |
3089 | 6 | InstrGroup G; |
3090 | 6 | G.Ins.push_back(SI); |
3091 | 6 | G.Out.Reg = getDefReg(SI); |
3092 | 6 | RegisterSet Inputs; |
3093 | 6 | HBS::getInstrUses(*SI, Inputs); |
3094 | 6 | |
3095 | 18 | for (unsigned j = i+1; j < n18 ; ++j12 ) { |
3096 | 12 | MachineInstr *MI = ShufIns[j]; |
3097 | 12 | if (MI == nullptr) |
3098 | 0 | continue; |
3099 | 12 | RegisterSet Defs; |
3100 | 12 | HBS::getInstrDefs(*MI, Defs); |
3101 | 12 | // If this instruction does not define any pending inputs, skip it. |
3102 | 12 | if (!Defs.intersects(Inputs)) |
3103 | 9 | continue; |
3104 | 3 | // Otherwise, add it to the current group and remove the inputs that |
3105 | 3 | // are defined by MI. |
3106 | 3 | G.Ins.push_back(MI); |
3107 | 3 | Inputs.remove(Defs); |
3108 | 3 | // Then add all registers used by MI. |
3109 | 3 | HBS::getInstrUses(*MI, Inputs); |
3110 | 3 | ShufIns[j] = nullptr; |
3111 | 3 | } |
3112 | 6 | |
3113 | 6 | // Only add a group if it requires at most one register. |
3114 | 6 | if (Inputs.count() > 1) |
3115 | 0 | continue; |
3116 | 6 | auto LoopInpEq = [G] (const PhiInfo &P) -> bool 6 { |
3117 | 12 | return G.Out.Reg == P.LR.Reg; |
3118 | 12 | }; |
3119 | 6 | if (llvm::find_if(Phis, LoopInpEq) == Phis.end()) |
3120 | 2 | continue; |
3121 | 4 | |
3122 | 4 | G.Inp.Reg = Inputs.find_first(); |
3123 | 4 | Groups.push_back(G); |
3124 | 4 | } |
3125 | 76 | |
3126 | 76 | DEBUG({ |
3127 | 76 | for (unsigned i = 0, n = Groups.size(); i < n; ++i) { |
3128 | 76 | InstrGroup &G = Groups[i]; |
3129 | 76 | dbgs() << "Group[" << i << "] inp: " |
3130 | 76 | << PrintReg(G.Inp.Reg, HRI, G.Inp.Sub) |
3131 | 76 | << " out: " << PrintReg(G.Out.Reg, HRI, G.Out.Sub) << "\n"; |
3132 | 76 | for (unsigned j = 0, m = G.Ins.size(); j < m; ++j) |
3133 | 76 | dbgs() << " " << *G.Ins[j]; |
3134 | 76 | } |
3135 | 76 | }); |
3136 | 76 | |
3137 | 80 | for (unsigned i = 0, n = Groups.size(); i < n80 ; ++i4 ) { |
3138 | 4 | InstrGroup &G = Groups[i]; |
3139 | 4 | if (!isShuffleOf(G.Out.Reg, G.Inp.Reg)) |
3140 | 0 | continue; |
3141 | 4 | auto LoopInpEq = [G] (const PhiInfo &P) -> bool 4 { |
3142 | 10 | return G.Out.Reg == P.LR.Reg; |
3143 | 10 | }; |
3144 | 4 | auto F = llvm::find_if(Phis, LoopInpEq); |
3145 | 4 | if (F == Phis.end()) |
3146 | 0 | continue; |
3147 | 4 | unsigned PrehR = 0; |
3148 | 4 | if (!isSameShuffle(G.Out.Reg, G.Inp.Reg, F->PR.Reg, PrehR)4 ) { |
3149 | 0 | const MachineInstr *DefPrehR = MRI->getVRegDef(F->PR.Reg); |
3150 | 0 | unsigned Opc = DefPrehR->getOpcode(); |
3151 | 0 | if (Opc != Hexagon::A2_tfrsi && 0 Opc != Hexagon::A2_tfrpi0 ) |
3152 | 0 | continue; |
3153 | 0 | if (0 !DefPrehR->getOperand(1).isImm()0 ) |
3154 | 0 | continue; |
3155 | 0 | if (0 DefPrehR->getOperand(1).getImm() != 00 ) |
3156 | 0 | continue; |
3157 | 0 | const TargetRegisterClass *RC = MRI->getRegClass(G.Inp.Reg); |
3158 | 0 | if (RC != MRI->getRegClass(F->PR.Reg)0 ) { |
3159 | 0 | PrehR = MRI->createVirtualRegister(RC); |
3160 | 0 | unsigned TfrI = (RC == &Hexagon::IntRegsRegClass) ? Hexagon::A2_tfrsi |
3161 | 0 | : Hexagon::A2_tfrpi; |
3162 | 0 | auto T = C.PB->getFirstTerminator(); |
3163 | 0 | DebugLoc DL = (T != C.PB->end()) ? T->getDebugLoc()0 : DebugLoc()0 ; |
3164 | 0 | BuildMI(*C.PB, T, DL, HII->get(TfrI), PrehR) |
3165 | 0 | .addImm(0); |
3166 | 0 | } else { |
3167 | 0 | PrehR = F->PR.Reg; |
3168 | 0 | } |
3169 | 0 | } |
3170 | 4 | // isSameShuffle could match with PrehR being of a wider class than |
3171 | 4 | // G.Inp.Reg, for example if G shuffles the low 32 bits of its input, |
3172 | 4 | // it would match for the input being a 32-bit register, and PrehR |
3173 | 4 | // being a 64-bit register (where the low 32 bits match). This could |
3174 | 4 | // be handled, but for now skip these cases. |
3175 | 4 | if (4 MRI->getRegClass(PrehR) != MRI->getRegClass(G.Inp.Reg)4 ) |
3176 | 0 | continue; |
3177 | 4 | moveGroup(G, *F->LB, *F->PB, F->LB->getFirstNonPHI(), F->DefR, PrehR); |
3178 | 4 | Changed = true; |
3179 | 4 | } |
3180 | 158 | |
3181 | 158 | return Changed; |
3182 | 158 | } |
3183 | | |
3184 | 860 | bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction &MF) { |
3185 | 860 | if (skipFunction(*MF.getFunction())) |
3186 | 0 | return false; |
3187 | 860 | |
3188 | 860 | auto &HST = MF.getSubtarget<HexagonSubtarget>(); |
3189 | 860 | HII = HST.getInstrInfo(); |
3190 | 860 | HRI = HST.getRegisterInfo(); |
3191 | 860 | MRI = &MF.getRegInfo(); |
3192 | 860 | const HexagonEvaluator HE(*HRI, *MRI, *HII, MF); |
3193 | 860 | BitTracker BT(HE, MF); |
3194 | 860 | DEBUG(BT.trace(true)); |
3195 | 860 | BT.run(); |
3196 | 860 | BTP = &BT; |
3197 | 860 | |
3198 | 860 | std::vector<LoopCand> Cand; |
3199 | 860 | |
3200 | 1.83k | for (auto &B : MF) { |
3201 | 1.83k | if (B.pred_size() != 2 || 1.83k B.succ_size() != 2402 ) |
3202 | 1.59k | continue; |
3203 | 235 | MachineBasicBlock *PB = nullptr; |
3204 | 235 | bool IsLoop = false; |
3205 | 705 | for (auto PI = B.pred_begin(), PE = B.pred_end(); PI != PE705 ; ++PI470 ) { |
3206 | 470 | if (*PI != &B) |
3207 | 312 | PB = *PI; |
3208 | 470 | else |
3209 | 158 | IsLoop = true; |
3210 | 470 | } |
3211 | 235 | if (!IsLoop) |
3212 | 77 | continue; |
3213 | 158 | |
3214 | 158 | MachineBasicBlock *EB = nullptr; |
3215 | 253 | for (auto SI = B.succ_begin(), SE = B.succ_end(); SI != SE253 ; ++SI95 ) { |
3216 | 253 | if (*SI == &B) |
3217 | 95 | continue; |
3218 | 158 | // Set EP to the epilog block, if it has only 1 predecessor (i.e. the |
3219 | 158 | // edge from B to EP is non-critical. |
3220 | 158 | if (158 (*SI)->pred_size() == 1158 ) |
3221 | 76 | EB = *SI; |
3222 | 253 | break; |
3223 | 253 | } |
3224 | 1.83k | |
3225 | 1.83k | Cand.push_back(LoopCand(&B, PB, EB)); |
3226 | 1.83k | } |
3227 | 860 | |
3228 | 860 | bool Changed = false; |
3229 | 860 | for (auto &C : Cand) |
3230 | 158 | Changed |= processLoop(C); |
3231 | 860 | |
3232 | 860 | return Changed; |
3233 | 860 | } |
3234 | | |
3235 | | //===----------------------------------------------------------------------===// |
3236 | | // Public Constructor Functions |
3237 | | //===----------------------------------------------------------------------===// |
3238 | | |
3239 | 405 | FunctionPass *llvm::createHexagonLoopRescheduling() { |
3240 | 405 | return new HexagonLoopRescheduling(); |
3241 | 405 | } |
3242 | | |
3243 | 399 | FunctionPass *llvm::createHexagonBitSimplify() { |
3244 | 399 | return new HexagonBitSimplify(); |
3245 | 399 | } |