/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/CodeGen/PeepholeOptimizer.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- PeepholeOptimizer.cpp - Peephole Optimizations ---------------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // Perform peephole optimizations on the machine code: |
11 | | // |
12 | | // - Optimize Extensions |
13 | | // |
14 | | // Optimization of sign / zero extension instructions. It may be extended to |
15 | | // handle other instructions with similar properties. |
16 | | // |
17 | | // On some targets, some instructions, e.g. X86 sign / zero extension, may |
18 | | // leave the source value in the lower part of the result. This optimization |
19 | | // will replace some uses of the pre-extension value with uses of the |
20 | | // sub-register of the results. |
21 | | // |
22 | | // - Optimize Comparisons |
23 | | // |
24 | | // Optimization of comparison instructions. For instance, in this code: |
25 | | // |
26 | | // sub r1, 1 |
27 | | // cmp r1, 0 |
28 | | // bz L1 |
29 | | // |
30 |      | // If the "sub" instruction already sets (or could be modified to set) the |
31 | | // same flag that the "cmp" instruction sets and that "bz" uses, then we can |
32 | | // eliminate the "cmp" instruction. |
33 | | // |
34 | | // Another instance, in this code: |
35 | | // |
36 | | // sub r1, r3 | sub r1, imm |
37 | | // cmp r3, r1 or cmp r1, r3 | cmp r1, imm |
38 | | // bge L1 |
39 | | // |
40 | | // If the branch instruction can use flag from "sub", then we can replace |
41 | | // "sub" with "subs" and eliminate the "cmp" instruction. |
42 | | // |
43 | | // - Optimize Loads: |
44 | | // |
45 | | // Loads that can be folded into a later instruction. A load is foldable |
46 | | // if it loads to virtual registers and the virtual register defined has |
47 | | // a single use. |
48 | | // |
49 | | // - Optimize Copies and Bitcast (more generally, target specific copies): |
50 | | // |
51 | | // Rewrite copies and bitcasts to avoid cross register bank copies |
52 | | // when possible. |
53 | | // E.g., Consider the following example, where capital and lower |
54 | | // letters denote different register file: |
55 | | // b = copy A <-- cross-bank copy |
56 | | // C = copy b <-- cross-bank copy |
57 | | // => |
58 | | // b = copy A <-- cross-bank copy |
59 | | // C = copy A <-- same-bank copy |
60 | | // |
61 | | // E.g., for bitcast: |
62 | | // b = bitcast A <-- cross-bank copy |
63 | | // C = bitcast b <-- cross-bank copy |
64 | | // => |
65 | | // b = bitcast A <-- cross-bank copy |
66 | | // C = copy A <-- same-bank copy |
67 | | //===----------------------------------------------------------------------===// |
68 | | |
69 | | #include "llvm/ADT/DenseMap.h" |
70 | | #include "llvm/ADT/Optional.h" |
71 | | #include "llvm/ADT/SmallPtrSet.h" |
72 | | #include "llvm/ADT/SmallSet.h" |
73 | | #include "llvm/ADT/SmallVector.h" |
74 | | #include "llvm/ADT/Statistic.h" |
75 | | #include "llvm/CodeGen/MachineBasicBlock.h" |
76 | | #include "llvm/CodeGen/MachineDominators.h" |
77 | | #include "llvm/CodeGen/MachineFunction.h" |
78 | | #include "llvm/CodeGen/MachineFunctionPass.h" |
79 | | #include "llvm/CodeGen/MachineInstr.h" |
80 | | #include "llvm/CodeGen/MachineInstrBuilder.h" |
81 | | #include "llvm/CodeGen/MachineLoopInfo.h" |
82 | | #include "llvm/CodeGen/MachineOperand.h" |
83 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
84 | | #include "llvm/MC/LaneBitmask.h" |
85 | | #include "llvm/MC/MCInstrDesc.h" |
86 | | #include "llvm/Pass.h" |
87 | | #include "llvm/Support/CommandLine.h" |
88 | | #include "llvm/Support/Debug.h" |
89 | | #include "llvm/Support/ErrorHandling.h" |
90 | | #include "llvm/Support/raw_ostream.h" |
91 | | #include "llvm/Target/TargetInstrInfo.h" |
92 | | #include "llvm/Target/TargetOpcodes.h" |
93 | | #include "llvm/Target/TargetRegisterInfo.h" |
94 | | #include "llvm/Target/TargetSubtargetInfo.h" |
95 | | #include <cassert> |
96 | | #include <cstdint> |
97 | | #include <memory> |
98 | | #include <utility> |
99 | | |
using namespace llvm;

#define DEBUG_TYPE "peephole-opt"

// Optimize Extensions
static cl::opt<bool>
Aggressive("aggressive-ext-opt", cl::Hidden,
           cl::desc("Aggressive extension optimization"));

static cl::opt<bool>
DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
                cl::desc("Disable the peephole optimizer"));

static cl::opt<bool>
DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false),
                  cl::desc("Disable advanced copy optimization"));

static cl::opt<bool> DisableNAPhysCopyOpt(
    "disable-non-allocatable-phys-copy-opt", cl::Hidden, cl::init(false),
    cl::desc("Disable non-allocatable physical register copy optimization"));

// Limit the number of PHI instructions to process
// in PeepholeOptimizer::getNextSource.
static cl::opt<unsigned> RewritePHILimit(
    "rewrite-phi-limit", cl::Hidden, cl::init(10),
    cl::desc("Limit the length of PHI chains to lookup"));

// Limit the length of recurrence chain when evaluating the benefit of
// commuting operands.
static cl::opt<unsigned> MaxRecurrenceChain(
    "recurrence-chain-limit", cl::Hidden, cl::init(3),
    cl::desc("Maximum length of recurrence chain when evaluating the benefit "
             "of commuting operands"));


// Pass statistics, reported with -stats.
STATISTIC(NumReuse, "Number of extension results reused");
STATISTIC(NumCmps, "Number of compares eliminated");
STATISTIC(NumImmFold, "Number of move immediate folded");
STATISTIC(NumLoadFold, "Number of loads folded");
STATISTIC(NumSelects, "Number of selects optimized");
STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized");
STATISTIC(NumRewrittenCopies, "Number of copies rewritten");
STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed");
144 | | namespace { |
145 | | |
146 | | class ValueTrackerResult; |
147 | | class RecurrenceInstr; |
148 | | |
class PeepholeOptimizer : public MachineFunctionPass {
  // Target/function context, set up by runOnMachineFunction (defined later in
  // this file).
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  MachineRegisterInfo *MRI;
  MachineDominatorTree *DT;  // Machine dominator tree
  MachineLoopInfo *MLI;

public:
  static char ID; // Pass identification

  PeepholeOptimizer() : MachineFunctionPass(ID) {
    initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
    AU.addRequired<MachineLoopInfo>();
    AU.addPreserved<MachineLoopInfo>();
    // The dominator tree is only consulted (see optimizeExtInstr) when
    // -aggressive-ext-opt is on, so only require/preserve it then.
    if (Aggressive) {
      AU.addRequired<MachineDominatorTree>();
      AU.addPreserved<MachineDominatorTree>();
    }
  }

  /// \brief Track Def -> Use info used for rewriting copies.
  using RewriteMapTy =
      SmallDenseMap<TargetInstrInfo::RegSubRegPair, ValueTrackerResult>;

  /// \brief Sequence of instructions that formulate recurrence cycle.
  using RecurrenceCycle = SmallVector<RecurrenceInstr, 4>;

private:
  // Individual peephole rewrites; each returns true when it changed the code.
  bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
  bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
                        SmallPtrSetImpl<MachineInstr*> &LocalMIs);
  bool optimizeSelect(MachineInstr *MI,
                      SmallPtrSetImpl<MachineInstr *> &LocalMIs);
  bool optimizeCondBranch(MachineInstr *MI);
  bool optimizeCoalescableCopy(MachineInstr *MI);
  bool optimizeUncoalescableCopy(MachineInstr *MI,
                                 SmallPtrSetImpl<MachineInstr *> &LocalMIs);
  bool optimizeRecurrence(MachineInstr &PHI);
  bool findNextSource(unsigned Reg, unsigned SubReg,
                      RewriteMapTy &RewriteMap);
  bool isMoveImmediate(MachineInstr *MI,
                       SmallSet<unsigned, 4> &ImmDefRegs,
                       DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
  bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
                     SmallSet<unsigned, 4> &ImmDefRegs,
                     DenseMap<unsigned, MachineInstr*> &ImmDefMIs);

  /// \brief Finds recurrence cycles, but only ones that are formulated around
  /// a def operand and a use operand that are tied. If there is a use
  /// operand commutable with the tied use operand, find recurrence cycle
  /// along that operand as well.
  bool findTargetRecurrence(unsigned Reg,
                            const SmallSet<unsigned, 2> &TargetReg,
                            RecurrenceCycle &RC);

  /// \brief If copy instruction \p MI is a virtual register copy, track it in
  /// the set \p CopySrcRegs and \p CopyMIs. If this virtual register was
  /// previously seen as a copy, replace the uses of this copy with the
  /// previously seen copy's destination register.
  bool foldRedundantCopy(MachineInstr *MI,
                         SmallSet<unsigned, 4> &CopySrcRegs,
                         DenseMap<unsigned, MachineInstr *> &CopyMIs);

  /// \brief Is the register \p Reg a non-allocatable physical register?
  bool isNAPhysCopy(unsigned Reg);

  /// \brief If copy instruction \p MI is a non-allocatable virtual<->physical
  /// register copy, track it in the \p NAPhysToVirtMIs map. If this
  /// non-allocatable physical register was previously copied to a virtual
  /// register and hasn't been clobbered, the virt->phys copy can be
  /// deleted.
  bool foldRedundantNAPhysCopy(
      MachineInstr *MI,
      DenseMap<unsigned, MachineInstr *> &NAPhysToVirtMIs);

  bool isLoadFoldable(MachineInstr *MI,
                      SmallSet<unsigned, 16> &FoldAsLoadDefCandidates);

  /// \brief Check whether \p MI is understood by the register coalescer
  /// but may require some rewriting.
  bool isCoalescableCopy(const MachineInstr &MI) {
    // SubregToRegs are not interesting, because they are already register
    // coalescer friendly.
    return MI.isCopy() || (!DisableAdvCopyOpt &&
                           (MI.isRegSequence() || MI.isInsertSubreg() ||
                            MI.isExtractSubreg()));
  }

  /// \brief Check whether \p MI is a copy like instruction that is
  /// not recognized by the register coalescer.
  bool isUncoalescableCopy(const MachineInstr &MI) {
    return MI.isBitcast() ||
           (!DisableAdvCopyOpt &&
            (MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
             MI.isExtractSubregLike()));
  }
};
253 | | |
254 | | /// \brief Helper class to hold instructions that are inside recurrence |
255 | | /// cycles. The recurrence cycle is formulated around 1) a def operand and its |
256 | | /// tied use operand, or 2) a def operand and a use operand that is commutable |
257 | | /// with another use operand which is tied to the def operand. In the latter |
258 | | /// case, index of the tied use operand and the commutable use operand are |
259 | | /// maintained with CommutePair. |
260 | | class RecurrenceInstr { |
261 | | public: |
262 | | using IndexPair = std::pair<unsigned, unsigned>; |
263 | | |
264 | 15.8k | RecurrenceInstr(MachineInstr *MI) : MI(MI) {} |
265 | | RecurrenceInstr(MachineInstr *MI, unsigned Idx1, unsigned Idx2) |
266 | 159 | : MI(MI), CommutePair(std::make_pair(Idx1, Idx2)) {} |
267 | | |
268 | 130 | MachineInstr *getMI() const { return MI; } |
269 | 1.96k | Optional<IndexPair> getCommutePair() const { return CommutePair; } |
270 | | |
271 | | private: |
272 | | MachineInstr *MI; |
273 | | Optional<IndexPair> CommutePair; |
274 | | }; |
275 | | |
276 | | /// \brief Helper class to hold a reply for ValueTracker queries. Contains the |
277 | | /// returned sources for a given search and the instructions where the sources |
278 | | /// were tracked from. |
279 | | class ValueTrackerResult { |
280 | | private: |
281 | | /// Track all sources found by one ValueTracker query. |
282 | | SmallVector<TargetInstrInfo::RegSubRegPair, 2> RegSrcs; |
283 | | |
284 | | /// Instruction using the sources in 'RegSrcs'. |
285 | | const MachineInstr *Inst = nullptr; |
286 | | |
287 | | public: |
288 | 15.3M | ValueTrackerResult() = default; |
289 | | |
290 | 6.22M | ValueTrackerResult(unsigned Reg, unsigned SubReg) { |
291 | 6.22M | addSource(Reg, SubReg); |
292 | 6.22M | } |
293 | | |
294 | 27.5M | bool isValid() const { return getNumSources() > 0; } |
295 | | |
296 | 6.22M | void setInst(const MachineInstr *I) { Inst = I; } |
297 | 5 | const MachineInstr *getInst() const { return Inst; } |
298 | | |
299 | 0 | void clear() { |
300 | 0 | RegSrcs.clear(); |
301 | 0 | Inst = nullptr; |
302 | 0 | } |
303 | | |
304 | 6.22M | void addSource(unsigned SrcReg, unsigned SrcSubReg) { |
305 | 6.22M | RegSrcs.push_back(TargetInstrInfo::RegSubRegPair(SrcReg, SrcSubReg)); |
306 | 6.22M | } |
307 | | |
308 | 0 | void setSource(int Idx, unsigned SrcReg, unsigned SrcSubReg) { |
309 | 0 | assert(Idx < getNumSources() && "Reg pair source out of index"); |
310 | 0 | RegSrcs[Idx] = TargetInstrInfo::RegSubRegPair(SrcReg, SrcSubReg); |
311 | 0 | } |
312 | | |
313 | 42.6M | int getNumSources() const { return RegSrcs.size(); } |
314 | | |
315 | 15.1M | unsigned getSrcReg(int Idx) const { |
316 | 15.1M | assert(Idx < getNumSources() && "Reg source out of index"); |
317 | 15.1M | return RegSrcs[Idx].Reg; |
318 | 15.1M | } |
319 | | |
320 | 11.9M | unsigned getSrcSubReg(int Idx) const { |
321 | 11.9M | assert(Idx < getNumSources() && "SubReg source out of index"); |
322 | 11.9M | return RegSrcs[Idx].SubReg; |
323 | 11.9M | } |
324 | | |
325 | 0 | bool operator==(const ValueTrackerResult &Other) { |
326 | 0 | if (Other.getInst() != getInst()) |
327 | 0 | return false; |
328 | 0 |
|
329 | 0 | if (Other.getNumSources() != getNumSources()) |
330 | 0 | return false; |
331 | 0 |
|
332 | 0 | for (int i = 0, e = Other.getNumSources(); i != e; ++i) |
333 | 0 | if (Other.getSrcReg(i) != getSrcReg(i) || |
334 | 0 | Other.getSrcSubReg(i) != getSrcSubReg(i)) |
335 | 0 | return false; |
336 | 0 | return true; |
337 | 0 | } |
338 | | }; |
339 | | |
340 | | /// \brief Helper class to track the possible sources of a value defined by |
341 | | /// a (chain of) copy related instructions. |
342 | | /// Given a definition (instruction and definition index), this class |
343 | | /// follows the use-def chain to find successive suitable sources. |
344 | | /// The given source can be used to rewrite the definition into |
345 | | /// def = COPY src. |
346 | | /// |
347 | | /// For instance, let us consider the following snippet: |
348 | | /// v0 = |
349 | | /// v2 = INSERT_SUBREG v1, v0, sub0 |
350 | | /// def = COPY v2.sub0 |
351 | | /// |
352 | | /// Using a ValueTracker for def = COPY v2.sub0 will give the following |
353 | | /// suitable sources: |
354 | | /// v2.sub0 and v0. |
355 | | /// Then, def can be rewritten into def = COPY v0. |
356 | | class ValueTracker { |
357 | | private: |
358 | | /// The current point into the use-def chain. |
359 | | const MachineInstr *Def = nullptr; |
360 | | |
361 | | /// The index of the definition in Def. |
362 | | unsigned DefIdx = 0; |
363 | | |
364 | | /// The sub register index of the definition. |
365 | | unsigned DefSubReg; |
366 | | |
367 | | /// The register where the value can be found. |
368 | | unsigned Reg; |
369 | | |
370 | | /// Specifiy whether or not the value tracking looks through |
371 | | /// complex instructions. When this is false, the value tracker |
372 | | /// bails on everything that is not a copy or a bitcast. |
373 | | /// |
374 | | /// Note: This could have been implemented as a specialized version of |
375 | | /// the ValueTracker class but that would have complicated the code of |
376 | | /// the users of this class. |
377 | | bool UseAdvancedTracking; |
378 | | |
379 | | /// MachineRegisterInfo used to perform tracking. |
380 | | const MachineRegisterInfo &MRI; |
381 | | |
382 | | /// Optional TargetInstrInfo used to perform some complex |
383 | | /// tracking. |
384 | | const TargetInstrInfo *TII; |
385 | | |
386 | | /// \brief Dispatcher to the right underlying implementation of |
387 | | /// getNextSource. |
388 | | ValueTrackerResult getNextSourceImpl(); |
389 | | |
390 | | /// \brief Specialized version of getNextSource for Copy instructions. |
391 | | ValueTrackerResult getNextSourceFromCopy(); |
392 | | |
393 | | /// \brief Specialized version of getNextSource for Bitcast instructions. |
394 | | ValueTrackerResult getNextSourceFromBitcast(); |
395 | | |
396 | | /// \brief Specialized version of getNextSource for RegSequence |
397 | | /// instructions. |
398 | | ValueTrackerResult getNextSourceFromRegSequence(); |
399 | | |
400 | | /// \brief Specialized version of getNextSource for InsertSubreg |
401 | | /// instructions. |
402 | | ValueTrackerResult getNextSourceFromInsertSubreg(); |
403 | | |
404 | | /// \brief Specialized version of getNextSource for ExtractSubreg |
405 | | /// instructions. |
406 | | ValueTrackerResult getNextSourceFromExtractSubreg(); |
407 | | |
408 | | /// \brief Specialized version of getNextSource for SubregToReg |
409 | | /// instructions. |
410 | | ValueTrackerResult getNextSourceFromSubregToReg(); |
411 | | |
412 | | /// \brief Specialized version of getNextSource for PHI instructions. |
413 | | ValueTrackerResult getNextSourceFromPHI(); |
414 | | |
415 | | public: |
416 | | /// \brief Create a ValueTracker instance for the value defined by \p Reg. |
417 | | /// \p DefSubReg represents the sub register index the value tracker will |
418 | | /// track. It does not need to match the sub register index used in the |
419 | | /// definition of \p Reg. |
420 | | /// \p UseAdvancedTracking specifies whether or not the value tracker looks |
421 | | /// through complex instructions. By default (false), it handles only copy |
422 | | /// and bitcast instructions. |
423 | | /// If \p Reg is a physical register, a value tracker constructed with |
424 | | /// this constructor will not find any alternative source. |
425 | | /// Indeed, when \p Reg is a physical register that constructor does not |
426 | | /// know which definition of \p Reg it should track. |
427 | | /// Use the next constructor to track a physical register. |
428 | | ValueTracker(unsigned Reg, unsigned DefSubReg, |
429 | | const MachineRegisterInfo &MRI, |
430 | | bool UseAdvancedTracking = false, |
431 | | const TargetInstrInfo *TII = nullptr) |
432 | | : DefSubReg(DefSubReg), Reg(Reg), |
433 | 6.12M | UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) { |
434 | 6.12M | if (!TargetRegisterInfo::isPhysicalRegister(Reg)6.12M ) { |
435 | 6.12M | Def = MRI.getVRegDef(Reg); |
436 | 6.12M | DefIdx = MRI.def_begin(Reg).getOperandNo(); |
437 | 6.12M | } |
438 | 6.12M | } |
439 | | |
440 | | /// \brief Create a ValueTracker instance for the value defined by |
441 | | /// the pair \p MI, \p DefIdx. |
442 | | /// Unlike the other constructor, the value tracker produced by this one |
443 | | /// may be able to find a new source when the definition is a physical |
444 | | /// register. |
445 | | /// This could be useful to rewrite target specific instructions into |
446 | | /// generic copy instructions. |
447 | | ValueTracker(const MachineInstr &MI, unsigned DefIdx, unsigned DefSubReg, |
448 | | const MachineRegisterInfo &MRI, |
449 | | bool UseAdvancedTracking = false, |
450 | | const TargetInstrInfo *TII = nullptr) |
451 | | : Def(&MI), DefIdx(DefIdx), DefSubReg(DefSubReg), |
452 | 0 | UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) { |
453 | 0 | assert(DefIdx < Def->getDesc().getNumDefs() && |
454 | 0 | Def->getOperand(DefIdx).isReg() && "Invalid definition"); |
455 | 0 | Reg = Def->getOperand(DefIdx).getReg(); |
456 | 0 | } |
457 | | |
458 | | /// \brief Following the use-def chain, get the next available source |
459 | | /// for the tracked value. |
460 | | /// \return A ValueTrackerResult containing a set of registers |
461 | | /// and sub registers with tracked values. A ValueTrackerResult with |
462 | | /// an empty set of registers means no source was found. |
463 | | ValueTrackerResult getNextSource(); |
464 | | |
465 | | /// \brief Get the last register where the initial value can be found. |
466 | | /// Initially this is the register of the definition. |
467 | | /// Then, after each successful call to getNextSource, this is the |
468 | | /// register of the last source. |
469 | 0 | unsigned getReg() const { return Reg; } |
470 | | }; |
471 | | |
472 | | } // end anonymous namespace |
473 | | |
char PeepholeOptimizer::ID = 0;

// Exported handle so other passes can refer to this pass by ID.
char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;

// Register the pass and its analysis dependencies with the PassRegistry.
INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE,
                      "Peephole Optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(PeepholeOptimizer, DEBUG_TYPE,
                    "Peephole Optimizations", false, false)
484 | | |
/// If instruction is a copy-like instruction, i.e. it reads a single register
/// and writes a single register and it does not modify the source, and if the
/// source value is preserved as a sub-register of the result, then replace all
/// reachable uses of the source with the subreg of the result.
///
/// Do not generate an EXTRACT that is used only in a debug use, as this changes
/// the code. Since this code does not currently share EXTRACTs, just ignore all
/// debug uses.
bool PeepholeOptimizer::
optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
                 SmallPtrSetImpl<MachineInstr*> &LocalMIs) {
  unsigned SrcReg, DstReg, SubIdx;
  if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
    return false;

  // Only virtual registers are rewritten here.
  if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
      TargetRegisterInfo::isPhysicalRegister(SrcReg))
    return false;

  if (MRI->hasOneNonDBGUse(SrcReg))
    // No other uses.
    return false;

  // Ensure DstReg can get a register class that actually supports
  // sub-registers. Don't change the class until we commit.
  const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
  DstRC = TRI->getSubClassWithSubReg(DstRC, SubIdx);
  if (!DstRC)
    return false;

  // The ext instr may be operating on a sub-register of SrcReg as well.
  // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit
  // register.
  // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of
  // SrcReg:SubIdx should be replaced.
  bool UseSrcSubIdx =
      TRI->getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr;

  // The source has other uses. See if we can replace the other uses with use of
  // the result of the extension.
  // ReachedBBs: blocks that already contain a (non-debug) use of DstReg.
  SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
  for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
    ReachedBBs.insert(UI.getParent());

  // Uses that are in the same BB of uses of the result of the instruction.
  SmallVector<MachineOperand*, 8> Uses;

  // Uses that the result of the instruction can reach.
  SmallVector<MachineOperand*, 8> ExtendedUses;

  bool ExtendLife = true;
  for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
    MachineInstr *UseMI = UseMO.getParent();
    if (UseMI == MI)
      continue;

    // Never feed the extension result into a PHI (see PHIBBs below); also
    // give up on extending the live range when a PHI consumes the source.
    if (UseMI->isPHI()) {
      ExtendLife = false;
      continue;
    }

    // Only accept uses of SrcReg:SubIdx.
    if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
      continue;

    // It's an error to translate this:
    //
    //    %reg1025 = <sext> %reg1024
    //     ...
    //    %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
    //
    // into this:
    //
    //    %reg1025 = <sext> %reg1024
    //     ...
    //    %reg1027 = COPY %reg1025:4
    //    %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
    //
    // The problem here is that SUBREG_TO_REG is there to assert that an
    // implicit zext occurs. It doesn't insert a zext instruction. If we allow
    // the COPY here, it will give us the value after the <sext>, not the
    // original value of %reg1024 before <sext>.
    if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
      continue;

    MachineBasicBlock *UseMBB = UseMI->getParent();
    if (UseMBB == MBB) {
      // Local uses that come after the extension.
      if (!LocalMIs.count(UseMI))
        Uses.push_back(&UseMO);
    } else if (ReachedBBs.count(UseMBB)) {
      // Non-local uses where the result of the extension is used. Always
      // replace these unless it's a PHI.
      Uses.push_back(&UseMO);
    } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
      // We may want to extend the live range of the extension result in order
      // to replace these uses.
      ExtendedUses.push_back(&UseMO);
    } else {
      // Both will be live out of the def MBB anyway. Don't extend live range of
      // the extension result.
      ExtendLife = false;
      break;
    }
  }

  if (ExtendLife && !ExtendedUses.empty())
    // Extend the liveness of the extension result.
    Uses.append(ExtendedUses.begin(), ExtendedUses.end());

  // Now replace all uses.
  bool Changed = false;
  if (!Uses.empty()) {
    SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;

    // Look for PHI uses of the extended result, we don't want to extend the
    // liveness of a PHI input. It breaks all kinds of assumptions down
    // stream. A PHI use is expected to be the kill of its source values.
    for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
      if (UI.isPHI())
        PHIBBs.insert(UI.getParent());

    const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
    for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
      MachineOperand *UseMO = Uses[i];
      MachineInstr *UseMI = UseMO->getParent();
      MachineBasicBlock *UseMBB = UseMI->getParent();
      if (PHIBBs.count(UseMBB))
        continue;

      // About to add uses of DstReg, clear DstReg's kill flags.
      if (!Changed) {
        MRI->clearKillFlags(DstReg);
        MRI->constrainRegClass(DstReg, DstRC);
      }

      // Rewrite the use through a fresh COPY of DstReg's sub-register.
      unsigned NewVR = MRI->createVirtualRegister(RC);
      MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
                                   TII->get(TargetOpcode::COPY), NewVR)
        .addReg(DstReg, 0, SubIdx);
      // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set.
      if (UseSrcSubIdx) {
        Copy->getOperand(0).setSubReg(SubIdx);
        Copy->getOperand(0).setIsUndef();
      }
      UseMO->setReg(NewVR);
      ++NumReuse;
      Changed = true;
    }
  }

  return Changed;
}
638 | | |
639 | | /// If the instruction is a compare and the previous instruction it's comparing |
640 | | /// against already sets (or could be modified to set) the same flag as the |
641 | | /// compare, then we can remove the comparison and use the flag from the |
642 | | /// previous instruction. |
643 | | bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI, |
644 | 1.41M | MachineBasicBlock *MBB) { |
645 | 1.41M | // If this instruction is a comparison against zero and isn't comparing a |
646 | 1.41M | // physical register, we can try to optimize it. |
647 | 1.41M | unsigned SrcReg, SrcReg2; |
648 | 1.41M | int CmpMask, CmpValue; |
649 | 1.41M | if (!TII->analyzeCompare(*MI, SrcReg, SrcReg2, CmpMask, CmpValue) || |
650 | 1.40M | TargetRegisterInfo::isPhysicalRegister(SrcReg) || |
651 | 1.40M | (SrcReg2 != 0 && 1.40M TargetRegisterInfo::isPhysicalRegister(SrcReg2)684k )) |
652 | 11.5k | return false; |
653 | 1.40M | |
654 | 1.40M | // Attempt to optimize the comparison instruction. |
655 | 1.40M | if (1.40M TII->optimizeCompareInstr(*MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)1.40M ) { |
656 | 240k | ++NumCmps; |
657 | 240k | return true; |
658 | 240k | } |
659 | 1.16M | |
660 | 1.16M | return false; |
661 | 1.16M | } |
662 | | |
663 | | /// Optimize a select instruction. |
664 | | bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI, |
665 | 2.99k | SmallPtrSetImpl<MachineInstr *> &LocalMIs) { |
666 | 2.99k | unsigned TrueOp = 0; |
667 | 2.99k | unsigned FalseOp = 0; |
668 | 2.99k | bool Optimizable = false; |
669 | 2.99k | SmallVector<MachineOperand, 4> Cond; |
670 | 2.99k | if (TII->analyzeSelect(*MI, Cond, TrueOp, FalseOp, Optimizable)) |
671 | 696 | return false; |
672 | 2.29k | if (2.29k !Optimizable2.29k ) |
673 | 0 | return false; |
674 | 2.29k | if (2.29k !TII->optimizeSelect(*MI, LocalMIs)2.29k ) |
675 | 1.06k | return false; |
676 | 1.23k | MI->eraseFromParent(); |
677 | 1.23k | ++NumSelects; |
678 | 1.23k | return true; |
679 | 1.23k | } |
680 | | |
/// \brief Check if a simpler conditional branch can be
/// generated.
/// Fully delegated to the target hook; returns true if \p MI was rewritten.
bool PeepholeOptimizer::optimizeCondBranch(MachineInstr *MI) {
  return TII->optimizeCondBranch(*MI);
}
686 | | |
687 | | /// \brief Try to find the next source that share the same register file |
688 | | /// for the value defined by \p Reg and \p SubReg. |
689 | | /// When true is returned, the \p RewriteMap can be used by the client to |
690 | | /// retrieve all Def -> Use along the way up to the next source. Any found |
691 | | /// Use that is not itself a key for another entry, is the next source to |
692 | | /// use. During the search for the next source, multiple sources can be found |
693 | | /// given multiple incoming sources of a PHI instruction. In this case, we |
694 | | /// look in each PHI source for the next source; all found next sources must |
695 | | /// share the same register file as \p Reg and \p SubReg. The client should |
696 | | /// then be capable of rewriting all intermediate PHIs to get the next source. |
697 | | /// \return False if no alternative sources are available. True otherwise. |
698 | | bool PeepholeOptimizer::findNextSource(unsigned Reg, unsigned SubReg, |
699 | 6.12M | RewriteMapTy &RewriteMap) { |
700 | 6.12M | // Do not try to find a new source for a physical register. |
701 | 6.12M | // So far we do not have any motivating example for doing that. |
702 | 6.12M | // Thus, instead of maintaining untested code, we will revisit that if |
703 | 6.12M | // that changes at some point. |
704 | 6.12M | if (TargetRegisterInfo::isPhysicalRegister(Reg)) |
705 | 0 | return false; |
706 | 6.12M | const TargetRegisterClass *DefRC = MRI->getRegClass(Reg); |
707 | 6.12M | |
708 | 6.12M | SmallVector<TargetInstrInfo::RegSubRegPair, 4> SrcToLook; |
709 | 6.12M | TargetInstrInfo::RegSubRegPair CurSrcPair(Reg, SubReg); |
710 | 6.12M | SrcToLook.push_back(CurSrcPair); |
711 | 6.12M | |
712 | 6.12M | unsigned PHICount = 0; |
713 | 8.79M | while (!SrcToLook.empty() && 8.79M PHICount < RewritePHILimit6.12M ) { |
714 | 6.12M | TargetInstrInfo::RegSubRegPair Pair = SrcToLook.pop_back_val(); |
715 | 6.12M | // As explained above, do not handle physical registers |
716 | 6.12M | if (TargetRegisterInfo::isPhysicalRegister(Pair.Reg)) |
717 | 0 | return false; |
718 | 6.12M | |
719 | 6.12M | CurSrcPair = Pair; |
720 | 6.12M | ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, |
721 | 6.12M | !DisableAdvCopyOpt, TII); |
722 | 6.12M | ValueTrackerResult Res; |
723 | 6.12M | bool ShouldRewrite = false; |
724 | 6.12M | |
725 | 6.50M | do { |
726 | 6.50M | // Follow the chain of copies until we reach the top of the use-def chain |
727 | 6.50M | // or find a more suitable source. |
728 | 6.50M | Res = ValTracker.getNextSource(); |
729 | 6.50M | if (!Res.isValid()) |
730 | 280k | break; |
731 | 6.22M | |
732 | 6.22M | // Insert the Def -> Use entry for the recently found source. |
733 | 6.22M | ValueTrackerResult CurSrcRes = RewriteMap.lookup(CurSrcPair); |
734 | 6.22M | if (CurSrcRes.isValid()6.22M ) { |
735 | 5 | assert(CurSrcRes == Res && "ValueTrackerResult found must match"); |
736 | 5 | // An existing entry with multiple sources is a PHI cycle we must avoid. |
737 | 5 | // Otherwise it's an entry with a valid next source we already found. |
738 | 5 | if (CurSrcRes.getNumSources() > 15 ) { |
739 | 0 | DEBUG(dbgs() << "findNextSource: found PHI cycle, aborting...\n"); |
740 | 0 | return false; |
741 | 0 | } |
742 | 5 | break; |
743 | 5 | } |
744 | 6.22M | RewriteMap.insert(std::make_pair(CurSrcPair, Res)); |
745 | 6.22M | |
746 | 6.22M | // ValueTrackerResult usually have one source unless it's the result from |
747 | 6.22M | // a PHI instruction. Add the found PHI edges to be looked up further. |
748 | 6.22M | unsigned NumSrcs = Res.getNumSources(); |
749 | 6.22M | if (NumSrcs > 16.22M ) { |
750 | 354 | PHICount++; |
751 | 1.32k | for (unsigned i = 0; i < NumSrcs1.32k ; ++i967 ) |
752 | 967 | SrcToLook.push_back(TargetInstrInfo::RegSubRegPair( |
753 | 967 | Res.getSrcReg(i), Res.getSrcSubReg(i))); |
754 | 354 | break; |
755 | 354 | } |
756 | 6.22M | |
757 | 6.22M | CurSrcPair.Reg = Res.getSrcReg(0); |
758 | 6.22M | CurSrcPair.SubReg = Res.getSrcSubReg(0); |
759 | 6.22M | // Do not extend the live-ranges of physical registers as they add |
760 | 6.22M | // constraints to the register allocator. Moreover, if we want to extend |
761 | 6.22M | // the live-range of a physical register, unlike SSA virtual register, |
762 | 6.22M | // we will have to check that they aren't redefined before the related use. |
763 | 6.22M | if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg)) |
764 | 3.17M | return false; |
765 | 3.04M | |
766 | 3.04M | const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg); |
767 | 3.04M | ShouldRewrite = TRI->shouldRewriteCopySrc(DefRC, SubReg, SrcRC, |
768 | 3.04M | CurSrcPair.SubReg); |
769 | 6.12M | } while (!ShouldRewrite); |
770 | 6.12M | |
771 | 6.12M | // Continue looking for new sources... |
772 | 2.95M | if (2.95M Res.isValid()2.95M ) |
773 | 2.67M | continue; |
774 | 280k | |
775 | 280k | // Do not continue searching for a new source if there's at least |
776 | 280k | // one use-def which cannot be rewritten. |
777 | 280k | if (280k !ShouldRewrite280k ) |
778 | 280k | return false; |
779 | 6.12M | } |
780 | 6.12M | |
781 | 2.67M | if (2.67M PHICount >= RewritePHILimit2.67M ) { |
782 | 0 | DEBUG(dbgs() << "findNextSource: PHI limit reached\n"); |
783 | 0 | return false; |
784 | 0 | } |
785 | 2.67M | |
786 | 2.67M | // If we did not find a more suitable source, there is nothing to optimize. |
787 | 2.67M | return CurSrcPair.Reg != Reg; |
788 | 2.67M | } |
789 | | |
790 | | /// \brief Insert a PHI instruction with incoming edges \p SrcRegs that are |
791 | | /// guaranteed to have the same register class. This is necessary whenever we |
792 | | /// successfully traverse a PHI instruction and find suitable sources coming |
793 | | /// from its edges. By inserting a new PHI, we provide a rewritten PHI def |
794 | | /// suitable to be used in a new COPY instruction. |
795 | | static MachineInstr * |
796 | | insertPHI(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, |
797 | | const SmallVectorImpl<TargetInstrInfo::RegSubRegPair> &SrcRegs, |
798 | 5 | MachineInstr *OrigPHI) { |
799 | 5 | assert(!SrcRegs.empty() && "No sources to create a PHI instruction?"); |
800 | 5 | |
801 | 5 | const TargetRegisterClass *NewRC = MRI->getRegClass(SrcRegs[0].Reg); |
802 | 5 | unsigned NewVR = MRI->createVirtualRegister(NewRC); |
803 | 5 | MachineBasicBlock *MBB = OrigPHI->getParent(); |
804 | 5 | MachineInstrBuilder MIB = BuildMI(*MBB, OrigPHI, OrigPHI->getDebugLoc(), |
805 | 5 | TII->get(TargetOpcode::PHI), NewVR); |
806 | 5 | |
807 | 5 | unsigned MBBOpIdx = 2; |
808 | 10 | for (auto RegPair : SrcRegs) { |
809 | 10 | MIB.addReg(RegPair.Reg, 0, RegPair.SubReg); |
810 | 10 | MIB.addMBB(OrigPHI->getOperand(MBBOpIdx).getMBB()); |
811 | 10 | // Since we've extended the lifetime of RegPair.Reg, clear the |
812 | 10 | // kill flags to account for that and make RegPair.Reg reach |
813 | 10 | // the new PHI. |
814 | 10 | MRI->clearKillFlags(RegPair.Reg); |
815 | 10 | MBBOpIdx += 2; |
816 | 10 | } |
817 | 5 | |
818 | 5 | return MIB; |
819 | 5 | } |
820 | | |
821 | | namespace { |
822 | | |
823 | | /// \brief Helper class to rewrite the arguments of a copy-like instruction. |
824 | | class CopyRewriter { |
825 | | protected: |
826 | | /// The copy-like instruction. |
827 | | MachineInstr &CopyLike; |
828 | | |
829 | | /// The index of the source being rewritten. |
830 | | unsigned CurrentSrcIdx = 0; |
831 | | |
832 | | public: |
833 | 6.03M | CopyRewriter(MachineInstr &MI) : CopyLike(MI) {} |
834 | 6.03M | virtual ~CopyRewriter() = default; |
835 | | |
836 | | /// \brief Get the next rewritable source (SrcReg, SrcSubReg) and |
837 | | /// the related value that it affects (TrackReg, TrackSubReg). |
838 | | /// A source is considered rewritable if its register class and the |
839 | | /// register class of the related TrackReg may not be register |
840 | | /// coalescer friendly. In other words, given a copy-like instruction |
841 | | /// not all the arguments may be returned as rewritable source, since |
842 | | /// some arguments are known to be register coalescer friendly. |
843 | | /// |
844 | | /// Each call of this method moves the current source to the next |
845 | | /// rewritable source. |
846 | | /// For instance, let CopyLike be the instruction to rewrite. |
847 | | /// CopyLike has one definition and one source: |
848 | | /// dst.dstSubIdx = CopyLike src.srcSubIdx. |
849 | | /// |
850 | | /// The first call will give the first rewritable source, i.e., |
851 | | /// the only source this instruction has: |
852 | | /// (SrcReg, SrcSubReg) = (src, srcSubIdx). |
853 | | /// This source defines the whole definition, i.e., |
854 | | /// (TrackReg, TrackSubReg) = (dst, dstSubIdx). |
855 | | /// |
856 | | /// The second and subsequent calls will return false, as there is only one |
857 | | /// rewritable source. |
858 | | /// |
859 | | /// \return True if a rewritable source has been found, false otherwise. |
860 | | /// The output arguments are valid if and only if true is returned. |
861 | | virtual bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, |
862 | | unsigned &TrackReg, |
863 | 11.5M | unsigned &TrackSubReg) { |
864 | 11.5M | // If CurrentSrcIdx == 1, this means this function has already been called |
865 | 11.5M | // once. CopyLike has one definition and one argument, thus, there is |
866 | 11.5M | // nothing else to rewrite. |
867 | 11.5M | if (!CopyLike.isCopy() || 11.5M CurrentSrcIdx == 111.5M ) |
868 | 5.78M | return false; |
869 | 5.78M | // This is the first call to getNextRewritableSource. |
870 | 5.78M | // Move the CurrentSrcIdx to remember that we made that call. |
871 | 5.78M | CurrentSrcIdx = 1; |
872 | 5.78M | // The rewritable source is the argument. |
873 | 5.78M | const MachineOperand &MOSrc = CopyLike.getOperand(1); |
874 | 5.78M | SrcReg = MOSrc.getReg(); |
875 | 5.78M | SrcSubReg = MOSrc.getSubReg(); |
876 | 5.78M | // What we track are the alternative sources of the definition. |
877 | 5.78M | const MachineOperand &MODef = CopyLike.getOperand(0); |
878 | 5.78M | TrackReg = MODef.getReg(); |
879 | 5.78M | TrackSubReg = MODef.getSubReg(); |
880 | 5.78M | return true; |
881 | 5.78M | } |
882 | | |
883 | | /// \brief Rewrite the current source with \p NewReg and \p NewSubReg |
884 | | /// if possible. |
885 | | /// \return True if the rewriting was possible, false otherwise. |
886 | 17.1k | virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) { |
887 | 17.1k | if (!CopyLike.isCopy() || 17.1k CurrentSrcIdx != 117.1k ) |
888 | 0 | return false; |
889 | 17.1k | MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx); |
890 | 17.1k | MOSrc.setReg(NewReg); |
891 | 17.1k | MOSrc.setSubReg(NewSubReg); |
892 | 17.1k | return true; |
893 | 17.1k | } |
894 | | |
895 | | /// \brief Given a \p Def.Reg and Def.SubReg pair, use \p RewriteMap to find |
896 | | /// the new source to use for rewrite. If \p HandleMultipleSources is true and |
897 | | /// multiple sources for a given \p Def are found along the way, we found a |
898 | | /// PHI instructions that needs to be rewritten. |
899 | | /// TODO: HandleMultipleSources should be removed once we test PHI handling |
900 | | /// with coalescable copies. |
901 | | TargetInstrInfo::RegSubRegPair |
902 | | getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, |
903 | | TargetInstrInfo::RegSubRegPair Def, |
904 | | PeepholeOptimizer::RewriteMapTy &RewriteMap, |
905 | 2.67M | bool HandleMultipleSources = true) { |
906 | 2.67M | TargetInstrInfo::RegSubRegPair LookupSrc(Def.Reg, Def.SubReg); |
907 | 5.36M | do { |
908 | 5.36M | ValueTrackerResult Res = RewriteMap.lookup(LookupSrc); |
909 | 5.36M | // If there are no entries on the map, LookupSrc is the new source. |
910 | 5.36M | if (!Res.isValid()) |
911 | 2.67M | return LookupSrc; |
912 | 2.69M | |
913 | 2.69M | // There's only one source for this definition, keep searching... |
914 | 2.69M | unsigned NumSrcs = Res.getNumSources(); |
915 | 2.69M | if (NumSrcs == 12.69M ) { |
916 | 2.69M | LookupSrc.Reg = Res.getSrcReg(0); |
917 | 2.69M | LookupSrc.SubReg = Res.getSrcSubReg(0); |
918 | 2.69M | continue; |
919 | 2.69M | } |
920 | 5 | |
921 | 5 | // TODO: Remove once multiple srcs w/ coalescable copies are supported. |
922 | 5 | if (5 !HandleMultipleSources5 ) |
923 | 0 | break; |
924 | 5 | |
925 | 5 | // Multiple sources, recurse into each source to find a new source |
926 | 5 | // for it. Then, rewrite the PHI accordingly to its new edges. |
927 | 5 | SmallVector<TargetInstrInfo::RegSubRegPair, 4> NewPHISrcs; |
928 | 15 | for (unsigned i = 0; i < NumSrcs15 ; ++i10 ) { |
929 | 10 | TargetInstrInfo::RegSubRegPair PHISrc(Res.getSrcReg(i), |
930 | 10 | Res.getSrcSubReg(i)); |
931 | 10 | NewPHISrcs.push_back( |
932 | 10 | getNewSource(MRI, TII, PHISrc, RewriteMap, HandleMultipleSources)); |
933 | 10 | } |
934 | 5 | |
935 | 5 | // Build the new PHI node and return its def register as the new source. |
936 | 5 | MachineInstr *OrigPHI = const_cast<MachineInstr *>(Res.getInst()); |
937 | 5 | MachineInstr *NewPHI = insertPHI(MRI, TII, NewPHISrcs, OrigPHI); |
938 | 5 | DEBUG(dbgs() << "-- getNewSource\n"); |
939 | 5 | DEBUG(dbgs() << " Replacing: " << *OrigPHI); |
940 | 5 | DEBUG(dbgs() << " With: " << *NewPHI); |
941 | 5.36M | const MachineOperand &MODef = NewPHI->getOperand(0); |
942 | 5.36M | return TargetInstrInfo::RegSubRegPair(MODef.getReg(), MODef.getSubReg()); |
943 | 5.36M | |
944 | 2.69M | } while (true); |
945 | 2.67M | |
946 | 0 | return TargetInstrInfo::RegSubRegPair(0, 0); |
947 | 2.67M | } |
948 | | |
949 | | /// \brief Rewrite the source found through \p Def, by using the \p RewriteMap |
950 | | /// and create a new COPY instruction. More info about RewriteMap in |
951 | | /// PeepholeOptimizer::findNextSource. Right now this is only used to handle |
952 | | /// Uncoalescable copies, since they are copy like instructions that aren't |
953 | | /// recognized by the register allocator. |
954 | | virtual MachineInstr * |
955 | | RewriteSource(TargetInstrInfo::RegSubRegPair Def, |
956 | 0 | PeepholeOptimizer::RewriteMapTy &RewriteMap) { |
957 | 0 | return nullptr; |
958 | 0 | } |
959 | | }; |
960 | | |
961 | | /// \brief Helper class to rewrite uncoalescable copy like instructions |
962 | | /// into new COPY (coalescable friendly) instructions. |
963 | | class UncoalescableRewriter : public CopyRewriter { |
964 | | protected: |
965 | | const TargetInstrInfo &TII; |
966 | | MachineRegisterInfo &MRI; |
967 | | |
968 | | /// The number of defs in the bitcast |
969 | | unsigned NumDefs; |
970 | | |
971 | | public: |
972 | | UncoalescableRewriter(MachineInstr &MI, const TargetInstrInfo &TII, |
973 | | MachineRegisterInfo &MRI) |
974 | 9.28k | : CopyRewriter(MI), TII(TII), MRI(MRI) { |
975 | 9.28k | NumDefs = MI.getDesc().getNumDefs(); |
976 | 9.28k | } |
977 | | |
978 | | /// \brief Get the next rewritable def source (TrackReg, TrackSubReg) |
979 | | /// All such sources need to be considered rewritable in order to |
980 | | /// rewrite an uncoalescable copy-like instruction. This method returns |
981 | | /// each definition that must be checked if rewritable. |
982 | | bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, |
983 | | unsigned &TrackReg, |
984 | 9.76k | unsigned &TrackSubReg) override { |
985 | 9.76k | // Find the next non-dead definition and continue from there. |
986 | 9.76k | if (CurrentSrcIdx == NumDefs) |
987 | 251 | return false; |
988 | 9.51k | |
989 | 9.51k | while (9.51k CopyLike.getOperand(CurrentSrcIdx).isDead()9.51k ) { |
990 | 0 | ++CurrentSrcIdx; |
991 | 0 | if (CurrentSrcIdx == NumDefs) |
992 | 0 | return false; |
993 | 0 | } |
994 | 9.51k | |
995 | 9.51k | // What we track are the alternative sources of the definition. |
996 | 9.51k | const MachineOperand &MODef = CopyLike.getOperand(CurrentSrcIdx); |
997 | 9.51k | TrackReg = MODef.getReg(); |
998 | 9.51k | TrackSubReg = MODef.getSubReg(); |
999 | 9.51k | |
1000 | 9.51k | CurrentSrcIdx++; |
1001 | 9.51k | return true; |
1002 | 9.76k | } |
1003 | | |
1004 | | /// \brief Rewrite the source found through \p Def, by using the \p RewriteMap |
1005 | | /// and create a new COPY instruction. More info about RewriteMap in |
1006 | | /// PeepholeOptimizer::findNextSource. Right now this is only used to handle |
1007 | | /// Uncoalescable copies, since they are copy like instructions that aren't |
1008 | | /// recognized by the register allocator. |
1009 | | MachineInstr * |
1010 | | RewriteSource(TargetInstrInfo::RegSubRegPair Def, |
1011 | 476 | PeepholeOptimizer::RewriteMapTy &RewriteMap) override { |
1012 | 476 | assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) && |
1013 | 476 | "We do not rewrite physical registers"); |
1014 | 476 | |
1015 | 476 | // Find the new source to use in the COPY rewrite. |
1016 | 476 | TargetInstrInfo::RegSubRegPair NewSrc = |
1017 | 476 | getNewSource(&MRI, &TII, Def, RewriteMap); |
1018 | 476 | |
1019 | 476 | // Insert the COPY. |
1020 | 476 | const TargetRegisterClass *DefRC = MRI.getRegClass(Def.Reg); |
1021 | 476 | unsigned NewVR = MRI.createVirtualRegister(DefRC); |
1022 | 476 | |
1023 | 476 | MachineInstr *NewCopy = |
1024 | 476 | BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(), |
1025 | 476 | TII.get(TargetOpcode::COPY), NewVR) |
1026 | 476 | .addReg(NewSrc.Reg, 0, NewSrc.SubReg); |
1027 | 476 | |
1028 | 476 | NewCopy->getOperand(0).setSubReg(Def.SubReg); |
1029 | 476 | if (Def.SubReg) |
1030 | 0 | NewCopy->getOperand(0).setIsUndef(); |
1031 | 476 | |
1032 | 476 | DEBUG(dbgs() << "-- RewriteSource\n"); |
1033 | 476 | DEBUG(dbgs() << " Replacing: " << CopyLike); |
1034 | 476 | DEBUG(dbgs() << " With: " << *NewCopy); |
1035 | 476 | MRI.replaceRegWith(Def.Reg, NewVR); |
1036 | 476 | MRI.clearKillFlags(NewVR); |
1037 | 476 | |
1038 | 476 | // We extended the lifetime of NewSrc.Reg, clear the kill flags to |
1039 | 476 | // account for that. |
1040 | 476 | MRI.clearKillFlags(NewSrc.Reg); |
1041 | 476 | |
1042 | 476 | return NewCopy; |
1043 | 476 | } |
1044 | | }; |
1045 | | |
1046 | | /// \brief Specialized rewriter for INSERT_SUBREG instruction. |
1047 | | class InsertSubregRewriter : public CopyRewriter { |
1048 | | public: |
1049 | 189k | InsertSubregRewriter(MachineInstr &MI) : CopyRewriter(MI) { |
1050 | 189k | assert(MI.isInsertSubreg() && "Invalid instruction"); |
1051 | 189k | } |
1052 | | |
1053 | | /// \brief See CopyRewriter::getNextRewritableSource. |
1054 | | /// Here CopyLike has the following form: |
1055 | | /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx. |
1056 | | /// Src1 has the same register class as dst, hence, there is |
1057 | | /// nothing to rewrite. |
1058 | | /// Src2.src2SubIdx, may not be register coalescer friendly. |
1059 | | /// Therefore, the first call to this method returns: |
1060 | | /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx). |
1061 | | /// (TrackReg, TrackSubReg) = (dst, subIdx). |
1062 | | /// |
1063 | | /// Subsequent calls will return false. |
1064 | | bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, |
1065 | | unsigned &TrackReg, |
1066 | 378k | unsigned &TrackSubReg) override { |
1067 | 378k | // If we already get the only source we can rewrite, return false. |
1068 | 378k | if (CurrentSrcIdx == 2) |
1069 | 189k | return false; |
1070 | 189k | // We are looking at v2 = INSERT_SUBREG v0, v1, sub0. |
1071 | 189k | CurrentSrcIdx = 2; |
1072 | 189k | const MachineOperand &MOInsertedReg = CopyLike.getOperand(2); |
1073 | 189k | SrcReg = MOInsertedReg.getReg(); |
1074 | 189k | SrcSubReg = MOInsertedReg.getSubReg(); |
1075 | 189k | const MachineOperand &MODef = CopyLike.getOperand(0); |
1076 | 189k | |
1077 | 189k | // We want to track something that is compatible with the |
1078 | 189k | // partial definition. |
1079 | 189k | TrackReg = MODef.getReg(); |
1080 | 189k | if (MODef.getSubReg()) |
1081 | 189k | // Bail if we have to compose sub-register indices. |
1082 | 0 | return false; |
1083 | 189k | TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm(); |
1084 | 189k | return true; |
1085 | 189k | } |
1086 | | |
1087 | 5 | bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { |
1088 | 5 | if (CurrentSrcIdx != 2) |
1089 | 0 | return false; |
1090 | 5 | // We are rewriting the inserted reg. |
1091 | 5 | MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx); |
1092 | 5 | MO.setReg(NewReg); |
1093 | 5 | MO.setSubReg(NewSubReg); |
1094 | 5 | return true; |
1095 | 5 | } |
1096 | | }; |
1097 | | |
1098 | | /// \brief Specialized rewriter for EXTRACT_SUBREG instruction. |
1099 | | class ExtractSubregRewriter : public CopyRewriter { |
1100 | | const TargetInstrInfo &TII; |
1101 | | |
1102 | | public: |
1103 | | ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII) |
1104 | 0 | : CopyRewriter(MI), TII(TII) { |
1105 | 0 | assert(MI.isExtractSubreg() && "Invalid instruction"); |
1106 | 0 | } |
1107 | | |
1108 | | /// \brief See CopyRewriter::getNextRewritableSource. |
1109 | | /// Here CopyLike has the following form: |
1110 | | /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx. |
1111 | | /// There is only one rewritable source: Src.subIdx, |
1112 | | /// which defines dst.dstSubIdx. |
1113 | | bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, |
1114 | | unsigned &TrackReg, |
1115 | 0 | unsigned &TrackSubReg) override { |
1116 | 0 | // If we already get the only source we can rewrite, return false. |
1117 | 0 | if (CurrentSrcIdx == 1) |
1118 | 0 | return false; |
1119 | 0 | // We are looking at v1 = EXTRACT_SUBREG v0, sub0. |
1120 | 0 | CurrentSrcIdx = 1; |
1121 | 0 | const MachineOperand &MOExtractedReg = CopyLike.getOperand(1); |
1122 | 0 | SrcReg = MOExtractedReg.getReg(); |
1123 | 0 | // If we have to compose sub-register indices, bail out. |
1124 | 0 | if (MOExtractedReg.getSubReg()) |
1125 | 0 | return false; |
1126 | 0 |
|
1127 | 0 | SrcSubReg = CopyLike.getOperand(2).getImm(); |
1128 | 0 |
|
1129 | 0 | // We want to track something that is compatible with the definition. |
1130 | 0 | const MachineOperand &MODef = CopyLike.getOperand(0); |
1131 | 0 | TrackReg = MODef.getReg(); |
1132 | 0 | TrackSubReg = MODef.getSubReg(); |
1133 | 0 | return true; |
1134 | 0 | } |
1135 | | |
1136 | 0 | bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { |
1137 | 0 | // The only source we can rewrite is the input register. |
1138 | 0 | if (CurrentSrcIdx != 1) |
1139 | 0 | return false; |
1140 | 0 |
|
1141 | 0 | CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg); |
1142 | 0 |
|
1143 | 0 | // If we find a source that does not require to extract something, |
1144 | 0 | // rewrite the operation with a copy. |
1145 | 0 | if (!NewSubReg0 ) { |
1146 | 0 | // Move the current index to an invalid position. |
1147 | 0 | // We do not want another call to this method to be able |
1148 | 0 | // to do any change. |
1149 | 0 | CurrentSrcIdx = -1; |
1150 | 0 | // Rewrite the operation as a COPY. |
1151 | 0 | // Get rid of the sub-register index. |
1152 | 0 | CopyLike.RemoveOperand(2); |
1153 | 0 | // Morph the operation into a COPY. |
1154 | 0 | CopyLike.setDesc(TII.get(TargetOpcode::COPY)); |
1155 | 0 | return true; |
1156 | 0 | } |
1157 | 0 | CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg); |
1158 | 0 | return true; |
1159 | 0 | } |
1160 | | }; |
1161 | | |
1162 | | /// \brief Specialized rewriter for REG_SEQUENCE instruction. |
1163 | | class RegSequenceRewriter : public CopyRewriter { |
1164 | | public: |
1165 | 51.5k | RegSequenceRewriter(MachineInstr &MI) : CopyRewriter(MI) { |
1166 | 51.5k | assert(MI.isRegSequence() && "Invalid instruction"); |
1167 | 51.5k | } |
1168 | | |
1169 | | /// \brief See CopyRewriter::getNextRewritableSource. |
1170 | | /// Here CopyLike has the following form: |
1171 | | /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2. |
1172 | | /// Each call will return a different source, walking all the available |
1173 | | /// source. |
1174 | | /// |
1175 | | /// The first call returns: |
1176 | | /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx). |
1177 | | /// (TrackReg, TrackSubReg) = (dst, subIdx1). |
1178 | | /// |
1179 | | /// The second call returns: |
1180 | | /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx). |
1181 | | /// (TrackReg, TrackSubReg) = (dst, subIdx2). |
1182 | | /// |
1183 | | /// And so on, until all the sources have been traversed, then |
1184 | | /// it returns false. |
1185 | | bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, |
1186 | | unsigned &TrackReg, |
1187 | 197k | unsigned &TrackSubReg) override { |
1188 | 197k | // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc. |
1189 | 197k | |
1190 | 197k | // If this is the first call, move to the first argument. |
1191 | 197k | if (CurrentSrcIdx == 0197k ) { |
1192 | 51.5k | CurrentSrcIdx = 1; |
1193 | 197k | } else { |
1194 | 145k | // Otherwise, move to the next argument and check that it is valid. |
1195 | 145k | CurrentSrcIdx += 2; |
1196 | 145k | if (CurrentSrcIdx >= CopyLike.getNumOperands()) |
1197 | 51.5k | return false; |
1198 | 145k | } |
1199 | 145k | const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx); |
1200 | 145k | SrcReg = MOInsertedReg.getReg(); |
1201 | 145k | // If we have to compose sub-register indices, bail out. |
1202 | 145k | if ((SrcSubReg = MOInsertedReg.getSubReg())) |
1203 | 59 | return false; |
1204 | 145k | |
1205 | 145k | // We want to track something that is compatible with the related |
1206 | 145k | // partial definition. |
1207 | 145k | TrackSubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm(); |
1208 | 145k | |
1209 | 145k | const MachineOperand &MODef = CopyLike.getOperand(0); |
1210 | 145k | TrackReg = MODef.getReg(); |
1211 | 145k | // If we have to compose sub-registers, bail. |
1212 | 145k | return MODef.getSubReg() == 0; |
1213 | 145k | } |
1214 | | |
1215 | 7.42k | bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { |
1216 | 7.42k | // We cannot rewrite out of bound operands. |
1217 | 7.42k | // Moreover, rewritable sources are at odd positions. |
1218 | 7.42k | if ((CurrentSrcIdx & 1) != 1 || 7.42k CurrentSrcIdx > CopyLike.getNumOperands()7.42k ) |
1219 | 0 | return false; |
1220 | 7.42k | |
1221 | 7.42k | MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx); |
1222 | 7.42k | MO.setReg(NewReg); |
1223 | 7.42k | MO.setSubReg(NewSubReg); |
1224 | 7.42k | return true; |
1225 | 7.42k | } |
1226 | | }; |
1227 | | |
1228 | | } // end anonymous namespace |
1229 | | |
1230 | | /// \brief Get the appropriated CopyRewriter for \p MI. |
1231 | | /// \return A pointer to a dynamically allocated CopyRewriter or nullptr |
1232 | | /// if no rewriter works for \p MI. |
1233 | | static CopyRewriter *getCopyRewriter(MachineInstr &MI, |
1234 | | const TargetInstrInfo &TII, |
1235 | 6.03M | MachineRegisterInfo &MRI) { |
1236 | 6.03M | // Handle uncoalescable copy-like instructions. |
1237 | 6.03M | if (MI.isBitcast() || 6.03M (MI.isRegSequenceLike() || 6.02M MI.isInsertSubregLike()6.02M || |
1238 | 6.02M | MI.isExtractSubregLike())) |
1239 | 9.28k | return new UncoalescableRewriter(MI, TII, MRI); |
1240 | 6.02M | |
1241 | 6.02M | switch (MI.getOpcode()) { |
1242 | 0 | default: |
1243 | 0 | return nullptr; |
1244 | 5.78M | case TargetOpcode::COPY: |
1245 | 5.78M | return new CopyRewriter(MI); |
1246 | 189k | case TargetOpcode::INSERT_SUBREG: |
1247 | 189k | return new InsertSubregRewriter(MI); |
1248 | 0 | case TargetOpcode::EXTRACT_SUBREG: |
1249 | 0 | return new ExtractSubregRewriter(MI, TII); |
1250 | 51.5k | case TargetOpcode::REG_SEQUENCE: |
1251 | 51.5k | return new RegSequenceRewriter(MI); |
1252 | 0 | } |
1253 | 0 | llvm_unreachable0 (nullptr); |
1254 | 0 | } |
1255 | | |
1256 | | /// \brief Optimize generic copy instructions to avoid cross |
1257 | | /// register bank copy. The optimization looks through a chain of |
1258 | | /// copies and tries to find a source that has a compatible register |
1259 | | /// class. |
1260 | | /// Two register classes are considered to be compatible if they share |
1261 | | /// the same register bank. |
1262 | | /// New copies issued by this optimization are register allocator |
1263 | | /// friendly. This optimization does not remove any copy as it may |
1264 | | /// overconstrain the register allocator, but replaces some operands |
1265 | | /// when possible. |
1266 | | /// \pre isCoalescableCopy(*MI) is true. |
1267 | | /// \return True, when \p MI has been rewritten. False otherwise. |
1268 | 11.7M | bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) { |
1269 | 11.7M | assert(MI && isCoalescableCopy(*MI) && "Invalid argument"); |
1270 | 11.7M | assert(MI->getDesc().getNumDefs() == 1 && |
1271 | 11.7M | "Coalescer can understand multiple defs?!"); |
1272 | 11.7M | const MachineOperand &MODef = MI->getOperand(0); |
1273 | 11.7M | // Do not rewrite physical definitions. |
1274 | 11.7M | if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg())) |
1275 | 5.70M | return false; |
1276 | 6.02M | |
1277 | 6.02M | bool Changed = false; |
1278 | 6.02M | // Get the right rewriter for the current copy. |
1279 | 6.02M | std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII, *MRI)); |
1280 | 6.02M | // If none exists, bail out. |
1281 | 6.02M | if (!CpyRewriter) |
1282 | 0 | return false; |
1283 | 6.02M | // Rewrite each rewritable source. |
1284 | 6.02M | unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg; |
1285 | 12.1M | while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg, |
1286 | 6.02M | TrackSubReg)) { |
1287 | 6.11M | // Keep track of PHI nodes and its incoming edges when looking for sources. |
1288 | 6.11M | RewriteMapTy RewriteMap; |
1289 | 6.11M | // Try to find a more suitable source. If we failed to do so, or get the |
1290 | 6.11M | // actual source, move to the next source. |
1291 | 6.11M | if (!findNextSource(TrackReg, TrackSubReg, RewriteMap)) |
1292 | 3.44M | continue; |
1293 | 2.67M | |
1294 | 2.67M | // Get the new source to rewrite. TODO: Only enable handling of multiple |
1295 | 2.67M | // sources (PHIs) once we have a motivating example and testcases for it. |
1296 | 2.67M | TargetInstrInfo::RegSubRegPair TrackPair(TrackReg, TrackSubReg); |
1297 | 2.67M | TargetInstrInfo::RegSubRegPair NewSrc = CpyRewriter->getNewSource( |
1298 | 2.67M | MRI, TII, TrackPair, RewriteMap, false /* multiple sources */); |
1299 | 2.67M | if (SrcReg == NewSrc.Reg || 2.67M NewSrc.Reg == 024.5k ) |
1300 | 2.64M | continue; |
1301 | 24.5k | |
1302 | 24.5k | // Rewrite source. |
1303 | 24.5k | if (24.5k CpyRewriter->RewriteCurrentSource(NewSrc.Reg, NewSrc.SubReg)24.5k ) { |
1304 | 24.5k | // We may have extended the live-range of NewSrc, account for that. |
1305 | 24.5k | MRI->clearKillFlags(NewSrc.Reg); |
1306 | 24.5k | Changed = true; |
1307 | 24.5k | } |
1308 | 6.11M | } |
1309 | 11.7M | // TODO: We could have a clean-up method to tidy the instruction. |
1310 | 11.7M | // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0 |
1311 | 11.7M | // => v0 = COPY v1 |
1312 | 11.7M | // Currently we haven't seen motivating example for that and we |
1313 | 11.7M | // want to avoid untested code. |
1314 | 11.7M | NumRewrittenCopies += Changed; |
1315 | 11.7M | return Changed; |
1316 | 11.7M | } |
1317 | | |
1318 | | /// \brief Optimize copy-like instructions to create |
1319 | | /// register coalescer friendly instruction. |
1320 | | /// The optimization tries to kill-off the \p MI by looking |
1321 | | /// through a chain of copies to find a source that has a compatible |
1322 | | /// register class. |
1323 | | /// If such a source is found, it replaces \p MI by a generic COPY |
1324 | | /// operation. |
1325 | | /// \pre isUncoalescableCopy(*MI) is true. |
1326 | | /// \return True, when \p MI has been optimized. In that case, \p MI has |
1327 | | /// been removed from its parent. |
1328 | | /// All COPY instructions created, are inserted in \p LocalMIs. |
bool PeepholeOptimizer::optimizeUncoalescableCopy(
    MachineInstr *MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
  assert(MI && isUncoalescableCopy(*MI) && "Invalid argument");

  // Check if we can rewrite all the values defined by this instruction.
  SmallVector<TargetInstrInfo::RegSubRegPair, 4> RewritePairs;
  // Get the right rewriter for the current copy.
  std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII, *MRI));
  // If none exists, bail out.
  if (!CpyRewriter)
    return false;

  // Rewrite each rewritable source by generating new COPYs. This works
  // differently from optimizeCoalescableCopy since it first makes sure that all
  // definitions can be rewritten.
  // RewriteMap is filled by findNextSource below and later consumed by
  // RewriteSource; it must therefore accumulate across loop iterations.
  RewriteMapTy RewriteMap;
  // Reg/SubReg are required out-parameters of getNextRewritableSource but are
  // not otherwise used here; only the definition side (CopyDefReg/SubReg)
  // matters for this transformation.
  unsigned Reg, SubReg, CopyDefReg, CopyDefSubReg;
  while (CpyRewriter->getNextRewritableSource(Reg, SubReg, CopyDefReg,
                                              CopyDefSubReg)) {
    // If a physical register is here, this is probably for a good reason.
    // Do not rewrite that.
    if (TargetRegisterInfo::isPhysicalRegister(CopyDefReg))
      return false;

    // If we do not know how to rewrite this definition, there is no point
    // in trying to kill this instruction.
    TargetInstrInfo::RegSubRegPair Def(CopyDefReg, CopyDefSubReg);
    if (!findNextSource(Def.Reg, Def.SubReg, RewriteMap))
      return false;

    RewritePairs.push_back(Def);
  }

  // The change is possible for all defs, do it.
  for (const auto &Def : RewritePairs) {
    // Rewrite the "copy" in a way the register coalescer understands.
    MachineInstr *NewCopy = CpyRewriter->RewriteSource(Def, RewriteMap);
    assert(NewCopy && "Should be able to always generate a new copy");
    // Newly created instructions must be visible to the caller's forward
    // scan, hence the insertion into LocalMIs.
    LocalMIs.insert(NewCopy);
  }

  // MI is now dead.
  MI->eraseFromParent();
  ++NumUncoalescableCopies;
  return true;
}
1375 | | |
1376 | | /// Check whether MI is a candidate for folding into a later instruction. |
1377 | | /// We only fold loads to virtual registers and the virtual register defined |
1378 | | /// has a single use. |
1379 | | bool PeepholeOptimizer::isLoadFoldable( |
1380 | 35.8M | MachineInstr *MI, SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) { |
1381 | 35.8M | if (!MI->canFoldAsLoad() || 35.8M !MI->mayLoad()101k ) |
1382 | 35.7M | return false; |
1383 | 96.5k | const MCInstrDesc &MCID = MI->getDesc(); |
1384 | 96.5k | if (MCID.getNumDefs() != 1) |
1385 | 0 | return false; |
1386 | 96.5k | |
1387 | 96.5k | unsigned Reg = MI->getOperand(0).getReg(); |
1388 | 96.5k | // To reduce compilation time, we check MRI->hasOneNonDBGUse when inserting |
1389 | 96.5k | // loads. It should be checked when processing uses of the load, since |
1390 | 96.5k | // uses can be removed during peephole. |
1391 | 96.5k | if (!MI->getOperand(0).getSubReg() && |
1392 | 96.5k | TargetRegisterInfo::isVirtualRegister(Reg) && |
1393 | 96.5k | MRI->hasOneNonDBGUse(Reg)96.5k ) { |
1394 | 69.2k | FoldAsLoadDefCandidates.insert(Reg); |
1395 | 69.2k | return true; |
1396 | 69.2k | } |
1397 | 27.3k | return false; |
1398 | 27.3k | } |
1399 | | |
1400 | | bool PeepholeOptimizer::isMoveImmediate( |
1401 | | MachineInstr *MI, SmallSet<unsigned, 4> &ImmDefRegs, |
1402 | 35.8M | DenseMap<unsigned, MachineInstr *> &ImmDefMIs) { |
1403 | 35.8M | const MCInstrDesc &MCID = MI->getDesc(); |
1404 | 35.8M | if (!MI->isMoveImmediate()) |
1405 | 34.5M | return false; |
1406 | 1.28M | if (1.28M MCID.getNumDefs() != 11.28M ) |
1407 | 1.57k | return false; |
1408 | 1.28M | unsigned Reg = MI->getOperand(0).getReg(); |
1409 | 1.28M | if (TargetRegisterInfo::isVirtualRegister(Reg)1.28M ) { |
1410 | 1.28M | ImmDefMIs.insert(std::make_pair(Reg, MI)); |
1411 | 1.28M | ImmDefRegs.insert(Reg); |
1412 | 1.28M | return true; |
1413 | 1.28M | } |
1414 | 1.52k | |
1415 | 1.52k | return false; |
1416 | 1.52k | } |
1417 | | |
1418 | | /// Try folding register operands that are defined by move immediate |
1419 | | /// instructions, i.e. a trivial constant folding optimization, if |
1420 | | /// and only if the def and use are in the same BB. |
/// Try folding register operands that are defined by move immediate
/// instructions, i.e. a trivial constant folding optimization, if
/// and only if the def and use are in the same BB.
bool PeepholeOptimizer::foldImmediate(
    MachineInstr *MI, MachineBasicBlock *MBB, SmallSet<unsigned, 4> &ImmDefRegs,
    DenseMap<unsigned, MachineInstr *> &ImmDefMIs) {
  // Note: the loop bound is the number of *declared* operands from the
  // MCInstrDesc, so implicit operands appended at the end of the operand
  // list are deliberately not visited. MBB itself is not used in this body.
  for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    // Only register *uses* can be replaced by an immediate.
    if (!MO.isReg() || MO.isDef())
      continue;
    // Ignore dead implicit defs.
    if (MO.isImplicit() && MO.isDead())
      continue;
    unsigned Reg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
      continue;
    if (ImmDefRegs.count(Reg) == 0)
      continue;
    DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
    assert(II != ImmDefMIs.end() && "couldn't find immediate definition");
    // Let the target decide whether the immediate can be folded into this
    // use; on success we stop after the first fold.
    if (TII->FoldImmediate(*MI, *II->second, Reg, MRI)) {
      ++NumImmFold;
      return true;
    }
  }
  return false;
}
1445 | | |
1446 | | // FIXME: This is very simple and misses some cases which should be handled when |
1447 | | // motivating examples are found. |
1448 | | // |
1449 | | // The copy rewriting logic should look at uses as well as defs and be able to |
1450 | | // eliminate copies across blocks. |
1451 | | // |
1452 | | // Later copies that are subregister extracts will also not be eliminated since |
1453 | | // only the first copy is considered. |
1454 | | // |
1455 | | // e.g. |
1456 | | // %vreg1 = COPY %vreg0 |
1457 | | // %vreg2 = COPY %vreg0:sub1 |
1458 | | // |
1459 | | // Should replace %vreg2 uses with %vreg1:sub1 |
1460 | | bool PeepholeOptimizer::foldRedundantCopy( |
1461 | | MachineInstr *MI, SmallSet<unsigned, 4> &CopySrcRegs, |
1462 | 11.4M | DenseMap<unsigned, MachineInstr *> &CopyMIs) { |
1463 | 11.4M | assert(MI->isCopy() && "expected a COPY machine instruction"); |
1464 | 11.4M | |
1465 | 11.4M | unsigned SrcReg = MI->getOperand(1).getReg(); |
1466 | 11.4M | if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) |
1467 | 3.16M | return false; |
1468 | 8.30M | |
1469 | 8.30M | unsigned DstReg = MI->getOperand(0).getReg(); |
1470 | 8.30M | if (!TargetRegisterInfo::isVirtualRegister(DstReg)) |
1471 | 5.70M | return false; |
1472 | 2.59M | |
1473 | 2.59M | if (2.59M CopySrcRegs.insert(SrcReg).second2.59M ) { |
1474 | 2.51M | // First copy of this reg seen. |
1475 | 2.51M | CopyMIs.insert(std::make_pair(SrcReg, MI)); |
1476 | 2.51M | return false; |
1477 | 2.51M | } |
1478 | 77.9k | |
1479 | 77.9k | MachineInstr *PrevCopy = CopyMIs.find(SrcReg)->second; |
1480 | 77.9k | |
1481 | 77.9k | unsigned SrcSubReg = MI->getOperand(1).getSubReg(); |
1482 | 77.9k | unsigned PrevSrcSubReg = PrevCopy->getOperand(1).getSubReg(); |
1483 | 77.9k | |
1484 | 77.9k | // Can't replace different subregister extracts. |
1485 | 77.9k | if (SrcSubReg != PrevSrcSubReg) |
1486 | 51.1k | return false; |
1487 | 26.7k | |
1488 | 26.7k | unsigned PrevDstReg = PrevCopy->getOperand(0).getReg(); |
1489 | 26.7k | |
1490 | 26.7k | // Only replace if the copy register class is the same. |
1491 | 26.7k | // |
1492 | 26.7k | // TODO: If we have multiple copies to different register classes, we may want |
1493 | 26.7k | // to track multiple copies of the same source register. |
1494 | 26.7k | if (MRI->getRegClass(DstReg) != MRI->getRegClass(PrevDstReg)) |
1495 | 1.75k | return false; |
1496 | 25.0k | |
1497 | 25.0k | MRI->replaceRegWith(DstReg, PrevDstReg); |
1498 | 25.0k | |
1499 | 25.0k | // Lifetime of the previous copy has been extended. |
1500 | 25.0k | MRI->clearKillFlags(PrevDstReg); |
1501 | 25.0k | return true; |
1502 | 25.0k | } |
1503 | | |
1504 | 42.5M | bool PeepholeOptimizer::isNAPhysCopy(unsigned Reg) { |
1505 | 42.5M | return TargetRegisterInfo::isPhysicalRegister(Reg) && |
1506 | 20.1M | !MRI->isAllocatable(Reg); |
1507 | 42.5M | } |
1508 | | |
/// Track copies of non-allocatable physical registers through virtual
/// registers and report when a virt->phys copy merely restores the value the
/// physical register already holds.
/// \p NAPhysToVirtMIs maps a non-allocatable phys reg to the last live
/// `%vreg = COPY %physreg` seen without an intervening clobber (the caller
/// maintains the clobber invalidation).
/// \return true when \p MI is redundant; the caller is responsible for
/// erasing \p MI in that case.
bool PeepholeOptimizer::foldRedundantNAPhysCopy(
    MachineInstr *MI, DenseMap<unsigned, MachineInstr *> &NAPhysToVirtMIs) {
  assert(MI->isCopy() && "expected a COPY machine instruction");

  if (DisableNAPhysCopyOpt)
    return false;

  unsigned DstReg = MI->getOperand(0).getReg();
  unsigned SrcReg = MI->getOperand(1).getReg();
  if (isNAPhysCopy(SrcReg) && TargetRegisterInfo::isVirtualRegister(DstReg)) {
    // %vreg = COPY %PHYSREG
    // Avoid using a datastructure which can track multiple live non-allocatable
    // phys->virt copies since LLVM doesn't seem to do this.
    NAPhysToVirtMIs.insert({SrcReg, MI});
    return false;
  }

  // From here on, only the reverse direction is interesting.
  if (!(TargetRegisterInfo::isVirtualRegister(SrcReg) && isNAPhysCopy(DstReg)))
    return false;

  // %PHYSREG = COPY %vreg
  auto PrevCopy = NAPhysToVirtMIs.find(DstReg);
  if (PrevCopy == NAPhysToVirtMIs.end()) {
    // We can't remove the copy: there was an intervening clobber of the
    // non-allocatable physical register after the copy to virtual.
    DEBUG(dbgs() << "NAPhysCopy: intervening clobber forbids erasing " << *MI
                 << '\n');
    return false;
  }

  unsigned PrevDstReg = PrevCopy->second->getOperand(0).getReg();
  if (PrevDstReg == SrcReg) {
    // Remove the virt->phys copy: we saw the virtual register definition, and
    // the non-allocatable physical register's state hasn't changed since then.
    DEBUG(dbgs() << "NAPhysCopy: erasing " << *MI << '\n');
    ++NumNAPhysCopies;
    return true;
  }

  // Potential missed optimization opportunity: we saw a different virtual
  // register get a copy of the non-allocatable physical register, and we only
  // track one such copy. Avoid getting confused by this new non-allocatable
  // physical register definition, and remove it from the tracked copies.
  DEBUG(dbgs() << "NAPhysCopy: missed opportunity " << *MI << '\n');
  NAPhysToVirtMIs.erase(PrevCopy);
  return false;
}
1556 | | |
/// \brief Returns true if \p MO is a virtual register operand.
1558 | 186k | static bool isVirtualRegisterOperand(MachineOperand &MO) { |
1559 | 186k | if (!MO.isReg()) |
1560 | 0 | return false; |
1561 | 186k | return TargetRegisterInfo::isVirtualRegister(MO.getReg()); |
1562 | 186k | } |
1563 | | |
/// Follow the single-use chain starting at \p Reg, looking for a cycle that
/// reaches one of \p TargetRegs (the incoming values of a PHI). Each
/// instruction on the chain is appended to \p RC, together with the operand
/// commute (if any) needed to keep its def tied to the recurrence input.
/// \return true when such a recurrence cycle was found.
bool PeepholeOptimizer::findTargetRecurrence(
    unsigned Reg, const SmallSet<unsigned, 2> &TargetRegs,
    RecurrenceCycle &RC) {
  // Recurrence found if Reg is in TargetRegs.
  if (TargetRegs.count(Reg))
    return true;

  // TODO: Currently, we only allow the last instruction of the recurrence
  // cycle (the instruction that feeds the PHI instruction) to have more than
  // one use to guarantee that commuting operands does not tie registers
  // with overlapping live range. Once we have actual live range info of
  // each register, this constraint can be relaxed.
  if (!MRI->hasOneNonDBGUse(Reg))
    return false;

  // Give up if the recurrence chain length is longer than the limit.
  if (RC.size() >= MaxRecurrenceChain)
    return false;

  // Safe because of the single-use check above.
  MachineInstr &MI = *(MRI->use_instr_nodbg_begin(Reg));
  unsigned Idx = MI.findRegisterUseOperandIdx(Reg);

  // Only interested in recurrences whose instructions have only one def, which
  // is a virtual register.
  if (MI.getDesc().getNumDefs() != 1)
    return false;

  MachineOperand &DefOp = MI.getOperand(0);
  if (!isVirtualRegisterOperand(DefOp))
    return false;

  // Check if def operand of MI is tied to any use operand. We are only
  // interested in the case that all the instructions in the recurrence chain
  // have their def operand tied with one of the use operands.
  unsigned TiedUseIdx;
  if (!MI.isRegTiedToUseOperand(0, &TiedUseIdx))
    return false;

  if (Idx == TiedUseIdx) {
    // The use we followed is already the tied operand: no commute needed.
    RC.push_back(RecurrenceInstr(&MI));
    return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
  } else {
    // If Idx is not TiedUseIdx, check if Idx is commutable with TiedUseIdx.
    unsigned CommIdx = TargetInstrInfo::CommuteAnyOperandIndex;
    if (TII->findCommutedOpIndices(MI, Idx, CommIdx) && CommIdx == TiedUseIdx) {
      // Record the commute so optimizeRecurrence can apply it later.
      RC.push_back(RecurrenceInstr(&MI, Idx, CommIdx));
      return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
    }
  }

  return false;
}
1616 | | |
1617 | | /// \brief Phi instructions will eventually be lowered to copy instructions. If |
/// phi is in a loop header, a recurrence may be formulated around the source
/// and destination of the phi.
1619 | | /// destination of the phi. For such case commuting operands of the instructions |
1620 | | /// in the recurrence may enable coalescing of the copy instruction generated |
1621 | | /// from the phi. For example, if there is a recurrence of |
1622 | | /// |
1623 | | /// LoopHeader: |
1624 | | /// %vreg1 = phi(%vreg0, %vreg100) |
1625 | | /// LoopLatch: |
1626 | | /// %vreg0<def, tied1> = ADD %vreg2<def, tied0>, %vreg1 |
1627 | | /// |
1628 | | /// , the fact that vreg0 and vreg2 are in the same tied operands set makes |
1629 | | /// the coalescing of copy instruction generated from the phi in |
1630 | | /// LoopHeader(i.e. %vreg1 = COPY %vreg0) impossible, because %vreg1 and |
1631 | | /// %vreg2 have overlapping live range. This introduces additional move |
1632 | | /// instruction to the final assembly. However, if we commute %vreg2 and |
1633 | | /// %vreg1 of ADD instruction, the redundant move instruction can be |
1634 | | /// avoided. |
bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) {
  SmallSet<unsigned, 2> TargetRegs;
  // PHI operands come in (value, predecessor-block) pairs after the def, so
  // start at 1 and step by 2 to collect only the incoming value registers.
  for (unsigned Idx = 1; Idx < PHI.getNumOperands(); Idx += 2) {
    MachineOperand &MO = PHI.getOperand(Idx);
    assert(isVirtualRegisterOperand(MO) && "Invalid PHI instruction");
    TargetRegs.insert(MO.getReg());
  }

  bool Changed = false;
  RecurrenceCycle RC;
  // Try to find a chain from the PHI's def back to one of its inputs.
  if (findTargetRecurrence(PHI.getOperand(0).getReg(), TargetRegs, RC)) {
    // Commutes operands of instructions in RC if necessary so that the copy to
    // be generated from PHI can be coalesced.
    DEBUG(dbgs() << "Optimize recurrence chain from " << PHI);
    for (auto &RI : RC) {
      DEBUG(dbgs() << "\tInst: " << *(RI.getMI()));
      auto CP = RI.getCommutePair();
      // Only instructions that recorded a commute pair need rewriting.
      if (CP) {
        Changed = true;
        TII->commuteInstruction(*(RI.getMI()), false, (*CP).first,
                                (*CP).second);
        DEBUG(dbgs() << "\t\tCommuted: " << *(RI.getMI()));
      }
    }
  }

  return Changed;
}
1663 | | |
1664 | 588k | bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { |
1665 | 588k | if (skipFunction(*MF.getFunction())) |
1666 | 44 | return false; |
1667 | 588k | |
1668 | 588k | DEBUG588k (dbgs() << "********** PEEPHOLE OPTIMIZER **********\n"); |
1669 | 588k | DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n'); |
1670 | 588k | |
1671 | 588k | if (DisablePeephole) |
1672 | 2.21k | return false; |
1673 | 586k | |
1674 | 586k | TII = MF.getSubtarget().getInstrInfo(); |
1675 | 586k | TRI = MF.getSubtarget().getRegisterInfo(); |
1676 | 586k | MRI = &MF.getRegInfo(); |
1677 | 586k | DT = Aggressive ? &getAnalysis<MachineDominatorTree>()0 : nullptr586k ; |
1678 | 586k | MLI = &getAnalysis<MachineLoopInfo>(); |
1679 | 586k | |
1680 | 586k | bool Changed = false; |
1681 | 586k | |
1682 | 4.08M | for (MachineBasicBlock &MBB : MF) { |
1683 | 4.08M | bool SeenMoveImm = false; |
1684 | 4.08M | |
1685 | 4.08M | // During this forward scan, at some point it needs to answer the question |
1686 | 4.08M | // "given a pointer to an MI in the current BB, is it located before or |
1687 | 4.08M | // after the current instruction". |
1688 | 4.08M | // To perform this, the following set keeps track of the MIs already seen |
1689 | 4.08M | // during the scan, if a MI is not in the set, it is assumed to be located |
1690 | 4.08M | // after. Newly created MIs have to be inserted in the set as well. |
1691 | 4.08M | SmallPtrSet<MachineInstr*, 16> LocalMIs; |
1692 | 4.08M | SmallSet<unsigned, 4> ImmDefRegs; |
1693 | 4.08M | DenseMap<unsigned, MachineInstr*> ImmDefMIs; |
1694 | 4.08M | SmallSet<unsigned, 16> FoldAsLoadDefCandidates; |
1695 | 4.08M | |
1696 | 4.08M | // Track when a non-allocatable physical register is copied to a virtual |
1697 | 4.08M | // register so that useless moves can be removed. |
1698 | 4.08M | // |
1699 | 4.08M | // %PHYSREG is the map index; MI is the last valid `%vreg = COPY %PHYSREG` |
1700 | 4.08M | // without any intervening re-definition of %PHYSREG. |
1701 | 4.08M | DenseMap<unsigned, MachineInstr *> NAPhysToVirtMIs; |
1702 | 4.08M | |
1703 | 4.08M | // Set of virtual registers that are copied from. |
1704 | 4.08M | SmallSet<unsigned, 4> CopySrcRegs; |
1705 | 4.08M | DenseMap<unsigned, MachineInstr *> CopySrcMIs; |
1706 | 4.08M | |
1707 | 4.08M | bool IsLoopHeader = MLI->isLoopHeader(&MBB); |
1708 | 4.08M | |
1709 | 4.08M | for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end(); |
1710 | 40.5M | MII != MIE40.5M ; ) { |
1711 | 36.4M | MachineInstr *MI = &*MII; |
1712 | 36.4M | // We may be erasing MI below, increment MII now. |
1713 | 36.4M | ++MII; |
1714 | 36.4M | LocalMIs.insert(MI); |
1715 | 36.4M | |
1716 | 36.4M | // Skip debug values. They should not affect this peephole optimization. |
1717 | 36.4M | if (MI->isDebugValue()) |
1718 | 681 | continue; |
1719 | 36.4M | |
1720 | 36.4M | if (36.4M MI->isPosition()36.4M ) |
1721 | 51.7k | continue; |
1722 | 36.4M | |
1723 | 36.4M | if (36.4M IsLoopHeader && 36.4M MI->isPHI()4.25M ) { |
1724 | 513k | if (optimizeRecurrence(*MI)513k ) { |
1725 | 113 | Changed = true; |
1726 | 113 | continue; |
1727 | 113 | } |
1728 | 36.4M | } |
1729 | 36.4M | |
1730 | 36.4M | if (36.4M !MI->isCopy()36.4M ) { |
1731 | 91.6M | for (const auto &Op : MI->operands()) { |
1732 | 91.6M | // Visit all operands: definitions can be implicit or explicit. |
1733 | 91.6M | if (Op.isReg()91.6M ) { |
1734 | 55.1M | unsigned Reg = Op.getReg(); |
1735 | 55.1M | if (Op.isDef() && 55.1M isNAPhysCopy(Reg)22.8M ) { |
1736 | 7.68M | const auto &Def = NAPhysToVirtMIs.find(Reg); |
1737 | 7.68M | if (Def != NAPhysToVirtMIs.end()7.68M ) { |
1738 | 3.28k | // A new definition of the non-allocatable physical register |
1739 | 3.28k | // invalidates previous copies. |
1740 | 3.28k | DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI |
1741 | 3.28k | << '\n'); |
1742 | 3.28k | NAPhysToVirtMIs.erase(Def); |
1743 | 3.28k | } |
1744 | 7.68M | } |
1745 | 91.6M | } else if (36.5M Op.isRegMask()36.5M ) { |
1746 | 2.37M | const uint32_t *RegMask = Op.getRegMask(); |
1747 | 321k | for (auto &RegMI : NAPhysToVirtMIs) { |
1748 | 321k | unsigned Def = RegMI.first; |
1749 | 321k | if (MachineOperand::clobbersPhysReg(RegMask, Def)321k ) { |
1750 | 321k | DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI |
1751 | 321k | << '\n'); |
1752 | 321k | NAPhysToVirtMIs.erase(Def); |
1753 | 321k | } |
1754 | 321k | } |
1755 | 36.5M | } |
1756 | 91.6M | } |
1757 | 24.9M | } |
1758 | 36.4M | |
1759 | 36.4M | if (MI->isImplicitDef() || 36.4M MI->isKill()36.2M ) |
1760 | 202k | continue; |
1761 | 36.2M | |
1762 | 36.2M | if (36.2M MI->isInlineAsm() || 36.2M MI->hasUnmodeledSideEffects()36.2M ) { |
1763 | 5.34M | // Blow away all non-allocatable physical registers knowledge since we |
1764 | 5.34M | // don't know what's correct anymore. |
1765 | 5.34M | // |
1766 | 5.34M | // FIXME: handle explicit asm clobbers. |
1767 | 5.34M | DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to " << *MI |
1768 | 5.34M | << '\n'); |
1769 | 5.34M | NAPhysToVirtMIs.clear(); |
1770 | 5.34M | } |
1771 | 36.2M | |
1772 | 36.2M | if ((isUncoalescableCopy(*MI) && |
1773 | 9.28k | optimizeUncoalescableCopy(MI, LocalMIs)) || |
1774 | 36.2M | (MI->isCompare() && 36.2M optimizeCmpInstr(MI, &MBB)1.41M ) || |
1775 | 36.2M | (MI->isSelect() && 35.9M optimizeSelect(MI, LocalMIs)2.99k )) { |
1776 | 242k | // MI is deleted. |
1777 | 242k | LocalMIs.erase(MI); |
1778 | 242k | Changed = true; |
1779 | 242k | continue; |
1780 | 242k | } |
1781 | 35.9M | |
1782 | 35.9M | if (35.9M MI->isConditionalBranch() && 35.9M optimizeCondBranch(MI)1.89M ) { |
1783 | 67.2k | Changed = true; |
1784 | 67.2k | continue; |
1785 | 67.2k | } |
1786 | 35.9M | |
1787 | 35.9M | if (35.9M isCoalescableCopy(*MI) && 35.9M optimizeCoalescableCopy(MI)11.7M ) { |
1788 | 20.7k | // MI is just rewritten. |
1789 | 20.7k | Changed = true; |
1790 | 20.7k | continue; |
1791 | 20.7k | } |
1792 | 35.8M | |
1793 | 35.8M | if (35.8M MI->isCopy() && |
1794 | 11.4M | (foldRedundantCopy(MI, CopySrcRegs, CopySrcMIs) || |
1795 | 35.8M | foldRedundantNAPhysCopy(MI, NAPhysToVirtMIs)11.4M )) { |
1796 | 25.4k | LocalMIs.erase(MI); |
1797 | 25.4k | MI->eraseFromParent(); |
1798 | 25.4k | Changed = true; |
1799 | 25.4k | continue; |
1800 | 25.4k | } |
1801 | 35.8M | |
1802 | 35.8M | if (35.8M isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)35.8M ) { |
1803 | 1.28M | SeenMoveImm = true; |
1804 | 35.8M | } else { |
1805 | 34.5M | Changed |= optimizeExtInstr(MI, &MBB, LocalMIs); |
1806 | 34.5M | // optimizeExtInstr might have created new instructions after MI |
1807 | 34.5M | // and before the already incremented MII. Adjust MII so that the |
1808 | 34.5M | // next iteration sees the new instructions. |
1809 | 34.5M | MII = MI; |
1810 | 34.5M | ++MII; |
1811 | 34.5M | if (SeenMoveImm) |
1812 | 9.59M | Changed |= foldImmediate(MI, &MBB, ImmDefRegs, ImmDefMIs); |
1813 | 34.5M | } |
1814 | 35.8M | |
1815 | 35.8M | // Check whether MI is a load candidate for folding into a later |
1816 | 35.8M | // instruction. If MI is not a candidate, check whether we can fold an |
1817 | 35.8M | // earlier load into MI. |
1818 | 35.8M | if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) && |
1819 | 35.8M | !FoldAsLoadDefCandidates.empty()35.7M ) { |
1820 | 135k | |
1821 | 135k | // We visit each operand even after successfully folding a previous |
1822 | 135k | // one. This allows us to fold multiple loads into a single |
1823 | 135k | // instruction. We do assume that optimizeLoadInstr doesn't insert |
1824 | 135k | // foldable uses earlier in the argument list. Since we don't restart |
1825 | 135k | // iteration, we'd miss such cases. |
1826 | 135k | const MCInstrDesc &MIDesc = MI->getDesc(); |
1827 | 580k | for (unsigned i = MIDesc.getNumDefs(); i != MI->getNumOperands(); |
1828 | 444k | ++i444k ) { |
1829 | 444k | const MachineOperand &MOp = MI->getOperand(i); |
1830 | 444k | if (!MOp.isReg()) |
1831 | 151k | continue; |
1832 | 293k | unsigned FoldAsLoadDefReg = MOp.getReg(); |
1833 | 293k | if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)293k ) { |
1834 | 46.3k | // We need to fold load after optimizeCmpInstr, since |
1835 | 46.3k | // optimizeCmpInstr can enable folding by converting SUB to CMP. |
1836 | 46.3k | // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and |
1837 | 46.3k | // we need it for markUsesInDebugValueAsUndef(). |
1838 | 46.3k | unsigned FoldedReg = FoldAsLoadDefReg; |
1839 | 46.3k | MachineInstr *DefMI = nullptr; |
1840 | 46.3k | if (MachineInstr *FoldMI = |
1841 | 2.76k | TII->optimizeLoadInstr(*MI, MRI, FoldAsLoadDefReg, DefMI)) { |
1842 | 2.76k | // Update LocalMIs since we replaced MI with FoldMI and deleted |
1843 | 2.76k | // DefMI. |
1844 | 2.76k | DEBUG(dbgs() << "Replacing: " << *MI); |
1845 | 2.76k | DEBUG(dbgs() << " With: " << *FoldMI); |
1846 | 2.76k | LocalMIs.erase(MI); |
1847 | 2.76k | LocalMIs.erase(DefMI); |
1848 | 2.76k | LocalMIs.insert(FoldMI); |
1849 | 2.76k | MI->eraseFromParent(); |
1850 | 2.76k | DefMI->eraseFromParent(); |
1851 | 2.76k | MRI->markUsesInDebugValueAsUndef(FoldedReg); |
1852 | 2.76k | FoldAsLoadDefCandidates.erase(FoldedReg); |
1853 | 2.76k | ++NumLoadFold; |
1854 | 2.76k | |
1855 | 2.76k | // MI is replaced with FoldMI so we can continue trying to fold |
1856 | 2.76k | Changed = true; |
1857 | 2.76k | MI = FoldMI; |
1858 | 2.76k | } |
1859 | 46.3k | } |
1860 | 444k | } |
1861 | 135k | } |
1862 | 35.8M | |
1863 | 35.8M | // If we run into an instruction we can't fold across, discard |
1864 | 35.8M | // the load candidates. Note: We might be able to fold *into* this |
1865 | 35.8M | // instruction, so this needs to be after the folding logic. |
1866 | 35.8M | if (MI->isLoadFoldBarrier()35.8M ) { |
1867 | 9.58M | DEBUG(dbgs() << "Encountered load fold barrier on " << *MI << "\n"); |
1868 | 9.58M | FoldAsLoadDefCandidates.clear(); |
1869 | 9.58M | } |
1870 | 36.4M | } |
1871 | 4.08M | } |
1872 | 588k | |
1873 | 588k | return Changed; |
1874 | 588k | } |
1875 | | |
1876 | 5.87M | ValueTrackerResult ValueTracker::getNextSourceFromCopy() { |
1877 | 5.87M | assert(Def->isCopy() && "Invalid definition"); |
1878 | 5.87M | // Copy instruction are supposed to be: Def = Src. |
1879 | 5.87M | // If someone breaks this assumption, bad things will happen everywhere. |
1880 | 5.87M | assert(Def->getNumOperands() == 2 && "Invalid number of operands"); |
1881 | 5.87M | |
1882 | 5.87M | if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) |
1883 | 5.87M | // If we look for a different subreg, it means we want a subreg of src. |
1884 | 5.87M | // Bails as we do not support composing subregs yet. |
1885 | 9.32k | return ValueTrackerResult(); |
1886 | 5.86M | // Otherwise, we want the whole source. |
1887 | 5.86M | const MachineOperand &Src = Def->getOperand(1); |
1888 | 5.86M | return ValueTrackerResult(Src.getReg(), Src.getSubReg()); |
1889 | 5.86M | } |
1890 | | |
/// Look through a bitcast-like instruction to find the single register source
/// feeding the tracked definition.
ValueTrackerResult ValueTracker::getNextSourceFromBitcast() {
  assert(Def->isBitcast() && "Invalid definition");

  // Bail if there are effects that a plain copy will not expose.
  if (Def->hasUnmodeledSideEffects())
    return ValueTrackerResult();

  // Bitcasts with more than one def are not supported.
  if (Def->getDesc().getNumDefs() != 1)
    return ValueTrackerResult();
  const MachineOperand DefOp = Def->getOperand(DefIdx);
  if (DefOp.getSubReg() != DefSubReg)
    // If we look for a different subreg, it means we want a subreg of the src.
    // Bails as we do not support composing subregs yet.
    return ValueTrackerResult();

  // SrcIdx starts as an out-of-range sentinel (== operand count) so we can
  // detect both "no source found" and "more than one source".
  unsigned SrcIdx = Def->getNumOperands();
  for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx;
       ++OpIdx) {
    const MachineOperand &MO = Def->getOperand(OpIdx);
    if (!MO.isReg() || !MO.getReg())
      continue;
    // Ignore dead implicit defs.
    if (MO.isImplicit() && MO.isDead())
      continue;
    assert(!MO.isDef() && "We should have skipped all the definitions by now");
    if (SrcIdx != EndOpIdx)
      // Multiple sources?
      return ValueTrackerResult();
    SrcIdx = OpIdx;
  }

  // Stop when any user of the bitcast is a SUBREG_TO_REG, replacing with a COPY
  // will break the assumed guarantees for the upper bits.
  for (const MachineInstr &UseMI : MRI.use_nodbg_instructions(DefOp.getReg())) {
    if (UseMI.isSubregToReg())
      return ValueTrackerResult();
  }

  const MachineOperand &Src = Def->getOperand(SrcIdx);
  return ValueTrackerResult(Src.getReg(), Src.getSubReg());
}
1933 | | |
1934 | 169k | ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() { |
1935 | 169k | assert((Def->isRegSequence() || Def->isRegSequenceLike()) && |
1936 | 169k | "Invalid definition"); |
1937 | 169k | |
1938 | 169k | if (Def->getOperand(DefIdx).getSubReg()) |
1939 | 169k | // If we are composing subregs, bail out. |
1940 | 169k | // The case we are checking is Def.<subreg> = REG_SEQUENCE. |
1941 | 169k | // This should almost never happen as the SSA property is tracked at |
1942 | 169k | // the register level (as opposed to the subreg level). |
1943 | 169k | // I.e., |
1944 | 169k | // Def.sub0 = |
1945 | 169k | // Def.sub1 = |
1946 | 169k | // is a valid SSA representation for Def.sub0 and Def.sub1, but not for |
1947 | 169k | // Def. Thus, it must not be generated. |
1948 | 169k | // However, some code could theoretically generates a single |
1949 | 169k | // Def.sub0 (i.e, not defining the other subregs) and we would |
1950 | 169k | // have this case. |
1951 | 169k | // If we can ascertain (or force) that this never happens, we could |
1952 | 169k | // turn that into an assertion. |
1953 | 0 | return ValueTrackerResult(); |
1954 | 169k | |
1955 | 169k | if (169k !TII169k ) |
1956 | 169k | // We could handle the REG_SEQUENCE here, but we do not want to |
1957 | 169k | // duplicate the code from the generic TII. |
1958 | 0 | return ValueTrackerResult(); |
1959 | 169k | |
1960 | 169k | SmallVector<TargetInstrInfo::RegSubRegPairAndIdx, 8> RegSeqInputRegs; |
1961 | 169k | if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs)) |
1962 | 0 | return ValueTrackerResult(); |
1963 | 169k | |
1964 | 169k | // We are looking at: |
1965 | 169k | // Def = REG_SEQUENCE v0, sub0, v1, sub1, ... |
1966 | 169k | // Check if one of the operand defines the subreg we are interested in. |
1967 | 169k | for (auto &RegSeqInput : RegSeqInputRegs) 169k { |
1968 | 349k | if (RegSeqInput.SubIdx == DefSubReg349k ) { |
1969 | 165k | if (RegSeqInput.SubReg) |
1970 | 165k | // Bail if we have to compose sub registers. |
1971 | 1.91k | return ValueTrackerResult(); |
1972 | 163k | |
1973 | 163k | return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg); |
1974 | 163k | } |
1975 | 349k | } |
1976 | 4.73k | |
1977 | 4.73k | // If the subreg we are tracking is super-defined by another subreg, |
1978 | 4.73k | // we could follow this value. However, this would require to compose |
1979 | 4.73k | // the subreg and we do not do that for now. |
1980 | 4.73k | return ValueTrackerResult(); |
1981 | 4.73k | } |
1982 | | |
1983 | 198k | ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() { |
1984 | 198k | assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) && |
1985 | 198k | "Invalid definition"); |
1986 | 198k | |
1987 | 198k | if (Def->getOperand(DefIdx).getSubReg()) |
1988 | 198k | // If we are composing subreg, bail out. |
1989 | 198k | // Same remark as getNextSourceFromRegSequence. |
1990 | 198k | // I.e., this may be turned into an assert. |
1991 | 0 | return ValueTrackerResult(); |
1992 | 198k | |
1993 | 198k | if (198k !TII198k ) |
1994 | 198k | // We could handle the REG_SEQUENCE here, but we do not want to |
1995 | 198k | // duplicate the code from the generic TII. |
1996 | 0 | return ValueTrackerResult(); |
1997 | 198k | |
1998 | 198k | TargetInstrInfo::RegSubRegPair BaseReg; |
1999 | 198k | TargetInstrInfo::RegSubRegPairAndIdx InsertedReg; |
2000 | 198k | if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg)) |
2001 | 0 | return ValueTrackerResult(); |
2002 | 198k | |
2003 | 198k | // We are looking at: |
2004 | 198k | // Def = INSERT_SUBREG v0, v1, sub1 |
2005 | 198k | // There are two cases: |
2006 | 198k | // 1. DefSubReg == sub1, get v1. |
2007 | 198k | // 2. DefSubReg != sub1, the value may be available through v0. |
2008 | 198k | |
2009 | 198k | // #1 Check if the inserted register matches the required sub index. |
2010 | 198k | if (198k InsertedReg.SubIdx == DefSubReg198k ) { |
2011 | 192k | return ValueTrackerResult(InsertedReg.Reg, InsertedReg.SubReg); |
2012 | 192k | } |
2013 | 6.44k | // #2 Otherwise, if the sub register we are looking for is not partial |
2014 | 6.44k | // defined by the inserted element, we can look through the main |
2015 | 6.44k | // register (v0). |
2016 | 6.44k | const MachineOperand &MODef = Def->getOperand(DefIdx); |
2017 | 6.44k | // If the result register (Def) and the base register (v0) do not |
2018 | 6.44k | // have the same register class or if we have to compose |
2019 | 6.44k | // subregisters, bail out. |
2020 | 6.44k | if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) || |
2021 | 6.08k | BaseReg.SubReg) |
2022 | 360 | return ValueTrackerResult(); |
2023 | 6.08k | |
2024 | 6.08k | // Get the TRI and check if the inserted sub-register overlaps with the |
2025 | 6.08k | // sub-register we are tracking. |
2026 | 6.08k | const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); |
2027 | 6.08k | if (!TRI || |
2028 | 6.08k | !(TRI->getSubRegIndexLaneMask(DefSubReg) & |
2029 | 6.08k | TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)).none()) |
2030 | 6.07k | return ValueTrackerResult(); |
2031 | 7 | // At this point, the value is available in v0 via the same subreg |
2032 | 7 | // we used for Def. |
2033 | 7 | return ValueTrackerResult(BaseReg.Reg, DefSubReg); |
2034 | 7 | } |
2035 | | |
2036 | 3.59k | ValueTrackerResult ValueTracker::getNextSourceFromExtractSubreg() { |
2037 | 3.59k | assert((Def->isExtractSubreg() || |
2038 | 3.59k | Def->isExtractSubregLike()) && "Invalid definition"); |
2039 | 3.59k | // We are looking at: |
2040 | 3.59k | // Def = EXTRACT_SUBREG v0, sub0 |
2041 | 3.59k | |
2042 | 3.59k | // Bail if we have to compose sub registers. |
2043 | 3.59k | // Indeed, if DefSubReg != 0, we would have to compose it with sub0. |
2044 | 3.59k | if (DefSubReg) |
2045 | 0 | return ValueTrackerResult(); |
2046 | 3.59k | |
2047 | 3.59k | if (3.59k !TII3.59k ) |
2048 | 3.59k | // We could handle the EXTRACT_SUBREG here, but we do not want to |
2049 | 3.59k | // duplicate the code from the generic TII. |
2050 | 0 | return ValueTrackerResult(); |
2051 | 3.59k | |
2052 | 3.59k | TargetInstrInfo::RegSubRegPairAndIdx ExtractSubregInputReg; |
2053 | 3.59k | if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg)) |
2054 | 0 | return ValueTrackerResult(); |
2055 | 3.59k | |
2056 | 3.59k | // Bail if we have to compose sub registers. |
2057 | 3.59k | // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0. |
2058 | 3.59k | if (3.59k ExtractSubregInputReg.SubReg3.59k ) |
2059 | 0 | return ValueTrackerResult(); |
2060 | 3.59k | // Otherwise, the value is available in the v0.sub0. |
2061 | 3.59k | return ValueTrackerResult(ExtractSubregInputReg.Reg, |
2062 | 3.59k | ExtractSubregInputReg.SubIdx); |
2063 | 3.59k | } |
2064 | | |
2065 | 91 | ValueTrackerResult ValueTracker::getNextSourceFromSubregToReg() { |
2066 | 91 | assert(Def->isSubregToReg() && "Invalid definition"); |
2067 | 91 | // We are looking at: |
2068 | 91 | // Def = SUBREG_TO_REG Imm, v0, sub0 |
2069 | 91 | |
2070 | 91 | // Bail if we have to compose sub registers. |
2071 | 91 | // If DefSubReg != sub0, we would have to check that all the bits |
2072 | 91 | // we track are included in sub0 and if yes, we would have to |
2073 | 91 | // determine the right subreg in v0. |
2074 | 91 | if (DefSubReg != Def->getOperand(3).getImm()) |
2075 | 75 | return ValueTrackerResult(); |
2076 | 16 | // Bail if we have to compose sub registers. |
2077 | 16 | // Likewise, if v0.subreg != 0, we would have to compose it with sub0. |
2078 | 16 | if (16 Def->getOperand(2).getSubReg()16 ) |
2079 | 0 | return ValueTrackerResult(); |
2080 | 16 | |
2081 | 16 | return ValueTrackerResult(Def->getOperand(2).getReg(), |
2082 | 16 | Def->getOperand(3).getImm()); |
2083 | 16 | } |
2084 | | |
2085 | | /// \brief Explore each PHI incoming operand and return its sources |
2086 | 682 | ValueTrackerResult ValueTracker::getNextSourceFromPHI() { |
2087 | 682 | assert(Def->isPHI() && "Invalid definition"); |
2088 | 682 | ValueTrackerResult Res; |
2089 | 682 | |
2090 | 682 | // If we look for a different subreg, bail as we do not support composing |
2091 | 682 | // subregs yet. |
2092 | 682 | if (Def->getOperand(0).getSubReg() != DefSubReg) |
2093 | 328 | return ValueTrackerResult(); |
2094 | 354 | |
2095 | 354 | // Return all register sources for PHI instructions. |
2096 | 1.32k | for (unsigned i = 1, e = Def->getNumOperands(); 354 i < e1.32k ; i += 2967 ) { |
2097 | 967 | auto &MO = Def->getOperand(i); |
2098 | 967 | assert(MO.isReg() && "Invalid PHI instruction"); |
2099 | 967 | Res.addSource(MO.getReg(), MO.getSubReg()); |
2100 | 967 | } |
2101 | 682 | |
2102 | 682 | return Res; |
2103 | 682 | } |
2104 | | |
2105 | 6.50M | ValueTrackerResult ValueTracker::getNextSourceImpl() { |
2106 | 6.50M | assert(Def && "This method needs a valid definition"); |
2107 | 6.50M | |
2108 | 6.50M | assert(((Def->getOperand(DefIdx).isDef() && |
2109 | 6.50M | (DefIdx < Def->getDesc().getNumDefs() || |
2110 | 6.50M | Def->getDesc().isVariadic())) || |
2111 | 6.50M | Def->getOperand(DefIdx).isImplicit()) && |
2112 | 6.50M | "Invalid DefIdx"); |
2113 | 6.50M | if (Def->isCopy()) |
2114 | 5.87M | return getNextSourceFromCopy(); |
2115 | 634k | if (634k Def->isBitcast()634k ) |
2116 | 4.00k | return getNextSourceFromBitcast(); |
2117 | 630k | // All the remaining cases involve "complex" instructions. |
2118 | 630k | // Bail if we did not ask for the advanced tracking. |
2119 | 630k | if (630k !UseAdvancedTracking630k ) |
2120 | 38 | return ValueTrackerResult(); |
2121 | 630k | if (630k Def->isRegSequence() || 630k Def->isRegSequenceLike()463k ) |
2122 | 169k | return getNextSourceFromRegSequence(); |
2123 | 460k | if (460k Def->isInsertSubreg() || 460k Def->isInsertSubregLike()262k ) |
2124 | 198k | return getNextSourceFromInsertSubreg(); |
2125 | 262k | if (262k Def->isExtractSubreg() || 262k Def->isExtractSubregLike()262k ) |
2126 | 3.59k | return getNextSourceFromExtractSubreg(); |
2127 | 258k | if (258k Def->isSubregToReg()258k ) |
2128 | 91 | return getNextSourceFromSubregToReg(); |
2129 | 258k | if (258k Def->isPHI()258k ) |
2130 | 682 | return getNextSourceFromPHI(); |
2131 | 257k | return ValueTrackerResult(); |
2132 | 257k | } |
2133 | | |
2134 | 6.50M | ValueTrackerResult ValueTracker::getNextSource() { |
2135 | 6.50M | // If we reach a point where we cannot move up in the use-def chain, |
2136 | 6.50M | // there is nothing we can get. |
2137 | 6.50M | if (!Def) |
2138 | 0 | return ValueTrackerResult(); |
2139 | 6.50M | |
2140 | 6.50M | ValueTrackerResult Res = getNextSourceImpl(); |
2141 | 6.50M | if (Res.isValid()6.50M ) { |
2142 | 6.22M | // Update definition, definition index, and subregister for the |
2143 | 6.22M | // next call of getNextSource. |
2144 | 6.22M | // Update the current register. |
2145 | 6.22M | bool OneRegSrc = Res.getNumSources() == 1; |
2146 | 6.22M | if (OneRegSrc) |
2147 | 6.22M | Reg = Res.getSrcReg(0); |
2148 | 6.22M | // Update the result before moving up in the use-def chain |
2149 | 6.22M | // with the instruction containing the last found sources. |
2150 | 6.22M | Res.setInst(Def); |
2151 | 6.22M | |
2152 | 6.22M | // If we can still move up in the use-def chain, move to the next |
2153 | 6.22M | // definition. |
2154 | 6.22M | if (!TargetRegisterInfo::isPhysicalRegister(Reg) && 6.22M OneRegSrc3.04M ) { |
2155 | 3.04M | Def = MRI.getVRegDef(Reg); |
2156 | 3.04M | DefIdx = MRI.def_begin(Reg).getOperandNo(); |
2157 | 3.04M | DefSubReg = Res.getSrcSubReg(0); |
2158 | 3.04M | return Res; |
2159 | 3.04M | } |
2160 | 3.45M | } |
2161 | 3.45M | // If we end up here, this means we will not be able to find another source |
2162 | 3.45M | // for the next iteration. Make sure any new call to getNextSource bails out |
2163 | 3.45M | // early by cutting the use-def chain. |
2164 | 3.45M | Def = nullptr; |
2165 | 3.45M | return Res; |
2166 | 3.45M | } |