/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- AMDGPUMachineCFGStructurizer.cpp - Machine code if conversion pass. ===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This file implements the machine instruction level CFG structurizer pass. |
11 | | // |
12 | | //===----------------------------------------------------------------------===// |
13 | | |
14 | | #include "AMDGPU.h" |
15 | | #include "AMDGPUSubtarget.h" |
16 | | #include "SIInstrInfo.h" |
17 | | #include "llvm/ADT/ArrayRef.h" |
18 | | #include "llvm/ADT/DenseMap.h" |
19 | | #include "llvm/ADT/DenseSet.h" |
20 | | #include "llvm/ADT/PostOrderIterator.h" |
21 | | #include "llvm/ADT/SetVector.h" |
22 | | #include "llvm/ADT/SmallPtrSet.h" |
23 | | #include "llvm/ADT/SmallVector.h" |
24 | | #include "llvm/CodeGen/MachineBasicBlock.h" |
25 | | #include "llvm/CodeGen/MachineFunction.h" |
26 | | #include "llvm/CodeGen/MachineFunctionPass.h" |
27 | | #include "llvm/CodeGen/MachineInstr.h" |
28 | | #include "llvm/CodeGen/MachineInstrBuilder.h" |
29 | | #include "llvm/CodeGen/MachineOperand.h" |
30 | | #include "llvm/CodeGen/MachineRegionInfo.h" |
31 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
32 | | #include "llvm/IR/DebugLoc.h" |
33 | | #include "llvm/Pass.h" |
34 | | #include "llvm/Support/Compiler.h" |
35 | | #include "llvm/Support/Debug.h" |
36 | | #include "llvm/Support/ErrorHandling.h" |
37 | | #include "llvm/Support/raw_ostream.h" |
38 | | #include "llvm/Target/TargetOpcodes.h" |
39 | | #include "llvm/Target/TargetRegisterInfo.h" |
40 | | #include <cassert> |
41 | | #include <tuple> |
42 | | #include <utility> |
43 | | |
44 | | using namespace llvm; |
45 | | |
46 | | #define DEBUG_TYPE "amdgpucfgstructurizer" |
47 | | |
48 | | namespace { |
49 | | |
50 | | class PHILinearizeDestIterator; |
51 | | |
52 | | class PHILinearize { |
53 | | friend class PHILinearizeDestIterator; |
54 | | |
55 | | public: |
56 | | using PHISourceT = std::pair<unsigned, MachineBasicBlock *>; |
57 | | |
58 | | private: |
59 | | using PHISourcesT = DenseSet<PHISourceT>; |
60 | | using PHIInfoElementT = struct { |
61 | | unsigned DestReg; |
62 | | DebugLoc DL; |
63 | | PHISourcesT Sources; |
64 | | }; |
65 | | using PHIInfoT = SmallPtrSet<PHIInfoElementT *, 2>; |
66 | | PHIInfoT PHIInfo; |
67 | | |
68 | | static unsigned phiInfoElementGetDest(PHIInfoElementT *Info); |
69 | | static void phiInfoElementSetDef(PHIInfoElementT *Info, unsigned NewDef); |
70 | | static PHISourcesT &phiInfoElementGetSources(PHIInfoElementT *Info); |
71 | | static void phiInfoElementAddSource(PHIInfoElementT *Info, unsigned SourceReg, |
72 | | MachineBasicBlock *SourceMBB); |
73 | | static void phiInfoElementRemoveSource(PHIInfoElementT *Info, |
74 | | unsigned SourceReg, |
75 | | MachineBasicBlock *SourceMBB); |
76 | | PHIInfoElementT *findPHIInfoElement(unsigned DestReg); |
77 | | PHIInfoElementT *findPHIInfoElementFromSource(unsigned SourceReg, |
78 | | MachineBasicBlock *SourceMBB); |
79 | | |
80 | | public: |
81 | | bool findSourcesFromMBB(MachineBasicBlock *SourceMBB, |
82 | | SmallVector<unsigned, 4> &Sources); |
83 | | void addDest(unsigned DestReg, const DebugLoc &DL); |
84 | | void replaceDef(unsigned OldDestReg, unsigned NewDestReg); |
85 | | void deleteDef(unsigned DestReg); |
86 | | void addSource(unsigned DestReg, unsigned SourceReg, |
87 | | MachineBasicBlock *SourceMBB); |
88 | | void removeSource(unsigned DestReg, unsigned SourceReg, |
89 | | MachineBasicBlock *SourceMBB = nullptr); |
90 | | bool findDest(unsigned SourceReg, MachineBasicBlock *SourceMBB, |
91 | | unsigned &DestReg); |
92 | | bool isSource(unsigned Reg, MachineBasicBlock *SourceMBB = nullptr); |
93 | | unsigned getNumSources(unsigned DestReg); |
94 | | void dump(MachineRegisterInfo *MRI); |
95 | | void clear(); |
96 | | |
97 | | using source_iterator = PHISourcesT::iterator; |
98 | | using dest_iterator = PHILinearizeDestIterator; |
99 | | |
100 | | dest_iterator dests_begin(); |
101 | | dest_iterator dests_end(); |
102 | | |
103 | | source_iterator sources_begin(unsigned Reg); |
104 | | source_iterator sources_end(unsigned Reg); |
105 | | }; |
106 | | |
107 | | class PHILinearizeDestIterator { |
108 | | private: |
109 | | PHILinearize::PHIInfoT::iterator Iter; |
110 | | |
111 | | public: |
112 | 0 | PHILinearizeDestIterator(PHILinearize::PHIInfoT::iterator I) : Iter(I) {} |
113 | | |
114 | 0 | unsigned operator*() { return PHILinearize::phiInfoElementGetDest(*Iter); } |
115 | 0 | PHILinearizeDestIterator &operator++() { |
116 | 0 | ++Iter; |
117 | 0 | return *this; |
118 | 0 | } |
119 | 0 | bool operator==(const PHILinearizeDestIterator &I) const { |
120 | 0 | return I.Iter == Iter; |
121 | 0 | } |
122 | 0 | bool operator!=(const PHILinearizeDestIterator &I) const { |
123 | 0 | return I.Iter != Iter; |
124 | 0 | } |
125 | | }; |
126 | | |
127 | | } // end anonymous namespace |
128 | | |
129 | 0 | unsigned PHILinearize::phiInfoElementGetDest(PHIInfoElementT *Info) { |
130 | 0 | return Info->DestReg; |
131 | 0 | } |
132 | | |
133 | | void PHILinearize::phiInfoElementSetDef(PHIInfoElementT *Info, |
134 | 0 | unsigned NewDef) { |
135 | 0 | Info->DestReg = NewDef; |
136 | 0 | } |
137 | | |
138 | | PHILinearize::PHISourcesT & |
139 | 0 | PHILinearize::phiInfoElementGetSources(PHIInfoElementT *Info) { |
140 | 0 | return Info->Sources; |
141 | 0 | } |
142 | | |
143 | | void PHILinearize::phiInfoElementAddSource(PHIInfoElementT *Info, |
144 | | unsigned SourceReg, |
145 | 0 | MachineBasicBlock *SourceMBB) { |
146 | 0 | // Assertion ensures we don't use the same SourceMBB for the |
147 | 0 | // sources, because we cannot have different registers with |
148 | 0 | // identical predecessors, but we can have the same register for |
149 | 0 | // multiple predecessors. |
150 | | #if !defined(NDEBUG) |
151 | | for (auto SI : phiInfoElementGetSources(Info)) { |
152 | | assert((SI.second != SourceMBB || SourceReg == SI.first)); |
153 | | } |
154 | | #endif |
155 | |
|
156 | 0 | phiInfoElementGetSources(Info).insert(PHISourceT(SourceReg, SourceMBB)); |
157 | 0 | } |
158 | | |
159 | | void PHILinearize::phiInfoElementRemoveSource(PHIInfoElementT *Info, |
160 | | unsigned SourceReg, |
161 | 0 | MachineBasicBlock *SourceMBB) { |
162 | 0 | auto &Sources = phiInfoElementGetSources(Info); |
163 | 0 | SmallVector<PHISourceT, 4> ElimiatedSources; |
164 | 0 | for (auto SI : Sources) { |
165 | 0 | if (SI.first == SourceReg && |
166 | 0 | (SI.second == nullptr || 0 SI.second == SourceMBB0 )) { |
167 | 0 | ElimiatedSources.push_back(PHISourceT(SI.first, SI.second)); |
168 | 0 | } |
169 | 0 | } |
170 | 0 |
|
171 | 0 | for (auto &Source : ElimiatedSources) { |
172 | 0 | Sources.erase(Source); |
173 | 0 | } |
174 | 0 | } |
175 | | |
176 | | PHILinearize::PHIInfoElementT * |
177 | 0 | PHILinearize::findPHIInfoElement(unsigned DestReg) { |
178 | 0 | for (auto I : PHIInfo) { |
179 | 0 | if (phiInfoElementGetDest(I) == DestReg0 ) { |
180 | 0 | return I; |
181 | 0 | } |
182 | 0 | } |
183 | 0 | return nullptr; |
184 | 0 | } |
185 | | |
186 | | PHILinearize::PHIInfoElementT * |
187 | | PHILinearize::findPHIInfoElementFromSource(unsigned SourceReg, |
188 | 0 | MachineBasicBlock *SourceMBB) { |
189 | 0 | for (auto I : PHIInfo) { |
190 | 0 | for (auto SI : phiInfoElementGetSources(I)) { |
191 | 0 | if (SI.first == SourceReg && |
192 | 0 | (SI.second == nullptr || 0 SI.second == SourceMBB0 )) { |
193 | 0 | return I; |
194 | 0 | } |
195 | 0 | } |
196 | 0 | } |
197 | 0 | return nullptr; |
198 | 0 | } |
199 | | |
200 | | bool PHILinearize::findSourcesFromMBB(MachineBasicBlock *SourceMBB, |
201 | 0 | SmallVector<unsigned, 4> &Sources) { |
202 | 0 | bool FoundSource = false; |
203 | 0 | for (auto I : PHIInfo) { |
204 | 0 | for (auto SI : phiInfoElementGetSources(I)) { |
205 | 0 | if (SI.second == SourceMBB0 ) { |
206 | 0 | FoundSource = true; |
207 | 0 | Sources.push_back(SI.first); |
208 | 0 | } |
209 | 0 | } |
210 | 0 | } |
211 | 0 | return FoundSource; |
212 | 0 | } |
213 | | |
214 | 0 | void PHILinearize::addDest(unsigned DestReg, const DebugLoc &DL) { |
215 | 0 | assert(findPHIInfoElement(DestReg) == nullptr && "Dest already exsists"); |
216 | 0 | PHISourcesT EmptySet; |
217 | 0 | PHIInfoElementT *NewElement = new PHIInfoElementT(); |
218 | 0 | NewElement->DestReg = DestReg; |
219 | 0 | NewElement->DL = DL; |
220 | 0 | NewElement->Sources = EmptySet; |
221 | 0 | PHIInfo.insert(NewElement); |
222 | 0 | } |
223 | | |
224 | 0 | void PHILinearize::replaceDef(unsigned OldDestReg, unsigned NewDestReg) { |
225 | 0 | phiInfoElementSetDef(findPHIInfoElement(OldDestReg), NewDestReg); |
226 | 0 | } |
227 | | |
228 | 0 | void PHILinearize::deleteDef(unsigned DestReg) { |
229 | 0 | PHIInfoElementT *InfoElement = findPHIInfoElement(DestReg); |
230 | 0 | PHIInfo.erase(InfoElement); |
231 | 0 | delete InfoElement; |
232 | 0 | } |
233 | | |
234 | | void PHILinearize::addSource(unsigned DestReg, unsigned SourceReg, |
235 | 0 | MachineBasicBlock *SourceMBB) { |
236 | 0 | phiInfoElementAddSource(findPHIInfoElement(DestReg), SourceReg, SourceMBB); |
237 | 0 | } |
238 | | |
239 | | void PHILinearize::removeSource(unsigned DestReg, unsigned SourceReg, |
240 | 0 | MachineBasicBlock *SourceMBB) { |
241 | 0 | phiInfoElementRemoveSource(findPHIInfoElement(DestReg), SourceReg, SourceMBB); |
242 | 0 | } |
243 | | |
244 | | bool PHILinearize::findDest(unsigned SourceReg, MachineBasicBlock *SourceMBB, |
245 | 0 | unsigned &DestReg) { |
246 | 0 | PHIInfoElementT *InfoElement = |
247 | 0 | findPHIInfoElementFromSource(SourceReg, SourceMBB); |
248 | 0 | if (InfoElement != nullptr0 ) { |
249 | 0 | DestReg = phiInfoElementGetDest(InfoElement); |
250 | 0 | return true; |
251 | 0 | } |
252 | 0 | return false; |
253 | 0 | } |
254 | | |
255 | 0 | bool PHILinearize::isSource(unsigned Reg, MachineBasicBlock *SourceMBB) { |
256 | 0 | unsigned DestReg; |
257 | 0 | return findDest(Reg, SourceMBB, DestReg); |
258 | 0 | } |
259 | | |
260 | 0 | unsigned PHILinearize::getNumSources(unsigned DestReg) { |
261 | 0 | return phiInfoElementGetSources(findPHIInfoElement(DestReg)).size(); |
262 | 0 | } |
263 | | |
264 | | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
265 | | LLVM_DUMP_METHOD void PHILinearize::dump(MachineRegisterInfo *MRI) { |
266 | | const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); |
267 | | dbgs() << "=PHIInfo Start=\n"; |
268 | | for (auto PII : this->PHIInfo) { |
269 | | PHIInfoElementT &Element = *PII; |
270 | | dbgs() << "Dest: " << PrintReg(Element.DestReg, TRI) |
271 | | << " Sources: {"; |
272 | | for (auto &SI : Element.Sources) { |
273 | | dbgs() << PrintReg(SI.first, TRI) << "(BB#" |
274 | | << SI.second->getNumber() << "),"; |
275 | | } |
276 | | dbgs() << "}\n"; |
277 | | } |
278 | | dbgs() << "=PHIInfo End=\n"; |
279 | | } |
280 | | #endif |
281 | | |
282 | 0 | void PHILinearize::clear() { PHIInfo = PHIInfoT(); } |
283 | | |
284 | 0 | PHILinearize::dest_iterator PHILinearize::dests_begin() { |
285 | 0 | return PHILinearizeDestIterator(PHIInfo.begin()); |
286 | 0 | } |
287 | | |
288 | 0 | PHILinearize::dest_iterator PHILinearize::dests_end() { |
289 | 0 | return PHILinearizeDestIterator(PHIInfo.end()); |
290 | 0 | } |
291 | | |
292 | 0 | PHILinearize::source_iterator PHILinearize::sources_begin(unsigned Reg) { |
293 | 0 | auto InfoElement = findPHIInfoElement(Reg); |
294 | 0 | return phiInfoElementGetSources(InfoElement).begin(); |
295 | 0 | } |
296 | | |
297 | 0 | PHILinearize::source_iterator PHILinearize::sources_end(unsigned Reg) { |
298 | 0 | auto InfoElement = findPHIInfoElement(Reg); |
299 | 0 | return phiInfoElementGetSources(InfoElement).end(); |
300 | 0 | } |
301 | | |
302 | 0 | static unsigned getPHINumInputs(MachineInstr &PHI) { |
303 | 0 | assert(PHI.isPHI()); |
304 | 0 | return (PHI.getNumOperands() - 1) / 2; |
305 | 0 | } |
306 | | |
307 | 0 | static MachineBasicBlock *getPHIPred(MachineInstr &PHI, unsigned Index) { |
308 | 0 | assert(PHI.isPHI()); |
309 | 0 | return PHI.getOperand(Index * 2 + 2).getMBB(); |
310 | 0 | } |
311 | | |
312 | | static void setPhiPred(MachineInstr &PHI, unsigned Index, |
313 | 0 | MachineBasicBlock *NewPred) { |
314 | 0 | PHI.getOperand(Index * 2 + 2).setMBB(NewPred); |
315 | 0 | } |
316 | | |
317 | 0 | static unsigned getPHISourceReg(MachineInstr &PHI, unsigned Index) { |
318 | 0 | assert(PHI.isPHI()); |
319 | 0 | return PHI.getOperand(Index * 2 + 1).getReg(); |
320 | 0 | } |
321 | | |
322 | 0 | static unsigned getPHIDestReg(MachineInstr &PHI) { |
323 | 0 | assert(PHI.isPHI()); |
324 | 0 | return PHI.getOperand(0).getReg(); |
325 | 0 | } |
326 | | |
327 | | namespace { |
328 | | |
329 | | class RegionMRT; |
330 | | class MBBMRT; |
331 | | |
332 | | class LinearizedRegion { |
333 | | protected: |
334 | | MachineBasicBlock *Entry; |
335 | | // The exit block is part of the region, and is the last |
336 | | // merge block before exiting the region. |
337 | | MachineBasicBlock *Exit; |
338 | | DenseSet<unsigned> LiveOuts; |
339 | | SmallPtrSet<MachineBasicBlock *, 1> MBBs; |
340 | | bool HasLoop; |
341 | | LinearizedRegion *Parent; |
342 | | RegionMRT *RMRT; |
343 | | |
344 | | void storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg, |
345 | | MachineInstr *DefInstr, const MachineRegisterInfo *MRI, |
346 | | const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); |
347 | | |
348 | | void storeLiveOutRegRegion(RegionMRT *Region, unsigned Reg, |
349 | | MachineInstr *DefInstr, |
350 | | const MachineRegisterInfo *MRI, |
351 | | const TargetRegisterInfo *TRI, |
352 | | PHILinearize &PHIInfo); |
353 | | |
354 | | void storeMBBLiveOuts(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI, |
355 | | const TargetRegisterInfo *TRI, PHILinearize &PHIInfo, |
356 | | RegionMRT *TopRegion); |
357 | | |
358 | | void storeLiveOuts(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI, |
359 | | const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); |
360 | | |
361 | | void storeLiveOuts(RegionMRT *Region, const MachineRegisterInfo *MRI, |
362 | | const TargetRegisterInfo *TRI, PHILinearize &PHIInfo, |
363 | | RegionMRT *TopRegion = nullptr); |
364 | | |
365 | | public: |
366 | | LinearizedRegion(); |
367 | | LinearizedRegion(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI, |
368 | | const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); |
369 | 0 | ~LinearizedRegion() = default; |
370 | | |
371 | 0 | void setRegionMRT(RegionMRT *Region) { RMRT = Region; } |
372 | | |
373 | 0 | RegionMRT *getRegionMRT() { return RMRT; } |
374 | | |
375 | 0 | void setParent(LinearizedRegion *P) { Parent = P; } |
376 | | |
377 | 0 | LinearizedRegion *getParent() { return Parent; } |
378 | | |
379 | | void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr); |
380 | | |
381 | | void setBBSelectRegIn(unsigned Reg); |
382 | | |
383 | | unsigned getBBSelectRegIn(); |
384 | | |
385 | | void setBBSelectRegOut(unsigned Reg, bool IsLiveOut); |
386 | | |
387 | | unsigned getBBSelectRegOut(); |
388 | | |
389 | | void setHasLoop(bool Value); |
390 | | |
391 | | bool getHasLoop(); |
392 | | |
393 | | void addLiveOut(unsigned VReg); |
394 | | |
395 | | void removeLiveOut(unsigned Reg); |
396 | | |
397 | | void replaceLiveOut(unsigned OldReg, unsigned NewReg); |
398 | | |
399 | | void replaceRegister(unsigned Register, unsigned NewRegister, |
400 | | MachineRegisterInfo *MRI, bool ReplaceInside, |
401 | | bool ReplaceOutside, bool IncludeLoopPHIs); |
402 | | |
403 | | void replaceRegisterInsideRegion(unsigned Register, unsigned NewRegister, |
404 | | bool IncludeLoopPHIs, |
405 | | MachineRegisterInfo *MRI); |
406 | | |
407 | | void replaceRegisterOutsideRegion(unsigned Register, unsigned NewRegister, |
408 | | bool IncludeLoopPHIs, |
409 | | MachineRegisterInfo *MRI); |
410 | | |
411 | | DenseSet<unsigned> *getLiveOuts(); |
412 | | |
413 | | void setEntry(MachineBasicBlock *NewEntry); |
414 | | |
415 | | MachineBasicBlock *getEntry(); |
416 | | |
417 | | void setExit(MachineBasicBlock *NewExit); |
418 | | |
419 | | MachineBasicBlock *getExit(); |
420 | | |
421 | | void addMBB(MachineBasicBlock *MBB); |
422 | | |
423 | | void addMBBs(LinearizedRegion *InnerRegion); |
424 | | |
425 | | bool contains(MachineBasicBlock *MBB); |
426 | | |
427 | | bool isLiveOut(unsigned Reg); |
428 | | |
429 | | bool hasNoDef(unsigned Reg, MachineRegisterInfo *MRI); |
430 | | |
431 | | void removeFalseRegisterKills(MachineRegisterInfo *MRI); |
432 | | |
433 | | void initLiveOut(RegionMRT *Region, const MachineRegisterInfo *MRI, |
434 | | const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); |
435 | | }; |
436 | | |
437 | | class MRT { |
438 | | protected: |
439 | | RegionMRT *Parent; |
440 | | unsigned BBSelectRegIn; |
441 | | unsigned BBSelectRegOut; |
442 | | |
443 | | public: |
444 | 0 | virtual ~MRT() = default; |
445 | | |
446 | 0 | unsigned getBBSelectRegIn() { return BBSelectRegIn; } |
447 | | |
448 | 0 | unsigned getBBSelectRegOut() { return BBSelectRegOut; } |
449 | | |
450 | 0 | void setBBSelectRegIn(unsigned Reg) { BBSelectRegIn = Reg; } |
451 | | |
452 | 0 | void setBBSelectRegOut(unsigned Reg) { BBSelectRegOut = Reg; } |
453 | | |
454 | 0 | virtual RegionMRT *getRegionMRT() { return nullptr; } |
455 | | |
456 | 0 | virtual MBBMRT *getMBBMRT() { return nullptr; } |
457 | | |
458 | 0 | bool isRegion() { return getRegionMRT() != nullptr; } |
459 | | |
460 | 0 | bool isMBB() { return getMBBMRT() != nullptr; } |
461 | | |
462 | 0 | bool isRoot() { return Parent == nullptr; } |
463 | | |
464 | 0 | void setParent(RegionMRT *Region) { Parent = Region; } |
465 | | |
466 | 0 | RegionMRT *getParent() { return Parent; } |
467 | | |
468 | | static MachineBasicBlock * |
469 | | initializeMRT(MachineFunction &MF, const MachineRegionInfo *RegionInfo, |
470 | | DenseMap<MachineRegion *, RegionMRT *> &RegionMap); |
471 | | |
472 | | static RegionMRT *buildMRT(MachineFunction &MF, |
473 | | const MachineRegionInfo *RegionInfo, |
474 | | const SIInstrInfo *TII, |
475 | | MachineRegisterInfo *MRI); |
476 | | |
477 | | virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) = 0; |
478 | | |
479 | 0 | void dumpDepth(int depth) { |
480 | 0 | for (int i = depth; i > 00 ; --i0 ) { |
481 | 0 | dbgs() << " "; |
482 | 0 | } |
483 | 0 | } |
484 | | }; |
485 | | |
486 | | class MBBMRT : public MRT { |
487 | | MachineBasicBlock *MBB; |
488 | | |
489 | | public: |
490 | 0 | MBBMRT(MachineBasicBlock *BB) : MBB(BB) { |
491 | 0 | setParent(nullptr); |
492 | 0 | setBBSelectRegOut(0); |
493 | 0 | setBBSelectRegIn(0); |
494 | 0 | } |
495 | | |
496 | 0 | MBBMRT *getMBBMRT() override { return this; } |
497 | | |
498 | 0 | MachineBasicBlock *getMBB() { return MBB; } |
499 | | |
500 | 0 | void dump(const TargetRegisterInfo *TRI, int depth = 0) override { |
501 | 0 | dumpDepth(depth); |
502 | 0 | dbgs() << "MBB: " << getMBB()->getNumber(); |
503 | 0 | dbgs() << " In: " << PrintReg(getBBSelectRegIn(), TRI); |
504 | 0 | dbgs() << ", Out: " << PrintReg(getBBSelectRegOut(), TRI) << "\n"; |
505 | 0 | } |
506 | | }; |
507 | | |
508 | | class RegionMRT : public MRT { |
509 | | protected: |
510 | | MachineRegion *Region; |
511 | | LinearizedRegion *LRegion = nullptr; |
512 | | MachineBasicBlock *Succ = nullptr; |
513 | | SetVector<MRT *> Children; |
514 | | |
515 | | public: |
516 | 0 | RegionMRT(MachineRegion *MachineRegion) : Region(MachineRegion) { |
517 | 0 | setParent(nullptr); |
518 | 0 | setBBSelectRegOut(0); |
519 | 0 | setBBSelectRegIn(0); |
520 | 0 | } |
521 | | |
522 | 0 | ~RegionMRT() override { |
523 | 0 | if (LRegion0 ) { |
524 | 0 | delete LRegion; |
525 | 0 | } |
526 | 0 |
|
527 | 0 | for (auto CI : Children) { |
528 | 0 | delete &(*CI); |
529 | 0 | } |
530 | 0 | } |
531 | | |
532 | 0 | RegionMRT *getRegionMRT() override { return this; } |
533 | | |
534 | 0 | void setLinearizedRegion(LinearizedRegion *LinearizeRegion) { |
535 | 0 | LRegion = LinearizeRegion; |
536 | 0 | } |
537 | | |
538 | 0 | LinearizedRegion *getLinearizedRegion() { return LRegion; } |
539 | | |
540 | 0 | MachineRegion *getMachineRegion() { return Region; } |
541 | | |
542 | 0 | unsigned getInnerOutputRegister() { |
543 | 0 | return (*(Children.begin()))->getBBSelectRegOut(); |
544 | 0 | } |
545 | | |
546 | 0 | void addChild(MRT *Tree) { Children.insert(Tree); } |
547 | | |
548 | 0 | SetVector<MRT *> *getChildren() { return &Children; } |
549 | | |
550 | 0 | void dump(const TargetRegisterInfo *TRI, int depth = 0) override { |
551 | 0 | dumpDepth(depth); |
552 | 0 | dbgs() << "Region: " << (void *)Region; |
553 | 0 | dbgs() << " In: " << PrintReg(getBBSelectRegIn(), TRI); |
554 | 0 | dbgs() << ", Out: " << PrintReg(getBBSelectRegOut(), TRI) << "\n"; |
555 | 0 |
|
556 | 0 | dumpDepth(depth); |
557 | 0 | if (getSucc()) |
558 | 0 | dbgs() << "Succ: " << getSucc()->getNumber() << "\n"; |
559 | 0 | else |
560 | 0 | dbgs() << "Succ: none \n"; |
561 | 0 | for (auto MRTI : Children) { |
562 | 0 | MRTI->dump(TRI, depth + 1); |
563 | 0 | } |
564 | 0 | } |
565 | | |
566 | 0 | MRT *getEntryTree() { return Children.back(); } |
567 | | |
568 | 0 | MRT *getExitTree() { return Children.front(); } |
569 | | |
570 | 0 | MachineBasicBlock *getEntry() { |
571 | 0 | MRT *Tree = Children.back(); |
572 | 0 | return (Tree->isRegion()) ? Tree->getRegionMRT()->getEntry() |
573 | 0 | : Tree->getMBBMRT()->getMBB(); |
574 | 0 | } |
575 | | |
576 | 0 | MachineBasicBlock *getExit() { |
577 | 0 | MRT *Tree = Children.front(); |
578 | 0 | return (Tree->isRegion()) ? Tree->getRegionMRT()->getExit() |
579 | 0 | : Tree->getMBBMRT()->getMBB(); |
580 | 0 | } |
581 | | |
582 | 0 | void setSucc(MachineBasicBlock *MBB) { Succ = MBB; } |
583 | | |
584 | 0 | MachineBasicBlock *getSucc() { return Succ; } |
585 | | |
586 | 0 | bool contains(MachineBasicBlock *MBB) { |
587 | 0 | for (auto CI : Children) { |
588 | 0 | if (CI->isMBB()0 ) { |
589 | 0 | if (MBB == CI->getMBBMRT()->getMBB()0 ) { |
590 | 0 | return true; |
591 | 0 | } |
592 | 0 | } else { |
593 | 0 | if (CI->getRegionMRT()->contains(MBB)0 ) { |
594 | 0 | return true; |
595 | 0 | } else if (0 CI->getRegionMRT()->getLinearizedRegion() != nullptr && |
596 | 0 | CI->getRegionMRT()->getLinearizedRegion()->contains(MBB)0 ) { |
597 | 0 | return true; |
598 | 0 | } |
599 | 0 | } |
600 | 0 | } |
601 | 0 | return false; |
602 | 0 | } |
603 | | |
604 | 0 | void replaceLiveOutReg(unsigned Register, unsigned NewRegister) { |
605 | 0 | LinearizedRegion *LRegion = getLinearizedRegion(); |
606 | 0 | LRegion->replaceLiveOut(Register, NewRegister); |
607 | 0 | for (auto &CI : Children) { |
608 | 0 | if (CI->isRegion()0 ) { |
609 | 0 | CI->getRegionMRT()->replaceLiveOutReg(Register, NewRegister); |
610 | 0 | } |
611 | 0 | } |
612 | 0 | } |
613 | | }; |
614 | | |
615 | | } // end anonymous namespace |
616 | | |
617 | | static unsigned createBBSelectReg(const SIInstrInfo *TII, |
618 | 0 | MachineRegisterInfo *MRI) { |
619 | 0 | return MRI->createVirtualRegister(TII->getPreferredSelectRegClass(32)); |
620 | 0 | } |
621 | | |
622 | | MachineBasicBlock * |
623 | | MRT::initializeMRT(MachineFunction &MF, const MachineRegionInfo *RegionInfo, |
624 | 0 | DenseMap<MachineRegion *, RegionMRT *> &RegionMap) { |
625 | 0 | for (auto &MFI : MF) { |
626 | 0 | MachineBasicBlock *ExitMBB = &MFI; |
627 | 0 | if (ExitMBB->succ_size() == 00 ) { |
628 | 0 | return ExitMBB; |
629 | 0 | } |
630 | 0 | } |
631 | 0 | llvm_unreachable0 ("CFG has no exit block"); |
632 | 0 | return nullptr; |
633 | 0 | } |
634 | | |
635 | | RegionMRT *MRT::buildMRT(MachineFunction &MF, |
636 | | const MachineRegionInfo *RegionInfo, |
637 | 0 | const SIInstrInfo *TII, MachineRegisterInfo *MRI) { |
638 | 0 | SmallPtrSet<MachineRegion *, 4> PlacedRegions; |
639 | 0 | DenseMap<MachineRegion *, RegionMRT *> RegionMap; |
640 | 0 | MachineRegion *TopLevelRegion = RegionInfo->getTopLevelRegion(); |
641 | 0 | RegionMRT *Result = new RegionMRT(TopLevelRegion); |
642 | 0 | RegionMap[TopLevelRegion] = Result; |
643 | 0 |
|
644 | 0 | // Insert the exit block first, we need it to be the merge node |
645 | 0 | // for the top level region. |
646 | 0 | MachineBasicBlock *Exit = initializeMRT(MF, RegionInfo, RegionMap); |
647 | 0 |
|
648 | 0 | unsigned BBSelectRegIn = createBBSelectReg(TII, MRI); |
649 | 0 | MBBMRT *ExitMRT = new MBBMRT(Exit); |
650 | 0 | RegionMap[RegionInfo->getRegionFor(Exit)]->addChild(ExitMRT); |
651 | 0 | ExitMRT->setBBSelectRegIn(BBSelectRegIn); |
652 | 0 |
|
653 | 0 | for (auto MBBI : post_order(&(MF.front()))) { |
654 | 0 | MachineBasicBlock *MBB = &(*MBBI); |
655 | 0 |
|
656 | 0 | // Skip Exit since we already added it |
657 | 0 | if (MBB == Exit0 ) { |
658 | 0 | continue; |
659 | 0 | } |
660 | 0 |
|
661 | 0 | DEBUG0 (dbgs() << "Visiting BB#" << MBB->getNumber() << "\n"); |
662 | 0 | MBBMRT *NewMBB = new MBBMRT(MBB); |
663 | 0 | MachineRegion *Region = RegionInfo->getRegionFor(MBB); |
664 | 0 |
|
665 | 0 | // Ensure we have the MRT region |
666 | 0 | if (RegionMap.count(Region) == 00 ) { |
667 | 0 | RegionMRT *NewMRTRegion = new RegionMRT(Region); |
668 | 0 | RegionMap[Region] = NewMRTRegion; |
669 | 0 |
|
670 | 0 | // Ensure all parents are in the RegionMap |
671 | 0 | MachineRegion *Parent = Region->getParent(); |
672 | 0 | while (RegionMap.count(Parent) == 00 ) { |
673 | 0 | RegionMRT *NewMRTParent = new RegionMRT(Parent); |
674 | 0 | NewMRTParent->addChild(NewMRTRegion); |
675 | 0 | NewMRTRegion->setParent(NewMRTParent); |
676 | 0 | RegionMap[Parent] = NewMRTParent; |
677 | 0 | NewMRTRegion = NewMRTParent; |
678 | 0 | Parent = Parent->getParent(); |
679 | 0 | } |
680 | 0 | RegionMap[Parent]->addChild(NewMRTRegion); |
681 | 0 | NewMRTRegion->setParent(RegionMap[Parent]); |
682 | 0 | } |
683 | 0 |
|
684 | 0 | // Add MBB to Region MRT |
685 | 0 | RegionMap[Region]->addChild(NewMBB); |
686 | 0 | NewMBB->setParent(RegionMap[Region]); |
687 | 0 | RegionMap[Region]->setSucc(Region->getExit()); |
688 | 0 | } |
689 | 0 | return Result; |
690 | 0 | } |
691 | | |
692 | | void LinearizedRegion::storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg, |
693 | | MachineInstr *DefInstr, |
694 | | const MachineRegisterInfo *MRI, |
695 | | const TargetRegisterInfo *TRI, |
696 | 0 | PHILinearize &PHIInfo) { |
697 | 0 | if (TRI->isVirtualRegister(Reg)0 ) { |
698 | 0 | DEBUG(dbgs() << "Considering Register: " << PrintReg(Reg, TRI) << "\n"); |
699 | 0 | // If this is a source register to a PHI we are chaining, it |
700 | 0 | // must be live out. |
701 | 0 | if (PHIInfo.isSource(Reg)0 ) { |
702 | 0 | DEBUG(dbgs() << "Add LiveOut (PHI): " << PrintReg(Reg, TRI) << "\n"); |
703 | 0 | addLiveOut(Reg); |
704 | 0 | } else { |
705 | 0 | // If this is live out of the MBB |
706 | 0 | for (auto &UI : MRI->use_operands(Reg)) { |
707 | 0 | if (UI.getParent()->getParent() != MBB0 ) { |
708 | 0 | DEBUG(dbgs() << "Add LiveOut (MBB BB#" << MBB->getNumber() |
709 | 0 | << "): " << PrintReg(Reg, TRI) << "\n"); |
710 | 0 | addLiveOut(Reg); |
711 | 0 | } else { |
712 | 0 | // If the use is in the same MBB we have to make sure |
713 | 0 | // it is after the def, otherwise it is live out in a loop |
714 | 0 | MachineInstr *UseInstr = UI.getParent(); |
715 | 0 | for (MachineBasicBlock::instr_iterator |
716 | 0 | MII = UseInstr->getIterator(), |
717 | 0 | MIE = UseInstr->getParent()->instr_end(); |
718 | 0 | MII != MIE0 ; ++MII0 ) { |
719 | 0 | if ((&(*MII)) == DefInstr0 ) { |
720 | 0 | DEBUG(dbgs() << "Add LiveOut (Loop): " << PrintReg(Reg, TRI) |
721 | 0 | << "\n"); |
722 | 0 | addLiveOut(Reg); |
723 | 0 | } |
724 | 0 | } |
725 | 0 | } |
726 | 0 | } |
727 | 0 | } |
728 | 0 | } |
729 | 0 | } |
730 | | |
731 | | void LinearizedRegion::storeLiveOutRegRegion(RegionMRT *Region, unsigned Reg, |
732 | | MachineInstr *DefInstr, |
733 | | const MachineRegisterInfo *MRI, |
734 | | const TargetRegisterInfo *TRI, |
735 | 0 | PHILinearize &PHIInfo) { |
736 | 0 | if (TRI->isVirtualRegister(Reg)0 ) { |
737 | 0 | DEBUG(dbgs() << "Considering Register: " << PrintReg(Reg, TRI) << "\n"); |
738 | 0 | for (auto &UI : MRI->use_operands(Reg)) { |
739 | 0 | if (!Region->contains(UI.getParent()->getParent())0 ) { |
740 | 0 | DEBUG(dbgs() << "Add LiveOut (Region " << (void *)Region |
741 | 0 | << "): " << PrintReg(Reg, TRI) << "\n"); |
742 | 0 | addLiveOut(Reg); |
743 | 0 | } |
744 | 0 | } |
745 | 0 | } |
746 | 0 | } |
747 | | |
748 | | void LinearizedRegion::storeLiveOuts(MachineBasicBlock *MBB, |
749 | | const MachineRegisterInfo *MRI, |
750 | | const TargetRegisterInfo *TRI, |
751 | 0 | PHILinearize &PHIInfo) { |
752 | 0 | DEBUG(dbgs() << "-Store Live Outs Begin (BB#" << MBB->getNumber() << ")-\n"); |
753 | 0 | for (auto &II : *MBB) { |
754 | 0 | for (auto &RI : II.defs()) { |
755 | 0 | storeLiveOutReg(MBB, RI.getReg(), RI.getParent(), MRI, TRI, PHIInfo); |
756 | 0 | } |
757 | 0 | for (auto &IRI : II.implicit_operands()) { |
758 | 0 | if (IRI.isDef()0 ) { |
759 | 0 | storeLiveOutReg(MBB, IRI.getReg(), IRI.getParent(), MRI, TRI, PHIInfo); |
760 | 0 | } |
761 | 0 | } |
762 | 0 | } |
763 | 0 |
|
764 | 0 | // If we have a successor with a PHI, source coming from this MBB we have to |
765 | 0 | // add the register as live out |
766 | 0 | for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), |
767 | 0 | E = MBB->succ_end(); |
768 | 0 | SI != E0 ; ++SI0 ) { |
769 | 0 | for (auto &II : *(*SI)) { |
770 | 0 | if (II.isPHI()0 ) { |
771 | 0 | MachineInstr &PHI = II; |
772 | 0 | int numPreds = getPHINumInputs(PHI); |
773 | 0 | for (int i = 0; i < numPreds0 ; ++i0 ) { |
774 | 0 | if (getPHIPred(PHI, i) == MBB0 ) { |
775 | 0 | unsigned PHIReg = getPHISourceReg(PHI, i); |
776 | 0 | DEBUG(dbgs() << "Add LiveOut (PhiSource BB#" << MBB->getNumber() |
777 | 0 | << " -> BB#" << (*SI)->getNumber() |
778 | 0 | << "): " << PrintReg(PHIReg, TRI) << "\n"); |
779 | 0 | addLiveOut(PHIReg); |
780 | 0 | } |
781 | 0 | } |
782 | 0 | } |
783 | 0 | } |
784 | 0 | } |
785 | 0 |
|
786 | 0 | DEBUG(dbgs() << "-Store Live Outs Endn-\n"); |
787 | 0 | } |
788 | | |
789 | | void LinearizedRegion::storeMBBLiveOuts(MachineBasicBlock *MBB, |
790 | | const MachineRegisterInfo *MRI, |
791 | | const TargetRegisterInfo *TRI, |
792 | | PHILinearize &PHIInfo, |
793 | 0 | RegionMRT *TopRegion) { |
794 | 0 | for (auto &II : *MBB) { |
795 | 0 | for (auto &RI : II.defs()) { |
796 | 0 | storeLiveOutRegRegion(TopRegion, RI.getReg(), RI.getParent(), MRI, TRI, |
797 | 0 | PHIInfo); |
798 | 0 | } |
799 | 0 | for (auto &IRI : II.implicit_operands()) { |
800 | 0 | if (IRI.isDef()0 ) { |
801 | 0 | storeLiveOutRegRegion(TopRegion, IRI.getReg(), IRI.getParent(), MRI, |
802 | 0 | TRI, PHIInfo); |
803 | 0 | } |
804 | 0 | } |
805 | 0 | } |
806 | 0 | } |
807 | | |
808 | | void LinearizedRegion::storeLiveOuts(RegionMRT *Region, |
809 | | const MachineRegisterInfo *MRI, |
810 | | const TargetRegisterInfo *TRI, |
811 | | PHILinearize &PHIInfo, |
812 | 0 | RegionMRT *CurrentTopRegion) { |
813 | 0 | MachineBasicBlock *Exit = Region->getSucc(); |
814 | 0 |
|
815 | 0 | RegionMRT *TopRegion = |
816 | 0 | CurrentTopRegion == nullptr ? Region0 : CurrentTopRegion0 ; |
817 | 0 |
|
818 | 0 | // Check if exit is end of function, if so, no live outs. |
819 | 0 | if (Exit == nullptr) |
820 | 0 | return; |
821 | 0 |
|
822 | 0 | auto Children = Region->getChildren(); |
823 | 0 | for (auto CI : *Children) { |
824 | 0 | if (CI->isMBB()0 ) { |
825 | 0 | auto MBB = CI->getMBBMRT()->getMBB(); |
826 | 0 | storeMBBLiveOuts(MBB, MRI, TRI, PHIInfo, TopRegion); |
827 | 0 | } else { |
828 | 0 | LinearizedRegion *SubRegion = CI->getRegionMRT()->getLinearizedRegion(); |
829 | 0 | // We should be limited to only store registers that are live out from the |
830 | 0 | // lineaized region |
831 | 0 | for (auto MBBI : SubRegion->MBBs) { |
832 | 0 | storeMBBLiveOuts(MBBI, MRI, TRI, PHIInfo, TopRegion); |
833 | 0 | } |
834 | 0 | } |
835 | 0 | } |
836 | 0 |
|
837 | 0 | if (CurrentTopRegion == nullptr0 ) { |
838 | 0 | auto Succ = Region->getSucc(); |
839 | 0 | for (auto &II : *Succ) { |
840 | 0 | if (II.isPHI()0 ) { |
841 | 0 | MachineInstr &PHI = II; |
842 | 0 | int numPreds = getPHINumInputs(PHI); |
843 | 0 | for (int i = 0; i < numPreds0 ; ++i0 ) { |
844 | 0 | if (Region->contains(getPHIPred(PHI, i))0 ) { |
845 | 0 | unsigned PHIReg = getPHISourceReg(PHI, i); |
846 | 0 | DEBUG(dbgs() << "Add Region LiveOut (" << (void *)Region |
847 | 0 | << "): " << PrintReg(PHIReg, TRI) << "\n"); |
848 | 0 | addLiveOut(PHIReg); |
849 | 0 | } |
850 | 0 | } |
851 | 0 | } |
852 | 0 | } |
853 | 0 | } |
854 | 0 | } |
855 | | |
856 | | #ifndef NDEBUG |
857 | | void LinearizedRegion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) { |
858 | | OS << "Linearized Region {"; |
859 | | bool IsFirst = true; |
860 | | for (const auto &MBB : MBBs) { |
861 | | if (IsFirst) { |
862 | | IsFirst = false; |
863 | | } else { |
864 | | OS << " ,"; |
865 | | } |
866 | | OS << MBB->getNumber(); |
867 | | } |
868 | | OS << "} (" << Entry->getNumber() << ", " |
869 | | << (Exit == nullptr ? -1 : Exit->getNumber()) |
870 | | << "): In:" << PrintReg(getBBSelectRegIn(), TRI) |
871 | | << " Out:" << PrintReg(getBBSelectRegOut(), TRI) << " {"; |
872 | | for (auto &LI : LiveOuts) { |
873 | | OS << PrintReg(LI, TRI) << " "; |
874 | | } |
875 | | OS << "} \n"; |
876 | | } |
877 | | #endif |
878 | | |
879 | 0 | unsigned LinearizedRegion::getBBSelectRegIn() { |
880 | 0 | return getRegionMRT()->getBBSelectRegIn(); |
881 | 0 | } |
882 | | |
883 | 0 | unsigned LinearizedRegion::getBBSelectRegOut() { |
884 | 0 | return getRegionMRT()->getBBSelectRegOut(); |
885 | 0 | } |
886 | | |
887 | 0 | void LinearizedRegion::setHasLoop(bool Value) { HasLoop = Value; } |
888 | | |
889 | 0 | bool LinearizedRegion::getHasLoop() { return HasLoop; } |
890 | | |
891 | 0 | void LinearizedRegion::addLiveOut(unsigned VReg) { LiveOuts.insert(VReg); } |
892 | | |
893 | 0 | void LinearizedRegion::removeLiveOut(unsigned Reg) { |
894 | 0 | if (isLiveOut(Reg)) |
895 | 0 | LiveOuts.erase(Reg); |
896 | 0 | } |
897 | | |
898 | 0 | void LinearizedRegion::replaceLiveOut(unsigned OldReg, unsigned NewReg) { |
899 | 0 | if (isLiveOut(OldReg)0 ) { |
900 | 0 | removeLiveOut(OldReg); |
901 | 0 | addLiveOut(NewReg); |
902 | 0 | } |
903 | 0 | } |
904 | | |
905 | | void LinearizedRegion::replaceRegister(unsigned Register, unsigned NewRegister, |
906 | | MachineRegisterInfo *MRI, |
907 | | bool ReplaceInside, bool ReplaceOutside, |
908 | 0 | bool IncludeLoopPHI) { |
909 | 0 | assert(Register != NewRegister && "Cannot replace a reg with itself"); |
910 | 0 |
|
911 | 0 | DEBUG(dbgs() << "Pepareing to replace register (region): " |
912 | 0 | << PrintReg(Register, MRI->getTargetRegisterInfo()) << " with " |
913 | 0 | << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) << "\n"); |
914 | 0 |
|
915 | 0 | // If we are replacing outside, we also need to update the LiveOuts |
916 | 0 | if (ReplaceOutside && |
917 | 0 | (isLiveOut(Register) || 0 this->getParent()->isLiveOut(Register)0 )) { |
918 | 0 | LinearizedRegion *Current = this; |
919 | 0 | while (Current != nullptr && 0 Current->getEntry() != nullptr0 ) { |
920 | 0 | DEBUG(dbgs() << "Region before register replace\n"); |
921 | 0 | DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo())); |
922 | 0 | Current->replaceLiveOut(Register, NewRegister); |
923 | 0 | DEBUG(dbgs() << "Region after register replace\n"); |
924 | 0 | DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo())); |
925 | 0 | Current = Current->getParent(); |
926 | 0 | } |
927 | 0 | } |
928 | 0 |
|
929 | 0 | for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register), |
930 | 0 | E = MRI->reg_end(); |
931 | 0 | I != E0 ;) { |
932 | 0 | MachineOperand &O = *I; |
933 | 0 | ++I; |
934 | 0 |
|
935 | 0 | // We don't rewrite defs. |
936 | 0 | if (O.isDef()) |
937 | 0 | continue; |
938 | 0 |
|
939 | 0 | bool IsInside = contains(O.getParent()->getParent()); |
940 | 0 | bool IsLoopPHI = IsInside && (O.getParent()->isPHI() && |
941 | 0 | O.getParent()->getParent() == getEntry()); |
942 | 0 | bool ShouldReplace = (IsInside && ReplaceInside) || |
943 | 0 | (!IsInside && 0 ReplaceOutside0 ) || |
944 | 0 | (IncludeLoopPHI && 0 IsLoopPHI0 ); |
945 | 0 | if (ShouldReplace0 ) { |
946 | 0 |
|
947 | 0 | if (TargetRegisterInfo::isPhysicalRegister(NewRegister)0 ) { |
948 | 0 | DEBUG(dbgs() << "Trying to substitute physical register: " |
949 | 0 | << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) |
950 | 0 | << "\n"); |
951 | 0 | llvm_unreachable("Cannot substitute physical registers"); |
952 | 0 | } else { |
953 | 0 | DEBUG(dbgs() << "Replacing register (region): " |
954 | 0 | << PrintReg(Register, MRI->getTargetRegisterInfo()) |
955 | 0 | << " with " |
956 | 0 | << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) |
957 | 0 | << "\n"); |
958 | 0 | O.setReg(NewRegister); |
959 | 0 | } |
960 | 0 | } |
961 | 0 | } |
962 | 0 | } |
963 | | |
964 | | void LinearizedRegion::replaceRegisterInsideRegion(unsigned Register, |
965 | | unsigned NewRegister, |
966 | | bool IncludeLoopPHIs, |
967 | 0 | MachineRegisterInfo *MRI) { |
968 | 0 | replaceRegister(Register, NewRegister, MRI, true, false, IncludeLoopPHIs); |
969 | 0 | } |
970 | | |
971 | | void LinearizedRegion::replaceRegisterOutsideRegion(unsigned Register, |
972 | | unsigned NewRegister, |
973 | | bool IncludeLoopPHIs, |
974 | 0 | MachineRegisterInfo *MRI) { |
975 | 0 | replaceRegister(Register, NewRegister, MRI, false, true, IncludeLoopPHIs); |
976 | 0 | } |
977 | | |
978 | 0 | DenseSet<unsigned> *LinearizedRegion::getLiveOuts() { return &LiveOuts; } |
979 | | |
980 | 0 | void LinearizedRegion::setEntry(MachineBasicBlock *NewEntry) { |
981 | 0 | Entry = NewEntry; |
982 | 0 | } |
983 | | |
984 | 0 | MachineBasicBlock *LinearizedRegion::getEntry() { return Entry; } |
985 | | |
986 | 0 | void LinearizedRegion::setExit(MachineBasicBlock *NewExit) { Exit = NewExit; } |
987 | | |
988 | 0 | MachineBasicBlock *LinearizedRegion::getExit() { return Exit; } |
989 | | |
990 | 0 | void LinearizedRegion::addMBB(MachineBasicBlock *MBB) { MBBs.insert(MBB); } |
991 | | |
992 | 0 | void LinearizedRegion::addMBBs(LinearizedRegion *InnerRegion) { |
993 | 0 | for (const auto &MBB : InnerRegion->MBBs) { |
994 | 0 | addMBB(MBB); |
995 | 0 | } |
996 | 0 | } |
997 | | |
998 | 0 | bool LinearizedRegion::contains(MachineBasicBlock *MBB) { |
999 | 0 | return MBBs.count(MBB) == 1; |
1000 | 0 | } |
1001 | | |
1002 | 0 | bool LinearizedRegion::isLiveOut(unsigned Reg) { |
1003 | 0 | return LiveOuts.count(Reg) == 1; |
1004 | 0 | } |
1005 | | |
1006 | 0 | bool LinearizedRegion::hasNoDef(unsigned Reg, MachineRegisterInfo *MRI) { |
1007 | 0 | return MRI->def_begin(Reg) == MRI->def_end(); |
1008 | 0 | } |
1009 | | |
1010 | | // After the code has been structurized, what was flagged as kills |
1011 | | // before are no longer register kills. |
1012 | 0 | void LinearizedRegion::removeFalseRegisterKills(MachineRegisterInfo *MRI) { |
1013 | 0 | const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); |
1014 | 0 | for (auto MBBI : MBBs) { |
1015 | 0 | MachineBasicBlock *MBB = MBBI; |
1016 | 0 | for (auto &II : *MBB) { |
1017 | 0 | for (auto &RI : II.uses()) { |
1018 | 0 | if (RI.isReg()0 ) { |
1019 | 0 | unsigned Reg = RI.getReg(); |
1020 | 0 | if (TRI->isVirtualRegister(Reg)0 ) { |
1021 | 0 | if (hasNoDef(Reg, MRI)) |
1022 | 0 | continue; |
1023 | 0 | if (0 !MRI->hasOneDef(Reg)0 ) { |
1024 | 0 | DEBUG(this->getEntry()->getParent()->dump()); |
1025 | 0 | DEBUG(dbgs() << PrintReg(Reg, TRI) << "\n"); |
1026 | 0 | } |
1027 | 0 |
|
1028 | 0 | if (MRI->def_begin(Reg) == MRI->def_end()0 ) { |
1029 | 0 | DEBUG(dbgs() << "Register " |
1030 | 0 | << PrintReg(Reg, MRI->getTargetRegisterInfo()) |
1031 | 0 | << " has NO defs\n"); |
1032 | 0 | } else if (0 !MRI->hasOneDef(Reg)0 ) { |
1033 | 0 | DEBUG(dbgs() << "Register " |
1034 | 0 | << PrintReg(Reg, MRI->getTargetRegisterInfo()) |
1035 | 0 | << " has multiple defs\n"); |
1036 | 0 | } |
1037 | 0 |
|
1038 | 0 | assert(MRI->hasOneDef(Reg) && "Register has multiple definitions"); |
1039 | 0 | MachineOperand *Def = &(*(MRI->def_begin(Reg))); |
1040 | 0 | MachineOperand *UseOperand = &(RI); |
1041 | 0 | bool UseIsOutsideDefMBB = Def->getParent()->getParent() != MBB; |
1042 | 0 | if (UseIsOutsideDefMBB && 0 UseOperand->isKill()0 ) { |
1043 | 0 | DEBUG(dbgs() << "Removing kill flag on register: " |
1044 | 0 | << PrintReg(Reg, TRI) << "\n"); |
1045 | 0 | UseOperand->setIsKill(false); |
1046 | 0 | } |
1047 | 0 | } |
1048 | 0 | } |
1049 | 0 | } |
1050 | 0 | } |
1051 | 0 | } |
1052 | 0 | } |
1053 | | |
1054 | | void LinearizedRegion::initLiveOut(RegionMRT *Region, |
1055 | | const MachineRegisterInfo *MRI, |
1056 | | const TargetRegisterInfo *TRI, |
1057 | 0 | PHILinearize &PHIInfo) { |
1058 | 0 | storeLiveOuts(Region, MRI, TRI, PHIInfo); |
1059 | 0 | } |
1060 | | |
1061 | | LinearizedRegion::LinearizedRegion(MachineBasicBlock *MBB, |
1062 | | const MachineRegisterInfo *MRI, |
1063 | | const TargetRegisterInfo *TRI, |
1064 | 0 | PHILinearize &PHIInfo) { |
1065 | 0 | setEntry(MBB); |
1066 | 0 | setExit(MBB); |
1067 | 0 | storeLiveOuts(MBB, MRI, TRI, PHIInfo); |
1068 | 0 | MBBs.insert(MBB); |
1069 | 0 | Parent = nullptr; |
1070 | 0 | } |
1071 | | |
1072 | 0 | LinearizedRegion::LinearizedRegion() { |
1073 | 0 | setEntry(nullptr); |
1074 | 0 | setExit(nullptr); |
1075 | 0 | Parent = nullptr; |
1076 | 0 | } |
1077 | | |
1078 | | namespace { |
1079 | | |
1080 | | class AMDGPUMachineCFGStructurizer : public MachineFunctionPass { |
1081 | | private: |
1082 | | const MachineRegionInfo *Regions; |
1083 | | const SIInstrInfo *TII; |
1084 | | const TargetRegisterInfo *TRI; |
1085 | | MachineRegisterInfo *MRI; |
1086 | | unsigned BBSelectRegister; |
1087 | | PHILinearize PHIInfo; |
1088 | | DenseMap<MachineBasicBlock *, MachineBasicBlock *> FallthroughMap; |
1089 | | RegionMRT *RMRT; |
1090 | | |
1091 | | void getPHIRegionIndices(RegionMRT *Region, MachineInstr &PHI, |
1092 | | SmallVector<unsigned, 2> &RegionIndices); |
1093 | | void getPHIRegionIndices(LinearizedRegion *Region, MachineInstr &PHI, |
1094 | | SmallVector<unsigned, 2> &RegionIndices); |
1095 | | void getPHINonRegionIndices(LinearizedRegion *Region, MachineInstr &PHI, |
1096 | | SmallVector<unsigned, 2> &PHINonRegionIndices); |
1097 | | |
1098 | | void storePHILinearizationInfoDest( |
1099 | | unsigned LDestReg, MachineInstr &PHI, |
1100 | | SmallVector<unsigned, 2> *RegionIndices = nullptr); |
1101 | | |
1102 | | unsigned storePHILinearizationInfo(MachineInstr &PHI, |
1103 | | SmallVector<unsigned, 2> *RegionIndices); |
1104 | | |
1105 | | void extractKilledPHIs(MachineBasicBlock *MBB); |
1106 | | |
1107 | | bool shrinkPHI(MachineInstr &PHI, SmallVector<unsigned, 2> &PHIIndices, |
1108 | | unsigned *ReplaceReg); |
1109 | | |
1110 | | bool shrinkPHI(MachineInstr &PHI, unsigned CombinedSourceReg, |
1111 | | MachineBasicBlock *SourceMBB, |
1112 | | SmallVector<unsigned, 2> &PHIIndices, unsigned *ReplaceReg); |
1113 | | |
1114 | | void replacePHI(MachineInstr &PHI, unsigned CombinedSourceReg, |
1115 | | MachineBasicBlock *LastMerge, |
1116 | | SmallVector<unsigned, 2> &PHIRegionIndices); |
1117 | | void replaceEntryPHI(MachineInstr &PHI, unsigned CombinedSourceReg, |
1118 | | MachineBasicBlock *IfMBB, |
1119 | | SmallVector<unsigned, 2> &PHIRegionIndices); |
1120 | | void replaceLiveOutRegs(MachineInstr &PHI, |
1121 | | SmallVector<unsigned, 2> &PHIRegionIndices, |
1122 | | unsigned CombinedSourceReg, |
1123 | | LinearizedRegion *LRegion); |
1124 | | void rewriteRegionExitPHI(RegionMRT *Region, MachineBasicBlock *LastMerge, |
1125 | | MachineInstr &PHI, LinearizedRegion *LRegion); |
1126 | | |
1127 | | void rewriteRegionExitPHIs(RegionMRT *Region, MachineBasicBlock *LastMerge, |
1128 | | LinearizedRegion *LRegion); |
1129 | | void rewriteRegionEntryPHI(LinearizedRegion *Region, MachineBasicBlock *IfMBB, |
1130 | | MachineInstr &PHI); |
1131 | | void rewriteRegionEntryPHIs(LinearizedRegion *Region, |
1132 | | MachineBasicBlock *IfMBB); |
1133 | | |
1134 | | bool regionIsSimpleIf(RegionMRT *Region); |
1135 | | |
1136 | | void transformSimpleIfRegion(RegionMRT *Region); |
1137 | | |
1138 | | void eliminateDeadBranchOperands(MachineBasicBlock::instr_iterator &II); |
1139 | | |
1140 | | void insertUnconditionalBranch(MachineBasicBlock *MBB, |
1141 | | MachineBasicBlock *Dest, |
1142 | | const DebugLoc &DL = DebugLoc()); |
1143 | | |
1144 | | MachineBasicBlock *createLinearizedExitBlock(RegionMRT *Region); |
1145 | | |
1146 | | void insertMergePHI(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB, |
1147 | | MachineBasicBlock *MergeBB, unsigned DestRegister, |
1148 | | unsigned IfSourceRegister, unsigned CodeSourceRegister, |
1149 | | bool IsUndefIfSource = false); |
1150 | | |
1151 | | MachineBasicBlock *createIfBlock(MachineBasicBlock *MergeBB, |
1152 | | MachineBasicBlock *CodeBBStart, |
1153 | | MachineBasicBlock *CodeBBEnd, |
1154 | | MachineBasicBlock *SelectBB, unsigned IfReg, |
1155 | | bool InheritPreds); |
1156 | | |
1157 | | void prunePHIInfo(MachineBasicBlock *MBB); |
1158 | | void createEntryPHI(LinearizedRegion *CurrentRegion, unsigned DestReg); |
1159 | | |
1160 | | void createEntryPHIs(LinearizedRegion *CurrentRegion); |
1161 | | void resolvePHIInfos(MachineBasicBlock *FunctionEntry); |
1162 | | |
1163 | | void replaceRegisterWith(unsigned Register, unsigned NewRegister); |
1164 | | |
1165 | | MachineBasicBlock *createIfRegion(MachineBasicBlock *MergeBB, |
1166 | | MachineBasicBlock *CodeBB, |
1167 | | LinearizedRegion *LRegion, |
1168 | | unsigned BBSelectRegIn, |
1169 | | unsigned BBSelectRegOut); |
1170 | | |
1171 | | MachineBasicBlock * |
1172 | | createIfRegion(MachineBasicBlock *MergeMBB, LinearizedRegion *InnerRegion, |
1173 | | LinearizedRegion *CurrentRegion, MachineBasicBlock *SelectBB, |
1174 | | unsigned BBSelectRegIn, unsigned BBSelectRegOut); |
1175 | | void ensureCondIsNotKilled(SmallVector<MachineOperand, 1> Cond); |
1176 | | |
1177 | | void rewriteCodeBBTerminator(MachineBasicBlock *CodeBB, |
1178 | | MachineBasicBlock *MergeBB, |
1179 | | unsigned BBSelectReg); |
1180 | | |
1181 | | MachineInstr *getDefInstr(unsigned Reg); |
1182 | | void insertChainedPHI(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB, |
1183 | | MachineBasicBlock *MergeBB, |
1184 | | LinearizedRegion *InnerRegion, unsigned DestReg, |
1185 | | unsigned SourceReg); |
1186 | | bool containsDef(MachineBasicBlock *MBB, LinearizedRegion *InnerRegion, |
1187 | | unsigned Register); |
1188 | | void rewriteLiveOutRegs(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB, |
1189 | | MachineBasicBlock *MergeBB, |
1190 | | LinearizedRegion *InnerRegion, |
1191 | | LinearizedRegion *LRegion); |
1192 | | |
1193 | | void splitLoopPHI(MachineInstr &PHI, MachineBasicBlock *Entry, |
1194 | | MachineBasicBlock *EntrySucc, LinearizedRegion *LRegion); |
1195 | | void splitLoopPHIs(MachineBasicBlock *Entry, MachineBasicBlock *EntrySucc, |
1196 | | LinearizedRegion *LRegion); |
1197 | | |
1198 | | MachineBasicBlock *splitExit(LinearizedRegion *LRegion); |
1199 | | |
1200 | | MachineBasicBlock *splitEntry(LinearizedRegion *LRegion); |
1201 | | |
1202 | | LinearizedRegion *initLinearizedRegion(RegionMRT *Region); |
1203 | | |
1204 | | bool structurizeComplexRegion(RegionMRT *Region); |
1205 | | |
1206 | | bool structurizeRegion(RegionMRT *Region); |
1207 | | |
1208 | | bool structurizeRegions(RegionMRT *Region, bool isTopRegion); |
1209 | | |
1210 | | public: |
1211 | | static char ID; |
1212 | | |
1213 | 0 | AMDGPUMachineCFGStructurizer() : MachineFunctionPass(ID) { |
1214 | 0 | initializeAMDGPUMachineCFGStructurizerPass(*PassRegistry::getPassRegistry()); |
1215 | 0 | } |
1216 | | |
1217 | 0 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
1218 | 0 | AU.addRequired<MachineRegionInfoPass>(); |
1219 | 0 | MachineFunctionPass::getAnalysisUsage(AU); |
1220 | 0 | } |
1221 | | |
1222 | | void initFallthroughMap(MachineFunction &MF); |
1223 | | |
1224 | | void createLinearizedRegion(RegionMRT *Region, unsigned SelectOut); |
1225 | | |
1226 | | unsigned initializeSelectRegisters(MRT *MRT, unsigned ExistingExitReg, |
1227 | | MachineRegisterInfo *MRI, |
1228 | | const SIInstrInfo *TII); |
1229 | | |
1230 | 0 | void setRegionMRT(RegionMRT *RegionTree) { RMRT = RegionTree; } |
1231 | | |
1232 | 0 | RegionMRT *getRegionMRT() { return RMRT; } |
1233 | | |
1234 | | bool runOnMachineFunction(MachineFunction &MF) override; |
1235 | | }; |
1236 | | |
1237 | | } // end anonymous namespace |
1238 | | |
1239 | | char AMDGPUMachineCFGStructurizer::ID = 0; |
1240 | | |
1241 | 0 | bool AMDGPUMachineCFGStructurizer::regionIsSimpleIf(RegionMRT *Region) { |
1242 | 0 | MachineBasicBlock *Entry = Region->getEntry(); |
1243 | 0 | MachineBasicBlock *Succ = Region->getSucc(); |
1244 | 0 | bool FoundBypass = false; |
1245 | 0 | bool FoundIf = false; |
1246 | 0 |
|
1247 | 0 | if (Entry->succ_size() != 2) { |
1248 | 0 | return false; |
1249 | 0 | } |
1250 | 0 |
|
1251 | 0 | for (MachineBasicBlock::const_succ_iterator SI = Entry->succ_begin(), |
1252 | 0 | E = Entry->succ_end(); |
1253 | 0 | SI != E; ++SI) { |
1254 | 0 | MachineBasicBlock *Current = *SI; |
1255 | 0 |
|
1256 | 0 | if (Current == Succ) { |
1257 | 0 | FoundBypass = true; |
1258 | 0 | } else if ((Current->succ_size() == 1) && |
1259 | 0 | *(Current->succ_begin()) == Succ) { |
1260 | 0 | FoundIf = true; |
1261 | 0 | } |
1262 | 0 | } |
1263 | 0 |
|
1264 | 0 | return FoundIf && FoundBypass; |
1265 | 0 | } |
1266 | | |
1267 | 0 | void AMDGPUMachineCFGStructurizer::transformSimpleIfRegion(RegionMRT *Region) { |
1268 | 0 | MachineBasicBlock *Entry = Region->getEntry(); |
1269 | 0 | MachineBasicBlock *Exit = Region->getExit(); |
1270 | 0 | TII->convertNonUniformIfRegion(Entry, Exit); |
1271 | 0 | } |
1272 | | |
1273 | 0 | static void fixMBBTerminator(MachineBasicBlock *MBB) { |
1274 | 0 | if (MBB->succ_size() == 10 ) { |
1275 | 0 | auto *Succ = *(MBB->succ_begin()); |
1276 | 0 | for (auto &TI : MBB->terminators()) { |
1277 | 0 | for (auto &UI : TI.uses()) { |
1278 | 0 | if (UI.isMBB() && 0 UI.getMBB() != Succ0 ) { |
1279 | 0 | UI.setMBB(Succ); |
1280 | 0 | } |
1281 | 0 | } |
1282 | 0 | } |
1283 | 0 | } |
1284 | 0 | } |
1285 | | |
1286 | 0 | static void fixRegionTerminator(RegionMRT *Region) { |
1287 | 0 | MachineBasicBlock *InternalSucc = nullptr; |
1288 | 0 | MachineBasicBlock *ExternalSucc = nullptr; |
1289 | 0 | LinearizedRegion *LRegion = Region->getLinearizedRegion(); |
1290 | 0 | auto Exit = LRegion->getExit(); |
1291 | 0 |
|
1292 | 0 | SmallPtrSet<MachineBasicBlock *, 2> Successors; |
1293 | 0 | for (MachineBasicBlock::const_succ_iterator SI = Exit->succ_begin(), |
1294 | 0 | SE = Exit->succ_end(); |
1295 | 0 | SI != SE0 ; ++SI0 ) { |
1296 | 0 | MachineBasicBlock *Succ = *SI; |
1297 | 0 | if (LRegion->contains(Succ)0 ) { |
1298 | 0 | // Do not allow re-assign |
1299 | 0 | assert(InternalSucc == nullptr); |
1300 | 0 | InternalSucc = Succ; |
1301 | 0 | } else { |
1302 | 0 | // Do not allow re-assign |
1303 | 0 | assert(ExternalSucc == nullptr); |
1304 | 0 | ExternalSucc = Succ; |
1305 | 0 | } |
1306 | 0 | } |
1307 | 0 |
|
1308 | 0 | for (auto &TI : Exit->terminators()) { |
1309 | 0 | for (auto &UI : TI.uses()) { |
1310 | 0 | if (UI.isMBB()0 ) { |
1311 | 0 | auto Target = UI.getMBB(); |
1312 | 0 | if (Target != InternalSucc && 0 Target != ExternalSucc0 ) { |
1313 | 0 | UI.setMBB(ExternalSucc); |
1314 | 0 | } |
1315 | 0 | } |
1316 | 0 | } |
1317 | 0 | } |
1318 | 0 | } |
1319 | | |
1320 | | // If a region region is just a sequence of regions (and the exit |
1321 | | // block in the case of the top level region), we can simply skip |
1322 | | // linearizing it, because it is already linear |
1323 | 0 | bool regionIsSequence(RegionMRT *Region) { |
1324 | 0 | auto Children = Region->getChildren(); |
1325 | 0 | for (auto CI : *Children) { |
1326 | 0 | if (!CI->isRegion()0 ) { |
1327 | 0 | if (CI->getMBBMRT()->getMBB()->succ_size() > 10 ) { |
1328 | 0 | return false; |
1329 | 0 | } |
1330 | 0 | } |
1331 | 0 | } |
1332 | 0 | return true; |
1333 | 0 | } |
1334 | | |
1335 | 0 | void fixupRegionExits(RegionMRT *Region) { |
1336 | 0 | auto Children = Region->getChildren(); |
1337 | 0 | for (auto CI : *Children) { |
1338 | 0 | if (!CI->isRegion()0 ) { |
1339 | 0 | fixMBBTerminator(CI->getMBBMRT()->getMBB()); |
1340 | 0 | } else { |
1341 | 0 | fixRegionTerminator(CI->getRegionMRT()); |
1342 | 0 | } |
1343 | 0 | } |
1344 | 0 | } |
1345 | | |
1346 | | void AMDGPUMachineCFGStructurizer::getPHIRegionIndices( |
1347 | | RegionMRT *Region, MachineInstr &PHI, |
1348 | 0 | SmallVector<unsigned, 2> &PHIRegionIndices) { |
1349 | 0 | unsigned NumInputs = getPHINumInputs(PHI); |
1350 | 0 | for (unsigned i = 0; i < NumInputs0 ; ++i0 ) { |
1351 | 0 | MachineBasicBlock *Pred = getPHIPred(PHI, i); |
1352 | 0 | if (Region->contains(Pred)0 ) { |
1353 | 0 | PHIRegionIndices.push_back(i); |
1354 | 0 | } |
1355 | 0 | } |
1356 | 0 | } |
1357 | | |
1358 | | void AMDGPUMachineCFGStructurizer::getPHIRegionIndices( |
1359 | | LinearizedRegion *Region, MachineInstr &PHI, |
1360 | 0 | SmallVector<unsigned, 2> &PHIRegionIndices) { |
1361 | 0 | unsigned NumInputs = getPHINumInputs(PHI); |
1362 | 0 | for (unsigned i = 0; i < NumInputs0 ; ++i0 ) { |
1363 | 0 | MachineBasicBlock *Pred = getPHIPred(PHI, i); |
1364 | 0 | if (Region->contains(Pred)0 ) { |
1365 | 0 | PHIRegionIndices.push_back(i); |
1366 | 0 | } |
1367 | 0 | } |
1368 | 0 | } |
1369 | | |
1370 | | void AMDGPUMachineCFGStructurizer::getPHINonRegionIndices( |
1371 | | LinearizedRegion *Region, MachineInstr &PHI, |
1372 | 0 | SmallVector<unsigned, 2> &PHINonRegionIndices) { |
1373 | 0 | unsigned NumInputs = getPHINumInputs(PHI); |
1374 | 0 | for (unsigned i = 0; i < NumInputs0 ; ++i0 ) { |
1375 | 0 | MachineBasicBlock *Pred = getPHIPred(PHI, i); |
1376 | 0 | if (!Region->contains(Pred)0 ) { |
1377 | 0 | PHINonRegionIndices.push_back(i); |
1378 | 0 | } |
1379 | 0 | } |
1380 | 0 | } |
1381 | | |
1382 | | void AMDGPUMachineCFGStructurizer::storePHILinearizationInfoDest( |
1383 | | unsigned LDestReg, MachineInstr &PHI, |
1384 | 0 | SmallVector<unsigned, 2> *RegionIndices) { |
1385 | 0 | if (RegionIndices0 ) { |
1386 | 0 | for (auto i : *RegionIndices) { |
1387 | 0 | PHIInfo.addSource(LDestReg, getPHISourceReg(PHI, i), getPHIPred(PHI, i)); |
1388 | 0 | } |
1389 | 0 | } else { |
1390 | 0 | unsigned NumInputs = getPHINumInputs(PHI); |
1391 | 0 | for (unsigned i = 0; i < NumInputs0 ; ++i0 ) { |
1392 | 0 | PHIInfo.addSource(LDestReg, getPHISourceReg(PHI, i), getPHIPred(PHI, i)); |
1393 | 0 | } |
1394 | 0 | } |
1395 | 0 | } |
1396 | | |
1397 | | unsigned AMDGPUMachineCFGStructurizer::storePHILinearizationInfo( |
1398 | 0 | MachineInstr &PHI, SmallVector<unsigned, 2> *RegionIndices) { |
1399 | 0 | unsigned DestReg = getPHIDestReg(PHI); |
1400 | 0 | unsigned LinearizeDestReg = |
1401 | 0 | MRI->createVirtualRegister(MRI->getRegClass(DestReg)); |
1402 | 0 | PHIInfo.addDest(LinearizeDestReg, PHI.getDebugLoc()); |
1403 | 0 | storePHILinearizationInfoDest(LinearizeDestReg, PHI, RegionIndices); |
1404 | 0 | return LinearizeDestReg; |
1405 | 0 | } |
1406 | | |
1407 | 0 | void AMDGPUMachineCFGStructurizer::extractKilledPHIs(MachineBasicBlock *MBB) { |
1408 | 0 | // We need to create a new chain for the killed phi, but there is no |
1409 | 0 | // need to do the renaming outside or inside the block. |
1410 | 0 | SmallPtrSet<MachineInstr *, 2> PHIs; |
1411 | 0 | for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(), |
1412 | 0 | E = MBB->instr_end(); |
1413 | 0 | I != E0 ; ++I0 ) { |
1414 | 0 | MachineInstr &Instr = *I; |
1415 | 0 | if (Instr.isPHI()0 ) { |
1416 | 0 | unsigned PHIDestReg = getPHIDestReg(Instr); |
1417 | 0 | DEBUG(dbgs() << "Extractking killed phi:\n"); |
1418 | 0 | DEBUG(Instr.dump()); |
1419 | 0 | PHIs.insert(&Instr); |
1420 | 0 | PHIInfo.addDest(PHIDestReg, Instr.getDebugLoc()); |
1421 | 0 | storePHILinearizationInfoDest(PHIDestReg, Instr); |
1422 | 0 | } |
1423 | 0 | } |
1424 | 0 |
|
1425 | 0 | for (auto PI : PHIs) { |
1426 | 0 | PI->eraseFromParent(); |
1427 | 0 | } |
1428 | 0 | } |
1429 | | |
1430 | | static bool isPHIRegionIndex(SmallVector<unsigned, 2> PHIRegionIndices, |
1431 | 0 | unsigned Index) { |
1432 | 0 | for (auto i : PHIRegionIndices) { |
1433 | 0 | if (i == Index) |
1434 | 0 | return true; |
1435 | 0 | } |
1436 | 0 | return false; |
1437 | 0 | } |
1438 | | |
1439 | | bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI, |
1440 | | SmallVector<unsigned, 2> &PHIIndices, |
1441 | 0 | unsigned *ReplaceReg) { |
1442 | 0 | return shrinkPHI(PHI, 0, nullptr, PHIIndices, ReplaceReg); |
1443 | 0 | } |
1444 | | |
1445 | | bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI, |
1446 | | unsigned CombinedSourceReg, |
1447 | | MachineBasicBlock *SourceMBB, |
1448 | | SmallVector<unsigned, 2> &PHIIndices, |
1449 | 0 | unsigned *ReplaceReg) { |
1450 | 0 | DEBUG(dbgs() << "Shrink PHI: "); |
1451 | 0 | DEBUG(PHI.dump()); |
1452 | 0 | DEBUG(dbgs() << " to " << PrintReg(getPHIDestReg(PHI), TRI) |
1453 | 0 | << "<def> = PHI("); |
1454 | 0 |
|
1455 | 0 | bool Replaced = false; |
1456 | 0 | unsigned NumInputs = getPHINumInputs(PHI); |
1457 | 0 | int SingleExternalEntryIndex = -1; |
1458 | 0 | for (unsigned i = 0; i < NumInputs0 ; ++i0 ) { |
1459 | 0 | if (!isPHIRegionIndex(PHIIndices, i)0 ) { |
1460 | 0 | if (SingleExternalEntryIndex == -10 ) { |
1461 | 0 | // Single entry |
1462 | 0 | SingleExternalEntryIndex = i; |
1463 | 0 | } else { |
1464 | 0 | // Multiple entries |
1465 | 0 | SingleExternalEntryIndex = -2; |
1466 | 0 | } |
1467 | 0 | } |
1468 | 0 | } |
1469 | 0 |
|
1470 | 0 | if (SingleExternalEntryIndex > -10 ) { |
1471 | 0 | *ReplaceReg = getPHISourceReg(PHI, SingleExternalEntryIndex); |
1472 | 0 | // We should not rewrite the code, we should only pick up the single value |
1473 | 0 | // that represents the shrunk PHI. |
1474 | 0 | Replaced = true; |
1475 | 0 | } else { |
1476 | 0 | MachineBasicBlock *MBB = PHI.getParent(); |
1477 | 0 | MachineInstrBuilder MIB = |
1478 | 0 | BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI), |
1479 | 0 | getPHIDestReg(PHI)); |
1480 | 0 | if (SourceMBB0 ) { |
1481 | 0 | MIB.addReg(CombinedSourceReg); |
1482 | 0 | MIB.addMBB(SourceMBB); |
1483 | 0 | DEBUG(dbgs() << PrintReg(CombinedSourceReg, TRI) << ", BB#" |
1484 | 0 | << SourceMBB->getNumber()); |
1485 | 0 | } |
1486 | 0 |
|
1487 | 0 | for (unsigned i = 0; i < NumInputs0 ; ++i0 ) { |
1488 | 0 | if (isPHIRegionIndex(PHIIndices, i)0 ) { |
1489 | 0 | continue; |
1490 | 0 | } |
1491 | 0 | unsigned SourceReg = getPHISourceReg(PHI, i); |
1492 | 0 | MachineBasicBlock *SourcePred = getPHIPred(PHI, i); |
1493 | 0 | MIB.addReg(SourceReg); |
1494 | 0 | MIB.addMBB(SourcePred); |
1495 | 0 | DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#" |
1496 | 0 | << SourcePred->getNumber()); |
1497 | 0 | } |
1498 | 0 | DEBUG(dbgs() << ")\n"); |
1499 | 0 | } |
1500 | 0 | PHI.eraseFromParent(); |
1501 | 0 | return Replaced; |
1502 | 0 | } |
1503 | | |
1504 | | void AMDGPUMachineCFGStructurizer::replacePHI( |
1505 | | MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *LastMerge, |
1506 | 0 | SmallVector<unsigned, 2> &PHIRegionIndices) { |
1507 | 0 | DEBUG(dbgs() << "Replace PHI: "); |
1508 | 0 | DEBUG(PHI.dump()); |
1509 | 0 | DEBUG(dbgs() << " with " << PrintReg(getPHIDestReg(PHI), TRI) |
1510 | 0 | << "<def> = PHI("); |
1511 | 0 |
|
1512 | 0 | bool HasExternalEdge = false; |
1513 | 0 | unsigned NumInputs = getPHINumInputs(PHI); |
1514 | 0 | for (unsigned i = 0; i < NumInputs0 ; ++i0 ) { |
1515 | 0 | if (!isPHIRegionIndex(PHIRegionIndices, i)0 ) { |
1516 | 0 | HasExternalEdge = true; |
1517 | 0 | } |
1518 | 0 | } |
1519 | 0 |
|
1520 | 0 | if (HasExternalEdge0 ) { |
1521 | 0 | MachineBasicBlock *MBB = PHI.getParent(); |
1522 | 0 | MachineInstrBuilder MIB = |
1523 | 0 | BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI), |
1524 | 0 | getPHIDestReg(PHI)); |
1525 | 0 | MIB.addReg(CombinedSourceReg); |
1526 | 0 | MIB.addMBB(LastMerge); |
1527 | 0 | DEBUG(dbgs() << PrintReg(CombinedSourceReg, TRI) << ", BB#" |
1528 | 0 | << LastMerge->getNumber()); |
1529 | 0 | for (unsigned i = 0; i < NumInputs0 ; ++i0 ) { |
1530 | 0 | if (isPHIRegionIndex(PHIRegionIndices, i)0 ) { |
1531 | 0 | continue; |
1532 | 0 | } |
1533 | 0 | unsigned SourceReg = getPHISourceReg(PHI, i); |
1534 | 0 | MachineBasicBlock *SourcePred = getPHIPred(PHI, i); |
1535 | 0 | MIB.addReg(SourceReg); |
1536 | 0 | MIB.addMBB(SourcePred); |
1537 | 0 | DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#" |
1538 | 0 | << SourcePred->getNumber()); |
1539 | 0 | } |
1540 | 0 | DEBUG(dbgs() << ")\n"); |
1541 | 0 | } else { |
1542 | 0 | replaceRegisterWith(getPHIDestReg(PHI), CombinedSourceReg); |
1543 | 0 | } |
1544 | 0 | PHI.eraseFromParent(); |
1545 | 0 | } |
1546 | | |
1547 | | void AMDGPUMachineCFGStructurizer::replaceEntryPHI( |
1548 | | MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *IfMBB, |
1549 | 0 | SmallVector<unsigned, 2> &PHIRegionIndices) { |
1550 | 0 | DEBUG(dbgs() << "Replace entry PHI: "); |
1551 | 0 | DEBUG(PHI.dump()); |
1552 | 0 | DEBUG(dbgs() << " with "); |
1553 | 0 |
|
1554 | 0 | unsigned NumInputs = getPHINumInputs(PHI); |
1555 | 0 | unsigned NumNonRegionInputs = NumInputs; |
1556 | 0 | for (unsigned i = 0; i < NumInputs0 ; ++i0 ) { |
1557 | 0 | if (isPHIRegionIndex(PHIRegionIndices, i)0 ) { |
1558 | 0 | NumNonRegionInputs--; |
1559 | 0 | } |
1560 | 0 | } |
1561 | 0 |
|
1562 | 0 | if (NumNonRegionInputs == 00 ) { |
1563 | 0 | auto DestReg = getPHIDestReg(PHI); |
1564 | 0 | replaceRegisterWith(DestReg, CombinedSourceReg); |
1565 | 0 | DEBUG(dbgs() << " register " << PrintReg(CombinedSourceReg, TRI) << "\n"); |
1566 | 0 | PHI.eraseFromParent(); |
1567 | 0 | } else { |
1568 | 0 | DEBUG(dbgs() << PrintReg(getPHIDestReg(PHI), TRI) << "<def> = PHI("); |
1569 | 0 | MachineBasicBlock *MBB = PHI.getParent(); |
1570 | 0 | MachineInstrBuilder MIB = |
1571 | 0 | BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI), |
1572 | 0 | getPHIDestReg(PHI)); |
1573 | 0 | MIB.addReg(CombinedSourceReg); |
1574 | 0 | MIB.addMBB(IfMBB); |
1575 | 0 | DEBUG(dbgs() << PrintReg(CombinedSourceReg, TRI) << ", BB#" |
1576 | 0 | << IfMBB->getNumber()); |
1577 | 0 | unsigned NumInputs = getPHINumInputs(PHI); |
1578 | 0 | for (unsigned i = 0; i < NumInputs0 ; ++i0 ) { |
1579 | 0 | if (isPHIRegionIndex(PHIRegionIndices, i)0 ) { |
1580 | 0 | continue; |
1581 | 0 | } |
1582 | 0 | unsigned SourceReg = getPHISourceReg(PHI, i); |
1583 | 0 | MachineBasicBlock *SourcePred = getPHIPred(PHI, i); |
1584 | 0 | MIB.addReg(SourceReg); |
1585 | 0 | MIB.addMBB(SourcePred); |
1586 | 0 | DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#" |
1587 | 0 | << SourcePred->getNumber()); |
1588 | 0 | } |
1589 | 0 | DEBUG(dbgs() << ")\n"); |
1590 | 0 | PHI.eraseFromParent(); |
1591 | 0 | } |
1592 | 0 | } |
1593 | | |
1594 | | void AMDGPUMachineCFGStructurizer::replaceLiveOutRegs( |
1595 | | MachineInstr &PHI, SmallVector<unsigned, 2> &PHIRegionIndices, |
1596 | 0 | unsigned CombinedSourceReg, LinearizedRegion *LRegion) { |
1597 | 0 | bool WasLiveOut = false; |
1598 | 0 | for (auto PII : PHIRegionIndices) { |
1599 | 0 | unsigned Reg = getPHISourceReg(PHI, PII); |
1600 | 0 | if (LRegion->isLiveOut(Reg)0 ) { |
1601 | 0 | bool IsDead = true; |
1602 | 0 |
|
1603 | 0 | // Check if register is live out of the basic block |
1604 | 0 | MachineBasicBlock *DefMBB = getDefInstr(Reg)->getParent(); |
1605 | 0 | for (auto UI = MRI->use_begin(Reg), E = MRI->use_end(); UI != E0 ; ++UI0 ) { |
1606 | 0 | if ((*UI).getParent()->getParent() != DefMBB0 ) { |
1607 | 0 | IsDead = false; |
1608 | 0 | } |
1609 | 0 | } |
1610 | 0 |
|
1611 | 0 | DEBUG(dbgs() << "Register " << PrintReg(Reg, TRI) << " is " |
1612 | 0 | << (IsDead ? "dead" : "alive") << " after PHI replace\n"); |
1613 | 0 | if (IsDead0 ) { |
1614 | 0 | LRegion->removeLiveOut(Reg); |
1615 | 0 | } |
1616 | 0 | WasLiveOut = true; |
1617 | 0 | } |
1618 | 0 | } |
1619 | 0 |
|
1620 | 0 | if (WasLiveOut) |
1621 | 0 | LRegion->addLiveOut(CombinedSourceReg); |
1622 | 0 | } |
1623 | | |
1624 | | void AMDGPUMachineCFGStructurizer::rewriteRegionExitPHI(RegionMRT *Region, |
1625 | | MachineBasicBlock *LastMerge, |
1626 | | MachineInstr &PHI, |
1627 | 0 | LinearizedRegion *LRegion) { |
1628 | 0 | SmallVector<unsigned, 2> PHIRegionIndices; |
1629 | 0 | getPHIRegionIndices(Region, PHI, PHIRegionIndices); |
1630 | 0 | unsigned LinearizedSourceReg = |
1631 | 0 | storePHILinearizationInfo(PHI, &PHIRegionIndices); |
1632 | 0 |
|
1633 | 0 | replacePHI(PHI, LinearizedSourceReg, LastMerge, PHIRegionIndices); |
1634 | 0 | replaceLiveOutRegs(PHI, PHIRegionIndices, LinearizedSourceReg, LRegion); |
1635 | 0 | } |
1636 | | |
1637 | | void AMDGPUMachineCFGStructurizer::rewriteRegionEntryPHI(LinearizedRegion *Region, |
1638 | | MachineBasicBlock *IfMBB, |
1639 | 0 | MachineInstr &PHI) { |
1640 | 0 | SmallVector<unsigned, 2> PHINonRegionIndices; |
1641 | 0 | getPHINonRegionIndices(Region, PHI, PHINonRegionIndices); |
1642 | 0 | unsigned LinearizedSourceReg = |
1643 | 0 | storePHILinearizationInfo(PHI, &PHINonRegionIndices); |
1644 | 0 | replaceEntryPHI(PHI, LinearizedSourceReg, IfMBB, PHINonRegionIndices); |
1645 | 0 | } |
1646 | | |
1647 | | static void collectPHIs(MachineBasicBlock *MBB, |
1648 | 0 | SmallVector<MachineInstr *, 2> &PHIs) { |
1649 | 0 | for (auto &BBI : *MBB) { |
1650 | 0 | if (BBI.isPHI()0 ) { |
1651 | 0 | PHIs.push_back(&BBI); |
1652 | 0 | } |
1653 | 0 | } |
1654 | 0 | } |
1655 | | |
1656 | | void AMDGPUMachineCFGStructurizer::rewriteRegionExitPHIs(RegionMRT *Region, |
1657 | | MachineBasicBlock *LastMerge, |
1658 | 0 | LinearizedRegion *LRegion) { |
1659 | 0 | SmallVector<MachineInstr *, 2> PHIs; |
1660 | 0 | auto Exit = Region->getSucc(); |
1661 | 0 | if (Exit == nullptr) |
1662 | 0 | return; |
1663 | 0 |
|
1664 | 0 | collectPHIs(Exit, PHIs); |
1665 | 0 |
|
1666 | 0 | for (auto PHII : PHIs) { |
1667 | 0 | rewriteRegionExitPHI(Region, LastMerge, *PHII, LRegion); |
1668 | 0 | } |
1669 | 0 | } |
1670 | | |
1671 | | void AMDGPUMachineCFGStructurizer::rewriteRegionEntryPHIs(LinearizedRegion *Region, |
1672 | 0 | MachineBasicBlock *IfMBB) { |
1673 | 0 | SmallVector<MachineInstr *, 2> PHIs; |
1674 | 0 | auto Entry = Region->getEntry(); |
1675 | 0 |
|
1676 | 0 | collectPHIs(Entry, PHIs); |
1677 | 0 |
|
1678 | 0 | for (auto PHII : PHIs) { |
1679 | 0 | rewriteRegionEntryPHI(Region, IfMBB, *PHII); |
1680 | 0 | } |
1681 | 0 | } |
1682 | | |
1683 | | void AMDGPUMachineCFGStructurizer::insertUnconditionalBranch(MachineBasicBlock *MBB, |
1684 | | MachineBasicBlock *Dest, |
1685 | 0 | const DebugLoc &DL) { |
1686 | 0 | DEBUG(dbgs() << "Inserting unconditional branch: " << MBB->getNumber() |
1687 | 0 | << " -> " << Dest->getNumber() << "\n"); |
1688 | 0 | MachineBasicBlock::instr_iterator Terminator = MBB->getFirstInstrTerminator(); |
1689 | 0 | bool HasTerminator = Terminator != MBB->instr_end(); |
1690 | 0 | if (HasTerminator0 ) { |
1691 | 0 | TII->ReplaceTailWithBranchTo(Terminator, Dest); |
1692 | 0 | } |
1693 | 0 | if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(Dest)0 ) { |
1694 | 0 | TII->insertUnconditionalBranch(*MBB, Dest, DL); |
1695 | 0 | } |
1696 | 0 | } |
1697 | | |
1698 | 0 | static MachineBasicBlock *getSingleExitNode(MachineFunction &MF) { |
1699 | 0 | MachineBasicBlock *result = nullptr; |
1700 | 0 | for (auto &MFI : MF) { |
1701 | 0 | if (MFI.succ_size() == 00 ) { |
1702 | 0 | if (result == nullptr0 ) { |
1703 | 0 | result = &MFI; |
1704 | 0 | } else { |
1705 | 0 | return nullptr; |
1706 | 0 | } |
1707 | 0 | } |
1708 | 0 | } |
1709 | 0 |
|
1710 | 0 | return result; |
1711 | 0 | } |
1712 | | |
1713 | 0 | static bool hasOneExitNode(MachineFunction &MF) { |
1714 | 0 | return getSingleExitNode(MF) != nullptr; |
1715 | 0 | } |
1716 | | |
1717 | | MachineBasicBlock * |
1718 | 0 | AMDGPUMachineCFGStructurizer::createLinearizedExitBlock(RegionMRT *Region) { |
1719 | 0 | auto Exit = Region->getSucc(); |
1720 | 0 |
|
1721 | 0 | // If the exit is the end of the function, we just use the existing |
1722 | 0 | MachineFunction *MF = Region->getEntry()->getParent(); |
1723 | 0 | if (Exit == nullptr && 0 hasOneExitNode(*MF)0 ) { |
1724 | 0 | return &(*(--(Region->getEntry()->getParent()->end()))); |
1725 | 0 | } |
1726 | 0 |
|
1727 | 0 | MachineBasicBlock *LastMerge = MF->CreateMachineBasicBlock(); |
1728 | 0 | if (Exit == nullptr0 ) { |
1729 | 0 | MachineFunction::iterator ExitIter = MF->end(); |
1730 | 0 | MF->insert(ExitIter, LastMerge); |
1731 | 0 | } else { |
1732 | 0 | MachineFunction::iterator ExitIter = Exit->getIterator(); |
1733 | 0 | MF->insert(ExitIter, LastMerge); |
1734 | 0 | LastMerge->addSuccessor(Exit); |
1735 | 0 | insertUnconditionalBranch(LastMerge, Exit); |
1736 | 0 | DEBUG(dbgs() << "Created exit block: " << LastMerge->getNumber() << "\n"); |
1737 | 0 | } |
1738 | 0 | return LastMerge; |
1739 | 0 | } |
1740 | | |
1741 | | void AMDGPUMachineCFGStructurizer::insertMergePHI(MachineBasicBlock *IfBB, |
1742 | | MachineBasicBlock *CodeBB, |
1743 | | MachineBasicBlock *MergeBB, |
1744 | | unsigned DestRegister, |
1745 | | unsigned IfSourceRegister, |
1746 | | unsigned CodeSourceRegister, |
1747 | 0 | bool IsUndefIfSource) { |
1748 | 0 | // If this is the function exit block, we don't need a phi. |
1749 | 0 | if (MergeBB->succ_begin() == MergeBB->succ_end()0 ) { |
1750 | 0 | return; |
1751 | 0 | } |
1752 | 0 | DEBUG0 (dbgs() << "Merge PHI (BB#" << MergeBB->getNumber() |
1753 | 0 | << "): " << PrintReg(DestRegister, TRI) << "<def> = PHI(" |
1754 | 0 | << PrintReg(IfSourceRegister, TRI) << ", BB#" |
1755 | 0 | << IfBB->getNumber() << PrintReg(CodeSourceRegister, TRI) |
1756 | 0 | << ", BB#" << CodeBB->getNumber() << ")\n"); |
1757 | 0 | const DebugLoc &DL = MergeBB->findDebugLoc(MergeBB->begin()); |
1758 | 0 | MachineInstrBuilder MIB = BuildMI(*MergeBB, MergeBB->instr_begin(), DL, |
1759 | 0 | TII->get(TargetOpcode::PHI), DestRegister); |
1760 | 0 | if (IsUndefIfSource && 0 false0 ) { |
1761 | 0 | MIB.addReg(IfSourceRegister, RegState::Undef); |
1762 | 0 | } else { |
1763 | 0 | MIB.addReg(IfSourceRegister); |
1764 | 0 | } |
1765 | 0 | MIB.addMBB(IfBB); |
1766 | 0 | MIB.addReg(CodeSourceRegister); |
1767 | 0 | MIB.addMBB(CodeBB); |
1768 | 0 | } |
1769 | | |
1770 | 0 | static void removeExternalCFGSuccessors(MachineBasicBlock *MBB) { |
1771 | 0 | for (MachineBasicBlock::succ_iterator PI = MBB->succ_begin(), |
1772 | 0 | E = MBB->succ_end(); |
1773 | 0 | PI != E0 ; ++PI0 ) { |
1774 | 0 | if ((*PI) != MBB0 ) { |
1775 | 0 | (MBB)->removeSuccessor(*PI); |
1776 | 0 | } |
1777 | 0 | } |
1778 | 0 | } |
1779 | | |
1780 | | static void removeExternalCFGEdges(MachineBasicBlock *StartMBB, |
1781 | 0 | MachineBasicBlock *EndMBB) { |
1782 | 0 |
|
1783 | 0 | // We have to check against the StartMBB successor becasuse a |
1784 | 0 | // structurized region with a loop will have the entry block split, |
1785 | 0 | // and the backedge will go to the entry successor. |
1786 | 0 | DenseSet<std::pair<MachineBasicBlock *, MachineBasicBlock *>> Succs; |
1787 | 0 | unsigned SuccSize = StartMBB->succ_size(); |
1788 | 0 | if (SuccSize > 00 ) { |
1789 | 0 | MachineBasicBlock *StartMBBSucc = *(StartMBB->succ_begin()); |
1790 | 0 | for (MachineBasicBlock::succ_iterator PI = EndMBB->succ_begin(), |
1791 | 0 | E = EndMBB->succ_end(); |
1792 | 0 | PI != E0 ; ++PI0 ) { |
1793 | 0 | // Either we have a back-edge to the entry block, or a back-edge to the |
1794 | 0 | // successor of the entry block since the block may be split. |
1795 | 0 | if ((*PI) != StartMBB && |
1796 | 0 | !((*PI) == StartMBBSucc && 0 StartMBB != EndMBB0 && SuccSize == 10 )) { |
1797 | 0 | Succs.insert( |
1798 | 0 | std::pair<MachineBasicBlock *, MachineBasicBlock *>(EndMBB, *PI)); |
1799 | 0 | } |
1800 | 0 | } |
1801 | 0 | } |
1802 | 0 |
|
1803 | 0 | for (MachineBasicBlock::pred_iterator PI = StartMBB->pred_begin(), |
1804 | 0 | E = StartMBB->pred_end(); |
1805 | 0 | PI != E0 ; ++PI0 ) { |
1806 | 0 | if ((*PI) != EndMBB0 ) { |
1807 | 0 | Succs.insert( |
1808 | 0 | std::pair<MachineBasicBlock *, MachineBasicBlock *>(*PI, StartMBB)); |
1809 | 0 | } |
1810 | 0 | } |
1811 | 0 |
|
1812 | 0 | for (auto SI : Succs) { |
1813 | 0 | std::pair<MachineBasicBlock *, MachineBasicBlock *> Edge = SI; |
1814 | 0 | DEBUG(dbgs() << "Removing edge: BB#" << Edge.first->getNumber() << " -> BB#" |
1815 | 0 | << Edge.second->getNumber() << "\n"); |
1816 | 0 | Edge.first->removeSuccessor(Edge.second); |
1817 | 0 | } |
1818 | 0 | } |
1819 | | |
1820 | | MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfBlock( |
1821 | | MachineBasicBlock *MergeBB, MachineBasicBlock *CodeBBStart, |
1822 | | MachineBasicBlock *CodeBBEnd, MachineBasicBlock *SelectBB, unsigned IfReg, |
1823 | 0 | bool InheritPreds) { |
1824 | 0 | MachineFunction *MF = MergeBB->getParent(); |
1825 | 0 | MachineBasicBlock *IfBB = MF->CreateMachineBasicBlock(); |
1826 | 0 |
|
1827 | 0 | if (InheritPreds0 ) { |
1828 | 0 | for (MachineBasicBlock::pred_iterator PI = CodeBBStart->pred_begin(), |
1829 | 0 | E = CodeBBStart->pred_end(); |
1830 | 0 | PI != E0 ; ++PI0 ) { |
1831 | 0 | if ((*PI) != CodeBBEnd0 ) { |
1832 | 0 | MachineBasicBlock *Pred = (*PI); |
1833 | 0 | Pred->addSuccessor(IfBB); |
1834 | 0 | } |
1835 | 0 | } |
1836 | 0 | } |
1837 | 0 |
|
1838 | 0 | removeExternalCFGEdges(CodeBBStart, CodeBBEnd); |
1839 | 0 |
|
1840 | 0 | auto CodeBBStartI = CodeBBStart->getIterator(); |
1841 | 0 | auto CodeBBEndI = CodeBBEnd->getIterator(); |
1842 | 0 | auto MergeIter = MergeBB->getIterator(); |
1843 | 0 | MF->insert(MergeIter, IfBB); |
1844 | 0 | MF->splice(MergeIter, CodeBBStartI, ++CodeBBEndI); |
1845 | 0 | IfBB->addSuccessor(MergeBB); |
1846 | 0 | IfBB->addSuccessor(CodeBBStart); |
1847 | 0 |
|
1848 | 0 | DEBUG(dbgs() << "Created If block: " << IfBB->getNumber() << "\n"); |
1849 | 0 | // Ensure that the MergeBB is a successor of the CodeEndBB. |
1850 | 0 | if (!CodeBBEnd->isSuccessor(MergeBB)) |
1851 | 0 | CodeBBEnd->addSuccessor(MergeBB); |
1852 | 0 |
|
1853 | 0 | DEBUG(dbgs() << "Moved MBB#" << CodeBBStart->getNumber() << " through MBB#" |
1854 | 0 | << CodeBBEnd->getNumber() << "\n"); |
1855 | 0 |
|
1856 | 0 | // If we have a single predecessor we can find a reasonable debug location |
1857 | 0 | MachineBasicBlock *SinglePred = |
1858 | 0 | CodeBBStart->pred_size() == 1 ? *(CodeBBStart->pred_begin())0 : nullptr0 ; |
1859 | 0 | const DebugLoc &DL = SinglePred |
1860 | 0 | ? SinglePred->findDebugLoc(SinglePred->getFirstTerminator()) |
1861 | 0 | : DebugLoc(); |
1862 | 0 |
|
1863 | 0 | unsigned Reg = |
1864 | 0 | TII->insertEQ(IfBB, IfBB->begin(), DL, IfReg, |
1865 | 0 | SelectBB->getNumber() /* CodeBBStart->getNumber() */); |
1866 | 0 | if (&(*(IfBB->getParent()->begin())) == IfBB0 ) { |
1867 | 0 | TII->materializeImmediate(*IfBB, IfBB->begin(), DL, IfReg, |
1868 | 0 | CodeBBStart->getNumber()); |
1869 | 0 | } |
1870 | 0 | MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, true); |
1871 | 0 | ArrayRef<MachineOperand> Cond(RegOp); |
1872 | 0 | TII->insertBranch(*IfBB, MergeBB, CodeBBStart, Cond, DL); |
1873 | 0 |
|
1874 | 0 | return IfBB; |
1875 | 0 | } |
1876 | | |
1877 | | void AMDGPUMachineCFGStructurizer::ensureCondIsNotKilled( |
1878 | 0 | SmallVector<MachineOperand, 1> Cond) { |
1879 | 0 | if (Cond.size() != 1) |
1880 | 0 | return; |
1881 | 0 | if (0 !Cond[0].isReg()0 ) |
1882 | 0 | return; |
1883 | 0 |
|
1884 | 0 | unsigned CondReg = Cond[0].getReg(); |
1885 | 0 | for (auto UI = MRI->use_begin(CondReg), E = MRI->use_end(); UI != E0 ; ++UI0 ) { |
1886 | 0 | (*UI).setIsKill(false); |
1887 | 0 | } |
1888 | 0 | } |
1889 | | |
1890 | | void AMDGPUMachineCFGStructurizer::rewriteCodeBBTerminator(MachineBasicBlock *CodeBB, |
1891 | | MachineBasicBlock *MergeBB, |
1892 | 0 | unsigned BBSelectReg) { |
1893 | 0 | MachineBasicBlock *TrueBB = nullptr; |
1894 | 0 | MachineBasicBlock *FalseBB = nullptr; |
1895 | 0 | SmallVector<MachineOperand, 1> Cond; |
1896 | 0 | MachineBasicBlock *FallthroughBB = FallthroughMap[CodeBB]; |
1897 | 0 | TII->analyzeBranch(*CodeBB, TrueBB, FalseBB, Cond); |
1898 | 0 |
|
1899 | 0 | const DebugLoc &DL = CodeBB->findDebugLoc(CodeBB->getFirstTerminator()); |
1900 | 0 |
|
1901 | 0 | if (FalseBB == nullptr && 0 TrueBB == nullptr0 && FallthroughBB == nullptr0 ) { |
1902 | 0 | // This is an exit block, hence no successors. We will assign the |
1903 | 0 | // bb select register to the entry block. |
1904 | 0 | TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, |
1905 | 0 | BBSelectReg, |
1906 | 0 | CodeBB->getParent()->begin()->getNumber()); |
1907 | 0 | insertUnconditionalBranch(CodeBB, MergeBB, DL); |
1908 | 0 | return; |
1909 | 0 | } |
1910 | 0 |
|
1911 | 0 | if (0 FalseBB == nullptr && 0 TrueBB == nullptr0 ) { |
1912 | 0 | TrueBB = FallthroughBB; |
1913 | 0 | } else if (0 TrueBB != nullptr0 ) { |
1914 | 0 | FalseBB = |
1915 | 0 | (FallthroughBB && (FallthroughBB != TrueBB)0 ) ? FallthroughBB0 : FalseBB0 ; |
1916 | 0 | } |
1917 | 0 |
|
1918 | 0 | if ((TrueBB != nullptr && 0 FalseBB == nullptr0 ) || (TrueBB == FalseBB)0 ) { |
1919 | 0 | TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, |
1920 | 0 | BBSelectReg, TrueBB->getNumber()); |
1921 | 0 | } else { |
1922 | 0 | const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectReg); |
1923 | 0 | unsigned TrueBBReg = MRI->createVirtualRegister(RegClass); |
1924 | 0 | unsigned FalseBBReg = MRI->createVirtualRegister(RegClass); |
1925 | 0 | TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, |
1926 | 0 | TrueBBReg, TrueBB->getNumber()); |
1927 | 0 | TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, |
1928 | 0 | FalseBBReg, FalseBB->getNumber()); |
1929 | 0 | ensureCondIsNotKilled(Cond); |
1930 | 0 | TII->insertVectorSelect(*CodeBB, CodeBB->getFirstTerminator(), DL, |
1931 | 0 | BBSelectReg, Cond, TrueBBReg, FalseBBReg); |
1932 | 0 | } |
1933 | 0 |
|
1934 | 0 | insertUnconditionalBranch(CodeBB, MergeBB, DL); |
1935 | 0 | } |
1936 | | |
1937 | 0 | MachineInstr *AMDGPUMachineCFGStructurizer::getDefInstr(unsigned Reg) { |
1938 | 0 | if (MRI->def_begin(Reg) == MRI->def_end()0 ) { |
1939 | 0 | DEBUG(dbgs() << "Register " << PrintReg(Reg, MRI->getTargetRegisterInfo()) |
1940 | 0 | << " has NO defs\n"); |
1941 | 0 | } else if (0 !MRI->hasOneDef(Reg)0 ) { |
1942 | 0 | DEBUG(dbgs() << "Register " << PrintReg(Reg, MRI->getTargetRegisterInfo()) |
1943 | 0 | << " has multiple defs\n"); |
1944 | 0 | DEBUG(dbgs() << "DEFS BEGIN:\n"); |
1945 | 0 | for (auto DI = MRI->def_begin(Reg), DE = MRI->def_end(); DI != DE0 ; ++DI0 ) { |
1946 | 0 | DEBUG(DI->getParent()->dump()); |
1947 | 0 | } |
1948 | 0 | DEBUG(dbgs() << "DEFS END\n"); |
1949 | 0 | } |
1950 | 0 |
|
1951 | 0 | assert(MRI->hasOneDef(Reg) && "Register has multiple definitions"); |
1952 | 0 | return (*(MRI->def_begin(Reg))).getParent(); |
1953 | 0 | } |
1954 | | |
1955 | | void AMDGPUMachineCFGStructurizer::insertChainedPHI(MachineBasicBlock *IfBB, |
1956 | | MachineBasicBlock *CodeBB, |
1957 | | MachineBasicBlock *MergeBB, |
1958 | | LinearizedRegion *InnerRegion, |
1959 | | unsigned DestReg, |
1960 | 0 | unsigned SourceReg) { |
1961 | 0 | // In this function we know we are part of a chain already, so we need |
1962 | 0 | // to add the registers to the existing chain, and rename the register |
1963 | 0 | // inside the region. |
1964 | 0 | bool IsSingleBB = InnerRegion->getEntry() == InnerRegion->getExit(); |
1965 | 0 | MachineInstr *DefInstr = getDefInstr(SourceReg); |
1966 | 0 | if (DefInstr->isPHI() && 0 DefInstr->getParent() == CodeBB0 && IsSingleBB0 ) { |
1967 | 0 | // Handle the case where the def is a PHI-def inside a basic |
1968 | 0 | // block, then we only need to do renaming. Special care needs to |
1969 | 0 | // be taken if the PHI-def is part of an existing chain, or if a |
1970 | 0 | // new one needs to be created. |
1971 | 0 | InnerRegion->replaceRegisterInsideRegion(SourceReg, DestReg, true, MRI); |
1972 | 0 |
|
1973 | 0 | // We collect all PHI Information, and if we are at the region entry, |
1974 | 0 | // all PHIs will be removed, and then re-introduced if needed. |
1975 | 0 | storePHILinearizationInfoDest(DestReg, *DefInstr); |
1976 | 0 | // We have picked up all the information we need now and can remove |
1977 | 0 | // the PHI |
1978 | 0 | PHIInfo.removeSource(DestReg, SourceReg, CodeBB); |
1979 | 0 | DefInstr->eraseFromParent(); |
1980 | 0 | } else { |
1981 | 0 | // If this is not a phi-def, or it is a phi-def but from a linearized region |
1982 | 0 | if (IsSingleBB && 0 DefInstr->getParent() == InnerRegion->getEntry()0 ) { |
1983 | 0 | // If this is a single BB and the definition is in this block we |
1984 | 0 | // need to replace any uses outside the region. |
1985 | 0 | InnerRegion->replaceRegisterOutsideRegion(SourceReg, DestReg, false, MRI); |
1986 | 0 | } |
1987 | 0 | const TargetRegisterClass *RegClass = MRI->getRegClass(DestReg); |
1988 | 0 | unsigned NextDestReg = MRI->createVirtualRegister(RegClass); |
1989 | 0 | bool IsLastDef = PHIInfo.getNumSources(DestReg) == 1; |
1990 | 0 | DEBUG(dbgs() << "Insert Chained PHI\n"); |
1991 | 0 | insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, DestReg, NextDestReg, |
1992 | 0 | SourceReg, IsLastDef); |
1993 | 0 |
|
1994 | 0 | PHIInfo.removeSource(DestReg, SourceReg, CodeBB); |
1995 | 0 | if (IsLastDef0 ) { |
1996 | 0 | const DebugLoc &DL = IfBB->findDebugLoc(IfBB->getFirstTerminator()); |
1997 | 0 | TII->materializeImmediate(*IfBB, IfBB->getFirstTerminator(), DL, |
1998 | 0 | NextDestReg, 0); |
1999 | 0 | PHIInfo.deleteDef(DestReg); |
2000 | 0 | } else { |
2001 | 0 | PHIInfo.replaceDef(DestReg, NextDestReg); |
2002 | 0 | } |
2003 | 0 | } |
2004 | 0 | } |
2005 | | |
2006 | | bool AMDGPUMachineCFGStructurizer::containsDef(MachineBasicBlock *MBB, |
2007 | | LinearizedRegion *InnerRegion, |
2008 | 0 | unsigned Register) { |
2009 | 0 | return getDefInstr(Register)->getParent() == MBB || |
2010 | 0 | InnerRegion->contains(getDefInstr(Register)->getParent()); |
2011 | 0 | } |
2012 | | |
2013 | | void AMDGPUMachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB, |
2014 | | MachineBasicBlock *CodeBB, |
2015 | | MachineBasicBlock *MergeBB, |
2016 | | LinearizedRegion *InnerRegion, |
2017 | 0 | LinearizedRegion *LRegion) { |
2018 | 0 | DenseSet<unsigned> *LiveOuts = InnerRegion->getLiveOuts(); |
2019 | 0 | SmallVector<unsigned, 4> OldLiveOuts; |
2020 | 0 | bool IsSingleBB = InnerRegion->getEntry() == InnerRegion->getExit(); |
2021 | 0 | for (auto OLI : *LiveOuts) { |
2022 | 0 | OldLiveOuts.push_back(OLI); |
2023 | 0 | } |
2024 | 0 |
|
2025 | 0 | for (auto LI : OldLiveOuts) { |
2026 | 0 | DEBUG(dbgs() << "LiveOut: " << PrintReg(LI, TRI)); |
2027 | 0 | if (!containsDef(CodeBB, InnerRegion, LI) || |
2028 | 0 | (!IsSingleBB && 0 (getDefInstr(LI)->getParent() == LRegion->getExit())0 )) { |
2029 | 0 | // If the register simly lives through the CodeBB, we don't have |
2030 | 0 | // to rewrite anything since the register is not defined in this |
2031 | 0 | // part of the code. |
2032 | 0 | DEBUG(dbgs() << "- through"); |
2033 | 0 | continue; |
2034 | 0 | } |
2035 | 0 | DEBUG0 (dbgs() << "\n"); |
2036 | 0 | unsigned Reg = LI; |
2037 | 0 | if (/*!PHIInfo.isSource(Reg) &&*/ Reg != InnerRegion->getBBSelectRegOut()0 ) { |
2038 | 0 | // If the register is live out, we do want to create a phi, |
2039 | 0 | // unless it is from the Exit block, becasuse in that case there |
2040 | 0 | // is already a PHI, and no need to create a new one. |
2041 | 0 |
|
2042 | 0 | // If the register is just a live out def and not part of a phi |
2043 | 0 | // chain, we need to create a PHI node to handle the if region, |
2044 | 0 | // and replace all uses outside of the region with the new dest |
2045 | 0 | // register, unless it is the outgoing BB select register. We have |
2046 | 0 | // already creaed phi nodes for these. |
2047 | 0 | const TargetRegisterClass *RegClass = MRI->getRegClass(Reg); |
2048 | 0 | unsigned PHIDestReg = MRI->createVirtualRegister(RegClass); |
2049 | 0 | unsigned IfSourceReg = MRI->createVirtualRegister(RegClass); |
2050 | 0 | // Create initializer, this value is never used, but is needed |
2051 | 0 | // to satisfy SSA. |
2052 | 0 | DEBUG(dbgs() << "Initializer for reg: " << PrintReg(Reg) << "\n"); |
2053 | 0 | TII->materializeImmediate(*IfBB, IfBB->getFirstTerminator(), DebugLoc(), |
2054 | 0 | IfSourceReg, 0); |
2055 | 0 |
|
2056 | 0 | InnerRegion->replaceRegisterOutsideRegion(Reg, PHIDestReg, true, MRI); |
2057 | 0 | DEBUG(dbgs() << "Insert Non-Chained Live out PHI\n"); |
2058 | 0 | insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, PHIDestReg, |
2059 | 0 | IfSourceReg, Reg, true); |
2060 | 0 | } |
2061 | 0 | } |
2062 | 0 |
|
2063 | 0 | // Handle the chained definitions in PHIInfo, checking if this basic block |
2064 | 0 | // is a source block for a definition. |
2065 | 0 | SmallVector<unsigned, 4> Sources; |
2066 | 0 | if (PHIInfo.findSourcesFromMBB(CodeBB, Sources)0 ) { |
2067 | 0 | DEBUG(dbgs() << "Inserting PHI Live Out from BB#" << CodeBB->getNumber() |
2068 | 0 | << "\n"); |
2069 | 0 | for (auto SI : Sources) { |
2070 | 0 | unsigned DestReg; |
2071 | 0 | PHIInfo.findDest(SI, CodeBB, DestReg); |
2072 | 0 | insertChainedPHI(IfBB, CodeBB, MergeBB, InnerRegion, DestReg, SI); |
2073 | 0 | } |
2074 | 0 | DEBUG(dbgs() << "Insertion done.\n"); |
2075 | 0 | } |
2076 | 0 |
|
2077 | 0 | DEBUG(PHIInfo.dump(MRI)); |
2078 | 0 | } |
2079 | | |
2080 | 0 | void AMDGPUMachineCFGStructurizer::prunePHIInfo(MachineBasicBlock *MBB) { |
2081 | 0 | DEBUG(dbgs() << "Before PHI Prune\n"); |
2082 | 0 | DEBUG(PHIInfo.dump(MRI)); |
2083 | 0 | SmallVector<std::tuple<unsigned, unsigned, MachineBasicBlock *>, 4> |
2084 | 0 | ElimiatedSources; |
2085 | 0 | for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE; |
2086 | 0 | ++DRI0 ) { |
2087 | 0 |
|
2088 | 0 | unsigned DestReg = *DRI; |
2089 | 0 | auto SE = PHIInfo.sources_end(DestReg); |
2090 | 0 |
|
2091 | 0 | bool MBBContainsPHISource = false; |
2092 | 0 | // Check if there is a PHI source in this MBB |
2093 | 0 | for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE0 ; ++SRI0 ) { |
2094 | 0 | unsigned SourceReg = (*SRI).first; |
2095 | 0 | MachineOperand *Def = &(*(MRI->def_begin(SourceReg))); |
2096 | 0 | if (Def->getParent()->getParent() == MBB0 ) { |
2097 | 0 | MBBContainsPHISource = true; |
2098 | 0 | } |
2099 | 0 | } |
2100 | 0 |
|
2101 | 0 | // If so, all other sources are useless since we know this block |
2102 | 0 | // is always executed when the region is executed. |
2103 | 0 | if (MBBContainsPHISource0 ) { |
2104 | 0 | for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE0 ; ++SRI0 ) { |
2105 | 0 | PHILinearize::PHISourceT Source = *SRI; |
2106 | 0 | unsigned SourceReg = Source.first; |
2107 | 0 | MachineBasicBlock *SourceMBB = Source.second; |
2108 | 0 | MachineOperand *Def = &(*(MRI->def_begin(SourceReg))); |
2109 | 0 | if (Def->getParent()->getParent() != MBB0 ) { |
2110 | 0 | ElimiatedSources.push_back( |
2111 | 0 | std::make_tuple(DestReg, SourceReg, SourceMBB)); |
2112 | 0 | } |
2113 | 0 | } |
2114 | 0 | } |
2115 | 0 | } |
2116 | 0 |
|
2117 | 0 | // Remove the PHI sources that are in the given MBB |
2118 | 0 | for (auto &SourceInfo : ElimiatedSources) { |
2119 | 0 | PHIInfo.removeSource(std::get<0>(SourceInfo), std::get<1>(SourceInfo), |
2120 | 0 | std::get<2>(SourceInfo)); |
2121 | 0 | } |
2122 | 0 | DEBUG(dbgs() << "After PHI Prune\n"); |
2123 | 0 | DEBUG(PHIInfo.dump(MRI)); |
2124 | 0 | } |
2125 | | |
2126 | | void AMDGPUMachineCFGStructurizer::createEntryPHI(LinearizedRegion *CurrentRegion, |
2127 | 0 | unsigned DestReg) { |
2128 | 0 | MachineBasicBlock *Entry = CurrentRegion->getEntry(); |
2129 | 0 | MachineBasicBlock *Exit = CurrentRegion->getExit(); |
2130 | 0 |
|
2131 | 0 | DEBUG(dbgs() << "RegionExit: " << Exit->getNumber() |
2132 | 0 | << " Pred: " << (*(Entry->pred_begin()))->getNumber() << "\n"); |
2133 | 0 |
|
2134 | 0 | int NumSources = 0; |
2135 | 0 | auto SE = PHIInfo.sources_end(DestReg); |
2136 | 0 |
|
2137 | 0 | for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE0 ; ++SRI0 ) { |
2138 | 0 | NumSources++; |
2139 | 0 | } |
2140 | 0 |
|
2141 | 0 | if (NumSources == 10 ) { |
2142 | 0 | auto SRI = PHIInfo.sources_begin(DestReg); |
2143 | 0 | unsigned SourceReg = (*SRI).first; |
2144 | 0 | replaceRegisterWith(DestReg, SourceReg); |
2145 | 0 | } else { |
2146 | 0 | const DebugLoc &DL = Entry->findDebugLoc(Entry->begin()); |
2147 | 0 | MachineInstrBuilder MIB = BuildMI(*Entry, Entry->instr_begin(), DL, |
2148 | 0 | TII->get(TargetOpcode::PHI), DestReg); |
2149 | 0 | DEBUG(dbgs() << "Entry PHI " << PrintReg(DestReg, TRI) << "<def> = PHI("); |
2150 | 0 |
|
2151 | 0 | unsigned CurrentBackedgeReg = 0; |
2152 | 0 |
|
2153 | 0 | for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE0 ; ++SRI0 ) { |
2154 | 0 | unsigned SourceReg = (*SRI).first; |
2155 | 0 |
|
2156 | 0 | if (CurrentRegion->contains((*SRI).second)0 ) { |
2157 | 0 | if (CurrentBackedgeReg == 00 ) { |
2158 | 0 | CurrentBackedgeReg = SourceReg; |
2159 | 0 | } else { |
2160 | 0 | MachineInstr *PHIDefInstr = getDefInstr(SourceReg); |
2161 | 0 | MachineBasicBlock *PHIDefMBB = PHIDefInstr->getParent(); |
2162 | 0 | const TargetRegisterClass *RegClass = |
2163 | 0 | MRI->getRegClass(CurrentBackedgeReg); |
2164 | 0 | unsigned NewBackedgeReg = MRI->createVirtualRegister(RegClass); |
2165 | 0 | MachineInstrBuilder BackedgePHI = |
2166 | 0 | BuildMI(*PHIDefMBB, PHIDefMBB->instr_begin(), DL, |
2167 | 0 | TII->get(TargetOpcode::PHI), NewBackedgeReg); |
2168 | 0 | BackedgePHI.addReg(CurrentBackedgeReg); |
2169 | 0 | BackedgePHI.addMBB(getPHIPred(*PHIDefInstr, 0)); |
2170 | 0 | BackedgePHI.addReg(getPHISourceReg(*PHIDefInstr, 1)); |
2171 | 0 | BackedgePHI.addMBB((*SRI).second); |
2172 | 0 | CurrentBackedgeReg = NewBackedgeReg; |
2173 | 0 | DEBUG(dbgs() << "Inserting backedge PHI: " |
2174 | 0 | << PrintReg(NewBackedgeReg, TRI) << "<def> = PHI(" |
2175 | 0 | << PrintReg(CurrentBackedgeReg, TRI) << ", BB#" |
2176 | 0 | << getPHIPred(*PHIDefInstr, 0)->getNumber() << ", " |
2177 | 0 | << PrintReg(getPHISourceReg(*PHIDefInstr, 1), TRI) |
2178 | 0 | << ", BB#" << (*SRI).second->getNumber()); |
2179 | 0 | } |
2180 | 0 | } else { |
2181 | 0 | MIB.addReg(SourceReg); |
2182 | 0 | MIB.addMBB((*SRI).second); |
2183 | 0 | DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#" |
2184 | 0 | << (*SRI).second->getNumber() << ", "); |
2185 | 0 | } |
2186 | 0 | } |
2187 | 0 |
|
2188 | 0 | // Add the final backedge register source to the entry phi |
2189 | 0 | if (CurrentBackedgeReg != 00 ) { |
2190 | 0 | MIB.addReg(CurrentBackedgeReg); |
2191 | 0 | MIB.addMBB(Exit); |
2192 | 0 | DEBUG(dbgs() << PrintReg(CurrentBackedgeReg, TRI) << ", BB#" |
2193 | 0 | << Exit->getNumber() << ")\n"); |
2194 | 0 | } else { |
2195 | 0 | DEBUG(dbgs() << ")\n"); |
2196 | 0 | } |
2197 | 0 | } |
2198 | 0 | } |
2199 | | |
2200 | 0 | void AMDGPUMachineCFGStructurizer::createEntryPHIs(LinearizedRegion *CurrentRegion) { |
2201 | 0 | DEBUG(PHIInfo.dump(MRI)); |
2202 | 0 |
|
2203 | 0 | for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE; |
2204 | 0 | ++DRI0 ) { |
2205 | 0 |
|
2206 | 0 | unsigned DestReg = *DRI; |
2207 | 0 | createEntryPHI(CurrentRegion, DestReg); |
2208 | 0 | } |
2209 | 0 | PHIInfo.clear(); |
2210 | 0 | } |
2211 | | |
2212 | | void AMDGPUMachineCFGStructurizer::replaceRegisterWith(unsigned Register, |
2213 | 0 | unsigned NewRegister) { |
2214 | 0 | assert(Register != NewRegister && "Cannot replace a reg with itself"); |
2215 | 0 |
|
2216 | 0 | for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register), |
2217 | 0 | E = MRI->reg_end(); |
2218 | 0 | I != E0 ;) { |
2219 | 0 | MachineOperand &O = *I; |
2220 | 0 | ++I; |
2221 | 0 | if (TargetRegisterInfo::isPhysicalRegister(NewRegister)0 ) { |
2222 | 0 | DEBUG(dbgs() << "Trying to substitute physical register: " |
2223 | 0 | << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) |
2224 | 0 | << "\n"); |
2225 | 0 | llvm_unreachable("Cannot substitute physical registers"); |
2226 | 0 | // We don't handle physical registers, but if we need to |
2227 | 0 | // in the future This is how we do it: |
2228 | 0 | // O.substPhysReg(NewRegister, *TRI); |
2229 | 0 | } else { |
2230 | 0 | DEBUG(dbgs() << "Replacing register: " |
2231 | 0 | << PrintReg(Register, MRI->getTargetRegisterInfo()) |
2232 | 0 | << " with " |
2233 | 0 | << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) |
2234 | 0 | << "\n"); |
2235 | 0 | O.setReg(NewRegister); |
2236 | 0 | } |
2237 | 0 | } |
2238 | 0 | PHIInfo.deleteDef(Register); |
2239 | 0 |
|
2240 | 0 | getRegionMRT()->replaceLiveOutReg(Register, NewRegister); |
2241 | 0 |
|
2242 | 0 | DEBUG(PHIInfo.dump(MRI)); |
2243 | 0 | } |
2244 | | |
2245 | 0 | void AMDGPUMachineCFGStructurizer::resolvePHIInfos(MachineBasicBlock *FunctionEntry) { |
2246 | 0 | DEBUG(dbgs() << "Resolve PHI Infos\n"); |
2247 | 0 | DEBUG(PHIInfo.dump(MRI)); |
2248 | 0 | for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE; |
2249 | 0 | ++DRI0 ) { |
2250 | 0 | unsigned DestReg = *DRI; |
2251 | 0 | DEBUG(dbgs() << "DestReg: " << PrintReg(DestReg, TRI) << "\n"); |
2252 | 0 | auto SRI = PHIInfo.sources_begin(DestReg); |
2253 | 0 | unsigned SourceReg = (*SRI).first; |
2254 | 0 | DEBUG(dbgs() << "DestReg: " << PrintReg(DestReg, TRI) |
2255 | 0 | << " SourceReg: " << PrintReg(SourceReg, TRI) << "\n"); |
2256 | 0 |
|
2257 | 0 | assert(PHIInfo.sources_end(DestReg) == ++SRI && |
2258 | 0 | "More than one phi source in entry node"); |
2259 | 0 | replaceRegisterWith(DestReg, SourceReg); |
2260 | 0 | } |
2261 | 0 | } |
2262 | | |
2263 | 0 | static bool isFunctionEntryBlock(MachineBasicBlock *MBB) { |
2264 | 0 | return ((&(*(MBB->getParent()->begin()))) == MBB); |
2265 | 0 | } |
2266 | | |
2267 | | MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion( |
2268 | | MachineBasicBlock *MergeBB, MachineBasicBlock *CodeBB, |
2269 | | LinearizedRegion *CurrentRegion, unsigned BBSelectRegIn, |
2270 | 0 | unsigned BBSelectRegOut) { |
2271 | 0 | if (isFunctionEntryBlock(CodeBB) && 0 !CurrentRegion->getHasLoop()0 ) { |
2272 | 0 | // Handle non-loop function entry block. |
2273 | 0 | // We need to allow loops to the entry block and then |
2274 | 0 | rewriteCodeBBTerminator(CodeBB, MergeBB, BBSelectRegOut); |
2275 | 0 | resolvePHIInfos(CodeBB); |
2276 | 0 | removeExternalCFGSuccessors(CodeBB); |
2277 | 0 | CodeBB->addSuccessor(MergeBB); |
2278 | 0 | CurrentRegion->addMBB(CodeBB); |
2279 | 0 | return nullptr; |
2280 | 0 | } |
2281 | 0 | if (0 CurrentRegion->getEntry() == CodeBB && 0 !CurrentRegion->getHasLoop()0 ) { |
2282 | 0 | // Handle non-loop region entry block. |
2283 | 0 | MachineFunction *MF = MergeBB->getParent(); |
2284 | 0 | auto MergeIter = MergeBB->getIterator(); |
2285 | 0 | auto CodeBBStartIter = CodeBB->getIterator(); |
2286 | 0 | auto CodeBBEndIter = ++(CodeBB->getIterator()); |
2287 | 0 | if (CodeBBEndIter != MergeIter0 ) { |
2288 | 0 | MF->splice(MergeIter, CodeBBStartIter, CodeBBEndIter); |
2289 | 0 | } |
2290 | 0 | rewriteCodeBBTerminator(CodeBB, MergeBB, BBSelectRegOut); |
2291 | 0 | prunePHIInfo(CodeBB); |
2292 | 0 | createEntryPHIs(CurrentRegion); |
2293 | 0 | removeExternalCFGSuccessors(CodeBB); |
2294 | 0 | CodeBB->addSuccessor(MergeBB); |
2295 | 0 | CurrentRegion->addMBB(CodeBB); |
2296 | 0 | return nullptr; |
2297 | 0 | } else { |
2298 | 0 | // Handle internal block. |
2299 | 0 | const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectRegIn); |
2300 | 0 | unsigned CodeBBSelectReg = MRI->createVirtualRegister(RegClass); |
2301 | 0 | rewriteCodeBBTerminator(CodeBB, MergeBB, CodeBBSelectReg); |
2302 | 0 | bool IsRegionEntryBB = CurrentRegion->getEntry() == CodeBB; |
2303 | 0 | MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeBB, CodeBB, CodeBB, |
2304 | 0 | BBSelectRegIn, IsRegionEntryBB); |
2305 | 0 | CurrentRegion->addMBB(IfBB); |
2306 | 0 | // If this is the entry block we need to make the If block the new |
2307 | 0 | // linearized region entry. |
2308 | 0 | if (IsRegionEntryBB0 ) { |
2309 | 0 | CurrentRegion->setEntry(IfBB); |
2310 | 0 |
|
2311 | 0 | if (CurrentRegion->getHasLoop()0 ) { |
2312 | 0 | MachineBasicBlock *RegionExit = CurrentRegion->getExit(); |
2313 | 0 | MachineBasicBlock *ETrueBB = nullptr; |
2314 | 0 | MachineBasicBlock *EFalseBB = nullptr; |
2315 | 0 | SmallVector<MachineOperand, 1> ECond; |
2316 | 0 |
|
2317 | 0 | const DebugLoc &DL = DebugLoc(); |
2318 | 0 | TII->analyzeBranch(*RegionExit, ETrueBB, EFalseBB, ECond); |
2319 | 0 | TII->removeBranch(*RegionExit); |
2320 | 0 |
|
2321 | 0 | // We need to create a backedge if there is a loop |
2322 | 0 | unsigned Reg = TII->insertNE( |
2323 | 0 | RegionExit, RegionExit->instr_end(), DL, |
2324 | 0 | CurrentRegion->getRegionMRT()->getInnerOutputRegister(), |
2325 | 0 | CurrentRegion->getRegionMRT()->getEntry()->getNumber()); |
2326 | 0 | MachineOperand RegOp = |
2327 | 0 | MachineOperand::CreateReg(Reg, false, false, true); |
2328 | 0 | ArrayRef<MachineOperand> Cond(RegOp); |
2329 | 0 | DEBUG(dbgs() << "RegionExitReg: "); |
2330 | 0 | DEBUG(Cond[0].print(dbgs(), TRI)); |
2331 | 0 | DEBUG(dbgs() << "\n"); |
2332 | 0 | TII->insertBranch(*RegionExit, CurrentRegion->getEntry(), RegionExit, |
2333 | 0 | Cond, DebugLoc()); |
2334 | 0 | RegionExit->addSuccessor(CurrentRegion->getEntry()); |
2335 | 0 | } |
2336 | 0 | } |
2337 | 0 | CurrentRegion->addMBB(CodeBB); |
2338 | 0 | LinearizedRegion InnerRegion(CodeBB, MRI, TRI, PHIInfo); |
2339 | 0 |
|
2340 | 0 | InnerRegion.setParent(CurrentRegion); |
2341 | 0 | DEBUG(dbgs() << "Insert BB Select PHI (BB)\n"); |
2342 | 0 | insertMergePHI(IfBB, CodeBB, MergeBB, BBSelectRegOut, BBSelectRegIn, |
2343 | 0 | CodeBBSelectReg); |
2344 | 0 | InnerRegion.addMBB(MergeBB); |
2345 | 0 |
|
2346 | 0 | DEBUG(InnerRegion.print(dbgs(), TRI)); |
2347 | 0 | rewriteLiveOutRegs(IfBB, CodeBB, MergeBB, &InnerRegion, CurrentRegion); |
2348 | 0 | extractKilledPHIs(CodeBB); |
2349 | 0 | if (IsRegionEntryBB0 ) { |
2350 | 0 | createEntryPHIs(CurrentRegion); |
2351 | 0 | } |
2352 | 0 | return IfBB; |
2353 | 0 | } |
2354 | 0 | } |
2355 | | |
2356 | | MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion( |
2357 | | MachineBasicBlock *MergeBB, LinearizedRegion *InnerRegion, |
2358 | | LinearizedRegion *CurrentRegion, MachineBasicBlock *SelectBB, |
2359 | 0 | unsigned BBSelectRegIn, unsigned BBSelectRegOut) { |
2360 | 0 | unsigned CodeBBSelectReg = |
2361 | 0 | InnerRegion->getRegionMRT()->getInnerOutputRegister(); |
2362 | 0 | MachineBasicBlock *CodeEntryBB = InnerRegion->getEntry(); |
2363 | 0 | MachineBasicBlock *CodeExitBB = InnerRegion->getExit(); |
2364 | 0 | MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeEntryBB, CodeExitBB, |
2365 | 0 | SelectBB, BBSelectRegIn, true); |
2366 | 0 | CurrentRegion->addMBB(IfBB); |
2367 | 0 | bool isEntry = CurrentRegion->getEntry() == InnerRegion->getEntry(); |
2368 | 0 | if (isEntry0 ) { |
2369 | 0 |
|
2370 | 0 | if (CurrentRegion->getHasLoop()0 ) { |
2371 | 0 | MachineBasicBlock *RegionExit = CurrentRegion->getExit(); |
2372 | 0 | MachineBasicBlock *ETrueBB = nullptr; |
2373 | 0 | MachineBasicBlock *EFalseBB = nullptr; |
2374 | 0 | SmallVector<MachineOperand, 1> ECond; |
2375 | 0 |
|
2376 | 0 | const DebugLoc &DL = DebugLoc(); |
2377 | 0 | TII->analyzeBranch(*RegionExit, ETrueBB, EFalseBB, ECond); |
2378 | 0 | TII->removeBranch(*RegionExit); |
2379 | 0 |
|
2380 | 0 | // We need to create a backedge if there is a loop |
2381 | 0 | unsigned Reg = |
2382 | 0 | TII->insertNE(RegionExit, RegionExit->instr_end(), DL, |
2383 | 0 | CurrentRegion->getRegionMRT()->getInnerOutputRegister(), |
2384 | 0 | CurrentRegion->getRegionMRT()->getEntry()->getNumber()); |
2385 | 0 | MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, true); |
2386 | 0 | ArrayRef<MachineOperand> Cond(RegOp); |
2387 | 0 | DEBUG(dbgs() << "RegionExitReg: "); |
2388 | 0 | DEBUG(Cond[0].print(dbgs(), TRI)); |
2389 | 0 | DEBUG(dbgs() << "\n"); |
2390 | 0 | TII->insertBranch(*RegionExit, CurrentRegion->getEntry(), RegionExit, |
2391 | 0 | Cond, DebugLoc()); |
2392 | 0 | RegionExit->addSuccessor(IfBB); |
2393 | 0 | } |
2394 | 0 | } |
2395 | 0 | CurrentRegion->addMBBs(InnerRegion); |
2396 | 0 | DEBUG(dbgs() << "Insert BB Select PHI (region)\n"); |
2397 | 0 | insertMergePHI(IfBB, CodeExitBB, MergeBB, BBSelectRegOut, BBSelectRegIn, |
2398 | 0 | CodeBBSelectReg); |
2399 | 0 |
|
2400 | 0 | rewriteLiveOutRegs(IfBB, /* CodeEntryBB */ CodeExitBB, MergeBB, InnerRegion, |
2401 | 0 | CurrentRegion); |
2402 | 0 |
|
2403 | 0 | rewriteRegionEntryPHIs(InnerRegion, IfBB); |
2404 | 0 |
|
2405 | 0 | if (isEntry0 ) { |
2406 | 0 | CurrentRegion->setEntry(IfBB); |
2407 | 0 | } |
2408 | 0 |
|
2409 | 0 | if (isEntry0 ) { |
2410 | 0 | createEntryPHIs(CurrentRegion); |
2411 | 0 | } |
2412 | 0 |
|
2413 | 0 | return IfBB; |
2414 | 0 | } |
2415 | | |
2416 | | void AMDGPUMachineCFGStructurizer::splitLoopPHI(MachineInstr &PHI, |
2417 | | MachineBasicBlock *Entry, |
2418 | | MachineBasicBlock *EntrySucc, |
2419 | 0 | LinearizedRegion *LRegion) { |
2420 | 0 | SmallVector<unsigned, 2> PHIRegionIndices; |
2421 | 0 | getPHIRegionIndices(LRegion, PHI, PHIRegionIndices); |
2422 | 0 |
|
2423 | 0 | assert(PHIRegionIndices.size() == 1); |
2424 | 0 |
|
2425 | 0 | unsigned RegionIndex = PHIRegionIndices[0]; |
2426 | 0 | unsigned RegionSourceReg = getPHISourceReg(PHI, RegionIndex); |
2427 | 0 | MachineBasicBlock *RegionSourceMBB = getPHIPred(PHI, RegionIndex); |
2428 | 0 | unsigned PHIDest = getPHIDestReg(PHI); |
2429 | 0 | unsigned PHISource = PHIDest; |
2430 | 0 | unsigned ReplaceReg; |
2431 | 0 |
|
2432 | 0 | if (shrinkPHI(PHI, PHIRegionIndices, &ReplaceReg)0 ) { |
2433 | 0 | PHISource = ReplaceReg; |
2434 | 0 | } |
2435 | 0 |
|
2436 | 0 | const TargetRegisterClass *RegClass = MRI->getRegClass(PHIDest); |
2437 | 0 | unsigned NewDestReg = MRI->createVirtualRegister(RegClass); |
2438 | 0 | LRegion->replaceRegisterInsideRegion(PHIDest, NewDestReg, false, MRI); |
2439 | 0 | MachineInstrBuilder MIB = |
2440 | 0 | BuildMI(*EntrySucc, EntrySucc->instr_begin(), PHI.getDebugLoc(), |
2441 | 0 | TII->get(TargetOpcode::PHI), NewDestReg); |
2442 | 0 | DEBUG(dbgs() << "Split Entry PHI " << PrintReg(NewDestReg, TRI) |
2443 | 0 | << "<def> = PHI("); |
2444 | 0 | MIB.addReg(PHISource); |
2445 | 0 | MIB.addMBB(Entry); |
2446 | 0 | DEBUG(dbgs() << PrintReg(PHISource, TRI) << ", BB#" << Entry->getNumber()); |
2447 | 0 | MIB.addReg(RegionSourceReg); |
2448 | 0 | MIB.addMBB(RegionSourceMBB); |
2449 | 0 | DEBUG(dbgs() << " ," << PrintReg(RegionSourceReg, TRI) << ", BB#" |
2450 | 0 | << RegionSourceMBB->getNumber() << ")\n"); |
2451 | 0 | } |
2452 | | |
2453 | | void AMDGPUMachineCFGStructurizer::splitLoopPHIs(MachineBasicBlock *Entry, |
2454 | | MachineBasicBlock *EntrySucc, |
2455 | 0 | LinearizedRegion *LRegion) { |
2456 | 0 | SmallVector<MachineInstr *, 2> PHIs; |
2457 | 0 | collectPHIs(Entry, PHIs); |
2458 | 0 |
|
2459 | 0 | for (auto PHII : PHIs) { |
2460 | 0 | splitLoopPHI(*PHII, Entry, EntrySucc, LRegion); |
2461 | 0 | } |
2462 | 0 | } |
2463 | | |
2464 | | // Split the exit block so that we can insert a end control flow |
2465 | | MachineBasicBlock * |
2466 | 0 | AMDGPUMachineCFGStructurizer::splitExit(LinearizedRegion *LRegion) { |
2467 | 0 | auto MRTRegion = LRegion->getRegionMRT(); |
2468 | 0 | auto Exit = LRegion->getExit(); |
2469 | 0 | auto MF = Exit->getParent(); |
2470 | 0 | auto Succ = MRTRegion->getSucc(); |
2471 | 0 |
|
2472 | 0 | auto NewExit = MF->CreateMachineBasicBlock(); |
2473 | 0 | auto AfterExitIter = Exit->getIterator(); |
2474 | 0 | AfterExitIter++; |
2475 | 0 | MF->insert(AfterExitIter, NewExit); |
2476 | 0 | Exit->removeSuccessor(Succ); |
2477 | 0 | Exit->addSuccessor(NewExit); |
2478 | 0 | NewExit->addSuccessor(Succ); |
2479 | 0 | insertUnconditionalBranch(NewExit, Succ); |
2480 | 0 | LRegion->addMBB(NewExit); |
2481 | 0 | LRegion->setExit(NewExit); |
2482 | 0 |
|
2483 | 0 | DEBUG(dbgs() << "Created new exit block: " << NewExit->getNumber() << "\n"); |
2484 | 0 |
|
2485 | 0 | // Replace any PHI Predecessors in the successor with NewExit |
2486 | 0 | for (auto &II : *Succ) { |
2487 | 0 | MachineInstr &Instr = II; |
2488 | 0 |
|
2489 | 0 | // If we are past the PHI instructions we are done |
2490 | 0 | if (!Instr.isPHI()) |
2491 | 0 | break; |
2492 | 0 |
|
2493 | 0 | int numPreds = getPHINumInputs(Instr); |
2494 | 0 | for (int i = 0; i < numPreds0 ; ++i0 ) { |
2495 | 0 | auto Pred = getPHIPred(Instr, i); |
2496 | 0 | if (Pred == Exit0 ) { |
2497 | 0 | setPhiPred(Instr, i, NewExit); |
2498 | 0 | } |
2499 | 0 | } |
2500 | 0 | } |
2501 | 0 |
|
2502 | 0 | return NewExit; |
2503 | 0 | } |
2504 | | |
2505 | 0 | static MachineBasicBlock *split(MachineBasicBlock::iterator I) { |
2506 | 0 | // Create the fall-through block. |
2507 | 0 | MachineBasicBlock *MBB = (*I).getParent(); |
2508 | 0 | MachineFunction *MF = MBB->getParent(); |
2509 | 0 | MachineBasicBlock *SuccMBB = MF->CreateMachineBasicBlock(); |
2510 | 0 | auto MBBIter = ++(MBB->getIterator()); |
2511 | 0 | MF->insert(MBBIter, SuccMBB); |
2512 | 0 | SuccMBB->transferSuccessorsAndUpdatePHIs(MBB); |
2513 | 0 | MBB->addSuccessor(SuccMBB); |
2514 | 0 |
|
2515 | 0 | // Splice the code over. |
2516 | 0 | SuccMBB->splice(SuccMBB->end(), MBB, I, MBB->end()); |
2517 | 0 |
|
2518 | 0 | return SuccMBB; |
2519 | 0 | } |
2520 | | |
2521 | | // Split the entry block separating PHI-nodes and the rest of the code |
2522 | | // This is needed to insert an initializer for the bb select register |
2523 | | // inloop regions. |
2524 | | |
2525 | | MachineBasicBlock * |
2526 | 0 | AMDGPUMachineCFGStructurizer::splitEntry(LinearizedRegion *LRegion) { |
2527 | 0 | MachineBasicBlock *Entry = LRegion->getEntry(); |
2528 | 0 | MachineBasicBlock *EntrySucc = split(Entry->getFirstNonPHI()); |
2529 | 0 | MachineBasicBlock *Exit = LRegion->getExit(); |
2530 | 0 |
|
2531 | 0 | DEBUG(dbgs() << "Split BB#" << Entry->getNumber() << " to BB#" |
2532 | 0 | << Entry->getNumber() << " -> BB#" << EntrySucc->getNumber() |
2533 | 0 | << "\n"); |
2534 | 0 | LRegion->addMBB(EntrySucc); |
2535 | 0 |
|
2536 | 0 | // Make the backedge go to Entry Succ |
2537 | 0 | if (Exit->isSuccessor(Entry)0 ) { |
2538 | 0 | Exit->removeSuccessor(Entry); |
2539 | 0 | } |
2540 | 0 | Exit->addSuccessor(EntrySucc); |
2541 | 0 | MachineInstr &Branch = *(Exit->instr_rbegin()); |
2542 | 0 | for (auto &UI : Branch.uses()) { |
2543 | 0 | if (UI.isMBB() && 0 UI.getMBB() == Entry0 ) { |
2544 | 0 | UI.setMBB(EntrySucc); |
2545 | 0 | } |
2546 | 0 | } |
2547 | 0 |
|
2548 | 0 | splitLoopPHIs(Entry, EntrySucc, LRegion); |
2549 | 0 |
|
2550 | 0 | return EntrySucc; |
2551 | 0 | } |
2552 | | |
2553 | | LinearizedRegion * |
2554 | 0 | AMDGPUMachineCFGStructurizer::initLinearizedRegion(RegionMRT *Region) { |
2555 | 0 | LinearizedRegion *LRegion = Region->getLinearizedRegion(); |
2556 | 0 | LRegion->initLiveOut(Region, MRI, TRI, PHIInfo); |
2557 | 0 | LRegion->setEntry(Region->getEntry()); |
2558 | 0 | return LRegion; |
2559 | 0 | } |
2560 | | |
2561 | 0 | static void removeOldExitPreds(RegionMRT *Region) { |
2562 | 0 | MachineBasicBlock *Exit = Region->getSucc(); |
2563 | 0 | if (Exit == nullptr0 ) { |
2564 | 0 | return; |
2565 | 0 | } |
2566 | 0 | for (MachineBasicBlock::pred_iterator PI = Exit->pred_begin(), |
2567 | 0 | E = Exit->pred_end(); |
2568 | 0 | PI != E0 ; ++PI0 ) { |
2569 | 0 | if (Region->contains(*PI)0 ) { |
2570 | 0 | (*PI)->removeSuccessor(Exit); |
2571 | 0 | } |
2572 | 0 | } |
2573 | 0 | } |
2574 | | |
2575 | | static bool mbbHasBackEdge(MachineBasicBlock *MBB, |
2576 | 0 | SmallPtrSet<MachineBasicBlock *, 8> &MBBs) { |
2577 | 0 | for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE0 ; ++SI0 ) { |
2578 | 0 | if (MBBs.count(*SI) != 00 ) { |
2579 | 0 | return true; |
2580 | 0 | } |
2581 | 0 | } |
2582 | 0 | return false; |
2583 | 0 | } |
2584 | | |
2585 | | static bool containsNewBackedge(MRT *Tree, |
2586 | 0 | SmallPtrSet<MachineBasicBlock *, 8> &MBBs) { |
2587 | 0 | // Need to traverse this in reverse since it is in post order. |
2588 | 0 | if (Tree == nullptr) |
2589 | 0 | return false; |
2590 | 0 |
|
2591 | 0 | if (0 Tree->isMBB()0 ) { |
2592 | 0 | MachineBasicBlock *MBB = Tree->getMBBMRT()->getMBB(); |
2593 | 0 | MBBs.insert(MBB); |
2594 | 0 | if (mbbHasBackEdge(MBB, MBBs)0 ) { |
2595 | 0 | return true; |
2596 | 0 | } |
2597 | 0 | } else { |
2598 | 0 | RegionMRT *Region = Tree->getRegionMRT(); |
2599 | 0 | SetVector<MRT *> *Children = Region->getChildren(); |
2600 | 0 | for (auto CI = Children->rbegin(), CE = Children->rend(); CI != CE0 ; ++CI0 ) { |
2601 | 0 | if (containsNewBackedge(*CI, MBBs)) |
2602 | 0 | return true; |
2603 | 0 | } |
2604 | 0 | } |
2605 | 0 | return false; |
2606 | 0 | } |
2607 | | |
2608 | 0 | static bool containsNewBackedge(RegionMRT *Region) { |
2609 | 0 | SmallPtrSet<MachineBasicBlock *, 8> MBBs; |
2610 | 0 | return containsNewBackedge(Region, MBBs); |
2611 | 0 | } |
2612 | | |
2613 | 0 | bool AMDGPUMachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) { |
2614 | 0 | auto *LRegion = initLinearizedRegion(Region); |
2615 | 0 | LRegion->setHasLoop(containsNewBackedge(Region)); |
2616 | 0 | MachineBasicBlock *LastMerge = createLinearizedExitBlock(Region); |
2617 | 0 | MachineBasicBlock *CurrentMerge = LastMerge; |
2618 | 0 | LRegion->addMBB(LastMerge); |
2619 | 0 | LRegion->setExit(LastMerge); |
2620 | 0 |
|
2621 | 0 | rewriteRegionExitPHIs(Region, LastMerge, LRegion); |
2622 | 0 | removeOldExitPreds(Region); |
2623 | 0 |
|
2624 | 0 | DEBUG(PHIInfo.dump(MRI)); |
2625 | 0 |
|
2626 | 0 | SetVector<MRT *> *Children = Region->getChildren(); |
2627 | 0 | DEBUG(dbgs() << "===========If Region Start===============\n"); |
2628 | 0 | if (LRegion->getHasLoop()0 ) { |
2629 | 0 | DEBUG(dbgs() << "Has Backedge: Yes\n"); |
2630 | 0 | } else { |
2631 | 0 | DEBUG(dbgs() << "Has Backedge: No\n"); |
2632 | 0 | } |
2633 | 0 |
|
2634 | 0 | unsigned BBSelectRegIn; |
2635 | 0 | unsigned BBSelectRegOut; |
2636 | 0 | for (auto CI = Children->begin(), CE = Children->end(); CI != CE0 ; ++CI0 ) { |
2637 | 0 | DEBUG(dbgs() << "CurrentRegion: \n"); |
2638 | 0 | DEBUG(LRegion->print(dbgs(), TRI)); |
2639 | 0 |
|
2640 | 0 | auto CNI = CI; |
2641 | 0 | ++CNI; |
2642 | 0 |
|
2643 | 0 | MRT *Child = (*CI); |
2644 | 0 |
|
2645 | 0 | if (Child->isRegion()0 ) { |
2646 | 0 |
|
2647 | 0 | LinearizedRegion *InnerLRegion = |
2648 | 0 | Child->getRegionMRT()->getLinearizedRegion(); |
2649 | 0 | // We found the block is the exit of an inner region, we need |
2650 | 0 | // to put it in the current linearized region. |
2651 | 0 |
|
2652 | 0 | DEBUG(dbgs() << "Linearizing region: "); |
2653 | 0 | DEBUG(InnerLRegion->print(dbgs(), TRI)); |
2654 | 0 | DEBUG(dbgs() << "\n"); |
2655 | 0 |
|
2656 | 0 | MachineBasicBlock *InnerEntry = InnerLRegion->getEntry(); |
2657 | 0 | if ((&(*(InnerEntry->getParent()->begin()))) == InnerEntry0 ) { |
2658 | 0 | // Entry has already been linearized, no need to do this region. |
2659 | 0 | unsigned OuterSelect = InnerLRegion->getBBSelectRegOut(); |
2660 | 0 | unsigned InnerSelectReg = |
2661 | 0 | InnerLRegion->getRegionMRT()->getInnerOutputRegister(); |
2662 | 0 | replaceRegisterWith(InnerSelectReg, OuterSelect), |
2663 | 0 | resolvePHIInfos(InnerEntry); |
2664 | 0 | if (!InnerLRegion->getExit()->isSuccessor(CurrentMerge)) |
2665 | 0 | InnerLRegion->getExit()->addSuccessor(CurrentMerge); |
2666 | 0 | continue; |
2667 | 0 | } |
2668 | 0 |
|
2669 | 0 | BBSelectRegOut = Child->getBBSelectRegOut(); |
2670 | 0 | BBSelectRegIn = Child->getBBSelectRegIn(); |
2671 | 0 |
|
2672 | 0 | DEBUG(dbgs() << "BBSelectRegIn: " << PrintReg(BBSelectRegIn, TRI) |
2673 | 0 | << "\n"); |
2674 | 0 | DEBUG(dbgs() << "BBSelectRegOut: " << PrintReg(BBSelectRegOut, TRI) |
2675 | 0 | << "\n"); |
2676 | 0 |
|
2677 | 0 | MachineBasicBlock *IfEnd = CurrentMerge; |
2678 | 0 | CurrentMerge = createIfRegion(CurrentMerge, InnerLRegion, LRegion, |
2679 | 0 | Child->getRegionMRT()->getEntry(), |
2680 | 0 | BBSelectRegIn, BBSelectRegOut); |
2681 | 0 | TII->convertNonUniformIfRegion(CurrentMerge, IfEnd); |
2682 | 0 | } else { |
2683 | 0 | MachineBasicBlock *MBB = Child->getMBBMRT()->getMBB(); |
2684 | 0 | DEBUG(dbgs() << "Linearizing block: " << MBB->getNumber() << "\n"); |
2685 | 0 |
|
2686 | 0 | if (MBB == getSingleExitNode(*(MBB->getParent()))0 ) { |
2687 | 0 | // If this is the exit block then we need to skip to the next. |
2688 | 0 | // The "in" register will be transferred to "out" in the next |
2689 | 0 | // iteration. |
2690 | 0 | continue; |
2691 | 0 | } |
2692 | 0 |
|
2693 | 0 | BBSelectRegOut = Child->getBBSelectRegOut(); |
2694 | 0 | BBSelectRegIn = Child->getBBSelectRegIn(); |
2695 | 0 |
|
2696 | 0 | DEBUG(dbgs() << "BBSelectRegIn: " << PrintReg(BBSelectRegIn, TRI) |
2697 | 0 | << "\n"); |
2698 | 0 | DEBUG(dbgs() << "BBSelectRegOut: " << PrintReg(BBSelectRegOut, TRI) |
2699 | 0 | << "\n"); |
2700 | 0 |
|
2701 | 0 | MachineBasicBlock *IfEnd = CurrentMerge; |
2702 | 0 | // This is a basic block that is not part of an inner region, we |
2703 | 0 | // need to put it in the current linearized region. |
2704 | 0 | CurrentMerge = createIfRegion(CurrentMerge, MBB, LRegion, BBSelectRegIn, |
2705 | 0 | BBSelectRegOut); |
2706 | 0 | if (CurrentMerge0 ) { |
2707 | 0 | TII->convertNonUniformIfRegion(CurrentMerge, IfEnd); |
2708 | 0 | } |
2709 | 0 |
|
2710 | 0 | DEBUG(PHIInfo.dump(MRI)); |
2711 | 0 | } |
2712 | 0 | } |
2713 | 0 |
|
2714 | 0 | LRegion->removeFalseRegisterKills(MRI); |
2715 | 0 |
|
2716 | 0 | if (LRegion->getHasLoop()0 ) { |
2717 | 0 | MachineBasicBlock *NewSucc = splitEntry(LRegion); |
2718 | 0 | if (isFunctionEntryBlock(LRegion->getEntry())0 ) { |
2719 | 0 | resolvePHIInfos(LRegion->getEntry()); |
2720 | 0 | } |
2721 | 0 | const DebugLoc &DL = NewSucc->findDebugLoc(NewSucc->getFirstNonPHI()); |
2722 | 0 | unsigned InReg = LRegion->getBBSelectRegIn(); |
2723 | 0 | unsigned InnerSelectReg = |
2724 | 0 | MRI->createVirtualRegister(MRI->getRegClass(InReg)); |
2725 | 0 | unsigned NewInReg = MRI->createVirtualRegister(MRI->getRegClass(InReg)); |
2726 | 0 | TII->materializeImmediate(*(LRegion->getEntry()), |
2727 | 0 | LRegion->getEntry()->getFirstTerminator(), DL, |
2728 | 0 | NewInReg, Region->getEntry()->getNumber()); |
2729 | 0 | // Need to be careful about updating the registers inside the region. |
2730 | 0 | LRegion->replaceRegisterInsideRegion(InReg, InnerSelectReg, false, MRI); |
2731 | 0 | DEBUG(dbgs() << "Loop BBSelect Merge PHI:\n"); |
2732 | 0 | insertMergePHI(LRegion->getEntry(), LRegion->getExit(), NewSucc, |
2733 | 0 | InnerSelectReg, NewInReg, |
2734 | 0 | LRegion->getRegionMRT()->getInnerOutputRegister()); |
2735 | 0 | splitExit(LRegion); |
2736 | 0 | TII->convertNonUniformLoopRegion(NewSucc, LastMerge); |
2737 | 0 | } |
2738 | 0 |
|
2739 | 0 | if (Region->isRoot()0 ) { |
2740 | 0 | TII->insertReturn(*LastMerge); |
2741 | 0 | } |
2742 | 0 |
|
2743 | 0 | DEBUG(Region->getEntry()->getParent()->dump()); |
2744 | 0 | DEBUG(LRegion->print(dbgs(), TRI)); |
2745 | 0 | DEBUG(PHIInfo.dump(MRI)); |
2746 | 0 |
|
2747 | 0 | DEBUG(dbgs() << "===========If Region End===============\n"); |
2748 | 0 |
|
2749 | 0 | Region->setLinearizedRegion(LRegion); |
2750 | 0 | return true; |
2751 | 0 | } |
2752 | | |
2753 | 0 | bool AMDGPUMachineCFGStructurizer::structurizeRegion(RegionMRT *Region) { |
2754 | 0 | if (false && 0 regionIsSimpleIf(Region)0 ) { |
2755 | 0 | transformSimpleIfRegion(Region); |
2756 | 0 | return true; |
2757 | 0 | } else if (0 regionIsSequence(Region)0 ) { |
2758 | 0 | fixupRegionExits(Region); |
2759 | 0 | return false; |
2760 | 0 | } else { |
2761 | 0 | structurizeComplexRegion(Region); |
2762 | 0 | } |
2763 | 0 | return false; |
2764 | 0 | } |
2765 | | |
2766 | | static int structurize_once = 0; |
2767 | | |
2768 | | bool AMDGPUMachineCFGStructurizer::structurizeRegions(RegionMRT *Region, |
2769 | 0 | bool isTopRegion) { |
2770 | 0 | bool Changed = false; |
2771 | 0 |
|
2772 | 0 | auto Children = Region->getChildren(); |
2773 | 0 | for (auto CI : *Children) { |
2774 | 0 | if (CI->isRegion()0 ) { |
2775 | 0 | Changed |= structurizeRegions(CI->getRegionMRT(), false); |
2776 | 0 | } |
2777 | 0 | } |
2778 | 0 |
|
2779 | 0 | if (structurize_once < 2 || 0 true0 ) { |
2780 | 0 | Changed |= structurizeRegion(Region); |
2781 | 0 | structurize_once++; |
2782 | 0 | } |
2783 | 0 | return Changed; |
2784 | 0 | } |
2785 | | |
2786 | 0 | void AMDGPUMachineCFGStructurizer::initFallthroughMap(MachineFunction &MF) { |
2787 | 0 | DEBUG(dbgs() << "Fallthrough Map:\n"); |
2788 | 0 | for (auto &MBBI : MF) { |
2789 | 0 | MachineBasicBlock *MBB = MBBI.getFallThrough(); |
2790 | 0 | if (MBB != nullptr0 ) { |
2791 | 0 | DEBUG(dbgs() << "Fallthrough: " << MBBI.getNumber() << " -> " |
2792 | 0 | << MBB->getNumber() << "\n"); |
2793 | 0 | } |
2794 | 0 | FallthroughMap[&MBBI] = MBB; |
2795 | 0 | } |
2796 | 0 | } |
2797 | | |
2798 | | void AMDGPUMachineCFGStructurizer::createLinearizedRegion(RegionMRT *Region, |
2799 | 0 | unsigned SelectOut) { |
2800 | 0 | LinearizedRegion *LRegion = new LinearizedRegion(); |
2801 | 0 | if (SelectOut0 ) { |
2802 | 0 | LRegion->addLiveOut(SelectOut); |
2803 | 0 | DEBUG(dbgs() << "Add LiveOut (BBSelect): " << PrintReg(SelectOut, TRI) |
2804 | 0 | << "\n"); |
2805 | 0 | } |
2806 | 0 | LRegion->setRegionMRT(Region); |
2807 | 0 | Region->setLinearizedRegion(LRegion); |
2808 | 0 | LRegion->setParent(Region->getParent() |
2809 | 0 | ? Region->getParent()->getLinearizedRegion() |
2810 | 0 | : nullptr); |
2811 | 0 | } |
2812 | | |
2813 | | unsigned |
2814 | | AMDGPUMachineCFGStructurizer::initializeSelectRegisters(MRT *MRT, unsigned SelectOut, |
2815 | | MachineRegisterInfo *MRI, |
2816 | 0 | const SIInstrInfo *TII) { |
2817 | 0 | if (MRT->isRegion()0 ) { |
2818 | 0 | RegionMRT *Region = MRT->getRegionMRT(); |
2819 | 0 | Region->setBBSelectRegOut(SelectOut); |
2820 | 0 | unsigned InnerSelectOut = createBBSelectReg(TII, MRI); |
2821 | 0 |
|
2822 | 0 | // Fixme: Move linearization creation to the original spot |
2823 | 0 | createLinearizedRegion(Region, SelectOut); |
2824 | 0 |
|
2825 | 0 | for (auto CI = Region->getChildren()->begin(), |
2826 | 0 | CE = Region->getChildren()->end(); |
2827 | 0 | CI != CE0 ; ++CI0 ) { |
2828 | 0 | InnerSelectOut = |
2829 | 0 | initializeSelectRegisters((*CI), InnerSelectOut, MRI, TII); |
2830 | 0 | } |
2831 | 0 | MRT->setBBSelectRegIn(InnerSelectOut); |
2832 | 0 | return InnerSelectOut; |
2833 | 0 | } else { |
2834 | 0 | MRT->setBBSelectRegOut(SelectOut); |
2835 | 0 | unsigned NewSelectIn = createBBSelectReg(TII, MRI); |
2836 | 0 | MRT->setBBSelectRegIn(NewSelectIn); |
2837 | 0 | return NewSelectIn; |
2838 | 0 | } |
2839 | 0 | } |
2840 | | |
2841 | 0 | static void checkRegOnlyPHIInputs(MachineFunction &MF) { |
2842 | 0 | for (auto &MBBI : MF) { |
2843 | 0 | for (MachineBasicBlock::instr_iterator I = MBBI.instr_begin(), |
2844 | 0 | E = MBBI.instr_end(); |
2845 | 0 | I != E0 ; ++I0 ) { |
2846 | 0 | MachineInstr &Instr = *I; |
2847 | 0 | if (Instr.isPHI()0 ) { |
2848 | 0 | int numPreds = getPHINumInputs(Instr); |
2849 | 0 | for (int i = 0; i < numPreds0 ; ++i0 ) { |
2850 | 0 | assert(Instr.getOperand(i * 2 + 1).isReg() && |
2851 | 0 | "PHI Operand not a register"); |
2852 | 0 | } |
2853 | 0 | } |
2854 | 0 | } |
2855 | 0 | } |
2856 | 0 | } |
2857 | | |
2858 | 0 | bool AMDGPUMachineCFGStructurizer::runOnMachineFunction(MachineFunction &MF) { |
2859 | 0 | const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); |
2860 | 0 | const SIInstrInfo *TII = ST.getInstrInfo(); |
2861 | 0 | TRI = ST.getRegisterInfo(); |
2862 | 0 | MRI = &(MF.getRegInfo()); |
2863 | 0 | initFallthroughMap(MF); |
2864 | 0 |
|
2865 | 0 | checkRegOnlyPHIInputs(MF); |
2866 | 0 | DEBUG(dbgs() << "----STRUCTURIZER START----\n"); |
2867 | 0 | DEBUG(MF.dump()); |
2868 | 0 |
|
2869 | 0 | Regions = &(getAnalysis<MachineRegionInfoPass>().getRegionInfo()); |
2870 | 0 | DEBUG(Regions->dump()); |
2871 | 0 |
|
2872 | 0 | RegionMRT *RTree = MRT::buildMRT(MF, Regions, TII, MRI); |
2873 | 0 | setRegionMRT(RTree); |
2874 | 0 | initializeSelectRegisters(RTree, 0, MRI, TII); |
2875 | 0 | DEBUG(RTree->dump(TRI)); |
2876 | 0 | bool result = structurizeRegions(RTree, true); |
2877 | 0 | delete RTree; |
2878 | 0 | DEBUG(dbgs() << "----STRUCTURIZER END----\n"); |
2879 | 0 | initFallthroughMap(MF); |
2880 | 0 | return result; |
2881 | 0 | } |
2882 | | |
2883 | | char AMDGPUMachineCFGStructurizerID = AMDGPUMachineCFGStructurizer::ID; |
2884 | | |
2885 | 0 | INITIALIZE_PASS_BEGIN0 (AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer",
|
2886 | 0 | "AMDGPU Machine CFG Structurizer", false, false) |
2887 | 0 | INITIALIZE_PASS_DEPENDENCY(MachineRegionInfoPass) |
2888 | 0 | INITIALIZE_PASS_END(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer", |
2889 | | "AMDGPU Machine CFG Structurizer", false, false) |
2890 | | |
2891 | 0 | FunctionPass *llvm::createAMDGPUMachineCFGStructurizerPass() { |
2892 | 0 | return new AMDGPUMachineCFGStructurizer(); |
2893 | 0 | } |