/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- SelectionDAGISel.cpp - Implement the SelectionDAGISel class --------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This implements the SelectionDAGISel class. |
11 | | // |
12 | | //===----------------------------------------------------------------------===// |
13 | | |
14 | | #include "ScheduleDAGSDNodes.h" |
15 | | #include "SelectionDAGBuilder.h" |
16 | | #include "llvm/ADT/APInt.h" |
17 | | #include "llvm/ADT/DenseMap.h" |
18 | | #include "llvm/ADT/None.h" |
19 | | #include "llvm/ADT/PostOrderIterator.h" |
20 | | #include "llvm/ADT/STLExtras.h" |
21 | | #include "llvm/ADT/SmallPtrSet.h" |
22 | | #include "llvm/ADT/SmallSet.h" |
23 | | #include "llvm/ADT/SmallVector.h" |
24 | | #include "llvm/ADT/Statistic.h" |
25 | | #include "llvm/ADT/StringRef.h" |
26 | | #include "llvm/Analysis/AliasAnalysis.h" |
27 | | #include "llvm/Analysis/BranchProbabilityInfo.h" |
28 | | #include "llvm/Analysis/CFG.h" |
29 | | #include "llvm/Analysis/OptimizationDiagnosticInfo.h" |
30 | | #include "llvm/Analysis/TargetLibraryInfo.h" |
31 | | #include "llvm/CodeGen/FastISel.h" |
32 | | #include "llvm/CodeGen/FunctionLoweringInfo.h" |
33 | | #include "llvm/CodeGen/GCMetadata.h" |
34 | | #include "llvm/CodeGen/ISDOpcodes.h" |
35 | | #include "llvm/CodeGen/MachineBasicBlock.h" |
36 | | #include "llvm/CodeGen/MachineFrameInfo.h" |
37 | | #include "llvm/CodeGen/MachineFunction.h" |
38 | | #include "llvm/CodeGen/MachineFunctionPass.h" |
39 | | #include "llvm/CodeGen/MachineInstr.h" |
40 | | #include "llvm/CodeGen/MachineInstrBuilder.h" |
41 | | #include "llvm/CodeGen/MachineMemOperand.h" |
42 | | #include "llvm/CodeGen/MachineOperand.h" |
43 | | #include "llvm/CodeGen/MachinePassRegistry.h" |
44 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
45 | | #include "llvm/CodeGen/MachineValueType.h" |
46 | | #include "llvm/CodeGen/SchedulerRegistry.h" |
47 | | #include "llvm/CodeGen/SelectionDAG.h" |
48 | | #include "llvm/CodeGen/SelectionDAGISel.h" |
49 | | #include "llvm/CodeGen/SelectionDAGNodes.h" |
50 | | #include "llvm/CodeGen/StackProtector.h" |
51 | | #include "llvm/CodeGen/ValueTypes.h" |
52 | | #include "llvm/IR/BasicBlock.h" |
53 | | #include "llvm/IR/Constants.h" |
54 | | #include "llvm/IR/DataLayout.h" |
55 | | #include "llvm/IR/DebugInfoMetadata.h" |
56 | | #include "llvm/IR/DebugLoc.h" |
57 | | #include "llvm/IR/DiagnosticInfo.h" |
58 | | #include "llvm/IR/Dominators.h" |
59 | | #include "llvm/IR/Function.h" |
60 | | #include "llvm/IR/InlineAsm.h" |
61 | | #include "llvm/IR/InstrTypes.h" |
62 | | #include "llvm/IR/Instruction.h" |
63 | | #include "llvm/IR/Instructions.h" |
64 | | #include "llvm/IR/IntrinsicInst.h" |
65 | | #include "llvm/IR/Intrinsics.h" |
66 | | #include "llvm/IR/Metadata.h" |
67 | | #include "llvm/IR/Type.h" |
68 | | #include "llvm/IR/User.h" |
69 | | #include "llvm/IR/Value.h" |
70 | | #include "llvm/MC/MCInstrDesc.h" |
71 | | #include "llvm/MC/MCRegisterInfo.h" |
72 | | #include "llvm/Pass.h" |
73 | | #include "llvm/Support/BranchProbability.h" |
74 | | #include "llvm/Support/Casting.h" |
75 | | #include "llvm/Support/CodeGen.h" |
76 | | #include "llvm/Support/CommandLine.h" |
77 | | #include "llvm/Support/Compiler.h" |
78 | | #include "llvm/Support/Debug.h" |
79 | | #include "llvm/Support/ErrorHandling.h" |
80 | | #include "llvm/Support/KnownBits.h" |
81 | | #include "llvm/Support/Timer.h" |
82 | | #include "llvm/Support/raw_ostream.h" |
83 | | #include "llvm/Target/TargetInstrInfo.h" |
84 | | #include "llvm/Target/TargetIntrinsicInfo.h" |
85 | | #include "llvm/Target/TargetLowering.h" |
86 | | #include "llvm/Target/TargetMachine.h" |
87 | | #include "llvm/Target/TargetOptions.h" |
88 | | #include "llvm/Target/TargetRegisterInfo.h" |
89 | | #include "llvm/Target/TargetSubtargetInfo.h" |
90 | | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
91 | | #include <algorithm> |
92 | | #include <cassert> |
93 | | #include <cstdint> |
94 | | #include <iterator> |
95 | | #include <limits> |
96 | | #include <memory> |
97 | | #include <string> |
98 | | #include <utility> |
99 | | #include <vector> |
100 | | |
101 | | using namespace llvm; |
102 | | |
103 | | #define DEBUG_TYPE "isel" |
104 | | |
105 | | STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on"); |
106 | | STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected"); |
107 | | STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel"); |
108 | | STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG"); |
109 | | STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path"); |
110 | | STATISTIC(NumEntryBlocks, "Number of entry blocks encountered"); |
111 | | STATISTIC(NumFastIselFailLowerArguments, |
112 | | "Number of entry blocks where fast isel failed to lower arguments"); |
113 | | |
114 | | static cl::opt<int> EnableFastISelAbort( |
115 | | "fast-isel-abort", cl::Hidden, |
116 | | cl::desc("Enable abort calls when \"fast\" instruction selection " |
117 | | "fails to lower an instruction: 0 disable the abort, 1 will " |
118 | | "abort but for args, calls and terminators, 2 will also " |
119 | | "abort for argument lowering, and 3 will never fallback " |
120 | | "to SelectionDAG.")); |
121 | | |
122 | | static cl::opt<bool> EnableFastISelFallbackReport( |
123 | | "fast-isel-report-on-fallback", cl::Hidden, |
124 | | cl::desc("Emit a diagnostic when \"fast\" instruction selection " |
125 | | "falls back to SelectionDAG.")); |
126 | | |
127 | | static cl::opt<bool> |
128 | | UseMBPI("use-mbpi", |
129 | | cl::desc("use Machine Branch Probability Info"), |
130 | | cl::init(true), cl::Hidden); |
131 | | |
132 | | #ifndef NDEBUG |
133 | | static cl::opt<std::string> |
134 | | FilterDAGBasicBlockName("filter-view-dags", cl::Hidden, |
135 | | cl::desc("Only display the basic block whose name " |
136 | | "matches this for all view-*-dags options")); |
137 | | static cl::opt<bool> |
138 | | ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden, |
139 | | cl::desc("Pop up a window to show dags before the first " |
140 | | "dag combine pass")); |
141 | | static cl::opt<bool> |
142 | | ViewLegalizeTypesDAGs("view-legalize-types-dags", cl::Hidden, |
143 | | cl::desc("Pop up a window to show dags before legalize types")); |
144 | | static cl::opt<bool> |
145 | | ViewLegalizeDAGs("view-legalize-dags", cl::Hidden, |
146 | | cl::desc("Pop up a window to show dags before legalize")); |
147 | | static cl::opt<bool> |
148 | | ViewDAGCombine2("view-dag-combine2-dags", cl::Hidden, |
149 | | cl::desc("Pop up a window to show dags before the second " |
150 | | "dag combine pass")); |
151 | | static cl::opt<bool> |
152 | | ViewDAGCombineLT("view-dag-combine-lt-dags", cl::Hidden, |
153 | | cl::desc("Pop up a window to show dags before the post legalize types" |
154 | | " dag combine pass")); |
155 | | static cl::opt<bool> |
156 | | ViewISelDAGs("view-isel-dags", cl::Hidden, |
157 | | cl::desc("Pop up a window to show isel dags as they are selected")); |
158 | | static cl::opt<bool> |
159 | | ViewSchedDAGs("view-sched-dags", cl::Hidden, |
160 | | cl::desc("Pop up a window to show sched dags as they are processed")); |
161 | | static cl::opt<bool> |
162 | | ViewSUnitDAGs("view-sunit-dags", cl::Hidden, |
163 | | cl::desc("Pop up a window to show SUnit dags after they are processed")); |
164 | | #else |
165 | | static const bool ViewDAGCombine1 = false, |
166 | | ViewLegalizeTypesDAGs = false, ViewLegalizeDAGs = false, |
167 | | ViewDAGCombine2 = false, |
168 | | ViewDAGCombineLT = false, |
169 | | ViewISelDAGs = false, ViewSchedDAGs = false, |
170 | | ViewSUnitDAGs = false; |
171 | | #endif |
172 | | |
173 | | //===---------------------------------------------------------------------===// |
174 | | /// |
175 | | /// RegisterScheduler class - Track the registration of instruction schedulers. |
176 | | /// |
177 | | //===---------------------------------------------------------------------===// |
178 | | MachinePassRegistry RegisterScheduler::Registry; |
179 | | |
180 | | //===---------------------------------------------------------------------===// |
181 | | /// |
182 | | /// ISHeuristic command line option for instruction schedulers. |
183 | | /// |
184 | | //===---------------------------------------------------------------------===// |
185 | | static cl::opt<RegisterScheduler::FunctionPassCtor, false, |
186 | | RegisterPassParser<RegisterScheduler>> |
187 | | ISHeuristic("pre-RA-sched", |
188 | | cl::init(&createDefaultScheduler), cl::Hidden, |
189 | | cl::desc("Instruction schedulers available (before register" |
190 | | " allocation):")); |
191 | | |
192 | | static RegisterScheduler |
193 | | defaultListDAGScheduler("default", "Best scheduler for the target", |
194 | | createDefaultScheduler); |
195 | | |
196 | | namespace llvm { |
197 | | |
198 | | //===--------------------------------------------------------------------===// |
199 | | /// \brief This class is used by SelectionDAGISel to temporarily override |
200 | | /// the optimization level on a per-function basis. |
201 | | class OptLevelChanger { |
202 | | SelectionDAGISel &IS; |
203 | | CodeGenOpt::Level SavedOptLevel; |
204 | | bool SavedFastISel; |
205 | | |
206 | | public: |
207 | | OptLevelChanger(SelectionDAGISel &ISel, |
208 | 437k | CodeGenOpt::Level NewOptLevel) : IS(ISel) { |
209 | 437k | SavedOptLevel = IS.OptLevel; |
210 | 437k | if (NewOptLevel == SavedOptLevel) |
211 | 437k | return; |
212 | 43 | IS.OptLevel = NewOptLevel; |
213 | 43 | IS.TM.setOptLevel(NewOptLevel); |
214 | 43 | DEBUG(dbgs() << "\nChanging optimization level for Function " |
215 | 43 | << IS.MF->getFunction()->getName() << "\n"); |
216 | 43 | DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel |
217 | 43 | << " ; After: -O" << NewOptLevel << "\n"); |
218 | 43 | SavedFastISel = IS.TM.Options.EnableFastISel; |
219 | 43 | if (NewOptLevel == CodeGenOpt::None43 ) { |
220 | 44 | IS.TM.setFastISel(IS.TM.getO0WantsFastISel()); |
221 | 44 | DEBUG(dbgs() << "\tFastISel is " |
222 | 44 | << (IS.TM.Options.EnableFastISel ? "enabled" : "disabled") |
223 | 44 | << "\n"); |
224 | 44 | } |
225 | 437k | } |
226 | | |
227 | 436k | ~OptLevelChanger() { |
228 | 436k | if (IS.OptLevel == SavedOptLevel) |
229 | 436k | return; |
230 | 42 | DEBUG42 (dbgs() << "\nRestoring optimization level for Function " |
231 | 42 | << IS.MF->getFunction()->getName() << "\n"); |
232 | 42 | DEBUG(dbgs() << "\tBefore: -O" << IS.OptLevel |
233 | 436k | << " ; After: -O" << SavedOptLevel << "\n"); |
234 | 436k | IS.OptLevel = SavedOptLevel; |
235 | 436k | IS.TM.setOptLevel(SavedOptLevel); |
236 | 436k | IS.TM.setFastISel(SavedFastISel); |
237 | 436k | } |
238 | | }; |
239 | | |
240 | | //===--------------------------------------------------------------------===// |
241 | | /// createDefaultScheduler - This creates an instruction scheduler appropriate |
242 | | /// for the target. |
243 | | ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS, |
244 | 3.42M | CodeGenOpt::Level OptLevel) { |
245 | 3.42M | const TargetLowering *TLI = IS->TLI; |
246 | 3.42M | const TargetSubtargetInfo &ST = IS->MF->getSubtarget(); |
247 | 3.42M | |
248 | 3.42M | // Try first to see if the Target has its own way of selecting a scheduler |
249 | 3.42M | if (auto *SchedulerCtor3.42M = ST.getDAGScheduler(OptLevel)) { |
250 | 0 | return SchedulerCtor(IS, OptLevel); |
251 | 0 | } |
252 | 3.42M | |
253 | 3.42M | if (3.42M OptLevel == CodeGenOpt::None || |
254 | 3.41M | (ST.enableMachineScheduler() && 3.41M ST.enableMachineSchedDefaultSched()3.34M ) || |
255 | 71.7k | TLI->getSchedulingPreference() == Sched::Source) |
256 | 3.35M | return createSourceListDAGScheduler(IS, OptLevel); |
257 | 68.4k | if (68.4k TLI->getSchedulingPreference() == Sched::RegPressure68.4k ) |
258 | 17.5k | return createBURRListDAGScheduler(IS, OptLevel); |
259 | 50.9k | if (50.9k TLI->getSchedulingPreference() == Sched::Hybrid50.9k ) |
260 | 37.0k | return createHybridListDAGScheduler(IS, OptLevel); |
261 | 13.8k | if (13.8k TLI->getSchedulingPreference() == Sched::VLIW13.8k ) |
262 | 0 | return createVLIWDAGScheduler(IS, OptLevel); |
263 | 13.8k | assert(TLI->getSchedulingPreference() == Sched::ILP && |
264 | 13.8k | "Unknown sched type!"); |
265 | 13.8k | return createILPListDAGScheduler(IS, OptLevel); |
266 | 13.8k | } |
267 | | |
268 | | } // end namespace llvm |
269 | | |
270 | | // EmitInstrWithCustomInserter - This method should be implemented by targets |
271 | | // that mark instructions with the 'usesCustomInserter' flag. These |
272 | | // instructions are special in various ways, which require special support to |
273 | | // insert. The specified MachineInstr is created but not inserted into any |
274 | | // basic blocks, and this method is called to expand it into a sequence of |
275 | | // instructions, potentially also creating new basic blocks and control flow. |
276 | | // When new basic blocks are inserted and the edges from MBB to its successors |
277 | | // are modified, the method should insert pairs of <OldSucc, NewSucc> into the |
278 | | // DenseMap. |
279 | | MachineBasicBlock * |
280 | | TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, |
281 | 0 | MachineBasicBlock *MBB) const { |
282 | | #ifndef NDEBUG |
283 | | dbgs() << "If a target marks an instruction with " |
284 | | "'usesCustomInserter', it must implement " |
285 | | "TargetLowering::EmitInstrWithCustomInserter!"; |
286 | | #endif |
287 | 0 | llvm_unreachable(nullptr); |
288 | 0 | } |
289 | | |
290 | | void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, |
291 | 0 | SDNode *Node) const { |
292 | 0 | assert(!MI.hasPostISelHook() && |
293 | 0 | "If a target marks an instruction with 'hasPostISelHook', " |
294 | 0 | "it must implement TargetLowering::AdjustInstrPostInstrSelection!"); |
295 | 0 | } |
296 | | |
297 | | //===----------------------------------------------------------------------===// |
298 | | // SelectionDAGISel code |
299 | | //===----------------------------------------------------------------------===// |
300 | | |
301 | | SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, |
302 | | CodeGenOpt::Level OL) : |
303 | | MachineFunctionPass(ID), TM(tm), |
304 | | FuncInfo(new FunctionLoweringInfo()), |
305 | | CurDAG(new SelectionDAG(tm, OL)), |
306 | | SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)), |
307 | | AA(), GFI(), |
308 | | OptLevel(OL), |
309 | 35.1k | DAGSize(0) { |
310 | 35.1k | initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); |
311 | 35.1k | initializeBranchProbabilityInfoWrapperPassPass( |
312 | 35.1k | *PassRegistry::getPassRegistry()); |
313 | 35.1k | initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry()); |
314 | 35.1k | initializeTargetLibraryInfoWrapperPassPass( |
315 | 35.1k | *PassRegistry::getPassRegistry()); |
316 | 35.1k | } |
317 | | |
318 | 35.0k | SelectionDAGISel::~SelectionDAGISel() { |
319 | 35.0k | delete SDB; |
320 | 35.0k | delete CurDAG; |
321 | 35.0k | delete FuncInfo; |
322 | 35.0k | } |
323 | | |
324 | 35.0k | void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { |
325 | 35.0k | if (OptLevel != CodeGenOpt::None) |
326 | 33.5k | AU.addRequired<AAResultsWrapperPass>(); |
327 | 35.0k | AU.addRequired<GCModuleInfo>(); |
328 | 35.0k | AU.addRequired<StackProtector>(); |
329 | 35.0k | AU.addPreserved<StackProtector>(); |
330 | 35.0k | AU.addPreserved<GCModuleInfo>(); |
331 | 35.0k | AU.addRequired<TargetLibraryInfoWrapperPass>(); |
332 | 35.0k | if (UseMBPI && 35.0k OptLevel != CodeGenOpt::None35.0k ) |
333 | 33.5k | AU.addRequired<BranchProbabilityInfoWrapperPass>(); |
334 | 35.0k | MachineFunctionPass::getAnalysisUsage(AU); |
335 | 35.0k | } |
336 | | |
337 | | /// SplitCriticalSideEffectEdges - Look for critical edges with a PHI value that |
338 | | /// may trap on it. In this case we have to split the edge so that the path |
339 | | /// through the predecessor block that doesn't go to the phi block doesn't |
340 | | /// execute the possibly trapping instruction. If available, we pass domtree |
341 | | /// and loop info to be updated when we split critical edges. This is because |
342 | | /// SelectionDAGISel preserves these analyses. |
343 | | /// This is required for correctness, so it must be done at -O0. |
344 | | /// |
345 | | static void SplitCriticalSideEffectEdges(Function &Fn, DominatorTree *DT, |
346 | 437k | LoopInfo *LI) { |
347 | 437k | // Loop for blocks with phi nodes. |
348 | 3.30M | for (BasicBlock &BB : Fn) { |
349 | 3.30M | PHINode *PN = dyn_cast<PHINode>(BB.begin()); |
350 | 3.30M | if (!PN3.30M ) continue2.70M ; |
351 | 601k | |
352 | 601k | ReprocessBlock: |
353 | 601k | // For each block with a PHI node, check to see if any of the input values |
354 | 601k | // are potentially trapping constant expressions. Constant expressions are |
355 | 601k | // the only potentially trapping value that can occur as the argument to a |
356 | 601k | // PHI. |
357 | 1.51M | for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast<PHINode>(I))1.51M ; ++I915k ) |
358 | 3.15M | for (unsigned i = 0, e = PN->getNumIncomingValues(); 915k i != e3.15M ; ++i2.23M ) { |
359 | 2.23M | ConstantExpr *CE = dyn_cast<ConstantExpr>(PN->getIncomingValue(i)); |
360 | 2.23M | if (!CE || 2.23M !CE->canTrap()21.7k ) continue2.23M ; |
361 | 2 | |
362 | 2 | // The only case we have to worry about is when the edge is critical. |
363 | 2 | // Since this block has a PHI Node, we assume it has multiple input |
364 | 2 | // edges: check to see if the pred has multiple successors. |
365 | 2 | BasicBlock *Pred = PN->getIncomingBlock(i); |
366 | 2 | if (Pred->getTerminator()->getNumSuccessors() == 1) |
367 | 2 | continue; |
368 | 0 |
|
369 | 0 | // Okay, we have to split this edge. |
370 | 0 | SplitCriticalEdge( |
371 | 0 | Pred->getTerminator(), GetSuccessorNumber(Pred, &BB), |
372 | 0 | CriticalEdgeSplittingOptions(DT, LI).setMergeIdenticalEdges()); |
373 | 0 | goto ReprocessBlock; |
374 | 0 | } |
375 | 3.30M | } |
376 | 437k | } |
377 | | |
378 | 595k | bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { |
379 | 595k | // If we already selected that function, we do not need to run SDISel. |
380 | 595k | if (mf.getProperties().hasProperty( |
381 | 595k | MachineFunctionProperties::Property::Selected)) |
382 | 158k | return false; |
383 | 437k | // Do some sanity-checking on the command-line options. |
384 | 595k | assert((!EnableFastISelAbort || TM.Options.EnableFastISel) && |
385 | 437k | "-fast-isel-abort > 0 requires -fast-isel"); |
386 | 437k | |
387 | 437k | const Function &Fn = *mf.getFunction(); |
388 | 437k | MF = &mf; |
389 | 437k | |
390 | 437k | // Reset the target options before resetting the optimization |
391 | 437k | // level below. |
392 | 437k | // FIXME: This is a horrible hack and should be processed via |
393 | 437k | // codegen looking at the optimization level explicitly when |
394 | 437k | // it wants to look at it. |
395 | 437k | TM.resetTargetOptions(Fn); |
396 | 437k | // Reset OptLevel to None for optnone functions. |
397 | 437k | CodeGenOpt::Level NewOptLevel = OptLevel; |
398 | 437k | if (OptLevel != CodeGenOpt::None && 437k skipFunction(Fn)430k ) |
399 | 44 | NewOptLevel = CodeGenOpt::None; |
400 | 437k | OptLevelChanger OLC(*this, NewOptLevel); |
401 | 437k | |
402 | 437k | TII = MF->getSubtarget().getInstrInfo(); |
403 | 437k | TLI = MF->getSubtarget().getTargetLowering(); |
404 | 437k | RegInfo = &MF->getRegInfo(); |
405 | 437k | LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); |
406 | 437k | GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn)65 : nullptr436k ; |
407 | 437k | ORE = make_unique<OptimizationRemarkEmitter>(&Fn); |
408 | 437k | auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); |
409 | 437k | DominatorTree *DT = DTWP ? &DTWP->getDomTree()430k : nullptr6.67k ; |
410 | 437k | auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); |
411 | 437k | LoopInfo *LI = LIWP ? &LIWP->getLoopInfo()430k : nullptr7.05k ; |
412 | 437k | |
413 | 437k | DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); |
414 | 437k | |
415 | 437k | SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI); |
416 | 437k | |
417 | 437k | CurDAG->init(*MF, *ORE, this); |
418 | 437k | FuncInfo->set(Fn, *MF, CurDAG); |
419 | 437k | |
420 | 437k | // Now get the optional analyzes if we want to. |
421 | 437k | // This is based on the possibly changed OptLevel (after optnone is taken |
422 | 437k | // into account). That's unfortunate but OK because it just means we won't |
423 | 437k | // ask for passes that have been required anyway. |
424 | 437k | |
425 | 437k | if (UseMBPI && 437k OptLevel != CodeGenOpt::None437k ) |
426 | 429k | FuncInfo->BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI(); |
427 | 437k | else |
428 | 7.10k | FuncInfo->BPI = nullptr; |
429 | 437k | |
430 | 437k | if (OptLevel != CodeGenOpt::None) |
431 | 429k | AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); |
432 | 437k | else |
433 | 7.10k | AA = nullptr; |
434 | 437k | |
435 | 437k | SDB->init(GFI, AA, LibInfo); |
436 | 437k | |
437 | 437k | MF->setHasInlineAsm(false); |
438 | 437k | |
439 | 437k | FuncInfo->SplitCSR = false; |
440 | 437k | |
441 | 437k | // We split CSR if the target supports it for the given function |
442 | 437k | // and the function has only return exits. |
443 | 437k | if (OptLevel != CodeGenOpt::None && 437k TLI->supportSplitCSR(MF)429k ) { |
444 | 898 | FuncInfo->SplitCSR = true; |
445 | 898 | |
446 | 898 | // Collect all the return blocks. |
447 | 947 | for (const BasicBlock &BB : Fn) { |
448 | 947 | if (!succ_empty(&BB)) |
449 | 51 | continue; |
450 | 896 | |
451 | 896 | const TerminatorInst *Term = BB.getTerminator(); |
452 | 896 | if (isa<UnreachableInst>(Term) || 896 isa<ReturnInst>(Term)896 ) |
453 | 896 | continue; |
454 | 0 |
|
455 | 0 | // Bail out if the exit block is not Return nor Unreachable. |
456 | 0 | FuncInfo->SplitCSR = false; |
457 | 0 | break; |
458 | 0 | } |
459 | 898 | } |
460 | 437k | |
461 | 437k | MachineBasicBlock *EntryMBB = &MF->front(); |
462 | 437k | if (FuncInfo->SplitCSR) |
463 | 437k | // This performs initialization so lowering for SplitCSR will be correct. |
464 | 898 | TLI->initializeSplitCSR(EntryMBB); |
465 | 437k | |
466 | 437k | SelectAllBasicBlocks(Fn); |
467 | 437k | if (FastISelFailed && 437k EnableFastISelFallbackReport5.02k ) { |
468 | 2 | DiagnosticInfoISelFallback DiagFallback(Fn); |
469 | 2 | Fn.getContext().diagnose(DiagFallback); |
470 | 2 | } |
471 | 437k | |
472 | 437k | // If the first basic block in the function has live ins that need to be |
473 | 437k | // copied into vregs, emit the copies into the top of the block before |
474 | 437k | // emitting the code for the block. |
475 | 437k | const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo(); |
476 | 437k | RegInfo->EmitLiveInCopies(EntryMBB, TRI, *TII); |
477 | 437k | |
478 | 437k | // Insert copies in the entry block and the return blocks. |
479 | 437k | if (FuncInfo->SplitCSR437k ) { |
480 | 898 | SmallVector<MachineBasicBlock*, 4> Returns; |
481 | 898 | // Collect all the return blocks. |
482 | 951 | for (MachineBasicBlock &MBB : mf) { |
483 | 951 | if (!MBB.succ_empty()) |
484 | 53 | continue; |
485 | 898 | |
486 | 898 | MachineBasicBlock::iterator Term = MBB.getFirstTerminator(); |
487 | 898 | if (Term != MBB.end() && 898 Term->isReturn()894 ) { |
488 | 894 | Returns.push_back(&MBB); |
489 | 894 | continue; |
490 | 894 | } |
491 | 898 | } |
492 | 898 | TLI->insertCopiesSplitCSR(EntryMBB, Returns); |
493 | 898 | } |
494 | 437k | |
495 | 437k | DenseMap<unsigned, unsigned> LiveInMap; |
496 | 437k | if (!FuncInfo->ArgDbgValues.empty()) |
497 | 126 | for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(), |
498 | 297 | E = RegInfo->livein_end(); LI != E297 ; ++LI171 ) |
499 | 171 | if (171 LI->second171 ) |
500 | 170 | LiveInMap.insert(std::make_pair(LI->first, LI->second)); |
501 | 437k | |
502 | 437k | // Insert DBG_VALUE instructions for function arguments to the entry block. |
503 | 437k | for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e437k ; ++i205 ) { |
504 | 205 | MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1]; |
505 | 205 | bool hasFI = MI->getOperand(0).isFI(); |
506 | 205 | unsigned Reg = |
507 | 205 | hasFI ? TRI.getFrameRegister(*MF)13 : MI->getOperand(0).getReg()192 ; |
508 | 205 | if (TargetRegisterInfo::isPhysicalRegister(Reg)) |
509 | 190 | EntryMBB->insert(EntryMBB->begin(), MI); |
510 | 15 | else { |
511 | 15 | MachineInstr *Def = RegInfo->getVRegDef(Reg); |
512 | 15 | if (Def15 ) { |
513 | 15 | MachineBasicBlock::iterator InsertPos = Def; |
514 | 15 | // FIXME: VR def may not be in entry block. |
515 | 15 | Def->getParent()->insert(std::next(InsertPos), MI); |
516 | 15 | } else |
517 | 15 | DEBUG(dbgs() << "Dropping debug info for dead vreg" |
518 | 15 | << TargetRegisterInfo::virtReg2Index(Reg) << "\n"); |
519 | 15 | } |
520 | 205 | |
521 | 205 | // If Reg is live-in then update debug info to track its copy in a vreg. |
522 | 205 | DenseMap<unsigned, unsigned>::iterator LDI = LiveInMap.find(Reg); |
523 | 205 | if (LDI != LiveInMap.end()205 ) { |
524 | 138 | assert(!hasFI && "There's no handling of frame pointer updating here yet " |
525 | 138 | "- add if needed"); |
526 | 138 | MachineInstr *Def = RegInfo->getVRegDef(LDI->second); |
527 | 138 | MachineBasicBlock::iterator InsertPos = Def; |
528 | 138 | const MDNode *Variable = MI->getDebugVariable(); |
529 | 138 | const MDNode *Expr = MI->getDebugExpression(); |
530 | 138 | DebugLoc DL = MI->getDebugLoc(); |
531 | 138 | bool IsIndirect = MI->isIndirectDebugValue(); |
532 | 138 | if (IsIndirect) |
533 | 138 | assert(MI->getOperand(1).getImm() == 0 && |
534 | 138 | "DBG_VALUE with nonzero offset"); |
535 | 138 | assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) && |
536 | 138 | "Expected inlined-at fields to agree"); |
537 | 138 | // Def is never a terminator here, so it is ok to increment InsertPos. |
538 | 138 | BuildMI(*EntryMBB, ++InsertPos, DL, TII->get(TargetOpcode::DBG_VALUE), |
539 | 138 | IsIndirect, LDI->second, Variable, Expr); |
540 | 138 | |
541 | 138 | // If this vreg is directly copied into an exported register then |
542 | 138 | // that COPY instructions also need DBG_VALUE, if it is the only |
543 | 138 | // user of LDI->second. |
544 | 138 | MachineInstr *CopyUseMI = nullptr; |
545 | 138 | for (MachineRegisterInfo::use_instr_iterator |
546 | 138 | UI = RegInfo->use_instr_begin(LDI->second), |
547 | 196 | E = RegInfo->use_instr_end(); UI != E196 ; ) { |
548 | 175 | MachineInstr *UseMI = &*(UI++); |
549 | 175 | if (UseMI->isDebugValue()175 ) continue21 ; |
550 | 154 | if (154 UseMI->isCopy() && 154 !CopyUseMI45 && UseMI->getParent() == EntryMBB37 ) { |
551 | 37 | CopyUseMI = UseMI; continue; |
552 | 37 | } |
553 | 117 | // Otherwise this is another use or second copy use. |
554 | 117 | CopyUseMI = nullptr; break; |
555 | 117 | } |
556 | 138 | if (CopyUseMI138 ) { |
557 | 21 | // Use MI's debug location, which describes where Variable was |
558 | 21 | // declared, rather than whatever is attached to CopyUseMI. |
559 | 21 | MachineInstr *NewMI = |
560 | 21 | BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, |
561 | 21 | CopyUseMI->getOperand(0).getReg(), Variable, Expr); |
562 | 21 | MachineBasicBlock::iterator Pos = CopyUseMI; |
563 | 21 | EntryMBB->insertAfter(Pos, NewMI); |
564 | 21 | } |
565 | 138 | } |
566 | 205 | } |
567 | 437k | |
568 | 437k | // Determine if there are any calls in this machine function. |
569 | 437k | MachineFrameInfo &MFI = MF->getFrameInfo(); |
570 | 3.42M | for (const auto &MBB : *MF) { |
571 | 3.42M | if (MFI.hasCalls() && 3.42M MF->hasInlineAsm()2.16M ) |
572 | 636 | break; |
573 | 3.42M | |
574 | 3.42M | for (const auto &MI : MBB) 3.42M { |
575 | 31.6M | const MCInstrDesc &MCID = TII->get(MI.getOpcode()); |
576 | 31.6M | if ((MCID.isCall() && 31.6M !MCID.isReturn()1.79M ) || |
577 | 31.6M | MI.isStackAligningInlineAsm()30.0M ) { |
578 | 1.56M | MFI.setHasCalls(true); |
579 | 1.56M | } |
580 | 31.6M | if (MI.isInlineAsm()31.6M ) { |
581 | 10.7k | MF->setHasInlineAsm(true); |
582 | 10.7k | } |
583 | 31.6M | } |
584 | 3.42M | } |
585 | 437k | |
586 | 437k | // Determine if there is a call to setjmp in the machine function. |
587 | 437k | MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice()); |
588 | 437k | |
589 | 437k | // Replace forward-declared registers with the registers containing |
590 | 437k | // the desired value. |
591 | 437k | MachineRegisterInfo &MRI = MF->getRegInfo(); |
592 | 437k | for (DenseMap<unsigned, unsigned>::iterator |
593 | 437k | I = FuncInfo->RegFixups.begin(), E = FuncInfo->RegFixups.end(); |
594 | 445k | I != E445k ; ++I8.63k ) { |
595 | 8.63k | unsigned From = I->first; |
596 | 8.63k | unsigned To = I->second; |
597 | 8.63k | // If To is also scheduled to be replaced, find what its ultimate |
598 | 8.63k | // replacement is. |
599 | 9.09k | while (true9.09k ) { |
600 | 9.09k | DenseMap<unsigned, unsigned>::iterator J = FuncInfo->RegFixups.find(To); |
601 | 9.09k | if (J == E9.09k ) break8.63k ; |
602 | 455 | To = J->second; |
603 | 455 | } |
604 | 8.63k | // Make sure the new register has a sufficiently constrained register class. |
605 | 8.63k | if (TargetRegisterInfo::isVirtualRegister(From) && |
606 | 8.63k | TargetRegisterInfo::isVirtualRegister(To)) |
607 | 8.63k | MRI.constrainRegClass(To, MRI.getRegClass(From)); |
608 | 8.63k | // Replace it. |
609 | 8.63k | |
610 | 8.63k | |
611 | 8.63k | // Replacing one register with another won't touch the kill flags. |
612 | 8.63k | // We need to conservatively clear the kill flags as a kill on the old |
613 | 8.63k | // register might dominate existing uses of the new register. |
614 | 8.63k | if (!MRI.use_empty(To)) |
615 | 476 | MRI.clearKillFlags(From); |
616 | 8.63k | MRI.replaceRegWith(From, To); |
617 | 8.63k | } |
618 | 437k | |
619 | 437k | TLI->finalizeLowering(*MF); |
620 | 437k | |
621 | 437k | // Release function-specific state. SDB and CurDAG are already cleared |
622 | 437k | // at this point. |
623 | 437k | FuncInfo->clear(); |
624 | 437k | |
625 | 437k | DEBUG(dbgs() << "*** MachineFunction at end of ISel ***\n"); |
626 | 437k | DEBUG(MF->print(dbgs())); |
627 | 595k | |
628 | 595k | return true; |
629 | 595k | } |
630 | | |
631 | | static void reportFastISelFailure(MachineFunction &MF, |
632 | | OptimizationRemarkEmitter &ORE, |
633 | | OptimizationRemarkMissed &R, |
634 | 8.61k | bool ShouldAbort) { |
635 | 8.61k | // Print the function name explicitly if we don't have a debug location (which |
636 | 8.61k | // makes the diagnostic less useful) or if we're going to emit a raw error. |
637 | 8.61k | if (!R.getLocation().isValid() || 8.61k ShouldAbort194 ) |
638 | 8.42k | R << (" (in function: " + MF.getName() + ")").str(); |
639 | 8.61k | |
640 | 8.61k | if (ShouldAbort) |
641 | 3 | report_fatal_error(R.getMsg()); |
642 | 8.61k | |
643 | 8.61k | ORE.emit(R); |
644 | 8.61k | } |
645 | | |
646 | | void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, |
647 | | BasicBlock::const_iterator End, |
648 | 3.30M | bool &HadTailCall) { |
649 | 3.30M | // Allow creating illegal types during DAG building for the basic block. |
650 | 3.30M | CurDAG->NewNodesMustHaveLegalTypes = false; |
651 | 3.30M | |
652 | 3.30M | // Lower the instructions. If a call is emitted as a tail call, cease emitting |
653 | 3.30M | // nodes for this block. |
654 | 21.3M | for (BasicBlock::const_iterator I = Begin; I != End && 21.3M !SDB->HasTailCall18.2M ; ++I18.0M ) { |
655 | 18.0M | if (!ElidedArgCopyInstrs.count(&*I)) |
656 | 18.0M | SDB->visit(*I); |
657 | 18.0M | } |
658 | 3.30M | |
659 | 3.30M | // Make sure the root of the DAG is up-to-date. |
660 | 3.30M | CurDAG->setRoot(SDB->getControlRoot()); |
661 | 3.30M | HadTailCall = SDB->HasTailCall; |
662 | 3.30M | SDB->clear(); |
663 | 3.30M | |
664 | 3.30M | // Final step, emit the lowered DAG as machine code. |
665 | 3.30M | CodeGenAndEmitDAG(); |
666 | 3.30M | } |
667 | | |
668 | 3.42M | void SelectionDAGISel::ComputeLiveOutVRegInfo() { |
669 | 3.42M | SmallPtrSet<SDNode*, 16> VisitedNodes; |
670 | 3.42M | SmallVector<SDNode*, 128> Worklist; |
671 | 3.42M | |
672 | 3.42M | Worklist.push_back(CurDAG->getRoot().getNode()); |
673 | 3.42M | |
674 | 3.42M | KnownBits Known; |
675 | 3.42M | |
676 | 32.0M | do { |
677 | 32.0M | SDNode *N = Worklist.pop_back_val(); |
678 | 32.0M | |
679 | 32.0M | // If we've already seen this node, ignore it. |
680 | 32.0M | if (!VisitedNodes.insert(N).second) |
681 | 3.04M | continue; |
682 | 28.9M | |
683 | 28.9M | // Otherwise, add all chain operands to the worklist. |
684 | 28.9M | for (const SDValue &Op : N->op_values()) |
685 | 74.7M | if (74.7M Op.getValueType() == MVT::Other74.7M ) |
686 | 28.5M | Worklist.push_back(Op.getNode()); |
687 | 28.9M | |
688 | 28.9M | // If this is a CopyToReg with a vreg dest, process it. |
689 | 28.9M | if (N->getOpcode() != ISD::CopyToReg) |
690 | 22.1M | continue; |
691 | 6.76M | |
692 | 6.76M | unsigned DestReg = cast<RegisterSDNode>(N->getOperand(1))->getReg(); |
693 | 6.76M | if (!TargetRegisterInfo::isVirtualRegister(DestReg)) |
694 | 4.22M | continue; |
695 | 2.54M | |
696 | 2.54M | // Ignore non-scalar or non-integer values. |
697 | 2.54M | SDValue Src = N->getOperand(2); |
698 | 2.54M | EVT SrcVT = Src.getValueType(); |
699 | 2.54M | if (!SrcVT.isInteger() || 2.54M SrcVT.isVector()2.48M ) |
700 | 84.9k | continue; |
701 | 2.45M | |
702 | 2.45M | unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src); |
703 | 2.45M | CurDAG->computeKnownBits(Src, Known); |
704 | 2.45M | FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, Known); |
705 | 32.0M | } while (!Worklist.empty()); |
706 | 3.42M | } |
707 | | |
708 | 3.42M | void SelectionDAGISel::CodeGenAndEmitDAG() { |
709 | 3.42M | StringRef GroupName = "sdag"; |
710 | 3.42M | StringRef GroupDescription = "Instruction Selection and Scheduling"; |
711 | 3.42M | std::string BlockName; |
712 | 3.42M | int BlockNumber = -1; |
713 | 3.42M | (void)BlockNumber; |
714 | 3.42M | bool MatchFilterBB = false; (void)MatchFilterBB; |
715 | 3.42M | |
716 | 3.42M | // Pre-type legalization allow creation of any node types. |
717 | 3.42M | CurDAG->NewNodesMustHaveLegalTypes = false; |
718 | 3.42M | |
719 | | #ifndef NDEBUG |
720 | | MatchFilterBB = (FilterDAGBasicBlockName.empty() || |
721 | | FilterDAGBasicBlockName == |
722 | | FuncInfo->MBB->getBasicBlock()->getName().str()); |
723 | | #endif |
724 | | #ifdef NDEBUG |
725 | 3.42M | if (ViewDAGCombine1 || 3.42M ViewLegalizeTypesDAGs0 || ViewLegalizeDAGs0 || |
726 | 3.42M | ViewDAGCombine20 || ViewDAGCombineLT0 || ViewISelDAGs0 || ViewSchedDAGs0 || |
727 | 0 | ViewSUnitDAGs) |
728 | 3.42M | #endif |
729 | 0 | { |
730 | 0 | BlockNumber = FuncInfo->MBB->getNumber(); |
731 | 0 | BlockName = |
732 | 0 | (MF->getName() + ":" + FuncInfo->MBB->getBasicBlock()->getName()).str(); |
733 | 0 | } |
734 | 3.42M | DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber |
735 | 3.42M | << " '" << BlockName << "'\n"; CurDAG->dump()); |
736 | 3.42M | |
737 | 3.42M | if (ViewDAGCombine1 && 3.42M MatchFilterBB0 ) |
738 | 0 | CurDAG->viewGraph("dag-combine1 input for " + BlockName); |
739 | 3.42M | |
740 | 3.42M | // Run the DAG combiner in pre-legalize mode. |
741 | 3.42M | { |
742 | 3.42M | NamedRegionTimer T("combine1", "DAG Combining 1", GroupName, |
743 | 3.42M | GroupDescription, TimePassesIsEnabled); |
744 | 3.42M | CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel); |
745 | 3.42M | } |
746 | 3.42M | |
747 | 3.42M | DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber |
748 | 3.42M | << " '" << BlockName << "'\n"; CurDAG->dump()); |
749 | 3.42M | |
750 | 3.42M | // Second step, hack on the DAG until it only uses operations and types that |
751 | 3.42M | // the target supports. |
752 | 3.42M | if (ViewLegalizeTypesDAGs && 3.42M MatchFilterBB0 ) |
753 | 0 | CurDAG->viewGraph("legalize-types input for " + BlockName); |
754 | 3.42M | |
755 | 3.42M | bool Changed; |
756 | 3.42M | { |
757 | 3.42M | NamedRegionTimer T("legalize_types", "Type Legalization", GroupName, |
758 | 3.42M | GroupDescription, TimePassesIsEnabled); |
759 | 3.42M | Changed = CurDAG->LegalizeTypes(); |
760 | 3.42M | } |
761 | 3.42M | |
762 | 3.42M | DEBUG(dbgs() << "Type-legalized selection DAG: BB#" << BlockNumber |
763 | 3.42M | << " '" << BlockName << "'\n"; CurDAG->dump()); |
764 | 3.42M | |
765 | 3.42M | // Only allow creation of legal node types. |
766 | 3.42M | CurDAG->NewNodesMustHaveLegalTypes = true; |
767 | 3.42M | |
768 | 3.42M | if (Changed3.42M ) { |
769 | 544k | if (ViewDAGCombineLT && 544k MatchFilterBB0 ) |
770 | 0 | CurDAG->viewGraph("dag-combine-lt input for " + BlockName); |
771 | 544k | |
772 | 544k | // Run the DAG combiner in post-type-legalize mode. |
773 | 544k | { |
774 | 544k | NamedRegionTimer T("combine_lt", "DAG Combining after legalize types", |
775 | 544k | GroupName, GroupDescription, TimePassesIsEnabled); |
776 | 544k | CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel); |
777 | 544k | } |
778 | 544k | |
779 | 544k | DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber |
780 | 544k | << " '" << BlockName << "'\n"; CurDAG->dump()); |
781 | 544k | } |
782 | 3.42M | |
783 | 3.42M | { |
784 | 3.42M | NamedRegionTimer T("legalize_vec", "Vector Legalization", GroupName, |
785 | 3.42M | GroupDescription, TimePassesIsEnabled); |
786 | 3.42M | Changed = CurDAG->LegalizeVectors(); |
787 | 3.42M | } |
788 | 3.42M | |
789 | 3.42M | if (Changed3.42M ) { |
790 | 22.2k | DEBUG(dbgs() << "Vector-legalized selection DAG: BB#" << BlockNumber |
791 | 22.2k | << " '" << BlockName << "'\n"; CurDAG->dump()); |
792 | 22.2k | |
793 | 22.2k | { |
794 | 22.2k | NamedRegionTimer T("legalize_types2", "Type Legalization 2", GroupName, |
795 | 22.2k | GroupDescription, TimePassesIsEnabled); |
796 | 22.2k | CurDAG->LegalizeTypes(); |
797 | 22.2k | } |
798 | 22.2k | |
799 | 22.2k | DEBUG(dbgs() << "Vector/type-legalized selection DAG: BB#" << BlockNumber |
800 | 22.2k | << " '" << BlockName << "'\n"; CurDAG->dump()); |
801 | 22.2k | |
802 | 22.2k | if (ViewDAGCombineLT && 22.2k MatchFilterBB0 ) |
803 | 0 | CurDAG->viewGraph("dag-combine-lv input for " + BlockName); |
804 | 22.2k | |
805 | 22.2k | // Run the DAG combiner in post-type-legalize mode. |
806 | 22.2k | { |
807 | 22.2k | NamedRegionTimer T("combine_lv", "DAG Combining after legalize vectors", |
808 | 22.2k | GroupName, GroupDescription, TimePassesIsEnabled); |
809 | 22.2k | CurDAG->Combine(AfterLegalizeVectorOps, AA, OptLevel); |
810 | 22.2k | } |
811 | 22.2k | |
812 | 22.2k | DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#" |
813 | 22.2k | << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); |
814 | 22.2k | } |
815 | 3.42M | |
816 | 3.42M | if (ViewLegalizeDAGs && 3.42M MatchFilterBB0 ) |
817 | 0 | CurDAG->viewGraph("legalize input for " + BlockName); |
818 | 3.42M | |
819 | 3.42M | { |
820 | 3.42M | NamedRegionTimer T("legalize", "DAG Legalization", GroupName, |
821 | 3.42M | GroupDescription, TimePassesIsEnabled); |
822 | 3.42M | CurDAG->Legalize(); |
823 | 3.42M | } |
824 | 3.42M | |
825 | 3.42M | DEBUG(dbgs() << "Legalized selection DAG: BB#" << BlockNumber |
826 | 3.42M | << " '" << BlockName << "'\n"; CurDAG->dump()); |
827 | 3.42M | |
828 | 3.42M | if (ViewDAGCombine2 && 3.42M MatchFilterBB0 ) |
829 | 0 | CurDAG->viewGraph("dag-combine2 input for " + BlockName); |
830 | 3.42M | |
831 | 3.42M | // Run the DAG combiner in post-legalize mode. |
832 | 3.42M | { |
833 | 3.42M | NamedRegionTimer T("combine2", "DAG Combining 2", GroupName, |
834 | 3.42M | GroupDescription, TimePassesIsEnabled); |
835 | 3.42M | CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel); |
836 | 3.42M | } |
837 | 3.42M | |
838 | 3.42M | DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber |
839 | 3.42M | << " '" << BlockName << "'\n"; CurDAG->dump()); |
840 | 3.42M | |
841 | 3.42M | if (OptLevel != CodeGenOpt::None) |
842 | 3.42M | ComputeLiveOutVRegInfo(); |
843 | 3.42M | |
844 | 3.42M | if (ViewISelDAGs && 3.42M MatchFilterBB0 ) |
845 | 0 | CurDAG->viewGraph("isel input for " + BlockName); |
846 | 3.42M | |
847 | 3.42M | // Third, instruction select all of the operations to machine code, adding the |
848 | 3.42M | // code to the MachineBasicBlock. |
849 | 3.42M | { |
850 | 3.42M | NamedRegionTimer T("isel", "Instruction Selection", GroupName, |
851 | 3.42M | GroupDescription, TimePassesIsEnabled); |
852 | 3.42M | DoInstructionSelection(); |
853 | 3.42M | } |
854 | 3.42M | |
855 | 3.42M | DEBUG(dbgs() << "Selected selection DAG: BB#" << BlockNumber |
856 | 3.42M | << " '" << BlockName << "'\n"; CurDAG->dump()); |
857 | 3.42M | |
858 | 3.42M | if (ViewSchedDAGs && 3.42M MatchFilterBB0 ) |
859 | 0 | CurDAG->viewGraph("scheduler input for " + BlockName); |
860 | 3.42M | |
861 | 3.42M | // Schedule machine code. |
862 | 3.42M | ScheduleDAGSDNodes *Scheduler = CreateScheduler(); |
863 | 3.42M | { |
864 | 3.42M | NamedRegionTimer T("sched", "Instruction Scheduling", GroupName, |
865 | 3.42M | GroupDescription, TimePassesIsEnabled); |
866 | 3.42M | Scheduler->Run(CurDAG, FuncInfo->MBB); |
867 | 3.42M | } |
868 | 3.42M | |
869 | 3.42M | if (ViewSUnitDAGs && 3.42M MatchFilterBB0 ) |
870 | 0 | Scheduler->viewGraph(); |
871 | 3.42M | |
872 | 3.42M | // Emit machine code to BB. This can change 'BB' to the last block being |
873 | 3.42M | // inserted into. |
874 | 3.42M | MachineBasicBlock *FirstMBB = FuncInfo->MBB, *LastMBB; |
875 | 3.42M | { |
876 | 3.42M | NamedRegionTimer T("emit", "Instruction Creation", GroupName, |
877 | 3.42M | GroupDescription, TimePassesIsEnabled); |
878 | 3.42M | |
879 | 3.42M | // FuncInfo->InsertPt is passed by reference and set to the end of the |
880 | 3.42M | // scheduled instructions. |
881 | 3.42M | LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule(FuncInfo->InsertPt); |
882 | 3.42M | } |
883 | 3.42M | |
884 | 3.42M | // If the block was split, make sure we update any references that are used to |
885 | 3.42M | // update PHI nodes later on. |
886 | 3.42M | if (FirstMBB != LastMBB) |
887 | 0 | SDB->UpdateSplitBlock(FirstMBB, LastMBB); |
888 | 3.42M | |
889 | 3.42M | // Free the scheduler state. |
890 | 3.42M | { |
891 | 3.42M | NamedRegionTimer T("cleanup", "Instruction Scheduling Cleanup", GroupName, |
892 | 3.42M | GroupDescription, TimePassesIsEnabled); |
893 | 3.42M | delete Scheduler; |
894 | 3.42M | } |
895 | 3.42M | |
896 | 3.42M | // Free the SelectionDAG state, now that we're finished with it. |
897 | 3.42M | CurDAG->clear(); |
898 | 3.42M | } |
899 | | |
900 | | namespace { |
901 | | |
902 | | /// ISelUpdater - helper class to handle updates of the instruction selection |
903 | | /// graph. |
904 | | class ISelUpdater : public SelectionDAG::DAGUpdateListener { |
905 | | SelectionDAG::allnodes_iterator &ISelPosition; |
906 | | |
907 | | public: |
908 | | ISelUpdater(SelectionDAG &DAG, SelectionDAG::allnodes_iterator &isp) |
909 | 3.42M | : SelectionDAG::DAGUpdateListener(DAG), ISelPosition(isp) {} |
910 | | |
911 | | /// NodeDeleted - Handle nodes deleted from the graph. If the node being |
912 | | /// deleted is the current ISelPosition node, update ISelPosition. |
913 | | /// |
914 | 12.3M | void NodeDeleted(SDNode *N, SDNode *E) override { |
915 | 12.3M | if (ISelPosition == SelectionDAG::allnodes_iterator(N)) |
916 | 1.55M | ++ISelPosition; |
917 | 12.3M | } |
918 | | }; |
919 | | |
920 | | } // end anonymous namespace |
921 | | |
922 | 3.42M | void SelectionDAGISel::DoInstructionSelection() { |
923 | 3.42M | DEBUG(dbgs() << "===== Instruction selection begins: BB#" |
924 | 3.42M | << FuncInfo->MBB->getNumber() |
925 | 3.42M | << " '" << FuncInfo->MBB->getName() << "'\n"); |
926 | 3.42M | |
927 | 3.42M | PreprocessISelDAG(); |
928 | 3.42M | |
929 | 3.42M | // Select target instructions for the DAG. |
930 | 3.42M | { |
931 | 3.42M | // Number all nodes with a topological order and set DAGSize. |
932 | 3.42M | DAGSize = CurDAG->AssignTopologicalOrder(); |
933 | 3.42M | |
934 | 3.42M | // Create a dummy node (which is not added to allnodes), that adds |
935 | 3.42M | // a reference to the root node, preventing it from being deleted, |
936 | 3.42M | // and tracking any changes of the root. |
937 | 3.42M | HandleSDNode Dummy(CurDAG->getRoot()); |
938 | 3.42M | SelectionDAG::allnodes_iterator ISelPosition (CurDAG->getRoot().getNode()); |
939 | 3.42M | ++ISelPosition; |
940 | 3.42M | |
941 | 3.42M | // Make sure that ISelPosition gets properly updated when nodes are deleted |
942 | 3.42M | // in calls made from this function. |
943 | 3.42M | ISelUpdater ISU(*CurDAG, ISelPosition); |
944 | 3.42M | |
945 | 3.42M | // The AllNodes list is now topological-sorted. Visit the |
946 | 3.42M | // nodes by starting at the end of the list (the root of the |
947 | 3.42M | // graph) and preceding back toward the beginning (the entry |
948 | 3.42M | // node). |
949 | 63.9M | while (ISelPosition != CurDAG->allnodes_begin()63.9M ) { |
950 | 60.5M | SDNode *Node = &*--ISelPosition; |
951 | 60.5M | // Skip dead nodes. DAGCombiner is expected to eliminate all dead nodes, |
952 | 60.5M | // but there are currently some corner cases that it misses. Also, this |
953 | 60.5M | // makes it theoretically possible to disable the DAGCombiner. |
954 | 60.5M | if (Node->use_empty()) |
955 | 2.10k | continue; |
956 | 60.5M | |
957 | 60.5M | // When we are using non-default rounding modes or FP exception behavior |
958 | 60.5M | // FP operations are represented by StrictFP pseudo-operations. They |
959 | 60.5M | // need to be simplified here so that the target-specific instruction |
960 | 60.5M | // selectors know how to handle them. |
961 | 60.5M | // |
962 | 60.5M | // If the current node is a strict FP pseudo-op, the isStrictFPOp() |
963 | 60.5M | // function will provide the corresponding normal FP opcode to which the |
964 | 60.5M | // node should be mutated. |
965 | 60.5M | // |
966 | 60.5M | // FIXME: The backends need a way to handle FP constraints. |
967 | 60.5M | if (60.5M Node->isStrictFPOpcode()60.5M ) |
968 | 18 | Node = CurDAG->mutateStrictFPToFP(Node); |
969 | 60.5M | |
970 | 60.5M | Select(Node); |
971 | 60.5M | } |
972 | 3.42M | |
973 | 3.42M | CurDAG->setRoot(Dummy.getValue()); |
974 | 3.42M | } |
975 | 3.42M | |
976 | 3.42M | DEBUG(dbgs() << "===== Instruction selection ends:\n"); |
977 | 3.42M | |
978 | 3.42M | PostprocessISelDAG(); |
979 | 3.42M | } |
980 | | |
981 | 104 | static bool hasExceptionPointerOrCodeUser(const CatchPadInst *CPI) { |
982 | 150 | for (const User *U : CPI->users()) { |
983 | 150 | if (const IntrinsicInst *EHPtrCall150 = dyn_cast<IntrinsicInst>(U)) { |
984 | 6 | Intrinsic::ID IID = EHPtrCall->getIntrinsicID(); |
985 | 6 | if (IID == Intrinsic::eh_exceptionpointer || |
986 | 3 | IID == Intrinsic::eh_exceptioncode) |
987 | 6 | return true; |
988 | 98 | } |
989 | 150 | } |
990 | 98 | return false; |
991 | 98 | } |
992 | | |
993 | | /// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and |
994 | | /// do other setup for EH landing-pad blocks. |
995 | 12.9k | bool SelectionDAGISel::PrepareEHLandingPad() { |
996 | 12.9k | MachineBasicBlock *MBB = FuncInfo->MBB; |
997 | 12.9k | const Constant *PersonalityFn = FuncInfo->Fn->getPersonalityFn(); |
998 | 12.9k | const BasicBlock *LLVMBB = MBB->getBasicBlock(); |
999 | 12.9k | const TargetRegisterClass *PtrRC = |
1000 | 12.9k | TLI->getRegClassFor(TLI->getPointerTy(CurDAG->getDataLayout())); |
1001 | 12.9k | |
1002 | 12.9k | // Catchpads have one live-in register, which typically holds the exception |
1003 | 12.9k | // pointer or code. |
1004 | 12.9k | if (const auto *CPI12.9k = dyn_cast<CatchPadInst>(LLVMBB->getFirstNonPHI())) { |
1005 | 104 | if (hasExceptionPointerOrCodeUser(CPI)104 ) { |
1006 | 6 | // Get or create the virtual register to hold the pointer or code. Mark |
1007 | 6 | // the live in physreg and copy into the vreg. |
1008 | 6 | MCPhysReg EHPhysReg = TLI->getExceptionPointerRegister(PersonalityFn); |
1009 | 6 | assert(EHPhysReg && "target lacks exception pointer register"); |
1010 | 6 | MBB->addLiveIn(EHPhysReg); |
1011 | 6 | unsigned VReg = FuncInfo->getCatchPadExceptionPointerVReg(CPI, PtrRC); |
1012 | 6 | BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), |
1013 | 6 | TII->get(TargetOpcode::COPY), VReg) |
1014 | 6 | .addReg(EHPhysReg, RegState::Kill); |
1015 | 6 | } |
1016 | 104 | return true; |
1017 | 104 | } |
1018 | 12.8k | |
1019 | 12.8k | if (12.8k !LLVMBB->isLandingPad()12.8k ) |
1020 | 38 | return true; |
1021 | 12.8k | |
1022 | 12.8k | // Add a label to mark the beginning of the landing pad. Deletion of the |
1023 | 12.8k | // landing pad can thus be detected via the MachineModuleInfo. |
1024 | 12.8k | MCSymbol *Label = MF->addLandingPad(MBB); |
1025 | 12.8k | |
1026 | 12.8k | // Assign the call site to the landing pad's begin label. |
1027 | 12.8k | MF->setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]); |
1028 | 12.8k | |
1029 | 12.8k | const MCInstrDesc &II = TII->get(TargetOpcode::EH_LABEL); |
1030 | 12.8k | BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) |
1031 | 12.8k | .addSym(Label); |
1032 | 12.8k | |
1033 | 12.8k | // Mark exception register as live in. |
1034 | 12.8k | if (unsigned Reg = TLI->getExceptionPointerRegister(PersonalityFn)) |
1035 | 12.7k | FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC); |
1036 | 12.8k | |
1037 | 12.8k | // Mark exception selector register as live in. |
1038 | 12.8k | if (unsigned Reg = TLI->getExceptionSelectorRegister(PersonalityFn)) |
1039 | 12.7k | FuncInfo->ExceptionSelectorVirtReg = MBB->addLiveIn(Reg, PtrRC); |
1040 | 12.9k | |
1041 | 12.9k | return true; |
1042 | 12.9k | } |
1043 | | |
1044 | | /// isFoldedOrDeadInstruction - Return true if the specified instruction is |
1045 | | /// side-effect free and is either dead or folded into a generated instruction. |
1046 | | /// Return false if it needs to be emitted. |
1047 | | static bool isFoldedOrDeadInstruction(const Instruction *I, |
1048 | 51.7k | FunctionLoweringInfo *FuncInfo) { |
1049 | 51.7k | return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded. |
1050 | 40.0k | !isa<TerminatorInst>(I) && // Terminators aren't folded. |
1051 | 30.0k | !isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded. |
1052 | 29.2k | !I->isEHPad() && // EH pad instructions aren't folded. |
1053 | 29.1k | !FuncInfo->isExportedInst(I); // Exported instrs must be computed. |
1054 | 51.7k | } |
1055 | | |
1056 | | /// Set up SwiftErrorVals by going through the function. If the function has |
1057 | | /// swifterror argument, it will be the first entry. |
1058 | | static void setupSwiftErrorVals(const Function &Fn, const TargetLowering *TLI, |
1059 | 437k | FunctionLoweringInfo *FuncInfo) { |
1060 | 437k | if (!TLI->supportSwiftError()) |
1061 | 56.5k | return; |
1062 | 380k | |
1063 | 380k | FuncInfo->SwiftErrorVals.clear(); |
1064 | 380k | FuncInfo->SwiftErrorVRegDefMap.clear(); |
1065 | 380k | FuncInfo->SwiftErrorVRegUpwardsUse.clear(); |
1066 | 380k | FuncInfo->SwiftErrorVRegDefUses.clear(); |
1067 | 380k | FuncInfo->SwiftErrorArg = nullptr; |
1068 | 380k | |
1069 | 380k | // Check if function has a swifterror argument. |
1070 | 380k | bool HaveSeenSwiftErrorArg = false; |
1071 | 380k | for (Function::const_arg_iterator AI = Fn.arg_begin(), AE = Fn.arg_end(); |
1072 | 1.24M | AI != AE1.24M ; ++AI864k ) |
1073 | 864k | if (864k AI->hasSwiftErrorAttr()864k ) { |
1074 | 101 | assert(!HaveSeenSwiftErrorArg && |
1075 | 101 | "Must have only one swifterror parameter"); |
1076 | 101 | (void)HaveSeenSwiftErrorArg; // silence warning. |
1077 | 101 | HaveSeenSwiftErrorArg = true; |
1078 | 101 | FuncInfo->SwiftErrorArg = &*AI; |
1079 | 101 | FuncInfo->SwiftErrorVals.push_back(&*AI); |
1080 | 101 | } |
1081 | 380k | |
1082 | 380k | for (const auto &LLVMBB : Fn) |
1083 | 3.22M | for (const auto &Inst : LLVMBB) 3.22M { |
1084 | 18.7M | if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(&Inst)) |
1085 | 81.5k | if (81.5k Alloca->isSwiftError()81.5k ) |
1086 | 65 | FuncInfo->SwiftErrorVals.push_back(Alloca); |
1087 | 3.22M | } |
1088 | 437k | } |
1089 | | |
1090 | | static void createSwiftErrorEntriesInEntryBlock(FunctionLoweringInfo *FuncInfo, |
1091 | | FastISel *FastIS, |
1092 | | const TargetLowering *TLI, |
1093 | | const TargetInstrInfo *TII, |
1094 | 437k | SelectionDAGBuilder *SDB) { |
1095 | 437k | if (!TLI->supportSwiftError()) |
1096 | 56.5k | return; |
1097 | 380k | |
1098 | 380k | // We only need to do this when we have swifterror parameter or swifterror |
1099 | 380k | // alloc. |
1100 | 380k | if (380k FuncInfo->SwiftErrorVals.empty()380k ) |
1101 | 380k | return; |
1102 | 148 | |
1103 | 380k | assert(FuncInfo->MBB == &*FuncInfo->MF->begin() && |
1104 | 148 | "expected to insert into entry block"); |
1105 | 148 | auto &DL = FuncInfo->MF->getDataLayout(); |
1106 | 148 | auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL)); |
1107 | 166 | for (const auto *SwiftErrorVal : FuncInfo->SwiftErrorVals) { |
1108 | 166 | // We will always generate a copy from the argument. It is always used at |
1109 | 166 | // least by the 'return' of the swifterror. |
1110 | 166 | if (FuncInfo->SwiftErrorArg && 166 FuncInfo->SwiftErrorArg == SwiftErrorVal115 ) |
1111 | 101 | continue; |
1112 | 65 | unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC); |
1113 | 65 | // Assign Undef to Vreg. We construct MI directly to make sure it works |
1114 | 65 | // with FastISel. |
1115 | 65 | BuildMI(*FuncInfo->MBB, FuncInfo->MBB->getFirstNonPHI(), |
1116 | 65 | SDB->getCurDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF), |
1117 | 65 | VReg); |
1118 | 65 | |
1119 | 65 | // Keep FastIS informed about the value we just inserted. |
1120 | 65 | if (FastIS) |
1121 | 24 | FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt)); |
1122 | 166 | |
1123 | 166 | FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorVal, VReg); |
1124 | 166 | } |
1125 | 437k | } |
1126 | | |
1127 | | /// Collect llvm.dbg.declare information. This is done after argument lowering |
1128 | | /// in case the declarations refer to arguments. |
1129 | 437k | static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) { |
1130 | 437k | MachineFunction *MF = FuncInfo->MF; |
1131 | 437k | const DataLayout &DL = MF->getDataLayout(); |
1132 | 3.30M | for (const BasicBlock &BB : *FuncInfo->Fn) { |
1133 | 19.1M | for (const Instruction &I : BB) { |
1134 | 19.1M | const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(&I); |
1135 | 19.1M | if (!DI) |
1136 | 19.1M | continue; |
1137 | 729 | |
1138 | 19.1M | assert(DI->getVariable() && "Missing variable"); |
1139 | 729 | assert(DI->getDebugLoc() && "Missing location"); |
1140 | 729 | const Value *Address = DI->getAddress(); |
1141 | 729 | if (!Address) |
1142 | 8 | continue; |
1143 | 721 | |
1144 | 721 | // Look through casts and constant offset GEPs. These mostly come from |
1145 | 721 | // inalloca. |
1146 | 721 | APInt Offset(DL.getPointerSizeInBits(0), 0); |
1147 | 721 | Address = Address->stripAndAccumulateInBoundsConstantOffsets(DL, Offset); |
1148 | 721 | |
1149 | 721 | // Check if the variable is a static alloca or a byval or inalloca |
1150 | 721 | // argument passed in memory. If it is not, then we will ignore this |
1151 | 721 | // intrinsic and handle this during isel like dbg.value. |
1152 | 721 | int FI = std::numeric_limits<int>::max(); |
1153 | 721 | if (const auto *AI721 = dyn_cast<AllocaInst>(Address)) { |
1154 | 622 | auto SI = FuncInfo->StaticAllocaMap.find(AI); |
1155 | 622 | if (SI != FuncInfo->StaticAllocaMap.end()) |
1156 | 612 | FI = SI->second; |
1157 | 721 | } else if (const auto *99 Arg99 = dyn_cast<Argument>(Address)) |
1158 | 59 | FI = FuncInfo->getArgumentFrameIndex(Arg); |
1159 | 721 | |
1160 | 721 | if (FI == std::numeric_limits<int>::max()) |
1161 | 91 | continue; |
1162 | 630 | |
1163 | 630 | DIExpression *Expr = DI->getExpression(); |
1164 | 630 | if (Offset.getBoolValue()) |
1165 | 6 | Expr = DIExpression::prepend(Expr, DIExpression::NoDeref, |
1166 | 6 | Offset.getZExtValue()); |
1167 | 19.1M | MF->setVariableDbgInfo(DI->getVariable(), Expr, FI, DI->getDebugLoc()); |
1168 | 19.1M | } |
1169 | 3.30M | } |
1170 | 437k | } |
1171 | | |
1172 | | /// Propagate swifterror values through the machine function CFG. |
1173 | 436k | static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) { |
1174 | 436k | auto *TLI = FuncInfo->TLI; |
1175 | 436k | if (!TLI->supportSwiftError()) |
1176 | 56.5k | return; |
1177 | 380k | |
1178 | 380k | // We only need to do this when we have swifterror parameter or swifterror |
1179 | 380k | // alloc. |
1180 | 380k | if (380k FuncInfo->SwiftErrorVals.empty()380k ) |
1181 | 380k | return; |
1182 | 150 | |
1183 | 150 | // For each machine basic block in reverse post order. |
1184 | 150 | ReversePostOrderTraversal<MachineFunction *> RPOT(FuncInfo->MF); |
1185 | 328 | for (MachineBasicBlock *MBB : RPOT) { |
1186 | 328 | // For each swifterror value in the function. |
1187 | 362 | for(const auto *SwiftErrorVal : FuncInfo->SwiftErrorVals) { |
1188 | 362 | auto Key = std::make_pair(MBB, SwiftErrorVal); |
1189 | 362 | auto UUseIt = FuncInfo->SwiftErrorVRegUpwardsUse.find(Key); |
1190 | 362 | auto VRegDefIt = FuncInfo->SwiftErrorVRegDefMap.find(Key); |
1191 | 362 | bool UpwardsUse = UUseIt != FuncInfo->SwiftErrorVRegUpwardsUse.end(); |
1192 | 362 | unsigned UUseVReg = UpwardsUse ? UUseIt->second45 : 0317 ; |
1193 | 362 | bool DownwardDef = VRegDefIt != FuncInfo->SwiftErrorVRegDefMap.end(); |
1194 | 362 | assert(!(UpwardsUse && !DownwardDef) && |
1195 | 362 | "We can't have an upwards use but no downwards def"); |
1196 | 362 | |
1197 | 362 | // If there is no upwards exposed use and an entry for the swifterror in |
1198 | 362 | // the def map for this value we don't need to do anything: We already |
1199 | 362 | // have a downward def for this basic block. |
1200 | 362 | if (!UpwardsUse && 362 DownwardDef317 ) |
1201 | 197 | continue; |
1202 | 165 | |
1203 | 165 | // Otherwise we either have an upwards exposed use vreg that we need to |
1204 | 165 | // materialize or need to forward the downward def from predecessors. |
1205 | 165 | |
1206 | 165 | // Check whether we have a single vreg def from all predecessors. |
1207 | 165 | // Otherwise we need a phi. |
1208 | 165 | SmallVector<std::pair<MachineBasicBlock *, unsigned>, 4> VRegs; |
1209 | 165 | SmallSet<const MachineBasicBlock*, 8> Visited; |
1210 | 232 | for (auto *Pred : MBB->predecessors()) { |
1211 | 232 | if (!Visited.insert(Pred).second) |
1212 | 0 | continue; |
1213 | 232 | VRegs.push_back(std::make_pair( |
1214 | 232 | Pred, FuncInfo->getOrCreateSwiftErrorVReg(Pred, SwiftErrorVal))); |
1215 | 232 | if (Pred != MBB) |
1216 | 230 | continue; |
1217 | 2 | // We have a self-edge. |
1218 | 2 | // If there was no upwards use in this basic block there is now one: the |
1219 | 2 | // phi needs to use it self. |
1220 | 2 | if (2 !UpwardsUse2 ) { |
1221 | 0 | UpwardsUse = true; |
1222 | 0 | UUseIt = FuncInfo->SwiftErrorVRegUpwardsUse.find(Key); |
1223 | 0 | assert(UUseIt != FuncInfo->SwiftErrorVRegUpwardsUse.end()); |
1224 | 0 | UUseVReg = UUseIt->second; |
1225 | 0 | } |
1226 | 232 | } |
1227 | 165 | |
1228 | 165 | // We need a phi node if we have more than one predecessor with different |
1229 | 165 | // downward defs. |
1230 | 165 | bool needPHI = |
1231 | 165 | VRegs.size() >= 1 && |
1232 | 165 | std::find_if( |
1233 | 165 | VRegs.begin(), VRegs.end(), |
1234 | 165 | [&](const std::pair<const MachineBasicBlock *, unsigned> &V) |
1235 | 232 | -> bool { return V.second != VRegs[0].second; }) != |
1236 | 165 | VRegs.end(); |
1237 | 165 | |
1238 | 165 | // If there is no upwards exposed used and we don't need a phi just |
1239 | 165 | // forward the swifterror vreg from the predecessor(s). |
1240 | 165 | if (!UpwardsUse && 165 !needPHI120 ) { |
1241 | 111 | assert(!VRegs.empty() && |
1242 | 111 | "No predecessors? The entry block should bail out earlier"); |
1243 | 111 | // Just forward the swifterror vreg from the predecessor(s). |
1244 | 111 | FuncInfo->setCurrentSwiftErrorVReg(MBB, SwiftErrorVal, VRegs[0].second); |
1245 | 111 | continue; |
1246 | 111 | } |
1247 | 54 | |
1248 | 54 | auto DLoc = isa<Instruction>(SwiftErrorVal) |
1249 | 6 | ? dyn_cast<Instruction>(SwiftErrorVal)->getDebugLoc() |
1250 | 48 | : DebugLoc(); |
1251 | 54 | const auto *TII = FuncInfo->MF->getSubtarget().getInstrInfo(); |
1252 | 54 | |
1253 | 54 | // If we don't need a phi create a copy to the upward exposed vreg. |
1254 | 54 | if (!needPHI54 ) { |
1255 | 34 | assert(UpwardsUse); |
1256 | 34 | assert(!VRegs.empty() && |
1257 | 34 | "No predecessors? Is the Calling Convention correct?"); |
1258 | 34 | unsigned DestReg = UUseVReg; |
1259 | 34 | BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc, TII->get(TargetOpcode::COPY), |
1260 | 34 | DestReg) |
1261 | 34 | .addReg(VRegs[0].second); |
1262 | 34 | continue; |
1263 | 34 | } |
1264 | 20 | |
1265 | 20 | // We need a phi: if there is an upwards exposed use we already have a |
1266 | 20 | // destination virtual register number otherwise we generate a new one. |
1267 | 20 | auto &DL = FuncInfo->MF->getDataLayout(); |
1268 | 20 | auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL)); |
1269 | 20 | unsigned PHIVReg = |
1270 | 11 | UpwardsUse ? UUseVReg |
1271 | 9 | : FuncInfo->MF->getRegInfo().createVirtualRegister(RC); |
1272 | 20 | MachineInstrBuilder SwiftErrorPHI = |
1273 | 20 | BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc, |
1274 | 20 | TII->get(TargetOpcode::PHI), PHIVReg); |
1275 | 40 | for (auto BBRegPair : VRegs) { |
1276 | 40 | SwiftErrorPHI.addReg(BBRegPair.second).addMBB(BBRegPair.first); |
1277 | 40 | } |
1278 | 20 | |
1279 | 20 | // We did not have a definition in this block before: store the phi's vreg |
1280 | 20 | // as this block downward exposed def. |
1281 | 20 | if (!UpwardsUse) |
1282 | 9 | FuncInfo->setCurrentSwiftErrorVReg(MBB, SwiftErrorVal, PHIVReg); |
1283 | 362 | } |
1284 | 328 | } |
1285 | 436k | } |
1286 | | |
1287 | | static void preassignSwiftErrorRegs(const TargetLowering *TLI, |
1288 | | FunctionLoweringInfo *FuncInfo, |
1289 | | BasicBlock::const_iterator Begin, |
1290 | 10.0k | BasicBlock::const_iterator End) { |
1291 | 10.0k | if (!TLI->supportSwiftError() || 10.0k FuncInfo->SwiftErrorVals.empty()7.15k ) |
1292 | 9.94k | return; |
1293 | 121 | |
1294 | 121 | // Iterator over instructions and assign vregs to swifterror defs and uses. |
1295 | 582 | for (auto It = Begin; 121 It != End582 ; ++It461 ) { |
1296 | 461 | ImmutableCallSite CS(&*It); |
1297 | 461 | if (CS461 ) { |
1298 | 80 | // A call-site with a swifterror argument is both use and def. |
1299 | 80 | const Value *SwiftErrorAddr = nullptr; |
1300 | 195 | for (auto &Arg : CS.args()) { |
1301 | 195 | if (!Arg->isSwiftError()) |
1302 | 152 | continue; |
1303 | 43 | // Use of swifterror. |
1304 | 195 | assert(!SwiftErrorAddr && "Cannot have multiple swifterror arguments"); |
1305 | 43 | SwiftErrorAddr = &*Arg; |
1306 | 43 | assert(SwiftErrorAddr->isSwiftError() && |
1307 | 43 | "Must have a swifterror value argument"); |
1308 | 43 | unsigned VReg; bool CreatedReg; |
1309 | 43 | std::tie(VReg, CreatedReg) = FuncInfo->getOrCreateSwiftErrorVRegUseAt( |
1310 | 43 | &*It, FuncInfo->MBB, SwiftErrorAddr); |
1311 | 43 | assert(CreatedReg); |
1312 | 43 | } |
1313 | 80 | if (!SwiftErrorAddr) |
1314 | 37 | continue; |
1315 | 43 | |
1316 | 43 | // Def of swifterror. |
1317 | 43 | unsigned VReg; bool CreatedReg; |
1318 | 43 | std::tie(VReg, CreatedReg) = |
1319 | 43 | FuncInfo->getOrCreateSwiftErrorVRegDefAt(&*It); |
1320 | 43 | assert(CreatedReg); |
1321 | 43 | FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorAddr, VReg); |
1322 | 43 | |
1323 | 43 | // A load is a use. |
1324 | 461 | } else if (const LoadInst *381 LI381 = dyn_cast<const LoadInst>(&*It)) { |
1325 | 41 | const Value *V = LI->getOperand(0); |
1326 | 41 | if (!V->isSwiftError()) |
1327 | 22 | continue; |
1328 | 19 | |
1329 | 19 | unsigned VReg; bool CreatedReg; |
1330 | 19 | std::tie(VReg, CreatedReg) = |
1331 | 19 | FuncInfo->getOrCreateSwiftErrorVRegUseAt(LI, FuncInfo->MBB, V); |
1332 | 19 | assert(CreatedReg); |
1333 | 19 | |
1334 | 19 | // A store is a def. |
1335 | 381 | } else if (const StoreInst *340 SI340 = dyn_cast<const StoreInst>(&*It)) { |
1336 | 82 | const Value *SwiftErrorAddr = SI->getOperand(1); |
1337 | 82 | if (!SwiftErrorAddr->isSwiftError()) |
1338 | 43 | continue; |
1339 | 39 | |
1340 | 39 | // Def of swifterror. |
1341 | 39 | unsigned VReg; bool CreatedReg; |
1342 | 39 | std::tie(VReg, CreatedReg) = |
1343 | 39 | FuncInfo->getOrCreateSwiftErrorVRegDefAt(&*It); |
1344 | 39 | assert(CreatedReg); |
1345 | 39 | FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorAddr, VReg); |
1346 | 39 | |
1347 | 39 | // A return in a swiferror returning function is a use. |
1348 | 340 | } else if (const ReturnInst *258 R258 = dyn_cast<const ReturnInst>(&*It)) { |
1349 | 62 | const Function *F = R->getParent()->getParent(); |
1350 | 62 | if(!F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) |
1351 | 18 | continue; |
1352 | 44 | |
1353 | 44 | unsigned VReg; bool CreatedReg; |
1354 | 44 | std::tie(VReg, CreatedReg) = FuncInfo->getOrCreateSwiftErrorVRegUseAt( |
1355 | 44 | R, FuncInfo->MBB, FuncInfo->SwiftErrorArg); |
1356 | 44 | assert(CreatedReg); |
1357 | 44 | } |
1358 | 461 | } |
1359 | 10.0k | } |
1360 | | |
1361 | 437k | void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { |
1362 | 437k | FastISelFailed = false; |
1363 | 437k | // Initialize the Fast-ISel state, if needed. |
1364 | 437k | FastISel *FastIS = nullptr; |
1365 | 437k | if (TM.Options.EnableFastISel) |
1366 | 10.7k | FastIS = TLI->createFastISel(*FuncInfo, LibInfo); |
1367 | 437k | |
1368 | 437k | setupSwiftErrorVals(Fn, TLI, FuncInfo); |
1369 | 437k | |
1370 | 437k | ReversePostOrderTraversal<const Function*> RPOT(&Fn); |
1371 | 437k | |
1372 | 437k | // Lower arguments up front. An RPO iteration always visits the entry block |
1373 | 437k | // first. |
1374 | 437k | assert(*RPOT.begin() == &Fn.getEntryBlock()); |
1375 | 437k | ++NumEntryBlocks; |
1376 | 437k | |
1377 | 437k | // Set up FuncInfo for ISel. Entry blocks never have PHIs. |
1378 | 437k | FuncInfo->MBB = FuncInfo->MBBMap[&Fn.getEntryBlock()]; |
1379 | 437k | FuncInfo->InsertPt = FuncInfo->MBB->begin(); |
1380 | 437k | |
1381 | 437k | if (!FastIS437k ) { |
1382 | 429k | LowerArguments(Fn); |
1383 | 437k | } else { |
1384 | 7.94k | // See if fast isel can lower the arguments. |
1385 | 7.94k | FastIS->startNewBlock(); |
1386 | 7.94k | if (!FastIS->lowerArguments()7.94k ) { |
1387 | 4.10k | FastISelFailed = true; |
1388 | 4.10k | // Fast isel failed to lower these arguments |
1389 | 4.10k | ++NumFastIselFailLowerArguments; |
1390 | 4.10k | |
1391 | 4.10k | OptimizationRemarkMissed R("sdagisel", "FastISelFailure", |
1392 | 4.10k | Fn.getSubprogram(), |
1393 | 4.10k | &Fn.getEntryBlock()); |
1394 | 4.10k | R << "FastISel didn't lower all arguments: " |
1395 | 4.10k | << ore::NV("Prototype", Fn.getType()); |
1396 | 4.10k | reportFastISelFailure(*MF, *ORE, R, EnableFastISelAbort > 1); |
1397 | 4.10k | |
1398 | 4.10k | // Use SelectionDAG argument lowering |
1399 | 4.10k | LowerArguments(Fn); |
1400 | 4.10k | CurDAG->setRoot(SDB->getControlRoot()); |
1401 | 4.10k | SDB->clear(); |
1402 | 4.10k | CodeGenAndEmitDAG(); |
1403 | 4.10k | } |
1404 | 7.94k | |
1405 | 7.94k | // If we inserted any instructions at the beginning, make a note of |
1406 | 7.94k | // where they are, so we can be sure to emit subsequent instructions |
1407 | 7.94k | // after them. |
1408 | 7.94k | if (FuncInfo->InsertPt != FuncInfo->MBB->begin()) |
1409 | 6.33k | FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt)); |
1410 | 7.94k | else |
1411 | 1.60k | FastIS->setLastLocalValue(nullptr); |
1412 | 7.94k | } |
1413 | 437k | createSwiftErrorEntriesInEntryBlock(FuncInfo, FastIS, TLI, TII, SDB); |
1414 | 437k | |
1415 | 437k | processDbgDeclares(FuncInfo); |
1416 | 437k | |
1417 | 437k | // Iterate over all basic blocks in the function. |
1418 | 3.30M | for (const BasicBlock *LLVMBB : RPOT) { |
1419 | 3.30M | if (OptLevel != CodeGenOpt::None3.30M ) { |
1420 | 3.29M | bool AllPredsVisited = true; |
1421 | 3.29M | for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB); |
1422 | 7.16M | PI != PE7.16M ; ++PI3.86M ) { |
1423 | 4.21M | if (!FuncInfo->VisitedBBs.count(*PI)4.21M ) { |
1424 | 352k | AllPredsVisited = false; |
1425 | 352k | break; |
1426 | 352k | } |
1427 | 4.21M | } |
1428 | 3.29M | |
1429 | 3.29M | if (AllPredsVisited3.29M ) { |
1430 | 2.94M | for (BasicBlock::const_iterator I = LLVMBB->begin(); |
1431 | 3.35M | const PHINode *PN3.35M = dyn_cast<PHINode>(I); ++I411k ) |
1432 | 411k | FuncInfo->ComputePHILiveOutRegInfo(PN); |
1433 | 3.29M | } else { |
1434 | 352k | for (BasicBlock::const_iterator I = LLVMBB->begin(); |
1435 | 856k | const PHINode *PN856k = dyn_cast<PHINode>(I); ++I503k ) |
1436 | 503k | FuncInfo->InvalidatePHILiveOutRegInfo(PN); |
1437 | 352k | } |
1438 | 3.29M | |
1439 | 3.29M | FuncInfo->VisitedBBs.insert(LLVMBB); |
1440 | 3.29M | } |
1441 | 3.30M | |
1442 | 3.30M | BasicBlock::const_iterator const Begin = |
1443 | 3.30M | LLVMBB->getFirstNonPHI()->getIterator(); |
1444 | 3.30M | BasicBlock::const_iterator const End = LLVMBB->end(); |
1445 | 3.30M | BasicBlock::const_iterator BI = End; |
1446 | 3.30M | |
1447 | 3.30M | FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB]; |
1448 | 3.30M | if (!FuncInfo->MBB) |
1449 | 96 | continue; // Some blocks like catchpads have no code or MBB. |
1450 | 3.30M | |
1451 | 3.30M | // Insert new instructions after any phi or argument setup code. |
1452 | 3.30M | FuncInfo->InsertPt = FuncInfo->MBB->end(); |
1453 | 3.30M | |
1454 | 3.30M | // Setup an EH landing-pad block. |
1455 | 3.30M | FuncInfo->ExceptionPointerVirtReg = 0; |
1456 | 3.30M | FuncInfo->ExceptionSelectorVirtReg = 0; |
1457 | 3.30M | if (LLVMBB->isEHPad()) |
1458 | 12.9k | if (12.9k !PrepareEHLandingPad()12.9k ) |
1459 | 0 | continue; |
1460 | 3.30M | |
1461 | 3.30M | // Before doing SelectionDAG ISel, see if FastISel has been requested. |
1462 | 3.30M | if (3.30M FastIS3.30M ) { |
1463 | 10.0k | if (LLVMBB != &Fn.getEntryBlock()) |
1464 | 2.12k | FastIS->startNewBlock(); |
1465 | 10.0k | |
1466 | 10.0k | unsigned NumFastIselRemaining = std::distance(Begin, End); |
1467 | 10.0k | |
1468 | 10.0k | // Pre-assign swifterror vregs. |
1469 | 10.0k | preassignSwiftErrorRegs(TLI, FuncInfo, Begin, End); |
1470 | 10.0k | |
1471 | 10.0k | // Do FastISel on as many instructions as possible. |
1472 | 38.5k | for (; BI != Begin38.5k ; --BI28.5k ) { |
1473 | 31.4k | const Instruction *Inst = &*std::prev(BI); |
1474 | 31.4k | |
1475 | 31.4k | // If we no longer require this instruction, skip it. |
1476 | 31.4k | if (isFoldedOrDeadInstruction(Inst, FuncInfo) || |
1477 | 31.4k | ElidedArgCopyInstrs.count(Inst)27.1k ) { |
1478 | 4.42k | --NumFastIselRemaining; |
1479 | 4.42k | continue; |
1480 | 4.42k | } |
1481 | 27.0k | |
1482 | 27.0k | // Bottom-up: reset the insert pos at the top, after any local-value |
1483 | 27.0k | // instructions. |
1484 | 27.0k | FastIS->recomputeInsertPt(); |
1485 | 27.0k | |
1486 | 27.0k | // Try to select the instruction with FastISel. |
1487 | 27.0k | if (FastIS->selectInstruction(Inst)27.0k ) { |
1488 | 22.5k | --NumFastIselRemaining; |
1489 | 22.5k | ++NumFastIselSuccess; |
1490 | 22.5k | // If fast isel succeeded, skip over all the folded instructions, and |
1491 | 22.5k | // then see if there is a load right before the selected instructions. |
1492 | 22.5k | // Try to fold the load if so. |
1493 | 22.5k | const Instruction *BeforeInst = Inst; |
1494 | 26.5k | while (BeforeInst != &*Begin26.5k ) { |
1495 | 20.2k | BeforeInst = &*std::prev(BasicBlock::const_iterator(BeforeInst)); |
1496 | 20.2k | if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo)) |
1497 | 16.2k | break; |
1498 | 20.2k | } |
1499 | 22.5k | if (BeforeInst != Inst && 22.5k isa<LoadInst>(BeforeInst)17.7k && |
1500 | 2.47k | BeforeInst->hasOneUse() && |
1501 | 22.5k | FastIS->tryToFoldLoad(cast<LoadInst>(BeforeInst), Inst)2.28k ) { |
1502 | 311 | // If we succeeded, don't re-select the load. |
1503 | 311 | BI = std::next(BasicBlock::const_iterator(BeforeInst)); |
1504 | 311 | --NumFastIselRemaining; |
1505 | 311 | ++NumFastIselSuccess; |
1506 | 311 | } |
1507 | 22.5k | continue; |
1508 | 22.5k | } |
1509 | 4.51k | |
1510 | 4.51k | FastISelFailed = true; |
1511 | 4.51k | |
1512 | 4.51k | // Then handle certain instructions as single-LLVM-Instruction blocks. |
1513 | 4.51k | // We cannot separate out GCrelocates to their own blocks since we need |
1514 | 4.51k | // to keep track of gc-relocates for a particular gc-statepoint. This is |
1515 | 4.51k | // done by SelectionDAGBuilder::LowerAsSTATEPOINT, called before |
1516 | 4.51k | // visitGCRelocate. |
1517 | 4.51k | if (isa<CallInst>(Inst) && 4.51k !isStatepoint(Inst)1.61k && !isGCRelocate(Inst)1.61k ) { |
1518 | 1.61k | OptimizationRemarkMissed R("sdagisel", "FastISelFailure", |
1519 | 1.61k | Inst->getDebugLoc(), LLVMBB); |
1520 | 1.61k | |
1521 | 1.61k | R << "FastISel missed call"; |
1522 | 1.61k | |
1523 | 1.61k | if (R.isEnabled() || 1.61k EnableFastISelAbort1.60k ) { |
1524 | 65 | std::string InstStrStorage; |
1525 | 65 | raw_string_ostream InstStr(InstStrStorage); |
1526 | 65 | InstStr << *Inst; |
1527 | 65 | |
1528 | 65 | R << ": " << InstStr.str(); |
1529 | 65 | } |
1530 | 1.61k | |
1531 | 1.61k | reportFastISelFailure(*MF, *ORE, R, EnableFastISelAbort > 2); |
1532 | 1.61k | |
1533 | 1.61k | if (!Inst->getType()->isVoidTy() && 1.61k !Inst->getType()->isTokenTy()1.25k && |
1534 | 1.61k | !Inst->use_empty()1.25k ) { |
1535 | 1.20k | unsigned &R = FuncInfo->ValueMap[Inst]; |
1536 | 1.20k | if (!R) |
1537 | 1 | R = FuncInfo->CreateRegs(Inst->getType()); |
1538 | 1.20k | } |
1539 | 1.61k | |
1540 | 1.61k | bool HadTailCall = false; |
1541 | 1.61k | MachineBasicBlock::iterator SavedInsertPt = FuncInfo->InsertPt; |
1542 | 1.61k | SelectBasicBlock(Inst->getIterator(), BI, HadTailCall); |
1543 | 1.61k | |
1544 | 1.61k | // If the call was emitted as a tail call, we're done with the block. |
1545 | 1.61k | // We also need to delete any previously emitted instructions. |
1546 | 1.61k | if (HadTailCall1.61k ) { |
1547 | 51 | FastIS->removeDeadCode(SavedInsertPt, FuncInfo->MBB->end()); |
1548 | 51 | --BI; |
1549 | 51 | break; |
1550 | 51 | } |
1551 | 1.56k | |
1552 | 1.56k | // Recompute NumFastIselRemaining as Selection DAG instruction |
1553 | 1.56k | // selection may have handled the call, input args, etc. |
1554 | 1.56k | unsigned RemainingNow = std::distance(Begin, BI); |
1555 | 1.56k | NumFastIselFailures += NumFastIselRemaining - RemainingNow; |
1556 | 1.56k | NumFastIselRemaining = RemainingNow; |
1557 | 1.56k | continue; |
1558 | 1.56k | } |
1559 | 2.90k | |
1560 | 2.90k | OptimizationRemarkMissed R("sdagisel", "FastISelFailure", |
1561 | 2.90k | Inst->getDebugLoc(), LLVMBB); |
1562 | 2.90k | |
1563 | 2.90k | bool ShouldAbort = EnableFastISelAbort; |
1564 | 2.90k | if (isa<TerminatorInst>(Inst)2.90k ) { |
1565 | 720 | // Use a different message for terminator misses. |
1566 | 720 | R << "FastISel missed terminator"; |
1567 | 720 | // Don't abort for terminator unless the level is really high |
1568 | 720 | ShouldAbort = (EnableFastISelAbort > 2); |
1569 | 2.90k | } else { |
1570 | 2.18k | R << "FastISel missed"; |
1571 | 2.18k | } |
1572 | 2.90k | |
1573 | 2.90k | if (R.isEnabled() || 2.90k EnableFastISelAbort2.88k ) { |
1574 | 109 | std::string InstStrStorage; |
1575 | 109 | raw_string_ostream InstStr(InstStrStorage); |
1576 | 109 | InstStr << *Inst; |
1577 | 109 | R << ": " << InstStr.str(); |
1578 | 109 | } |
1579 | 31.4k | |
1580 | 31.4k | reportFastISelFailure(*MF, *ORE, R, ShouldAbort); |
1581 | 31.4k | |
1582 | 31.4k | NumFastIselFailures += NumFastIselRemaining; |
1583 | 31.4k | break; |
1584 | 31.4k | } |
1585 | 10.0k | |
1586 | 10.0k | FastIS->recomputeInsertPt(); |
1587 | 10.0k | } |
1588 | 3.30M | |
1589 | 3.30M | if (getAnalysis<StackProtector>().shouldEmitSDCheck(*LLVMBB)3.30M ) { |
1590 | 3.40k | bool FunctionBasedInstrumentation = |
1591 | 3.40k | TLI->getSSPStackGuardCheck(*Fn.getParent()); |
1592 | 3.40k | SDB->SPDescriptor.initialize(LLVMBB, FuncInfo->MBBMap[LLVMBB], |
1593 | 3.40k | FunctionBasedInstrumentation); |
1594 | 3.40k | } |
1595 | 3.30M | |
1596 | 3.30M | if (Begin != BI) |
1597 | 3.30M | ++NumDAGBlocks; |
1598 | 3.30M | else |
1599 | 7.14k | ++NumFastIselBlocks; |
1600 | 3.30M | |
1601 | 3.30M | if (Begin != BI3.30M ) { |
1602 | 3.30M | // Run SelectionDAG instruction selection on the remainder of the block |
1603 | 3.30M | // not handled by FastISel. If FastISel is not run, this is the entire |
1604 | 3.30M | // block. |
1605 | 3.30M | bool HadTailCall; |
1606 | 3.30M | SelectBasicBlock(Begin, BI, HadTailCall); |
1607 | 3.30M | |
1608 | 3.30M | // But if FastISel was run, we already selected some of the block. |
1609 | 3.30M | // If we emitted a tail-call, we need to delete any previously emitted |
1610 | 3.30M | // instruction that follows it. |
1611 | 3.30M | if (HadTailCall && 3.30M FuncInfo->InsertPt != FuncInfo->MBB->end()231k ) |
1612 | 1 | FastIS->removeDeadCode(FuncInfo->InsertPt, FuncInfo->MBB->end()); |
1613 | 3.30M | } |
1614 | 3.30M | |
1615 | 3.30M | FinishBasicBlock(); |
1616 | 3.30M | FuncInfo->PHINodesToUpdate.clear(); |
1617 | 3.30M | ElidedArgCopyInstrs.clear(); |
1618 | 3.30M | } |
1619 | 437k | |
1620 | 437k | propagateSwiftErrorVRegs(FuncInfo); |
1621 | 437k | |
1622 | 437k | delete FastIS; |
1623 | 437k | SDB->clearDanglingDebugInfo(); |
1624 | 437k | SDB->SPDescriptor.resetPerFunctionState(); |
1625 | 437k | } |
1626 | | |
1627 | | /// Given that the input MI is before a partial terminator sequence TSeq, return |
1628 | | /// true if M + TSeq also a partial terminator sequence. |
1629 | | /// |
1630 | | /// A Terminator sequence is a sequence of MachineInstrs which at this point in |
1631 | | /// lowering copy vregs into physical registers, which are then passed into |
1632 | | /// terminator instructors so we can satisfy ABI constraints. A partial |
1633 | | /// terminator sequence is an improper subset of a terminator sequence (i.e. it |
1634 | | /// may be the whole terminator sequence). |
1635 | 4.99k | static bool MIIsInTerminatorSequence(const MachineInstr &MI) { |
1636 | 4.99k | // If we do not have a copy or an implicit def, we return true if and only if |
1637 | 4.99k | // MI is a debug value. |
1638 | 4.99k | if (!MI.isCopy() && 4.99k !MI.isImplicitDef()2.89k ) |
1639 | 4.99k | // Sometimes DBG_VALUE MI sneak in between the copies from the vregs to the |
1640 | 4.99k | // physical registers if there is debug info associated with the terminator |
1641 | 4.99k | // of our mbb. We want to include said debug info in our terminator |
1642 | 4.99k | // sequence, so we return true in that case. |
1643 | 2.89k | return MI.isDebugValue(); |
1644 | 2.09k | |
1645 | 2.09k | // We have left the terminator sequence if we are not doing one of the |
1646 | 2.09k | // following: |
1647 | 2.09k | // |
1648 | 2.09k | // 1. Copying a vreg into a physical register. |
1649 | 2.09k | // 2. Copying a vreg into a vreg. |
1650 | 2.09k | // 3. Defining a register via an implicit def. |
1651 | 2.09k | |
1652 | 2.09k | // OPI should always be a register definition... |
1653 | 2.09k | MachineInstr::const_mop_iterator OPI = MI.operands_begin(); |
1654 | 2.09k | if (!OPI->isReg() || 2.09k !OPI->isDef()2.09k ) |
1655 | 0 | return false; |
1656 | 2.09k | |
1657 | 2.09k | // Defining any register via an implicit def is always ok. |
1658 | 2.09k | if (2.09k MI.isImplicitDef()2.09k ) |
1659 | 4 | return true; |
1660 | 2.09k | |
1661 | 2.09k | // Grab the copy source... |
1662 | 2.09k | MachineInstr::const_mop_iterator OPI2 = OPI; |
1663 | 2.09k | ++OPI2; |
1664 | 2.09k | assert(OPI2 != MI.operands_end() |
1665 | 2.09k | && "Should have a copy implying we should have 2 arguments."); |
1666 | 2.09k | |
1667 | 2.09k | // Make sure that the copy dest is not a vreg when the copy source is a |
1668 | 2.09k | // physical register. |
1669 | 2.09k | if (!OPI2->isReg() || |
1670 | 2.09k | (!TargetRegisterInfo::isPhysicalRegister(OPI->getReg()) && |
1671 | 318 | TargetRegisterInfo::isPhysicalRegister(OPI2->getReg()))) |
1672 | 314 | return false; |
1673 | 1.78k | |
1674 | 1.78k | return true; |
1675 | 1.78k | } |
1676 | | |
1677 | | /// Find the split point at which to splice the end of BB into its success stack |
1678 | | /// protector check machine basic block. |
1679 | | /// |
1680 | | /// On many platforms, due to ABI constraints, terminators, even before register |
1681 | | /// allocation, use physical registers. This creates an issue for us since |
1682 | | /// physical registers at this point can not travel across basic |
1683 | | /// blocks. Luckily, selectiondag always moves physical registers into vregs |
1684 | | /// when they enter functions and moves them through a sequence of copies back |
1685 | | /// into the physical registers right before the terminator creating a |
1686 | | /// ``Terminator Sequence''. This function is searching for the beginning of the |
1687 | | /// terminator sequence so that we can ensure that we splice off not just the |
1688 | | /// terminator, but additionally the copies that move the vregs into the |
1689 | | /// physical registers. |
1690 | | static MachineBasicBlock::iterator |
1691 | 3.40k | FindSplitPointForStackProtector(MachineBasicBlock *BB) { |
1692 | 3.40k | MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator(); |
1693 | 3.40k | // |
1694 | 3.40k | if (SplitPoint == BB->begin()) |
1695 | 183 | return SplitPoint; |
1696 | 3.22k | |
1697 | 3.22k | MachineBasicBlock::iterator Start = BB->begin(); |
1698 | 3.22k | MachineBasicBlock::iterator Previous = SplitPoint; |
1699 | 3.22k | --Previous; |
1700 | 3.22k | |
1701 | 4.99k | while (MIIsInTerminatorSequence(*Previous)4.99k ) { |
1702 | 1.78k | SplitPoint = Previous; |
1703 | 1.78k | if (Previous == Start) |
1704 | 22 | break; |
1705 | 1.76k | --Previous; |
1706 | 1.76k | } |
1707 | 3.40k | |
1708 | 3.40k | return SplitPoint; |
1709 | 3.40k | } |
1710 | | |
1711 | | void |
1712 | 3.30M | SelectionDAGISel::FinishBasicBlock() { |
1713 | 3.30M | DEBUG(dbgs() << "Total amount of phi nodes to update: " |
1714 | 3.30M | << FuncInfo->PHINodesToUpdate.size() << "\n"; |
1715 | 3.30M | for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) |
1716 | 3.30M | dbgs() << "Node " << i << " : (" |
1717 | 3.30M | << FuncInfo->PHINodesToUpdate[i].first |
1718 | 3.30M | << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n"); |
1719 | 3.30M | |
1720 | 3.30M | // Next, now that we know what the last MBB the LLVM BB expanded is, update |
1721 | 3.30M | // PHI nodes in successors. |
1722 | 5.53M | for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e5.53M ; ++i2.22M ) { |
1723 | 2.22M | MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first); |
1724 | 2.22M | assert(PHI->isPHI() && |
1725 | 2.22M | "This is not a machine PHI node that we are updating!"); |
1726 | 2.22M | if (!FuncInfo->MBB->isSuccessor(PHI->getParent())) |
1727 | 48.6k | continue; |
1728 | 2.17M | PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB); |
1729 | 2.17M | } |
1730 | 3.30M | |
1731 | 3.30M | // Handle stack protector. |
1732 | 3.30M | if (SDB->SPDescriptor.shouldEmitFunctionBasedCheckStackProtector()3.30M ) { |
1733 | 66 | // The target provides a guard check function. There is no need to |
1734 | 66 | // generate error handling code or to split current basic block. |
1735 | 66 | MachineBasicBlock *ParentMBB = SDB->SPDescriptor.getParentMBB(); |
1736 | 66 | |
1737 | 66 | // Add load and check to the basicblock. |
1738 | 66 | FuncInfo->MBB = ParentMBB; |
1739 | 66 | FuncInfo->InsertPt = |
1740 | 66 | FindSplitPointForStackProtector(ParentMBB); |
1741 | 66 | SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB); |
1742 | 66 | CurDAG->setRoot(SDB->getRoot()); |
1743 | 66 | SDB->clear(); |
1744 | 66 | CodeGenAndEmitDAG(); |
1745 | 66 | |
1746 | 66 | // Clear the Per-BB State. |
1747 | 66 | SDB->SPDescriptor.resetPerBBState(); |
1748 | 3.30M | } else if (3.30M SDB->SPDescriptor.shouldEmitStackProtector()3.30M ) { |
1749 | 3.34k | MachineBasicBlock *ParentMBB = SDB->SPDescriptor.getParentMBB(); |
1750 | 3.34k | MachineBasicBlock *SuccessMBB = SDB->SPDescriptor.getSuccessMBB(); |
1751 | 3.34k | |
1752 | 3.34k | // Find the split point to split the parent mbb. At the same time copy all |
1753 | 3.34k | // physical registers used in the tail of parent mbb into virtual registers |
1754 | 3.34k | // before the split point and back into physical registers after the split |
1755 | 3.34k | // point. This prevents us needing to deal with Live-ins and many other |
1756 | 3.34k | // register allocation issues caused by us splitting the parent mbb. The |
1757 | 3.34k | // register allocator will clean up said virtual copies later on. |
1758 | 3.34k | MachineBasicBlock::iterator SplitPoint = |
1759 | 3.34k | FindSplitPointForStackProtector(ParentMBB); |
1760 | 3.34k | |
1761 | 3.34k | // Splice the terminator of ParentMBB into SuccessMBB. |
1762 | 3.34k | SuccessMBB->splice(SuccessMBB->end(), ParentMBB, |
1763 | 3.34k | SplitPoint, |
1764 | 3.34k | ParentMBB->end()); |
1765 | 3.34k | |
1766 | 3.34k | // Add compare/jump on neq/jump to the parent BB. |
1767 | 3.34k | FuncInfo->MBB = ParentMBB; |
1768 | 3.34k | FuncInfo->InsertPt = ParentMBB->end(); |
1769 | 3.34k | SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB); |
1770 | 3.34k | CurDAG->setRoot(SDB->getRoot()); |
1771 | 3.34k | SDB->clear(); |
1772 | 3.34k | CodeGenAndEmitDAG(); |
1773 | 3.34k | |
1774 | 3.34k | // CodeGen Failure MBB if we have not codegened it yet. |
1775 | 3.34k | MachineBasicBlock *FailureMBB = SDB->SPDescriptor.getFailureMBB(); |
1776 | 3.34k | if (FailureMBB->empty()3.34k ) { |
1777 | 3.26k | FuncInfo->MBB = FailureMBB; |
1778 | 3.26k | FuncInfo->InsertPt = FailureMBB->end(); |
1779 | 3.26k | SDB->visitSPDescriptorFailure(SDB->SPDescriptor); |
1780 | 3.26k | CurDAG->setRoot(SDB->getRoot()); |
1781 | 3.26k | SDB->clear(); |
1782 | 3.26k | CodeGenAndEmitDAG(); |
1783 | 3.26k | } |
1784 | 3.30M | |
1785 | 3.30M | // Clear the Per-BB State. |
1786 | 3.30M | SDB->SPDescriptor.resetPerBBState(); |
1787 | 3.30M | } |
1788 | 3.30M | |
1789 | 3.30M | // Lower each BitTestBlock. |
1790 | 621 | for (auto &BTB : SDB->BitTestCases) { |
1791 | 621 | // Lower header first, if it wasn't already lowered |
1792 | 621 | if (!BTB.Emitted621 ) { |
1793 | 2 | // Set the current basic block to the mbb we wish to insert the code into |
1794 | 2 | FuncInfo->MBB = BTB.Parent; |
1795 | 2 | FuncInfo->InsertPt = FuncInfo->MBB->end(); |
1796 | 2 | // Emit the code |
1797 | 2 | SDB->visitBitTestHeader(BTB, FuncInfo->MBB); |
1798 | 2 | CurDAG->setRoot(SDB->getRoot()); |
1799 | 2 | SDB->clear(); |
1800 | 2 | CodeGenAndEmitDAG(); |
1801 | 2 | } |
1802 | 621 | |
1803 | 621 | BranchProbability UnhandledProb = BTB.Prob; |
1804 | 1.38k | for (unsigned j = 0, ej = BTB.Cases.size(); j != ej1.38k ; ++j761 ) { |
1805 | 768 | UnhandledProb -= BTB.Cases[j].ExtraProb; |
1806 | 768 | // Set the current basic block to the mbb we wish to insert the code into |
1807 | 768 | FuncInfo->MBB = BTB.Cases[j].ThisBB; |
1808 | 768 | FuncInfo->InsertPt = FuncInfo->MBB->end(); |
1809 | 768 | // Emit the code |
1810 | 768 | |
1811 | 768 | // If all cases cover a contiguous range, it is not necessary to jump to |
1812 | 768 | // the default block after the last bit test fails. This is because the |
1813 | 768 | // range check during bit test header creation has guaranteed that every |
1814 | 768 | // case here doesn't go outside the range. In this case, there is no need |
1815 | 768 | // to perform the last bit test, as it will always be true. Instead, make |
1816 | 768 | // the second-to-last bit-test fall through to the target of the last bit |
1817 | 768 | // test, and delete the last bit test. |
1818 | 768 | |
1819 | 768 | MachineBasicBlock *NextMBB; |
1820 | 768 | if (BTB.ContiguousRange && 768 j + 2 == ej11 ) { |
1821 | 7 | // Second-to-last bit-test with contiguous range: fall through to the |
1822 | 7 | // target of the final bit test. |
1823 | 7 | NextMBB = BTB.Cases[j + 1].TargetBB; |
1824 | 768 | } else if (761 j + 1 == ej761 ) { |
1825 | 614 | // For the last bit test, fall through to Default. |
1826 | 614 | NextMBB = BTB.Default; |
1827 | 761 | } else { |
1828 | 147 | // Otherwise, fall through to the next bit test. |
1829 | 147 | NextMBB = BTB.Cases[j + 1].ThisBB; |
1830 | 147 | } |
1831 | 768 | |
1832 | 768 | SDB->visitBitTestCase(BTB, NextMBB, UnhandledProb, BTB.Reg, BTB.Cases[j], |
1833 | 768 | FuncInfo->MBB); |
1834 | 768 | |
1835 | 768 | CurDAG->setRoot(SDB->getRoot()); |
1836 | 768 | SDB->clear(); |
1837 | 768 | CodeGenAndEmitDAG(); |
1838 | 768 | |
1839 | 768 | if (BTB.ContiguousRange && 768 j + 2 == ej11 ) { |
1840 | 7 | // Since we're not going to use the final bit test, remove it. |
1841 | 7 | BTB.Cases.pop_back(); |
1842 | 7 | break; |
1843 | 7 | } |
1844 | 768 | } |
1845 | 621 | |
1846 | 621 | // Update PHI Nodes |
1847 | 621 | for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size(); |
1848 | 1.12k | pi != pe1.12k ; ++pi503 ) { |
1849 | 503 | MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[pi].first); |
1850 | 503 | MachineBasicBlock *PHIBB = PHI->getParent(); |
1851 | 503 | assert(PHI->isPHI() && |
1852 | 503 | "This is not a machine PHI node that we are updating!"); |
1853 | 503 | // This is "default" BB. We have two jumps to it. From "header" BB and |
1854 | 503 | // from last "case" BB, unless the latter was skipped. |
1855 | 503 | if (PHIBB == BTB.Default503 ) { |
1856 | 273 | PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(BTB.Parent); |
1857 | 273 | if (!BTB.ContiguousRange273 ) { |
1858 | 271 | PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second) |
1859 | 271 | .addMBB(BTB.Cases.back().ThisBB); |
1860 | 271 | } |
1861 | 273 | } |
1862 | 503 | // One of "cases" BB. |
1863 | 503 | for (unsigned j = 0, ej = BTB.Cases.size(); |
1864 | 1.11k | j != ej1.11k ; ++j611 ) { |
1865 | 611 | MachineBasicBlock* cBB = BTB.Cases[j].ThisBB; |
1866 | 611 | if (cBB->isSuccessor(PHIBB)) |
1867 | 485 | PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(cBB); |
1868 | 611 | } |
1869 | 503 | } |
1870 | 621 | } |
1871 | 3.30M | SDB->BitTestCases.clear(); |
1872 | 3.30M | |
1873 | 3.30M | // If the JumpTable record is filled in, then we need to emit a jump table. |
1874 | 3.30M | // Updating the PHI nodes is tricky in this case, since we need to determine |
1875 | 3.30M | // whether the PHI is a successor of the range check MBB or the jump table MBB |
1876 | 3.31M | for (unsigned i = 0, e = SDB->JTCases.size(); i != e3.31M ; ++i5.16k ) { |
1877 | 5.16k | // Lower header first, if it wasn't already lowered |
1878 | 5.16k | if (!SDB->JTCases[i].first.Emitted5.16k ) { |
1879 | 45 | // Set the current basic block to the mbb we wish to insert the code into |
1880 | 45 | FuncInfo->MBB = SDB->JTCases[i].first.HeaderBB; |
1881 | 45 | FuncInfo->InsertPt = FuncInfo->MBB->end(); |
1882 | 45 | // Emit the code |
1883 | 45 | SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first, |
1884 | 45 | FuncInfo->MBB); |
1885 | 45 | CurDAG->setRoot(SDB->getRoot()); |
1886 | 45 | SDB->clear(); |
1887 | 45 | CodeGenAndEmitDAG(); |
1888 | 45 | } |
1889 | 5.16k | |
1890 | 5.16k | // Set the current basic block to the mbb we wish to insert the code into |
1891 | 5.16k | FuncInfo->MBB = SDB->JTCases[i].second.MBB; |
1892 | 5.16k | FuncInfo->InsertPt = FuncInfo->MBB->end(); |
1893 | 5.16k | // Emit the code |
1894 | 5.16k | SDB->visitJumpTable(SDB->JTCases[i].second); |
1895 | 5.16k | CurDAG->setRoot(SDB->getRoot()); |
1896 | 5.16k | SDB->clear(); |
1897 | 5.16k | CodeGenAndEmitDAG(); |
1898 | 5.16k | |
1899 | 5.16k | // Update PHI Nodes |
1900 | 5.16k | for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size(); |
1901 | 10.7k | pi != pe10.7k ; ++pi5.58k ) { |
1902 | 5.58k | MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[pi].first); |
1903 | 5.58k | MachineBasicBlock *PHIBB = PHI->getParent(); |
1904 | 5.58k | assert(PHI->isPHI() && |
1905 | 5.58k | "This is not a machine PHI node that we are updating!"); |
1906 | 5.58k | // "default" BB. We can go there only from header BB. |
1907 | 5.58k | if (PHIBB == SDB->JTCases[i].second.Default) |
1908 | 3.50k | PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second) |
1909 | 3.50k | .addMBB(SDB->JTCases[i].first.HeaderBB); |
1910 | 5.58k | // JT BB. Just iterate over successors here |
1911 | 5.58k | if (FuncInfo->MBB->isSuccessor(PHIBB)) |
1912 | 2.45k | PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(FuncInfo->MBB); |
1913 | 5.58k | } |
1914 | 5.16k | } |
1915 | 3.30M | SDB->JTCases.clear(); |
1916 | 3.30M | |
1917 | 3.30M | // If we generated any switch lowering information, build and codegen any |
1918 | 3.30M | // additional DAGs necessary. |
1919 | 3.41M | for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e3.41M ; ++i110k ) { |
1920 | 110k | // Set the current basic block to the mbb we wish to insert the code into |
1921 | 110k | FuncInfo->MBB = SDB->SwitchCases[i].ThisBB; |
1922 | 110k | FuncInfo->InsertPt = FuncInfo->MBB->end(); |
1923 | 110k | |
1924 | 110k | // Determine the unique successors. |
1925 | 110k | SmallVector<MachineBasicBlock *, 2> Succs; |
1926 | 110k | Succs.push_back(SDB->SwitchCases[i].TrueBB); |
1927 | 110k | if (SDB->SwitchCases[i].TrueBB != SDB->SwitchCases[i].FalseBB) |
1928 | 110k | Succs.push_back(SDB->SwitchCases[i].FalseBB); |
1929 | 110k | |
1930 | 110k | // Emit the code. Note that this could result in FuncInfo->MBB being split. |
1931 | 110k | SDB->visitSwitchCase(SDB->SwitchCases[i], FuncInfo->MBB); |
1932 | 110k | CurDAG->setRoot(SDB->getRoot()); |
1933 | 110k | SDB->clear(); |
1934 | 110k | CodeGenAndEmitDAG(); |
1935 | 110k | |
1936 | 110k | // Remember the last block, now that any splitting is done, for use in |
1937 | 110k | // populating PHI nodes in successors. |
1938 | 110k | MachineBasicBlock *ThisBB = FuncInfo->MBB; |
1939 | 110k | |
1940 | 110k | // Handle any PHI nodes in successors of this chunk, as if we were coming |
1941 | 110k | // from the original BB before switch expansion. Note that PHI nodes can |
1942 | 110k | // occur multiple times in PHINodesToUpdate. We have to be very careful to |
1943 | 110k | // handle them the right number of times. |
1944 | 330k | for (unsigned i = 0, e = Succs.size(); i != e330k ; ++i220k ) { |
1945 | 220k | FuncInfo->MBB = Succs[i]; |
1946 | 220k | FuncInfo->InsertPt = FuncInfo->MBB->end(); |
1947 | 220k | // FuncInfo->MBB may have been removed from the CFG if a branch was |
1948 | 220k | // constant folded. |
1949 | 220k | if (ThisBB->isSuccessor(FuncInfo->MBB)220k ) { |
1950 | 220k | for (MachineBasicBlock::iterator |
1951 | 220k | MBBI = FuncInfo->MBB->begin(), MBBE = FuncInfo->MBB->end(); |
1952 | 301k | MBBI != MBBE && 301k MBBI->isPHI()98.6k ; ++MBBI81.5k ) { |
1953 | 81.5k | MachineInstrBuilder PHI(*MF, MBBI); |
1954 | 81.5k | // This value for this PHI node is recorded in PHINodesToUpdate. |
1955 | 81.5k | for (unsigned pn = 0; ; ++pn73.4k ) { |
1956 | 155k | assert(pn != FuncInfo->PHINodesToUpdate.size() && |
1957 | 155k | "Didn't find PHI entry!"); |
1958 | 155k | if (FuncInfo->PHINodesToUpdate[pn].first == PHI155k ) { |
1959 | 81.5k | PHI.addReg(FuncInfo->PHINodesToUpdate[pn].second).addMBB(ThisBB); |
1960 | 81.5k | break; |
1961 | 81.5k | } |
1962 | 155k | } |
1963 | 81.5k | } |
1964 | 220k | } |
1965 | 220k | } |
1966 | 110k | } |
1967 | 3.30M | SDB->SwitchCases.clear(); |
1968 | 3.30M | } |
1969 | | |
1970 | | /// Create the scheduler. If a specific scheduler was specified |
1971 | | /// via the SchedulerRegistry, use it, otherwise select the |
1972 | | /// one preferred by the target. |
1973 | | /// |
1974 | 3.42M | ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() { |
1975 | 3.42M | return ISHeuristic(this, OptLevel); |
1976 | 3.42M | } |
1977 | | |
1978 | | //===----------------------------------------------------------------------===// |
1979 | | // Helper functions used by the generated instruction selector. |
1980 | | //===----------------------------------------------------------------------===// |
1981 | | // Calls to these methods are generated by tblgen. |
1982 | | |
1983 | | /// CheckAndMask - The isel is trying to match something like (and X, 255). If |
1984 | | /// the dag combiner simplified the 255, we still want to match. RHS is the |
1985 | | /// actual value in the DAG on the RHS of an AND, and DesiredMaskS is the value |
1986 | | /// specified in the .td file (e.g. 255). |
1987 | | bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS, |
1988 | 711k | int64_t DesiredMaskS) const { |
1989 | 711k | const APInt &ActualMask = RHS->getAPIntValue(); |
1990 | 711k | const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS); |
1991 | 711k | |
1992 | 711k | // If the actual mask exactly matches, success! |
1993 | 711k | if (ActualMask == DesiredMask) |
1994 | 85.9k | return true; |
1995 | 625k | |
1996 | 625k | // If the actual AND mask is allowing unallowed bits, this doesn't match. |
1997 | 625k | if (625k ActualMask.intersects(~DesiredMask)625k ) |
1998 | 281k | return false; |
1999 | 343k | |
2000 | 343k | // Otherwise, the DAG Combiner may have proven that the value coming in is |
2001 | 343k | // either already zero or is not demanded. Check for known zero input bits. |
2002 | 343k | APInt NeededMask = DesiredMask & ~ActualMask; |
2003 | 343k | if (CurDAG->MaskedValueIsZero(LHS, NeededMask)) |
2004 | 429 | return true; |
2005 | 343k | |
2006 | 343k | // TODO: check to see if missing bits are just not demanded. |
2007 | 343k | |
2008 | 343k | // Otherwise, this pattern doesn't match. |
2009 | 343k | return false; |
2010 | 343k | } |
2011 | | |
2012 | | /// CheckOrMask - The isel is trying to match something like (or X, 255). If |
2013 | | /// the dag combiner simplified the 255, we still want to match. RHS is the |
2014 | | /// actual value in the DAG on the RHS of an OR, and DesiredMaskS is the value |
2015 | | /// specified in the .td file (e.g. 255). |
2016 | | bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS, |
2017 | 644 | int64_t DesiredMaskS) const { |
2018 | 644 | const APInt &ActualMask = RHS->getAPIntValue(); |
2019 | 644 | const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS); |
2020 | 644 | |
2021 | 644 | // If the actual mask exactly matches, success! |
2022 | 644 | if (ActualMask == DesiredMask) |
2023 | 4 | return true; |
2024 | 640 | |
2025 | 640 | // If the actual AND mask is allowing unallowed bits, this doesn't match. |
2026 | 640 | if (640 ActualMask.intersects(~DesiredMask)640 ) |
2027 | 157 | return false; |
2028 | 483 | |
2029 | 483 | // Otherwise, the DAG Combiner may have proven that the value coming in is |
2030 | 483 | // either already zero or is not demanded. Check for known zero input bits. |
2031 | 483 | APInt NeededMask = DesiredMask & ~ActualMask; |
2032 | 483 | |
2033 | 483 | KnownBits Known; |
2034 | 483 | CurDAG->computeKnownBits(LHS, Known); |
2035 | 483 | |
2036 | 483 | // If all the missing bits in the or are already known to be set, match! |
2037 | 483 | if (NeededMask.isSubsetOf(Known.One)) |
2038 | 0 | return true; |
2039 | 483 | |
2040 | 483 | // TODO: check to see if missing bits are just not demanded. |
2041 | 483 | |
2042 | 483 | // Otherwise, this pattern doesn't match. |
2043 | 483 | return false; |
2044 | 483 | } |
2045 | | |
2046 | | /// SelectInlineAsmMemoryOperands - Calls to this are automatically generated |
2047 | | /// by tblgen. Others should not call it. |
2048 | | void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops, |
2049 | 11.3k | const SDLoc &DL) { |
2050 | 11.3k | std::vector<SDValue> InOps; |
2051 | 11.3k | std::swap(InOps, Ops); |
2052 | 11.3k | |
2053 | 11.3k | Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0 |
2054 | 11.3k | Ops.push_back(InOps[InlineAsm::Op_AsmString]); // 1 |
2055 | 11.3k | Ops.push_back(InOps[InlineAsm::Op_MDNode]); // 2, !srcloc |
2056 | 11.3k | Ops.push_back(InOps[InlineAsm::Op_ExtraInfo]); // 3 (SideEffect, AlignStack) |
2057 | 11.3k | |
2058 | 11.3k | unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size(); |
2059 | 11.3k | if (InOps[e-1].getValueType() == MVT::Glue) |
2060 | 1.61k | --e; // Don't process a glue operand if it is here. |
2061 | 11.3k | |
2062 | 65.8k | while (i != e65.8k ) { |
2063 | 54.4k | unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue(); |
2064 | 54.4k | if (!InlineAsm::isMemKind(Flags)54.4k ) { |
2065 | 54.0k | // Just skip over this operand, copying the operands verbatim. |
2066 | 54.0k | Ops.insert(Ops.end(), InOps.begin()+i, |
2067 | 54.0k | InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1); |
2068 | 54.0k | i += InlineAsm::getNumOperandRegisters(Flags) + 1; |
2069 | 54.4k | } else { |
2070 | 394 | assert(InlineAsm::getNumOperandRegisters(Flags) == 1 && |
2071 | 394 | "Memory operand with multiple values?"); |
2072 | 394 | |
2073 | 394 | unsigned TiedToOperand; |
2074 | 394 | if (InlineAsm::isUseOperandTiedToDef(Flags, TiedToOperand)394 ) { |
2075 | 0 | // We need the constraint ID from the operand this is tied to. |
2076 | 0 | unsigned CurOp = InlineAsm::Op_FirstOperand; |
2077 | 0 | Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue(); |
2078 | 0 | for (; TiedToOperand0 ; --TiedToOperand0 ) { |
2079 | 0 | CurOp += InlineAsm::getNumOperandRegisters(Flags)+1; |
2080 | 0 | Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue(); |
2081 | 0 | } |
2082 | 0 | } |
2083 | 394 | |
2084 | 394 | // Otherwise, this is a memory operand. Ask the target to select it. |
2085 | 394 | std::vector<SDValue> SelOps; |
2086 | 394 | unsigned ConstraintID = InlineAsm::getMemoryConstraintID(Flags); |
2087 | 394 | if (SelectInlineAsmMemoryOperand(InOps[i+1], ConstraintID, SelOps)) |
2088 | 0 | report_fatal_error("Could not match memory address. Inline asm" |
2089 | 0 | " failure!"); |
2090 | 394 | |
2091 | 394 | // Add this to the output node. |
2092 | 394 | unsigned NewFlags = |
2093 | 394 | InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size()); |
2094 | 394 | NewFlags = InlineAsm::getFlagWordForMem(NewFlags, ConstraintID); |
2095 | 394 | Ops.push_back(CurDAG->getTargetConstant(NewFlags, DL, MVT::i32)); |
2096 | 394 | Ops.insert(Ops.end(), SelOps.begin(), SelOps.end()); |
2097 | 394 | i += 2; |
2098 | 394 | } |
2099 | 54.4k | } |
2100 | 11.3k | |
2101 | 11.3k | // Add the glue input back if present. |
2102 | 11.3k | if (11.3k e != InOps.size()11.3k ) |
2103 | 1.61k | Ops.push_back(InOps.back()); |
2104 | 11.3k | } |
2105 | | |
2106 | | /// findGlueUse - Return use of MVT::Glue value produced by the specified |
2107 | | /// SDNode. |
2108 | | /// |
2109 | 3.67k | static SDNode *findGlueUse(SDNode *N) { |
2110 | 3.67k | unsigned FlagResNo = N->getNumValues()-1; |
2111 | 6.57k | for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E6.57k ; ++I2.89k ) { |
2112 | 5.01k | SDUse &Use = I.getUse(); |
2113 | 5.01k | if (Use.getResNo() == FlagResNo) |
2114 | 2.11k | return Use.getUser(); |
2115 | 5.01k | } |
2116 | 1.55k | return nullptr; |
2117 | 3.67k | } |
2118 | | |
2119 | | /// findNonImmUse - Return true if "Use" is a non-immediate use of "Def". |
2120 | | /// This function iteratively traverses up the operand chain, ignoring |
2121 | | /// certain nodes. |
2122 | | static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, |
2123 | | SDNode *Root, SmallPtrSetImpl<SDNode*> &Visited, |
2124 | 51.1k | bool IgnoreChains) { |
2125 | 51.1k | // The NodeID's are given uniques ID's where a node ID is guaranteed to be |
2126 | 51.1k | // greater than all of its (recursive) operands. If we scan to a point where |
2127 | 51.1k | // 'use' is smaller than the node we're scanning for, then we know we will |
2128 | 51.1k | // never find it. |
2129 | 51.1k | // |
2130 | 51.1k | // The Use may be -1 (unassigned) if it is a newly allocated node. This can |
2131 | 51.1k | // happen because we scan down to newly selected nodes in the case of glue |
2132 | 51.1k | // uses. |
2133 | 51.1k | std::vector<SDNode *> WorkList; |
2134 | 51.1k | WorkList.push_back(Use); |
2135 | 51.1k | |
2136 | 370k | while (!WorkList.empty()370k ) { |
2137 | 319k | Use = WorkList.back(); |
2138 | 319k | WorkList.pop_back(); |
2139 | 319k | if (Use->getNodeId() < Def->getNodeId() && 319k Use->getNodeId() != -1161k ) |
2140 | 157k | continue; |
2141 | 162k | |
2142 | 162k | // Don't revisit nodes if we already scanned it and didn't fail, we know we |
2143 | 162k | // won't fail if we scan it again. |
2144 | 162k | if (162k !Visited.insert(Use).second162k ) |
2145 | 5.88k | continue; |
2146 | 156k | |
2147 | 156k | for (const SDValue &Op : Use->op_values()) 156k { |
2148 | 346k | // Ignore chain uses, they are validated by HandleMergeInputChains. |
2149 | 346k | if (Op.getValueType() == MVT::Other && 346k IgnoreChains28.7k ) |
2150 | 25.2k | continue; |
2151 | 321k | |
2152 | 321k | SDNode *N = Op.getNode(); |
2153 | 321k | if (N == Def321k ) { |
2154 | 51.9k | if (Use == ImmedUse || 51.9k Use == Root208 ) |
2155 | 51.8k | continue; // We are not looking for immediate use. |
2156 | 0 | assert(N != Root); |
2157 | 110 | return true; |
2158 | 110 | } |
2159 | 269k | |
2160 | 269k | // Traverse up the operand chain. |
2161 | 269k | WorkList.push_back(N); |
2162 | 269k | } |
2163 | 319k | } |
2164 | 51.0k | return false; |
2165 | 51.1k | } |
2166 | | |
2167 | | /// IsProfitableToFold - Returns true if it's profitable to fold the specific |
2168 | | /// operand node N of U during instruction selection that starts at Root. |
2169 | | bool SelectionDAGISel::IsProfitableToFold(SDValue N, SDNode *U, |
2170 | 9.60k | SDNode *Root) const { |
2171 | 9.60k | if (OptLevel == CodeGenOpt::None9.60k ) return false61 ; |
2172 | 9.54k | return N.hasOneUse(); |
2173 | 9.54k | } |
2174 | | |
2175 | | /// IsLegalToFold - Returns true if the specific operand node N of |
2176 | | /// U can be folded during instruction selection that starts at Root. |
2177 | | bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, |
2178 | | CodeGenOpt::Level OptLevel, |
2179 | 51.1k | bool IgnoreChains) { |
2180 | 51.1k | if (OptLevel == CodeGenOpt::None51.1k ) return false0 ; |
2181 | 51.1k | |
2182 | 51.1k | // If Root use can somehow reach N through a path that that doesn't contain |
2183 | 51.1k | // U then folding N would create a cycle. e.g. In the following |
2184 | 51.1k | // diagram, Root can reach N through X. If N is folded into into Root, then |
2185 | 51.1k | // X is both a predecessor and a successor of U. |
2186 | 51.1k | // |
2187 | 51.1k | // [N*] // |
2188 | 51.1k | // ^ ^ // |
2189 | 51.1k | // / \ // |
2190 | 51.1k | // [U*] [X]? // |
2191 | 51.1k | // ^ ^ // |
2192 | 51.1k | // \ / // |
2193 | 51.1k | // \ / // |
2194 | 51.1k | // [Root*] // |
2195 | 51.1k | // |
2196 | 51.1k | // * indicates nodes to be folded together. |
2197 | 51.1k | // |
2198 | 51.1k | // If Root produces glue, then it gets (even more) interesting. Since it |
2199 | 51.1k | // will be "glued" together with its glue use in the scheduler, we need to |
2200 | 51.1k | // check if it might reach N. |
2201 | 51.1k | // |
2202 | 51.1k | // [N*] // |
2203 | 51.1k | // ^ ^ // |
2204 | 51.1k | // / \ // |
2205 | 51.1k | // [U*] [X]? // |
2206 | 51.1k | // ^ ^ // |
2207 | 51.1k | // \ \ // |
2208 | 51.1k | // \ | // |
2209 | 51.1k | // [Root*] | // |
2210 | 51.1k | // ^ | // |
2211 | 51.1k | // f | // |
2212 | 51.1k | // | / // |
2213 | 51.1k | // [Y] / // |
2214 | 51.1k | // ^ / // |
2215 | 51.1k | // f / // |
2216 | 51.1k | // | / // |
2217 | 51.1k | // [GU] // |
2218 | 51.1k | // |
2219 | 51.1k | // If GU (glue use) indirectly reaches N (the load), and Root folds N |
2220 | 51.1k | // (call it Fold), then X is a predecessor of GU and a successor of |
2221 | 51.1k | // Fold. But since Fold and GU are glued together, this will create |
2222 | 51.1k | // a cycle in the scheduling graph. |
2223 | 51.1k | |
2224 | 51.1k | // If the node has glue, walk down the graph to the "lowest" node in the |
2225 | 51.1k | // glueged set. |
2226 | 51.1k | EVT VT = Root->getValueType(Root->getNumValues()-1); |
2227 | 53.2k | while (VT == MVT::Glue53.2k ) { |
2228 | 3.67k | SDNode *GU = findGlueUse(Root); |
2229 | 3.67k | if (!GU) |
2230 | 1.55k | break; |
2231 | 2.11k | Root = GU; |
2232 | 2.11k | VT = Root->getValueType(Root->getNumValues()-1); |
2233 | 2.11k | |
2234 | 2.11k | // If our query node has a glue result with a use, we've walked up it. If |
2235 | 2.11k | // the user (which has already been selected) has a chain or indirectly uses |
2236 | 2.11k | // the chain, our WalkChainUsers predicate will not consider it. Because of |
2237 | 2.11k | // this, we cannot ignore chains in this predicate. |
2238 | 2.11k | IgnoreChains = false; |
2239 | 2.11k | } |
2240 | 51.1k | |
2241 | 51.1k | SmallPtrSet<SDNode*, 16> Visited; |
2242 | 51.1k | return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains); |
2243 | 51.1k | } |
2244 | | |
2245 | 11.3k | void SelectionDAGISel::Select_INLINEASM(SDNode *N) { |
2246 | 11.3k | SDLoc DL(N); |
2247 | 11.3k | |
2248 | 11.3k | std::vector<SDValue> Ops(N->op_begin(), N->op_end()); |
2249 | 11.3k | SelectInlineAsmMemoryOperands(Ops, DL); |
2250 | 11.3k | |
2251 | 11.3k | const EVT VTs[] = {MVT::Other, MVT::Glue}; |
2252 | 11.3k | SDValue New = CurDAG->getNode(ISD::INLINEASM, DL, VTs, Ops); |
2253 | 11.3k | New->setNodeId(-1); |
2254 | 11.3k | ReplaceUses(N, New.getNode()); |
2255 | 11.3k | CurDAG->RemoveDeadNode(N); |
2256 | 11.3k | } |
2257 | | |
2258 | 56 | void SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) { |
2259 | 56 | SDLoc dl(Op); |
2260 | 56 | MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1)); |
2261 | 56 | const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); |
2262 | 56 | unsigned Reg = |
2263 | 56 | TLI->getRegisterByName(RegStr->getString().data(), Op->getValueType(0), |
2264 | 56 | *CurDAG); |
2265 | 56 | SDValue New = CurDAG->getCopyFromReg( |
2266 | 56 | Op->getOperand(0), dl, Reg, Op->getValueType(0)); |
2267 | 56 | New->setNodeId(-1); |
2268 | 56 | ReplaceUses(Op, New.getNode()); |
2269 | 56 | CurDAG->RemoveDeadNode(Op); |
2270 | 56 | } |
2271 | | |
2272 | 31 | void SelectionDAGISel::Select_WRITE_REGISTER(SDNode *Op) { |
2273 | 31 | SDLoc dl(Op); |
2274 | 31 | MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1)); |
2275 | 31 | const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); |
2276 | 31 | unsigned Reg = TLI->getRegisterByName(RegStr->getString().data(), |
2277 | 31 | Op->getOperand(2).getValueType(), |
2278 | 31 | *CurDAG); |
2279 | 31 | SDValue New = CurDAG->getCopyToReg( |
2280 | 31 | Op->getOperand(0), dl, Reg, Op->getOperand(2)); |
2281 | 31 | New->setNodeId(-1); |
2282 | 31 | ReplaceUses(Op, New.getNode()); |
2283 | 31 | CurDAG->RemoveDeadNode(Op); |
2284 | 31 | } |
2285 | | |
2286 | 7.32k | void SelectionDAGISel::Select_UNDEF(SDNode *N) { |
2287 | 7.32k | CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0)); |
2288 | 7.32k | } |
2289 | | |
2290 | | /// GetVBR - decode a vbr encoding whose top bit is set. |
2291 | | LLVM_ATTRIBUTE_ALWAYS_INLINE static inline uint64_t |
2292 | 33.3M | GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { |
2293 | 33.3M | assert(Val >= 128 && "Not a VBR"); |
2294 | 33.3M | Val &= 127; // Remove first vbr bit. |
2295 | 33.3M | |
2296 | 33.3M | unsigned Shift = 7; |
2297 | 33.3M | uint64_t NextBits; |
2298 | 38.8M | do { |
2299 | 38.8M | NextBits = MatcherTable[Idx++]; |
2300 | 38.8M | Val |= (NextBits&127) << Shift; |
2301 | 38.8M | Shift += 7; |
2302 | 38.8M | } while (NextBits & 128); |
2303 | 33.3M | |
2304 | 33.3M | return Val; |
2305 | 33.3M | } |
2306 | | |
2307 | | /// When a match is complete, this method updates uses of interior chain results |
2308 | | /// to use the new results. |
2309 | | void SelectionDAGISel::UpdateChains( |
2310 | | SDNode *NodeToMatch, SDValue InputChain, |
2311 | 20.0M | SmallVectorImpl<SDNode *> &ChainNodesMatched, bool isMorphNodeTo) { |
2312 | 20.0M | SmallVector<SDNode*, 4> NowDeadNodes; |
2313 | 20.0M | |
2314 | 20.0M | // Now that all the normal results are replaced, we replace the chain and |
2315 | 20.0M | // glue results if present. |
2316 | 20.0M | if (!ChainNodesMatched.empty()20.0M ) { |
2317 | 13.0M | assert(InputChain.getNode() && |
2318 | 13.0M | "Matched input chains but didn't produce a chain"); |
2319 | 13.0M | // Loop over all of the nodes we matched that produced a chain result. |
2320 | 13.0M | // Replace all the chain results with the final chain we ended up with. |
2321 | 26.1M | for (unsigned i = 0, e = ChainNodesMatched.size(); i != e26.1M ; ++i13.0M ) { |
2322 | 13.0M | SDNode *ChainNode = ChainNodesMatched[i]; |
2323 | 13.0M | // If ChainNode is null, it's because we replaced it on a previous |
2324 | 13.0M | // iteration and we cleared it out of the map. Just skip it. |
2325 | 13.0M | if (!ChainNode) |
2326 | 0 | continue; |
2327 | 13.0M | |
2328 | 13.0M | assert(ChainNode->getOpcode() != ISD::DELETED_NODE && |
2329 | 13.0M | "Deleted node left in chain"); |
2330 | 13.0M | |
2331 | 13.0M | // Don't replace the results of the root node if we're doing a |
2332 | 13.0M | // MorphNodeTo. |
2333 | 13.0M | if (ChainNode == NodeToMatch && 13.0M isMorphNodeTo13.0M ) |
2334 | 13.0M | continue; |
2335 | 6.29k | |
2336 | 6.29k | SDValue ChainVal = SDValue(ChainNode, ChainNode->getNumValues()-1); |
2337 | 6.29k | if (ChainVal.getValueType() == MVT::Glue) |
2338 | 0 | ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2); |
2339 | 6.29k | assert(ChainVal.getValueType() == MVT::Other && "Not a chain?"); |
2340 | 6.29k | SelectionDAG::DAGNodeDeletedListener NDL( |
2341 | 0 | *CurDAG, [&](SDNode *N, SDNode *E) { |
2342 | 0 | std::replace(ChainNodesMatched.begin(), ChainNodesMatched.end(), N, |
2343 | 0 | static_cast<SDNode *>(nullptr)); |
2344 | 0 | }); |
2345 | 6.29k | CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain); |
2346 | 6.29k | |
2347 | 6.29k | // If the node became dead and we haven't already seen it, delete it. |
2348 | 6.29k | if (ChainNode != NodeToMatch && 6.29k ChainNode->use_empty()5.81k && |
2349 | 5.81k | !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), ChainNode)) |
2350 | 5.81k | NowDeadNodes.push_back(ChainNode); |
2351 | 13.0M | } |
2352 | 13.0M | } |
2353 | 20.0M | |
2354 | 20.0M | if (!NowDeadNodes.empty()) |
2355 | 5.80k | CurDAG->RemoveDeadNodes(NowDeadNodes); |
2356 | 20.0M | |
2357 | 20.0M | DEBUG(dbgs() << "ISEL: Match complete!\n"); |
2358 | 20.0M | } |
2359 | | |
2360 | | enum ChainResult { |
2361 | | CR_Simple, |
2362 | | CR_InducesCycle, |
2363 | | CR_LeadsToInteriorNode |
2364 | | }; |
2365 | | |
2366 | | /// WalkChainUsers - Walk down the users of the specified chained node that is |
2367 | | /// part of the pattern we're matching, looking at all of the users we find. |
2368 | | /// This determines whether something is an interior node, whether we have a |
2369 | | /// non-pattern node in between two pattern nodes (which prevent folding because |
2370 | | /// it would induce a cycle) and whether we have a TokenFactor node sandwiched |
2371 | | /// between pattern nodes (in which case the TF becomes part of the pattern). |
2372 | | /// |
2373 | | /// The walk we do here is guaranteed to be small because we quickly get down to |
2374 | | /// already selected nodes "below" us. |
2375 | | static ChainResult |
2376 | | WalkChainUsers(const SDNode *ChainedNode, |
2377 | | SmallVectorImpl<SDNode *> &ChainedNodesInPattern, |
2378 | | DenseMap<const SDNode *, ChainResult> &TokenFactorResult, |
2379 | 15.1M | SmallVectorImpl<SDNode *> &InteriorChainedNodes) { |
2380 | 15.1M | ChainResult Result = CR_Simple; |
2381 | 15.1M | |
2382 | 15.1M | for (SDNode::use_iterator UI = ChainedNode->use_begin(), |
2383 | 40.1M | E = ChainedNode->use_end(); UI != E40.1M ; ++UI24.9M ) { |
2384 | 24.9M | // Make sure the use is of the chain, not some other value we produce. |
2385 | 24.9M | if (UI.getUse().getValueType() != MVT::Other24.9M ) continue5.49M ; |
2386 | 19.5M | |
2387 | 19.5M | SDNode *User = *UI; |
2388 | 19.5M | |
2389 | 19.5M | if (User->getOpcode() == ISD::HANDLENODE) // Root of the graph. |
2390 | 3.09M | continue; |
2391 | 16.4M | |
2392 | 16.4M | // If we see an already-selected machine node, then we've gone beyond the |
2393 | 16.4M | // pattern that we're selecting down into the already selected chunk of the |
2394 | 16.4M | // DAG. |
2395 | 16.4M | unsigned UserOpcode = User->getOpcode(); |
2396 | 16.4M | if (User->isMachineOpcode() || |
2397 | 5.02M | UserOpcode == ISD::CopyToReg || |
2398 | 3.25M | UserOpcode == ISD::CopyFromReg || |
2399 | 2.14M | UserOpcode == ISD::INLINEASM || |
2400 | 2.13M | UserOpcode == ISD::EH_LABEL || |
2401 | 2.11M | UserOpcode == ISD::LIFETIME_START || |
2402 | 16.4M | UserOpcode == ISD::LIFETIME_END2.10M ) { |
2403 | 14.3M | // If their node ID got reset to -1 then they've already been selected. |
2404 | 14.3M | // Treat them like a MachineOpcode. |
2405 | 14.3M | if (User->getNodeId() == -1) |
2406 | 14.3M | continue; |
2407 | 2.06M | } |
2408 | 2.06M | |
2409 | 2.06M | // If we have a TokenFactor, we handle it specially. |
2410 | 2.06M | if (2.06M User->getOpcode() != ISD::TokenFactor2.06M ) { |
2411 | 2.95k | // If the node isn't a token factor and isn't part of our pattern, then it |
2412 | 2.95k | // must be a random chained node in between two nodes we're selecting. |
2413 | 2.95k | // This happens when we have something like: |
2414 | 2.95k | // x = load ptr |
2415 | 2.95k | // call |
2416 | 2.95k | // y = x+4 |
2417 | 2.95k | // store y -> ptr |
2418 | 2.95k | // Because we structurally match the load/store as a read/modify/write, |
2419 | 2.95k | // but the call is chained between them. We cannot fold in this case |
2420 | 2.95k | // because it would induce a cycle in the graph. |
2421 | 2.95k | if (!std::count(ChainedNodesInPattern.begin(), |
2422 | 2.95k | ChainedNodesInPattern.end(), User)) |
2423 | 937 | return CR_InducesCycle; |
2424 | 2.01k | |
2425 | 2.01k | // Otherwise we found a node that is part of our pattern. For example in: |
2426 | 2.01k | // x = load ptr |
2427 | 2.01k | // y = x+4 |
2428 | 2.01k | // store y -> ptr |
2429 | 2.01k | // This would happen when we're scanning down from the load and see the |
2430 | 2.01k | // store as a user. Record that there is a use of ChainedNode that is |
2431 | 2.01k | // part of the pattern and keep scanning uses. |
2432 | 2.01k | Result = CR_LeadsToInteriorNode; |
2433 | 2.01k | InteriorChainedNodes.push_back(User); |
2434 | 2.01k | continue; |
2435 | 2.01k | } |
2436 | 2.06M | |
2437 | 2.06M | // If we found a TokenFactor, there are two cases to consider: first if the |
2438 | 2.06M | // TokenFactor is just hanging "below" the pattern we're matching (i.e. no |
2439 | 2.06M | // uses of the TF are in our pattern) we just want to ignore it. Second, |
2440 | 2.06M | // the TokenFactor can be sandwiched in between two chained nodes, like so: |
2441 | 2.06M | // [Load chain] |
2442 | 2.06M | // ^ |
2443 | 2.06M | // | |
2444 | 2.06M | // [Load] |
2445 | 2.06M | // ^ ^ |
2446 | 2.06M | // | \ DAG's like cheese |
2447 | 2.06M | // / \ do you? |
2448 | 2.06M | // / | |
2449 | 2.06M | // [TokenFactor] [Op] |
2450 | 2.06M | // ^ ^ |
2451 | 2.06M | // | | |
2452 | 2.06M | // \ / |
2453 | 2.06M | // \ / |
2454 | 2.06M | // [Store] |
2455 | 2.06M | // |
2456 | 2.06M | // In this case, the TokenFactor becomes part of our match and we rewrite it |
2457 | 2.06M | // as a new TokenFactor. |
2458 | 2.06M | // |
2459 | 2.06M | // To distinguish these two cases, do a recursive walk down the uses. |
2460 | 2.06M | auto MemoizeResult = TokenFactorResult.find(User); |
2461 | 2.06M | bool Visited = MemoizeResult != TokenFactorResult.end(); |
2462 | 2.06M | // Recursively walk chain users only if the result is not memoized. |
2463 | 2.06M | if (!Visited2.06M ) { |
2464 | 2.04M | auto Res = WalkChainUsers(User, ChainedNodesInPattern, TokenFactorResult, |
2465 | 2.04M | InteriorChainedNodes); |
2466 | 2.04M | MemoizeResult = TokenFactorResult.insert(std::make_pair(User, Res)).first; |
2467 | 2.04M | } |
2468 | 2.06M | switch (MemoizeResult->second) { |
2469 | 2.06M | case CR_Simple: |
2470 | 2.06M | // If the uses of the TokenFactor are just already-selected nodes, ignore |
2471 | 2.06M | // it, it is "below" our pattern. |
2472 | 2.06M | continue; |
2473 | 652 | case CR_InducesCycle: |
2474 | 652 | // If the uses of the TokenFactor lead to nodes that are not part of our |
2475 | 652 | // pattern that are not selected, folding would turn this into a cycle, |
2476 | 652 | // bail out now. |
2477 | 652 | return CR_InducesCycle; |
2478 | 157 | case CR_LeadsToInteriorNode: |
2479 | 157 | break; // Otherwise, keep processing. |
2480 | 157 | } |
2481 | 157 | |
2482 | 157 | // Okay, we know we're in the interesting interior case. The TokenFactor |
2483 | 157 | // is now going to be considered part of the pattern so that we rewrite its |
2484 | 157 | // uses (it may have uses that are not part of the pattern) with the |
2485 | 157 | // ultimate chain result of the generated code. We will also add its chain |
2486 | 157 | // inputs as inputs to the ultimate TokenFactor we create. |
2487 | 157 | Result = CR_LeadsToInteriorNode; |
2488 | 157 | if (!Visited157 ) { |
2489 | 157 | ChainedNodesInPattern.push_back(User); |
2490 | 157 | InteriorChainedNodes.push_back(User); |
2491 | 157 | } |
2492 | 24.9M | } |
2493 | 15.1M | |
2494 | 15.1M | return Result; |
2495 | 15.1M | } |
2496 | | |
2497 | | /// HandleMergeInputChains - This implements the OPC_EmitMergeInputChains |
2498 | | /// operation for when the pattern matched at least one node with a chains. The |
2499 | | /// input vector contains a list of all of the chained nodes that we match. We |
2500 | | /// must determine if this is a valid thing to cover (i.e. matching it won't |
2501 | | /// induce cycles in the DAG) and if so, creating a TokenFactor node. that will |
2502 | | /// be used as the input node chain for the generated nodes. |
2503 | | static SDValue |
2504 | | HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched, |
2505 | 13.0M | SelectionDAG *CurDAG) { |
2506 | 13.0M | // Used for memoization. Without it WalkChainUsers could take exponential |
2507 | 13.0M | // time to run. |
2508 | 13.0M | DenseMap<const SDNode *, ChainResult> TokenFactorResult; |
2509 | 13.0M | // Walk all of the chained nodes we've matched, recursively scanning down the |
2510 | 13.0M | // users of the chain result. This adds any TokenFactor nodes that are caught |
2511 | 13.0M | // in between chained nodes to the chained and interior nodes list. |
2512 | 13.0M | SmallVector<SDNode*, 3> InteriorChainedNodes; |
2513 | 26.1M | for (unsigned i = 0, e = ChainNodesMatched.size(); i != e26.1M ; ++i13.0M ) { |
2514 | 13.0M | if (WalkChainUsers(ChainNodesMatched[i], ChainNodesMatched, |
2515 | 13.0M | TokenFactorResult, |
2516 | 13.0M | InteriorChainedNodes) == CR_InducesCycle) |
2517 | 937 | return SDValue(); // Would induce a cycle. |
2518 | 13.0M | } |
2519 | 13.0M | |
2520 | 13.0M | // Okay, we have walked all the matched nodes and collected TokenFactor nodes |
2521 | 13.0M | // that we are interested in. Form our input TokenFactor node. |
2522 | 13.0M | SmallVector<SDValue, 3> InputChains; |
2523 | 26.1M | for (unsigned i = 0, e = ChainNodesMatched.size(); i != e26.1M ; ++i13.0M ) { |
2524 | 13.0M | // Add the input chain of this node to the InputChains list (which will be |
2525 | 13.0M | // the operands of the generated TokenFactor) if it's not an interior node. |
2526 | 13.0M | SDNode *N = ChainNodesMatched[i]; |
2527 | 13.0M | if (N->getOpcode() != ISD::TokenFactor13.0M ) { |
2528 | 13.0M | if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N)) |
2529 | 2.01k | continue; |
2530 | 13.0M | |
2531 | 13.0M | // Otherwise, add the input chain. |
2532 | 13.0M | SDValue InChain = ChainNodesMatched[i]->getOperand(0); |
2533 | 13.0M | assert(InChain.getValueType() == MVT::Other && "Not a chain"); |
2534 | 13.0M | InputChains.push_back(InChain); |
2535 | 13.0M | continue; |
2536 | 13.0M | } |
2537 | 156 | |
2538 | 156 | // If we have a token factor, we want to add all inputs of the token factor |
2539 | 156 | // that are not part of the pattern we're matching. |
2540 | 156 | for (const SDValue &Op : N->op_values()) 156 { |
2541 | 363 | if (!std::count(ChainNodesMatched.begin(), ChainNodesMatched.end(), |
2542 | 363 | Op.getNode())) |
2543 | 206 | InputChains.push_back(Op); |
2544 | 363 | } |
2545 | 13.0M | } |
2546 | 13.0M | |
2547 | 13.0M | if (InputChains.size() == 1) |
2548 | 13.0M | return InputChains[0]; |
2549 | 283 | return CurDAG->getNode(ISD::TokenFactor, SDLoc(ChainNodesMatched[0]), |
2550 | 283 | MVT::Other, InputChains); |
2551 | 283 | } |
2552 | | |
2553 | | /// MorphNode - Handle morphing a node in place for the selector. |
2554 | | SDNode *SelectionDAGISel:: |
2555 | | MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, |
2556 | 19.8M | ArrayRef<SDValue> Ops, unsigned EmitNodeInfo) { |
2557 | 19.8M | // It is possible we're using MorphNodeTo to replace a node with no |
2558 | 19.8M | // normal results with one that has a normal result (or we could be |
2559 | 19.8M | // adding a chain) and the input could have glue and chains as well. |
2560 | 19.8M | // In this case we need to shift the operands down. |
2561 | 19.8M | // FIXME: This is a horrible hack and broken in obscure cases, no worse |
2562 | 19.8M | // than the old isel though. |
2563 | 19.8M | int OldGlueResultNo = -1, OldChainResultNo = -1; |
2564 | 19.8M | |
2565 | 19.8M | unsigned NTMNumResults = Node->getNumValues(); |
2566 | 19.8M | if (Node->getValueType(NTMNumResults-1) == MVT::Glue19.8M ) { |
2567 | 4.96M | OldGlueResultNo = NTMNumResults-1; |
2568 | 4.96M | if (NTMNumResults != 1 && |
2569 | 4.93M | Node->getValueType(NTMNumResults-2) == MVT::Other) |
2570 | 4.93M | OldChainResultNo = NTMNumResults-2; |
2571 | 19.8M | } else if (14.8M Node->getValueType(NTMNumResults-1) == MVT::Other14.8M ) |
2572 | 8.12M | OldChainResultNo = NTMNumResults-1; |
2573 | 19.8M | |
2574 | 19.8M | // Call the underlying SelectionDAG routine to do the transmogrification. Note |
2575 | 19.8M | // that this deletes operands of the old node that become dead. |
2576 | 19.8M | SDNode *Res = CurDAG->MorphNodeTo(Node, ~TargetOpc, VTList, Ops); |
2577 | 19.8M | |
2578 | 19.8M | // MorphNodeTo can operate in two ways: if an existing node with the |
2579 | 19.8M | // specified operands exists, it can just return it. Otherwise, it |
2580 | 19.8M | // updates the node in place to have the requested operands. |
2581 | 19.8M | if (Res == Node19.8M ) { |
2582 | 19.8M | // If we updated the node in place, reset the node ID. To the isel, |
2583 | 19.8M | // this should be just like a newly allocated machine node. |
2584 | 19.8M | Res->setNodeId(-1); |
2585 | 19.8M | } |
2586 | 19.8M | |
2587 | 19.8M | unsigned ResNumResults = Res->getNumValues(); |
2588 | 19.8M | // Move the glue if needed. |
2589 | 19.8M | if ((EmitNodeInfo & OPFL_GlueOutput) && 19.8M OldGlueResultNo != -14.73M && |
2590 | 4.73M | (unsigned)OldGlueResultNo != ResNumResults-1) |
2591 | 4.68M | CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldGlueResultNo), |
2592 | 4.68M | SDValue(Res, ResNumResults-1)); |
2593 | 19.8M | |
2594 | 19.8M | if ((EmitNodeInfo & OPFL_GlueOutput) != 0) |
2595 | 4.73M | --ResNumResults; |
2596 | 19.8M | |
2597 | 19.8M | // Move the chain reference if needed. |
2598 | 19.8M | if ((EmitNodeInfo & OPFL_Chain) && 19.8M OldChainResultNo != -113.0M && |
2599 | 13.0M | (unsigned)OldChainResultNo != ResNumResults-1) |
2600 | 4.66M | CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo), |
2601 | 4.66M | SDValue(Res, ResNumResults-1)); |
2602 | 19.8M | |
2603 | 19.8M | // Otherwise, no replacement happened because the node already exists. Replace |
2604 | 19.8M | // Uses of the old node with the new one. |
2605 | 19.8M | if (Res != Node19.8M ) { |
2606 | 9.02k | CurDAG->ReplaceAllUsesWith(Node, Res); |
2607 | 9.02k | CurDAG->RemoveDeadNode(Node); |
2608 | 9.02k | } |
2609 | 19.8M | |
2610 | 19.8M | return Res; |
2611 | 19.8M | } |
2612 | | |
2613 | | /// CheckSame - Implements OP_CheckSame. |
2614 | | LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool |
2615 | | CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, |
2616 | | SDValue N, |
2617 | 42.8k | const SmallVectorImpl<std::pair<SDValue, SDNode*>> &RecordedNodes) { |
2618 | 42.8k | // Accept if it is exactly the same as a previously recorded node. |
2619 | 42.8k | unsigned RecNo = MatcherTable[MatcherIndex++]; |
2620 | 42.8k | assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); |
2621 | 42.8k | return N == RecordedNodes[RecNo].first; |
2622 | 42.8k | } |
2623 | | |
2624 | | /// CheckChildSame - Implements OP_CheckChildXSame. |
2625 | | LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool |
2626 | | CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, |
2627 | | SDValue N, |
2628 | | const SmallVectorImpl<std::pair<SDValue, SDNode*>> &RecordedNodes, |
2629 | 42.0k | unsigned ChildNo) { |
2630 | 42.0k | if (ChildNo >= N.getNumOperands()) |
2631 | 0 | return false; // Match fails if out of range child #. |
2632 | 42.0k | return ::CheckSame(MatcherTable, MatcherIndex, N.getOperand(ChildNo), |
2633 | 42.0k | RecordedNodes); |
2634 | 42.0k | } |
2635 | | |
2636 | | /// CheckPatternPredicate - Implements OP_CheckPatternPredicate. |
2637 | | LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool |
2638 | | CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, |
2639 | 3.08M | const SelectionDAGISel &SDISel) { |
2640 | 3.08M | return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]); |
2641 | 3.08M | } |
2642 | | |
2643 | | /// CheckNodePredicate - Implements OP_CheckNodePredicate. |
2644 | | LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool |
2645 | | CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, |
2646 | 35.7M | const SelectionDAGISel &SDISel, SDNode *N) { |
2647 | 35.7M | return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]); |
2648 | 35.7M | } |
2649 | | |
2650 | | LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool |
2651 | | CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex, |
2652 | 25.9M | SDNode *N) { |
2653 | 25.9M | uint16_t Opc = MatcherTable[MatcherIndex++]; |
2654 | 25.9M | Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8; |
2655 | 25.9M | return N->getOpcode() == Opc; |
2656 | 25.9M | } |
2657 | | |
2658 | | LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool |
2659 | | CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, |
2660 | 23.3M | const TargetLowering *TLI, const DataLayout &DL) { |
2661 | 23.3M | MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; |
2662 | 23.3M | if (N.getValueType() == VT23.3M ) return true8.65M ; |
2663 | 14.7M | |
2664 | 14.7M | // Handle the case when VT is iPTR. |
2665 | 14.7M | return VT == MVT::iPTR && 14.7M N.getValueType() == TLI->getPointerTy(DL)47.4k ; |
2666 | 23.3M | } |
2667 | | |
2668 | | LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool |
2669 | | CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex, |
2670 | | SDValue N, const TargetLowering *TLI, const DataLayout &DL, |
2671 | 17.8M | unsigned ChildNo) { |
2672 | 17.8M | if (ChildNo >= N.getNumOperands()) |
2673 | 0 | return false; // Match fails if out of range child #. |
2674 | 17.8M | return ::CheckType(MatcherTable, MatcherIndex, N.getOperand(ChildNo), TLI, |
2675 | 17.8M | DL); |
2676 | 17.8M | } |
2677 | | |
2678 | | LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool |
2679 | | CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex, |
2680 | 55.3k | SDValue N) { |
2681 | 55.3k | return cast<CondCodeSDNode>(N)->get() == |
2682 | 55.3k | (ISD::CondCode)MatcherTable[MatcherIndex++]; |
2683 | 55.3k | } |
2684 | | |
2685 | | LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool |
2686 | | CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex, |
2687 | 59.7k | SDValue N, const TargetLowering *TLI, const DataLayout &DL) { |
2688 | 59.7k | MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; |
2689 | 59.7k | if (cast<VTSDNode>(N)->getVT() == VT) |
2690 | 28.8k | return true; |
2691 | 30.9k | |
2692 | 30.9k | // Handle the case when VT is iPTR. |
2693 | 30.9k | return VT == MVT::iPTR && 30.9k cast<VTSDNode>(N)->getVT() == TLI->getPointerTy(DL)0 ; |
2694 | 59.7k | } |
2695 | | |
2696 | | LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool |
2697 | | CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, |
2698 | 5.08M | SDValue N) { |
2699 | 5.08M | int64_t Val = MatcherTable[MatcherIndex++]; |
2700 | 5.08M | if (Val & 128) |
2701 | 3.39M | Val = GetVBR(Val, MatcherTable, MatcherIndex); |
2702 | 5.08M | |
2703 | 5.08M | ConstantSDNode *C = dyn_cast<ConstantSDNode>(N); |
2704 | 4.23M | return C && C->getSExtValue() == Val; |
2705 | 5.08M | } |
2706 | | |
2707 | | LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool |
2708 | | CheckChildInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, |
2709 | 5.01M | SDValue N, unsigned ChildNo) { |
2710 | 5.01M | if (ChildNo >= N.getNumOperands()) |
2711 | 0 | return false; // Match fails if out of range child #. |
2712 | 5.01M | return ::CheckInteger(MatcherTable, MatcherIndex, N.getOperand(ChildNo)); |
2713 | 5.01M | } |
2714 | | |
2715 | | LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool |
2716 | | CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, |
2717 | 1.66M | SDValue N, const SelectionDAGISel &SDISel) { |
2718 | 1.66M | int64_t Val = MatcherTable[MatcherIndex++]; |
2719 | 1.66M | if (Val & 128) |
2720 | 1.66M | Val = GetVBR(Val, MatcherTable, MatcherIndex); |
2721 | 1.66M | |
2722 | 1.66M | if (N->getOpcode() != ISD::AND1.66M ) return false626k ; |
2723 | 1.03M | |
2724 | 1.03M | ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); |
2725 | 711k | return C && SDISel.CheckAndMask(N.getOperand(0), C, Val); |
2726 | 1.66M | } |
2727 | | |
2728 | | LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool |
2729 | | CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, |
2730 | 4.68k | SDValue N, const SelectionDAGISel &SDISel) { |
2731 | 4.68k | int64_t Val = MatcherTable[MatcherIndex++]; |
2732 | 4.68k | if (Val & 128) |
2733 | 4.68k | Val = GetVBR(Val, MatcherTable, MatcherIndex); |
2734 | 4.68k | |
2735 | 4.68k | if (N->getOpcode() != ISD::OR4.68k ) return false0 ; |
2736 | 4.68k | |
2737 | 4.68k | ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); |
2738 | 644 | return C && SDISel.CheckOrMask(N.getOperand(0), C, Val); |
2739 | 4.68k | } |
2740 | | |
2741 | | /// IsPredicateKnownToFail - If we know how and can do so without pushing a |
2742 | | /// scope, evaluate the current node. If the current predicate is known to |
2743 | | /// fail, set Result=true and return anything. If the current predicate is |
2744 | | /// known to pass, set Result=false and return the MatcherIndex to continue |
2745 | | /// with. If the current predicate is unknown, set Result=false and return the |
2746 | | /// MatcherIndex to continue with. |
2747 | | static unsigned IsPredicateKnownToFail(const unsigned char *Table, |
2748 | | unsigned Index, SDValue N, |
2749 | | bool &Result, |
2750 | | const SelectionDAGISel &SDISel, |
2751 | 45.4M | SmallVectorImpl<std::pair<SDValue, SDNode*>> &RecordedNodes) { |
2752 | 45.4M | switch (Table[Index++]) { |
2753 | 16.0M | default: |
2754 | 16.0M | Result = false; |
2755 | 16.0M | return Index-1; // Could not evaluate this predicate. |
2756 | 0 | case SelectionDAGISel::OPC_CheckSame: |
2757 | 0 | Result = !::CheckSame(Table, Index, N, RecordedNodes); |
2758 | 0 | return Index; |
2759 | 31.5k | case SelectionDAGISel::OPC_CheckChild0Same: |
2760 | 31.5k | case SelectionDAGISel::OPC_CheckChild1Same: |
2761 | 31.5k | case SelectionDAGISel::OPC_CheckChild2Same: |
2762 | 31.5k | case SelectionDAGISel::OPC_CheckChild3Same: |
2763 | 31.5k | Result = !::CheckChildSame(Table, Index, N, RecordedNodes, |
2764 | 31.5k | Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Same); |
2765 | 31.5k | return Index; |
2766 | 1.22M | case SelectionDAGISel::OPC_CheckPatternPredicate: |
2767 | 1.22M | Result = !::CheckPatternPredicate(Table, Index, SDISel); |
2768 | 1.22M | return Index; |
2769 | 11.0M | case SelectionDAGISel::OPC_CheckPredicate: |
2770 | 11.0M | Result = !::CheckNodePredicate(Table, Index, SDISel, N.getNode()); |
2771 | 11.0M | return Index; |
2772 | 152k | case SelectionDAGISel::OPC_CheckOpcode: |
2773 | 152k | Result = !::CheckOpcode(Table, Index, N.getNode()); |
2774 | 152k | return Index; |
2775 | 55.6k | case SelectionDAGISel::OPC_CheckType: |
2776 | 55.6k | Result = !::CheckType(Table, Index, N, SDISel.TLI, |
2777 | 55.6k | SDISel.CurDAG->getDataLayout()); |
2778 | 55.6k | return Index; |
2779 | 12.1M | case SelectionDAGISel::OPC_CheckChild0Type: |
2780 | 12.1M | case SelectionDAGISel::OPC_CheckChild1Type: |
2781 | 12.1M | case SelectionDAGISel::OPC_CheckChild2Type: |
2782 | 12.1M | case SelectionDAGISel::OPC_CheckChild3Type: |
2783 | 12.1M | case SelectionDAGISel::OPC_CheckChild4Type: |
2784 | 12.1M | case SelectionDAGISel::OPC_CheckChild5Type: |
2785 | 12.1M | case SelectionDAGISel::OPC_CheckChild6Type: |
2786 | 12.1M | case SelectionDAGISel::OPC_CheckChild7Type: |
2787 | 12.1M | Result = !::CheckChildType( |
2788 | 12.1M | Table, Index, N, SDISel.TLI, SDISel.CurDAG->getDataLayout(), |
2789 | 12.1M | Table[Index - 1] - SelectionDAGISel::OPC_CheckChild0Type); |
2790 | 12.1M | return Index; |
2791 | 37.4k | case SelectionDAGISel::OPC_CheckCondCode: |
2792 | 37.4k | Result = !::CheckCondCode(Table, Index, N); |
2793 | 37.4k | return Index; |
2794 | 58.3k | case SelectionDAGISel::OPC_CheckValueType: |
2795 | 58.3k | Result = !::CheckValueType(Table, Index, N, SDISel.TLI, |
2796 | 58.3k | SDISel.CurDAG->getDataLayout()); |
2797 | 58.3k | return Index; |
2798 | 65.0k | case SelectionDAGISel::OPC_CheckInteger: |
2799 | 65.0k | Result = !::CheckInteger(Table, Index, N); |
2800 | 65.0k | return Index; |
2801 | 3.94M | case SelectionDAGISel::OPC_CheckChild0Integer: |
2802 | 3.94M | case SelectionDAGISel::OPC_CheckChild1Integer: |
2803 | 3.94M | case SelectionDAGISel::OPC_CheckChild2Integer: |
2804 | 3.94M | case SelectionDAGISel::OPC_CheckChild3Integer: |
2805 | 3.94M | case SelectionDAGISel::OPC_CheckChild4Integer: |
2806 | 3.94M | Result = !::CheckChildInteger(Table, Index, N, |
2807 | 3.94M | Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Integer); |
2808 | 3.94M | return Index; |
2809 | 682k | case SelectionDAGISel::OPC_CheckAndImm: |
2810 | 682k | Result = !::CheckAndImm(Table, Index, N, SDISel); |
2811 | 682k | return Index; |
2812 | 0 | case SelectionDAGISel::OPC_CheckOrImm: |
2813 | 0 | Result = !::CheckOrImm(Table, Index, N, SDISel); |
2814 | 0 | return Index; |
2815 | 0 | } |
2816 | 0 | } |
2817 | | |
2818 | | namespace { |
2819 | | |
2820 | | struct MatchScope { |
2821 | | /// FailIndex - If this match fails, this is the index to continue with. |
2822 | | unsigned FailIndex; |
2823 | | |
2824 | | /// NodeStack - The node stack when the scope was formed. |
2825 | | SmallVector<SDValue, 4> NodeStack; |
2826 | | |
2827 | | /// NumRecordedNodes - The number of recorded nodes when the scope was formed. |
2828 | | unsigned NumRecordedNodes; |
2829 | | |
2830 | | /// NumMatchedMemRefs - The number of matched memref entries. |
2831 | | unsigned NumMatchedMemRefs; |
2832 | | |
2833 | | /// InputChain/InputGlue - The current chain/glue |
2834 | | SDValue InputChain, InputGlue; |
2835 | | |
2836 | | /// HasChainNodesMatched - True if the ChainNodesMatched list is non-empty. |
2837 | | bool HasChainNodesMatched; |
2838 | | }; |
2839 | | |
2840 | | /// \\brief A DAG update listener to keep the matching state |
2841 | | /// (i.e. RecordedNodes and MatchScope) uptodate if the target is allowed to |
2842 | | /// change the DAG while matching. X86 addressing mode matcher is an example |
2843 | | /// for this. |
2844 | | class MatchStateUpdater : public SelectionDAG::DAGUpdateListener |
2845 | | { |
2846 | | SDNode **NodeToMatch; |
2847 | | SmallVectorImpl<std::pair<SDValue, SDNode *>> &RecordedNodes; |
2848 | | SmallVectorImpl<MatchScope> &MatchScopes; |
2849 | | |
2850 | | public: |
2851 | | MatchStateUpdater(SelectionDAG &DAG, SDNode **NodeToMatch, |
2852 | | SmallVectorImpl<std::pair<SDValue, SDNode *>> &RN, |
2853 | | SmallVectorImpl<MatchScope> &MS) |
2854 | | : SelectionDAG::DAGUpdateListener(DAG), NodeToMatch(NodeToMatch), |
2855 | 323k | RecordedNodes(RN), MatchScopes(MS) {} |
2856 | | |
2857 | 12 | void NodeDeleted(SDNode *N, SDNode *E) override { |
2858 | 12 | // Some early-returns here to avoid the search if we deleted the node or |
2859 | 12 | // if the update comes from MorphNodeTo (MorphNodeTo is the last thing we |
2860 | 12 | // do, so it's unnecessary to update matching state at that point). |
2861 | 12 | // Neither of these can occur currently because we only install this |
2862 | 12 | // update listener during matching a complex patterns. |
2863 | 12 | if (!E || 12 E->isMachineOpcode()12 ) |
2864 | 2 | return; |
2865 | 10 | // Check if NodeToMatch was updated. |
2866 | 10 | if (10 N == *NodeToMatch10 ) |
2867 | 5 | *NodeToMatch = E; |
2868 | 10 | // Performing linear search here does not matter because we almost never |
2869 | 10 | // run this code. You'd have to have a CSE during complex pattern |
2870 | 10 | // matching. |
2871 | 10 | for (auto &I : RecordedNodes) |
2872 | 62 | if (62 I.first.getNode() == N62 ) |
2873 | 6 | I.first.setNode(E); |
2874 | 10 | |
2875 | 10 | for (auto &I : MatchScopes) |
2876 | 20 | for (auto &J : I.NodeStack) |
2877 | 21 | if (21 J.getNode() == N21 ) |
2878 | 11 | J.setNode(E); |
2879 | 12 | } |
2880 | | }; |
2881 | | |
2882 | | } // end anonymous namespace |
2883 | | |
2884 | | void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, |
2885 | | const unsigned char *MatcherTable, |
2886 | 59.4M | unsigned TableSize) { |
2887 | 59.4M | // FIXME: Should these even be selected? Handle these cases in the caller? |
2888 | 59.4M | switch (NodeToMatch->getOpcode()) { |
2889 | 20.0M | default: |
2890 | 20.0M | break; |
2891 | 38.8M | case ISD::EntryToken: // These nodes remain the same. |
2892 | 38.8M | case ISD::BasicBlock: |
2893 | 38.8M | case ISD::Register: |
2894 | 38.8M | case ISD::RegisterMask: |
2895 | 38.8M | case ISD::HANDLENODE: |
2896 | 38.8M | case ISD::MDNODE_SDNODE: |
2897 | 38.8M | case ISD::TargetConstant: |
2898 | 38.8M | case ISD::TargetConstantFP: |
2899 | 38.8M | case ISD::TargetConstantPool: |
2900 | 38.8M | case ISD::TargetFrameIndex: |
2901 | 38.8M | case ISD::TargetExternalSymbol: |
2902 | 38.8M | case ISD::MCSymbol: |
2903 | 38.8M | case ISD::TargetBlockAddress: |
2904 | 38.8M | case ISD::TargetJumpTable: |
2905 | 38.8M | case ISD::TargetGlobalTLSAddress: |
2906 | 38.8M | case ISD::TargetGlobalAddress: |
2907 | 38.8M | case ISD::TokenFactor: |
2908 | 38.8M | case ISD::CopyFromReg: |
2909 | 38.8M | case ISD::CopyToReg: |
2910 | 38.8M | case ISD::EH_LABEL: |
2911 | 38.8M | case ISD::ANNOTATION_LABEL: |
2912 | 38.8M | case ISD::LIFETIME_START: |
2913 | 38.8M | case ISD::LIFETIME_END: |
2914 | 38.8M | NodeToMatch->setNodeId(-1); // Mark selected. |
2915 | 38.8M | return; |
2916 | 498k | case ISD::AssertSext: |
2917 | 498k | case ISD::AssertZext: |
2918 | 498k | CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, 0), |
2919 | 498k | NodeToMatch->getOperand(0)); |
2920 | 498k | CurDAG->RemoveDeadNode(NodeToMatch); |
2921 | 498k | return; |
2922 | 11.3k | case ISD::INLINEASM: |
2923 | 11.3k | Select_INLINEASM(NodeToMatch); |
2924 | 11.3k | return; |
2925 | 56 | case ISD::READ_REGISTER: |
2926 | 56 | Select_READ_REGISTER(NodeToMatch); |
2927 | 56 | return; |
2928 | 31 | case ISD::WRITE_REGISTER: |
2929 | 31 | Select_WRITE_REGISTER(NodeToMatch); |
2930 | 31 | return; |
2931 | 7.32k | case ISD::UNDEF: |
2932 | 7.32k | Select_UNDEF(NodeToMatch); |
2933 | 7.32k | return; |
2934 | 20.0M | } |
2935 | 20.0M | |
2936 | 59.4M | assert(!NodeToMatch->isMachineOpcode() && "Node already selected!"); |
2937 | 20.0M | |
2938 | 20.0M | // Set up the node stack with NodeToMatch as the only node on the stack. |
2939 | 20.0M | SmallVector<SDValue, 8> NodeStack; |
2940 | 20.0M | SDValue N = SDValue(NodeToMatch, 0); |
2941 | 20.0M | NodeStack.push_back(N); |
2942 | 20.0M | |
2943 | 20.0M | // MatchScopes - Scopes used when matching, if a match failure happens, this |
2944 | 20.0M | // indicates where to continue checking. |
2945 | 20.0M | SmallVector<MatchScope, 8> MatchScopes; |
2946 | 20.0M | |
2947 | 20.0M | // RecordedNodes - This is the set of nodes that have been recorded by the |
2948 | 20.0M | // state machine. The second value is the parent of the node, or null if the |
2949 | 20.0M | // root is recorded. |
2950 | 20.0M | SmallVector<std::pair<SDValue, SDNode*>, 8> RecordedNodes; |
2951 | 20.0M | |
2952 | 20.0M | // MatchedMemRefs - This is the set of MemRef's we've seen in the input |
2953 | 20.0M | // pattern. |
2954 | 20.0M | SmallVector<MachineMemOperand*, 2> MatchedMemRefs; |
2955 | 20.0M | |
2956 | 20.0M | // These are the current input chain and glue for use when generating nodes. |
2957 | 20.0M | // Various Emit operations change these. For example, emitting a copytoreg |
2958 | 20.0M | // uses and updates these. |
2959 | 20.0M | SDValue InputChain, InputGlue; |
2960 | 20.0M | |
2961 | 20.0M | // ChainNodesMatched - If a pattern matches nodes that have input/output |
2962 | 20.0M | // chains, the OPC_EmitMergeInputChains operation is emitted which indicates |
2963 | 20.0M | // which ones they are. The result is captured into this list so that we can |
2964 | 20.0M | // update the chain results when the pattern is complete. |
2965 | 20.0M | SmallVector<SDNode*, 3> ChainNodesMatched; |
2966 | 20.0M | |
2967 | 20.0M | DEBUG(dbgs() << "ISEL: Starting pattern match on root node: "; |
2968 | 20.0M | NodeToMatch->dump(CurDAG); |
2969 | 20.0M | dbgs() << '\n'); |
2970 | 20.0M | |
2971 | 20.0M | // Determine where to start the interpreter. Normally we start at opcode #0, |
2972 | 20.0M | // but if the state machine starts with an OPC_SwitchOpcode, then we |
2973 | 20.0M | // accelerate the first lookup (which is guaranteed to be hot) with the |
2974 | 20.0M | // OpcodeOffset table. |
2975 | 20.0M | unsigned MatcherIndex = 0; |
2976 | 20.0M | |
2977 | 20.0M | if (!OpcodeOffset.empty()20.0M ) { |
2978 | 20.0M | // Already computed the OpcodeOffset table, just index into it. |
2979 | 20.0M | if (N.getOpcode() < OpcodeOffset.size()) |
2980 | 20.0M | MatcherIndex = OpcodeOffset[N.getOpcode()]; |
2981 | 20.0M | DEBUG(dbgs() << " Initial Opcode index to " << MatcherIndex << "\n"); |
2982 | 20.0M | |
2983 | 20.0M | } else if (32.3k MatcherTable[0] == OPC_SwitchOpcode32.3k ) { |
2984 | 31.1k | // Otherwise, the table isn't computed, but the state machine does start |
2985 | 31.1k | // with an OPC_SwitchOpcode instruction. Populate the table now, since this |
2986 | 31.1k | // is the first time we're selecting an instruction. |
2987 | 31.1k | unsigned Idx = 1; |
2988 | 7.08M | while (true7.08M ) { |
2989 | 7.08M | // Get the size of this case. |
2990 | 7.08M | unsigned CaseSize = MatcherTable[Idx++]; |
2991 | 7.08M | if (CaseSize & 128) |
2992 | 3.78M | CaseSize = GetVBR(CaseSize, MatcherTable, Idx); |
2993 | 7.08M | if (CaseSize == 07.08M ) break31.1k ; |
2994 | 7.05M | |
2995 | 7.05M | // Get the opcode, add the index to the table. |
2996 | 7.05M | uint16_t Opc = MatcherTable[Idx++]; |
2997 | 7.05M | Opc |= (unsigned short)MatcherTable[Idx++] << 8; |
2998 | 7.05M | if (Opc >= OpcodeOffset.size()) |
2999 | 48.3k | OpcodeOffset.resize((Opc+1)*2); |
3000 | 7.08M | OpcodeOffset[Opc] = Idx; |
3001 | 7.08M | Idx += CaseSize; |
3002 | 7.08M | } |
3003 | 31.1k | |
3004 | 31.1k | // Okay, do the lookup for the first opcode. |
3005 | 31.1k | if (N.getOpcode() < OpcodeOffset.size()) |
3006 | 31.1k | MatcherIndex = OpcodeOffset[N.getOpcode()]; |
3007 | 32.3k | } |
3008 | 20.0M | |
3009 | 281M | while (true281M ) { |
3010 | 281M | assert(MatcherIndex < TableSize && "Invalid index"); |
3011 | | #ifndef NDEBUG |
3012 | | unsigned CurrentOpcodeIndex = MatcherIndex; |
3013 | | #endif |
3014 | | BuiltinOpcodes Opcode = (BuiltinOpcodes)MatcherTable[MatcherIndex++]; |
3015 | 281M | switch (Opcode) { |
3016 | 27.4M | case OPC_Scope: { |
3017 | 27.4M | // Okay, the semantics of this operation are that we should push a scope |
3018 | 27.4M | // then evaluate the first child. However, pushing a scope only to have |
3019 | 27.4M | // the first check fail (which then pops it) is inefficient. If we can |
3020 | 27.4M | // determine immediately that the first check (or first several) will |
3021 | 27.4M | // immediately fail, don't even bother pushing a scope for them. |
3022 | 27.4M | unsigned FailIndex; |
3023 | 27.4M | |
3024 | 45.7M | while (true45.7M ) { |
3025 | 45.7M | unsigned NumToSkip = MatcherTable[MatcherIndex++]; |
3026 | 45.7M | if (NumToSkip & 128) |
3027 | 7.60M | NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex); |
3028 | 45.7M | // Found the end of the scope with no match. |
3029 | 45.7M | if (NumToSkip == 045.7M ) { |
3030 | 361k | FailIndex = 0; |
3031 | 361k | break; |
3032 | 361k | } |
3033 | 45.4M | |
3034 | 45.4M | FailIndex = MatcherIndex+NumToSkip; |
3035 | 45.4M | |
3036 | 45.4M | unsigned MatcherIndexOfPredicate = MatcherIndex; |
3037 | 45.4M | (void)MatcherIndexOfPredicate; // silence warning. |
3038 | 45.4M | |
3039 | 45.4M | // If we can't evaluate this predicate without pushing a scope (e.g. if |
3040 | 45.4M | // it is a 'MoveParent') or if the predicate succeeds on this node, we |
3041 | 45.4M | // push the scope and evaluate the full predicate chain. |
3042 | 45.4M | bool Result; |
3043 | 45.4M | MatcherIndex = IsPredicateKnownToFail(MatcherTable, MatcherIndex, N, |
3044 | 45.4M | Result, *this, RecordedNodes); |
3045 | 45.4M | if (!Result) |
3046 | 27.1M | break; |
3047 | 18.2M | |
3048 | 18.2M | DEBUG18.2M (dbgs() << " Skipped scope entry (due to false predicate) at " |
3049 | 18.2M | << "index " << MatcherIndexOfPredicate |
3050 | 18.2M | << ", continuing at " << FailIndex << "\n"); |
3051 | 18.2M | ++NumDAGIselRetries; |
3052 | 18.2M | |
3053 | 18.2M | // Otherwise, we know that this case of the Scope is guaranteed to fail, |
3054 | 18.2M | // move to the next case. |
3055 | 18.2M | MatcherIndex = FailIndex; |
3056 | 18.2M | } |
3057 | 27.4M | |
3058 | 27.4M | // If the whole scope failed to match, bail. |
3059 | 27.4M | if (FailIndex == 027.4M ) break361k ; |
3060 | 27.1M | |
3061 | 27.1M | // Push a MatchScope which indicates where to go if the first child fails |
3062 | 27.1M | // to match. |
3063 | 27.1M | MatchScope NewEntry; |
3064 | 27.1M | NewEntry.FailIndex = FailIndex; |
3065 | 27.1M | NewEntry.NodeStack.append(NodeStack.begin(), NodeStack.end()); |
3066 | 27.1M | NewEntry.NumRecordedNodes = RecordedNodes.size(); |
3067 | 27.1M | NewEntry.NumMatchedMemRefs = MatchedMemRefs.size(); |
3068 | 27.1M | NewEntry.InputChain = InputChain; |
3069 | 27.1M | NewEntry.InputGlue = InputGlue; |
3070 | 27.1M | NewEntry.HasChainNodesMatched = !ChainNodesMatched.empty(); |
3071 | 27.1M | MatchScopes.push_back(NewEntry); |
3072 | 27.1M | continue; |
3073 | 27.1M | } |
3074 | 14.3M | case OPC_RecordNode: { |
3075 | 14.3M | // Remember this node, it may end up being an operand in the pattern. |
3076 | 14.3M | SDNode *Parent = nullptr; |
3077 | 14.3M | if (NodeStack.size() > 1) |
3078 | 137k | Parent = NodeStack[NodeStack.size()-2].getNode(); |
3079 | 14.3M | RecordedNodes.push_back(std::make_pair(N, Parent)); |
3080 | 14.3M | continue; |
3081 | 27.1M | } |
3082 | 27.1M | |
3083 | 39.6M | case OPC_RecordChild0: 39.6M case OPC_RecordChild1: |
3084 | 39.6M | case OPC_RecordChild2: 39.6M case OPC_RecordChild3: |
3085 | 39.6M | case OPC_RecordChild4: 39.6M case OPC_RecordChild5: |
3086 | 39.6M | case OPC_RecordChild6: 39.6M case OPC_RecordChild7: { |
3087 | 39.6M | unsigned ChildNo = Opcode-OPC_RecordChild0; |
3088 | 39.6M | if (ChildNo >= N.getNumOperands()) |
3089 | 0 | break; // Match fails if out of range child #. |
3090 | 39.6M | |
3091 | 39.6M | RecordedNodes.push_back(std::make_pair(N->getOperand(ChildNo), |
3092 | 39.6M | N.getNode())); |
3093 | 39.6M | continue; |
3094 | 39.6M | } |
3095 | 3.87M | case OPC_RecordMemRef: |
3096 | 3.87M | MatchedMemRefs.push_back(cast<MemSDNode>(N)->getMemOperand()); |
3097 | 3.87M | continue; |
3098 | 39.6M | |
3099 | 3.84M | case OPC_CaptureGlueInput: |
3100 | 3.84M | // If the current node has an input glue, capture it in InputGlue. |
3101 | 3.84M | if (N->getNumOperands() != 0 && |
3102 | 3.84M | N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) |
3103 | 3.49M | InputGlue = N->getOperand(N->getNumOperands()-1); |
3104 | 3.84M | continue; |
3105 | 39.6M | |
3106 | 5.74k | case OPC_MoveChild: { |
3107 | 5.74k | unsigned ChildNo = MatcherTable[MatcherIndex++]; |
3108 | 5.74k | if (ChildNo >= N.getNumOperands()) |
3109 | 10 | break; // Match fails if out of range child #. |
3110 | 5.73k | N = N.getOperand(ChildNo); |
3111 | 5.73k | NodeStack.push_back(N); |
3112 | 5.73k | continue; |
3113 | 5.73k | } |
3114 | 5.73k | |
3115 | 32.0M | case OPC_MoveChild0: 32.0M case OPC_MoveChild1: |
3116 | 32.0M | case OPC_MoveChild2: 32.0M case OPC_MoveChild3: |
3117 | 32.0M | case OPC_MoveChild4: 32.0M case OPC_MoveChild5: |
3118 | 32.0M | case OPC_MoveChild6: 32.0M case OPC_MoveChild7: { |
3119 | 32.0M | unsigned ChildNo = Opcode-OPC_MoveChild0; |
3120 | 32.0M | if (ChildNo >= N.getNumOperands()) |
3121 | 164 | break; // Match fails if out of range child #. |
3122 | 32.0M | N = N.getOperand(ChildNo); |
3123 | 32.0M | NodeStack.push_back(N); |
3124 | 32.0M | continue; |
3125 | 32.0M | } |
3126 | 32.0M | |
3127 | 17.4M | case OPC_MoveParent: |
3128 | 17.4M | // Pop the current node off the NodeStack. |
3129 | 17.4M | NodeStack.pop_back(); |
3130 | 17.4M | assert(!NodeStack.empty() && "Node stack imbalance!"); |
3131 | 17.4M | N = NodeStack.back(); |
3132 | 17.4M | continue; |
3133 | 32.0M | |
3134 | 760 | case OPC_CheckSame: |
3135 | 760 | if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)760 ) break0 ; |
3136 | 760 | continue; |
3137 | 760 | |
3138 | 10.5k | case OPC_CheckChild0Same: 10.5k case OPC_CheckChild1Same: |
3139 | 10.5k | case OPC_CheckChild2Same: 10.5k case OPC_CheckChild3Same: |
3140 | 10.5k | if (!::CheckChildSame(MatcherTable, MatcherIndex, N, RecordedNodes, |
3141 | 10.5k | Opcode-OPC_CheckChild0Same)) |
3142 | 4.50k | break; |
3143 | 6.00k | continue; |
3144 | 6.00k | |
3145 | 1.86M | case OPC_CheckPatternPredicate: |
3146 | 1.86M | if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this)1.86M ) break466k ; |
3147 | 1.39M | continue; |
3148 | 24.7M | case OPC_CheckPredicate: |
3149 | 24.7M | if (!::CheckNodePredicate(MatcherTable, MatcherIndex, *this, |
3150 | 24.7M | N.getNode())) |
3151 | 14.3M | break; |
3152 | 10.3M | continue; |
3153 | 18.7M | case OPC_CheckComplexPat: { |
3154 | 18.7M | unsigned CPNum = MatcherTable[MatcherIndex++]; |
3155 | 18.7M | unsigned RecNo = MatcherTable[MatcherIndex++]; |
3156 | 18.7M | assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat"); |
3157 | 18.7M | |
3158 | 18.7M | // If target can modify DAG during matching, keep the matching state |
3159 | 18.7M | // consistent. |
3160 | 18.7M | std::unique_ptr<MatchStateUpdater> MSU; |
3161 | 18.7M | if (ComplexPatternFuncMutatesDAG()) |
3162 | 323k | MSU.reset(new MatchStateUpdater(*CurDAG, &NodeToMatch, RecordedNodes, |
3163 | 323k | MatchScopes)); |
3164 | 18.7M | |
3165 | 18.7M | if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second, |
3166 | 18.7M | RecordedNodes[RecNo].first, CPNum, |
3167 | 18.7M | RecordedNodes)) |
3168 | 13.7M | break; |
3169 | 5.05M | continue; |
3170 | 5.05M | } |
3171 | 25.7M | case OPC_CheckOpcode: |
3172 | 25.7M | if (!::CheckOpcode(MatcherTable, MatcherIndex, N.getNode())25.7M ) break11.6M ; |
3173 | 14.1M | continue; |
3174 | 14.1M | |
3175 | 5.52M | case OPC_CheckType: |
3176 | 5.52M | if (!::CheckType(MatcherTable, MatcherIndex, N, TLI, |
3177 | 5.52M | CurDAG->getDataLayout())) |
3178 | 1.25M | break; |
3179 | 4.26M | continue; |
3180 | 4.26M | |
3181 | 5.98M | case OPC_SwitchOpcode: { |
3182 | 5.98M | unsigned CurNodeOpcode = N.getOpcode(); |
3183 | 5.98M | unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart; |
3184 | 5.98M | unsigned CaseSize; |
3185 | 16.8M | while (true16.8M ) { |
3186 | 16.8M | // Get the size of this case. |
3187 | 16.8M | CaseSize = MatcherTable[MatcherIndex++]; |
3188 | 16.8M | if (CaseSize & 128) |
3189 | 3.29M | CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex); |
3190 | 16.8M | if (CaseSize == 016.8M ) break2.62M ; |
3191 | 14.1M | |
3192 | 14.1M | uint16_t Opc = MatcherTable[MatcherIndex++]; |
3193 | 14.1M | Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8; |
3194 | 14.1M | |
3195 | 14.1M | // If the opcode matches, then we will execute this case. |
3196 | 14.1M | if (CurNodeOpcode == Opc) |
3197 | 3.35M | break; |
3198 | 10.8M | |
3199 | 10.8M | // Otherwise, skip over this case. |
3200 | 10.8M | MatcherIndex += CaseSize; |
3201 | 10.8M | } |
3202 | 5.98M | |
3203 | 5.98M | // If no cases matched, bail out. |
3204 | 5.98M | if (CaseSize == 05.98M ) break2.62M ; |
3205 | 3.35M | |
3206 | 3.35M | // Otherwise, execute the case we found. |
3207 | 3.35M | DEBUG3.35M (dbgs() << " OpcodeSwitch from " << SwitchStart |
3208 | 3.35M | << " to " << MatcherIndex << "\n"); |
3209 | 3.35M | continue; |
3210 | 3.35M | } |
3211 | 3.35M | |
3212 | 9.83M | case OPC_SwitchType: { |
3213 | 9.83M | MVT CurNodeVT = N.getSimpleValueType(); |
3214 | 9.83M | unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart; |
3215 | 9.83M | unsigned CaseSize; |
3216 | 18.9M | while (true18.9M ) { |
3217 | 18.9M | // Get the size of this case. |
3218 | 18.9M | CaseSize = MatcherTable[MatcherIndex++]; |
3219 | 18.9M | if (CaseSize & 128) |
3220 | 126k | CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex); |
3221 | 18.9M | if (CaseSize == 018.9M ) break912k ; |
3222 | 18.0M | |
3223 | 18.0M | MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; |
3224 | 18.0M | if (CaseVT == MVT::iPTR) |
3225 | 0 | CaseVT = TLI->getPointerTy(CurDAG->getDataLayout()); |
3226 | 18.0M | |
3227 | 18.0M | // If the VT matches, then we will execute this case. |
3228 | 18.0M | if (CurNodeVT == CaseVT) |
3229 | 8.91M | break; |
3230 | 9.14M | |
3231 | 9.14M | // Otherwise, skip over this case. |
3232 | 9.14M | MatcherIndex += CaseSize; |
3233 | 9.14M | } |
3234 | 9.83M | |
3235 | 9.83M | // If no cases matched, bail out. |
3236 | 9.83M | if (CaseSize == 09.83M ) break912k ; |
3237 | 8.91M | |
3238 | 8.91M | // Otherwise, execute the case we found. |
3239 | 8.91M | DEBUG8.91M (dbgs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString() |
3240 | 8.91M | << "] from " << SwitchStart << " to " << MatcherIndex<<'\n'); |
3241 | 8.91M | continue; |
3242 | 8.91M | } |
3243 | 5.71M | case OPC_CheckChild0Type: 5.71M case OPC_CheckChild1Type: |
3244 | 5.71M | case OPC_CheckChild2Type: 5.71M case OPC_CheckChild3Type: |
3245 | 5.71M | case OPC_CheckChild4Type: 5.71M case OPC_CheckChild5Type: |
3246 | 5.71M | case OPC_CheckChild6Type: 5.71M case OPC_CheckChild7Type: |
3247 | 5.71M | if (!::CheckChildType(MatcherTable, MatcherIndex, N, TLI, |
3248 | 5.71M | CurDAG->getDataLayout(), |
3249 | 5.71M | Opcode - OPC_CheckChild0Type)) |
3250 | 5.01M | break; |
3251 | 700k | continue; |
3252 | 17.9k | case OPC_CheckCondCode: |
3253 | 17.9k | if (!::CheckCondCode(MatcherTable, MatcherIndex, N)17.9k ) break15.1k ; |
3254 | 2.74k | continue; |
3255 | 1.47k | case OPC_CheckValueType: |
3256 | 1.47k | if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI, |
3257 | 1.47k | CurDAG->getDataLayout())) |
3258 | 108 | break; |
3259 | 1.36k | continue; |
3260 | 4.82k | case OPC_CheckInteger: |
3261 | 4.82k | if (!::CheckInteger(MatcherTable, MatcherIndex, N)4.82k ) break4.79k ; |
3262 | 31 | continue; |
3263 | 1.07M | case OPC_CheckChild0Integer: 1.07M case OPC_CheckChild1Integer: |
3264 | 1.07M | case OPC_CheckChild2Integer: 1.07M case OPC_CheckChild3Integer: |
3265 | 1.07M | case OPC_CheckChild4Integer: |
3266 | 1.07M | if (!::CheckChildInteger(MatcherTable, MatcherIndex, N, |
3267 | 1.07M | Opcode-OPC_CheckChild0Integer)) break; |
3268 | 196k | continue; |
3269 | 981k | case OPC_CheckAndImm: |
3270 | 981k | if (!::CheckAndImm(MatcherTable, MatcherIndex, N, *this)981k ) break896k ; |
3271 | 84.8k | continue; |
3272 | 4.68k | case OPC_CheckOrImm: |
3273 | 4.68k | if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)4.68k ) break4.67k ; |
3274 | 4 | continue; |
3275 | 4 | |
3276 | 97.3k | case OPC_CheckFoldableChainNode: { |
3277 | 97.3k | assert(NodeStack.size() != 1 && "No parent node"); |
3278 | 97.3k | // Verify that all intermediate nodes between the root and this one have |
3279 | 97.3k | // a single use. |
3280 | 97.3k | bool HasMultipleUses = false; |
3281 | 119k | for (unsigned i = 1, e = NodeStack.size()-1; i != e119k ; ++i21.6k ) |
3282 | 29.9k | if (29.9k !NodeStack[i].getNode()->hasOneUse()29.9k ) { |
3283 | 8.26k | HasMultipleUses = true; |
3284 | 8.26k | break; |
3285 | 8.26k | } |
3286 | 97.3k | if (HasMultipleUses97.3k ) break8.26k ; |
3287 | 89.0k | |
3288 | 89.0k | // Check to see that the target thinks this is profitable to fold and that |
3289 | 89.0k | // we can fold it without inducing cycles in the graph. |
3290 | 89.0k | if (89.0k !IsProfitableToFold(N, NodeStack[NodeStack.size()-2].getNode(), |
3291 | 89.0k | NodeToMatch) || |
3292 | 50.5k | !IsLegalToFold(N, NodeStack[NodeStack.size()-2].getNode(), |
3293 | 50.5k | NodeToMatch, OptLevel, |
3294 | 50.5k | true/*We validate our own chains*/)) |
3295 | 38.6k | break; |
3296 | 50.4k | |
3297 | 50.4k | continue; |
3298 | 50.4k | } |
3299 | 3.04M | case OPC_EmitInteger: { |
3300 | 3.04M | MVT::SimpleValueType VT = |
3301 | 3.04M | (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; |
3302 | 3.04M | int64_t Val = MatcherTable[MatcherIndex++]; |
3303 | 3.04M | if (Val & 128) |
3304 | 92.1k | Val = GetVBR(Val, MatcherTable, MatcherIndex); |
3305 | 3.04M | RecordedNodes.push_back(std::pair<SDValue, SDNode*>( |
3306 | 3.04M | CurDAG->getTargetConstant(Val, SDLoc(NodeToMatch), |
3307 | 3.04M | VT), nullptr)); |
3308 | 3.04M | continue; |
3309 | 50.4k | } |
3310 | 692k | case OPC_EmitRegister: { |
3311 | 692k | MVT::SimpleValueType VT = |
3312 | 692k | (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; |
3313 | 692k | unsigned RegNo = MatcherTable[MatcherIndex++]; |
3314 | 692k | RecordedNodes.push_back(std::pair<SDValue, SDNode*>( |
3315 | 692k | CurDAG->getRegister(RegNo, VT), nullptr)); |
3316 | 692k | continue; |
3317 | 50.4k | } |
3318 | 449 | case OPC_EmitRegister2: { |
3319 | 449 | // For targets w/ more than 256 register names, the register enum |
3320 | 449 | // values are stored in two bytes in the matcher table (just like |
3321 | 449 | // opcodes). |
3322 | 449 | MVT::SimpleValueType VT = |
3323 | 449 | (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; |
3324 | 449 | unsigned RegNo = MatcherTable[MatcherIndex++]; |
3325 | 449 | RegNo |= MatcherTable[MatcherIndex++] << 8; |
3326 | 449 | RecordedNodes.push_back(std::pair<SDValue, SDNode*>( |
3327 | 449 | CurDAG->getRegister(RegNo, VT), nullptr)); |
3328 | 449 | continue; |
3329 | 50.4k | } |
3330 | 50.4k | |
3331 | 2.89M | case OPC_EmitConvertToTarget: { |
3332 | 2.89M | // Convert from IMM/FPIMM to target version. |
3333 | 2.89M | unsigned RecNo = MatcherTable[MatcherIndex++]; |
3334 | 2.89M | assert(RecNo < RecordedNodes.size() && "Invalid EmitConvertToTarget"); |
3335 | 2.89M | SDValue Imm = RecordedNodes[RecNo].first; |
3336 | 2.89M | |
3337 | 2.89M | if (Imm->getOpcode() == ISD::Constant2.89M ) { |
3338 | 2.63M | const ConstantInt *Val=cast<ConstantSDNode>(Imm)->getConstantIntValue(); |
3339 | 2.63M | Imm = CurDAG->getTargetConstant(*Val, SDLoc(NodeToMatch), |
3340 | 2.63M | Imm.getValueType()); |
3341 | 2.89M | } else if (263k Imm->getOpcode() == ISD::ConstantFP263k ) { |
3342 | 34.4k | const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue(); |
3343 | 34.4k | Imm = CurDAG->getTargetConstantFP(*Val, SDLoc(NodeToMatch), |
3344 | 34.4k | Imm.getValueType()); |
3345 | 34.4k | } |
3346 | 2.89M | |
3347 | 2.89M | RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second)); |
3348 | 2.89M | continue; |
3349 | 50.4k | } |
3350 | 50.4k | |
3351 | 13.0M | case OPC_EmitMergeInputChains1_0: // OPC_EmitMergeInputChains, 1, 0 |
3352 | 13.0M | case OPC_EmitMergeInputChains1_1: // OPC_EmitMergeInputChains, 1, 1 |
3353 | 13.0M | case OPC_EmitMergeInputChains1_2: { // OPC_EmitMergeInputChains, 1, 2 |
3354 | 13.0M | // These are space-optimized forms of OPC_EmitMergeInputChains. |
3355 | 13.0M | assert(!InputChain.getNode() && |
3356 | 13.0M | "EmitMergeInputChains should be the first chain producing node"); |
3357 | 13.0M | assert(ChainNodesMatched.empty() && |
3358 | 13.0M | "Should only have one EmitMergeInputChains per match"); |
3359 | 13.0M | |
3360 | 13.0M | // Read all of the chained nodes. |
3361 | 13.0M | unsigned RecNo = Opcode - OPC_EmitMergeInputChains1_0; |
3362 | 13.0M | assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains"); |
3363 | 13.0M | ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); |
3364 | 13.0M | |
3365 | 13.0M | // FIXME: What if other value results of the node have uses not matched |
3366 | 13.0M | // by this pattern? |
3367 | 13.0M | if (ChainNodesMatched.back() != NodeToMatch && |
3368 | 13.0M | !RecordedNodes[RecNo].first.hasOneUse()32.0k ) { |
3369 | 1.02k | ChainNodesMatched.clear(); |
3370 | 1.02k | break; |
3371 | 1.02k | } |
3372 | 13.0M | |
3373 | 13.0M | // Merge the input chains if they are not intra-pattern references. |
3374 | 13.0M | InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG); |
3375 | 13.0M | |
3376 | 13.0M | if (!InputChain.getNode()) |
3377 | 853 | break; // Failed to merge. |
3378 | 13.0M | continue; |
3379 | 13.0M | } |
3380 | 13.0M | |
3381 | 2.56k | case OPC_EmitMergeInputChains: { |
3382 | 2.56k | assert(!InputChain.getNode() && |
3383 | 2.56k | "EmitMergeInputChains should be the first chain producing node"); |
3384 | 2.56k | // This node gets a list of nodes we matched in the input that have |
3385 | 2.56k | // chains. We want to token factor all of the input chains to these nodes |
3386 | 2.56k | // together. However, if any of the input chains is actually one of the |
3387 | 2.56k | // nodes matched in this pattern, then we have an intra-match reference. |
3388 | 2.56k | // Ignore these because the newly token factored chain should not refer to |
3389 | 2.56k | // the old nodes. |
3390 | 2.56k | unsigned NumChains = MatcherTable[MatcherIndex++]; |
3391 | 2.56k | assert(NumChains != 0 && "Can't TF zero chains"); |
3392 | 2.56k | |
3393 | 2.56k | assert(ChainNodesMatched.empty() && |
3394 | 2.56k | "Should only have one EmitMergeInputChains per match"); |
3395 | 2.56k | |
3396 | 2.56k | // Read all of the chained nodes. |
3397 | 7.34k | for (unsigned i = 0; i != NumChains7.34k ; ++i4.78k ) { |
3398 | 4.78k | unsigned RecNo = MatcherTable[MatcherIndex++]; |
3399 | 4.78k | assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains"); |
3400 | 4.78k | ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); |
3401 | 4.78k | |
3402 | 4.78k | // FIXME: What if other value results of the node have uses not matched |
3403 | 4.78k | // by this pattern? |
3404 | 4.78k | if (ChainNodesMatched.back() != NodeToMatch && |
3405 | 4.78k | !RecordedNodes[RecNo].first.hasOneUse()2.59k ) { |
3406 | 0 | ChainNodesMatched.clear(); |
3407 | 0 | break; |
3408 | 0 | } |
3409 | 4.78k | } |
3410 | 2.56k | |
3411 | 2.56k | // If the inner loop broke out, the match fails. |
3412 | 2.56k | if (ChainNodesMatched.empty()) |
3413 | 0 | break; |
3414 | 2.56k | |
3415 | 2.56k | // Merge the input chains if they are not intra-pattern references. |
3416 | 2.56k | InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG); |
3417 | 2.56k | |
3418 | 2.56k | if (!InputChain.getNode()) |
3419 | 84 | break; // Failed to merge. |
3420 | 2.47k | |
3421 | 2.47k | continue; |
3422 | 2.47k | } |
3423 | 2.47k | |
3424 | 983k | case OPC_EmitCopyToReg: { |
3425 | 983k | unsigned RecNo = MatcherTable[MatcherIndex++]; |
3426 | 983k | assert(RecNo < RecordedNodes.size() && "Invalid EmitCopyToReg"); |
3427 | 983k | unsigned DestPhysReg = MatcherTable[MatcherIndex++]; |
3428 | 983k | |
3429 | 983k | if (!InputChain.getNode()) |
3430 | 186k | InputChain = CurDAG->getEntryNode(); |
3431 | 983k | |
3432 | 983k | InputChain = CurDAG->getCopyToReg(InputChain, SDLoc(NodeToMatch), |
3433 | 983k | DestPhysReg, RecordedNodes[RecNo].first, |
3434 | 983k | InputGlue); |
3435 | 983k | |
3436 | 983k | InputGlue = InputChain.getValue(1); |
3437 | 983k | continue; |
3438 | 2.47k | } |
3439 | 2.47k | |
3440 | 729k | case OPC_EmitNodeXForm: { |
3441 | 729k | unsigned XFormNo = MatcherTable[MatcherIndex++]; |
3442 | 729k | unsigned RecNo = MatcherTable[MatcherIndex++]; |
3443 | 729k | assert(RecNo < RecordedNodes.size() && "Invalid EmitNodeXForm"); |
3444 | 729k | SDValue Res = RunSDNodeXForm(RecordedNodes[RecNo].first, XFormNo); |
3445 | 729k | RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, nullptr)); |
3446 | 729k | continue; |
3447 | 2.47k | } |
3448 | 0 | case OPC_Coverage: { |
3449 | 0 | // This is emitted right before MorphNode/EmitNode. |
3450 | 0 | // So it should be safe to assume that this node has been selected |
3451 | 0 | unsigned index = MatcherTable[MatcherIndex++]; |
3452 | 0 | index |= (MatcherTable[MatcherIndex++] << 8); |
3453 | 0 | dbgs() << "COVERED: " << getPatternForIndex(index) << "\n"; |
3454 | 0 | dbgs() << "INCLUDED: " << getIncludePathForIndex(index) << "\n"; |
3455 | 0 | continue; |
3456 | 2.47k | } |
3457 | 2.47k | |
3458 | 20.7M | case OPC_EmitNode: 20.7M case OPC_MorphNodeTo: |
3459 | 20.7M | case OPC_EmitNode0: 20.7M case OPC_EmitNode1: 20.7M case OPC_EmitNode2: |
3460 | 20.7M | case OPC_MorphNodeTo0: 20.7M case OPC_MorphNodeTo1: 20.7M case OPC_MorphNodeTo2: { |
3461 | 20.7M | uint16_t TargetOpc = MatcherTable[MatcherIndex++]; |
3462 | 20.7M | TargetOpc |= (unsigned short)MatcherTable[MatcherIndex++] << 8; |
3463 | 20.7M | unsigned EmitNodeInfo = MatcherTable[MatcherIndex++]; |
3464 | 20.7M | // Get the result VT list. |
3465 | 20.7M | unsigned NumVTs; |
3466 | 20.7M | // If this is one of the compressed forms, get the number of VTs based |
3467 | 20.7M | // on the Opcode. Otherwise read the next byte from the table. |
3468 | 20.7M | if (Opcode >= OPC_MorphNodeTo0 && 20.7M Opcode <= OPC_MorphNodeTo219.8M ) |
3469 | 19.8M | NumVTs = Opcode - OPC_MorphNodeTo0; |
3470 | 931k | else if (931k Opcode >= OPC_EmitNode0 && 931k Opcode <= OPC_EmitNode2931k ) |
3471 | 931k | NumVTs = Opcode - OPC_EmitNode0; |
3472 | 931k | else |
3473 | 4 | NumVTs = MatcherTable[MatcherIndex++]; |
3474 | 20.7M | SmallVector<EVT, 4> VTs; |
3475 | 36.7M | for (unsigned i = 0; i != NumVTs36.7M ; ++i15.9M ) { |
3476 | 15.9M | MVT::SimpleValueType VT = |
3477 | 15.9M | (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; |
3478 | 15.9M | if (VT == MVT::iPTR) |
3479 | 453 | VT = TLI->getPointerTy(CurDAG->getDataLayout()).SimpleTy; |
3480 | 15.9M | VTs.push_back(VT); |
3481 | 15.9M | } |
3482 | 20.7M | |
3483 | 20.7M | if (EmitNodeInfo & OPFL_Chain) |
3484 | 13.1M | VTs.push_back(MVT::Other); |
3485 | 20.7M | if (EmitNodeInfo & OPFL_GlueOutput) |
3486 | 4.73M | VTs.push_back(MVT::Glue); |
3487 | 20.7M | |
3488 | 20.7M | // This is hot code, so optimize the two most common cases of 1 and 2 |
3489 | 20.7M | // results. |
3490 | 20.7M | SDVTList VTList; |
3491 | 20.7M | if (VTs.size() == 1) |
3492 | 12.4M | VTList = CurDAG->getVTList(VTs[0]); |
3493 | 8.35M | else if (8.35M VTs.size() == 28.35M ) |
3494 | 3.68M | VTList = CurDAG->getVTList(VTs[0], VTs[1]); |
3495 | 8.35M | else |
3496 | 4.66M | VTList = CurDAG->getVTList(VTs); |
3497 | 20.7M | |
3498 | 20.7M | // Get the operand list. |
3499 | 20.7M | unsigned NumOps = MatcherTable[MatcherIndex++]; |
3500 | 20.7M | SmallVector<SDValue, 8> Ops; |
3501 | 62.5M | for (unsigned i = 0; i != NumOps62.5M ; ++i41.7M ) { |
3502 | 41.7M | unsigned RecNo = MatcherTable[MatcherIndex++]; |
3503 | 41.7M | if (RecNo & 128) |
3504 | 7.06k | RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex); |
3505 | 41.7M | |
3506 | 41.7M | assert(RecNo < RecordedNodes.size() && "Invalid EmitNode"); |
3507 | 41.7M | Ops.push_back(RecordedNodes[RecNo].first); |
3508 | 41.7M | } |
3509 | 20.7M | |
3510 | 20.7M | // If there are variadic operands to add, handle them now. |
3511 | 20.7M | if (EmitNodeInfo & OPFL_VariadicInfo20.7M ) { |
3512 | 2.15M | // Determine the start index to copy from. |
3513 | 2.15M | unsigned FirstOpToCopy = getNumFixedFromVariadicInfo(EmitNodeInfo); |
3514 | 2.15M | FirstOpToCopy += (EmitNodeInfo & OPFL_Chain) ? 12.15M : 06.08k ; |
3515 | 2.15M | assert(NodeToMatch->getNumOperands() >= FirstOpToCopy && |
3516 | 2.15M | "Invalid variadic node"); |
3517 | 2.15M | // Copy all of the variadic operands, not including a potential glue |
3518 | 2.15M | // input. |
3519 | 2.15M | for (unsigned i = FirstOpToCopy, e = NodeToMatch->getNumOperands(); |
3520 | 8.19M | i != e8.19M ; ++i6.03M ) { |
3521 | 7.94M | SDValue V = NodeToMatch->getOperand(i); |
3522 | 7.94M | if (V.getValueType() == MVT::Glue7.94M ) break1.90M ; |
3523 | 6.03M | Ops.push_back(V); |
3524 | 6.03M | } |
3525 | 2.15M | } |
3526 | 20.7M | |
3527 | 20.7M | // If this has chain/glue inputs, add them. |
3528 | 20.7M | if (EmitNodeInfo & OPFL_Chain) |
3529 | 13.1M | Ops.push_back(InputChain); |
3530 | 20.7M | if ((EmitNodeInfo & OPFL_GlueInput) && 20.7M InputGlue.getNode() != nullptr4.73M ) |
3531 | 4.47M | Ops.push_back(InputGlue); |
3532 | 20.7M | |
3533 | 20.7M | // Create the node. |
3534 | 20.7M | SDNode *Res = nullptr; |
3535 | 20.7M | bool IsMorphNodeTo = Opcode == OPC_MorphNodeTo || |
3536 | 20.7M | (Opcode >= OPC_MorphNodeTo0 && 20.7M Opcode <= OPC_MorphNodeTo219.8M ); |
3537 | 20.7M | if (!IsMorphNodeTo20.7M ) { |
3538 | 931k | // If this is a normal EmitNode command, just create the new node and |
3539 | 931k | // add the results to the RecordedNodes list. |
3540 | 931k | Res = CurDAG->getMachineNode(TargetOpc, SDLoc(NodeToMatch), |
3541 | 931k | VTList, Ops); |
3542 | 931k | |
3543 | 931k | // Add all the non-glue/non-chain results to the RecordedNodes list. |
3544 | 1.87M | for (unsigned i = 0, e = VTs.size(); i != e1.87M ; ++i941k ) { |
3545 | 1.01M | if (VTs[i] == MVT::Other || 1.01M VTs[i] == MVT::Glue941k ) break69.8k ; |
3546 | 941k | RecordedNodes.push_back(std::pair<SDValue,SDNode*>(SDValue(Res, i), |
3547 | 941k | nullptr)); |
3548 | 941k | } |
3549 | 20.7M | } else { |
3550 | 19.8M | assert(NodeToMatch->getOpcode() != ISD::DELETED_NODE && |
3551 | 19.8M | "NodeToMatch was removed partway through selection"); |
3552 | 19.8M | SelectionDAG::DAGNodeDeletedListener NDL(*CurDAG, [&](SDNode *N, |
3553 | 10.1M | SDNode *E) { |
3554 | 10.1M | auto &Chain = ChainNodesMatched; |
3555 | 10.1M | assert((!E || !is_contained(Chain, N)) && |
3556 | 10.1M | "Chain node replaced during MorphNode"); |
3557 | 10.1M | Chain.erase(std::remove(Chain.begin(), Chain.end(), N), Chain.end()); |
3558 | 10.1M | }); |
3559 | 19.8M | Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops, EmitNodeInfo); |
3560 | 19.8M | } |
3561 | 20.7M | |
3562 | 20.7M | // If the node had chain/glue results, update our notion of the current |
3563 | 20.7M | // chain and glue. |
3564 | 20.7M | if (EmitNodeInfo & OPFL_GlueOutput20.7M ) { |
3565 | 4.73M | InputGlue = SDValue(Res, VTs.size()-1); |
3566 | 4.73M | if (EmitNodeInfo & OPFL_Chain) |
3567 | 4.70M | InputChain = SDValue(Res, VTs.size()-2); |
3568 | 20.7M | } else if (16.0M EmitNodeInfo & OPFL_Chain16.0M ) |
3569 | 8.45M | InputChain = SDValue(Res, VTs.size()-1); |
3570 | 20.7M | |
3571 | 20.7M | // If the OPFL_MemRefs glue is set on this node, slap all of the |
3572 | 20.7M | // accumulated memrefs onto it. |
3573 | 20.7M | // |
3574 | 20.7M | // FIXME: This is vastly incorrect for patterns with multiple outputs |
3575 | 20.7M | // instructions that access memory and for ComplexPatterns that match |
3576 | 20.7M | // loads. |
3577 | 20.7M | if (EmitNodeInfo & OPFL_MemRefs20.7M ) { |
3578 | 3.77M | // Only attach load or store memory operands if the generated |
3579 | 3.77M | // instruction may load or store. |
3580 | 3.77M | const MCInstrDesc &MCID = TII->get(TargetOpc); |
3581 | 3.77M | bool mayLoad = MCID.mayLoad(); |
3582 | 3.77M | bool mayStore = MCID.mayStore(); |
3583 | 3.77M | |
3584 | 3.77M | unsigned NumMemRefs = 0; |
3585 | 3.77M | for (SmallVectorImpl<MachineMemOperand *>::const_iterator I = |
3586 | 7.55M | MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E7.55M ; ++I3.77M ) { |
3587 | 3.77M | if ((*I)->isLoad()3.77M ) { |
3588 | 2.10M | if (mayLoad) |
3589 | 2.10M | ++NumMemRefs; |
3590 | 3.77M | } else if (1.67M (*I)->isStore()1.67M ) { |
3591 | 1.67M | if (mayStore) |
3592 | 1.67M | ++NumMemRefs; |
3593 | 0 | } else { |
3594 | 0 | ++NumMemRefs; |
3595 | 0 | } |
3596 | 3.77M | } |
3597 | 3.77M | |
3598 | 3.77M | MachineSDNode::mmo_iterator MemRefs = |
3599 | 3.77M | MF->allocateMemRefsArray(NumMemRefs); |
3600 | 3.77M | |
3601 | 3.77M | MachineSDNode::mmo_iterator MemRefsPos = MemRefs; |
3602 | 3.77M | for (SmallVectorImpl<MachineMemOperand *>::const_iterator I = |
3603 | 7.55M | MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E7.55M ; ++I3.77M ) { |
3604 | 3.77M | if ((*I)->isLoad()3.77M ) { |
3605 | 2.10M | if (mayLoad) |
3606 | 2.10M | *MemRefsPos++ = *I; |
3607 | 3.77M | } else if (1.67M (*I)->isStore()1.67M ) { |
3608 | 1.67M | if (mayStore) |
3609 | 1.67M | *MemRefsPos++ = *I; |
3610 | 0 | } else { |
3611 | 0 | *MemRefsPos++ = *I; |
3612 | 0 | } |
3613 | 3.77M | } |
3614 | 3.77M | |
3615 | 3.77M | cast<MachineSDNode>(Res) |
3616 | 3.77M | ->setMemRefs(MemRefs, MemRefs + NumMemRefs); |
3617 | 3.77M | } |
3618 | 20.7M | |
3619 | 20.7M | DEBUG(dbgs() << " " |
3620 | 20.7M | << (IsMorphNodeTo ? "Morphed" : "Created") |
3621 | 20.7M | << " node: "; Res->dump(CurDAG); dbgs() << "\n"); |
3622 | 20.7M | |
3623 | 20.7M | // If this was a MorphNodeTo then we're completely done! |
3624 | 20.7M | if (IsMorphNodeTo20.7M ) { |
3625 | 19.8M | // Update chain uses. |
3626 | 19.8M | UpdateChains(Res, InputChain, ChainNodesMatched, true); |
3627 | 19.8M | return; |
3628 | 19.8M | } |
3629 | 931k | continue; |
3630 | 931k | } |
3631 | 931k | |
3632 | 170k | case OPC_CompleteMatch: { |
3633 | 170k | // The match has been completed, and any new nodes (if any) have been |
3634 | 170k | // created. Patch up references to the matched dag to use the newly |
3635 | 170k | // created nodes. |
3636 | 170k | unsigned NumResults = MatcherTable[MatcherIndex++]; |
3637 | 170k | |
3638 | 341k | for (unsigned i = 0; i != NumResults341k ; ++i170k ) { |
3639 | 170k | unsigned ResSlot = MatcherTable[MatcherIndex++]; |
3640 | 170k | if (ResSlot & 128) |
3641 | 0 | ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex); |
3642 | 170k | |
3643 | 170k | assert(ResSlot < RecordedNodes.size() && "Invalid CompleteMatch"); |
3644 | 170k | SDValue Res = RecordedNodes[ResSlot].first; |
3645 | 170k | |
3646 | 170k | assert(i < NodeToMatch->getNumValues() && |
3647 | 170k | NodeToMatch->getValueType(i) != MVT::Other && |
3648 | 170k | NodeToMatch->getValueType(i) != MVT::Glue && |
3649 | 170k | "Invalid number of results to complete!"); |
3650 | 170k | assert((NodeToMatch->getValueType(i) == Res.getValueType() || |
3651 | 170k | NodeToMatch->getValueType(i) == MVT::iPTR || |
3652 | 170k | Res.getValueType() == MVT::iPTR || |
3653 | 170k | NodeToMatch->getValueType(i).getSizeInBits() == |
3654 | 170k | Res.getValueSizeInBits()) && |
3655 | 170k | "invalid replacement"); |
3656 | 170k | CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res); |
3657 | 170k | } |
3658 | 170k | |
3659 | 170k | // Update chain uses. |
3660 | 170k | UpdateChains(NodeToMatch, InputChain, ChainNodesMatched, false); |
3661 | 170k | |
3662 | 170k | // If the root node defines glue, we need to update it to the glue result. |
3663 | 170k | // TODO: This never happens in our tests and I think it can be removed / |
3664 | 170k | // replaced with an assert, but if we do it this the way the change is |
3665 | 170k | // NFC. |
3666 | 170k | if (NodeToMatch->getValueType(NodeToMatch->getNumValues() - 1) == |
3667 | 170k | MVT::Glue && |
3668 | 0 | InputGlue.getNode()) |
3669 | 0 | CurDAG->ReplaceAllUsesOfValueWith( |
3670 | 0 | SDValue(NodeToMatch, NodeToMatch->getNumValues() - 1), InputGlue); |
3671 | 170k | |
3672 | 170k | assert(NodeToMatch->use_empty() && |
3673 | 170k | "Didn't replace all uses of the node?"); |
3674 | 170k | CurDAG->RemoveDeadNode(NodeToMatch); |
3675 | 170k | |
3676 | 170k | return; |
3677 | 52.2M | } |
3678 | 52.2M | } |
3679 | 52.2M | |
3680 | 52.2M | // If the code reached this point, then the match failed. See if there is |
3681 | 52.2M | // another child to try in the current 'Scope', otherwise pop it until we |
3682 | 52.2M | // find a case to check. |
3683 | 52.2M | DEBUG52.2M (dbgs() << " Match failed at index " << CurrentOpcodeIndex << "\n"); |
3684 | 52.2M | ++NumDAGIselRetries; |
3685 | 58.3M | while (true58.3M ) { |
3686 | 58.3M | if (MatchScopes.empty()58.3M ) { |
3687 | 30 | CannotYetSelect(NodeToMatch); |
3688 | 30 | return; |
3689 | 30 | } |
3690 | 58.3M | |
3691 | 58.3M | // Restore the interpreter state back to the point where the scope was |
3692 | 58.3M | // formed. |
3693 | 58.3M | MatchScope &LastScope = MatchScopes.back(); |
3694 | 58.3M | RecordedNodes.resize(LastScope.NumRecordedNodes); |
3695 | 58.3M | NodeStack.clear(); |
3696 | 58.3M | NodeStack.append(LastScope.NodeStack.begin(), LastScope.NodeStack.end()); |
3697 | 58.3M | N = NodeStack.back(); |
3698 | 58.3M | |
3699 | 58.3M | if (LastScope.NumMatchedMemRefs != MatchedMemRefs.size()) |
3700 | 100k | MatchedMemRefs.resize(LastScope.NumMatchedMemRefs); |
3701 | 58.3M | MatcherIndex = LastScope.FailIndex; |
3702 | 58.3M | |
3703 | 58.3M | DEBUG(dbgs() << " Continuing at " << MatcherIndex << "\n"); |
3704 | 58.3M | |
3705 | 58.3M | InputChain = LastScope.InputChain; |
3706 | 58.3M | InputGlue = LastScope.InputGlue; |
3707 | 58.3M | if (!LastScope.HasChainNodesMatched) |
3708 | 58.3M | ChainNodesMatched.clear(); |
3709 | 58.3M | |
3710 | 58.3M | // Check to see what the offset is at the new MatcherIndex. If it is zero |
3711 | 58.3M | // we have reached the end of this scope, otherwise we have another child |
3712 | 58.3M | // in the current scope to try. |
3713 | 58.3M | unsigned NumToSkip = MatcherTable[MatcherIndex++]; |
3714 | 58.3M | if (NumToSkip & 128) |
3715 | 13.3M | NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex); |
3716 | 58.3M | |
3717 | 58.3M | // If we have another child in this scope to match, update FailIndex and |
3718 | 58.3M | // try it. |
3719 | 58.3M | if (NumToSkip != 058.3M ) { |
3720 | 52.2M | LastScope.FailIndex = MatcherIndex+NumToSkip; |
3721 | 52.2M | break; |
3722 | 52.2M | } |
3723 | 6.13M | |
3724 | 6.13M | // End of this scope, pop it and try the next child in the containing |
3725 | 6.13M | // scope. |
3726 | 6.13M | MatchScopes.pop_back(); |
3727 | 6.13M | } |
3728 | 281M | } |
3729 | 59.4M | } |
3730 | | |
3731 | 30 | void SelectionDAGISel::CannotYetSelect(SDNode *N) { |
3732 | 30 | std::string msg; |
3733 | 30 | raw_string_ostream Msg(msg); |
3734 | 30 | Msg << "Cannot select: "; |
3735 | 30 | |
3736 | 30 | if (N->getOpcode() != ISD::INTRINSIC_W_CHAIN && |
3737 | 29 | N->getOpcode() != ISD::INTRINSIC_WO_CHAIN && |
3738 | 30 | N->getOpcode() != ISD::INTRINSIC_VOID17 ) { |
3739 | 9 | N->printrFull(Msg, CurDAG); |
3740 | 9 | Msg << "\nIn function: " << MF->getName(); |
3741 | 30 | } else { |
3742 | 21 | bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other; |
3743 | 21 | unsigned iid = |
3744 | 21 | cast<ConstantSDNode>(N->getOperand(HasInputChain))->getZExtValue(); |
3745 | 21 | if (iid < Intrinsic::num_intrinsics) |
3746 | 21 | Msg << "intrinsic %" << Intrinsic::getName((Intrinsic::ID)iid, None); |
3747 | 0 | else if (const TargetIntrinsicInfo *0 TII0 = TM.getIntrinsicInfo()) |
3748 | 0 | Msg << "target intrinsic %" << TII->getName(iid); |
3749 | 0 | else |
3750 | 0 | Msg << "unknown intrinsic #" << iid; |
3751 | 21 | } |
3752 | 30 | report_fatal_error(Msg.str()); |
3753 | 30 | } |
3754 | | |
3755 | | char SelectionDAGISel::ID = 0; |