/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/X86/X86FixupLEAs.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This file defines the pass that finds instructions that can be |
11 | | // re-written as LEA instructions in order to reduce pipeline delays. |
12 | | // When optimizing for size it replaces suitable LEAs with INC or DEC. |
13 | | // |
14 | | //===----------------------------------------------------------------------===// |
15 | | |
16 | | #include "X86.h" |
17 | | #include "X86InstrInfo.h" |
18 | | #include "X86Subtarget.h" |
19 | | #include "llvm/ADT/Statistic.h" |
20 | | #include "llvm/CodeGen/LiveVariables.h" |
21 | | #include "llvm/CodeGen/MachineFunctionPass.h" |
22 | | #include "llvm/CodeGen/MachineInstrBuilder.h" |
23 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
24 | | #include "llvm/CodeGen/Passes.h" |
25 | | #include "llvm/Support/Debug.h" |
26 | | #include "llvm/Support/raw_ostream.h" |
27 | | #include "llvm/Target/TargetInstrInfo.h" |
28 | | using namespace llvm; |
29 | | |
30 | | namespace llvm { |
31 | | void initializeFixupLEAPassPass(PassRegistry &); |
32 | | } |
33 | | |
34 | 7.88k | #define FIXUPLEA_DESC "X86 LEA Fixup" |
35 | | #define FIXUPLEA_NAME "x86-fixup-LEAs" |
36 | | |
37 | | #define DEBUG_TYPE FIXUPLEA_NAME |
38 | | |
39 | | STATISTIC(NumLEAs, "Number of LEA instructions created"); |
40 | | |
41 | | namespace { |
42 | | class FixupLEAPass : public MachineFunctionPass { |
43 | | enum RegUsageState { RU_NotUsed, RU_Write, RU_Read }; |
44 | | |
45 | | /// \brief Loop over all of the instructions in the basic block |
46 | | /// replacing applicable instructions with LEA instructions, |
47 | | /// where appropriate. |
48 | | bool processBasicBlock(MachineFunction &MF, MachineFunction::iterator MFI); |
49 | | |
50 | | |
51 | | /// \brief Given a machine register, look for the instruction |
52 | | /// which writes it in the current basic block. If found, |
53 | | /// try to replace it with an equivalent LEA instruction. |
54 | | /// If replacement succeeds, then also process the newly created |
55 | | /// instruction. |
56 | | void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I, |
57 | | MachineFunction::iterator MFI); |
58 | | |
59 | | /// \brief Given a memory access or LEA instruction |
60 | | /// whose address mode uses a base and/or index register, look for |
61 | | /// an opportunity to replace the instruction which sets the base or index |
62 | | /// register with an equivalent LEA instruction. |
63 | | void processInstruction(MachineBasicBlock::iterator &I, |
64 | | MachineFunction::iterator MFI); |
65 | | |
66 | | /// \brief Given a LEA instruction which is unprofitable |
67 | | /// on Silvermont try to replace it with an equivalent ADD instruction |
68 | | void processInstructionForSLM(MachineBasicBlock::iterator &I, |
69 | | MachineFunction::iterator MFI); |
70 | | |
71 | | |
72 | | /// \brief Given a LEA instruction which is unprofitable |
73 | | /// on SNB+ try to replace it with other instructions. |
74 | | /// According to Intel's Optimization Reference Manual: |
75 | | /// " For LEA instructions with three source operands and some specific |
76 | | /// situations, instruction latency has increased to 3 cycles, and must |
77 | | /// dispatch via port 1: |
78 | | /// - LEA that has all three source operands: base, index, and offset |
79 | | /// - LEA that uses base and index registers where the base is EBP, RBP, |
80 | | /// or R13 |
81 | | /// - LEA that uses RIP relative addressing mode |
82 | | /// - LEA that uses 16-bit addressing mode " |
83 | | /// This function currently handles the first 2 cases only. |
84 | | MachineInstr *processInstrForSlow3OpLEA(MachineInstr &MI, |
85 | | MachineFunction::iterator MFI); |
86 | | |
87 | | /// \brief Look for LEAs that add 1 to reg or subtract 1 from reg |
88 | | /// and convert them to INC or DEC respectively. |
89 | | bool fixupIncDec(MachineBasicBlock::iterator &I, |
90 | | MachineFunction::iterator MFI) const; |
91 | | |
92 | | /// \brief Determine if an instruction references a machine register |
93 | | /// and, if so, whether it reads or writes the register. |
94 | | RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I); |
95 | | |
96 | | /// \brief Step backwards through a basic block, looking |
97 | | /// for an instruction which writes a register within |
98 | | /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles. |
99 | | MachineBasicBlock::iterator searchBackwards(MachineOperand &p, |
100 | | MachineBasicBlock::iterator &I, |
101 | | MachineFunction::iterator MFI); |
102 | | |
103 | | /// \brief if an instruction can be converted to an |
104 | | /// equivalent LEA, insert the new instruction into the basic block |
105 | | /// and return a pointer to it. Otherwise, return zero. |
106 | | MachineInstr *postRAConvertToLEA(MachineFunction::iterator &MFI, |
107 | | MachineBasicBlock::iterator &MBBI) const; |
108 | | |
109 | | public: |
110 | | static char ID; |
111 | | |
112 | 7.88k | StringRef getPassName() const override { return 7.88k FIXUPLEA_DESC7.88k ; } |
113 | | |
114 | 7.89k | FixupLEAPass() : MachineFunctionPass(ID) { |
115 | 7.89k | initializeFixupLEAPassPass(*PassRegistry::getPassRegistry()); |
116 | 7.89k | } |
117 | | |
118 | | /// \brief Loop over all of the basic blocks, |
119 | | /// replacing instructions by equivalent LEA instructions |
120 | | /// if needed and when possible. |
121 | | bool runOnMachineFunction(MachineFunction &MF) override; |
122 | | |
123 | | // This pass runs after regalloc and doesn't support VReg operands. |
124 | 7.87k | MachineFunctionProperties getRequiredProperties() const override { |
125 | 7.87k | return MachineFunctionProperties().set( |
126 | 7.87k | MachineFunctionProperties::Property::NoVRegs); |
127 | 7.87k | } |
128 | | |
129 | | private: |
130 | | MachineFunction *MF; |
131 | | const X86InstrInfo *TII; // Machine instruction info. |
132 | | bool OptIncDec; |
133 | | bool OptLEA; |
134 | | }; |
135 | | } |
136 | | |
137 | | char FixupLEAPass::ID = 0; |
138 | | |
139 | | INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false) |
140 | | |
141 | | MachineInstr * |
142 | | FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI, |
143 | 81 | MachineBasicBlock::iterator &MBBI) const { |
144 | 81 | MachineInstr &MI = *MBBI; |
145 | 81 | switch (MI.getOpcode()) { |
146 | 1 | case X86::MOV32rr: |
147 | 1 | case X86::MOV64rr: { |
148 | 1 | const MachineOperand &Src = MI.getOperand(1); |
149 | 1 | const MachineOperand &Dest = MI.getOperand(0); |
150 | 1 | MachineInstr *NewMI = |
151 | 1 | BuildMI(*MF, MI.getDebugLoc(), |
152 | 0 | TII->get(MI.getOpcode() == X86::MOV32rr ? X86::LEA32r |
153 | 1 | : X86::LEA64r)) |
154 | 1 | .add(Dest) |
155 | 1 | .add(Src) |
156 | 1 | .addImm(1) |
157 | 1 | .addReg(0) |
158 | 1 | .addImm(0) |
159 | 1 | .addReg(0); |
160 | 1 | MFI->insert(MBBI, NewMI); // Insert the new inst |
161 | 1 | return NewMI; |
162 | 1 | } |
163 | 2 | case X86::ADD64ri32: |
164 | 2 | case X86::ADD64ri8: |
165 | 2 | case X86::ADD64ri32_DB: |
166 | 2 | case X86::ADD64ri8_DB: |
167 | 2 | case X86::ADD32ri: |
168 | 2 | case X86::ADD32ri8: |
169 | 2 | case X86::ADD32ri_DB: |
170 | 2 | case X86::ADD32ri8_DB: |
171 | 2 | case X86::ADD16ri: |
172 | 2 | case X86::ADD16ri8: |
173 | 2 | case X86::ADD16ri_DB: |
174 | 2 | case X86::ADD16ri8_DB: |
175 | 2 | if (!MI.getOperand(2).isImm()2 ) { |
176 | 0 | // convertToThreeAddress will call getImm() |
177 | 0 | // which requires isImm() to be true |
178 | 0 | return nullptr; |
179 | 0 | } |
180 | 2 | break; |
181 | 1 | case X86::ADD16rr: |
182 | 1 | case X86::ADD16rr_DB: |
183 | 1 | if (MI.getOperand(1).getReg() != MI.getOperand(2).getReg()1 ) { |
184 | 1 | // if src1 != src2, then convertToThreeAddress will |
185 | 1 | // need to create a Virtual register, which we cannot do |
186 | 1 | // after register allocation. |
187 | 1 | return nullptr; |
188 | 1 | } |
189 | 79 | } |
190 | 79 | return TII->convertToThreeAddress(MFI, MI, nullptr); |
191 | 79 | } |
192 | | |
193 | 7.89k | FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); } |
194 | | |
195 | 70.9k | bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) { |
196 | 70.9k | if (skipFunction(*Func.getFunction())) |
197 | 31 | return false; |
198 | 70.8k | |
199 | 70.8k | MF = &Func; |
200 | 70.8k | const X86Subtarget &ST = Func.getSubtarget<X86Subtarget>(); |
201 | 11.9k | OptIncDec = !ST.slowIncDec() || Func.getFunction()->optForMinSize(); |
202 | 70.8k | OptLEA = ST.LEAusesAG() || ST.slowLEA()70.5k || ST.slow3OpsLEA()70.1k ; |
203 | 70.8k | |
204 | 70.8k | if (!OptLEA && 70.8k !OptIncDec58.7k ) |
205 | 96 | return false; |
206 | 70.7k | |
207 | 70.7k | TII = ST.getInstrInfo(); |
208 | 70.7k | |
209 | 70.7k | DEBUG(dbgs() << "Start X86FixupLEAs\n";); |
210 | 70.7k | // Process all basic blocks. |
211 | 203k | for (MachineFunction::iterator I = Func.begin(), E = Func.end(); I != E203k ; ++I133k ) |
212 | 133k | processBasicBlock(Func, I); |
213 | 70.7k | DEBUG(dbgs() << "End X86FixupLEAs\n";); |
214 | 70.9k | |
215 | 70.9k | return true; |
216 | 70.9k | } |
217 | | |
218 | | FixupLEAPass::RegUsageState |
219 | 1.87k | FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) { |
220 | 1.87k | RegUsageState RegUsage = RU_NotUsed; |
221 | 1.87k | MachineInstr &MI = *I; |
222 | 1.87k | |
223 | 9.46k | for (unsigned int i = 0; i < MI.getNumOperands()9.46k ; ++i7.59k ) { |
224 | 7.67k | MachineOperand &opnd = MI.getOperand(i); |
225 | 7.67k | if (opnd.isReg() && 7.67k opnd.getReg() == p.getReg()5.71k ) { |
226 | 1.29k | if (opnd.isDef()) |
227 | 81 | return RU_Write; |
228 | 1.21k | RegUsage = RU_Read; |
229 | 1.21k | } |
230 | 7.67k | } |
231 | 1.78k | return RegUsage; |
232 | 1.87k | } |
233 | | |
234 | | /// getPreviousInstr - Given a reference to an instruction in a basic |
235 | | /// block, return a reference to the previous instruction in the block, |
236 | | /// wrapping around to the last instruction of the block if the block |
237 | | /// branches to itself. |
238 | | static inline bool getPreviousInstr(MachineBasicBlock::iterator &I, |
239 | 2.81k | MachineFunction::iterator MFI) { |
240 | 2.81k | if (I == MFI->begin()2.81k ) { |
241 | 710 | if (MFI->isPredecessor(&*MFI)710 ) { |
242 | 25 | I = --MFI->end(); |
243 | 25 | return true; |
244 | 25 | } else |
245 | 685 | return false; |
246 | 2.10k | } |
247 | 2.10k | --I; |
248 | 2.10k | return true; |
249 | 2.10k | } |
250 | | |
251 | | MachineBasicBlock::iterator |
252 | | FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I, |
253 | 1.02k | MachineFunction::iterator MFI) { |
254 | 1.02k | int InstrDistance = 1; |
255 | 1.02k | MachineBasicBlock::iterator CurInst; |
256 | 1.02k | static const int INSTR_DISTANCE_THRESHOLD = 5; |
257 | 1.02k | |
258 | 1.02k | CurInst = I; |
259 | 1.02k | bool Found; |
260 | 1.02k | Found = getPreviousInstr(CurInst, MFI); |
261 | 2.81k | while (Found && 2.81k I != CurInst2.13k ) { |
262 | 2.12k | if (CurInst->isCall() || 2.12k CurInst->isInlineAsm()2.08k ) |
263 | 49 | break; |
264 | 2.07k | if (2.07k InstrDistance > INSTR_DISTANCE_THRESHOLD2.07k ) |
265 | 208 | break; // too far back to make a difference |
266 | 1.87k | if (1.87k usesRegister(p, CurInst) == RU_Write1.87k ) { |
267 | 81 | return CurInst; |
268 | 81 | } |
269 | 1.78k | InstrDistance += TII->getInstrLatency( |
270 | 1.78k | MF->getSubtarget().getInstrItineraryData(), *CurInst); |
271 | 1.78k | Found = getPreviousInstr(CurInst, MFI); |
272 | 1.78k | } |
273 | 948 | return MachineBasicBlock::iterator(); |
274 | 1.02k | } |
275 | | |
276 | 879k | static inline bool isLEA(const int Opcode) { |
277 | 879k | return Opcode == X86::LEA16r || Opcode == X86::LEA32r || |
278 | 879k | Opcode == X86::LEA64r869k || Opcode == X86::LEA64_32r853k ; |
279 | 879k | } |
280 | | |
281 | 5.85k | static inline bool isInefficientLEAReg(unsigned int Reg) { |
282 | 5.85k | return Reg == X86::EBP || Reg == X86::RBP5.83k || Reg == X86::R135.38k ; |
283 | 5.85k | } |
284 | | |
285 | 11.2k | static inline bool isRegOperand(const MachineOperand &Op) { |
286 | 11.2k | return Op.isReg() && Op.getReg() != X86::NoRegister; |
287 | 11.2k | } |
288 | | /// hasIneffecientLEARegs - LEA that uses base and index registers |
289 | | /// where the base is EBP, RBP, or R13 |
290 | | static inline bool hasInefficientLEABaseReg(const MachineOperand &Base, |
291 | 5.00k | const MachineOperand &Index) { |
292 | 5.00k | return Base.isReg() && isInefficientLEAReg(Base.getReg()) && |
293 | 454 | isRegOperand(Index); |
294 | 5.00k | } |
295 | | |
296 | 1.35k | static inline bool hasLEAOffset(const MachineOperand &Offset) { |
297 | 1.35k | return (Offset.isImm() && Offset.getImm() != 01.35k ) || Offset.isGlobal()558 ; |
298 | 1.35k | } |
299 | | |
300 | | // LEA instruction that has all three operands: offset, base and index |
301 | | static inline bool isThreeOperandsLEA(const MachineOperand &Base, |
302 | | const MachineOperand &Index, |
303 | 5.41k | const MachineOperand &Offset) { |
304 | 5.41k | return isRegOperand(Base) && isRegOperand(Index)5.35k && hasLEAOffset(Offset)953 ; |
305 | 5.41k | } |
306 | | |
307 | 413 | static inline int getADDrrFromLEA(int LEAOpcode) { |
308 | 413 | switch (LEAOpcode) { |
309 | 0 | default: |
310 | 0 | llvm_unreachable("Unexpected LEA instruction"); |
311 | 0 | case X86::LEA16r: |
312 | 0 | return X86::ADD16rr; |
313 | 11 | case X86::LEA32r: |
314 | 11 | return X86::ADD32rr; |
315 | 402 | case X86::LEA64_32r: |
316 | 402 | case X86::LEA64r: |
317 | 402 | return X86::ADD64rr; |
318 | 0 | } |
319 | 0 | } |
320 | | |
321 | 412 | static inline int getADDriFromLEA(int LEAOpcode, const MachineOperand &Offset) { |
322 | 412 | bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm()); |
323 | 412 | switch (LEAOpcode) { |
324 | 0 | default: |
325 | 0 | llvm_unreachable("Unexpected LEA instruction"); |
326 | 0 | case X86::LEA16r: |
327 | 0 | return IsInt8 ? X86::ADD16ri80 : X86::ADD16ri0 ; |
328 | 93 | case X86::LEA32r: |
329 | 93 | case X86::LEA64_32r: |
330 | 93 | return IsInt8 ? X86::ADD32ri861 : X86::ADD32ri32 ; |
331 | 319 | case X86::LEA64r: |
332 | 319 | return IsInt8 ? X86::ADD64ri8251 : X86::ADD64ri3268 ; |
333 | 0 | } |
334 | 0 | } |
335 | | |
336 | | /// isLEASimpleIncOrDec - Does this LEA have one these forms: |
337 | | /// lea %reg, 1(%reg) |
338 | | /// lea %reg, -1(%reg) |
339 | 23.2k | static inline bool isLEASimpleIncOrDec(MachineInstr &LEA) { |
340 | 23.2k | unsigned SrcReg = LEA.getOperand(1 + X86::AddrBaseReg).getReg(); |
341 | 23.2k | unsigned DstReg = LEA.getOperand(0).getReg(); |
342 | 23.2k | unsigned AddrDispOp = 1 + X86::AddrDisp; |
343 | 23.2k | return SrcReg == DstReg && |
344 | 2.42k | LEA.getOperand(1 + X86::AddrIndexReg).getReg() == 0 && |
345 | 1.65k | LEA.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 && |
346 | 1.65k | LEA.getOperand(AddrDispOp).isImm() && |
347 | 336 | (LEA.getOperand(AddrDispOp).getImm() == 1 || |
348 | 336 | LEA.getOperand(AddrDispOp).getImm() == -1); |
349 | 23.2k | } |
350 | | |
351 | | bool FixupLEAPass::fixupIncDec(MachineBasicBlock::iterator &I, |
352 | 763k | MachineFunction::iterator MFI) const { |
353 | 763k | MachineInstr &MI = *I; |
354 | 763k | int Opcode = MI.getOpcode(); |
355 | 763k | if (!isLEA(Opcode)) |
356 | 740k | return false; |
357 | 23.2k | |
358 | 23.2k | if (23.2k isLEASimpleIncOrDec(MI) && 23.2k TII->isSafeToClobberEFLAGS(*MFI, I)214 ) { |
359 | 68 | int NewOpcode; |
360 | 68 | bool isINC = MI.getOperand(4).getImm() == 1; |
361 | 68 | switch (Opcode) { |
362 | 0 | case X86::LEA16r: |
363 | 0 | NewOpcode = isINC ? X86::INC16r0 : X86::DEC16r0 ; |
364 | 0 | break; |
365 | 62 | case X86::LEA32r: |
366 | 62 | case X86::LEA64_32r: |
367 | 62 | NewOpcode = isINC ? X86::INC32r29 : X86::DEC32r33 ; |
368 | 62 | break; |
369 | 6 | case X86::LEA64r: |
370 | 6 | NewOpcode = isINC ? X86::INC64r6 : X86::DEC64r0 ; |
371 | 62 | break; |
372 | 68 | } |
373 | 68 | |
374 | 68 | MachineInstr *NewMI = |
375 | 68 | BuildMI(*MFI, I, MI.getDebugLoc(), TII->get(NewOpcode)) |
376 | 68 | .add(MI.getOperand(0)) |
377 | 68 | .add(MI.getOperand(1)); |
378 | 68 | MFI->erase(I); |
379 | 68 | I = static_cast<MachineBasicBlock::iterator>(NewMI); |
380 | 68 | return true; |
381 | 68 | } |
382 | 23.1k | return false; |
383 | 23.1k | } |
384 | | |
385 | | void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I, |
386 | 2.32k | MachineFunction::iterator MFI) { |
387 | 2.32k | // Process a load, store, or LEA instruction. |
388 | 2.32k | MachineInstr &MI = *I; |
389 | 2.32k | const MCInstrDesc &Desc = MI.getDesc(); |
390 | 2.32k | int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags); |
391 | 2.32k | if (AddrOffset >= 02.32k ) { |
392 | 558 | AddrOffset += X86II::getOperandBias(Desc); |
393 | 558 | MachineOperand &p = MI.getOperand(AddrOffset + X86::AddrBaseReg); |
394 | 558 | if (p.isReg() && 558 p.getReg() != X86::ESP558 ) { |
395 | 471 | seekLEAFixup(p, I, MFI); |
396 | 471 | } |
397 | 558 | MachineOperand &q = MI.getOperand(AddrOffset + X86::AddrIndexReg); |
398 | 558 | if (q.isReg() && 558 q.getReg() != X86::ESP558 ) { |
399 | 558 | seekLEAFixup(q, I, MFI); |
400 | 558 | } |
401 | 558 | } |
402 | 2.32k | } |
403 | | |
404 | | void FixupLEAPass::seekLEAFixup(MachineOperand &p, |
405 | | MachineBasicBlock::iterator &I, |
406 | 1.02k | MachineFunction::iterator MFI) { |
407 | 1.02k | MachineBasicBlock::iterator MBI = searchBackwards(p, I, MFI); |
408 | 1.02k | if (MBI != MachineBasicBlock::iterator()1.02k ) { |
409 | 81 | MachineInstr *NewMI = postRAConvertToLEA(MFI, MBI); |
410 | 81 | if (NewMI81 ) { |
411 | 10 | ++NumLEAs; |
412 | 10 | DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump();); |
413 | 10 | // now to replace with an equivalent LEA... |
414 | 10 | DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump();); |
415 | 10 | MFI->erase(MBI); |
416 | 10 | MachineBasicBlock::iterator J = |
417 | 10 | static_cast<MachineBasicBlock::iterator>(NewMI); |
418 | 10 | processInstruction(J, MFI); |
419 | 10 | } |
420 | 81 | } |
421 | 1.02k | } |
422 | | |
423 | | void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I, |
424 | 1.55k | MachineFunction::iterator MFI) { |
425 | 1.55k | MachineInstr &MI = *I; |
426 | 1.55k | const int Opcode = MI.getOpcode(); |
427 | 1.55k | if (!isLEA(Opcode)) |
428 | 1.46k | return; |
429 | 85 | if (85 MI.getOperand(5).getReg() != 0 || 85 !MI.getOperand(4).isImm()85 || |
430 | 85 | !TII->isSafeToClobberEFLAGS(*MFI, I)) |
431 | 0 | return; |
432 | 85 | const unsigned DstR = MI.getOperand(0).getReg(); |
433 | 85 | const unsigned SrcR1 = MI.getOperand(1).getReg(); |
434 | 85 | const unsigned SrcR2 = MI.getOperand(3).getReg(); |
435 | 85 | if ((SrcR1 == 0 || 85 SrcR1 != DstR75 ) && (SrcR2 == 0 || 81 SrcR2 != DstR76 )) |
436 | 81 | return; |
437 | 4 | if (4 MI.getOperand(2).getImm() > 14 ) |
438 | 3 | return; |
439 | 1 | DEBUG1 (dbgs() << "FixLEA: Candidate to replace:"; I->dump();); |
440 | 1 | DEBUG(dbgs() << "FixLEA: Replaced by: ";); |
441 | 1 | MachineInstr *NewMI = nullptr; |
442 | 1 | // Make ADD instruction for two registers writing to LEA's destination |
443 | 1 | if (SrcR1 != 0 && 1 SrcR2 != 01 ) { |
444 | 1 | const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode)); |
445 | 1 | const MachineOperand &Src = MI.getOperand(SrcR1 == DstR ? 31 : 10 ); |
446 | 1 | NewMI = |
447 | 1 | BuildMI(*MFI, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src); |
448 | 1 | DEBUG(NewMI->dump();); |
449 | 1 | } |
450 | 1 | // Make ADD instruction for immediate |
451 | 1 | if (MI.getOperand(4).getImm() != 01 ) { |
452 | 0 | const MCInstrDesc &ADDri = |
453 | 0 | TII->get(getADDriFromLEA(Opcode, MI.getOperand(4))); |
454 | 0 | const MachineOperand &SrcR = MI.getOperand(SrcR1 == DstR ? 10 : 30 ); |
455 | 0 | NewMI = BuildMI(*MFI, I, MI.getDebugLoc(), ADDri, DstR) |
456 | 0 | .add(SrcR) |
457 | 0 | .addImm(MI.getOperand(4).getImm()); |
458 | 0 | DEBUG(NewMI->dump();); |
459 | 0 | } |
460 | 1 | if (NewMI1 ) { |
461 | 1 | MFI->erase(I); |
462 | 1 | I = NewMI; |
463 | 1 | } |
464 | 1.55k | } |
465 | | |
466 | | MachineInstr * |
467 | | FixupLEAPass::processInstrForSlow3OpLEA(MachineInstr &MI, |
468 | 114k | MachineFunction::iterator MFI) { |
469 | 114k | |
470 | 114k | const int LEAOpcode = MI.getOpcode(); |
471 | 114k | if (!isLEA(LEAOpcode)) |
472 | 109k | return nullptr; |
473 | 5.41k | |
474 | 5.41k | const MachineOperand &Dst = MI.getOperand(0); |
475 | 5.41k | const MachineOperand &Base = MI.getOperand(1); |
476 | 5.41k | const MachineOperand &Scale = MI.getOperand(2); |
477 | 5.41k | const MachineOperand &Index = MI.getOperand(3); |
478 | 5.41k | const MachineOperand &Offset = MI.getOperand(4); |
479 | 5.41k | const MachineOperand &Segment = MI.getOperand(5); |
480 | 5.41k | |
481 | 5.41k | if (!(isThreeOperandsLEA(Base, Index, Offset) || |
482 | 5.00k | hasInefficientLEABaseReg(Base, Index)) || |
483 | 441 | !TII->isSafeToClobberEFLAGS(*MFI, MI) || |
484 | 426 | Segment.getReg() != X86::NoRegister) |
485 | 4.98k | return nullptr; |
486 | 426 | |
487 | 426 | unsigned int DstR = Dst.getReg(); |
488 | 426 | unsigned int BaseR = Base.getReg(); |
489 | 426 | unsigned int IndexR = Index.getReg(); |
490 | 426 | unsigned SSDstR = |
491 | 426 | (LEAOpcode == X86::LEA64_32r) ? getX86SubSuperRegister(DstR, 64)94 : DstR332 ; |
492 | 426 | bool IsScale1 = Scale.getImm() == 1; |
493 | 426 | bool IsInefficientBase = isInefficientLEAReg(BaseR); |
494 | 426 | bool IsInefficientIndex = isInefficientLEAReg(IndexR); |
495 | 426 | |
496 | 426 | // Skip these cases since it takes more than 2 instructions |
497 | 426 | // to replace the LEA instruction. |
498 | 426 | if (IsInefficientBase && 426 SSDstR == BaseR71 && !IsScale19 ) |
499 | 3 | return nullptr; |
500 | 423 | if (423 LEAOpcode == X86::LEA64_32r && 423 IsInefficientBase93 && |
501 | 15 | (IsInefficientIndex || 15 !IsScale112 )) |
502 | 11 | return nullptr; |
503 | 412 | |
504 | 412 | const DebugLoc DL = MI.getDebugLoc(); |
505 | 412 | const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(LEAOpcode)); |
506 | 412 | const MCInstrDesc &ADDri = TII->get(getADDriFromLEA(LEAOpcode, Offset)); |
507 | 412 | |
508 | 412 | DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump();); |
509 | 412 | DEBUG(dbgs() << "FixLEA: Replaced by: ";); |
510 | 412 | |
511 | 412 | // First try to replace LEA with one or two (for the 3-op LEA case) |
512 | 412 | // add instructions: |
513 | 412 | // 1.lea (%base,%index,1), %base => add %index,%base |
514 | 412 | // 2.lea (%base,%index,1), %index => add %base,%index |
515 | 412 | if (IsScale1 && 412 (DstR == BaseR || 275 DstR == IndexR264 )) { |
516 | 35 | const MachineOperand &Src = DstR == BaseR ? Index11 : Base24 ; |
517 | 35 | MachineInstr *NewMI = |
518 | 35 | BuildMI(*MFI, MI, DL, ADDrr, DstR).addReg(DstR).add(Src); |
519 | 35 | DEBUG(NewMI->dump();); |
520 | 35 | // Create ADD instruction for the Offset in case of 3-Ops LEA. |
521 | 35 | if (hasLEAOffset(Offset)35 ) { |
522 | 33 | NewMI = BuildMI(*MFI, MI, DL, ADDri, DstR).addReg(DstR).add(Offset); |
523 | 33 | DEBUG(NewMI->dump();); |
524 | 33 | } |
525 | 35 | return NewMI; |
526 | 35 | } |
527 | 377 | // If the base is inefficient try switching the index and base operands, |
528 | 377 | // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction: |
529 | 377 | // lea offset(%base,%index,scale),%dst => |
530 | 377 | // lea (%base,%index,scale); add offset,%dst |
531 | 377 | if (377 !IsInefficientBase || 377 (!IsInefficientIndex && 52 IsScale145 )) { |
532 | 358 | MachineInstr *NewMI = BuildMI(*MFI, MI, DL, TII->get(LEAOpcode)) |
533 | 358 | .add(Dst) |
534 | 358 | .add(IsInefficientBase ? Index33 : Base325 ) |
535 | 358 | .add(Scale) |
536 | 358 | .add(IsInefficientBase ? Base33 : Index325 ) |
537 | 358 | .addImm(0) |
538 | 358 | .add(Segment); |
539 | 358 | DEBUG(NewMI->dump();); |
540 | 358 | // Create ADD instruction for the Offset in case of 3-Ops LEA. |
541 | 358 | if (hasLEAOffset(Offset)358 ) { |
542 | 345 | NewMI = BuildMI(*MFI, MI, DL, ADDri, DstR).addReg(DstR).add(Offset); |
543 | 345 | DEBUG(NewMI->dump();); |
544 | 345 | } |
545 | 358 | return NewMI; |
546 | 358 | } |
547 | 19 | // Handle the rest of the cases with inefficient base register: |
548 | 377 | assert(SSDstR != BaseR && "SSDstR == BaseR should be handled already!"); |
549 | 19 | assert(IsInefficientBase && "efficient base should be handled already!"); |
550 | 19 | |
551 | 19 | // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst |
552 | 19 | if (IsScale1 && 19 !hasLEAOffset(Offset)4 ) { |
553 | 2 | TII->copyPhysReg(*MFI, MI, DL, DstR, BaseR, Base.isKill()); |
554 | 2 | DEBUG(MI.getPrevNode()->dump();); |
555 | 2 | |
556 | 2 | MachineInstr *NewMI = |
557 | 2 | BuildMI(*MFI, MI, DL, ADDrr, DstR).addReg(DstR).add(Index); |
558 | 2 | DEBUG(NewMI->dump();); |
559 | 2 | return NewMI; |
560 | 2 | } |
561 | 17 | // lea offset(%base,%index,scale), %dst => |
562 | 17 | // lea offset( ,%index,scale), %dst; add %base,%dst |
563 | 17 | MachineInstr *NewMI = BuildMI(*MFI, MI, DL, TII->get(LEAOpcode)) |
564 | 17 | .add(Dst) |
565 | 17 | .addReg(0) |
566 | 17 | .add(Scale) |
567 | 17 | .add(Index) |
568 | 17 | .add(Offset) |
569 | 17 | .add(Segment); |
570 | 17 | DEBUG(NewMI->dump();); |
571 | 17 | |
572 | 17 | NewMI = BuildMI(*MFI, MI, DL, ADDrr, DstR).addReg(DstR).add(Base); |
573 | 17 | DEBUG(NewMI->dump();); |
574 | 114k | return NewMI; |
575 | 114k | } |
576 | | |
577 | | bool FixupLEAPass::processBasicBlock(MachineFunction &MF, |
578 | 133k | MachineFunction::iterator MFI) { |
579 | 133k | |
580 | 1.01M | for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end()1.01M ; ++I879k ) { |
581 | 879k | if (OptIncDec) |
582 | 763k | if (763k fixupIncDec(I, MFI)763k ) |
583 | 68 | continue; |
584 | 879k | |
585 | 879k | if (879k OptLEA879k ) { |
586 | 118k | if (MF.getSubtarget<X86Subtarget>().isSLM()) |
587 | 1.55k | processInstructionForSLM(I, MFI); |
588 | 118k | |
589 | 116k | else { |
590 | 116k | if (MF.getSubtarget<X86Subtarget>().slow3OpsLEA()116k ) { |
591 | 114k | if (auto *NewMI114k = processInstrForSlow3OpLEA(*I, MFI)) { |
592 | 412 | MFI->erase(I); |
593 | 412 | I = NewMI; |
594 | 412 | } |
595 | 114k | } else |
596 | 2.31k | processInstruction(I, MFI); |
597 | 116k | } |
598 | 118k | } |
599 | 879k | } |
600 | 133k | return false; |
601 | 133k | } |