Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/CodeGen/MachineCSE.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// This pass performs global common subexpression elimination on machine
11
// instructions using a scoped hash table based value numbering scheme. It
12
// must be run while the machine function is still in SSA form.
13
//
14
//===----------------------------------------------------------------------===//
15
16
#include "llvm/ADT/DenseMap.h"
17
#include "llvm/ADT/ScopedHashTable.h"
18
#include "llvm/ADT/SmallPtrSet.h"
19
#include "llvm/ADT/SmallSet.h"
20
#include "llvm/ADT/SmallVector.h"
21
#include "llvm/ADT/Statistic.h"
22
#include "llvm/Analysis/AliasAnalysis.h"
23
#include "llvm/CodeGen/MachineBasicBlock.h"
24
#include "llvm/CodeGen/MachineDominators.h"
25
#include "llvm/CodeGen/MachineFunction.h"
26
#include "llvm/CodeGen/MachineFunctionPass.h"
27
#include "llvm/CodeGen/MachineInstr.h"
28
#include "llvm/CodeGen/MachineOperand.h"
29
#include "llvm/CodeGen/MachineRegisterInfo.h"
30
#include "llvm/CodeGen/Passes.h"
31
#include "llvm/MC/MCInstrDesc.h"
32
#include "llvm/MC/MCRegisterInfo.h"
33
#include "llvm/Pass.h"
34
#include "llvm/Support/Allocator.h"
35
#include "llvm/Support/Debug.h"
36
#include "llvm/Support/RecyclingAllocator.h"
37
#include "llvm/Support/raw_ostream.h"
38
#include "llvm/Target/TargetInstrInfo.h"
39
#include "llvm/Target/TargetOpcodes.h"
40
#include "llvm/Target/TargetRegisterInfo.h"
41
#include "llvm/Target/TargetSubtargetInfo.h"
42
#include <cassert>
43
#include <iterator>
44
#include <utility>
45
#include <vector>
46
47
using namespace llvm;
48
49
#define DEBUG_TYPE "machine-cse"
50
51
STATISTIC(NumCoalesces, "Number of copies coalesced");
52
STATISTIC(NumCSEs,      "Number of common subexpression eliminated");
53
STATISTIC(NumPhysCSEs,
54
          "Number of physreg referencing common subexpr eliminated");
55
STATISTIC(NumCrossBBCSEs,
56
          "Number of cross-MBB physreg referencing CS eliminated");
57
STATISTIC(NumCommutes,  "Number of copies coalesced after commuting");
58
59
namespace {
60
61
  class MachineCSE : public MachineFunctionPass {
62
    const TargetInstrInfo *TII;
63
    const TargetRegisterInfo *TRI;
64
    AliasAnalysis *AA;
65
    MachineDominatorTree *DT;
66
    MachineRegisterInfo *MRI;
67
68
  public:
69
    static char ID; // Pass identification
70
71
33.5k
    MachineCSE() : MachineFunctionPass(ID) {
72
33.5k
      initializeMachineCSEPass(*PassRegistry::getPassRegistry());
73
33.5k
    }
74
75
    bool runOnMachineFunction(MachineFunction &MF) override;
76
77
33.4k
    void getAnalysisUsage(AnalysisUsage &AU) const override {
78
33.4k
      AU.setPreservesCFG();
79
33.4k
      MachineFunctionPass::getAnalysisUsage(AU);
80
33.4k
      AU.addRequired<AAResultsWrapperPass>();
81
33.4k
      AU.addPreservedID(MachineLoopInfoID);
82
33.4k
      AU.addRequired<MachineDominatorTree>();
83
33.4k
      AU.addPreserved<MachineDominatorTree>();
84
33.4k
    }
85
86
603k
    void releaseMemory() override {
87
603k
      ScopeMap.clear();
88
603k
      Exps.clear();
89
603k
    }
90
91
  private:
92
    using AllocatorTy = RecyclingAllocator<BumpPtrAllocator,
93
                            ScopedHashTableVal<MachineInstr *, unsigned>>;
94
    using ScopedHTType =
95
        ScopedHashTable<MachineInstr *, unsigned, MachineInstrExpressionTrait,
96
                        AllocatorTy>;
97
    using ScopeType = ScopedHTType::ScopeTy;
98
99
    unsigned LookAheadLimit = 0;
100
    DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap;
101
    ScopedHTType VNT;
102
    SmallVector<MachineInstr *, 64> Exps;
103
    unsigned CurrVN = 0;
104
105
    bool PerformTrivialCopyPropagation(MachineInstr *MI,
106
                                       MachineBasicBlock *MBB);
107
    bool isPhysDefTriviallyDead(unsigned Reg,
108
                                MachineBasicBlock::const_iterator I,
109
                                MachineBasicBlock::const_iterator E) const;
110
    bool hasLivePhysRegDefUses(const MachineInstr *MI,
111
                               const MachineBasicBlock *MBB,
112
                               SmallSet<unsigned,8> &PhysRefs,
113
                               SmallVectorImpl<unsigned> &PhysDefs,
114
                               bool &PhysUseDef) const;
115
    bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
116
                          SmallSet<unsigned,8> &PhysRefs,
117
                          SmallVectorImpl<unsigned> &PhysDefs,
118
                          bool &NonLocal) const;
119
    bool isCSECandidate(MachineInstr *MI);
120
    bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
121
                           MachineInstr *CSMI, MachineInstr *MI);
122
    void EnterScope(MachineBasicBlock *MBB);
123
    void ExitScope(MachineBasicBlock *MBB);
124
    bool ProcessBlock(MachineBasicBlock *MBB);
125
    void ExitScopeIfDone(MachineDomTreeNode *Node,
126
                         DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren);
127
    bool PerformCSE(MachineDomTreeNode *Node);
128
  };
129
130
} // end anonymous namespace
131
132
char MachineCSE::ID = 0;
133
134
char &llvm::MachineCSEID = MachineCSE::ID;
135
136
36.7k
INITIALIZE_PASS_BEGIN36.7k
(MachineCSE, DEBUG_TYPE,
137
36.7k
                      "Machine Common Subexpression Elimination", false, false)
138
36.7k
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
139
36.7k
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
140
36.7k
INITIALIZE_PASS_END(MachineCSE, DEBUG_TYPE,
141
                    "Machine Common Subexpression Elimination", false, false)
142
143
/// The source register of a COPY machine instruction can be propagated to all
144
/// its users, and this propagation could increase the probability of finding
145
/// common subexpressions. If the COPY has only one user, the COPY itself can
146
/// be removed.
147
bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
148
7.58M
                                               MachineBasicBlock *MBB) {
149
7.58M
  bool Changed = false;
150
27.0M
  for (MachineOperand &MO : MI->operands()) {
151
27.0M
    if (
!MO.isReg() || 27.0M
!MO.isUse()17.9M
)
152
18.2M
      continue;
153
8.79M
    unsigned Reg = MO.getReg();
154
8.79M
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
155
1.24M
      continue;
156
7.55M
    bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
157
7.55M
    MachineInstr *DefMI = MRI->getVRegDef(Reg);
158
7.55M
    if (!DefMI->isCopy())
159
5.29M
      continue;
160
2.25M
    unsigned SrcReg = DefMI->getOperand(1).getReg();
161
2.25M
    if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
162
830k
      continue;
163
1.42M
    
if (1.42M
DefMI->getOperand(0).getSubReg()1.42M
)
164
12
      continue;
165
1.42M
    // FIXME: We should trivially coalesce subregister copies to expose CSE
166
1.42M
    // opportunities on instructions with truncated operands (see
167
1.42M
    // cse-add-with-overflow.ll). This can be done here as follows:
168
1.42M
    // if (SrcSubReg)
169
1.42M
    //  RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
170
1.42M
    //                                     SrcSubReg);
171
1.42M
    // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
172
1.42M
    //
173
1.42M
    // The 2-addr pass has been updated to handle coalesced subregs. However,
174
1.42M
    // some machine-specific code still can't handle it.
175
1.42M
    // To handle it properly we also need a way find a constrained subregister
176
1.42M
    // class given a super-reg class and subreg index.
177
1.42M
    
if (1.42M
DefMI->getOperand(1).getSubReg()1.42M
)
178
415k
      continue;
179
1.01M
    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
180
1.01M
    if (!MRI->constrainRegClass(SrcReg, RC))
181
58.2k
      continue;
182
952k
    
DEBUG952k
(dbgs() << "Coalescing: " << *DefMI);
183
952k
    DEBUG(dbgs() << "***     to: " << *MI);
184
952k
    // Propagate SrcReg of copies to MI.
185
952k
    MO.setReg(SrcReg);
186
952k
    MRI->clearKillFlags(SrcReg);
187
952k
    // Coalesce single use copies.
188
952k
    if (
OnlyOneUse952k
) {
189
417k
      DefMI->eraseFromParent();
190
417k
      ++NumCoalesces;
191
417k
    }
192
27.0M
    Changed = true;
193
27.0M
  }
194
7.58M
195
7.58M
  return Changed;
196
7.58M
}
197
198
bool
199
MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
200
                                   MachineBasicBlock::const_iterator I,
201
42.6k
                                   MachineBasicBlock::const_iterator E) const {
202
42.6k
  unsigned LookAheadLeft = LookAheadLimit;
203
57.6k
  while (
LookAheadLeft57.6k
) {
204
57.3k
    // Skip over dbg_value's.
205
57.3k
    I = skipDebugInstructionsForward(I, E);
206
57.3k
207
57.3k
    if (I == E)
208
57.3k
      // Reached end of block, we don't know if register is dead or not.
209
268
      return false;
210
57.1k
211
57.1k
    bool SeenDef = false;
212
196k
    for (const MachineOperand &MO : I->operands()) {
213
196k
      if (
MO.isRegMask() && 196k
MO.clobbersPhysReg(Reg)177
)
214
177
        SeenDef = true;
215
196k
      if (
!MO.isReg() || 196k
!MO.getReg()113k
)
216
83.9k
        continue;
217
112k
      
if (112k
!TRI->regsOverlap(MO.getReg(), Reg)112k
)
218
69.8k
        continue;
219
42.4k
      
if (42.4k
MO.isUse()42.4k
)
220
42.4k
        // Found a use!
221
40.4k
        return false;
222
2.05k
      SeenDef = true;
223
2.05k
    }
224
16.6k
    
if (16.6k
SeenDef16.6k
)
225
16.6k
      // See a def of Reg (or an alias) before encountering any use, it's
226
16.6k
      // trivially dead.
227
1.68k
      return true;
228
15.0k
229
15.0k
    --LookAheadLeft;
230
15.0k
    ++I;
231
15.0k
  }
232
318
  return false;
233
42.6k
}
234
235
/// hasLivePhysRegDefUses - Return true if the specified instruction read/write
236
/// physical registers (except for dead defs of physical registers). It also
237
/// returns the physical register def by reference if it's the only one and the
238
/// instruction does not uses a physical register.
239
bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
240
                                       const MachineBasicBlock *MBB,
241
                                       SmallSet<unsigned,8> &PhysRefs,
242
                                       SmallVectorImpl<unsigned> &PhysDefs,
243
564k
                                       bool &PhysUseDef) const{
244
564k
  // First, add all uses to PhysRefs.
245
1.72M
  for (const MachineOperand &MO : MI->operands()) {
246
1.72M
    if (
!MO.isReg() || 1.72M
MO.isDef()999k
)
247
1.36M
      continue;
248
364k
    unsigned Reg = MO.getReg();
249
364k
    if (!Reg)
250
22.5k
      continue;
251
342k
    
if (342k
TargetRegisterInfo::isVirtualRegister(Reg)342k
)
252
111k
      continue;
253
230k
    // Reading constant physregs is ok.
254
230k
    
if (230k
!MRI->isConstantPhysReg(Reg)230k
)
255
289k
      
for (MCRegAliasIterator AI(Reg, TRI, true); 99.2k
AI.isValid()289k
;
++AI190k
)
256
190k
        PhysRefs.insert(*AI);
257
1.72M
  }
258
564k
259
564k
  // Next, collect all defs into PhysDefs.  If any is already in PhysRefs
260
564k
  // (which currently contains only uses), set the PhysUseDef flag.
261
564k
  PhysUseDef = false;
262
564k
  MachineBasicBlock::const_iterator I = MI; I = std::next(I);
263
1.72M
  for (const MachineOperand &MO : MI->operands()) {
264
1.72M
    if (
!MO.isReg() || 1.72M
!MO.isDef()999k
)
265
1.09M
      continue;
266
634k
    unsigned Reg = MO.getReg();
267
634k
    if (!Reg)
268
0
      continue;
269
634k
    
if (634k
TargetRegisterInfo::isVirtualRegister(Reg)634k
)
270
523k
      continue;
271
110k
    // Check against PhysRefs even if the def is "dead".
272
110k
    
if (110k
PhysRefs.count(Reg)110k
)
273
30.1k
      PhysUseDef = true;
274
110k
    // If the def is dead, it's ok. But the def may not marked "dead". That's
275
110k
    // common since this pass is run before livevariables. We can scan
276
110k
    // forward a few instructions and check if it is obviously dead.
277
110k
    if (
!MO.isDead() && 110k
!isPhysDefTriviallyDead(Reg, I, MBB->end())42.6k
)
278
40.9k
      PhysDefs.push_back(Reg);
279
1.72M
  }
280
564k
281
564k
  // Finally, add all defs to PhysRefs as well.
282
605k
  for (unsigned i = 0, e = PhysDefs.size(); 
i != e605k
;
++i40.9k
)
283
93.3k
    
for (MCRegAliasIterator AI(PhysDefs[i], TRI, true); 40.9k
AI.isValid()93.3k
;
++AI52.3k
)
284
52.3k
      PhysRefs.insert(*AI);
285
564k
286
564k
  return !PhysRefs.empty();
287
564k
}
288
289
bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
290
                                  SmallSet<unsigned,8> &PhysRefs,
291
                                  SmallVectorImpl<unsigned> &PhysDefs,
292
106k
                                  bool &NonLocal) const {
293
106k
  // For now conservatively returns false if the common subexpression is
294
106k
  // not in the same basic block as the given instruction. The only exception
295
106k
  // is if the common subexpression is in the sole predecessor block.
296
106k
  const MachineBasicBlock *MBB = MI->getParent();
297
106k
  const MachineBasicBlock *CSMBB = CSMI->getParent();
298
106k
299
106k
  bool CrossMBB = false;
300
106k
  if (
CSMBB != MBB106k
) {
301
76.0k
    if (
MBB->pred_size() != 1 || 76.0k
*MBB->pred_begin() != CSMBB60.8k
)
302
33.3k
      return false;
303
42.7k
304
54.6k
    
for (unsigned i = 0, e = PhysDefs.size(); 42.7k
i != e54.6k
;
++i11.9k
) {
305
12.1k
      if (
MRI->isAllocatable(PhysDefs[i]) || 12.1k
MRI->isReserved(PhysDefs[i])12.0k
)
306
12.1k
        // Avoid extending live range of physical registers if they are
307
12.1k
        //allocatable or reserved.
308
210
        return false;
309
12.1k
    }
310
42.5k
    CrossMBB = true;
311
42.5k
  }
312
73.2k
  MachineBasicBlock::const_iterator I = CSMI; I = std::next(I);
313
73.2k
  MachineBasicBlock::const_iterator E = MI;
314
73.2k
  MachineBasicBlock::const_iterator EE = CSMBB->end();
315
73.2k
  unsigned LookAheadLeft = LookAheadLimit;
316
918k
  while (
LookAheadLeft918k
) {
317
886k
    // Skip over dbg_value's.
318
886k
    while (
I != E && 886k
I != EE876k
&&
I->isDebugValue()844k
)
319
6
      ++I;
320
886k
321
886k
    if (
I == EE886k
) {
322
31.9k
      assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
323
31.9k
      (void)CrossMBB;
324
31.9k
      CrossMBB = false;
325
31.9k
      NonLocal = true;
326
31.9k
      I = MBB->begin();
327
31.9k
      EE = MBB->end();
328
31.9k
      continue;
329
31.9k
    }
330
854k
331
854k
    
if (854k
I == E854k
)
332
9.48k
      return true;
333
844k
334
844k
    
for (const MachineOperand &MO : I->operands()) 844k
{
335
3.02M
      // RegMasks go on instructions like calls that clobber lots of physregs.
336
3.02M
      // Don't attempt to CSE across such an instruction.
337
3.02M
      if (MO.isRegMask())
338
98
        return false;
339
3.02M
      
if (3.02M
!MO.isReg() || 3.02M
!MO.isDef()2.22M
)
340
2.25M
        continue;
341
769k
      unsigned MOReg = MO.getReg();
342
769k
      if (TargetRegisterInfo::isVirtualRegister(MOReg))
343
707k
        continue;
344
61.3k
      
if (61.3k
PhysRefs.count(MOReg)61.3k
)
345
31.5k
        return false;
346
812k
    }
347
812k
348
812k
    --LookAheadLeft;
349
812k
    ++I;
350
812k
  }
351
73.2k
352
32.0k
  return false;
353
106k
}
354
355
37.6M
bool MachineCSE::isCSECandidate(MachineInstr *MI) {
356
37.6M
  if (
MI->isPosition() || 37.6M
MI->isPHI()37.5M
||
MI->isImplicitDef()36.6M
||
MI->isKill()36.4M
||
357
37.6M
      
MI->isInlineAsm()36.4M
||
MI->isDebugValue()36.4M
)
358
1.20M
    return false;
359
36.4M
360
36.4M
  // Ignore copies.
361
36.4M
  
if (36.4M
MI->isCopyLike()36.4M
)
362
12.7M
    return false;
363
23.7M
364
23.7M
  // Ignore stuff that we obviously can't move.
365
23.7M
  
if (23.7M
MI->mayStore() || 23.7M
MI->isCall()21.7M
||
MI->isTerminator()19.3M
||
366
14.6M
      MI->hasUnmodeledSideEffects())
367
13.3M
    return false;
368
10.3M
369
10.3M
  
if (10.3M
MI->mayLoad()10.3M
) {
370
2.48M
    // Okay, this instruction does a load. As a refinement, we allow the target
371
2.48M
    // to decide whether the loaded value is actually a constant. If so, we can
372
2.48M
    // actually use it as a load.
373
2.48M
    if (!MI->isDereferenceableInvariantLoad(AA))
374
2.48M
      // FIXME: we should be able to hoist loads with no other side effects if
375
2.48M
      // there are no other instructions which can change memory in this loop.
376
2.48M
      // This is a trivial form of alias analysis.
377
2.26M
      return false;
378
8.12M
  }
379
8.12M
380
8.12M
  // Ignore stack guard loads, otherwise the register that holds CSEed value may
381
8.12M
  // be spilled and get loaded back with corrupted data.
382
8.12M
  
if (8.12M
MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD8.12M
)
383
6.45k
    return false;
384
8.11M
385
8.11M
  return true;
386
8.11M
}
387
388
/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
389
/// common expression that defines Reg.
390
bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
391
429k
                                   MachineInstr *CSMI, MachineInstr *MI) {
392
429k
  // FIXME: Heuristics that works around the lack the live range splitting.
393
429k
394
429k
  // If CSReg is used at all uses of Reg, CSE should not increase register
395
429k
  // pressure of CSReg.
396
429k
  bool MayIncreasePressure = true;
397
429k
  if (TargetRegisterInfo::isVirtualRegister(CSReg) &&
398
429k
      
TargetRegisterInfo::isVirtualRegister(Reg)429k
) {
399
429k
    MayIncreasePressure = false;
400
429k
    SmallPtrSet<MachineInstr*, 8> CSUses;
401
1.80M
    for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
402
1.80M
      CSUses.insert(&MI);
403
1.80M
    }
404
427k
    for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
405
427k
      if (
!CSUses.count(&MI)427k
) {
406
426k
        MayIncreasePressure = true;
407
426k
        break;
408
426k
      }
409
429k
    }
410
429k
  }
411
429k
  if (
!MayIncreasePressure429k
)
return true2.90k
;
412
426k
413
426k
  // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
414
426k
  // an immediate predecessor. We don't want to increase register pressure and
415
426k
  // end up causing other computation to be spilled.
416
426k
  
if (426k
TII->isAsCheapAsAMove(*MI)426k
) {
417
219k
    MachineBasicBlock *CSBB = CSMI->getParent();
418
219k
    MachineBasicBlock *BB = MI->getParent();
419
219k
    if (
CSBB != BB && 219k
!CSBB->isSuccessor(BB)198k
)
420
134k
      return false;
421
291k
  }
422
291k
423
291k
  // Heuristics #2: If the expression doesn't not use a vr and the only use
424
291k
  // of the redundant computation are copies, do not cse.
425
291k
  bool HasVRegUse = false;
426
660k
  for (const MachineOperand &MO : MI->operands()) {
427
660k
    if (
MO.isReg() && 660k
MO.isUse()347k
&&
428
660k
        
TargetRegisterInfo::isVirtualRegister(MO.getReg())52.5k
) {
429
40.1k
      HasVRegUse = true;
430
40.1k
      break;
431
40.1k
    }
432
291k
  }
433
291k
  if (
!HasVRegUse291k
) {
434
251k
    bool HasNonCopyUse = false;
435
268k
    for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
436
268k
      // Ignore copies.
437
268k
      if (
!MI.isCopyLike()268k
) {
438
190k
        HasNonCopyUse = true;
439
190k
        break;
440
190k
      }
441
251k
    }
442
251k
    if (!HasNonCopyUse)
443
61.2k
      return false;
444
230k
  }
445
230k
446
230k
  // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
447
230k
  // it unless the defined value is already used in the BB of the new use.
448
230k
  bool HasPHI = false;
449
230k
  SmallPtrSet<MachineBasicBlock*, 4> CSBBs;
450
1.40M
  for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
451
1.40M
    HasPHI |= MI.isPHI();
452
1.40M
    CSBBs.insert(MI.getParent());
453
1.40M
  }
454
230k
455
230k
  if (!HasPHI)
456
225k
    return true;
457
4.35k
  return CSBBs.count(MI->getParent());
458
4.35k
}
459
460
3.82M
void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
461
3.82M
  DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
462
3.82M
  ScopeType *Scope = new ScopeType(VNT);
463
3.82M
  ScopeMap[MBB] = Scope;
464
3.82M
}
465
466
3.82M
void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
467
3.82M
  DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
468
3.82M
  DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
469
3.82M
  assert(SI != ScopeMap.end());
470
3.82M
  delete SI->second;
471
3.82M
  ScopeMap.erase(SI);
472
3.82M
}
473
474
3.82M
bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
475
3.82M
  bool Changed = false;
476
3.82M
477
3.82M
  SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
478
3.82M
  SmallVector<unsigned, 2> ImplicitDefsToUpdate;
479
3.82M
  SmallVector<unsigned, 2> ImplicitDefs;
480
41.4M
  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); 
I != E41.4M
; ) {
481
37.6M
    MachineInstr *MI = &*I;
482
37.6M
    ++I;
483
37.6M
484
37.6M
    if (!isCSECandidate(MI))
485
29.5M
      continue;
486
8.11M
487
8.11M
    bool FoundCSE = VNT.count(MI);
488
8.11M
    if (
!FoundCSE8.11M
) {
489
7.58M
      // Using trivial copy propagation to find more CSE opportunities.
490
7.58M
      if (
PerformTrivialCopyPropagation(MI, MBB)7.58M
) {
491
806k
        Changed = true;
492
806k
493
806k
        // After coalescing MI itself may become a copy.
494
806k
        if (MI->isCopyLike())
495
0
          continue;
496
806k
497
806k
        // Try again to see if CSE is possible.
498
806k
        FoundCSE = VNT.count(MI);
499
806k
      }
500
7.58M
    }
501
8.11M
502
8.11M
    // Commute commutable instructions.
503
8.11M
    bool Commuted = false;
504
8.11M
    if (
!FoundCSE && 8.11M
MI->isCommutable()7.55M
) {
505
208k
      if (MachineInstr *
NewMI208k
= TII->commuteInstruction(*MI)) {
506
175k
        Commuted = true;
507
175k
        FoundCSE = VNT.count(NewMI);
508
175k
        if (
NewMI != MI175k
) {
509
0
          // New instruction. It doesn't need to be kept.
510
0
          NewMI->eraseFromParent();
511
0
          Changed = true;
512
175k
        } else 
if (175k
!FoundCSE175k
)
513
175k
          // MI was changed but it didn't help, commute it back!
514
175k
          (void)TII->commuteInstruction(*MI);
515
175k
      }
516
208k
    }
517
8.11M
518
8.11M
    // If the instruction defines physical registers and the values *may* be
519
8.11M
    // used, then it's not safe to replace it with a common subexpression.
520
8.11M
    // It's also not safe if the instruction uses physical registers.
521
8.11M
    bool CrossMBBPhysDef = false;
522
8.11M
    SmallSet<unsigned, 8> PhysRefs;
523
8.11M
    SmallVector<unsigned, 2> PhysDefs;
524
8.11M
    bool PhysUseDef = false;
525
8.11M
    if (
FoundCSE && 8.11M
hasLivePhysRegDefUses(MI, MBB, PhysRefs,
526
8.11M
                                          PhysDefs, PhysUseDef)) {
527
136k
      FoundCSE = false;
528
136k
529
136k
      // ... Unless the CS is local or is in the sole predecessor block
530
136k
      // and it also defines the physical register which is not clobbered
531
136k
      // in between and the physical register uses were not clobbered.
532
136k
      // This can never be the case if the instruction both uses and
533
136k
      // defines the same physical register, which was detected above.
534
136k
      if (
!PhysUseDef136k
) {
535
106k
        unsigned CSVN = VNT.lookup(MI);
536
106k
        MachineInstr *CSMI = Exps[CSVN];
537
106k
        if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
538
9.48k
          FoundCSE = true;
539
106k
      }
540
136k
    }
541
8.11M
542
8.11M
    if (
!FoundCSE8.11M
) {
543
7.67M
      VNT.insert(MI, CurrVN++);
544
7.67M
      Exps.push_back(MI);
545
7.67M
      continue;
546
7.67M
    }
547
437k
548
437k
    // Found a common subexpression, eliminate it.
549
437k
    unsigned CSVN = VNT.lookup(MI);
550
437k
    MachineInstr *CSMI = Exps[CSVN];
551
437k
    DEBUG(dbgs() << "Examining: " << *MI);
552
437k
    DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
553
437k
554
437k
    // Check if it's profitable to perform this CSE.
555
437k
    bool DoCSE = true;
556
437k
    unsigned NumDefs = MI->getDesc().getNumDefs() +
557
437k
                       MI->getDesc().getNumImplicitDefs();
558
437k
559
710k
    for (unsigned i = 0, e = MI->getNumOperands(); 
NumDefs && 710k
i != e473k
;
++i273k
) {
560
473k
      MachineOperand &MO = MI->getOperand(i);
561
473k
      if (
!MO.isReg() || 473k
!MO.isDef()460k
)
562
26.9k
        continue;
563
446k
      unsigned OldReg = MO.getReg();
564
446k
      unsigned NewReg = CSMI->getOperand(i).getReg();
565
446k
566
446k
      // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
567
446k
      // we should make sure it is not dead at CSMI.
568
446k
      if (
MO.isImplicit() && 446k
!MO.isDead()10.5k
&&
CSMI->getOperand(i).isDead()3.31k
)
569
52
        ImplicitDefsToUpdate.push_back(i);
570
446k
571
446k
      // Keep track of implicit defs of CSMI and MI, to clear possibly
572
446k
      // made-redundant kill flags.
573
446k
      if (
MO.isImplicit() && 446k
!MO.isDead()10.5k
&&
OldReg == NewReg3.31k
)
574
3.31k
        ImplicitDefs.push_back(OldReg);
575
446k
576
446k
      if (
OldReg == NewReg446k
) {
577
17.6k
        --NumDefs;
578
17.6k
        continue;
579
17.6k
      }
580
429k
581
446k
      assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
582
429k
             TargetRegisterInfo::isVirtualRegister(NewReg) &&
583
429k
             "Do not CSE physical register defs!");
584
429k
585
429k
      if (
!isProfitableToCSE(NewReg, OldReg, CSMI, MI)429k
) {
586
200k
        DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
587
200k
        DoCSE = false;
588
200k
        break;
589
200k
      }
590
229k
591
229k
      // Don't perform CSE if the result of the old instruction cannot exist
592
229k
      // within the register class of the new instruction.
593
229k
      const TargetRegisterClass *OldRC = MRI->getRegClass(OldReg);
594
229k
      if (
!MRI->constrainRegClass(NewReg, OldRC)229k
) {
595
3
        DEBUG(dbgs() << "*** Not the same register class, avoid CSE!\n");
596
3
        DoCSE = false;
597
3
        break;
598
3
      }
599
229k
600
229k
      CSEPairs.push_back(std::make_pair(OldReg, NewReg));
601
229k
      --NumDefs;
602
229k
    }
603
437k
604
437k
    // Actually perform the elimination.
605
437k
    if (
DoCSE437k
) {
606
229k
      for (std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
607
229k
        unsigned OldReg = CSEPair.first;
608
229k
        unsigned NewReg = CSEPair.second;
609
229k
        // OldReg may have been unused but is used now, clear the Dead flag
610
229k
        MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
611
229k
        assert(Def != nullptr && "CSEd register has no unique definition?");
612
229k
        Def->clearRegisterDeads(NewReg);
613
229k
        // Replace with NewReg and clear kill flags which may be wrong now.
614
229k
        MRI->replaceRegWith(OldReg, NewReg);
615
229k
        MRI->clearKillFlags(NewReg);
616
229k
      }
617
236k
618
236k
      // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
619
236k
      // we should make sure it is not dead at CSMI.
620
236k
      for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
621
52
        CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
622
236k
623
236k
      // Go through implicit defs of CSMI and MI, and clear the kill flags on
624
236k
      // their uses in all the instructions between CSMI and MI.
625
236k
      // We might have made some of the kill flags redundant, consider:
626
236k
      //   subs  ... %NZCV<imp-def>        <- CSMI
627
236k
      //   csinc ... %NZCV<imp-use,kill>   <- this kill flag isn't valid anymore
628
236k
      //   subs  ... %NZCV<imp-def>        <- MI, to be eliminated
629
236k
      //   csinc ... %NZCV<imp-use,kill>
630
236k
      // Since we eliminated MI, and reused a register imp-def'd by CSMI
631
236k
      // (here %NZCV), that register, if it was killed before MI, should have
632
236k
      // that kill flag removed, because it's lifetime was extended.
633
236k
      if (
CSMI->getParent() == MI->getParent()236k
) {
634
1.05M
        for (MachineBasicBlock::iterator II = CSMI, IE = MI; 
II != IE1.05M
;
++II1.03M
)
635
1.03M
          for (auto ImplicitDef : ImplicitDefs)
636
4.94k
            
if (MachineOperand *4.94k
MO4.94k
= II->findRegisterUseOperand(
637
4.94k
                    ImplicitDef, /*isKill=*/true, TRI))
638
0
              MO->setIsKill(false);
639
236k
      } else {
640
214k
        // If the instructions aren't in the same BB, bail out and clear the
641
214k
        // kill flag on all uses of the imp-def'd register.
642
214k
        for (auto ImplicitDef : ImplicitDefs)
643
1.63k
          MRI->clearKillFlags(ImplicitDef);
644
214k
      }
645
236k
646
236k
      if (
CrossMBBPhysDef236k
) {
647
1.63k
        // Add physical register defs now coming in from a predecessor to MBB
648
1.63k
        // livein list.
649
3.25k
        while (
!PhysDefs.empty()3.25k
) {
650
1.62k
          unsigned LiveIn = PhysDefs.pop_back_val();
651
1.62k
          if (!MBB->isLiveIn(LiveIn))
652
1.62k
            MBB->addLiveIn(LiveIn);
653
1.62k
        }
654
1.63k
        ++NumCrossBBCSEs;
655
1.63k
      }
656
236k
657
236k
      MI->eraseFromParent();
658
236k
      ++NumCSEs;
659
236k
      if (!PhysRefs.empty())
660
9.45k
        ++NumPhysCSEs;
661
236k
      if (Commuted)
662
1
        ++NumCommutes;
663
236k
      Changed = true;
664
437k
    } else {
665
200k
      VNT.insert(MI, CurrVN++);
666
200k
      Exps.push_back(MI);
667
200k
    }
668
37.6M
    CSEPairs.clear();
669
37.6M
    ImplicitDefsToUpdate.clear();
670
37.6M
    ImplicitDefs.clear();
671
37.6M
  }
672
3.82M
673
3.82M
  return Changed;
674
3.82M
}
675
676
/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
677
/// dominator tree node if its a leaf or all of its children are done. Walk
678
/// up the dominator tree to destroy ancestors which are now done.
679
void
680
MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
681
3.82M
                        DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren) {
682
3.82M
  if (OpenChildren[Node])
683
1.99M
    return;
684
1.82M
685
1.82M
  // Pop scope.
686
1.82M
  ExitScope(Node->getBlock());
687
1.82M
688
1.82M
  // Now traverse upwards to pop ancestors whose offsprings are all done.
689
3.82M
  while (MachineDomTreeNode *
Parent3.82M
= Node->getIDom()) {
690
3.21M
    unsigned Left = --OpenChildren[Parent];
691
3.21M
    if (Left != 0)
692
1.21M
      break;
693
1.99M
    ExitScope(Parent->getBlock());
694
1.99M
    Node = Parent;
695
1.99M
  }
696
3.82M
}
697
698
603k
bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
699
603k
  SmallVector<MachineDomTreeNode*, 32> Scopes;
700
603k
  SmallVector<MachineDomTreeNode*, 8> WorkList;
701
603k
  DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
702
603k
703
603k
  CurrVN = 0;
704
603k
705
603k
  // Perform a DFS walk to determine the order of visit.
706
603k
  WorkList.push_back(Node);
707
3.82M
  do {
708
3.82M
    Node = WorkList.pop_back_val();
709
3.82M
    Scopes.push_back(Node);
710
3.82M
    const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
711
3.82M
    OpenChildren[Node] = Children.size();
712
3.82M
    for (MachineDomTreeNode *Child : Children)
713
3.21M
      WorkList.push_back(Child);
714
3.82M
  } while (!WorkList.empty());
715
603k
716
603k
  // Now perform CSE.
717
603k
  bool Changed = false;
718
3.82M
  for (MachineDomTreeNode *Node : Scopes) {
719
3.82M
    MachineBasicBlock *MBB = Node->getBlock();
720
3.82M
    EnterScope(MBB);
721
3.82M
    Changed |= ProcessBlock(MBB);
722
3.82M
    // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
723
3.82M
    ExitScopeIfDone(Node, OpenChildren);
724
3.82M
  }
725
603k
726
603k
  return Changed;
727
603k
}
728
729
603k
bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
730
603k
  if (skipFunction(*MF.getFunction()))
731
45
    return false;
732
603k
733
603k
  TII = MF.getSubtarget().getInstrInfo();
734
603k
  TRI = MF.getSubtarget().getRegisterInfo();
735
603k
  MRI = &MF.getRegInfo();
736
603k
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
737
603k
  DT = &getAnalysis<MachineDominatorTree>();
738
603k
  LookAheadLimit = TII->getMachineCSELookAheadLimit();
739
603k
  return PerformCSE(DT->getRootNode());
740
603k
}