Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/CodeGen/MachineCSE.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This pass performs global common subexpression elimination on machine
10
// instructions using a scoped hash table based value numbering scheme. It
11
// must be run while the machine function is still in SSA form.
12
//
13
//===----------------------------------------------------------------------===//
14
15
#include "llvm/ADT/DenseMap.h"
16
#include "llvm/ADT/ScopedHashTable.h"
17
#include "llvm/ADT/SmallPtrSet.h"
18
#include "llvm/ADT/SmallSet.h"
19
#include "llvm/ADT/SmallVector.h"
20
#include "llvm/ADT/Statistic.h"
21
#include "llvm/Analysis/AliasAnalysis.h"
22
#include "llvm/Analysis/CFG.h"
23
#include "llvm/CodeGen/MachineBasicBlock.h"
24
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
25
#include "llvm/CodeGen/MachineDominators.h"
26
#include "llvm/CodeGen/MachineFunction.h"
27
#include "llvm/CodeGen/MachineFunctionPass.h"
28
#include "llvm/CodeGen/MachineInstr.h"
29
#include "llvm/CodeGen/MachineOperand.h"
30
#include "llvm/CodeGen/MachineRegisterInfo.h"
31
#include "llvm/CodeGen/Passes.h"
32
#include "llvm/CodeGen/TargetInstrInfo.h"
33
#include "llvm/CodeGen/TargetOpcodes.h"
34
#include "llvm/CodeGen/TargetRegisterInfo.h"
35
#include "llvm/CodeGen/TargetSubtargetInfo.h"
36
#include "llvm/MC/MCInstrDesc.h"
37
#include "llvm/MC/MCRegisterInfo.h"
38
#include "llvm/Pass.h"
39
#include "llvm/Support/Allocator.h"
40
#include "llvm/Support/Debug.h"
41
#include "llvm/Support/RecyclingAllocator.h"
42
#include "llvm/Support/raw_ostream.h"
43
#include <cassert>
44
#include <iterator>
45
#include <utility>
46
#include <vector>
47
48
using namespace llvm;
49
50
#define DEBUG_TYPE "machine-cse"
51
52
STATISTIC(NumCoalesces, "Number of copies coalesced");
53
STATISTIC(NumCSEs,      "Number of common subexpression eliminated");
54
STATISTIC(NumPREs,      "Number of partial redundant expression"
55
                        " transformed to fully redundant");
56
STATISTIC(NumPhysCSEs,
57
          "Number of physreg referencing common subexpr eliminated");
58
STATISTIC(NumCrossBBCSEs,
59
          "Number of cross-MBB physreg referencing CS eliminated");
60
STATISTIC(NumCommutes,  "Number of copies coalesced after commuting");
61
62
namespace {
63
64
  class MachineCSE : public MachineFunctionPass {
65
    const TargetInstrInfo *TII;
66
    const TargetRegisterInfo *TRI;
67
    AliasAnalysis *AA;
68
    MachineDominatorTree *DT;
69
    MachineRegisterInfo *MRI;
70
    MachineBlockFrequencyInfo *MBFI;
71
72
  public:
73
    static char ID; // Pass identification
74
75
36.8k
    MachineCSE() : MachineFunctionPass(ID) {
76
36.8k
      initializeMachineCSEPass(*PassRegistry::getPassRegistry());
77
36.8k
    }
78
79
    bool runOnMachineFunction(MachineFunction &MF) override;
80
81
36.6k
    void getAnalysisUsage(AnalysisUsage &AU) const override {
82
36.6k
      AU.setPreservesCFG();
83
36.6k
      MachineFunctionPass::getAnalysisUsage(AU);
84
36.6k
      AU.addRequired<AAResultsWrapperPass>();
85
36.6k
      AU.addPreservedID(MachineLoopInfoID);
86
36.6k
      AU.addRequired<MachineDominatorTree>();
87
36.6k
      AU.addPreserved<MachineDominatorTree>();
88
36.6k
      AU.addRequired<MachineBlockFrequencyInfo>();
89
36.6k
      AU.addPreserved<MachineBlockFrequencyInfo>();
90
36.6k
    }
91
92
514k
    void releaseMemory() override {
93
514k
      ScopeMap.clear();
94
514k
      PREMap.clear();
95
514k
      Exps.clear();
96
514k
    }
97
98
  private:
99
    using AllocatorTy = RecyclingAllocator<BumpPtrAllocator,
100
                            ScopedHashTableVal<MachineInstr *, unsigned>>;
101
    using ScopedHTType =
102
        ScopedHashTable<MachineInstr *, unsigned, MachineInstrExpressionTrait,
103
                        AllocatorTy>;
104
    using ScopeType = ScopedHTType::ScopeTy;
105
    using PhysDefVector = SmallVector<std::pair<unsigned, unsigned>, 2>;
106
107
    unsigned LookAheadLimit = 0;
108
    DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap;
109
    DenseMap<MachineInstr *, MachineBasicBlock *, MachineInstrExpressionTrait>
110
        PREMap;
111
    ScopedHTType VNT;
112
    SmallVector<MachineInstr *, 64> Exps;
113
    unsigned CurrVN = 0;
114
115
    bool PerformTrivialCopyPropagation(MachineInstr *MI,
116
                                       MachineBasicBlock *MBB);
117
    bool isPhysDefTriviallyDead(unsigned Reg,
118
                                MachineBasicBlock::const_iterator I,
119
                                MachineBasicBlock::const_iterator E) const;
120
    bool hasLivePhysRegDefUses(const MachineInstr *MI,
121
                               const MachineBasicBlock *MBB,
122
                               SmallSet<unsigned, 8> &PhysRefs,
123
                               PhysDefVector &PhysDefs, bool &PhysUseDef) const;
124
    bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
125
                          SmallSet<unsigned, 8> &PhysRefs,
126
                          PhysDefVector &PhysDefs, bool &NonLocal) const;
127
    bool isCSECandidate(MachineInstr *MI);
128
    bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
129
                           MachineBasicBlock *CSBB, MachineInstr *MI);
130
    void EnterScope(MachineBasicBlock *MBB);
131
    void ExitScope(MachineBasicBlock *MBB);
132
    bool ProcessBlockCSE(MachineBasicBlock *MBB);
133
    void ExitScopeIfDone(MachineDomTreeNode *Node,
134
                         DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren);
135
    bool PerformCSE(MachineDomTreeNode *Node);
136
137
    bool isPRECandidate(MachineInstr *MI);
138
    bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
139
    bool PerformSimplePRE(MachineDominatorTree *DT);
140
    /// Heuristics to see if it's beneficial to move common computations of MBB
141
    /// and MBB1 to CandidateBB.
142
    bool isBeneficalToHoistInto(MachineBasicBlock *CandidateBB,
143
                                MachineBasicBlock *MBB,
144
                                MachineBasicBlock *MBB1);
145
  };
146
147
} // end anonymous namespace
148
149
char MachineCSE::ID = 0;
150
151
char &llvm::MachineCSEID = MachineCSE::ID;
152
153
42.3k
INITIALIZE_PASS_BEGIN(MachineCSE, DEBUG_TYPE,
154
42.3k
                      "Machine Common Subexpression Elimination", false, false)
155
42.3k
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
156
42.3k
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
157
42.3k
INITIALIZE_PASS_END(MachineCSE, DEBUG_TYPE,
158
                    "Machine Common Subexpression Elimination", false, false)
159
160
/// The source register of a COPY machine instruction can be propagated to all
161
/// its users, and this propagation could increase the probability of finding
162
/// common subexpressions. If the COPY has only one user, the COPY itself can
163
/// be removed.
164
bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
165
6.43M
                                               MachineBasicBlock *MBB) {
166
6.43M
  bool Changed = false;
167
23.8M
  for (MachineOperand &MO : MI->operands()) {
168
23.8M
    if (!MO.isReg() || 
!MO.isUse()16.4M
)
169
15.1M
      continue;
170
8.68M
    unsigned Reg = MO.getReg();
171
8.68M
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
172
1.78M
      continue;
173
6.89M
    bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
174
6.89M
    MachineInstr *DefMI = MRI->getVRegDef(Reg);
175
6.89M
    if (!DefMI->isCopy())
176
5.22M
      continue;
177
1.67M
    unsigned SrcReg = DefMI->getOperand(1).getReg();
178
1.67M
    if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
179
800k
      continue;
180
870k
    if (DefMI->getOperand(0).getSubReg())
181
12
      continue;
182
870k
    // FIXME: We should trivially coalesce subregister copies to expose CSE
183
870k
    // opportunities on instructions with truncated operands (see
184
870k
    // cse-add-with-overflow.ll). This can be done here as follows:
185
870k
    // if (SrcSubReg)
186
870k
    //  RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
187
870k
    //                                     SrcSubReg);
188
870k
    // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
189
870k
    //
190
870k
    // The 2-addr pass has been updated to handle coalesced subregs. However,
191
870k
    // some machine-specific code still can't handle it.
192
870k
    // To handle it properly we also need a way find a constrained subregister
193
870k
    // class given a super-reg class and subreg index.
194
870k
    if (DefMI->getOperand(1).getSubReg())
195
399k
      continue;
196
471k
    if (!MRI->constrainRegAttrs(SrcReg, Reg))
197
108k
      continue;
198
362k
    LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
199
362k
    LLVM_DEBUG(dbgs() << "***     to: " << *MI);
200
362k
201
362k
    // Update matching debug values.
202
362k
    DefMI->changeDebugValuesDefReg(SrcReg);
203
362k
204
362k
    // Propagate SrcReg of copies to MI.
205
362k
    MO.setReg(SrcReg);
206
362k
    MRI->clearKillFlags(SrcReg);
207
362k
    // Coalesce single use copies.
208
362k
    if (OnlyOneUse) {
209
188k
      DefMI->eraseFromParent();
210
188k
      ++NumCoalesces;
211
188k
    }
212
362k
    Changed = true;
213
362k
  }
214
6.43M
215
6.43M
  return Changed;
216
6.43M
}
217
218
bool
219
MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
220
                                   MachineBasicBlock::const_iterator I,
221
57.1k
                                   MachineBasicBlock::const_iterator E) const {
222
57.1k
  unsigned LookAheadLeft = LookAheadLimit;
223
97.5k
  while (LookAheadLeft) {
224
94.4k
    // Skip over dbg_value's.
225
94.4k
    I = skipDebugInstructionsForward(I, E);
226
94.4k
227
94.4k
    if (I == E)
228
8.34k
      // Reached end of block, we don't know if register is dead or not.
229
8.34k
      return false;
230
86.0k
231
86.0k
    bool SeenDef = false;
232
299k
    for (const MachineOperand &MO : I->operands()) {
233
299k
      if (MO.isRegMask() && 
MO.clobbersPhysReg(Reg)499
)
234
499
        SeenDef = true;
235
299k
      if (!MO.isReg() || 
!MO.getReg()200k
)
236
100k
        continue;
237
198k
      if (!TRI->regsOverlap(MO.getReg(), Reg))
238
152k
        continue;
239
46.1k
      if (MO.isUse())
240
43.5k
        // Found a use!
241
43.5k
        return false;
242
2.62k
      SeenDef = true;
243
2.62k
    }
244
86.0k
    
if (42.5k
SeenDef42.5k
)
245
2.14k
      // See a def of Reg (or an alias) before encountering any use, it's
246
2.14k
      // trivially dead.
247
2.14k
      return true;
248
40.4k
249
40.4k
    --LookAheadLeft;
250
40.4k
    ++I;
251
40.4k
  }
252
57.1k
  
return false3.12k
;
253
57.1k
}
254
255
static bool isCallerPreservedOrConstPhysReg(unsigned Reg,
256
                                            const MachineFunction &MF,
257
552k
                                            const TargetRegisterInfo &TRI) {
258
552k
  // MachineRegisterInfo::isConstantPhysReg directly called by
259
552k
  // MachineRegisterInfo::isCallerPreservedOrConstPhysReg expects the
260
552k
  // reserved registers to be frozen. That doesn't cause a problem  post-ISel as
261
552k
  // most (if not all) targets freeze reserved registers right after ISel.
262
552k
  //
263
552k
  // It does cause issues mid-GlobalISel, however, hence the additional
264
552k
  // reservedRegsFrozen check.
265
552k
  const MachineRegisterInfo &MRI = MF.getRegInfo();
266
552k
  return TRI.isCallerPreservedPhysReg(Reg, MF) ||
267
552k
         
(552k
MRI.reservedRegsFrozen()552k
&&
MRI.isConstantPhysReg(Reg)552k
);
268
552k
}
269
270
/// hasLivePhysRegDefUses - Return true if the specified instruction read/write
271
/// physical registers (except for dead defs of physical registers). It also
272
/// returns the physical register def by reference if it's the only one and the
273
/// instruction does not uses a physical register.
274
bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
275
                                       const MachineBasicBlock *MBB,
276
                                       SmallSet<unsigned, 8> &PhysRefs,
277
                                       PhysDefVector &PhysDefs,
278
610k
                                       bool &PhysUseDef) const {
279
610k
  // First, add all uses to PhysRefs.
280
2.71M
  for (const MachineOperand &MO : MI->operands()) {
281
2.71M
    if (!MO.isReg() || 
MO.isDef()1.80M
)
282
1.93M
      continue;
283
780k
    unsigned Reg = MO.getReg();
284
780k
    if (!Reg)
285
58.7k
      continue;
286
721k
    if (TargetRegisterInfo::isVirtualRegister(Reg))
287
169k
      continue;
288
552k
    // Reading either caller preserved or constant physregs is ok.
289
552k
    if (!isCallerPreservedOrConstPhysReg(Reg, *MI->getMF(), *TRI))
290
2.71M
      
for (MCRegAliasIterator AI(Reg, TRI, true); 433k
AI.isValid();
++AI2.28M
)
291
2.28M
        PhysRefs.insert(*AI);
292
552k
  }
293
610k
294
610k
  // Next, collect all defs into PhysDefs.  If any is already in PhysRefs
295
610k
  // (which currently contains only uses), set the PhysUseDef flag.
296
610k
  PhysUseDef = false;
297
610k
  MachineBasicBlock::const_iterator I = MI; I = std::next(I);
298
2.71M
  for (const auto &MOP : llvm::enumerate(MI->operands())) {
299
2.71M
    const MachineOperand &MO = MOP.value();
300
2.71M
    if (!MO.isReg() || 
!MO.isDef()1.80M
)
301
1.69M
      continue;
302
1.02M
    unsigned Reg = MO.getReg();
303
1.02M
    if (!Reg)
304
0
      continue;
305
1.02M
    if (TargetRegisterInfo::isVirtualRegister(Reg))
306
395k
      continue;
307
632k
    // Check against PhysRefs even if the def is "dead".
308
632k
    if (PhysRefs.count(Reg))
309
371k
      PhysUseDef = true;
310
632k
    // If the def is dead, it's ok. But the def may not marked "dead". That's
311
632k
    // common since this pass is run before livevariables. We can scan
312
632k
    // forward a few instructions and check if it is obviously dead.
313
632k
    if (!MO.isDead() && 
!isPhysDefTriviallyDead(Reg, I, MBB->end())57.1k
)
314
54.9k
      PhysDefs.push_back(std::make_pair(MOP.index(), Reg));
315
632k
  }
316
610k
317
610k
  // Finally, add all defs to PhysRefs as well.
318
665k
  for (unsigned i = 0, e = PhysDefs.size(); i != e; 
++i54.9k
)
319
194k
    
for (MCRegAliasIterator AI(PhysDefs[i].second, TRI, true); 54.9k
AI.isValid();
320
139k
         ++AI)
321
139k
      PhysRefs.insert(*AI);
322
610k
323
610k
  return !PhysRefs.empty();
324
610k
}
325
326
bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
327
                                  SmallSet<unsigned, 8> &PhysRefs,
328
                                  PhysDefVector &PhysDefs,
329
91.6k
                                  bool &NonLocal) const {
330
91.6k
  // For now conservatively returns false if the common subexpression is
331
91.6k
  // not in the same basic block as the given instruction. The only exception
332
91.6k
  // is if the common subexpression is in the sole predecessor block.
333
91.6k
  const MachineBasicBlock *MBB = MI->getParent();
334
91.6k
  const MachineBasicBlock *CSMBB = CSMI->getParent();
335
91.6k
336
91.6k
  bool CrossMBB = false;
337
91.6k
  if (CSMBB != MBB) {
338
62.0k
    if (MBB->pred_size() != 1 || 
*MBB->pred_begin() != CSMBB34.2k
)
339
49.4k
      return false;
340
12.6k
341
15.6k
    
for (unsigned i = 0, e = PhysDefs.size(); 12.6k
i != e;
++i2.99k
) {
342
8.40k
      if (MRI->isAllocatable(PhysDefs[i].second) ||
343
8.40k
          
MRI->isReserved(PhysDefs[i].second)8.32k
)
344
5.40k
        // Avoid extending live range of physical registers if they are
345
5.40k
        //allocatable or reserved.
346
5.40k
        return false;
347
8.40k
    }
348
12.6k
    CrossMBB = true;
349
7.21k
  }
350
91.6k
  MachineBasicBlock::const_iterator I = CSMI; I = std::next(I);
351
36.7k
  MachineBasicBlock::const_iterator E = MI;
352
36.7k
  MachineBasicBlock::const_iterator EE = CSMBB->end();
353
36.7k
  unsigned LookAheadLeft = LookAheadLimit;
354
92.2k
  while (LookAheadLeft) {
355
89.9k
    // Skip over dbg_value's.
356
89.9k
    while (I != E && 
I != EE81.9k
&&
I->isDebugInstr()77.2k
)
357
0
      ++I;
358
89.9k
359
89.9k
    if (I == EE) {
360
4.73k
      assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
361
4.73k
      (void)CrossMBB;
362
4.73k
      CrossMBB = false;
363
4.73k
      NonLocal = true;
364
4.73k
      I = MBB->begin();
365
4.73k
      EE = MBB->end();
366
4.73k
      continue;
367
4.73k
    }
368
85.1k
369
85.1k
    if (I == E)
370
7.95k
      return true;
371
77.2k
372
304k
    
for (const MachineOperand &MO : I->operands())77.2k
{
373
304k
      // RegMasks go on instructions like calls that clobber lots of physregs.
374
304k
      // Don't attempt to CSE across such an instruction.
375
304k
      if (MO.isRegMask())
376
198
        return false;
377
304k
      if (!MO.isReg() || 
!MO.isDef()228k
)
378
212k
        continue;
379
92.3k
      unsigned MOReg = MO.getReg();
380
92.3k
      if (TargetRegisterInfo::isVirtualRegister(MOReg))
381
42.7k
        continue;
382
49.5k
      if (PhysRefs.count(MOReg))
383
26.3k
        return false;
384
49.5k
    }
385
77.2k
386
77.2k
    --LookAheadLeft;
387
50.7k
    ++I;
388
50.7k
  }
389
36.7k
390
36.7k
  
return false2.30k
;
391
36.7k
}
392
393
50.7M
bool MachineCSE::isCSECandidate(MachineInstr *MI) {
394
50.7M
  if (MI->isPosition() || 
MI->isPHI()50.5M
||
MI->isImplicitDef()49.3M
||
MI->isKill()48.9M
||
395
50.7M
      
MI->isInlineAsm()48.9M
||
MI->isDebugInstr()48.9M
)
396
1.80M
    return false;
397
48.9M
398
48.9M
  // Ignore copies.
399
48.9M
  if (MI->isCopyLike())
400
14.7M
    return false;
401
34.1M
402
34.1M
  // Ignore stuff that we obviously can't move.
403
34.1M
  if (MI->mayStore() || 
MI->isCall()31.1M
||
MI->isTerminator()28.1M
||
404
34.1M
      
MI->mayRaiseFPException()22.6M
||
MI->hasUnmodeledSideEffects()22.6M
)
405
16.8M
    return false;
406
17.3M
407
17.3M
  if (MI->mayLoad()) {
408
3.72M
    // Okay, this instruction does a load. As a refinement, we allow the target
409
3.72M
    // to decide whether the loaded value is actually a constant. If so, we can
410
3.72M
    // actually use it as a load.
411
3.72M
    if (!MI->isDereferenceableInvariantLoad(AA))
412
3.25M
      // FIXME: we should be able to hoist loads with no other side effects if
413
3.25M
      // there are no other instructions which can change memory in this loop.
414
3.25M
      // This is a trivial form of alias analysis.
415
3.25M
      return false;
416
14.0M
  }
417
14.0M
418
14.0M
  // Ignore stack guard loads, otherwise the register that holds CSEed value may
419
14.0M
  // be spilled and get loaded back with corrupted data.
420
14.0M
  if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
421
8.81k
    return false;
422
14.0M
423
14.0M
  return true;
424
14.0M
}
425
426
/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
427
/// common expression that defines Reg. CSBB is basic block where CSReg is
428
/// defined.
429
bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
430
343k
                                   MachineBasicBlock *CSBB, MachineInstr *MI) {
431
343k
  // FIXME: Heuristics that works around the lack the live range splitting.
432
343k
433
343k
  // If CSReg is used at all uses of Reg, CSE should not increase register
434
343k
  // pressure of CSReg.
435
343k
  bool MayIncreasePressure = true;
436
343k
  if (TargetRegisterInfo::isVirtualRegister(CSReg) &&
437
343k
      TargetRegisterInfo::isVirtualRegister(Reg)) {
438
343k
    MayIncreasePressure = false;
439
343k
    SmallPtrSet<MachineInstr*, 8> CSUses;
440
9.10M
    for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
441
9.10M
      CSUses.insert(&MI);
442
9.10M
    }
443
343k
    for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
444
340k
      if (!CSUses.count(&MI)) {
445
335k
        MayIncreasePressure = true;
446
335k
        break;
447
335k
      }
448
340k
    }
449
343k
  }
450
343k
  if (!MayIncreasePressure) 
return true8.14k
;
451
335k
452
335k
  // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
453
335k
  // an immediate predecessor. We don't want to increase register pressure and
454
335k
  // end up causing other computation to be spilled.
455
335k
  if (TII->isAsCheapAsAMove(*MI)) {
456
218k
    MachineBasicBlock *BB = MI->getParent();
457
218k
    if (CSBB != BB && 
!CSBB->isSuccessor(BB)172k
)
458
125k
      return false;
459
209k
  }
460
209k
461
209k
  // Heuristics #2: If the expression doesn't not use a vr and the only use
462
209k
  // of the redundant computation are copies, do not cse.
463
209k
  bool HasVRegUse = false;
464
500k
  for (const MachineOperand &MO : MI->operands()) {
465
500k
    if (MO.isReg() && 
MO.isUse()327k
&&
466
500k
        
TargetRegisterInfo::isVirtualRegister(MO.getReg())110k
) {
467
70.6k
      HasVRegUse = true;
468
70.6k
      break;
469
70.6k
    }
470
500k
  }
471
209k
  if (!HasVRegUse) {
472
138k
    bool HasNonCopyUse = false;
473
151k
    for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
474
151k
      // Ignore copies.
475
151k
      if (!MI.isCopyLike()) {
476
103k
        HasNonCopyUse = true;
477
103k
        break;
478
103k
      }
479
151k
    }
480
138k
    if (!HasNonCopyUse)
481
35.1k
      return false;
482
174k
  }
483
174k
484
174k
  // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
485
174k
  // it unless the defined value is already used in the BB of the new use.
486
174k
  bool HasPHI = false;
487
8.66M
  for (MachineInstr &UseMI : MRI->use_nodbg_instructions(CSReg)) {
488
8.66M
    HasPHI |= UseMI.isPHI();
489
8.66M
    if (UseMI.getParent() == MI->getParent())
490
31.0k
      return true;
491
8.66M
  }
492
174k
493
174k
  
return !HasPHI143k
;
494
174k
}
495
496
2.62M
void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
497
2.62M
  LLVM_DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
498
2.62M
  ScopeType *Scope = new ScopeType(VNT);
499
2.62M
  ScopeMap[MBB] = Scope;
500
2.62M
}
501
502
2.62M
void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
503
2.62M
  LLVM_DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
504
2.62M
  DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
505
2.62M
  assert(SI != ScopeMap.end());
506
2.62M
  delete SI->second;
507
2.62M
  ScopeMap.erase(SI);
508
2.62M
}
509
510
2.62M
bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
511
2.62M
  bool Changed = false;
512
2.62M
513
2.62M
  SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
514
2.62M
  SmallVector<unsigned, 2> ImplicitDefsToUpdate;
515
2.62M
  SmallVector<unsigned, 2> ImplicitDefs;
516
27.9M
  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
517
25.3M
    MachineInstr *MI = &*I;
518
25.3M
    ++I;
519
25.3M
520
25.3M
    if (!isCSECandidate(MI))
521
18.3M
      continue;
522
7.03M
523
7.03M
    bool FoundCSE = VNT.count(MI);
524
7.03M
    if (!FoundCSE) {
525
6.43M
      // Using trivial copy propagation to find more CSE opportunities.
526
6.43M
      if (PerformTrivialCopyPropagation(MI, MBB)) {
527
302k
        Changed = true;
528
302k
529
302k
        // After coalescing MI itself may become a copy.
530
302k
        if (MI->isCopyLike())
531
0
          continue;
532
302k
533
302k
        // Try again to see if CSE is possible.
534
302k
        FoundCSE = VNT.count(MI);
535
302k
      }
536
6.43M
    }
537
7.03M
538
7.03M
    // Commute commutable instructions.
539
7.03M
    bool Commuted = false;
540
7.03M
    if (!FoundCSE && 
MI->isCommutable()6.42M
) {
541
401k
      if (MachineInstr *NewMI = TII->commuteInstruction(*MI)) {
542
364k
        Commuted = true;
543
364k
        FoundCSE = VNT.count(NewMI);
544
364k
        if (NewMI != MI) {
545
0
          // New instruction. It doesn't need to be kept.
546
0
          NewMI->eraseFromParent();
547
0
          Changed = true;
548
364k
        } else if (!FoundCSE)
549
364k
          // MI was changed but it didn't help, commute it back!
550
364k
          (void)TII->commuteInstruction(*MI);
551
364k
      }
552
401k
    }
553
7.03M
554
7.03M
    // If the instruction defines physical registers and the values *may* be
555
7.03M
    // used, then it's not safe to replace it with a common subexpression.
556
7.03M
    // It's also not safe if the instruction uses physical registers.
557
7.03M
    bool CrossMBBPhysDef = false;
558
7.03M
    SmallSet<unsigned, 8> PhysRefs;
559
7.03M
    PhysDefVector PhysDefs;
560
7.03M
    bool PhysUseDef = false;
561
7.03M
    if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
562
610k
                                          PhysDefs, PhysUseDef)) {
563
279k
      FoundCSE = false;
564
279k
565
279k
      // ... Unless the CS is local or is in the sole predecessor block
566
279k
      // and it also defines the physical register which is not clobbered
567
279k
      // in between and the physical register uses were not clobbered.
568
279k
      // This can never be the case if the instruction both uses and
569
279k
      // defines the same physical register, which was detected above.
570
279k
      if (!PhysUseDef) {
571
91.6k
        unsigned CSVN = VNT.lookup(MI);
572
91.6k
        MachineInstr *CSMI = Exps[CSVN];
573
91.6k
        if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
574
7.95k
          FoundCSE = true;
575
91.6k
      }
576
279k
    }
577
7.03M
578
7.03M
    if (!FoundCSE) {
579
6.69M
      VNT.insert(MI, CurrVN++);
580
6.69M
      Exps.push_back(MI);
581
6.69M
      continue;
582
6.69M
    }
583
338k
584
338k
    // Found a common subexpression, eliminate it.
585
338k
    unsigned CSVN = VNT.lookup(MI);
586
338k
    MachineInstr *CSMI = Exps[CSVN];
587
338k
    LLVM_DEBUG(dbgs() << "Examining: " << *MI);
588
338k
    LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
589
338k
590
338k
    // Check if it's profitable to perform this CSE.
591
338k
    bool DoCSE = true;
592
338k
    unsigned NumDefs = MI->getNumDefs();
593
338k
594
547k
    for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && 
i != e376k
;
++i209k
) {
595
376k
      MachineOperand &MO = MI->getOperand(i);
596
376k
      if (!MO.isReg() || 
!MO.isDef()366k
)
597
26.2k
        continue;
598
350k
      unsigned OldReg = MO.getReg();
599
350k
      unsigned NewReg = CSMI->getOperand(i).getReg();
600
350k
601
350k
      // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
602
350k
      // we should make sure it is not dead at CSMI.
603
350k
      if (MO.isImplicit() && 
!MO.isDead()13.1k
&&
CSMI->getOperand(i).isDead()7.41k
)
604
38
        ImplicitDefsToUpdate.push_back(i);
605
350k
606
350k
      // Keep track of implicit defs of CSMI and MI, to clear possibly
607
350k
      // made-redundant kill flags.
608
350k
      if (MO.isImplicit() && 
!MO.isDead()13.1k
&&
OldReg == NewReg7.41k
)
609
7.41k
        ImplicitDefs.push_back(OldReg);
610
350k
611
350k
      if (OldReg == NewReg) {
612
18.4k
        --NumDefs;
613
18.4k
        continue;
614
18.4k
      }
615
331k
616
331k
      assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
617
331k
             TargetRegisterInfo::isVirtualRegister(NewReg) &&
618
331k
             "Do not CSE physical register defs!");
619
331k
620
331k
      if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), MI)) {
621
167k
        LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
622
167k
        DoCSE = false;
623
167k
        break;
624
167k
      }
625
164k
626
164k
      // Don't perform CSE if the result of the new instruction cannot exist
627
164k
      // within the constraints (register class, bank, or low-level type) of
628
164k
      // the old instruction.
629
164k
      if (!MRI->constrainRegAttrs(NewReg, OldReg)) {
630
1
        LLVM_DEBUG(
631
1
            dbgs() << "*** Not the same register constraints, avoid CSE!\n");
632
1
        DoCSE = false;
633
1
        break;
634
1
      }
635
164k
636
164k
      CSEPairs.push_back(std::make_pair(OldReg, NewReg));
637
164k
      --NumDefs;
638
164k
    }
639
338k
640
338k
    // Actually perform the elimination.
641
338k
    if (DoCSE) {
642
171k
      for (std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
643
164k
        unsigned OldReg = CSEPair.first;
644
164k
        unsigned NewReg = CSEPair.second;
645
164k
        // OldReg may have been unused but is used now, clear the Dead flag
646
164k
        MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
647
164k
        assert(Def != nullptr && "CSEd register has no unique definition?");
648
164k
        Def->clearRegisterDeads(NewReg);
649
164k
        // Replace with NewReg and clear kill flags which may be wrong now.
650
164k
        MRI->replaceRegWith(OldReg, NewReg);
651
164k
        MRI->clearKillFlags(NewReg);
652
164k
      }
653
171k
654
171k
      // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
655
171k
      // we should make sure it is not dead at CSMI.
656
171k
      for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
657
38
        CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
658
171k
      for (auto PhysDef : PhysDefs)
659
11.2k
        if (!MI->getOperand(PhysDef.first).isDead())
660
11.2k
          CSMI->getOperand(PhysDef.first).setIsDead(false);
661
171k
662
171k
      // Go through implicit defs of CSMI and MI, and clear the kill flags on
663
171k
      // their uses in all the instructions between CSMI and MI.
664
171k
      // We might have made some of the kill flags redundant, consider:
665
171k
      //   subs  ... implicit-def %nzcv    <- CSMI
666
171k
      //   csinc ... implicit killed %nzcv <- this kill flag isn't valid anymore
667
171k
      //   subs  ... implicit-def %nzcv    <- MI, to be eliminated
668
171k
      //   csinc ... implicit killed %nzcv
669
171k
      // Since we eliminated MI, and reused a register imp-def'd by CSMI
670
171k
      // (here %nzcv), that register, if it was killed before MI, should have
671
171k
      // that kill flag removed, because it's lifetime was extended.
672
171k
      if (CSMI->getParent() == MI->getParent()) {
673
913k
        for (MachineBasicBlock::iterator II = CSMI, IE = MI; II != IE; 
++II865k
)
674
865k
          for (auto ImplicitDef : ImplicitDefs)
675
13.6k
            if (MachineOperand *MO = II->findRegisterUseOperand(
676
0
                    ImplicitDef, /*isKill=*/true, TRI))
677
0
              MO->setIsKill(false);
678
123k
      } else {
679
123k
        // If the instructions aren't in the same BB, bail out and clear the
680
123k
        // kill flag on all uses of the imp-def'd register.
681
123k
        for (auto ImplicitDef : ImplicitDefs)
682
1.87k
          MRI->clearKillFlags(ImplicitDef);
683
123k
      }
684
171k
685
171k
      if (CrossMBBPhysDef) {
686
1.77k
        // Add physical register defs now coming in from a predecessor to MBB
687
1.77k
        // livein list.
688
3.53k
        while (!PhysDefs.empty()) {
689
1.76k
          auto LiveIn = PhysDefs.pop_back_val();
690
1.76k
          if (!MBB->isLiveIn(LiveIn.second))
691
1.75k
            MBB->addLiveIn(LiveIn.second);
692
1.76k
        }
693
1.77k
        ++NumCrossBBCSEs;
694
1.77k
      }
695
171k
696
171k
      MI->eraseFromParent();
697
171k
      ++NumCSEs;
698
171k
      if (!PhysRefs.empty())
699
7.88k
        ++NumPhysCSEs;
700
171k
      if (Commuted)
701
60
        ++NumCommutes;
702
171k
      Changed = true;
703
171k
    } else {
704
167k
      VNT.insert(MI, CurrVN++);
705
167k
      Exps.push_back(MI);
706
167k
    }
707
338k
    CSEPairs.clear();
708
338k
    ImplicitDefsToUpdate.clear();
709
338k
    ImplicitDefs.clear();
710
338k
  }
711
2.62M
712
2.62M
  return Changed;
713
2.62M
}
714
715
/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
716
/// dominator tree node if its a leaf or all of its children are done. Walk
717
/// up the dominator tree to destroy ancestors which are now done.
718
void
719
MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
720
2.62M
                        DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren) {
721
2.62M
  if (OpenChildren[Node])
722
1.28M
    return;
723
1.34M
724
1.34M
  // Pop scope.
725
1.34M
  ExitScope(Node->getBlock());
726
1.34M
727
1.34M
  // Now traverse upwards to pop ancestors whose offsprings are all done.
728
2.62M
  while (MachineDomTreeNode *Parent = Node->getIDom()) {
729
2.11M
    unsigned Left = --OpenChildren[Parent];
730
2.11M
    if (Left != 0)
731
831k
      break;
732
1.28M
    ExitScope(Parent->getBlock());
733
1.28M
    Node = Parent;
734
1.28M
  }
735
1.34M
}
736
737
514k
bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
738
514k
  SmallVector<MachineDomTreeNode*, 32> Scopes;
739
514k
  SmallVector<MachineDomTreeNode*, 8> WorkList;
740
514k
  DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
741
514k
742
514k
  CurrVN = 0;
743
514k
744
514k
  // Perform a DFS walk to determine the order of visit.
745
514k
  WorkList.push_back(Node);
746
2.62M
  do {
747
2.62M
    Node = WorkList.pop_back_val();
748
2.62M
    Scopes.push_back(Node);
749
2.62M
    const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
750
2.62M
    OpenChildren[Node] = Children.size();
751
2.62M
    for (MachineDomTreeNode *Child : Children)
752
2.11M
      WorkList.push_back(Child);
753
2.62M
  } while (!WorkList.empty());
754
514k
755
514k
  // Now perform CSE.
756
514k
  bool Changed = false;
757
2.62M
  for (MachineDomTreeNode *Node : Scopes) {
758
2.62M
    MachineBasicBlock *MBB = Node->getBlock();
759
2.62M
    EnterScope(MBB);
760
2.62M
    Changed |= ProcessBlockCSE(MBB);
761
2.62M
    // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
762
2.62M
    ExitScopeIfDone(Node, OpenChildren);
763
2.62M
  }
764
514k
765
514k
  return Changed;
766
514k
}
767
768
// We use stronger checks for PRE candidate rather than for CSE ones to embrace
769
// checks inside ProcessBlockCSE(), not only inside isCSECandidate(). This helps
770
// to exclude instrs created by PRE that won't be CSEed later.
771
25.3M
bool MachineCSE::isPRECandidate(MachineInstr *MI) {
772
25.3M
  if (!isCSECandidate(MI) ||
773
25.3M
      
MI->isNotDuplicable()7.02M
||
774
25.3M
      
MI->mayLoad()7.02M
||
775
25.3M
      
MI->isAsCheapAsAMove()6.79M
||
776
25.3M
      
MI->getNumDefs() != 14.41M
||
777
25.3M
      
MI->getNumExplicitDefs() != 13.00M
)
778
22.5M
    return false;
779
2.84M
780
2.84M
  for (auto def : MI->defs())
781
2.84M
    if (!TRI->isVirtualRegister(def.getReg()))
782
367
      return false;
783
2.84M
784
2.84M
  
for (auto use : MI->uses())2.84M
785
7.02M
    if (use.isReg() && 
!TRI->isVirtualRegister(use.getReg())3.88M
)
786
763k
      return false;
787
2.84M
788
2.84M
  
return true2.08M
;
789
2.84M
}
790
791
bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
792
2.62M
                                 MachineBasicBlock *MBB) {
793
2.62M
  bool Changed = false;
794
27.9M
  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
795
25.3M
    MachineInstr *MI = &*I;
796
25.3M
    ++I;
797
25.3M
798
25.3M
    if (!isPRECandidate(MI))
799
23.2M
      continue;
800
2.08M
801
2.08M
    if (!PREMap.count(MI)) {
802
1.94M
      PREMap[MI] = MBB;
803
1.94M
      continue;
804
1.94M
    }
805
140k
806
140k
    auto MBB1 = PREMap[MI];
807
140k
    assert(
808
140k
        !DT->properlyDominates(MBB, MBB1) &&
809
140k
        "MBB cannot properly dominate MBB1 while DFS through dominators tree!");
810
140k
    auto CMBB = DT->findNearestCommonDominator(MBB, MBB1);
811
140k
    if (!CMBB->isLegalToHoistInto())
812
1.40k
      continue;
813
139k
814
139k
    if (!isBeneficalToHoistInto(CMBB, MBB, MBB1))
815
69.3k
      continue;
816
69.7k
817
69.7k
    // Two instrs are partial redundant if their basic blocks are reachable
818
69.7k
    // from one to another but one doesn't dominate another.
819
69.7k
    if (CMBB != MBB1) {
820
25.6k
      auto BB = MBB->getBasicBlock(), BB1 = MBB1->getBasicBlock();
821
25.6k
      if (BB != nullptr && 
BB1 != nullptr25.6k
&&
822
25.6k
          
(25.6k
isPotentiallyReachable(BB1, BB)25.6k
||
823
25.6k
           
isPotentiallyReachable(BB, BB1)15.9k
)) {
824
11.2k
825
11.2k
        assert(MI->getOperand(0).isDef() &&
826
11.2k
               "First operand of instr with one explicit def must be this def");
827
11.2k
        unsigned VReg = MI->getOperand(0).getReg();
828
11.2k
        unsigned NewReg = MRI->cloneVirtualRegister(VReg);
829
11.2k
        if (!isProfitableToCSE(NewReg, VReg, CMBB, MI))
830
6.33k
          continue;
831
4.89k
        MachineInstr &NewMI =
832
4.89k
            TII->duplicate(*CMBB, CMBB->getFirstTerminator(), *MI);
833
4.89k
        NewMI.getOperand(0).setReg(NewReg);
834
4.89k
835
4.89k
        PREMap[MI] = CMBB;
836
4.89k
        ++NumPREs;
837
4.89k
        Changed = true;
838
4.89k
      }
839
25.6k
    }
840
69.7k
  }
841
2.62M
  return Changed;
842
2.62M
}
843
844
// This simple PRE (partial redundancy elimination) pass doesn't actually
845
// eliminate partial redundancy but transforms it to full redundancy,
846
// anticipating that the next CSE step will eliminate this created redundancy.
847
// If CSE doesn't eliminate this, than created instruction will remain dead
848
// and eliminated later by Remove Dead Machine Instructions pass.
849
514k
bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) {
850
514k
  SmallVector<MachineDomTreeNode *, 32> BBs;
851
514k
852
514k
  PREMap.clear();
853
514k
  bool Changed = false;
854
514k
  BBs.push_back(DT->getRootNode());
855
2.62M
  do {
856
2.62M
    auto Node = BBs.pop_back_val();
857
2.62M
    const std::vector<MachineDomTreeNode *> &Children = Node->getChildren();
858
2.62M
    for (MachineDomTreeNode *Child : Children)
859
2.11M
      BBs.push_back(Child);
860
2.62M
861
2.62M
    MachineBasicBlock *MBB = Node->getBlock();
862
2.62M
    Changed |= ProcessBlockPRE(DT, MBB);
863
2.62M
864
2.62M
  } while (!BBs.empty());
865
514k
866
514k
  return Changed;
867
514k
}
868
869
bool MachineCSE::isBeneficalToHoistInto(MachineBasicBlock *CandidateBB,
870
                                        MachineBasicBlock *MBB,
871
139k
                                        MachineBasicBlock *MBB1) {
872
139k
  if (CandidateBB->getParent()->getFunction().hasMinSize())
873
311
    return true;
874
138k
  assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB");
875
138k
  assert(DT->dominates(CandidateBB, MBB1) &&
876
138k
         "CandidateBB should dominate MBB1");
877
138k
  return MBFI->getBlockFreq(CandidateBB) <=
878
138k
         MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1);
879
138k
}
880
881
514k
bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
882
514k
  if (skipFunction(MF.getFunction()))
883
278
    return false;
884
514k
885
514k
  TII = MF.getSubtarget().getInstrInfo();
886
514k
  TRI = MF.getSubtarget().getRegisterInfo();
887
514k
  MRI = &MF.getRegInfo();
888
514k
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
889
514k
  DT = &getAnalysis<MachineDominatorTree>();
890
514k
  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
891
514k
  LookAheadLimit = TII->getMachineCSELookAheadLimit();
892
514k
  bool ChangedPRE, ChangedCSE;
893
514k
  ChangedPRE = PerformSimplePRE(DT);
894
514k
  ChangedCSE = PerformCSE(DT->getRootNode());
895
514k
  return ChangedPRE || 
ChangedCSE512k
;
896
514k
}