Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/CodeGen/MachineLICM.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ----------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This pass performs loop invariant code motion on machine instructions. We
10
// attempt to remove as much code from the body of a loop as possible.
11
//
12
// This pass is not intended to be a replacement or a complete alternative
13
// for the LLVM-IR-level LICM pass. It is only designed to hoist simple
14
// constructs that are not exposed before lowering and instruction selection.
15
//
16
//===----------------------------------------------------------------------===//
17
18
#include "llvm/ADT/BitVector.h"
19
#include "llvm/ADT/DenseMap.h"
20
#include "llvm/ADT/STLExtras.h"
21
#include "llvm/ADT/SmallSet.h"
22
#include "llvm/ADT/SmallVector.h"
23
#include "llvm/ADT/Statistic.h"
24
#include "llvm/Analysis/AliasAnalysis.h"
25
#include "llvm/CodeGen/MachineBasicBlock.h"
26
#include "llvm/CodeGen/MachineDominators.h"
27
#include "llvm/CodeGen/MachineFrameInfo.h"
28
#include "llvm/CodeGen/MachineFunction.h"
29
#include "llvm/CodeGen/MachineFunctionPass.h"
30
#include "llvm/CodeGen/MachineInstr.h"
31
#include "llvm/CodeGen/MachineLoopInfo.h"
32
#include "llvm/CodeGen/MachineMemOperand.h"
33
#include "llvm/CodeGen/MachineOperand.h"
34
#include "llvm/CodeGen/MachineRegisterInfo.h"
35
#include "llvm/CodeGen/PseudoSourceValue.h"
36
#include "llvm/CodeGen/TargetInstrInfo.h"
37
#include "llvm/CodeGen/TargetLowering.h"
38
#include "llvm/CodeGen/TargetRegisterInfo.h"
39
#include "llvm/CodeGen/TargetSchedule.h"
40
#include "llvm/CodeGen/TargetSubtargetInfo.h"
41
#include "llvm/IR/DebugLoc.h"
42
#include "llvm/MC/MCInstrDesc.h"
43
#include "llvm/MC/MCRegisterInfo.h"
44
#include "llvm/Pass.h"
45
#include "llvm/Support/Casting.h"
46
#include "llvm/Support/CommandLine.h"
47
#include "llvm/Support/Debug.h"
48
#include "llvm/Support/raw_ostream.h"
49
#include <algorithm>
50
#include <cassert>
51
#include <limits>
52
#include <vector>
53
54
using namespace llvm;
55
56
#define DEBUG_TYPE "machinelicm"
57
58
static cl::opt<bool>
59
AvoidSpeculation("avoid-speculation",
60
                 cl::desc("MachineLICM should avoid speculation"),
61
                 cl::init(true), cl::Hidden);
62
63
static cl::opt<bool>
64
HoistCheapInsts("hoist-cheap-insts",
65
                cl::desc("MachineLICM should hoist even cheap instructions"),
66
                cl::init(false), cl::Hidden);
67
68
static cl::opt<bool>
69
SinkInstsToAvoidSpills("sink-insts-to-avoid-spills",
70
                       cl::desc("MachineLICM should sink instructions into "
71
                                "loops to avoid register spills"),
72
                       cl::init(false), cl::Hidden);
73
static cl::opt<bool>
74
HoistConstStores("hoist-const-stores",
75
                 cl::desc("Hoist invariant stores"),
76
                 cl::init(true), cl::Hidden);
77
78
STATISTIC(NumHoisted,
79
          "Number of machine instructions hoisted out of loops");
80
STATISTIC(NumLowRP,
81
          "Number of instructions hoisted in low reg pressure situation");
82
STATISTIC(NumHighLatency,
83
          "Number of high latency instructions hoisted");
84
STATISTIC(NumCSEed,
85
          "Number of hoisted machine instructions CSEed");
86
STATISTIC(NumPostRAHoisted,
87
          "Number of machine instructions hoisted out of loops post regalloc");
88
STATISTIC(NumStoreConst,
89
          "Number of stores of const phys reg hoisted out of loops");
90
91
namespace {
92
93
  class MachineLICMBase : public MachineFunctionPass {
94
    const TargetInstrInfo *TII;
95
    const TargetLoweringBase *TLI;
96
    const TargetRegisterInfo *TRI;
97
    const MachineFrameInfo *MFI;
98
    MachineRegisterInfo *MRI;
99
    TargetSchedModel SchedModel;
100
    bool PreRegAlloc;
101
102
    // Various analyses that we use...
103
    AliasAnalysis        *AA;      // Alias analysis info.
104
    MachineLoopInfo      *MLI;     // Current MachineLoopInfo
105
    MachineDominatorTree *DT;      // Machine dominator tree for the cur loop
106
107
    // State that is updated as we process loops
108
    bool         Changed;          // True if a loop is changed.
109
    bool         FirstInLoop;      // True if it's the first LICM in the loop.
110
    MachineLoop *CurLoop;          // The current loop we are working on.
111
    MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
112
113
    // Exit blocks for CurLoop.
114
    SmallVector<MachineBasicBlock *, 8> ExitBlocks;
115
116
4.56k
    bool isExitBlock(const MachineBasicBlock *MBB) const {
117
4.56k
      return is_contained(ExitBlocks, MBB);
118
4.56k
    }
119
120
    // Track 'estimated' register pressure.
121
    SmallSet<unsigned, 32> RegSeen;
122
    SmallVector<unsigned, 8> RegPressure;
123
124
    // Register pressure "limit" per register pressure set. If the pressure
125
    // is higher than the limit, then it's considered high.
126
    SmallVector<unsigned, 8> RegLimit;
127
128
    // Register pressure on path leading from loop preheader to current BB.
129
    SmallVector<SmallVector<unsigned, 8>, 16> BackTrace;
130
131
    // For each opcode, keep a list of potential CSE instructions.
132
    DenseMap<unsigned, std::vector<const MachineInstr *>> CSEMap;
133
134
    enum {
135
      SpeculateFalse   = 0,
136
      SpeculateTrue    = 1,
137
      SpeculateUnknown = 2
138
    };
139
140
    // If a MBB does not dominate loop exiting blocks then it may not safe
141
    // to hoist loads from this block.
142
    // Tri-state: 0 - false, 1 - true, 2 - unknown
143
    unsigned SpeculationState;
144
145
  public:
146
    MachineLICMBase(char &PassID, bool PreRegAlloc)
147
70.7k
        : MachineFunctionPass(PassID), PreRegAlloc(PreRegAlloc) {}
148
149
    bool runOnMachineFunction(MachineFunction &MF) override;
150
151
70.1k
    void getAnalysisUsage(AnalysisUsage &AU) const override {
152
70.1k
      AU.addRequired<MachineLoopInfo>();
153
70.1k
      AU.addRequired<MachineDominatorTree>();
154
70.1k
      AU.addRequired<AAResultsWrapperPass>();
155
70.1k
      AU.addPreserved<MachineLoopInfo>();
156
70.1k
      AU.addPreserved<MachineDominatorTree>();
157
70.1k
      MachineFunctionPass::getAnalysisUsage(AU);
158
70.1k
    }
159
160
999k
    void releaseMemory() override {
161
999k
      RegSeen.clear();
162
999k
      RegPressure.clear();
163
999k
      RegLimit.clear();
164
999k
      BackTrace.clear();
165
999k
      CSEMap.clear();
166
999k
    }
167
168
  private:
169
    /// Keep track of information about hoisting candidates.
170
    struct CandidateInfo {
171
      MachineInstr *MI;
172
      unsigned      Def;
173
      int           FI;
174
175
      CandidateInfo(MachineInstr *mi, unsigned def, int fi)
176
48.6k
        : MI(mi), Def(def), FI(fi) {}
177
    };
178
179
    void HoistRegionPostRA();
180
181
    void HoistPostRA(MachineInstr *MI, unsigned Def);
182
183
    void ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs,
184
                   BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs,
185
                   SmallVectorImpl<CandidateInfo> &Candidates);
186
187
    void AddToLiveIns(unsigned Reg);
188
189
    bool IsLICMCandidate(MachineInstr &I);
190
191
    bool IsLoopInvariantInst(MachineInstr &I);
192
193
    bool HasLoopPHIUse(const MachineInstr *MI) const;
194
195
    bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
196
                               unsigned Reg) const;
197
198
    bool IsCheapInstruction(MachineInstr &MI) const;
199
200
    bool CanCauseHighRegPressure(const DenseMap<unsigned, int> &Cost,
201
                                 bool Cheap);
202
203
    void UpdateBackTraceRegPressure(const MachineInstr *MI);
204
205
    bool IsProfitableToHoist(MachineInstr &MI);
206
207
    bool IsGuaranteedToExecute(MachineBasicBlock *BB);
208
209
    void EnterScope(MachineBasicBlock *MBB);
210
211
    void ExitScope(MachineBasicBlock *MBB);
212
213
    void ExitScopeIfDone(
214
        MachineDomTreeNode *Node,
215
        DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,
216
        DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap);
217
218
    void HoistOutOfLoop(MachineDomTreeNode *HeaderN);
219
220
    void HoistRegion(MachineDomTreeNode *N, bool IsHeader);
221
222
    void SinkIntoLoop();
223
224
    void InitRegPressure(MachineBasicBlock *BB);
225
226
    DenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI,
227
                                             bool ConsiderSeen,
228
                                             bool ConsiderUnseenAsDef);
229
230
    void UpdateRegPressure(const MachineInstr *MI,
231
                           bool ConsiderUnseenAsDef = false);
232
233
    MachineInstr *ExtractHoistableLoad(MachineInstr *MI);
234
235
    const MachineInstr *
236
    LookForDuplicate(const MachineInstr *MI,
237
                     std::vector<const MachineInstr *> &PrevMIs);
238
239
    bool EliminateCSE(
240
        MachineInstr *MI,
241
        DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator &CI);
242
243
    bool MayCSE(MachineInstr *MI);
244
245
    bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader);
246
247
    void InitCSEMap(MachineBasicBlock *BB);
248
249
    MachineBasicBlock *getCurPreheader();
250
  };
251
252
  class MachineLICM : public MachineLICMBase {
253
  public:
254
    static char ID;
255
33.8k
    MachineLICM() : MachineLICMBase(ID, false) {
256
33.8k
      initializeMachineLICMPass(*PassRegistry::getPassRegistry());
257
33.8k
    }
258
  };
259
260
  class EarlyMachineLICM : public MachineLICMBase {
261
  public:
262
    static char ID;
263
36.8k
    EarlyMachineLICM() : MachineLICMBase(ID, true) {
264
36.8k
      initializeEarlyMachineLICMPass(*PassRegistry::getPassRegistry());
265
36.8k
    }
266
  };
267
268
} // end anonymous namespace
269
270
char MachineLICM::ID;
271
char EarlyMachineLICM::ID;
272
273
char &llvm::MachineLICMID = MachineLICM::ID;
274
char &llvm::EarlyMachineLICMID = EarlyMachineLICM::ID;
275
276
42.3k
INITIALIZE_PASS_BEGIN(MachineLICM, DEBUG_TYPE,
277
42.3k
                      "Machine Loop Invariant Code Motion", false, false)
278
42.3k
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
279
42.3k
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
280
42.3k
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
281
42.3k
INITIALIZE_PASS_END(MachineLICM, DEBUG_TYPE,
282
                    "Machine Loop Invariant Code Motion", false, false)
283
284
42.3k
INITIALIZE_PASS_BEGIN(EarlyMachineLICM, "early-machinelicm",
285
42.3k
                      "Early Machine Loop Invariant Code Motion", false, false)
286
42.3k
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
287
42.3k
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
288
42.3k
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
289
42.3k
INITIALIZE_PASS_END(EarlyMachineLICM, "early-machinelicm",
290
                    "Early Machine Loop Invariant Code Motion", false, false)
291
292
/// Test if the given loop is the outer-most loop that has a unique predecessor.
293
154k
static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
294
154k
  // Check whether this loop even has a unique predecessor.
295
154k
  if (!CurLoop->getLoopPredecessor())
296
1.49k
    return false;
297
152k
  // Ok, now check to see if any of its outer loops do.
298
152k
  
for (MachineLoop *L = CurLoop->getParentLoop(); 152k
L;
L = L->getParentLoop()6
)
299
6
    if (L->getLoopPredecessor())
300
0
      return false;
301
152k
  // None of them did, so this is the outermost with a unique predecessor.
302
152k
  return true;
303
152k
}
304
305
999k
bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) {
306
999k
  if (skipFunction(MF.getFunction()))
307
528
    return false;
308
998k
309
998k
  Changed = FirstInLoop = false;
310
998k
  const TargetSubtargetInfo &ST = MF.getSubtarget();
311
998k
  TII = ST.getInstrInfo();
312
998k
  TLI = ST.getTargetLowering();
313
998k
  TRI = ST.getRegisterInfo();
314
998k
  MFI = &MF.getFrameInfo();
315
998k
  MRI = &MF.getRegInfo();
316
998k
  SchedModel.init(&ST);
317
998k
318
998k
  PreRegAlloc = MRI->isSSA();
319
998k
320
998k
  if (PreRegAlloc)
321
998k
    LLVM_DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
322
998k
  else
323
998k
    LLVM_DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
324
998k
  LLVM_DEBUG(dbgs() << MF.getName() << " ********\n");
325
998k
326
998k
  if (PreRegAlloc) {
327
514k
    // Estimate register pressure during pre-regalloc pass.
328
514k
    unsigned NumRPS = TRI->getNumRegPressureSets();
329
514k
    RegPressure.resize(NumRPS);
330
514k
    std::fill(RegPressure.begin(), RegPressure.end(), 0);
331
514k
    RegLimit.resize(NumRPS);
332
27.4M
    for (unsigned i = 0, e = NumRPS; i != e; 
++i26.8M
)
333
26.8M
      RegLimit[i] = TRI->getRegPressureSetLimit(MF, i);
334
514k
  }
335
998k
336
998k
  // Get our Loop information...
337
998k
  MLI = &getAnalysis<MachineLoopInfo>();
338
998k
  DT  = &getAnalysis<MachineDominatorTree>();
339
998k
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
340
998k
341
998k
  SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
342
1.30M
  while (!Worklist.empty()) {
343
308k
    CurLoop = Worklist.pop_back_val();
344
308k
    CurPreheader = nullptr;
345
308k
    ExitBlocks.clear();
346
308k
347
308k
    // If this is done before regalloc, only visit outer-most preheader-sporting
348
308k
    // loops.
349
308k
    if (PreRegAlloc && 
!LoopIsOuterMostWithPredecessor(CurLoop)154k
) {
350
1.49k
      Worklist.append(CurLoop->begin(), CurLoop->end());
351
1.49k
      continue;
352
1.49k
    }
353
306k
354
306k
    CurLoop->getExitBlocks(ExitBlocks);
355
306k
356
306k
    if (!PreRegAlloc)
357
153k
      HoistRegionPostRA();
358
152k
    else {
359
152k
      // CSEMap is initialized for loop header when the first instruction is
360
152k
      // being hoisted.
361
152k
      MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
362
152k
      FirstInLoop = true;
363
152k
      HoistOutOfLoop(N);
364
152k
      CSEMap.clear();
365
152k
366
152k
      if (SinkInstsToAvoidSpills)
367
1
        SinkIntoLoop();
368
152k
    }
369
306k
  }
370
998k
371
998k
  return Changed;
372
998k
}
373
374
/// Return true if instruction stores to the specified frame.
375
70.3k
static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
376
70.3k
  // Check mayStore before memory operands so that e.g. DBG_VALUEs will return
377
70.3k
  // true since they have no memory operands.
378
70.3k
  if (!MI->mayStore())
379
50.8k
     return false;
380
19.5k
  // If we lost memory operands, conservatively assume that the instruction
381
19.5k
  // writes to all slots.
382
19.5k
  if (MI->memoperands_empty())
383
3
    return true;
384
19.5k
  for (const MachineMemOperand *MemOp : MI->memoperands()) {
385
19.5k
    if (!MemOp->isStore() || 
!MemOp->getPseudoValue()19.2k
)
386
269
      continue;
387
19.2k
    if (const FixedStackPseudoSourceValue *Value =
388
19.2k
        dyn_cast<FixedStackPseudoSourceValue>(MemOp->getPseudoValue())) {
389
19.2k
      if (Value->getFrameIndex() == FI)
390
19.2k
        return true;
391
19.2k
    }
392
19.2k
  }
393
19.5k
  
return false269
;
394
19.5k
}
395
396
/// Examine the instruction for potentai LICM candidate. Also
397
/// gather register def and frame object update information.
398
void MachineLICMBase::ProcessMI(MachineInstr *MI,
399
                                BitVector &PhysRegDefs,
400
                                BitVector &PhysRegClobbers,
401
                                SmallSet<int, 32> &StoredFIs,
402
4.77M
                                SmallVectorImpl<CandidateInfo> &Candidates) {
403
4.77M
  bool RuledOut = false;
404
4.77M
  bool HasNonInvariantUse = false;
405
4.77M
  unsigned Def = 0;
406
16.2M
  for (const MachineOperand &MO : MI->operands()) {
407
16.2M
    if (MO.isFI()) {
408
202k
      // Remember if the instruction stores to the frame index.
409
202k
      int FI = MO.getIndex();
410
202k
      if (!StoredFIs.count(FI) &&
411
202k
          
MFI->isSpillSlotObjectIndex(FI)159k
&&
412
202k
          
InstructionStoresToFI(MI, FI)70.3k
)
413
19.2k
        StoredFIs.insert(FI);
414
202k
      HasNonInvariantUse = true;
415
202k
      continue;
416
202k
    }
417
16.0M
418
16.0M
    // We can't hoist an instruction defining a physreg that is clobbered in
419
16.0M
    // the loop.
420
16.0M
    if (MO.isRegMask()) {
421
261k
      PhysRegClobbers.setBitsNotInMask(MO.getRegMask());
422
261k
      continue;
423
261k
    }
424
15.7M
425
15.7M
    if (!MO.isReg())
426
5.02M
      continue;
427
10.7M
    unsigned Reg = MO.getReg();
428
10.7M
    if (!Reg)
429
249k
      continue;
430
10.4M
    assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
431
10.4M
           "Not expecting virtual register!");
432
10.4M
433
10.4M
    if (!MO.isDef()) {
434
6.13M
      if (Reg && (PhysRegDefs.test(Reg) || 
PhysRegClobbers.test(Reg)1.22M
))
435
6.10M
        // If it's using a non-loop-invariant register, then it's obviously not
436
6.10M
        // safe to hoist.
437
6.10M
        HasNonInvariantUse = true;
438
6.13M
      continue;
439
6.13M
    }
440
4.36M
441
4.36M
    if (MO.isImplicit()) {
442
6.50M
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); 
++AI4.88M
)
443
4.88M
        PhysRegClobbers.set(*AI);
444
1.61M
      if (!MO.isDead())
445
1.10M
        // Non-dead implicit def? This cannot be hoisted.
446
1.10M
        RuledOut = true;
447
1.61M
      // No need to check if a dead implicit def is also defined by
448
1.61M
      // another instruction.
449
1.61M
      continue;
450
1.61M
    }
451
2.74M
452
2.74M
    // FIXME: For now, avoid instructions with multiple defs, unless
453
2.74M
    // it's a dead implicit def.
454
2.74M
    if (Def)
455
13.6k
      RuledOut = true;
456
2.73M
    else
457
2.73M
      Def = Reg;
458
2.74M
459
2.74M
    // If we have already seen another instruction that defines the same
460
2.74M
    // register, then this is not safe.  Two defs is indicated by setting a
461
2.74M
    // PhysRegClobbers bit.
462
23.8M
    for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); 
++AS21.0M
) {
463
21.0M
      if (PhysRegDefs.test(*AS))
464
17.8M
        PhysRegClobbers.set(*AS);
465
21.0M
    }
466
2.74M
    // Need a second loop because MCRegAliasIterator can visit the same
467
2.74M
    // register twice.
468
23.8M
    for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); 
++AS21.0M
)
469
21.0M
      PhysRegDefs.set(*AS);
470
2.74M
471
2.74M
    if (PhysRegClobbers.test(Reg))
472
2.23M
      // MI defined register is seen defined by another instruction in
473
2.23M
      // the loop, it cannot be a LICM candidate.
474
2.23M
      RuledOut = true;
475
2.74M
  }
476
4.77M
477
4.77M
  // Only consider reloads for now and remats which do not have register
478
4.77M
  // operands. FIXME: Consider unfold load folding instructions.
479
4.77M
  if (Def && 
!RuledOut2.73M
) {
480
423k
    int FI = std::numeric_limits<int>::min();
481
423k
    if ((!HasNonInvariantUse && 
IsLICMCandidate(*MI)39.5k
) ||
482
423k
        
(384k
TII->isLoadFromStackSlot(*MI, FI)384k
&&
MFI->isSpillSlotObjectIndex(FI)10.2k
))
483
48.6k
      Candidates.push_back(CandidateInfo(MI, Def, FI));
484
423k
  }
485
4.77M
}
486
487
/// Walk the specified region of the CFG and hoist loop invariants out to the
488
/// preheader.
489
153k
void MachineLICMBase::HoistRegionPostRA() {
490
153k
  MachineBasicBlock *Preheader = getCurPreheader();
491
153k
  if (!Preheader)
492
1.51k
    return;
493
152k
494
152k
  unsigned NumRegs = TRI->getNumRegs();
495
152k
  BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop.
496
152k
  BitVector PhysRegClobbers(NumRegs); // Regs defined more than once.
497
152k
498
152k
  SmallVector<CandidateInfo, 32> Candidates;
499
152k
  SmallSet<int, 32> StoredFIs;
500
152k
501
152k
  // Walk the entire region, count number of defs for each register, and
502
152k
  // collect potential LICM candidates.
503
690k
  for (MachineBasicBlock *BB : CurLoop->getBlocks()) {
504
690k
    // If the header of the loop containing this basic block is a landing pad,
505
690k
    // then don't try to hoist instructions out of this loop.
506
690k
    const MachineLoop *ML = MLI->getLoopFor(BB);
507
690k
    if (ML && ML->getHeader()->isEHPad()) 
continue0
;
508
690k
509
690k
    // Conservatively treat live-in's as an external def.
510
690k
    // FIXME: That means a reload that're reused in successor block(s) will not
511
690k
    // be LICM'ed.
512
6.33M
    
for (const auto &LI : BB->liveins())690k
{
513
44.7M
      for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); 
++AI38.3M
)
514
38.3M
        PhysRegDefs.set(*AI);
515
6.33M
    }
516
690k
517
690k
    SpeculationState = SpeculateUnknown;
518
690k
    for (MachineInstr &MI : *BB)
519
4.77M
      ProcessMI(&MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates);
520
690k
  }
521
152k
522
152k
  // Gather the registers read / clobbered by the terminator.
523
152k
  BitVector TermRegs(NumRegs);
524
152k
  MachineBasicBlock::iterator TI = Preheader->getFirstTerminator();
525
152k
  if (TI != Preheader->end()) {
526
47.5k
    for (const MachineOperand &MO : TI->operands()) {
527
47.5k
      if (!MO.isReg())
528
46.6k
        continue;
529
837
      unsigned Reg = MO.getReg();
530
837
      if (!Reg)
531
780
        continue;
532
171
      
for (MCRegAliasIterator AI(Reg, TRI, true); 57
AI.isValid();
++AI114
)
533
114
        TermRegs.set(*AI);
534
57
    }
535
45.9k
  }
536
152k
537
152k
  // Now evaluate whether the potential candidates qualify.
538
152k
  // 1. Check if the candidate defined register is defined by another
539
152k
  //    instruction in the loop.
540
152k
  // 2. If the candidate is a load from stack slot (always true for now),
541
152k
  //    check if the slot is stored anywhere in the loop.
542
152k
  // 3. Make sure candidate def should not clobber
543
152k
  //    registers read by the terminator. Similarly its def should not be
544
152k
  //    clobbered by the terminator.
545
152k
  for (CandidateInfo &Candidate : Candidates) {
546
48.6k
    if (Candidate.FI != std::numeric_limits<int>::min() &&
547
48.6k
        
StoredFIs.count(Candidate.FI)9.43k
)
548
345
      continue;
549
48.2k
550
48.2k
    unsigned Def = Candidate.Def;
551
48.2k
    if (!PhysRegClobbers.test(Def) && 
!TermRegs.test(Def)1.15k
) {
552
1.15k
      bool Safe = true;
553
1.15k
      MachineInstr *MI = Candidate.MI;
554
3.11k
      for (const MachineOperand &MO : MI->operands()) {
555
3.11k
        if (!MO.isReg() || 
MO.isDef()1.41k
||
!MO.getReg()211
)
556
3.09k
          continue;
557
17
        unsigned Reg = MO.getReg();
558
17
        if (PhysRegDefs.test(Reg) ||
559
17
            
PhysRegClobbers.test(Reg)12
) {
560
5
          // If it's using a non-loop-invariant register, then it's obviously
561
5
          // not safe to hoist.
562
5
          Safe = false;
563
5
          break;
564
5
        }
565
17
      }
566
1.15k
      if (Safe)
567
1.15k
        HoistPostRA(MI, Candidate.Def);
568
1.15k
    }
569
48.2k
  }
570
152k
}
571
572
/// Add register 'Reg' to the livein sets of BBs in the current loop, and make
573
/// sure it is not killed by any instructions in the loop.
574
1.15k
void MachineLICMBase::AddToLiveIns(unsigned Reg) {
575
5.30k
  for (MachineBasicBlock *BB : CurLoop->getBlocks()) {
576
5.30k
    if (!BB->isLiveIn(Reg))
577
4.68k
      BB->addLiveIn(Reg);
578
94.3k
    for (MachineInstr &MI : *BB) {
579
310k
      for (MachineOperand &MO : MI.operands()) {
580
310k
        if (!MO.isReg() || 
!MO.getReg()219k
||
MO.isDef()215k
)
continue177k
;
581
132k
        if (MO.getReg() == Reg || 
TRI->isSuperRegister(Reg, MO.getReg())130k
)
582
2.04k
          MO.setIsKill(false);
583
132k
      }
584
94.3k
    }
585
5.30k
  }
586
1.15k
}
587
588
/// When an instruction is found to only use loop invariant operands that is
589
/// safe to hoist, this instruction is called to do the dirty work.
590
1.15k
void MachineLICMBase::HoistPostRA(MachineInstr *MI, unsigned Def) {
591
1.15k
  MachineBasicBlock *Preheader = getCurPreheader();
592
1.15k
593
1.15k
  // Now move the instructions to the predecessor, inserting it before any
594
1.15k
  // terminator instructions.
595
1.15k
  LLVM_DEBUG(dbgs() << "Hoisting to " << printMBBReference(*Preheader)
596
1.15k
                    << " from " << printMBBReference(*MI->getParent()) << ": "
597
1.15k
                    << *MI);
598
1.15k
599
1.15k
  // Splice the instruction to the preheader.
600
1.15k
  MachineBasicBlock *MBB = MI->getParent();
601
1.15k
  Preheader->splice(Preheader->getFirstTerminator(), MBB, MI);
602
1.15k
603
1.15k
  // Add register to livein list to all the BBs in the current loop since a
604
1.15k
  // loop invariant must be kept live throughout the whole loop. This is
605
1.15k
  // important to ensure later passes do not scavenge the def register.
606
1.15k
  AddToLiveIns(Def);
607
1.15k
608
1.15k
  ++NumPostRAHoisted;
609
1.15k
  Changed = true;
610
1.15k
}
611
612
/// Check if this mbb is guaranteed to execute. If not then a load from this mbb
613
/// may not be safe to hoist.
614
39.3k
bool MachineLICMBase::IsGuaranteedToExecute(MachineBasicBlock *BB) {
615
39.3k
  if (SpeculationState != SpeculateUnknown)
616
4.05k
    return SpeculationState == SpeculateFalse;
617
35.3k
618
35.3k
  if (BB != CurLoop->getHeader()) {
619
29.2k
    // Check loop exiting blocks.
620
29.2k
    SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
621
29.2k
    CurLoop->getExitingBlocks(CurrentLoopExitingBlocks);
622
29.2k
    for (MachineBasicBlock *CurrentLoopExitingBlock : CurrentLoopExitingBlocks)
623
31.6k
      if (!DT->dominates(BB, CurrentLoopExitingBlock)) {
624
26.7k
        SpeculationState = SpeculateTrue;
625
26.7k
        return false;
626
26.7k
      }
627
29.2k
  }
628
35.3k
629
35.3k
  SpeculationState = SpeculateFalse;
630
8.60k
  return true;
631
35.3k
}
632
633
639k
void MachineLICMBase::EnterScope(MachineBasicBlock *MBB) {
634
639k
  LLVM_DEBUG(dbgs() << "Entering " << printMBBReference(*MBB) << '\n');
635
639k
636
639k
  // Remember livein register pressure.
637
639k
  BackTrace.push_back(RegPressure);
638
639k
}
639
640
498k
void MachineLICMBase::ExitScope(MachineBasicBlock *MBB) {
641
498k
  LLVM_DEBUG(dbgs() << "Exiting " << printMBBReference(*MBB) << '\n');
642
498k
  BackTrace.pop_back();
643
498k
}
644
645
/// Destroy scope for the MBB that corresponds to the given dominator tree node
646
/// if its a leaf or all of its children are done. Walk up the dominator tree to
647
/// destroy ancestors which are now done.
648
void MachineLICMBase::ExitScopeIfDone(MachineDomTreeNode *Node,
649
    DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
650
639k
    DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
651
639k
  if (OpenChildren[Node])
652
374k
    return;
653
265k
654
265k
  // Pop scope.
655
265k
  ExitScope(Node->getBlock());
656
265k
657
265k
  // Now traverse upwards to pop ancestors whose offsprings are all done.
658
498k
  while (MachineDomTreeNode *Parent = ParentMap[Node]) {
659
420k
    unsigned Left = --OpenChildren[Parent];
660
420k
    if (Left != 0)
661
188k
      break;
662
232k
    ExitScope(Parent->getBlock());
663
232k
    Node = Parent;
664
232k
  }
665
265k
}
666
667
/// Walk the specified loop in the CFG (defined by all blocks dominated by the
668
/// specified header block, and that are in the current loop) in depth first
669
/// order w.r.t the DominatorTree. This allows us to visit definitions before
670
/// uses, allowing us to hoist a loop body in one pass without iteration.
671
152k
void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
672
152k
  MachineBasicBlock *Preheader = getCurPreheader();
673
152k
  if (!Preheader)
674
17
    return;
675
152k
676
152k
  SmallVector<MachineDomTreeNode*, 32> Scopes;
677
152k
  SmallVector<MachineDomTreeNode*, 8> WorkList;
678
152k
  DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
679
152k
  DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
680
152k
681
152k
  // Perform a DFS walk to determine the order of visit.
682
152k
  WorkList.push_back(HeaderN);
683
883k
  while (!WorkList.empty()) {
684
731k
    MachineDomTreeNode *Node = WorkList.pop_back_val();
685
731k
    assert(Node && "Null dominator tree node?");
686
731k
    MachineBasicBlock *BB = Node->getBlock();
687
731k
688
731k
    // If the header of the loop containing this basic block is a landing pad,
689
731k
    // then don't try to hoist instructions out of this loop.
690
731k
    const MachineLoop *ML = MLI->getLoopFor(BB);
691
731k
    if (ML && 
ML->getHeader()->isEHPad()639k
)
692
1
      continue;
693
731k
694
731k
    // If this subregion is not in the top level loop at all, exit.
695
731k
    if (!CurLoop->contains(BB))
696
91.2k
      continue;
697
639k
698
639k
    Scopes.push_back(Node);
699
639k
    const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
700
639k
    unsigned NumChildren = Children.size();
701
639k
702
639k
    // Don't hoist things out of a large switch statement.  This often causes
703
639k
    // code to be hoisted that wasn't going to be executed, and increases
704
639k
    // register pressure in a situation where it's likely to matter.
705
639k
    if (BB->succ_size() >= 25)
706
29
      NumChildren = 0;
707
639k
708
639k
    OpenChildren[Node] = NumChildren;
709
639k
    // Add children in reverse order as then the next popped worklist node is
710
639k
    // the first child of this node.  This means we ultimately traverse the
711
639k
    // DOM tree in exactly the same order as if we'd recursed.
712
1.21M
    for (int i = (int)NumChildren-1; i >= 0; 
--i578k
) {
713
578k
      MachineDomTreeNode *Child = Children[i];
714
578k
      ParentMap[Child] = Node;
715
578k
      WorkList.push_back(Child);
716
578k
    }
717
639k
  }
718
152k
719
152k
  if (Scopes.size() == 0)
720
0
    return;
721
152k
722
152k
  // Compute registers which are livein into the loop headers.
723
152k
  RegSeen.clear();
724
152k
  BackTrace.clear();
725
152k
  InitRegPressure(Preheader);
726
152k
727
152k
  // Now perform LICM.
728
639k
  for (MachineDomTreeNode *Node : Scopes) {
729
639k
    MachineBasicBlock *MBB = Node->getBlock();
730
639k
731
639k
    EnterScope(MBB);
732
639k
733
639k
    // Process the block
734
639k
    SpeculationState = SpeculateUnknown;
735
639k
    for (MachineBasicBlock::iterator
736
6.52M
         MII = MBB->begin(), E = MBB->end(); MII != E; ) {
737
5.88M
      MachineBasicBlock::iterator NextMII = MII; ++NextMII;
738
5.88M
      MachineInstr *MI = &*MII;
739
5.88M
      if (!Hoist(MI, Preheader))
740
5.55M
        UpdateRegPressure(MI);
741
5.88M
      // If we have hoisted an instruction that may store, it can only be a
742
5.88M
      // constant store.
743
5.88M
      MII = NextMII;
744
5.88M
    }
745
639k
746
639k
    // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
747
639k
    ExitScopeIfDone(Node, OpenChildren, ParentMap);
748
639k
  }
749
152k
}
750
751
/// Sink instructions into loops if profitable. This especially tries to prevent
752
/// register spills caused by register pressure if there is little to no
753
/// overhead moving instructions into loops.
754
1
void MachineLICMBase::SinkIntoLoop() {
755
1
  MachineBasicBlock *Preheader = getCurPreheader();
756
1
  if (!Preheader)
757
0
    return;
758
1
759
1
  SmallVector<MachineInstr *, 8> Candidates;
760
1
  for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin();
761
9
       I != Preheader->instr_end(); 
++I8
) {
762
8
    // We need to ensure that we can safely move this instruction into the loop.
763
8
    // As such, it must not have side-effects, e.g. such as a call has.
764
8
    if (IsLoopInvariantInst(*I) && 
!HasLoopPHIUse(&*I)6
)
765
5
      Candidates.push_back(&*I);
766
8
  }
767
1
768
5
  for (MachineInstr *I : Candidates) {
769
5
    const MachineOperand &MO = I->getOperand(0);
770
5
    if (!MO.isDef() || !MO.isReg() || !MO.getReg())
771
0
      continue;
772
5
    if (!MRI->hasOneDef(MO.getReg()))
773
0
      continue;
774
5
    bool CanSink = true;
775
5
    MachineBasicBlock *B = nullptr;
776
5
    for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
777
5
      // FIXME: Come up with a proper cost model that estimates whether sinking
778
5
      // the instruction (and thus possibly executing it on every loop
779
5
      // iteration) is more expensive than a register.
780
5
      // For now assumes that copies are cheap and thus almost always worth it.
781
5
      if (!MI.isCopy()) {
782
0
        CanSink = false;
783
0
        break;
784
0
      }
785
5
      if (!B) {
786
5
        B = MI.getParent();
787
5
        continue;
788
5
      }
789
0
      B = DT->findNearestCommonDominator(B, MI.getParent());
790
0
      if (!B) {
791
0
        CanSink = false;
792
0
        break;
793
0
      }
794
0
    }
795
5
    if (!CanSink || !B || B == Preheader)
796
0
      continue;
797
5
    B->splice(B->getFirstNonPHI(), Preheader, I);
798
5
  }
799
1
}
800
801
6.30M
static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
802
6.30M
  return MO.isKill() || 
MRI->hasOneNonDBGUse(MO.getReg())5.70M
;
803
6.30M
}
804
805
/// Find all virtual register references that are liveout of the preheader to
806
/// initialize the starting "register pressure". Note this does not count live
807
/// through (livein but not used) registers.
808
272k
void MachineLICMBase::InitRegPressure(MachineBasicBlock *BB) {
809
272k
  std::fill(RegPressure.begin(), RegPressure.end(), 0);
810
272k
811
272k
  // If the preheader has only a single predecessor and it ends with a
812
272k
  // fallthrough or an unconditional branch, then scan its predecessor for live
813
272k
  // defs as well. This happens whenever the preheader is created by splitting
814
272k
  // the critical edge from the loop predecessor to the loop header.
815
272k
  if (BB->pred_size() == 1) {
816
171k
    MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
817
171k
    SmallVector<MachineOperand, 4> Cond;
818
171k
    if (!TII->analyzeBranch(*BB, TBB, FBB, Cond, false) && 
Cond.empty()170k
)
819
119k
      InitRegPressure(*BB->pred_begin());
820
171k
  }
821
272k
822
272k
  for (const MachineInstr &MI : *BB)
823
1.91M
    UpdateRegPressure(&MI, /*ConsiderUnseenAsDef=*/true);
824
272k
}
825
826
/// Update estimate of register pressure after the specified instruction.
827
void MachineLICMBase::UpdateRegPressure(const MachineInstr *MI,
828
7.46M
                                        bool ConsiderUnseenAsDef) {
829
7.46M
  auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef);
830
10.4M
  for (const auto &RPIdAndCost : Cost) {
831
10.4M
    unsigned Class = RPIdAndCost.first;
832
10.4M
    if (static_cast<int>(RegPressure[Class]) < -RPIdAndCost.second)
833
0
      RegPressure[Class] = 0;
834
10.4M
    else
835
10.4M
      RegPressure[Class] += RPIdAndCost.second;
836
10.4M
  }
837
7.46M
}
838
839
/// Calculate the additional register pressure that the registers used in MI
840
/// cause.
841
///
842
/// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to
843
/// figure out which usages are live-ins.
844
/// FIXME: Figure out a way to consider 'RegSeen' from all code paths.
845
DenseMap<unsigned, int>
846
MachineLICMBase::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
847
7.80M
                                  bool ConsiderUnseenAsDef) {
848
7.80M
  DenseMap<unsigned, int> Cost;
849
7.80M
  if (MI->isImplicitDef())
850
56.5k
    return Cost;
851
26.9M
  
for (unsigned i = 0, e = MI->getDesc().getNumOperands(); 7.75M
i != e;
++i19.2M
) {
852
19.2M
    const MachineOperand &MO = MI->getOperand(i);
853
19.2M
    if (!MO.isReg() || 
MO.isImplicit()12.4M
)
854
6.76M
      continue;
855
12.4M
    unsigned Reg = MO.getReg();
856
12.4M
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
857
1.85M
      continue;
858
10.6M
859
10.6M
    // FIXME: It seems bad to use RegSeen only for some of these calculations.
860
10.6M
    bool isNew = ConsiderSeen ? 
RegSeen.insert(Reg).second10.1M
:
false441k
;
861
10.6M
    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
862
10.6M
863
10.6M
    RegClassWeight W = TRI->getRegClassWeight(RC);
864
10.6M
    int RCCost = 0;
865
10.6M
    if (MO.isDef())
866
4.31M
      RCCost = W.RegWeight;
867
6.30M
    else {
868
6.30M
      bool isKill = isOperandKill(MO, MRI);
869
6.30M
      if (isNew && 
!isKill679k
&&
ConsiderUnseenAsDef405k
)
870
219k
        // Haven't seen this, it must be a livein.
871
219k
        RCCost = W.RegWeight;
872
6.08M
      else if (!isNew && 
isKill5.62M
)
873
2.44M
        RCCost = -W.RegWeight;
874
6.30M
    }
875
10.6M
    if (RCCost == 0)
876
3.64M
      continue;
877
6.98M
    const int *PS = TRI->getRegClassPressureSets(RC);
878
22.9M
    for (; *PS != -1; 
++PS16.0M
) {
879
16.0M
      if (Cost.find(*PS) == Cost.end())
880
11.3M
        Cost[*PS] = RCCost;
881
4.63M
      else
882
4.63M
        Cost[*PS] += RCCost;
883
16.0M
    }
884
6.98M
  }
885
7.75M
  return Cost;
886
7.75M
}
887
888
/// Return true if this machine instruction loads from global offset table or
889
/// constant pool.
890
10.7k
static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
891
10.7k
  assert(MI.mayLoad() && "Expected MI that loads!");
892
10.7k
893
10.7k
  // If we lost memory operands, conservatively assume that the instruction
894
10.7k
  // reads from everything..
895
10.7k
  if (MI.memoperands_empty())
896
0
    return true;
897
10.7k
898
10.7k
  for (MachineMemOperand *MemOp : MI.memoperands())
899
10.7k
    if (const PseudoSourceValue *PSV = MemOp->getPseudoValue())
900
7.49k
      if (PSV->isGOT() || 
PSV->isConstantPool()5.11k
)
901
7.36k
        return true;
902
10.7k
903
10.7k
  
return false3.42k
;
904
10.7k
}
905
906
// This function iterates through all the operands of the input store MI and
907
// checks that each register operand statisfies isCallerPreservedPhysReg.
908
// This means, the value being stored and the address where it is being stored
909
// is constant throughout the body of the function (not including prologue and
910
// epilogue). When called with an MI that isn't a store, it returns false.
911
// A future improvement can be to check if the store registers are constant
912
// throughout the loop rather than throughout the funtion.
913
static bool isInvariantStore(const MachineInstr &MI,
914
                             const TargetRegisterInfo *TRI,
915
2.94M
                             const MachineRegisterInfo *MRI) {
916
2.94M
917
2.94M
  bool FoundCallerPresReg = false;
918
2.94M
  if (!MI.mayStore() || 
MI.hasUnmodeledSideEffects()423k
||
919
2.94M
      
(MI.getNumOperands() == 0)369k
)
920
2.57M
    return false;
921
369k
922
369k
  // Check that all register operands are caller-preserved physical registers.
923
369k
  
for (const MachineOperand &MO : MI.operands())369k
{
924
369k
    if (MO.isReg()) {
925
364k
      unsigned Reg = MO.getReg();
926
364k
      // If operand is a virtual register, check if it comes from a copy of a
927
364k
      // physical register.
928
364k
      if (TargetRegisterInfo::isVirtualRegister(Reg))
929
358k
        Reg = TRI->lookThruCopyLike(MO.getReg(), MRI);
930
364k
      if (TargetRegisterInfo::isVirtualRegister(Reg))
931
330k
        return false;
932
34.4k
      if (!TRI->isCallerPreservedPhysReg(Reg, *MI.getMF()))
933
34.3k
        return false;
934
36
      else
935
36
        FoundCallerPresReg = true;
936
34.4k
    } else 
if (4.45k
!MO.isImm()4.45k
) {
937
4.31k
        return false;
938
4.31k
    }
939
369k
  }
940
369k
  
return FoundCallerPresReg18
;
941
369k
}
942
943
// Return true if the input MI is a copy instruction that feeds an invariant
944
// store instruction. This means that the src of the copy has to satisfy
945
// isCallerPreservedPhysReg and atleast one of it's users should satisfy
946
// isInvariantStore.
947
static bool isCopyFeedingInvariantStore(const MachineInstr &MI,
948
                                        const MachineRegisterInfo *MRI,
949
358k
                                        const TargetRegisterInfo *TRI) {
950
358k
951
358k
  // FIXME: If targets would like to look through instructions that aren't
952
358k
  // pure copies, this can be updated to a query.
953
358k
  if (!MI.isCopy())
954
313k
    return false;
955
45.7k
956
45.7k
  const MachineFunction *MF = MI.getMF();
957
45.7k
  // Check that we are copying a constant physical register.
958
45.7k
  unsigned CopySrcReg = MI.getOperand(1).getReg();
959
45.7k
  if (TargetRegisterInfo::isVirtualRegister(CopySrcReg))
960
33.9k
    return false;
961
11.7k
962
11.7k
  if (!TRI->isCallerPreservedPhysReg(CopySrcReg, *MF))
963
11.7k
    return false;
964
9
965
9
  unsigned CopyDstReg = MI.getOperand(0).getReg();
966
9
  // Check if any of the uses of the copy are invariant stores.
967
9
  assert (TargetRegisterInfo::isVirtualRegister(CopyDstReg) &&
968
9
          "copy dst is not a virtual reg");
969
9
970
9
  for (MachineInstr &UseMI : MRI->use_instructions(CopyDstReg)) {
971
9
    if (UseMI.mayStore() && isInvariantStore(UseMI, TRI, MRI))
972
9
      return true;
973
9
  }
974
9
  
return false0
;
975
9
}
976
977
/// Returns true if the instruction may be a suitable candidate for LICM.
978
/// e.g. If the instruction is a call, then it's obviously not safe to hoist it.
979
5.92M
bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) {
980
5.92M
  // Check if it's safe to move the instruction.
981
5.92M
  bool DontMoveAcrossStore = true;
982
5.92M
  if ((!I.isSafeToMove(AA, DontMoveAcrossStore)) &&
983
5.92M
      
!(2.94M
HoistConstStores2.94M
&&
isInvariantStore(I, TRI, MRI)2.94M
)) {
984
2.94M
    return false;
985
2.94M
  }
986
2.97M
987
2.97M
  // If it is load then check if it is guaranteed to execute by making sure that
988
2.97M
  // it dominates all exiting blocks. If it doesn't, then there is a path out of
989
2.97M
  // the loop which does not execute this load, so we can't hoist it. Loads
990
2.97M
  // from constant memory are not safe to speculate all the time, for example
991
2.97M
  // indexed load from a jump table.
992
2.97M
  // Stores and side effects are already checked by isSafeToMove.
993
2.97M
  if (I.mayLoad() && 
!mayLoadFromGOTOrConstantPool(I)10.7k
&&
994
2.97M
      
!IsGuaranteedToExecute(I.getParent())3.42k
)
995
1.21k
    return false;
996
2.97M
997
2.97M
  return true;
998
2.97M
}
999
1000
/// Returns true if the instruction is loop invariant.
1001
/// I.e., all virtual register operands are defined outside of the loop,
1002
/// physical registers aren't accessed explicitly, and there are no side
1003
/// effects that aren't captured by the operands or other flags.
1004
5.88M
bool MachineLICMBase::IsLoopInvariantInst(MachineInstr &I) {
1005
5.88M
  if (!IsLICMCandidate(I))
1006
2.94M
    return false;
1007
2.93M
1008
2.93M
  // The instruction is loop invariant if all of its operands are.
1009
5.63M
  
for (const MachineOperand &MO : I.operands())2.93M
{
1010
5.63M
    if (!MO.isReg())
1011
556k
      continue;
1012
5.08M
1013
5.08M
    unsigned Reg = MO.getReg();
1014
5.08M
    if (Reg == 0) 
continue29.3k
;
1015
5.05M
1016
5.05M
    // Don't hoist an instruction that uses or defines a physical register.
1017
5.05M
    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
1018
1.30M
      if (MO.isUse()) {
1019
374k
        // If the physreg has no defs anywhere, it's just an ambient register
1020
374k
        // and we can freely move its uses. Alternatively, if it's allocatable,
1021
374k
        // it could get allocated to something with a def during allocation.
1022
374k
        // However, if the physreg is known to always be caller saved/restored
1023
374k
        // then this use is safe to hoist.
1024
374k
        if (!MRI->isConstantPhysReg(Reg) &&
1025
374k
            
!(TRI->isCallerPreservedPhysReg(Reg, *I.getMF()))255k
)
1026
255k
          return false;
1027
118k
        // Otherwise it's safe to move.
1028
118k
        continue;
1029
926k
      } else if (!MO.isDead()) {
1030
852k
        // A def that isn't dead. We can't move it.
1031
852k
        return false;
1032
852k
      } else 
if (73.9k
CurLoop->getHeader()->isLiveIn(Reg)73.9k
) {
1033
0
        // If the reg is live into the loop, we can't hoist an instruction
1034
0
        // which would clobber it.
1035
0
        return false;
1036
0
      }
1037
3.82M
    }
1038
3.82M
1039
3.82M
    if (!MO.isUse())
1040
2.11M
      continue;
1041
1.70M
1042
1.70M
    assert(MRI->getVRegDef(Reg) &&
1043
1.70M
           "Machine instr not mapped for this vreg?!");
1044
1.70M
1045
1.70M
    // If the loop contains the definition of an operand, then the instruction
1046
1.70M
    // isn't loop invariant.
1047
1.70M
    if (CurLoop->contains(MRI->getVRegDef(Reg)))
1048
1.43M
      return false;
1049
1.70M
  }
1050
2.93M
1051
2.93M
  // If we got this far, the instruction is loop invariant!
1052
2.93M
  
return true388k
;
1053
2.93M
}
1054
1055
/// Return true if the specified instruction is used by a phi node and hoisting
1056
/// it could cause a copy to be inserted.
1057
358k
bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI) const {
1058
358k
  SmallVector<const MachineInstr*, 8> Work(1, MI);
1059
429k
  do {
1060
429k
    MI = Work.pop_back_val();
1061
976k
    for (const MachineOperand &MO : MI->operands()) {
1062
976k
      if (!MO.isReg() || 
!MO.isDef()621k
)
1063
541k
        continue;
1064
434k
      unsigned Reg = MO.getReg();
1065
434k
      if (!TargetRegisterInfo::isVirtualRegister(Reg))
1066
53.8k
        continue;
1067
448k
      
for (MachineInstr &UseMI : MRI->use_instructions(Reg))381k
{
1068
448k
        // A PHI may cause a copy to be inserted.
1069
448k
        if (UseMI.isPHI()) {
1070
28.4k
          // A PHI inside the loop causes a copy because the live range of Reg is
1071
28.4k
          // extended across the PHI.
1072
28.4k
          if (CurLoop->contains(&UseMI))
1073
23.8k
            return true;
1074
4.56k
          // A PHI in an exit block can cause a copy to be inserted if the PHI
1075
4.56k
          // has multiple predecessors in the loop with different values.
1076
4.56k
          // For now, approximate by rejecting all exit blocks.
1077
4.56k
          if (isExitBlock(UseMI.getParent()))
1078
4.56k
            return true;
1079
2
          continue;
1080
2
        }
1081
419k
        // Look past copies as well.
1082
419k
        if (UseMI.isCopy() && 
CurLoop->contains(&UseMI)71.1k
)
1083
71.0k
          Work.push_back(&UseMI);
1084
419k
      }
1085
381k
    }
1086
429k
  } while (
!Work.empty()401k
);
1087
358k
  
return false330k
;
1088
358k
}
1089
1090
/// Compute operand latency between a def of 'Reg' and an use in the current
1091
/// loop, return true if the target considered it high.
1092
bool MachineLICMBase::HasHighOperandLatency(MachineInstr &MI,
1093
                                            unsigned DefIdx,
1094
86.9k
                                            unsigned Reg) const {
1095
86.9k
  if (MRI->use_nodbg_empty(Reg))
1096
1
    return false;
1097
86.9k
1098
88.2k
  
for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg))86.9k
{
1099
88.2k
    if (UseMI.isCopyLike())
1100
12.9k
      continue;
1101
75.3k
    if (!CurLoop->contains(UseMI.getParent()))
1102
127
      continue;
1103
339k
    
for (unsigned i = 0, e = UseMI.getNumOperands(); 75.1k
i != e;
++i264k
) {
1104
264k
      const MachineOperand &MO = UseMI.getOperand(i);
1105
264k
      if (!MO.isReg() || 
!MO.isUse()244k
)
1106
113k
        continue;
1107
150k
      unsigned MOReg = MO.getReg();
1108
150k
      if (MOReg != Reg)
1109
75.3k
        continue;
1110
75.4k
1111
75.4k
      if (TII->hasHighOperandLatency(SchedModel, MRI, MI, DefIdx, UseMI, i))
1112
10
        return true;
1113
75.4k
    }
1114
75.1k
1115
75.1k
    // Only look at the first in loop use.
1116
75.1k
    
break75.1k
;
1117
75.1k
  }
1118
86.9k
1119
86.9k
  
return false86.9k
;
1120
86.9k
}
1121
1122
/// Return true if the instruction is marked "cheap" or the operand latency
1123
/// between its def and a use is one or less.
1124
358k
bool MachineLICMBase::IsCheapInstruction(MachineInstr &MI) const {
1125
358k
  if (TII->isAsCheapAsAMove(MI) || 
MI.isCopyLike()93.5k
)
1126
285k
    return true;
1127
72.9k
1128
72.9k
  bool isCheap = false;
1129
72.9k
  unsigned NumDefs = MI.getDesc().getNumDefs();
1130
74.3k
  for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && 
i != e73.0k
;
++i1.35k
) {
1131
73.0k
    MachineOperand &DefMO = MI.getOperand(i);
1132
73.0k
    if (!DefMO.isReg() || !DefMO.isDef())
1133
0
      continue;
1134
73.0k
    --NumDefs;
1135
73.0k
    unsigned Reg = DefMO.getReg();
1136
73.0k
    if (TargetRegisterInfo::isPhysicalRegister(Reg))
1137
58
      continue;
1138
72.9k
1139
72.9k
    if (!TII->hasLowDefLatency(SchedModel, MI, i))
1140
71.6k
      return false;
1141
1.30k
    isCheap = true;
1142
1.30k
  }
1143
72.9k
1144
72.9k
  
return isCheap1.31k
;
1145
72.9k
}
1146
1147
/// Visit BBs from header to current BB, check if hoisting an instruction of the
1148
/// given cost matrix can cause high register pressure.
1149
bool
1150
MachineLICMBase::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost,
1151
86.9k
                                         bool CheapInstr) {
1152
309k
  for (const auto &RPIdAndCost : Cost) {
1153
309k
    if (RPIdAndCost.second <= 0)
1154
54.3k
      continue;
1155
255k
1156
255k
    unsigned Class = RPIdAndCost.first;
1157
255k
    int Limit = RegLimit[Class];
1158
255k
1159
255k
    // Don't hoist cheap instructions if they would increase register pressure,
1160
255k
    // even if we're under the limit.
1161
255k
    if (CheapInstr && 
!HoistCheapInsts34.4k
)
1162
34.4k
      return true;
1163
221k
1164
221k
    for (const auto &RP : BackTrace)
1165
315k
      if (static_cast<int>(RP[Class]) + RPIdAndCost.second >= Limit)
1166
1.51k
        return true;
1167
221k
  }
1168
86.9k
1169
86.9k
  
return false50.9k
;
1170
86.9k
}
1171
1172
/// Traverse the back trace from header to the current block and update their
1173
/// register pressures to reflect the effect of hoisting MI from the current
1174
/// block to the preheader.
1175
253k
void MachineLICMBase::UpdateBackTraceRegPressure(const MachineInstr *MI) {
1176
253k
  // First compute the 'cost' of the instruction, i.e. its contribution
1177
253k
  // to register pressure.
1178
253k
  auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/false,
1179
253k
                               /*ConsiderUnseenAsDef=*/false);
1180
253k
1181
253k
  // Update register pressure of blocks from loop header to current block.
1182
253k
  for (auto &RP : BackTrace)
1183
631k
    for (const auto &RPIdAndCost : Cost)
1184
1.13M
      RP[RPIdAndCost.first] += RPIdAndCost.second;
1185
253k
}
1186
1187
/// Return true if it is potentially profitable to hoist the given loop
1188
/// invariant.
1189
388k
bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
1190
388k
  if (MI.isImplicitDef())
1191
29.9k
    return true;
1192
358k
1193
358k
  // Besides removing computation from the loop, hoisting an instruction has
1194
358k
  // these effects:
1195
358k
  //
1196
358k
  // - The value defined by the instruction becomes live across the entire
1197
358k
  //   loop. This increases register pressure in the loop.
1198
358k
  //
1199
358k
  // - If the value is used by a PHI in the loop, a copy will be required for
1200
358k
  //   lowering the PHI after extending the live range.
1201
358k
  //
1202
358k
  // - When hoisting the last use of a value in the loop, that value no longer
1203
358k
  //   needs to be live in the loop. This lowers register pressure in the loop.
1204
358k
1205
358k
  if (HoistConstStores &&  isCopyFeedingInvariantStore(MI, MRI, TRI))
1206
9
    return true;
1207
358k
1208
358k
  bool CheapInstr = IsCheapInstruction(MI);
1209
358k
  bool CreatesCopy = HasLoopPHIUse(&MI);
1210
358k
1211
358k
  // Don't hoist a cheap instruction if it would create a copy in the loop.
1212
358k
  if (CheapInstr && 
CreatesCopy287k
) {
1213
24.6k
    LLVM_DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);
1214
24.6k
    return false;
1215
24.6k
  }
1216
334k
1217
334k
  // Rematerializable instructions should always be hoisted since the register
1218
334k
  // allocator can just pull them down again when needed.
1219
334k
  if (TII->isTriviallyReMaterializable(MI, AA))
1220
247k
    return true;
1221
86.9k
1222
86.9k
  // FIXME: If there are long latency loop-invariant instructions inside the
1223
86.9k
  // loop at this point, why didn't the optimizer's LICM hoist them?
1224
319k
  
for (unsigned i = 0, e = MI.getDesc().getNumOperands(); 86.9k
i != e;
++i232k
) {
1225
232k
    const MachineOperand &MO = MI.getOperand(i);
1226
232k
    if (!MO.isReg() || 
MO.isImplicit()177k
)
1227
55.2k
      continue;
1228
177k
    unsigned Reg = MO.getReg();
1229
177k
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
1230
8.00k
      continue;
1231
169k
    if (MO.isDef() && 
HasHighOperandLatency(MI, i, Reg)86.9k
) {
1232
10
      LLVM_DEBUG(dbgs() << "Hoist High Latency: " << MI);
1233
10
      ++NumHighLatency;
1234
10
      return true;
1235
10
    }
1236
169k
  }
1237
86.9k
1238
86.9k
  // Estimate register pressure to determine whether to LICM the instruction.
1239
86.9k
  // In low register pressure situation, we can be more aggressive about
1240
86.9k
  // hoisting. Also, favors hoisting long latency instructions even in
1241
86.9k
  // moderately high pressure situation.
1242
86.9k
  // Cheap instructions will only be hoisted if they don't increase register
1243
86.9k
  // pressure at all.
1244
86.9k
  auto Cost = calcRegisterCost(&MI, /*ConsiderSeen=*/false,
1245
86.9k
                               /*ConsiderUnseenAsDef=*/false);
1246
86.9k
1247
86.9k
  // Visit BBs from header to current BB, if hoisting this doesn't cause
1248
86.9k
  // high register pressure, then it's safe to proceed.
1249
86.9k
  if (!CanCauseHighRegPressure(Cost, CheapInstr)) {
1250
50.9k
    LLVM_DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI);
1251
50.9k
    ++NumLowRP;
1252
50.9k
    return true;
1253
50.9k
  }
1254
35.9k
1255
35.9k
  // Don't risk increasing register pressure if it would create copies.
1256
35.9k
  if (CreatesCopy) {
1257
26
    LLVM_DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);
1258
26
    return false;
1259
26
  }
1260
35.9k
1261
35.9k
  // Do not "speculate" in high register pressure situation. If an
1262
35.9k
  // instruction is not guaranteed to be executed in the loop, it's best to be
1263
35.9k
  // conservative.
1264
35.9k
  if (AvoidSpeculation &&
1265
35.9k
      (!IsGuaranteedToExecute(MI.getParent()) && 
!MayCSE(&MI)27.7k
)) {
1266
24.2k
    LLVM_DEBUG(dbgs() << "Won't speculate: " << MI);
1267
24.2k
    return false;
1268
24.2k
  }
1269
11.7k
1270
11.7k
  // High register pressure situation, only hoist if the instruction is going
1271
11.7k
  // to be remat'ed.
1272
11.7k
  if (!TII->isTriviallyReMaterializable(MI, AA) &&
1273
11.7k
      !MI.isDereferenceableInvariantLoad(AA)) {
1274
11.4k
    LLVM_DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
1275
11.4k
    return false;
1276
11.4k
  }
1277
328
1278
328
  return true;
1279
328
}
1280
1281
/// Unfold a load from the given machineinstr if the load itself could be
1282
/// hoisted. Return the unfolded and hoistable load, or null if the load
1283
/// couldn't be unfolded or if it wouldn't be hoistable.
1284
5.55M
MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI) {
1285
5.55M
  // Don't unfold simple loads.
1286
5.55M
  if (MI->canFoldAsLoad())
1287
39.1k
    return nullptr;
1288
5.51M
1289
5.51M
  // If not, we may be able to unfold a load and hoist that.
1290
5.51M
  // First test whether the instruction is loading from an amenable
1291
5.51M
  // memory location.
1292
5.51M
  if (!MI->isDereferenceableInvariantLoad(AA))
1293
5.50M
    return nullptr;
1294
3.47k
1295
3.47k
  // Next determine the register class for a temporary register.
1296
3.47k
  unsigned LoadRegIndex;
1297
3.47k
  unsigned NewOpc =
1298
3.47k
    TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
1299
3.47k
                                    /*UnfoldLoad=*/true,
1300
3.47k
                                    /*UnfoldStore=*/false,
1301
3.47k
                                    &LoadRegIndex);
1302
3.47k
  if (NewOpc == 0) 
return nullptr3.22k
;
1303
254
  const MCInstrDesc &MID = TII->get(NewOpc);
1304
254
  MachineFunction &MF = *MI->getMF();
1305
254
  const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF);
1306
254
  // Ok, we're unfolding. Create a temporary register and do the unfold.
1307
254
  unsigned Reg = MRI->createVirtualRegister(RC);
1308
254
1309
254
  SmallVector<MachineInstr *, 2> NewMIs;
1310
254
  bool Success = TII->unfoldMemoryOperand(MF, *MI, Reg,
1311
254
                                          /*UnfoldLoad=*/true,
1312
254
                                          /*UnfoldStore=*/false, NewMIs);
1313
254
  (void)Success;
1314
254
  assert(Success &&
1315
254
         "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
1316
254
         "succeeded!");
1317
254
  assert(NewMIs.size() == 2 &&
1318
254
         "Unfolded a load into multiple instructions!");
1319
254
  MachineBasicBlock *MBB = MI->getParent();
1320
254
  MachineBasicBlock::iterator Pos = MI;
1321
254
  MBB->insert(Pos, NewMIs[0]);
1322
254
  MBB->insert(Pos, NewMIs[1]);
1323
254
  // If unfolding produced a load that wasn't loop-invariant or profitable to
1324
254
  // hoist, discard the new instructions and bail.
1325
254
  if (!IsLoopInvariantInst(*NewMIs[0]) || 
!IsProfitableToHoist(*NewMIs[0])171
) {
1326
83
    NewMIs[0]->eraseFromParent();
1327
83
    NewMIs[1]->eraseFromParent();
1328
83
    return nullptr;
1329
83
  }
1330
171
1331
171
  // Update register pressure for the unfolded instruction.
1332
171
  UpdateRegPressure(NewMIs[1]);
1333
171
1334
171
  // Otherwise we successfully unfolded a load that we can hoist.
1335
171
  MI->eraseFromParent();
1336
171
  return NewMIs[0];
1337
171
}
1338
1339
/// Initialize the CSE map with instructions that are in the current loop
1340
/// preheader that may become duplicates of instructions that are hoisted
1341
/// out of the loop.
1342
86.5k
void MachineLICMBase::InitCSEMap(MachineBasicBlock *BB) {
1343
86.5k
  for (MachineInstr &MI : *BB)
1344
423k
    CSEMap[MI.getOpcode()].push_back(&MI);
1345
86.5k
}
1346
1347
/// Find an instruction amount PrevMIs that is a duplicate of MI.
1348
/// Return this instruction if it's found.
1349
const MachineInstr*
1350
MachineLICMBase::LookForDuplicate(const MachineInstr *MI,
1351
177k
                                  std::vector<const MachineInstr*> &PrevMIs) {
1352
177k
  for (const MachineInstr *PrevMI : PrevMIs)
1353
480k
    if (TII->produceSameValue(*MI, *PrevMI, (PreRegAlloc ? MRI : 
nullptr0
)))
1354
78.9k
      return PrevMI;
1355
177k
1356
177k
  
return nullptr98.9k
;
1357
177k
}
1358
1359
/// Given a LICM'ed instruction, look for an instruction on the preheader that
1360
/// computes the same value. If it's found, do a RAU on with the definition of
1361
/// the existing instruction rather than hoisting the instruction to the
1362
/// preheader.
1363
bool MachineLICMBase::EliminateCSE(MachineInstr *MI,
1364
328k
    DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator &CI) {
1365
328k
  // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
1366
328k
  // the undef property onto uses.
1367
328k
  if (CI == CSEMap.end() || 
MI->isImplicitDef()192k
)
1368
158k
    return false;
1369
169k
1370
169k
  if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
1371
75.3k
    LLVM_DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);
1372
75.3k
1373
75.3k
    // Replace virtual registers defined by MI by their counterparts defined
1374
75.3k
    // by Dup.
1375
75.3k
    SmallVector<unsigned, 2> Defs;
1376
248k
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; 
++i173k
) {
1377
173k
      const MachineOperand &MO = MI->getOperand(i);
1378
173k
1379
173k
      // Physical registers may not differ here.
1380
173k
      assert((!MO.isReg() || MO.getReg() == 0 ||
1381
173k
              !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
1382
173k
              MO.getReg() == Dup->getOperand(i).getReg()) &&
1383
173k
             "Instructions with different phys regs are not identical!");
1384
173k
1385
173k
      if (MO.isReg() && 
MO.isDef()90.2k
&&
1386
173k
          
!TargetRegisterInfo::isPhysicalRegister(MO.getReg())78.3k
)
1387
75.3k
        Defs.push_back(i);
1388
173k
    }
1389
75.3k
1390
75.3k
    SmallVector<const TargetRegisterClass*, 2> OrigRCs;
1391
150k
    for (unsigned i = 0, e = Defs.size(); i != e; 
++i75.3k
) {
1392
75.3k
      unsigned Idx = Defs[i];
1393
75.3k
      unsigned Reg = MI->getOperand(Idx).getReg();
1394
75.3k
      unsigned DupReg = Dup->getOperand(Idx).getReg();
1395
75.3k
      OrigRCs.push_back(MRI->getRegClass(DupReg));
1396
75.3k
1397
75.3k
      if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) {
1398
0
        // Restore old RCs if more than one defs.
1399
0
        for (unsigned j = 0; j != i; ++j)
1400
0
          MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);
1401
0
        return false;
1402
0
      }
1403
75.3k
    }
1404
75.3k
1405
75.3k
    for (unsigned Idx : Defs) {
1406
75.3k
      unsigned Reg = MI->getOperand(Idx).getReg();
1407
75.3k
      unsigned DupReg = Dup->getOperand(Idx).getReg();
1408
75.3k
      MRI->replaceRegWith(Reg, DupReg);
1409
75.3k
      MRI->clearKillFlags(DupReg);
1410
75.3k
    }
1411
75.3k
1412
75.3k
    MI->eraseFromParent();
1413
75.3k
    ++NumCSEed;
1414
75.3k
    return true;
1415
94.5k
  }
1416
94.5k
  return false;
1417
94.5k
}
1418
1419
/// Return true if the given instruction will be CSE'd if it's hoisted out of
1420
/// the loop.
1421
27.7k
bool MachineLICMBase::MayCSE(MachineInstr *MI) {
1422
27.7k
  unsigned Opcode = MI->getOpcode();
1423
27.7k
  DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator
1424
27.7k
    CI = CSEMap.find(Opcode);
1425
27.7k
  // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
1426
27.7k
  // the undef property onto uses.
1427
27.7k
  if (CI == CSEMap.end() || 
MI->isImplicitDef()7.94k
)
1428
19.8k
    return false;
1429
7.94k
1430
7.94k
  return LookForDuplicate(MI, CI->second) != nullptr;
1431
7.94k
}
1432
1433
/// When an instruction is found to use only loop invariant operands
1434
/// that are safe to hoist, this instruction is called to do the dirty work.
1435
/// It returns true if the instruction is hoisted.
1436
5.88M
bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
1437
5.88M
  // First check whether we should hoist this instruction.
1438
5.88M
  if (!IsLoopInvariantInst(*MI) || 
!IsProfitableToHoist(*MI)388k
) {
1439
5.55M
    // If not, try unfolding a hoistable load.
1440
5.55M
    MI = ExtractHoistableLoad(MI);
1441
5.55M
    if (!MI) 
return false5.55M
;
1442
328k
  }
1443
328k
1444
328k
  // If we have hoisted an instruction that may store, it can only be a constant
1445
328k
  // store.
1446
328k
  if (MI->mayStore())
1447
9
    NumStoreConst++;
1448
328k
1449
328k
  // Now move the instructions to the predecessor, inserting it before any
1450
328k
  // terminator instructions.
1451
328k
  LLVM_DEBUG({
1452
328k
    dbgs() << "Hoisting " << *MI;
1453
328k
    if (MI->getParent()->getBasicBlock())
1454
328k
      dbgs() << " from " << printMBBReference(*MI->getParent());
1455
328k
    if (Preheader->getBasicBlock())
1456
328k
      dbgs() << " to " << printMBBReference(*Preheader);
1457
328k
    dbgs() << "\n";
1458
328k
  });
1459
328k
1460
328k
  // If this is the first instruction being hoisted to the preheader,
1461
328k
  // initialize the CSE map with potential common expressions.
1462
328k
  if (FirstInLoop) {
1463
86.5k
    InitCSEMap(Preheader);
1464
86.5k
    FirstInLoop = false;
1465
86.5k
  }
1466
328k
1467
328k
  // Look for opportunity to CSE the hoisted instruction.
1468
328k
  unsigned Opcode = MI->getOpcode();
1469
328k
  DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator
1470
328k
    CI = CSEMap.find(Opcode);
1471
328k
  if (!EliminateCSE(MI, CI)) {
1472
253k
    // Otherwise, splice the instruction to the preheader.
1473
253k
    Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
1474
253k
1475
253k
    // Since we are moving the instruction out of its basic block, we do not
1476
253k
    // retain its debug location. Doing so would degrade the debugging
1477
253k
    // experience and adversely affect the accuracy of profiling information.
1478
253k
    MI->setDebugLoc(DebugLoc());
1479
253k
1480
253k
    // Update register pressure for BBs from header to this block.
1481
253k
    UpdateBackTraceRegPressure(MI);
1482
253k
1483
253k
    // Clear the kill flags of any register this instruction defines,
1484
253k
    // since they may need to be live throughout the entire loop
1485
253k
    // rather than just live for part of it.
1486
253k
    for (MachineOperand &MO : MI->operands())
1487
587k
      if (MO.isReg() && 
MO.isDef()324k
&&
!MO.isDead()254k
)
1488
253k
        MRI->clearKillFlags(MO.getReg());
1489
253k
1490
253k
    // Add to the CSE map.
1491
253k
    if (CI != CSEMap.end())
1492
117k
      CI->second.push_back(MI);
1493
136k
    else
1494
136k
      CSEMap[Opcode].push_back(MI);
1495
253k
  }
1496
328k
1497
328k
  ++NumHoisted;
1498
328k
  Changed = true;
1499
328k
1500
328k
  return true;
1501
328k
}
1502
1503
/// Get the preheader for the current loop, splitting a critical edge if needed.
1504
307k
MachineBasicBlock *MachineLICMBase::getCurPreheader() {
1505
307k
  // Determine the block to which to hoist instructions. If we can't find a
1506
307k
  // suitable loop predecessor, we can't do any hoisting.
1507
307k
1508
307k
  // If we've tried to get a preheader and failed, don't try again.
1509
307k
  if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1))
1510
0
    return nullptr;
1511
307k
1512
307k
  if (!CurPreheader) {
1513
306k
    CurPreheader = CurLoop->getLoopPreheader();
1514
306k
    if (!CurPreheader) {
1515
22.1k
      MachineBasicBlock *Pred = CurLoop->getLoopPredecessor();
1516
22.1k
      if (!Pred) {
1517
1.49k
        CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
1518
1.49k
        return nullptr;
1519
1.49k
      }
1520
20.6k
1521
20.6k
      CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), *this);
1522
20.6k
      if (!CurPreheader) {
1523
35
        CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
1524
35
        return nullptr;
1525
35
      }
1526
306k
    }
1527
306k
  }
1528
306k
  return CurPreheader;
1529
306k
}