Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/CodeGen/RegisterCoalescer.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface ------===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// This file implements the generic RegisterCoalescer interface which
11
// is used as the common interface used by all clients and
12
// implementations of register coalescing.
13
//
14
//===----------------------------------------------------------------------===//
15
16
#include "RegisterCoalescer.h"
17
#include "llvm/ADT/ArrayRef.h"
18
#include "llvm/ADT/BitVector.h"
19
#include "llvm/ADT/STLExtras.h"
20
#include "llvm/ADT/SmallPtrSet.h"
21
#include "llvm/ADT/SmallVector.h"
22
#include "llvm/ADT/Statistic.h"
23
#include "llvm/Analysis/AliasAnalysis.h"
24
#include "llvm/CodeGen/LiveInterval.h"
25
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
26
#include "llvm/CodeGen/LiveRangeEdit.h"
27
#include "llvm/CodeGen/MachineBasicBlock.h"
28
#include "llvm/CodeGen/MachineFunction.h"
29
#include "llvm/CodeGen/MachineFunctionPass.h"
30
#include "llvm/CodeGen/MachineInstr.h"
31
#include "llvm/CodeGen/MachineInstrBuilder.h"
32
#include "llvm/CodeGen/MachineLoopInfo.h"
33
#include "llvm/CodeGen/MachineOperand.h"
34
#include "llvm/CodeGen/MachineRegisterInfo.h"
35
#include "llvm/CodeGen/Passes.h"
36
#include "llvm/CodeGen/RegisterClassInfo.h"
37
#include "llvm/CodeGen/SlotIndexes.h"
38
#include "llvm/IR/DebugLoc.h"
39
#include "llvm/Pass.h"
40
#include "llvm/MC/LaneBitmask.h"
41
#include "llvm/MC/MCInstrDesc.h"
42
#include "llvm/MC/MCRegisterInfo.h"
43
#include "llvm/Support/CommandLine.h"
44
#include "llvm/Support/Compiler.h"
45
#include "llvm/Support/Debug.h"
46
#include "llvm/Support/ErrorHandling.h"
47
#include "llvm/Support/raw_ostream.h"
48
#include "llvm/Target/TargetInstrInfo.h"
49
#include "llvm/Target/TargetOpcodes.h"
50
#include "llvm/Target/TargetRegisterInfo.h"
51
#include "llvm/Target/TargetSubtargetInfo.h"
52
#include <algorithm>
53
#include <cassert>
54
#include <iterator>
55
#include <limits>
56
#include <tuple>
57
#include <utility>
58
#include <vector>
59
60
using namespace llvm;
61
62
#define DEBUG_TYPE "regalloc"
63
64
STATISTIC(numJoins    , "Number of interval joins performed");
65
STATISTIC(numCrossRCs , "Number of cross class joins performed");
66
STATISTIC(numCommutes , "Number of instruction commuting performed");
67
STATISTIC(numExtends  , "Number of copies extended");
68
STATISTIC(NumReMats   , "Number of instructions re-materialized");
69
STATISTIC(NumInflated , "Number of register classes inflated");
70
STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested");
71
STATISTIC(NumLaneResolves,  "Number of dead lane conflicts resolved");
72
73
static cl::opt<bool>
74
EnableJoining("join-liveintervals",
75
              cl::desc("Coalesce copies (default=true)"),
76
              cl::init(true));
77
78
static cl::opt<bool> UseTerminalRule("terminal-rule",
79
                                     cl::desc("Apply the terminal rule"),
80
                                     cl::init(false), cl::Hidden);
81
82
/// Temporary flag to test critical edge unsplitting.
83
static cl::opt<bool>
84
EnableJoinSplits("join-splitedges",
85
  cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden);
86
87
/// Temporary flag to test global copy optimization.
88
static cl::opt<cl::boolOrDefault>
89
EnableGlobalCopies("join-globalcopies",
90
  cl::desc("Coalesce copies that span blocks (default=subtarget)"),
91
  cl::init(cl::BOU_UNSET), cl::Hidden);
92
93
static cl::opt<bool>
94
VerifyCoalescing("verify-coalescing",
95
         cl::desc("Verify machine instrs before and after register coalescing"),
96
         cl::Hidden);
97
98
namespace {
99
100
  class RegisterCoalescer : public MachineFunctionPass,
101
                            private LiveRangeEdit::Delegate {
102
    MachineFunction* MF;
103
    MachineRegisterInfo* MRI;
104
    const TargetRegisterInfo* TRI;
105
    const TargetInstrInfo* TII;
106
    LiveIntervals *LIS;
107
    const MachineLoopInfo* Loops;
108
    AliasAnalysis *AA;
109
    RegisterClassInfo RegClassInfo;
110
111
    /// A LaneMask to remember on which subregister live ranges we need to call
112
    /// shrinkToUses() later.
113
    LaneBitmask ShrinkMask;
114
115
    /// True if the main range of the currently coalesced intervals should be
116
    /// checked for smaller live intervals.
117
    bool ShrinkMainRange;
118
119
    /// \brief True if the coalescer should aggressively coalesce global copies
120
    /// in favor of keeping local copies.
121
    bool JoinGlobalCopies;
122
123
    /// \brief True if the coalescer should aggressively coalesce fall-thru
124
    /// blocks exclusively containing copies.
125
    bool JoinSplitEdges;
126
127
    /// Copy instructions yet to be coalesced.
128
    SmallVector<MachineInstr*, 8> WorkList;
129
    SmallVector<MachineInstr*, 8> LocalWorkList;
130
131
    /// Set of instruction pointers that have been erased, and
132
    /// that may be present in WorkList.
133
    SmallPtrSet<MachineInstr*, 8> ErasedInstrs;
134
135
    /// Dead instructions that are about to be deleted.
136
    SmallVector<MachineInstr*, 8> DeadDefs;
137
138
    /// Virtual registers to be considered for register class inflation.
139
    SmallVector<unsigned, 8> InflateRegs;
140
141
    /// Recursively eliminate dead defs in DeadDefs.
142
    void eliminateDeadDefs();
143
144
    /// LiveRangeEdit callback for eliminateDeadDefs().
145
    void LRE_WillEraseInstruction(MachineInstr *MI) override;
146
147
    /// Coalesce the LocalWorkList.
148
    void coalesceLocals();
149
150
    /// Join compatible live intervals
151
    void joinAllIntervals();
152
153
    /// Coalesce copies in the specified MBB, putting
154
    /// copies that cannot yet be coalesced into WorkList.
155
    void copyCoalesceInMBB(MachineBasicBlock *MBB);
156
157
    /// Tries to coalesce all copies in CurrList. Returns true if any progress
158
    /// was made.
159
    bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList);
160
161
    /// Attempt to join intervals corresponding to SrcReg/DstReg, which are the
162
    /// src/dst of the copy instruction CopyMI.  This returns true if the copy
163
    /// was successfully coalesced away. If it is not currently possible to
164
    /// coalesce this interval, but it may be possible if other things get
165
    /// coalesced, then it returns true by reference in 'Again'.
166
    bool joinCopy(MachineInstr *TheCopy, bool &Again);
167
168
    /// Attempt to join these two intervals.  On failure, this
169
    /// returns false.  The output "SrcInt" will not have been modified, so we
170
    /// can use this information below to update aliases.
171
    bool joinIntervals(CoalescerPair &CP);
172
173
    /// Attempt joining two virtual registers. Return true on success.
174
    bool joinVirtRegs(CoalescerPair &CP);
175
176
    /// Attempt joining with a reserved physreg.
177
    bool joinReservedPhysReg(CoalescerPair &CP);
178
179
    /// Add the LiveRange @p ToMerge as a subregister liverange of @p LI.
180
    /// Subranges in @p LI which only partially interfere with the desired
181
    /// LaneMask are split as necessary. @p LaneMask are the lanes that
182
    /// @p ToMerge will occupy in the coalescer register. @p LI has its subrange
183
    /// lanemasks already adjusted to the coalesced register.
184
    void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
185
                           LaneBitmask LaneMask, CoalescerPair &CP);
186
187
    /// Join the liveranges of two subregisters. Joins @p RRange into
188
    /// @p LRange, @p RRange may be invalid afterwards.
189
    void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
190
                          LaneBitmask LaneMask, const CoalescerPair &CP);
191
192
    /// We found a non-trivially-coalescable copy. If the source value number is
193
    /// defined by a copy from the destination reg see if we can merge these two
194
    /// destination reg valno# into a single value number, eliminating a copy.
195
    /// This returns true if an interval was modified.
196
    bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
197
198
    /// Return true if there are definitions of IntB
199
    /// other than BValNo val# that can reach uses of AValno val# of IntA.
200
    bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
201
                              VNInfo *AValNo, VNInfo *BValNo);
202
203
    /// We found a non-trivially-coalescable copy.
204
    /// If the source value number is defined by a commutable instruction and
205
    /// its other operand is coalesced to the copy dest register, see if we
206
    /// can transform the copy into a noop by commuting the definition.
207
    /// This returns true if an interval was modified.
208
    bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
209
210
    /// We found a copy which can be moved to its less frequent predecessor.
211
    bool removePartialRedundancy(const CoalescerPair &CP, MachineInstr &CopyMI);
212
213
    /// If the source of a copy is defined by a
214
    /// trivial computation, replace the copy by rematerialize the definition.
215
    bool reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI,
216
                                 bool &IsDefCopy);
217
218
    /// Return true if a copy involving a physreg should be joined.
219
    bool canJoinPhys(const CoalescerPair &CP);
220
221
    /// Replace all defs and uses of SrcReg to DstReg and update the subregister
222
    /// number if it is not zero. If DstReg is a physical register and the
223
    /// existing subregister number of the def / use being updated is not zero,
224
    /// make sure to set it to the correct physical subregister.
225
    void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);
226
227
    /// If the given machine operand reads only undefined lanes add an undef
228
    /// flag.
229
    /// This can happen when undef uses were previously concealed by a copy
230
    /// which we coalesced. Example:
231
    ///    %vreg0:sub0<def,read-undef> = ...
232
    ///    %vreg1 = COPY %vreg0       <-- Coalescing COPY reveals undef
233
    ///           = use %vreg1:sub1   <-- hidden undef use
234
    void addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
235
                      MachineOperand &MO, unsigned SubRegIdx);
236
237
    /// Handle copies of undef values.
238
    /// Returns true if @p CopyMI was a copy of an undef value and eliminated.
239
    bool eliminateUndefCopy(MachineInstr *CopyMI);
240
241
    /// Check whether or not we should apply the terminal rule on the
242
    /// destination (Dst) of \p Copy.
243
    /// When the terminal rule applies, Copy is not profitable to
244
    /// coalesce.
245
    /// Dst is terminal if it has exactly one affinity (Dst, Src) and
246
    /// at least one interference (Dst, Dst2). If Dst is terminal, the
247
    /// terminal rule consists in checking that at least one of
248
    /// interfering node, say Dst2, has an affinity of equal or greater
249
    /// weight with Src.
250
    /// In that case, Dst2 and Dst will not be able to be both coalesced
251
    /// with Src. Since Dst2 exposes more coalescing opportunities than
252
    /// Dst, we can drop \p Copy.
253
    bool applyTerminalRule(const MachineInstr &Copy) const;
254
255
    /// Wrapper method for \see LiveIntervals::shrinkToUses.
256
    /// This method does the proper fixing of the live-ranges when the afore
257
    /// mentioned method returns true.
258
    void shrinkToUses(LiveInterval *LI,
259
732k
                      SmallVectorImpl<MachineInstr * > *Dead = nullptr) {
260
732k
      if (
LIS->shrinkToUses(LI, Dead)732k
) {
261
112
        /// Check whether or not \p LI is composed by multiple connected
262
112
        /// components and if that is the case, fix that.
263
112
        SmallVector<LiveInterval*, 8> SplitLIs;
264
112
        LIS->splitSeparateComponents(*LI, SplitLIs);
265
112
      }
266
732k
    }
267
268
    /// Wrapper Method to do all the necessary work when an Instruction is
269
    /// deleted.
270
    /// Optimizations should use this to make sure that deleted instructions
271
    /// are always accounted for.
272
1.03M
    void deleteInstr(MachineInstr* MI) {
273
1.03M
      ErasedInstrs.insert(MI);
274
1.03M
      LIS->RemoveMachineInstrFromMaps(*MI);
275
1.03M
      MI->eraseFromParent();
276
1.03M
    }
277
278
  public:
279
    static char ID; ///< Class identification, replacement for typeinfo
280
281
32.0k
    RegisterCoalescer() : MachineFunctionPass(ID) {
282
32.0k
      initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
283
32.0k
    }
284
285
    void getAnalysisUsage(AnalysisUsage &AU) const override;
286
287
    void releaseMemory() override;
288
289
    /// This is the pass entry point.
290
    bool runOnMachineFunction(MachineFunction&) override;
291
292
    /// Implement the dump method.
293
    void print(raw_ostream &O, const Module* = nullptr) const override;
294
  };
295
296
} // end anonymous namespace
297
298
char RegisterCoalescer::ID = 0;
299
300
char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;
301
302
36.7k
INITIALIZE_PASS_BEGIN36.7k
(RegisterCoalescer, "simple-register-coalescing",
303
36.7k
                      "Simple Register Coalescing", false, false)
304
36.7k
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
305
36.7k
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
306
36.7k
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
307
36.7k
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
308
36.7k
INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
309
                    "Simple Register Coalescing", false, false)
310
311
static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
312
                        unsigned &Src, unsigned &Dst,
313
35.1M
                        unsigned &SrcSub, unsigned &DstSub) {
314
35.1M
  if (
MI->isCopy()35.1M
) {
315
30.5M
    Dst = MI->getOperand(0).getReg();
316
30.5M
    DstSub = MI->getOperand(0).getSubReg();
317
30.5M
    Src = MI->getOperand(1).getReg();
318
30.5M
    SrcSub = MI->getOperand(1).getSubReg();
319
35.1M
  } else 
if (4.53M
MI->isSubregToReg()4.53M
) {
320
1.70M
    Dst = MI->getOperand(0).getReg();
321
1.70M
    DstSub = tri.composeSubRegIndices(MI->getOperand(0).getSubReg(),
322
1.70M
                                      MI->getOperand(3).getImm());
323
1.70M
    Src = MI->getOperand(2).getReg();
324
1.70M
    SrcSub = MI->getOperand(2).getSubReg();
325
1.70M
  } else
326
2.82M
    return false;
327
32.2M
  return true;
328
32.2M
}
329
330
/// Return true if this block should be vacated by the coalescer to eliminate
331
/// branches. The important cases to handle in the coalescer are critical edges
332
/// split during phi elimination which contain only copies. Simple blocks that
333
/// contain non-branches should also be vacated, but this can be handled by an
334
/// earlier pass similar to early if-conversion.
335
0
static bool isSplitEdge(const MachineBasicBlock *MBB) {
336
0
  if (
MBB->pred_size() != 1 || 0
MBB->succ_size() != 10
)
337
0
    return false;
338
0
339
0
  
for (const auto &MI : *MBB) 0
{
340
0
    if (
!MI.isCopyLike() && 0
!MI.isUnconditionalBranch()0
)
341
0
      return false;
342
0
  }
343
0
  return true;
344
0
}
345
346
18.5M
bool CoalescerPair::setRegisters(const MachineInstr *MI) {
347
18.5M
  SrcReg = DstReg = 0;
348
18.5M
  SrcIdx = DstIdx = 0;
349
18.5M
  NewRC = nullptr;
350
18.5M
  Flipped = CrossClass = false;
351
18.5M
352
18.5M
  unsigned Src, Dst, SrcSub, DstSub;
353
18.5M
  if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
354
137
    return false;
355
18.5M
  
Partial = SrcSub || 18.5M
DstSub17.9M
;
356
18.5M
357
18.5M
  // If one register is a physreg, it must be Dst.
358
18.5M
  if (
TargetRegisterInfo::isPhysicalRegister(Src)18.5M
) {
359
3.97M
    if (TargetRegisterInfo::isPhysicalRegister(Dst))
360
294k
      return false;
361
3.67M
    std::swap(Src, Dst);
362
3.67M
    std::swap(SrcSub, DstSub);
363
3.67M
    Flipped = true;
364
3.67M
  }
365
18.5M
366
18.2M
  const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
367
18.2M
368
18.2M
  if (
TargetRegisterInfo::isPhysicalRegister(Dst)18.2M
) {
369
11.8M
    // Eliminate DstSub on a physreg.
370
11.8M
    if (
DstSub11.8M
) {
371
2
      Dst = TRI.getSubReg(Dst, DstSub);
372
2
      if (
!Dst2
)
return false0
;
373
2
      DstSub = 0;
374
2
    }
375
11.8M
376
11.8M
    // Eliminate SrcSub by picking a corresponding Dst superregister.
377
11.8M
    
if (11.8M
SrcSub11.8M
) {
378
85.5k
      Dst = TRI.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src));
379
85.5k
      if (
!Dst85.5k
)
return false11.8k
;
380
11.8M
    } else 
if (11.8M
!MRI.getRegClass(Src)->contains(Dst)11.8M
) {
381
138k
      return false;
382
138k
    }
383
6.33M
  } else {
384
6.33M
    // Both registers are virtual.
385
6.33M
    const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
386
6.33M
    const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
387
6.33M
388
6.33M
    // Both registers have subreg indices.
389
6.33M
    if (
SrcSub && 6.33M
DstSub554k
) {
390
107k
      // Copies between different sub-registers are never coalescable.
391
107k
      if (
Src == Dst && 107k
SrcSub != DstSub3.11k
)
392
3.11k
        return false;
393
104k
394
104k
      NewRC = TRI.getCommonSuperRegClass(SrcRC, SrcSub, DstRC, DstSub,
395
104k
                                         SrcIdx, DstIdx);
396
104k
      if (!NewRC)
397
538
        return false;
398
6.22M
    } else 
if (6.22M
DstSub6.22M
) {
399
805k
      // SrcReg will be merged with a sub-register of DstReg.
400
805k
      SrcIdx = DstSub;
401
805k
      NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
402
6.22M
    } else 
if (5.42M
SrcSub5.42M
) {
403
447k
      // DstReg will be merged with a sub-register of SrcReg.
404
447k
      DstIdx = SrcSub;
405
447k
      NewRC = TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSub);
406
5.42M
    } else {
407
4.97M
      // This is a straight copy without sub-registers.
408
4.97M
      NewRC = TRI.getCommonSubClass(DstRC, SrcRC);
409
4.97M
    }
410
6.33M
411
6.33M
    // The combined constraint may be impossible to satisfy.
412
6.33M
    
if (6.33M
!NewRC6.33M
)
413
76.2k
      return false;
414
6.25M
415
6.25M
    // Prefer SrcReg to be a sub-register of DstReg.
416
6.25M
    // FIXME: Coalescer should support subregs symmetrically.
417
6.25M
    
if (6.25M
DstIdx && 6.25M
!SrcIdx435k
) {
418
434k
      std::swap(Src, Dst);
419
434k
      std::swap(SrcIdx, DstIdx);
420
434k
      Flipped = !Flipped;
421
434k
    }
422
6.25M
423
3.52M
    CrossClass = NewRC != DstRC || NewRC != SrcRC;
424
6.33M
  }
425
18.2M
  // Check our invariants
426
18.0M
  assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual");
427
18.0M
  assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) &&
428
18.0M
         "Cannot have a physical SubIdx");
429
18.0M
  SrcReg = Src;
430
18.0M
  DstReg = Dst;
431
18.0M
  return true;
432
18.5M
}
433
434
1.49M
bool CoalescerPair::flip() {
435
1.49M
  if (TargetRegisterInfo::isPhysicalRegister(DstReg))
436
0
    return false;
437
1.49M
  std::swap(SrcReg, DstReg);
438
1.49M
  std::swap(SrcIdx, DstIdx);
439
1.49M
  Flipped = !Flipped;
440
1.49M
  return true;
441
1.49M
}
442
443
10.4M
bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
444
10.4M
  if (!MI)
445
101k
    return false;
446
10.3M
  unsigned Src, Dst, SrcSub, DstSub;
447
10.3M
  if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
448
2.82M
    return false;
449
7.51M
450
7.51M
  // Find the virtual register that is SrcReg.
451
7.51M
  
if (7.51M
Dst == SrcReg7.51M
) {
452
2.29M
    std::swap(Src, Dst);
453
2.29M
    std::swap(SrcSub, DstSub);
454
7.51M
  } else 
if (5.22M
Src != SrcReg5.22M
) {
455
274k
    return false;
456
274k
  }
457
7.23M
458
7.23M
  // Now check that Dst matches DstReg.
459
7.23M
  
if (7.23M
TargetRegisterInfo::isPhysicalRegister(DstReg)7.23M
) {
460
331k
    if (!TargetRegisterInfo::isPhysicalRegister(Dst))
461
3.09k
      return false;
462
331k
    assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state.");
463
328k
    // DstSub could be set for a physreg from INSERT_SUBREG.
464
328k
    if (DstSub)
465
0
      Dst = TRI.getSubReg(Dst, DstSub);
466
328k
    // Full copy of Src.
467
328k
    if (!SrcSub)
468
315k
      return DstReg == Dst;
469
13.2k
    // This is a partial register copy. Check that the parts match.
470
13.2k
    return TRI.getSubReg(DstReg, SrcSub) == Dst;
471
0
  } else {
472
6.90M
    // DstReg is virtual.
473
6.90M
    if (DstReg != Dst)
474
173k
      return false;
475
6.73M
    // Registers match, do the subregisters line up?
476
6.73M
    return TRI.composeSubRegIndices(SrcIdx, SrcSub) ==
477
6.73M
           TRI.composeSubRegIndices(DstIdx, DstSub);
478
6.73M
  }
479
10.4M
}
480
481
32.0k
void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
482
32.0k
  AU.setPreservesCFG();
483
32.0k
  AU.addRequired<AAResultsWrapperPass>();
484
32.0k
  AU.addRequired<LiveIntervals>();
485
32.0k
  AU.addPreserved<LiveIntervals>();
486
32.0k
  AU.addPreserved<SlotIndexes>();
487
32.0k
  AU.addRequired<MachineLoopInfo>();
488
32.0k
  AU.addPreserved<MachineLoopInfo>();
489
32.0k
  AU.addPreservedID(MachineDominatorsID);
490
32.0k
  MachineFunctionPass::getAnalysisUsage(AU);
491
32.0k
}
492
493
494k
void RegisterCoalescer::eliminateDeadDefs() {
494
494k
  SmallVector<unsigned, 8> NewRegs;
495
494k
  LiveRangeEdit(nullptr, NewRegs, *MF, *LIS,
496
494k
                nullptr, this).eliminateDeadDefs(DeadDefs);
497
494k
}
498
499
494k
void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
500
494k
  // MI may be in WorkList. Make sure we don't visit it.
501
494k
  ErasedInstrs.insert(MI);
502
494k
}
503
504
bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
505
390k
                                             MachineInstr *CopyMI) {
506
390k
  assert(!CP.isPartial() && "This doesn't work for partial copies.");
507
390k
  assert(!CP.isPhys() && "This doesn't work for physreg copies.");
508
390k
509
390k
  LiveInterval &IntA =
510
390k
    LIS->getInterval(CP.isFlipped() ? 
CP.getDstReg()109k
:
CP.getSrcReg()280k
);
511
390k
  LiveInterval &IntB =
512
390k
    LIS->getInterval(CP.isFlipped() ? 
CP.getSrcReg()109k
:
CP.getDstReg()280k
);
513
390k
  SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
514
390k
515
390k
  // We have a non-trivially-coalescable copy with IntA being the source and
516
390k
  // IntB being the dest, thus this defines a value number in IntB.  If the
517
390k
  // source value number (in IntA) is defined by a copy from B, see if we can
518
390k
  // merge these two pieces of B into a single value number, eliminating a copy.
519
390k
  // For example:
520
390k
  //
521
390k
  //  A3 = B0
522
390k
  //    ...
523
390k
  //  B1 = A3      <- this copy
524
390k
  //
525
390k
  // In this case, B0 can be extended to where the B1 copy lives, allowing the
526
390k
  // B1 value number to be replaced with B0 (which simplifies the B
527
390k
  // liveinterval).
528
390k
529
390k
  // BValNo is a value number in B that is defined by a copy from A.  'B1' in
530
390k
  // the example above.
531
390k
  LiveInterval::iterator BS = IntB.FindSegmentContaining(CopyIdx);
532
390k
  if (
BS == IntB.end()390k
)
return false0
;
533
390k
  VNInfo *BValNo = BS->valno;
534
390k
535
390k
  // Get the location that B is defined at.  Two options: either this value has
536
390k
  // an unknown definition point or it is defined at CopyIdx.  If unknown, we
537
390k
  // can't process it.
538
390k
  if (
BValNo->def != CopyIdx390k
)
return false0
;
539
390k
540
390k
  // AValNo is the value number in A that defines the copy, A3 in the example.
541
390k
  SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true);
542
390k
  LiveInterval::iterator AS = IntA.FindSegmentContaining(CopyUseIdx);
543
390k
  // The live segment might not exist after fun with physreg coalescing.
544
390k
  if (
AS == IntA.end()390k
)
return false0
;
545
390k
  VNInfo *AValNo = AS->valno;
546
390k
547
390k
  // If AValNo is defined as a copy from IntB, we can potentially process this.
548
390k
  // Get the instruction that defines this value number.
549
390k
  MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def);
550
390k
  // Don't allow any partial copies, even if isCoalescable() allows them.
551
390k
  if (
!CP.isCoalescable(ACopyMI) || 390k
!ACopyMI->isFullCopy()736
)
552
390k
    return false;
553
736
554
736
  // Get the Segment in IntB that this value number starts with.
555
736
  LiveInterval::iterator ValS =
556
736
    IntB.FindSegmentContaining(AValNo->def.getPrevSlot());
557
736
  if (ValS == IntB.end())
558
0
    return false;
559
736
560
736
  // Make sure that the end of the live segment is inside the same block as
561
736
  // CopyMI.
562
736
  MachineInstr *ValSEndInst =
563
736
    LIS->getInstructionFromIndex(ValS->end.getPrevSlot());
564
736
  if (
!ValSEndInst || 736
ValSEndInst->getParent() != CopyMI->getParent()736
)
565
659
    return false;
566
77
567
77
  // Okay, we now know that ValS ends in the same block that the CopyMI
568
77
  // live-range starts.  If there are no intervening live segments between them
569
77
  // in IntB, we can merge them.
570
77
  
if (77
ValS+1 != BS77
)
return false0
;
571
77
572
77
  
DEBUG77
(dbgs() << "Extending: " << PrintReg(IntB.reg, TRI));
573
77
574
77
  SlotIndex FillerStart = ValS->end, FillerEnd = BS->start;
575
77
  // We are about to delete CopyMI, so need to remove it as the 'instruction
576
77
  // that defines this value #'. Update the valnum with the new defining
577
77
  // instruction #.
578
77
  BValNo->def = FillerStart;
579
77
580
77
  // Okay, we can merge them.  We need to insert a new liverange:
581
77
  // [ValS.end, BS.begin) of either value number, then we merge the
582
77
  // two value numbers.
583
77
  IntB.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, BValNo));
584
77
585
77
  // Okay, merge "B1" into the same value number as "B0".
586
77
  if (BValNo != ValS->valno)
587
77
    IntB.MergeValueNumberInto(BValNo, ValS->valno);
588
77
589
77
  // Do the same for the subregister segments.
590
0
  for (LiveInterval::SubRange &S : IntB.subranges()) {
591
0
    VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
592
0
    S.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, SubBValNo));
593
0
    VNInfo *SubValSNo = S.getVNInfoAt(AValNo->def.getPrevSlot());
594
0
    if (SubBValNo != SubValSNo)
595
0
      S.MergeValueNumberInto(SubBValNo, SubValSNo);
596
0
  }
597
77
598
77
  DEBUG(dbgs() << "   result = " << IntB << '\n');
599
77
600
77
  // If the source instruction was killing the source register before the
601
77
  // merge, unset the isKill marker given the live range has been extended.
602
77
  int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg, true);
603
77
  if (
UIdx != -177
) {
604
0
    ValSEndInst->getOperand(UIdx).setIsKill(false);
605
0
  }
606
77
607
77
  // Rewrite the copy. If the copy instruction was killing the destination
608
77
  // register before the merge, find the last use and trim the live range. That
609
77
  // will also add the isKill marker.
610
77
  CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI);
611
77
  if (AS->end == CopyIdx)
612
0
    shrinkToUses(&IntA);
613
390k
614
390k
  ++numExtends;
615
390k
  return true;
616
390k
}
617
618
bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
619
                                             LiveInterval &IntB,
620
                                             VNInfo *AValNo,
621
365
                                             VNInfo *BValNo) {
622
365
  // If AValNo has PHI kills, conservatively assume that IntB defs can reach
623
365
  // the PHI values.
624
365
  if (LIS->hasPHIKill(IntA, AValNo))
625
4
    return true;
626
361
627
361
  
for (LiveRange::Segment &ASeg : IntA.segments) 361
{
628
2.00k
    if (
ASeg.valno != AValNo2.00k
)
continue1.62k
;
629
380
    LiveInterval::iterator BI =
630
380
      std::upper_bound(IntB.begin(), IntB.end(), ASeg.start);
631
380
    if (BI != IntB.begin())
632
380
      --BI;
633
1.12k
    for (; 
BI != IntB.end() && 1.12k
ASeg.end >= BI->start1.05k
;
++BI742
) {
634
749
      if (BI->valno == BValNo)
635
380
        continue;
636
369
      
if (369
BI->start <= ASeg.start && 369
BI->end > ASeg.start361
)
637
0
        return true;
638
369
      
if (369
BI->start > ASeg.start && 369
BI->start < ASeg.end8
)
639
7
        return true;
640
749
    }
641
2.00k
  }
642
354
  return false;
643
365
}
644
645
/// Copy segements with value number @p SrcValNo from liverange @p Src to live
646
/// range @Dst and use value number @p DstValNo there.
647
static void addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo,
648
352
                                 const LiveRange &Src, const VNInfo *SrcValNo) {
649
1.96k
  for (const LiveRange::Segment &S : Src.segments) {
650
1.96k
    if (S.valno != SrcValNo)
651
1.59k
      continue;
652
371
    Dst.addSegment(LiveRange::Segment(S.start, S.end, DstValNo));
653
371
  }
654
352
}
655
656
bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
657
390k
                                                 MachineInstr *CopyMI) {
658
390k
  assert(!CP.isPhys());
659
390k
660
390k
  LiveInterval &IntA =
661
390k
      LIS->getInterval(CP.isFlipped() ? 
CP.getDstReg()109k
:
CP.getSrcReg()280k
);
662
390k
  LiveInterval &IntB =
663
390k
      LIS->getInterval(CP.isFlipped() ? 
CP.getSrcReg()109k
:
CP.getDstReg()280k
);
664
390k
665
390k
  // We found a non-trivially-coalescable copy with IntA being the source and
666
390k
  // IntB being the dest, thus this defines a value number in IntB.  If the
667
390k
  // source value number (in IntA) is defined by a commutable instruction and
668
390k
  // its other operand is coalesced to the copy dest register, see if we can
669
390k
  // transform the copy into a noop by commuting the definition. For example,
670
390k
  //
671
390k
  //  A3 = op A2 B0<kill>
672
390k
  //    ...
673
390k
  //  B1 = A3      <- this copy
674
390k
  //    ...
675
390k
  //     = op A3   <- more uses
676
390k
  //
677
390k
  // ==>
678
390k
  //
679
390k
  //  B2 = op B0 A2<kill>
680
390k
  //    ...
681
390k
  //  B1 = B2      <- now an identity copy
682
390k
  //    ...
683
390k
  //     = op B2   <- more uses
684
390k
685
390k
  // BValNo is a value number in B that is defined by a copy from A. 'B1' in
686
390k
  // the example above.
687
390k
  SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
688
390k
  VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
689
390k
  assert(BValNo != nullptr && BValNo->def == CopyIdx);
690
390k
691
390k
  // AValNo is the value number in A that defines the copy, A3 in the example.
692
390k
  VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true));
693
390k
  assert(AValNo && !AValNo->isUnused() && "COPY source not live");
694
390k
  if (AValNo->isPHIDef())
695
101k
    return false;
696
288k
  MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def);
697
288k
  if (!DefMI)
698
0
    return false;
699
288k
  
if (288k
!DefMI->isCommutable()288k
)
700
281k
    return false;
701
7.03k
  // If DefMI is a two-address instruction then commuting it will change the
702
7.03k
  // destination register.
703
7.03k
  int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg);
704
7.03k
  assert(DefIdx != -1);
705
7.03k
  unsigned UseOpIdx;
706
7.03k
  if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
707
354
    return false;
708
6.67k
709
6.67k
  // FIXME: The code below tries to commute 'UseOpIdx' operand with some other
710
6.67k
  // commutable operand which is expressed by 'CommuteAnyOperandIndex'value
711
6.67k
  // passed to the method. That _other_ operand is chosen by
712
6.67k
  // the findCommutedOpIndices() method.
713
6.67k
  //
714
6.67k
  // That is obviously an area for improvement in case of instructions having
715
6.67k
  // more than 2 operands. For example, if some instruction has 3 commutable
716
6.67k
  // operands then all possible variants (i.e. op#1<->op#2, op#1<->op#3,
717
6.67k
  // op#2<->op#3) of commute transformation should be considered/tried here.
718
6.67k
  unsigned NewDstIdx = TargetInstrInfo::CommuteAnyOperandIndex;
719
6.67k
  if (!TII->findCommutedOpIndices(*DefMI, UseOpIdx, NewDstIdx))
720
114
    return false;
721
6.56k
722
6.56k
  MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
723
6.56k
  unsigned NewReg = NewDstMO.getReg();
724
6.56k
  if (
NewReg != IntB.reg || 6.56k
!IntB.Query(AValNo->def).isKill()426
)
725
6.19k
    return false;
726
365
727
365
  // Make sure there are no other definitions of IntB that would reach the
728
365
  // uses which the new definition can reach.
729
365
  
if (365
hasOtherReachingDefs(IntA, IntB, AValNo, BValNo)365
)
730
11
    return false;
731
354
732
354
  // If some of the uses of IntA.reg is already coalesced away, return false.
733
354
  // It's not possible to determine whether it's safe to perform the coalescing.
734
354
  
for (MachineOperand &MO : MRI->use_nodbg_operands(IntA.reg)) 354
{
735
1.70k
    MachineInstr *UseMI = MO.getParent();
736
1.70k
    unsigned OpNo = &MO - &UseMI->getOperand(0);
737
1.70k
    SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI);
738
1.70k
    LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx);
739
1.70k
    if (
US == IntA.end() || 1.70k
US->valno != AValNo1.70k
)
740
1.30k
      continue;
741
406
    // If this use is tied to a def, we can't rewrite the register.
742
406
    
if (406
UseMI->isRegTiedToDefOperand(OpNo)406
)
743
0
      return false;
744
354
  }
745
354
746
354
  
DEBUG354
(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t'
747
354
               << *DefMI);
748
354
749
354
  // At this point we have decided that it is legal to do this
750
354
  // transformation.  Start by commuting the instruction.
751
354
  MachineBasicBlock *MBB = DefMI->getParent();
752
354
  MachineInstr *NewMI =
753
354
      TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx);
754
354
  if (!NewMI)
755
2
    return false;
756
352
  
if (352
TargetRegisterInfo::isVirtualRegister(IntA.reg) &&
757
352
      TargetRegisterInfo::isVirtualRegister(IntB.reg) &&
758
352
      !MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg)))
759
0
    return false;
760
352
  
if (352
NewMI != DefMI352
) {
761
0
    LIS->ReplaceMachineInstrInMaps(*DefMI, *NewMI);
762
0
    MachineBasicBlock::iterator Pos = DefMI;
763
0
    MBB->insert(Pos, NewMI);
764
0
    MBB->erase(DefMI);
765
0
  }
766
352
767
352
  // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g.
768
352
  // A = or A, B
769
352
  // ...
770
352
  // B = A
771
352
  // ...
772
352
  // C = A<kill>
773
352
  // ...
774
352
  //   = B
775
352
776
352
  // Update uses of IntA of the specific Val# with IntB.
777
352
  for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg),
778
352
                                         UE = MRI->use_end();
779
2.05k
       
UI != UE2.05k
; /* ++UI is below because of possible MI removal */) {
780
1.70k
    MachineOperand &UseMO = *UI;
781
1.70k
    ++UI;
782
1.70k
    if (UseMO.isUndef())
783
0
      continue;
784
1.70k
    MachineInstr *UseMI = UseMO.getParent();
785
1.70k
    if (
UseMI->isDebugValue()1.70k
) {
786
0
      // FIXME These don't have an instruction index.  Not clear we have enough
787
0
      // info to decide whether to do this replacement or not.  For now do it.
788
0
      UseMO.setReg(NewReg);
789
0
      continue;
790
0
    }
791
1.70k
    SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot(true);
792
1.70k
    LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx);
793
1.70k
    assert(US != IntA.end() && "Use must be live");
794
1.70k
    if (US->valno != AValNo)
795
1.29k
      continue;
796
402
    // Kill flags are no longer accurate. They are recomputed after RA.
797
402
    UseMO.setIsKill(false);
798
402
    if (TargetRegisterInfo::isPhysicalRegister(NewReg))
799
0
      UseMO.substPhysReg(NewReg, *TRI);
800
402
    else
801
402
      UseMO.setReg(NewReg);
802
402
    if (UseMI == CopyMI)
803
352
      continue;
804
50
    
if (50
!UseMI->isCopy()50
)
805
25
      continue;
806
25
    
if (25
UseMI->getOperand(0).getReg() != IntB.reg ||
807
1
        UseMI->getOperand(0).getSubReg())
808
24
      continue;
809
1
810
1
    // This copy will become a noop. If it's defining a new val#, merge it into
811
1
    // BValNo.
812
1
    SlotIndex DefIdx = UseIdx.getRegSlot();
813
1
    VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
814
1
    if (!DVNI)
815
0
      continue;
816
1
    
DEBUG1
(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
817
1
    assert(DVNI->def == DefIdx);
818
1
    BValNo = IntB.MergeValueNumberInto(DVNI, BValNo);
819
0
    for (LiveInterval::SubRange &S : IntB.subranges()) {
820
0
      VNInfo *SubDVNI = S.getVNInfoAt(DefIdx);
821
0
      if (!SubDVNI)
822
0
        continue;
823
0
      VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
824
0
      assert(SubBValNo->def == CopyIdx);
825
0
      S.MergeValueNumberInto(SubDVNI, SubBValNo);
826
0
    }
827
1.70k
828
1.70k
    deleteInstr(UseMI);
829
1.70k
  }
830
352
831
352
  // Extend BValNo by merging in IntA live segments of AValNo. Val# definition
832
352
  // is updated.
833
352
  BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
834
352
  if (
IntB.hasSubRanges()352
) {
835
0
    if (
!IntA.hasSubRanges()0
) {
836
0
      LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg);
837
0
      IntA.createSubRangeFrom(Allocator, Mask, IntA);
838
0
    }
839
0
    SlotIndex AIdx = CopyIdx.getRegSlot(true);
840
0
    for (LiveInterval::SubRange &SA : IntA.subranges()) {
841
0
      VNInfo *ASubValNo = SA.getVNInfoAt(AIdx);
842
0
      assert(ASubValNo != nullptr);
843
0
844
0
      IntB.refineSubRanges(Allocator, SA.LaneMask,
845
0
          [&Allocator,&SA,CopyIdx,ASubValNo](LiveInterval::SubRange &SR) {
846
0
        VNInfo *BSubValNo = SR.empty()
847
0
          ? SR.getNextValue(CopyIdx, Allocator)
848
0
          : SR.getVNInfoAt(CopyIdx);
849
0
        assert(BSubValNo != nullptr);
850
0
        addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo);
851
0
      });
852
0
    }
853
0
  }
854
352
855
352
  BValNo->def = AValNo->def;
856
352
  addSegmentsWithValNo(IntB, BValNo, IntA, AValNo);
857
352
  DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
858
352
859
352
  LIS->removeVRegDefAt(IntA, AValNo->def);
860
352
861
352
  DEBUG(dbgs() << "\t\ttrimmed:  " << IntA << '\n');
862
390k
  ++numCommutes;
863
390k
  return true;
864
390k
}
865
866
/// For copy B = A in BB2, if A is defined by A = B in BB0 which is a
867
/// predecessor of BB2, and if B is not redefined on the way from A = B
868
/// in BB2 to B = A in BB2, B = A in BB2 is partially redundant if the
869
/// execution goes through the path from BB0 to BB2. We may move B = A
870
/// to the predecessor without such reversed copy.
871
/// So we will transform the program from:
872
///   BB0:
873
///      A = B;    BB1:
874
///       ...         ...
875
///     /     \      /
876
///             BB2:
877
///               ...
878
///               B = A;
879
///
880
/// to:
881
///
882
///   BB0:         BB1:
883
///      A = B;        ...
884
///       ...          B = A;
885
///     /     \       /
886
///             BB2:
887
///               ...
888
///
889
/// A special case is when BB0 and BB2 are the same BB which is the only
890
/// BB in a loop:
891
///   BB1:
892
///        ...
893
///   BB0/BB2:  ----
894
///        B = A;   |
895
///        ...      |
896
///        A = B;   |
897
///          |-------
898
///          |
899
/// We may hoist B = A from BB0/BB2 to BB1.
900
///
901
/// The major preconditions for correctness to remove such partial
902
/// redundancy include:
903
/// 1. A in B = A in BB2 is defined by a PHI in BB2, and one operand of
904
///    the PHI is defined by the reversed copy A = B in BB0.
905
/// 2. No B is referenced from the start of BB2 to B = A.
906
/// 3. No B is defined from A = B to the end of BB0.
907
/// 4. BB1 has only one successor.
908
///
909
/// 2 and 4 implicitly ensure B is not live at the end of BB1.
910
/// 4 guarantees BB2 is hotter than BB1, so we can only move a copy to a
911
/// colder place, which not only prevent endless loop, but also make sure
912
/// the movement of copy is beneficial.
913
bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
914
390k
                                                MachineInstr &CopyMI) {
915
390k
  assert(!CP.isPhys());
916
390k
  if (!CopyMI.isFullCopy())
917
0
    return false;
918
390k
919
390k
  MachineBasicBlock &MBB = *CopyMI.getParent();
920
390k
  if (MBB.isEHPad())
921
160
    return false;
922
390k
923
390k
  
if (390k
MBB.pred_size() != 2390k
)
924
226k
    return false;
925
163k
926
163k
  LiveInterval &IntA =
927
163k
      LIS->getInterval(CP.isFlipped() ? 
CP.getDstReg()39.4k
:
CP.getSrcReg()124k
);
928
163k
  LiveInterval &IntB =
929
163k
      LIS->getInterval(CP.isFlipped() ? 
CP.getSrcReg()39.4k
:
CP.getDstReg()124k
);
930
163k
931
163k
  // A is defined by PHI at the entry of MBB.
932
163k
  SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true);
933
163k
  VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx);
934
163k
  assert(AValNo && !AValNo->isUnused() && "COPY source not live");
935
163k
  if (!AValNo->isPHIDef())
936
114k
    return false;
937
48.9k
938
48.9k
  // No B is referenced before CopyMI in MBB.
939
48.9k
  
if (48.9k
IntB.overlaps(LIS->getMBBStartIdx(&MBB), CopyIdx)48.9k
)
940
950
    return false;
941
48.0k
942
48.0k
  // MBB has two predecessors: one contains A = B so no copy will be inserted
943
48.0k
  // for it. The other one will have a copy moved from MBB.
944
48.0k
  bool FoundReverseCopy = false;
945
48.0k
  MachineBasicBlock *CopyLeftBB = nullptr;
946
96.0k
  for (MachineBasicBlock *Pred : MBB.predecessors()) {
947
96.0k
    VNInfo *PVal = IntA.getVNInfoBefore(LIS->getMBBEndIdx(Pred));
948
96.0k
    MachineInstr *DefMI = LIS->getInstructionFromIndex(PVal->def);
949
96.0k
    if (
!DefMI || 96.0k
!DefMI->isFullCopy()89.5k
) {
950
46.6k
      CopyLeftBB = Pred;
951
46.6k
      continue;
952
46.6k
    }
953
49.3k
    // Check DefMI is a reverse copy and it is in BB Pred.
954
49.3k
    
if (49.3k
DefMI->getOperand(0).getReg() != IntA.reg ||
955
49.3k
        DefMI->getOperand(1).getReg() != IntB.reg ||
956
49.3k
        
DefMI->getParent() != Pred861
) {
957
48.5k
      CopyLeftBB = Pred;
958
48.5k
      continue;
959
48.5k
    }
960
843
    // If there is any other def of B after DefMI and before the end of Pred,
961
843
    // we need to keep the copy of B = A at the end of Pred if we remove
962
843
    // B = A from MBB.
963
843
    bool ValB_Changed = false;
964
4.60k
    for (auto VNI : IntB.valnos) {
965
4.60k
      if (VNI->isUnused())
966
114
        continue;
967
4.48k
      
if (4.48k
PVal->def < VNI->def && 4.48k
VNI->def < LIS->getMBBEndIdx(Pred)1.40k
) {
968
4
        ValB_Changed = true;
969
4
        break;
970
4
      }
971
843
    }
972
843
    if (
ValB_Changed843
) {
973
4
      CopyLeftBB = Pred;
974
4
      continue;
975
4
    }
976
839
    FoundReverseCopy = true;
977
839
  }
978
48.0k
979
48.0k
  // If no reverse copy is found in predecessors, nothing to do.
980
48.0k
  if (!FoundReverseCopy)
981
47.1k
    return false;
982
833
983
833
  // If CopyLeftBB is nullptr, it means every predecessor of MBB contains
984
833
  // reverse copy, CopyMI can be removed trivially if only IntA/IntB is updated.
985
833
  // If CopyLeftBB is not nullptr, move CopyMI from MBB to CopyLeftBB and
986
833
  // update IntA/IntB.
987
833
  //
988
833
  // If CopyLeftBB is not nullptr, ensure CopyLeftBB has a single succ so
989
833
  // MBB is hotter than CopyLeftBB.
990
833
  
if (833
CopyLeftBB && 833
CopyLeftBB->succ_size() > 1827
)
991
346
    return false;
992
487
993
487
  // Now ok to move copy.
994
487
  
if (487
CopyLeftBB487
) {
995
481
    DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to BB#"
996
481
                 << CopyLeftBB->getNumber() << '\t' << CopyMI);
997
481
998
481
    // Insert new copy to CopyLeftBB.
999
481
    auto InsPos = CopyLeftBB->getFirstTerminator();
1000
481
    MachineInstr *NewCopyMI = BuildMI(*CopyLeftBB, InsPos, CopyMI.getDebugLoc(),
1001
481
                                      TII->get(TargetOpcode::COPY), IntB.reg)
1002
481
                                  .addReg(IntA.reg);
1003
481
    SlotIndex NewCopyIdx =
1004
481
        LIS->InsertMachineInstrInMaps(*NewCopyMI).getRegSlot();
1005
481
    IntB.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
1006
481
    for (LiveInterval::SubRange &SR : IntB.subranges())
1007
0
      SR.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
1008
481
1009
481
    // If the newly created Instruction has an address of an instruction that was
1010
481
    // deleted before (object recycled by the allocator) it needs to be removed from
1011
481
    // the deleted list.
1012
481
    ErasedInstrs.erase(NewCopyMI);
1013
487
  } else {
1014
6
    DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from BB#"
1015
6
                 << MBB.getNumber() << '\t' << CopyMI);
1016
6
  }
1017
487
1018
487
  // Remove CopyMI.
1019
487
  // Note: This is fine to remove the copy before updating the live-ranges.
1020
487
  // While updating the live-ranges, we only look at slot indices and
1021
487
  // never go back to the instruction.
1022
487
  // Mark instructions as deleted.
1023
487
  deleteInstr(&CopyMI);
1024
487
1025
487
  // Update the liveness.
1026
487
  SmallVector<SlotIndex, 8> EndPoints;
1027
487
  VNInfo *BValNo = IntB.Query(CopyIdx).valueOutOrDead();
1028
487
  LIS->pruneValue(*static_cast<LiveRange *>(&IntB), CopyIdx.getRegSlot(),
1029
487
                  &EndPoints);
1030
487
  BValNo->markUnused();
1031
487
  // Extend IntB to the EndPoints of its original live interval.
1032
487
  LIS->extendToIndices(IntB, EndPoints);
1033
487
1034
487
  // Now, do the same for its subranges.
1035
0
  for (LiveInterval::SubRange &SR : IntB.subranges()) {
1036
0
    EndPoints.clear();
1037
0
    VNInfo *BValNo = SR.Query(CopyIdx).valueOutOrDead();
1038
0
    assert(BValNo && "All sublanes should be live");
1039
0
    LIS->pruneValue(SR, CopyIdx.getRegSlot(), &EndPoints);
1040
0
    BValNo->markUnused();
1041
0
    LIS->extendToIndices(SR, EndPoints);
1042
0
  }
1043
390k
1044
390k
  // Finally, update the live-range of IntA.
1045
390k
  shrinkToUses(&IntA);
1046
390k
  return true;
1047
390k
}
1048
1049
/// Returns true if @p MI defines the full vreg @p Reg, as opposed to just
1050
/// defining a subregister.
1051
724k
static bool definesFullReg(const MachineInstr &MI, unsigned Reg) {
1052
724k
  assert(!TargetRegisterInfo::isPhysicalRegister(Reg) &&
1053
724k
         "This code cannot handle physreg aliasing");
1054
726k
  for (const MachineOperand &Op : MI.operands()) {
1055
726k
    if (
!Op.isReg() || 726k
!Op.isDef()725k
||
Op.getReg() != Reg724k
)
1056
1.48k
      continue;
1057
724k
    // Return true if we define the full register or don't care about the value
1058
724k
    // inside other subregisters.
1059
724k
    
if (724k
Op.getSubReg() == 0 || 724k
Op.isUndef()153k
)
1060
724k
      return true;
1061
742
  }
1062
742
  return false;
1063
742
}
1064
1065
bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
1066
                                                MachineInstr *CopyMI,
1067
11.1M
                                                bool &IsDefCopy) {
1068
11.1M
  IsDefCopy = false;
1069
11.1M
  unsigned SrcReg = CP.isFlipped() ? 
CP.getDstReg()2.62M
:
CP.getSrcReg()8.55M
;
1070
11.1M
  unsigned SrcIdx = CP.isFlipped() ? 
CP.getDstIdx()2.62M
:
CP.getSrcIdx()8.55M
;
1071
11.1M
  unsigned DstReg = CP.isFlipped() ? 
CP.getSrcReg()2.62M
:
CP.getDstReg()8.55M
;
1072
11.1M
  unsigned DstIdx = CP.isFlipped() ? 
CP.getSrcIdx()2.62M
:
CP.getDstIdx()8.55M
;
1073
11.1M
  if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
1074
2.50M
    return false;
1075
8.67M
1076
8.67M
  LiveInterval &SrcInt = LIS->getInterval(SrcReg);
1077
8.67M
  SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
1078
8.67M
  VNInfo *ValNo = SrcInt.Query(CopyIdx).valueIn();
1079
8.67M
  assert(ValNo && "CopyMI input register not live");
1080
8.67M
  if (
ValNo->isPHIDef() || 8.67M
ValNo->isUnused()8.26M
)
1081
414k
    return false;
1082
8.26M
  MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def);
1083
8.26M
  if (!DefMI)
1084
0
    return false;
1085
8.26M
  
if (8.26M
DefMI->isCopyLike()8.26M
) {
1086
4.84M
    IsDefCopy = true;
1087
4.84M
    return false;
1088
4.84M
  }
1089
3.42M
  
if (3.42M
!TII->isAsCheapAsAMove(*DefMI)3.42M
)
1090
1.83M
    return false;
1091
1.58M
  
if (1.58M
!TII->isTriviallyReMaterializable(*DefMI, AA)1.58M
)
1092
858k
    return false;
1093
724k
  
if (724k
!definesFullReg(*DefMI, SrcReg)724k
)
1094
742
    return false;
1095
724k
  bool SawStore = false;
1096
724k
  if (!DefMI->isSafeToMove(AA, SawStore))
1097
0
    return false;
1098
724k
  const MCInstrDesc &MCID = DefMI->getDesc();
1099
724k
  if (MCID.getNumDefs() != 1)
1100
0
    return false;
1101
724k
  // Only support subregister destinations when the def is read-undef.
1102
724k
  MachineOperand &DstOperand = CopyMI->getOperand(0);
1103
724k
  unsigned CopyDstReg = DstOperand.getReg();
1104
724k
  if (
DstOperand.getSubReg() && 724k
!DstOperand.isUndef()377
)
1105
4
    return false;
1106
724k
1107
724k
  // If both SrcIdx and DstIdx are set, correct rematerialization would widen
1108
724k
  // the register substantially (beyond both source and dest size). This is bad
1109
724k
  // for performance since it can cascade through a function, introducing many
1110
724k
  // extra spills and fills (e.g. ARM can easily end up copying QQQQPR registers
1111
724k
  // around after a few subreg copies).
1112
724k
  
if (724k
SrcIdx && 724k
DstIdx304
)
1113
0
    return false;
1114
724k
1115
724k
  const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF);
1116
724k
  if (
!DefMI->isImplicitDef()724k
) {
1117
724k
    if (
TargetRegisterInfo::isPhysicalRegister(DstReg)724k
) {
1118
690k
      unsigned NewDstReg = DstReg;
1119
690k
1120
690k
      unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(),
1121
690k
                                              DefMI->getOperand(0).getSubReg());
1122
690k
      if (NewDstIdx)
1123
138k
        NewDstReg = TRI->getSubReg(DstReg, NewDstIdx);
1124
690k
1125
690k
      // Finally, make sure that the physical subregister that will be
1126
690k
      // constructed later is permitted for the instruction.
1127
690k
      if (!DefRC->contains(NewDstReg))
1128
0
        return false;
1129
33.6k
    } else {
1130
33.6k
      // Theoretically, some stack frame reference could exist. Just make sure
1131
33.6k
      // it hasn't actually happened.
1132
33.6k
      assert(TargetRegisterInfo::isVirtualRegister(DstReg) &&
1133
33.6k
             "Only expect to deal with virtual or physical registers");
1134
33.6k
    }
1135
724k
  }
1136
724k
1137
724k
  DebugLoc DL = CopyMI->getDebugLoc();
1138
724k
  MachineBasicBlock *MBB = CopyMI->getParent();
1139
724k
  MachineBasicBlock::iterator MII =
1140
724k
    std::next(MachineBasicBlock::iterator(CopyMI));
1141
724k
  TII->reMaterialize(*MBB, MII, DstReg, SrcIdx, *DefMI, *TRI);
1142
724k
  MachineInstr &NewMI = *std::prev(MII);
1143
724k
  NewMI.setDebugLoc(DL);
1144
724k
1145
724k
  // In a situation like the following:
1146
724k
  //     %vreg0:subreg = instr              ; DefMI, subreg = DstIdx
1147
724k
  //     %vreg1        = copy %vreg0:subreg ; CopyMI, SrcIdx = 0
1148
724k
  // instead of widening %vreg1 to the register class of %vreg0 simply do:
1149
724k
  //     %vreg1 = instr
1150
724k
  const TargetRegisterClass *NewRC = CP.getNewRC();
1151
724k
  if (
DstIdx != 0724k
) {
1152
291
    MachineOperand &DefMO = NewMI.getOperand(0);
1153
291
    if (
DefMO.getSubReg() == DstIdx291
) {
1154
239
      assert(SrcIdx == 0 && CP.isFlipped()
1155
239
             && "Shouldn't have SrcIdx+DstIdx at this point");
1156
239
      const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
1157
239
      const TargetRegisterClass *CommonRC =
1158
239
        TRI->getCommonSubClass(DefRC, DstRC);
1159
239
      if (
CommonRC != nullptr239
) {
1160
239
        NewRC = CommonRC;
1161
239
        DstIdx = 0;
1162
239
        DefMO.setSubReg(0);
1163
239
        DefMO.setIsUndef(false); // Only subregs can have def+undef.
1164
239
      }
1165
239
    }
1166
291
  }
1167
724k
1168
724k
  // CopyMI may have implicit operands, save them so that we can transfer them
1169
724k
  // over to the newly materialized instruction after CopyMI is removed.
1170
724k
  SmallVector<MachineOperand, 4> ImplicitOps;
1171
724k
  ImplicitOps.reserve(CopyMI->getNumOperands() -
1172
724k
                      CopyMI->getDesc().getNumOperands());
1173
724k
  for (unsigned I = CopyMI->getDesc().getNumOperands(),
1174
724k
                E = CopyMI->getNumOperands();
1175
724k
       
I != E724k
;
++I1
) {
1176
1
    MachineOperand &MO = CopyMI->getOperand(I);
1177
1
    if (
MO.isReg()1
) {
1178
1
      assert(MO.isImplicit() && "No explicit operands after implict operands.");
1179
1
      // Discard VReg implicit defs.
1180
1
      if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
1181
1
        ImplicitOps.push_back(MO);
1182
1
    }
1183
1
  }
1184
724k
1185
724k
  LIS->ReplaceMachineInstrInMaps(*CopyMI, NewMI);
1186
724k
  CopyMI->eraseFromParent();
1187
724k
  ErasedInstrs.insert(CopyMI);
1188
724k
1189
724k
  // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86).
1190
724k
  // We need to remember these so we can add intervals once we insert
1191
724k
  // NewMI into SlotIndexes.
1192
724k
  SmallVector<unsigned, 4> NewMIImplDefs;
1193
724k
  for (unsigned i = NewMI.getDesc().getNumOperands(),
1194
724k
                e = NewMI.getNumOperands();
1195
733k
       
i != e733k
;
++i9.14k
) {
1196
9.14k
    MachineOperand &MO = NewMI.getOperand(i);
1197
9.14k
    if (
MO.isReg() && 9.14k
MO.isDef()9.14k
) {
1198
8.08k
      assert(MO.isImplicit() && MO.isDead() &&
1199
8.08k
             TargetRegisterInfo::isPhysicalRegister(MO.getReg()));
1200
8.08k
      NewMIImplDefs.push_back(MO.getReg());
1201
8.08k
    }
1202
9.14k
  }
1203
724k
1204
724k
  if (
TargetRegisterInfo::isVirtualRegister(DstReg)724k
) {
1205
33.6k
    unsigned NewIdx = NewMI.getOperand(0).getSubReg();
1206
33.6k
1207
33.6k
    if (
DefRC != nullptr33.6k
) {
1208
33.6k
      if (NewIdx)
1209
14.5k
        NewRC = TRI->getMatchingSuperRegClass(NewRC, DefRC, NewIdx);
1210
33.6k
      else
1211
19.0k
        NewRC = TRI->getCommonSubClass(NewRC, DefRC);
1212
33.6k
      assert(NewRC && "subreg chosen for remat incompatible with instruction");
1213
33.6k
    }
1214
33.6k
    // Remap subranges to new lanemask and change register class.
1215
33.6k
    LiveInterval &DstInt = LIS->getInterval(DstReg);
1216
2
    for (LiveInterval::SubRange &SR : DstInt.subranges()) {
1217
2
      SR.LaneMask = TRI->composeSubRegIndexLaneMask(DstIdx, SR.LaneMask);
1218
2
    }
1219
33.6k
    MRI->setRegClass(DstReg, NewRC);
1220
33.6k
1221
33.6k
    // Update machine operands and add flags.
1222
33.6k
    updateRegDefsUses(DstReg, DstReg, DstIdx);
1223
33.6k
    NewMI.getOperand(0).setSubReg(NewIdx);
1224
33.6k
    // Add dead subregister definitions if we are defining the whole register
1225
33.6k
    // but only part of it is live.
1226
33.6k
    // This could happen if the rematerialization instruction is rematerializing
1227
33.6k
    // more than actually is used in the register.
1228
33.6k
    // An example would be:
1229
33.6k
    // vreg1 = LOAD CONSTANTS 5, 8 ; Loading both 5 and 8 in different subregs
1230
33.6k
    // ; Copying only part of the register here, but the rest is undef.
1231
33.6k
    // vreg2:sub_16bit<def, read-undef> = COPY vreg1:sub_16bit
1232
33.6k
    // ==>
1233
33.6k
    // ; Materialize all the constants but only using one
1234
33.6k
    // vreg2 = LOAD_CONSTANTS 5, 8
1235
33.6k
    //
1236
33.6k
    // at this point for the part that wasn't defined before we could have
1237
33.6k
    // subranges missing the definition.
1238
33.6k
    if (
NewIdx == 0 && 33.6k
DstInt.hasSubRanges()19.0k
) {
1239
0
      SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI);
1240
0
      SlotIndex DefIndex =
1241
0
          CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
1242
0
      LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(DstReg);
1243
0
      VNInfo::Allocator& Alloc = LIS->getVNInfoAllocator();
1244
0
      for (LiveInterval::SubRange &SR : DstInt.subranges()) {
1245
0
        if (!SR.liveAt(DefIndex))
1246
0
          SR.createDeadDef(DefIndex, Alloc);
1247
0
        MaxMask &= ~SR.LaneMask;
1248
0
      }
1249
0
      if (
MaxMask.any()0
) {
1250
0
        LiveInterval::SubRange *SR = DstInt.createSubRange(Alloc, MaxMask);
1251
0
        SR->createDeadDef(DefIndex, Alloc);
1252
0
      }
1253
0
    }
1254
33.6k
1255
33.6k
    // Make sure that the subrange for resultant undef is removed
1256
33.6k
    // For example:
1257
33.6k
    //   vreg1:sub1<def,read-undef> = LOAD CONSTANT 1
1258
33.6k
    //   vreg2<def> = COPY vreg1
1259
33.6k
    // ==>
1260
33.6k
    //   vreg2:sub1<def, read-undef> = LOAD CONSTANT 1
1261
33.6k
    //     ; Correct but need to remove the subrange for vreg2:sub0
1262
33.6k
    //     ; as it is now undef
1263
33.6k
    if (
NewIdx != 0 && 33.6k
DstInt.hasSubRanges()14.5k
) {
1264
1
      // The affected subregister segments can be removed.
1265
1
      SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI);
1266
1
      LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(NewIdx);
1267
1
      bool UpdatedSubRanges = false;
1268
2
      for (LiveInterval::SubRange &SR : DstInt.subranges()) {
1269
2
        if (
(SR.LaneMask & DstMask).none()2
) {
1270
1
          DEBUG(dbgs() << "Removing undefined SubRange "
1271
1
                << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
1272
1
          // VNI is in ValNo - remove any segments in this SubRange that have this ValNo
1273
1
          if (VNInfo *
RmValNo1
= SR.getVNInfoAt(CurrIdx.getRegSlot())) {
1274
1
            SR.removeValNo(RmValNo);
1275
1
            UpdatedSubRanges = true;
1276
1
          }
1277
1
        }
1278
2
      }
1279
1
      if (UpdatedSubRanges)
1280
1
        DstInt.removeEmptySubRanges();
1281
1
    }
1282
724k
  } else 
if (690k
NewMI.getOperand(0).getReg() != CopyDstReg690k
) {
1283
137k
    // The New instruction may be defining a sub-register of what's actually
1284
137k
    // been asked for. If so it must implicitly define the whole thing.
1285
137k
    assert(TargetRegisterInfo::isPhysicalRegister(DstReg) &&
1286
137k
           "Only expect virtual or physical registers in remat");
1287
137k
    NewMI.getOperand(0).setIsDead(true);
1288
137k
    NewMI.addOperand(MachineOperand::CreateReg(
1289
137k
        CopyDstReg, true /*IsDef*/, true /*IsImp*/, false /*IsKill*/));
1290
137k
    // Record small dead def live-ranges for all the subregisters
1291
137k
    // of the destination register.
1292
137k
    // Otherwise, variables that live through may miss some
1293
137k
    // interferences, thus creating invalid allocation.
1294
137k
    // E.g., i386 code:
1295
137k
    // vreg1 = somedef ; vreg1 GR8
1296
137k
    // vreg2 = remat ; vreg2 GR32
1297
137k
    // CL = COPY vreg2.sub_8bit
1298
137k
    // = somedef vreg1 ; vreg1 GR8
1299
137k
    // =>
1300
137k
    // vreg1 = somedef ; vreg1 GR8
1301
137k
    // ECX<def, dead> = remat ; CL<imp-def>
1302
137k
    // = somedef vreg1 ; vreg1 GR8
1303
137k
    // vreg1 will see the inteferences with CL but not with CH since
1304
137k
    // no live-ranges would have been created for ECX.
1305
137k
    // Fix that!
1306
137k
    SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
1307
137k
    for (MCRegUnitIterator Units(NewMI.getOperand(0).getReg(), TRI);
1308
278k
         
Units.isValid()278k
;
++Units140k
)
1309
140k
      
if (LiveRange *140k
LR140k
= LIS->getCachedRegUnit(*Units))
1310
103k
        LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
1311
690k
  }
1312
724k
1313
724k
  if (NewMI.getOperand(0).getSubReg())
1314
14.5k
    NewMI.getOperand(0).setIsUndef();
1315
724k
1316
724k
  // Transfer over implicit operands to the rematerialized instruction.
1317
724k
  for (MachineOperand &MO : ImplicitOps)
1318
1
    NewMI.addOperand(MO);
1319
724k
1320
724k
  SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
1321
732k
  for (unsigned i = 0, e = NewMIImplDefs.size(); 
i != e732k
;
++i8.08k
) {
1322
8.08k
    unsigned Reg = NewMIImplDefs[i];
1323
16.1k
    for (MCRegUnitIterator Units(Reg, TRI); 
Units.isValid()16.1k
;
++Units8.08k
)
1324
8.08k
      
if (LiveRange *8.08k
LR8.08k
= LIS->getCachedRegUnit(*Units))
1325
0
        LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
1326
8.08k
  }
1327
724k
1328
724k
  DEBUG(dbgs() << "Remat: " << NewMI);
1329
724k
  ++NumReMats;
1330
724k
1331
724k
  // The source interval can become smaller because we removed a use.
1332
724k
  shrinkToUses(&SrcInt, &DeadDefs);
1333
724k
  if (
!DeadDefs.empty()724k
) {
1334
489k
    // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs
1335
489k
    // to describe DstReg instead.
1336
5
    for (MachineOperand &UseMO : MRI->use_operands(SrcReg)) {
1337
5
      MachineInstr *UseMI = UseMO.getParent();
1338
5
      if (
UseMI->isDebugValue()5
) {
1339
5
        UseMO.setReg(DstReg);
1340
5
        DEBUG(dbgs() << "\t\tupdated: " << *UseMI);
1341
5
      }
1342
5
    }
1343
489k
    eliminateDeadDefs();
1344
489k
  }
1345
724k
1346
724k
  return true;
1347
11.1M
}
1348
1349
6.24M
bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
1350
6.24M
  // ProcessImpicitDefs may leave some copies of <undef> values, it only removes
1351
6.24M
  // local variables. When we have a copy like:
1352
6.24M
  //
1353
6.24M
  //   %vreg1 = COPY %vreg2<undef>
1354
6.24M
  //
1355
6.24M
  // We delete the copy and remove the corresponding value number from %vreg1.
1356
6.24M
  // Any uses of that value number are marked as <undef>.
1357
6.24M
1358
6.24M
  // Note that we do not query CoalescerPair here but redo isMoveInstr as the
1359
6.24M
  // CoalescerPair may have a new register class with adjusted subreg indices
1360
6.24M
  // at this point.
1361
6.24M
  unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
1362
6.24M
  isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx);
1363
6.24M
1364
6.24M
  SlotIndex Idx = LIS->getInstructionIndex(*CopyMI);
1365
6.24M
  const LiveInterval &SrcLI = LIS->getInterval(SrcReg);
1366
6.24M
  // CopyMI is undef iff SrcReg is not live before the instruction.
1367
6.24M
  if (
SrcSubIdx != 0 && 6.24M
SrcLI.hasSubRanges()534k
) {
1368
72.6k
    LaneBitmask SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx);
1369
169k
    for (const LiveInterval::SubRange &SR : SrcLI.subranges()) {
1370
169k
      if ((SR.LaneMask & SrcMask).none())
1371
97.1k
        continue;
1372
72.6k
      
if (72.6k
SR.liveAt(Idx)72.6k
)
1373
72.6k
        return false;
1374
6.24M
    }
1375
6.17M
  } else 
if (6.17M
SrcLI.liveAt(Idx)6.17M
)
1376
6.17M
    return false;
1377
1
1378
1
  
DEBUG1
(dbgs() << "\tEliminating copy of <undef> value\n");
1379
1
1380
1
  // Remove any DstReg segments starting at the instruction.
1381
1
  LiveInterval &DstLI = LIS->getInterval(DstReg);
1382
1
  SlotIndex RegIndex = Idx.getRegSlot();
1383
1
  // Remove value or merge with previous one in case of a subregister def.
1384
1
  if (VNInfo *
PrevVNI1
= DstLI.getVNInfoAt(Idx)) {
1385
0
    VNInfo *VNI = DstLI.getVNInfoAt(RegIndex);
1386
0
    DstLI.MergeValueNumberInto(VNI, PrevVNI);
1387
0
1388
0
    // The affected subregister segments can be removed.
1389
0
    LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx);
1390
0
    for (LiveInterval::SubRange &SR : DstLI.subranges()) {
1391
0
      if ((SR.LaneMask & DstMask).none())
1392
0
        continue;
1393
0
1394
0
      VNInfo *SVNI = SR.getVNInfoAt(RegIndex);
1395
0
      assert(SVNI != nullptr && SlotIndex::isSameInstr(SVNI->def, RegIndex));
1396
0
      SR.removeValNo(SVNI);
1397
0
    }
1398
0
    DstLI.removeEmptySubRanges();
1399
0
  } else
1400
1
    LIS->removeVRegDefAt(DstLI, RegIndex);
1401
1
1402
1
  // Mark uses as undef.
1403
2
  for (MachineOperand &MO : MRI->reg_nodbg_operands(DstReg)) {
1404
2
    if (MO.isDef() /*|| MO.isUndef()*/)
1405
1
      continue;
1406
1
    const MachineInstr &MI = *MO.getParent();
1407
1
    SlotIndex UseIdx = LIS->getInstructionIndex(MI);
1408
1
    LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
1409
1
    bool isLive;
1410
1
    if (
!UseMask.all() && 1
DstLI.hasSubRanges()0
) {
1411
0
      isLive = false;
1412
0
      for (const LiveInterval::SubRange &SR : DstLI.subranges()) {
1413
0
        if ((SR.LaneMask & UseMask).none())
1414
0
          continue;
1415
0
        
if (0
SR.liveAt(UseIdx)0
) {
1416
0
          isLive = true;
1417
0
          break;
1418
0
        }
1419
1
      }
1420
0
    } else
1421
1
      isLive = DstLI.liveAt(UseIdx);
1422
1
    if (isLive)
1423
0
      continue;
1424
1
    MO.setIsUndef(true);
1425
1
    DEBUG(dbgs() << "\tnew undef: " << UseIdx << '\t' << MI);
1426
2
  }
1427
1
1428
1
  // A def of a subregister may be a use of the other subregisters, so
1429
1
  // deleting a def of a subregister may also remove uses. Since CopyMI
1430
1
  // is still part of the function (but about to be erased), mark all
1431
1
  // defs of DstReg in it as <undef>, so that shrinkToUses would
1432
1
  // ignore them.
1433
1
  for (MachineOperand &MO : CopyMI->operands())
1434
2
    
if (2
MO.isReg() && 2
MO.isDef()2
&&
MO.getReg() == DstReg1
)
1435
1
      MO.setIsUndef(true);
1436
6.24M
  LIS->shrinkToUses(&DstLI);
1437
6.24M
1438
6.24M
  return true;
1439
6.24M
}
1440
1441
void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
1442
355k
                                     MachineOperand &MO, unsigned SubRegIdx) {
1443
355k
  LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubRegIdx);
1444
355k
  if (MO.isDef())
1445
137k
    Mask = ~Mask;
1446
355k
  bool IsUndef = true;
1447
896k
  for (const LiveInterval::SubRange &S : Int.subranges()) {
1448
896k
    if ((S.LaneMask & Mask).none())
1449
523k
      continue;
1450
372k
    
if (372k
S.liveAt(UseIdx)372k
) {
1451
355k
      IsUndef = false;
1452
355k
      break;
1453
355k
    }
1454
355k
  }
1455
355k
  if (
IsUndef355k
) {
1456
35
    MO.setIsUndef(true);
1457
35
    // We found out some subregister use is actually reading an undefined
1458
35
    // value. In some cases the whole vreg has become undefined at this
1459
35
    // point so we have to potentially shrink the main range if the
1460
35
    // use was ending a live segment there.
1461
35
    LiveQueryResult Q = Int.Query(UseIdx);
1462
35
    if (Q.valueOut() == nullptr)
1463
0
      ShrinkMainRange = true;
1464
35
  }
1465
355k
}
1466
1467
void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
1468
                                          unsigned DstReg,
1469
6.85M
                                          unsigned SubIdx) {
1470
6.85M
  bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
1471
6.85M
  LiveInterval *DstInt = DstIsPhys ? 
nullptr1.02M
:
&LIS->getInterval(DstReg)5.82M
;
1472
6.85M
1473
6.85M
  if (
DstInt && 6.85M
DstInt->hasSubRanges()5.82M
&&
DstReg != SrcReg127k
) {
1474
476k
    for (MachineOperand &MO : MRI->reg_operands(DstReg)) {
1475
476k
      unsigned SubReg = MO.getSubReg();
1476
476k
      if (
SubReg == 0 || 476k
MO.isUndef()318k
)
1477
192k
        continue;
1478
284k
      MachineInstr &MI = *MO.getParent();
1479
284k
      if (MI.isDebugValue())
1480
0
        continue;
1481
284k
      SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot(true);
1482
284k
      addUndefFlag(*DstInt, UseIdx, MO, SubReg);
1483
284k
    }
1484
127k
  }
1485
6.85M
1486
6.85M
  SmallPtrSet<MachineInstr*, 8> Visited;
1487
6.85M
  for (MachineRegisterInfo::reg_instr_iterator
1488
6.85M
       I = MRI->reg_instr_begin(SrcReg), E = MRI->reg_instr_end();
1489
18.4M
       
I != E18.4M
; ) {
1490
11.5M
    MachineInstr *UseMI = &*(I++);
1491
11.5M
1492
11.5M
    // Each instruction can only be rewritten once because sub-register
1493
11.5M
    // composition is not always idempotent. When SrcReg != DstReg, rewriting
1494
11.5M
    // the UseMI operands removes them from the SrcReg use-def chain, but when
1495
11.5M
    // SrcReg is DstReg we could encounter UseMI twice if it has multiple
1496
11.5M
    // operands mentioning the virtual register.
1497
11.5M
    if (
SrcReg == DstReg && 11.5M
!Visited.insert(UseMI).second212k
)
1498
8.84k
      continue;
1499
11.5M
1500
11.5M
    SmallVector<unsigned,8> Ops;
1501
11.5M
    bool Reads, Writes;
1502
11.5M
    std::tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
1503
11.5M
1504
11.5M
    // If SrcReg wasn't read, it may still be the case that DstReg is live-in
1505
11.5M
    // because SrcReg is a sub-register.
1506
11.5M
    if (
DstInt && 11.5M
!Reads10.3M
&&
SubIdx4.40M
&&
!UseMI->isDebugValue()827k
)
1507
827k
      Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
1508
11.5M
1509
11.5M
    // Replace SrcReg with DstReg in all UseMI operands.
1510
23.3M
    for (unsigned i = 0, e = Ops.size(); 
i != e23.3M
;
++i11.8M
) {
1511
11.8M
      MachineOperand &MO = UseMI->getOperand(Ops[i]);
1512
11.8M
1513
11.8M
      // Adjust <undef> flags in case of sub-register joins. We don't want to
1514
11.8M
      // turn a full def into a read-modify-write sub-register def and vice
1515
11.8M
      // versa.
1516
11.8M
      if (
SubIdx && 11.8M
MO.isDef()1.74M
)
1517
849k
        MO.setIsUndef(!Reads);
1518
11.8M
1519
11.8M
      // A subreg use of a partially undef (super) register may be a complete
1520
11.8M
      // undef use now and then has to be marked that way.
1521
11.8M
      if (
SubIdx != 0 && 11.8M
MO.isUse()1.74M
&&
MRI->shouldTrackSubRegLiveness(DstReg)893k
) {
1522
70.3k
        if (
!DstInt->hasSubRanges()70.3k
) {
1523
0
          BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
1524
0
          LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg);
1525
0
          DstInt->createSubRangeFrom(Allocator, Mask, *DstInt);
1526
0
        }
1527
70.3k
        SlotIndex MIIdx = UseMI->isDebugValue()
1528
2
                              ? LIS->getSlotIndexes()->getIndexBefore(*UseMI)
1529
70.3k
                              : LIS->getInstructionIndex(*UseMI);
1530
70.3k
        SlotIndex UseIdx = MIIdx.getRegSlot(true);
1531
70.3k
        addUndefFlag(*DstInt, UseIdx, MO, SubIdx);
1532
70.3k
      }
1533
11.8M
1534
11.8M
      if (DstIsPhys)
1535
1.20M
        MO.substPhysReg(DstReg, *TRI);
1536
11.8M
      else
1537
10.5M
        MO.substVirtReg(DstReg, SubIdx, *TRI);
1538
11.8M
    }
1539
11.5M
1540
11.5M
    DEBUG({
1541
11.5M
        dbgs() << "\t\tupdated: ";
1542
11.5M
        if (!UseMI->isDebugValue())
1543
11.5M
          dbgs() << LIS->getInstructionIndex(*UseMI) << "\t";
1544
11.5M
        dbgs() << *UseMI;
1545
11.5M
      });
1546
11.5M
  }
1547
6.85M
}
1548
1549
11.7M
bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
1550
11.7M
  // Always join simple intervals that are defined by a single copy from a
1551
11.7M
  // reserved register. This doesn't increase register pressure, so it is
1552
11.7M
  // always beneficial.
1553
11.7M
  if (
!MRI->isReserved(CP.getDstReg())11.7M
) {
1554
10.5M
    DEBUG(dbgs() << "\tCan only merge into reserved registers.\n");
1555
10.5M
    return false;
1556
10.5M
  }
1557
1.14M
1558
1.14M
  LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg());
1559
1.14M
  if (JoinVInt.containsOneValue())
1560
1.03M
    return true;
1561
117k
1562
117k
  
DEBUG117k
(dbgs() << "\tCannot join complex intervals into reserved register.\n");
1563
117k
  return false;
1564
117k
}
1565
1566
18.5M
bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
1567
18.5M
  Again = false;
1568
18.5M
  DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI);
1569
18.5M
1570
18.5M
  CoalescerPair CP(*TRI);
1571
18.5M
  if (
!CP.setRegisters(CopyMI)18.5M
) {
1572
524k
    DEBUG(dbgs() << "\tNot coalescable.\n");
1573
524k
    return false;
1574
524k
  }
1575
18.0M
1576
18.0M
  
if (18.0M
CP.getNewRC()18.0M
) {
1577
6.25M
    auto SrcRC = MRI->getRegClass(CP.getSrcReg());
1578
6.25M
    auto DstRC = MRI->getRegClass(CP.getDstReg());
1579
6.25M
    unsigned SrcIdx = CP.getSrcIdx();
1580
6.25M
    unsigned DstIdx = CP.getDstIdx();
1581
6.25M
    if (
CP.isFlipped()6.25M
) {
1582
434k
      std::swap(SrcIdx, DstIdx);
1583
434k
      std::swap(SrcRC, DstRC);
1584
434k
    }
1585
6.25M
    if (!TRI->shouldCoalesce(CopyMI, SrcRC, SrcIdx, DstRC, DstIdx,
1586
6.25M
                            CP.getNewRC())) {
1587
920
      DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n");
1588
920
      return false;
1589
920
    }
1590
18.0M
  }
1591
18.0M
1592
18.0M
  // Dead code elimination. This really should be handled by MachineDCE, but
1593
18.0M
  // sometimes dead copies slip through, and we can't generate invalid live
1594
18.0M
  // ranges.
1595
18.0M
  
if (18.0M
!CP.isPhys() && 18.0M
CopyMI->allDefsAreDead()6.25M
) {
1596
5.30k
    DEBUG(dbgs() << "\tCopy is dead.\n");
1597
5.30k
    DeadDefs.push_back(CopyMI);
1598
5.30k
    eliminateDeadDefs();
1599
5.30k
    return true;
1600
5.30k
  }
1601
17.9M
1602
17.9M
  // Eliminate undefs.
1603
17.9M
  
if (17.9M
!CP.isPhys() && 17.9M
eliminateUndefCopy(CopyMI)6.24M
) {
1604
1
    deleteInstr(CopyMI);
1605
1
    return false;  // Not coalescable.
1606
1
  }
1607
17.9M
1608
17.9M
  // Coalesced copies are normally removed immediately, but transformations
1609
17.9M
  // like removeCopyByCommutingDef() can inadvertently create identity copies.
1610
17.9M
  // When that happens, just join the values and remove the copy.
1611
17.9M
  
if (17.9M
CP.getSrcReg() == CP.getDstReg()17.9M
) {
1612
0
    LiveInterval &LI = LIS->getInterval(CP.getSrcReg());
1613
0
    DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n');
1614
0
    const SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
1615
0
    LiveQueryResult LRQ = LI.Query(CopyIdx);
1616
0
    if (VNInfo *
DefVNI0
= LRQ.valueDefined()) {
1617
0
      VNInfo *ReadVNI = LRQ.valueIn();
1618
0
      assert(ReadVNI && "No value before copy and no <undef> flag.");
1619
0
      assert(ReadVNI != DefVNI && "Cannot read and define the same value.");
1620
0
      LI.MergeValueNumberInto(DefVNI, ReadVNI);
1621
0
1622
0
      // Process subregister liveranges.
1623
0
      for (LiveInterval::SubRange &S : LI.subranges()) {
1624
0
        LiveQueryResult SLRQ = S.Query(CopyIdx);
1625
0
        if (VNInfo *
SDefVNI0
= SLRQ.valueDefined()) {
1626
0
          VNInfo *SReadVNI = SLRQ.valueIn();
1627
0
          S.MergeValueNumberInto(SDefVNI, SReadVNI);
1628
0
        }
1629
0
      }
1630
0
      DEBUG(dbgs() << "\tMerged values:          " << LI << '\n');
1631
0
    }
1632
0
    deleteInstr(CopyMI);
1633
0
    return true;
1634
0
  }
1635
17.9M
1636
17.9M
  // Enforce policies.
1637
17.9M
  
if (17.9M
CP.isPhys()17.9M
) {
1638
11.7M
    DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), TRI)
1639
11.7M
                 << " with " << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx())
1640
11.7M
                 << '\n');
1641
11.7M
    if (
!canJoinPhys(CP)11.7M
) {
1642
10.7M
      // Before giving up coalescing, if definition of source is defined by
1643
10.7M
      // trivial computation, try rematerializing it.
1644
10.7M
      bool IsDefCopy;
1645
10.7M
      if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
1646
690k
        return true;
1647
10.0M
      
if (10.0M
IsDefCopy10.0M
)
1648
4.72M
        Again = true;  // May be possible to coalesce later.
1649
10.7M
      return false;
1650
10.7M
    }
1651
0
  } else {
1652
6.24M
    // When possible, let DstReg be the larger interval.
1653
6.24M
    if (
!CP.isPartial() && 6.24M
LIS->getInterval(CP.getSrcReg()).size() >
1654
4.92M
                           LIS->getInterval(CP.getDstReg()).size())
1655
1.49M
      CP.flip();
1656
6.24M
1657
6.24M
    DEBUG({
1658
6.24M
      dbgs() << "\tConsidering merging to "
1659
6.24M
             << TRI->getRegClassName(CP.getNewRC()) << " with ";
1660
6.24M
      if (CP.getDstIdx() && CP.getSrcIdx())
1661
6.24M
        dbgs() << PrintReg(CP.getDstReg()) << " in "
1662
6.24M
               << TRI->getSubRegIndexName(CP.getDstIdx()) << " and "
1663
6.24M
               << PrintReg(CP.getSrcReg()) << " in "
1664
6.24M
               << TRI->getSubRegIndexName(CP.getSrcIdx()) << '\n';
1665
6.24M
      else
1666
6.24M
        dbgs() << PrintReg(CP.getSrcReg(), TRI) << " in "
1667
6.24M
               << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n';
1668
6.24M
    });
1669
6.24M
  }
1670
17.9M
1671
7.28M
  ShrinkMask = LaneBitmask::getNone();
1672
7.28M
  ShrinkMainRange = false;
1673
7.28M
1674
7.28M
  // Okay, attempt to join these two intervals.  On failure, this returns false.
1675
7.28M
  // Otherwise, if one of the intervals being joined is a physreg, this method
1676
7.28M
  // always canonicalizes DstInt to be it.  The output "SrcInt" will not have
1677
7.28M
  // been modified, so we can use this information below to update aliases.
1678
7.28M
  if (
!joinIntervals(CP)7.28M
) {
1679
459k
    // Coalescing failed.
1680
459k
1681
459k
    // If definition of source is defined by trivial computation, try
1682
459k
    // rematerializing it.
1683
459k
    bool IsDefCopy;
1684
459k
    if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
1685
33.6k
      return true;
1686
426k
1687
426k
    // If we can eliminate the copy without merging the live segments, do so
1688
426k
    // now.
1689
426k
    
if (426k
!CP.isPartial() && 426k
!CP.isPhys()392k
) {
1690
390k
      if (adjustCopiesBackFrom(CP, CopyMI) ||
1691
390k
          
removeCopyByCommutingDef(CP, CopyMI)390k
) {
1692
429
        deleteInstr(CopyMI);
1693
429
        DEBUG(dbgs() << "\tTrivial!\n");
1694
429
        return true;
1695
429
      }
1696
425k
    }
1697
425k
1698
425k
    // Try and see if we can partially eliminate the copy by moving the copy to
1699
425k
    // its predecessor.
1700
425k
    
if (425k
!CP.isPartial() && 425k
!CP.isPhys()391k
)
1701
390k
      
if (390k
removePartialRedundancy(CP, *CopyMI)390k
)
1702
487
        return true;
1703
425k
1704
425k
    // Otherwise, we are unable to join the intervals.
1705
425k
    
DEBUG425k
(dbgs() << "\tInterference!\n");
1706
425k
    Again = true;  // May be possible to coalesce later.
1707
425k
    return false;
1708
425k
  }
1709
6.82M
1710
6.82M
  // Coalescing to a virtual register that is of a sub-register class of the
1711
6.82M
  // other. Make sure the resulting register is set to the right register class.
1712
6.82M
  
if (6.82M
CP.isCrossClass()6.82M
) {
1713
3.45M
    ++numCrossRCs;
1714
3.45M
    MRI->setRegClass(CP.getDstReg(), CP.getNewRC());
1715
3.45M
  }
1716
6.82M
1717
6.82M
  // Removing sub-register copies can ease the register class constraints.
1718
6.82M
  // Make sure we attempt to inflate the register class of DstReg.
1719
6.82M
  if (
!CP.isPhys() && 6.82M
RegClassInfo.isProperSubClass(CP.getNewRC())5.79M
)
1720
15.0k
    InflateRegs.push_back(CP.getDstReg());
1721
6.82M
1722
6.82M
  // CopyMI has been erased by joinIntervals at this point. Remove it from
1723
6.82M
  // ErasedInstrs since copyCoalesceWorkList() won't add a successful join back
1724
6.82M
  // to the work list. This keeps ErasedInstrs from growing needlessly.
1725
6.82M
  ErasedInstrs.erase(CopyMI);
1726
6.82M
1727
6.82M
  // Rewrite all SrcReg operands to DstReg.
1728
6.82M
  // Also update DstReg operands to include DstIdx if it is set.
1729
6.82M
  if (CP.getDstIdx())
1730
291
    updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
1731
6.82M
  updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
1732
6.82M
1733
6.82M
  // Shrink subregister ranges if necessary.
1734
6.82M
  if (
ShrinkMask.any()6.82M
) {
1735
28
    LiveInterval &LI = LIS->getInterval(CP.getDstReg());
1736
74
    for (LiveInterval::SubRange &S : LI.subranges()) {
1737
74
      if ((S.LaneMask & ShrinkMask).none())
1738
34
        continue;
1739
40
      
DEBUG40
(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask)
1740
40
                   << ")\n");
1741
40
      LIS->shrinkToUses(S, LI.reg);
1742
40
    }
1743
28
    LI.removeEmptySubRanges();
1744
28
  }
1745
6.82M
  if (
ShrinkMainRange6.82M
) {
1746
0
    LiveInterval &LI = LIS->getInterval(CP.getDstReg());
1747
0
    shrinkToUses(&LI);
1748
0
  }
1749
6.82M
1750
6.82M
  // SrcReg is guaranteed to be the register whose live interval that is
1751
6.82M
  // being merged.
1752
6.82M
  LIS->removeInterval(CP.getSrcReg());
1753
6.82M
1754
6.82M
  // Update regalloc hint.
1755
6.82M
  TRI->updateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);
1756
6.82M
1757
6.82M
  DEBUG({
1758
18.5M
    dbgs() << "\tSuccess: " << PrintReg(CP.getSrcReg(), TRI, CP.getSrcIdx())
1759
18.5M
           << " -> " << PrintReg(CP.getDstReg(), TRI, CP.getDstIdx()) << '\n';
1760
18.5M
    dbgs() << "\tResult = ";
1761
18.5M
    if (CP.isPhys())
1762
18.5M
      dbgs() << PrintReg(CP.getDstReg(), TRI);
1763
18.5M
    else
1764
18.5M
      dbgs() << LIS->getInterval(CP.getDstReg());
1765
18.5M
    dbgs() << '\n';
1766
18.5M
  });
1767
18.5M
1768
18.5M
  ++numJoins;
1769
18.5M
  return true;
1770
18.5M
}
1771
1772
1.03M
bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
1773
1.03M
  unsigned DstReg = CP.getDstReg();
1774
1.03M
  unsigned SrcReg = CP.getSrcReg();
1775
1.03M
  assert(CP.isPhys() && "Must be a physreg copy");
1776
1.03M
  assert(MRI->isReserved(DstReg) && "Not a reserved register");
1777
1.03M
  LiveInterval &RHS = LIS->getInterval(SrcReg);
1778
1.03M
  DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n');
1779
1.03M
1780
1.03M
  assert(RHS.containsOneValue() && "Invalid join with reserved register");
1781
1.03M
1782
1.03M
  // Optimization for reserved registers like ESP. We can only merge with a
1783
1.03M
  // reserved physreg if RHS has a single value that is a copy of DstReg.
1784
1.03M
  // The live range of the reserved register will look like a set of dead defs
1785
1.03M
  // - we don't properly track the live range of reserved registers.
1786
1.03M
1787
1.03M
  // Deny any overlapping intervals.  This depends on all the reserved
1788
1.03M
  // register live ranges to look like dead defs.
1789
1.03M
  if (
!MRI->isConstantPhysReg(DstReg)1.03M
) {
1790
141k
    for (MCRegUnitIterator UI(DstReg, TRI); 
UI.isValid()141k
;
++UI69.9k
) {
1791
71.2k
      // Abort if not all the regunits are reserved.
1792
142k
      for (MCRegUnitRootIterator RI(*UI, TRI); 
RI.isValid()142k
;
++RI71.2k
) {
1793
71.2k
        if (!MRI->isReserved(*RI))
1794
22
          return false;
1795
71.2k
      }
1796
71.2k
      
if (71.2k
RHS.overlaps(LIS->getRegUnit(*UI))71.2k
) {
1797
1.23k
        DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n');
1798
1.23k
        return false;
1799
1.23k
      }
1800
71.2k
    }
1801
71.2k
1802
71.2k
    // We must also check for overlaps with regmask clobbers.
1803
69.9k
    BitVector RegMaskUsable;
1804
69.9k
    if (LIS->checkRegMaskInterference(RHS, RegMaskUsable) &&
1805
69.9k
        
!RegMaskUsable.test(DstReg)352
) {
1806
348
      DEBUG(dbgs() << "\t\tRegMask interference\n");
1807
348
      return false;
1808
348
    }
1809
1.02M
  }
1810
1.02M
1811
1.02M
  // Skip any value computations, we are not adding new values to the
1812
1.02M
  // reserved register.  Also skip merging the live ranges, the reserved
1813
1.02M
  // register live range doesn't need to be accurate as long as all the
1814
1.02M
  // defs are there.
1815
1.02M
1816
1.02M
  // Delete the identity copy.
1817
1.02M
  MachineInstr *CopyMI;
1818
1.02M
  if (
CP.isFlipped()1.02M
) {
1819
1.02M
    // Physreg is copied into vreg
1820
1.02M
    //   %vregY = COPY %X
1821
1.02M
    //   ...  //< no other def of %X here
1822
1.02M
    //   use %vregY
1823
1.02M
    // =>
1824
1.02M
    //   ...
1825
1.02M
    //   use %X
1826
1.02M
    CopyMI = MRI->getVRegDef(SrcReg);
1827
1.02M
  } else {
1828
55
    // VReg is copied into physreg:
1829
55
    //   %vregX = def
1830
55
    //   ... //< no other def or use of %Y here
1831
55
    //   %Y = COPY %vregX
1832
55
    // =>
1833
55
    //   %Y = def
1834
55
    //   ...
1835
55
    if (
!MRI->hasOneNonDBGUse(SrcReg)55
) {
1836
8
      DEBUG(dbgs() << "\t\tMultiple vreg uses!\n");
1837
8
      return false;
1838
8
    }
1839
47
1840
47
    
if (47
!LIS->intervalIsInOneMBB(RHS)47
) {
1841
9
      DEBUG(dbgs() << "\t\tComplex control flow!\n");
1842
9
      return false;
1843
9
    }
1844
38
1845
38
    MachineInstr &DestMI = *MRI->getVRegDef(SrcReg);
1846
38
    CopyMI = &*MRI->use_instr_nodbg_begin(SrcReg);
1847
38
    SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
1848
38
    SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot();
1849
38
1850
38
    if (
!MRI->isConstantPhysReg(DstReg)38
) {
1851
37
      // We checked above that there are no interfering defs of the physical
1852
37
      // register. However, for this case, where we intend to move up the def of
1853
37
      // the physical register, we also need to check for interfering uses.
1854
37
      SlotIndexes *Indexes = LIS->getSlotIndexes();
1855
37
      for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx);
1856
69
           
SI != CopyRegIdx69
;
SI = Indexes->getNextNonNullIndex(SI)32
) {
1857
36
        MachineInstr *MI = LIS->getInstructionFromIndex(SI);
1858
36
        if (
MI->readsRegister(DstReg, TRI)36
) {
1859
4
          DEBUG(dbgs() << "\t\tInterference (read): " << *MI);
1860
4
          return false;
1861
4
        }
1862
36
      }
1863
37
    }
1864
38
1865
38
    // We're going to remove the copy which defines a physical reserved
1866
38
    // register, so remove its valno, etc.
1867
34
    
DEBUG34
(dbgs() << "\t\tRemoving phys reg def of " << PrintReg(DstReg, TRI)
1868
34
          << " at " << CopyRegIdx << "\n");
1869
34
1870
34
    LIS->removePhysRegDefAt(DstReg, CopyRegIdx);
1871
34
    // Create a new dead def at the new def location.
1872
68
    for (MCRegUnitIterator UI(DstReg, TRI); 
UI.isValid()68
;
++UI34
) {
1873
34
      LiveRange &LR = LIS->getRegUnit(*UI);
1874
34
      LR.createDeadDef(DestRegIdx, LIS->getVNInfoAllocator());
1875
34
    }
1876
55
  }
1877
1.02M
1878
1.02M
  deleteInstr(CopyMI);
1879
1.02M
1880
1.02M
  // We don't track kills for reserved registers.
1881
1.02M
  MRI->clearKillFlags(CP.getSrcReg());
1882
1.02M
1883
1.02M
  return true;
1884
1.03M
}
1885
1886
//===----------------------------------------------------------------------===//
1887
//                 Interference checking and interval joining
1888
//===----------------------------------------------------------------------===//
1889
//
1890
// In the easiest case, the two live ranges being joined are disjoint, and
1891
// there is no interference to consider. It is quite common, though, to have
1892
// overlapping live ranges, and we need to check if the interference can be
1893
// resolved.
1894
//
1895
// The live range of a single SSA value forms a sub-tree of the dominator tree.
1896
// This means that two SSA values overlap if and only if the def of one value
1897
// is contained in the live range of the other value. As a special case, the
1898
// overlapping values can be defined at the same index.
1899
//
1900
// The interference from an overlapping def can be resolved in these cases:
1901
//
1902
// 1. Coalescable copies. The value is defined by a copy that would become an
1903
//    identity copy after joining SrcReg and DstReg. The copy instruction will
1904
//    be removed, and the value will be merged with the source value.
1905
//
1906
//    There can be several copies back and forth, causing many values to be
1907
//    merged into one. We compute a list of ultimate values in the joined live
1908
//    range as well as a mappings from the old value numbers.
1909
//
1910
// 2. IMPLICIT_DEF. This instruction is only inserted to ensure all PHI
1911
//    predecessors have a live out value. It doesn't cause real interference,
1912
//    and can be merged into the value it overlaps. Like a coalescable copy, it
1913
//    can be erased after joining.
1914
//
1915
// 3. Copy of external value. The overlapping def may be a copy of a value that
1916
//    is already in the other register. This is like a coalescable copy, but
1917
//    the live range of the source register must be trimmed after erasing the
1918
//    copy instruction:
1919
//
1920
//      %src = COPY %ext
1921
//      %dst = COPY %ext  <-- Remove this COPY, trim the live range of %ext.
1922
//
1923
// 4. Clobbering undefined lanes. Vector registers are sometimes built by
1924
//    defining one lane at a time:
1925
//
1926
//      %dst:ssub0<def,read-undef> = FOO
1927
//      %src = BAR
1928
//      %dst:ssub1<def> = COPY %src
1929
//
1930
//    The live range of %src overlaps the %dst value defined by FOO, but
1931
//    merging %src into %dst:ssub1 is only going to clobber the ssub1 lane
1932
//    which was undef anyway.
1933
//
1934
//    The value mapping is more complicated in this case. The final live range
1935
//    will have different value numbers for both FOO and BAR, but there is no
1936
//    simple mapping from old to new values. It may even be necessary to add
1937
//    new PHI values.
1938
//
1939
// 5. Clobbering dead lanes. A def may clobber a lane of a vector register that
1940
//    is live, but never read. This can happen because we don't compute
1941
//    individual live ranges per lane.
1942
//
1943
//      %dst<def> = FOO
1944
//      %src = BAR
1945
//      %dst:ssub1<def> = COPY %src
1946
//
1947
//    This kind of interference is only resolved locally. If the clobbered
1948
//    lane value escapes the block, the join is aborted.
1949
1950
namespace {
1951
1952
/// Track information about values in a single virtual register about to be
1953
/// joined. Objects of this class are always created in pairs - one for each
1954
/// side of the CoalescerPair (or one for each lane of a side of the coalescer
1955
/// pair)
1956
class JoinVals {
1957
  /// Live range we work on.
1958
  LiveRange &LR;
1959
1960
  /// (Main) register we work on.
1961
  const unsigned Reg;
1962
1963
  /// Reg (and therefore the values in this liverange) will end up as
1964
  /// subregister SubIdx in the coalesced register. Either CP.DstIdx or
1965
  /// CP.SrcIdx.
1966
  const unsigned SubIdx;
1967
1968
  /// The LaneMask that this liverange will occupy the coalesced register. May
1969
  /// be smaller than the lanemask produced by SubIdx when merging subranges.
1970
  const LaneBitmask LaneMask;
1971
1972
  /// This is true when joining sub register ranges, false when joining main
1973
  /// ranges.
1974
  const bool SubRangeJoin;
1975
1976
  /// Whether the current LiveInterval tracks subregister liveness.
1977
  const bool TrackSubRegLiveness;
1978
1979
  /// Values that will be present in the final live range.
1980
  SmallVectorImpl<VNInfo*> &NewVNInfo;
1981
1982
  const CoalescerPair &CP;
1983
  LiveIntervals *LIS;
1984
  SlotIndexes *Indexes;
1985
  const TargetRegisterInfo *TRI;
1986
1987
  /// Value number assignments. Maps value numbers in LI to entries in
1988
  /// NewVNInfo. This is suitable for passing to LiveInterval::join().
1989
  SmallVector<int, 8> Assignments;
1990
1991
  /// Conflict resolution for overlapping values.
1992
  enum ConflictResolution {
1993
    /// No overlap, simply keep this value.
1994
    CR_Keep,
1995
1996
    /// Merge this value into OtherVNI and erase the defining instruction.
1997
    /// Used for IMPLICIT_DEF, coalescable copies, and copies from external
1998
    /// values.
1999
    CR_Erase,
2000
2001
    /// Merge this value into OtherVNI but keep the defining instruction.
2002
    /// This is for the special case where OtherVNI is defined by the same
2003
    /// instruction.
2004
    CR_Merge,
2005
2006
    /// Keep this value, and have it replace OtherVNI where possible. This
2007
    /// complicates value mapping since OtherVNI maps to two different values
2008
    /// before and after this def.
2009
    /// Used when clobbering undefined or dead lanes.
2010
    CR_Replace,
2011
2012
    /// Unresolved conflict. Visit later when all values have been mapped.
2013
    CR_Unresolved,
2014
2015
    /// Unresolvable conflict. Abort the join.
2016
    CR_Impossible
2017
  };
2018
2019
  /// Per-value info for LI. The lane bit masks are all relative to the final
2020
  /// joined register, so they can be compared directly between SrcReg and
2021
  /// DstReg.
2022
  struct Val {
2023
    ConflictResolution Resolution = CR_Keep;
2024
2025
    /// Lanes written by this def, 0 for unanalyzed values.
2026
    LaneBitmask WriteLanes;
2027
2028
    /// Lanes with defined values in this register. Other lanes are undef and
2029
    /// safe to clobber.
2030
    LaneBitmask ValidLanes;
2031
2032
    /// Value in LI being redefined by this def.
2033
    VNInfo *RedefVNI = nullptr;
2034
2035
    /// Value in the other live range that overlaps this def, if any.
2036
    VNInfo *OtherVNI = nullptr;
2037
2038
    /// Is this value an IMPLICIT_DEF that can be erased?
2039
    ///
2040
    /// IMPLICIT_DEF values should only exist at the end of a basic block that
2041
    /// is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be
2042
    /// safely erased if they are overlapping a live value in the other live
2043
    /// interval.
2044
    ///
2045
    /// Weird control flow graphs and incomplete PHI handling in
2046
    /// ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with
2047
    /// longer live ranges. Such IMPLICIT_DEF values should be treated like
2048
    /// normal values.
2049
    bool ErasableImplicitDef = false;
2050
2051
    /// True when the live range of this value will be pruned because of an
2052
    /// overlapping CR_Replace value in the other live range.
2053
    bool Pruned = false;
2054
2055
    /// True once Pruned above has been computed.
2056
    bool PrunedComputed = false;
2057
2058
12.8M
    Val() = default;
2059
2060
38.3M
    bool isAnalyzed() const { return WriteLanes.any(); }
2061
  };
2062
2063
  /// One entry per value number in LI.
2064
  SmallVector<Val, 8> Vals;
2065
2066
  /// Compute the bitmask of lanes actually written by DefMI.
2067
  /// Set Redef if there are any partial register definitions that depend on the
2068
  /// previous value of the register.
2069
  LaneBitmask computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const;
2070
2071
  /// Find the ultimate value that VNI was copied from.
2072
  std::pair<const VNInfo*,unsigned> followCopyChain(const VNInfo *VNI) const;
2073
2074
  bool valuesIdentical(VNInfo *Val0, VNInfo *Val1, const JoinVals &Other) const;
2075
2076
  /// Analyze ValNo in this live range, and set all fields of Vals[ValNo].
2077
  /// Return a conflict resolution when possible, but leave the hard cases as
2078
  /// CR_Unresolved.
2079
  /// Recursively calls computeAssignment() on this and Other, guaranteeing that
2080
  /// both OtherVNI and RedefVNI have been analyzed and mapped before returning.
2081
  /// The recursion always goes upwards in the dominator tree, making loops
2082
  /// impossible.
2083
  ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other);
2084
2085
  /// Compute the value assignment for ValNo in RI.
2086
  /// This may be called recursively by analyzeValue(), but never for a ValNo on
2087
  /// the stack.
2088
  void computeAssignment(unsigned ValNo, JoinVals &Other);
2089
2090
  /// Assuming ValNo is going to clobber some valid lanes in Other.LR, compute
2091
  /// the extent of the tainted lanes in the block.
2092
  ///
2093
  /// Multiple values in Other.LR can be affected since partial redefinitions
2094
  /// can preserve previously tainted lanes.
2095
  ///
2096
  ///   1 %dst = VLOAD           <-- Define all lanes in %dst
2097
  ///   2 %src = FOO             <-- ValNo to be joined with %dst:ssub0
2098
  ///   3 %dst:ssub1 = BAR       <-- Partial redef doesn't clear taint in ssub0
2099
  ///   4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read
2100
  ///
2101
  /// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes)
2102
  /// entry to TaintedVals.
2103
  ///
2104
  /// Returns false if the tainted lanes extend beyond the basic block.
2105
  bool
2106
  taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
2107
              SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent);
2108
2109
  /// Return true if MI uses any of the given Lanes from Reg.
2110
  /// This does not include partial redefinitions of Reg.
2111
  bool usesLanes(const MachineInstr &MI, unsigned, unsigned, LaneBitmask) const;
2112
2113
  /// Determine if ValNo is a copy of a value number in LR or Other.LR that will
2114
  /// be pruned:
2115
  ///
2116
  ///   %dst = COPY %src
2117
  ///   %src = COPY %dst  <-- This value to be pruned.
2118
  ///   %dst = COPY %src  <-- This value is a copy of a pruned value.
2119
  bool isPrunedValue(unsigned ValNo, JoinVals &Other);
2120
2121
public:
2122
  JoinVals(LiveRange &LR, unsigned Reg, unsigned SubIdx, LaneBitmask LaneMask,
2123
           SmallVectorImpl<VNInfo*> &newVNInfo, const CoalescerPair &cp,
2124
           LiveIntervals *lis, const TargetRegisterInfo *TRI, bool SubRangeJoin,
2125
           bool TrackSubRegLiveness)
2126
    : LR(LR), Reg(Reg), SubIdx(SubIdx), LaneMask(LaneMask),
2127
      SubRangeJoin(SubRangeJoin), TrackSubRegLiveness(TrackSubRegLiveness),
2128
      NewVNInfo(newVNInfo), CP(cp), LIS(lis), Indexes(LIS->getSlotIndexes()),
2129
12.8M
      TRI(TRI), Assignments(LR.getNumValNums(), -1), Vals(LR.getNumValNums()) {}
2130
2131
  /// Analyze defs in LR and compute a value mapping in NewVNInfo.
2132
  /// Returns false if any conflicts were impossible to resolve.
2133
  bool mapValues(JoinVals &Other);
2134
2135
  /// Try to resolve conflicts that require all values to be mapped.
2136
  /// Returns false if any conflicts were impossible to resolve.
2137
  bool resolveConflicts(JoinVals &Other);
2138
2139
  /// Prune the live range of values in Other.LR where they would conflict with
2140
  /// CR_Replace values in LR. Collect end points for restoring the live range
2141
  /// after joining.
2142
  void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints,
2143
                   bool changeInstrs);
2144
2145
  /// Removes subranges starting at copies that get removed. This sometimes
2146
  /// happens when undefined subranges are copied around. These ranges contain
2147
  /// no useful information and can be removed.
2148
  void pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask);
2149
2150
  /// Pruning values in subranges can lead to removing segments in these
2151
  /// subranges started by IMPLICIT_DEFs. The corresponding segments in
2152
  /// the main range also need to be removed. This function will mark
2153
  /// the corresponding values in the main range as pruned, so that
2154
  /// eraseInstrs can do the final cleanup.
2155
  /// The parameter @p LI must be the interval whose main range is the
2156
  /// live range LR.
2157
  void pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange);
2158
2159
  /// Erase any machine instructions that have been coalesced away.
2160
  /// Add erased instructions to ErasedInstrs.
2161
  /// Add foreign virtual registers to ShrinkRegs if their live range ended at
2162
  /// the erased instrs.
2163
  void eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
2164
                   SmallVectorImpl<unsigned> &ShrinkRegs,
2165
                   LiveInterval *LI = nullptr);
2166
2167
  /// Remove liverange defs at places where implicit defs will be removed.
2168
  void removeImplicitDefs();
2169
2170
  /// Get the value assignments suitable for passing to LiveInterval::join.
2171
11.8M
  const int *getAssignments() const { return Assignments.data(); }
2172
};
2173
2174
} // end anonymous namespace
2175
2176
LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
2177
24.7M
  const {
2178
24.7M
  LaneBitmask L;
2179
59.7M
  for (const MachineOperand &MO : DefMI->operands()) {
2180
59.7M
    if (
!MO.isReg() || 59.7M
MO.getReg() != Reg49.5M
||
!MO.isDef()26.2M
)
2181
34.9M
      continue;
2182
24.7M
    L |= TRI->getSubRegIndexLaneMask(
2183
24.7M
           TRI->composeSubRegIndices(SubIdx, MO.getSubReg()));
2184
24.7M
    if (MO.readsReg())
2185
350k
      Redef = true;
2186
59.7M
  }
2187
24.7M
  return L;
2188
24.7M
}
2189
2190
std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain(
2191
305k
    const VNInfo *VNI) const {
2192
305k
  unsigned Reg = this->Reg;
2193
305k
2194
514k
  while (
!VNI->isPHIDef()514k
) {
2195
414k
    SlotIndex Def = VNI->def;
2196
414k
    MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
2197
414k
    assert(MI && "No defining instruction");
2198
414k
    if (!MI->isFullCopy())
2199
99.8k
      return std::make_pair(VNI, Reg);
2200
314k
    unsigned SrcReg = MI->getOperand(1).getReg();
2201
314k
    if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
2202
105k
      return std::make_pair(VNI, Reg);
2203
208k
2204
208k
    const LiveInterval &LI = LIS->getInterval(SrcReg);
2205
208k
    const VNInfo *ValueIn;
2206
208k
    // No subrange involved.
2207
208k
    if (
!SubRangeJoin || 208k
!LI.hasSubRanges()2
) {
2208
208k
      LiveQueryResult LRQ = LI.Query(Def);
2209
208k
      ValueIn = LRQ.valueIn();
2210
208k
    } else {
2211
1
      // Query subranges. Pick the first matching one.
2212
1
      ValueIn = nullptr;
2213
2
      for (const LiveInterval::SubRange &S : LI.subranges()) {
2214
2
        // Transform lanemask to a mask in the joined live interval.
2215
2
        LaneBitmask SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask);
2216
2
        if ((SMask & LaneMask).none())
2217
1
          continue;
2218
1
        LiveQueryResult LRQ = S.Query(Def);
2219
1
        ValueIn = LRQ.valueIn();
2220
1
        break;
2221
1
      }
2222
1
    }
2223
208k
    if (ValueIn == nullptr)
2224
0
      break;
2225
208k
    VNI = ValueIn;
2226
208k
    Reg = SrcReg;
2227
208k
  }
2228
99.8k
  return std::make_pair(VNI, Reg);
2229
305k
}
2230
2231
bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1,
2232
155k
                               const JoinVals &Other) const {
2233
155k
  const VNInfo *Orig0;
2234
155k
  unsigned Reg0;
2235
155k
  std::tie(Orig0, Reg0) = followCopyChain(Value0);
2236
155k
  if (Orig0 == Value1)
2237
5.39k
    return true;
2238
149k
2239
149k
  const VNInfo *Orig1;
2240
149k
  unsigned Reg1;
2241
149k
  std::tie(Orig1, Reg1) = Other.followCopyChain(Value1);
2242
149k
2243
149k
  // The values are equal if they are defined at the same place and use the
2244
149k
  // same register. Note that we cannot compare VNInfos directly as some of
2245
149k
  // them might be from a copy created in mergeSubRangeInto()  while the other
2246
149k
  // is from the original LiveInterval.
2247
15.5k
  return Orig0->def == Orig1->def && Reg0 == Reg1;
2248
155k
}
2249
2250
JoinVals::ConflictResolution
2251
30.4M
JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
2252
30.4M
  Val &V = Vals[ValNo];
2253
30.4M
  assert(!V.isAnalyzed() && "Value has already been analyzed!");
2254
30.4M
  VNInfo *VNI = LR.getValNumInfo(ValNo);
2255
30.4M
  if (
VNI->isUnused()30.4M
) {
2256
3.62k
    V.WriteLanes = LaneBitmask::getAll();
2257
3.62k
    return CR_Keep;
2258
3.62k
  }
2259
30.4M
2260
30.4M
  // Get the instruction defining this value, compute the lanes written.
2261
30.4M
  const MachineInstr *DefMI = nullptr;
2262
30.4M
  if (
VNI->isPHIDef()30.4M
) {
2263
5.37M
    // Conservatively assume that all lanes in a PHI are valid.
2264
1.16k
    LaneBitmask Lanes = SubRangeJoin ? LaneBitmask::getLane(0)
2265
5.37M
                                     : TRI->getSubRegIndexLaneMask(SubIdx);
2266
5.37M
    V.ValidLanes = V.WriteLanes = Lanes;
2267
30.4M
  } else {
2268
25.1M
    DefMI = Indexes->getInstructionFromIndex(VNI->def);
2269
25.1M
    assert(DefMI != nullptr);
2270
25.1M
    if (
SubRangeJoin25.1M
) {
2271
316k
      // We don't care about the lanes when joining subregister ranges.
2272
316k
      V.WriteLanes = V.ValidLanes = LaneBitmask::getLane(0);
2273
316k
      if (
DefMI->isImplicitDef()316k
) {
2274
82
        V.ValidLanes = LaneBitmask::getNone();
2275
82
        V.ErasableImplicitDef = true;
2276
82
      }
2277
25.1M
    } else {
2278
24.7M
      bool Redef = false;
2279
24.7M
      V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef);
2280
24.7M
2281
24.7M
      // If this is a read-modify-write instruction, there may be more valid
2282
24.7M
      // lanes than the ones written by this instruction.
2283
24.7M
      // This only covers partial redef operands. DefMI may have normal use
2284
24.7M
      // operands reading the register. They don't contribute valid lanes.
2285
24.7M
      //
2286
24.7M
      // This adds ssub1 to the set of valid lanes in %src:
2287
24.7M
      //
2288
24.7M
      //   %src:ssub1<def> = FOO
2289
24.7M
      //
2290
24.7M
      // This leaves only ssub1 valid, making any other lanes undef:
2291
24.7M
      //
2292
24.7M
      //   %src:ssub1<def,read-undef> = FOO %src:ssub2
2293
24.7M
      //
2294
24.7M
      // The <read-undef> flag on the def operand means that old lane values are
2295
24.7M
      // not important.
2296
24.7M
      if (
Redef24.7M
) {
2297
350k
        V.RedefVNI = LR.Query(VNI->def).valueIn();
2298
350k
        assert((TrackSubRegLiveness || V.RedefVNI) &&
2299
350k
               "Instruction is reading nonexistent value");
2300
350k
        if (
V.RedefVNI != nullptr350k
) {
2301
350k
          computeAssignment(V.RedefVNI->id, Other);
2302
350k
          V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes;
2303
350k
        }
2304
350k
      }
2305
24.7M
2306
24.7M
      // An IMPLICIT_DEF writes undef values.
2307
24.7M
      if (
DefMI->isImplicitDef()24.7M
) {
2308
16.7k
        // We normally expect IMPLICIT_DEF values to be live only until the end
2309
16.7k
        // of their block. If the value is really live longer and gets pruned in
2310
16.7k
        // another block, this flag is cleared again.
2311
16.7k
        V.ErasableImplicitDef = true;
2312
16.7k
        V.ValidLanes &= ~V.WriteLanes;
2313
16.7k
      }
2314
24.7M
    }
2315
25.1M
  }
2316
30.4M
2317
30.4M
  // Find the value in Other that overlaps VNI->def, if any.
2318
30.4M
  LiveQueryResult OtherLRQ = Other.LR.Query(VNI->def);
2319
30.4M
2320
30.4M
  // It is possible that both values are defined by the same instruction, or
2321
30.4M
  // the values are PHIs defined in the same block. When that happens, the two
2322
30.4M
  // values should be merged into one, but not into any preceding value.
2323
30.4M
  // The first value defined or visited gets CR_Keep, the other gets CR_Merge.
2324
30.4M
  if (VNInfo *
OtherVNI30.4M
= OtherLRQ.valueDefined()) {
2325
43.0k
    assert(SlotIndex::isSameInstr(VNI->def, OtherVNI->def) && "Broken LRQ");
2326
43.0k
2327
43.0k
    // One value stays, the other is merged. Keep the earlier one, or the first
2328
43.0k
    // one we see.
2329
43.0k
    if (OtherVNI->def < VNI->def)
2330
70
      Other.computeAssignment(OtherVNI->id, *this);
2331
42.9k
    else 
if (42.9k
VNI->def < OtherVNI->def && 42.9k
OtherLRQ.valueIn()86
) {
2332
0
      // This is an early-clobber def overlapping a live-in value in the other
2333
0
      // register. Not mergeable.
2334
0
      V.OtherVNI = OtherLRQ.valueIn();
2335
0
      return CR_Impossible;
2336
0
    }
2337
43.0k
    V.OtherVNI = OtherVNI;
2338
43.0k
    Val &OtherV = Other.Vals[OtherVNI->id];
2339
43.0k
    // Keep this value, check for conflicts when analyzing OtherVNI.
2340
43.0k
    if (!OtherV.isAnalyzed())
2341
32.0k
      return CR_Keep;
2342
11.0k
    // Both sides have been analyzed now.
2343
11.0k
    // Allow overlapping PHI values. Any real interference would show up in a
2344
11.0k
    // predecessor, the PHI itself can't introduce any conflicts.
2345
11.0k
    
if (11.0k
VNI->isPHIDef()11.0k
)
2346
10.8k
      return CR_Merge;
2347
195
    
if (195
(V.ValidLanes & OtherV.ValidLanes).any()195
)
2348
195
      // Overlapping lanes can't be resolved.
2349
182
      return CR_Impossible;
2350
195
    else
2351
13
      return CR_Merge;
2352
30.4M
  }
2353
30.4M
2354
30.4M
  // No simultaneous def. Is Other live at the def?
2355
30.4M
  V.OtherVNI = OtherLRQ.valueIn();
2356
30.4M
  if (!V.OtherVNI)
2357
30.4M
    // No overlap, no conflict.
2358
22.6M
    return CR_Keep;
2359
7.76M
2360
30.4M
  assert(!SlotIndex::isSameInstr(VNI->def, V.OtherVNI->def) && "Broken LRQ");
2361
7.76M
2362
7.76M
  // We have overlapping values, or possibly a kill of Other.
2363
7.76M
  // Recursively compute assignments up the dominator tree.
2364
7.76M
  Other.computeAssignment(V.OtherVNI->id, *this);
2365
7.76M
  Val &OtherV = Other.Vals[V.OtherVNI->id];
2366
7.76M
2367
7.76M
  // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block.
2368
7.76M
  // This shouldn't normally happen, but ProcessImplicitDefs can leave such
2369
7.76M
  // IMPLICIT_DEF instructions behind, and there is nothing wrong with it
2370
7.76M
  // technically.
2371
7.76M
  //
2372
7.76M
  // When it happens, treat that IMPLICIT_DEF as a normal value, and don't try
2373
7.76M
  // to erase the IMPLICIT_DEF instruction.
2374
7.76M
  if (
OtherV.ErasableImplicitDef && 7.76M
DefMI858
&&
2375
7.76M
      
DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)858
) {
2376
1
    DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
2377
1
                 << " extends into BB#" << DefMI->getParent()->getNumber()
2378
1
                 << ", keeping it.\n");
2379
1
    OtherV.ErasableImplicitDef = false;
2380
1
  }
2381
7.76M
2382
7.76M
  // Allow overlapping PHI values. Any real interference would show up in a
2383
7.76M
  // predecessor, the PHI itself can't introduce any conflicts.
2384
7.76M
  if (VNI->isPHIDef())
2385
90.6k
    return CR_Replace;
2386
7.67M
2387
7.67M
  // Check for simple erasable conflicts.
2388
7.67M
  
if (7.67M
DefMI->isImplicitDef()7.67M
) {
2389
1.87k
    // We need the def for the subregister if there is nothing else live at the
2390
1.87k
    // subrange at this point.
2391
1.87k
    if (TrackSubRegLiveness
2392
24
        && (V.WriteLanes & (OtherV.ValidLanes | OtherV.WriteLanes)).none())
2393
7
      return CR_Replace;
2394
1.87k
    return CR_Erase;
2395
1.87k
  }
2396
7.67M
2397
7.67M
  // Include the non-conflict where DefMI is a coalescable copy that kills
2398
7.67M
  // OtherVNI. We still want the copy erased and value numbers merged.
2399
7.67M
  
if (7.67M
CP.isCoalescable(DefMI)7.67M
) {
2400
6.72M
    // Some of the lanes copied from OtherVNI may be undef, making them undef
2401
6.72M
    // here too.
2402
6.72M
    V.ValidLanes &= ~V.WriteLanes | OtherV.ValidLanes;
2403
6.72M
    return CR_Erase;
2404
6.72M
  }
2405
945k
2406
945k
  // This may not be a real conflict if DefMI simply kills Other and defines
2407
945k
  // VNI.
2408
945k
  
if (945k
OtherLRQ.isKill() && 945k
OtherLRQ.endPoint() <= VNI->def347k
)
2409
347k
    return CR_Keep;
2410
597k
2411
597k
  // Handle the case where VNI and OtherVNI can be proven to be identical:
2412
597k
  //
2413
597k
  //   %other = COPY %ext
2414
597k
  //   %this  = COPY %ext <-- Erase this copy
2415
597k
  //
2416
597k
  
if (597k
DefMI->isFullCopy() && 597k
!CP.isPartial()161k
2417
155k
      && valuesIdentical(VNI, V.OtherVNI, Other))
2418
9.63k
    return CR_Erase;
2419
588k
2420
588k
  // If the lanes written by this instruction were all undef in OtherVNI, it is
2421
588k
  // still safe to join the live ranges. This can't be done with a simple value
2422
588k
  // mapping, though - OtherVNI will map to multiple values:
2423
588k
  //
2424
588k
  //   1 %dst:ssub0 = FOO                <-- OtherVNI
2425
588k
  //   2 %src = BAR                      <-- VNI
2426
588k
  //   3 %dst:ssub1 = COPY %src<kill>    <-- Eliminate this copy.
2427
588k
  //   4 BAZ %dst<kill>
2428
588k
  //   5 QUUX %src<kill>
2429
588k
  //
2430
588k
  // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace
2431
588k
  // handles this complex value mapping.
2432
588k
  
if (588k
(V.WriteLanes & OtherV.ValidLanes).none()588k
)
2433
110k
    return CR_Replace;
2434
477k
2435
477k
  // If the other live range is killed by DefMI and the live ranges are still
2436
477k
  // overlapping, it must be because we're looking at an early clobber def:
2437
477k
  //
2438
477k
  //   %dst<def,early-clobber> = ASM %src<kill>
2439
477k
  //
2440
477k
  // In this case, it is illegal to merge the two live ranges since the early
2441
477k
  // clobber def would clobber %src before it was read.
2442
477k
  
if (477k
OtherLRQ.isKill()477k
) {
2443
16
    // This case where the def doesn't overlap the kill is handled above.
2444
16
    assert(VNI->def.isEarlyClobber() &&
2445
16
           "Only early clobber defs can overlap a kill");
2446
16
    return CR_Impossible;
2447
16
  }
2448
477k
2449
477k
  // VNI is clobbering live lanes in OtherVNI, but there is still the
2450
477k
  // possibility that no instructions actually read the clobbered lanes.
2451
477k
  // If we're clobbering all the lanes in OtherVNI, at least one must be read.
2452
477k
  // Otherwise Other.RI wouldn't be live here.
2453
477k
  
if (477k
(TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes).none()477k
)
2454
431k
    return CR_Impossible;
2455
46.3k
2456
46.3k
  // We need to verify that no instructions are reading the clobbered lanes. To
2457
46.3k
  // save compile time, we'll only check that locally. Don't allow the tainted
2458
46.3k
  // value to escape the basic block.
2459
46.3k
  MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
2460
46.3k
  if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB))
2461
14.1k
    return CR_Impossible;
2462
32.2k
2463
32.2k
  // There are still some things that could go wrong besides clobbered lanes
2464
32.2k
  // being read, for example OtherVNI may be only partially redefined in MBB,
2465
32.2k
  // and some clobbered lanes could escape the block. Save this analysis for
2466
32.2k
  // resolveConflicts() when all values have been mapped. We need to know
2467
32.2k
  // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute
2468
32.2k
  // that now - the recursive analyzeValue() calls must go upwards in the
2469
32.2k
  // dominator tree.
2470
32.2k
  return CR_Unresolved;
2471
32.2k
}
2472
2473
38.2M
void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
2474
38.2M
  Val &V = Vals[ValNo];
2475
38.2M
  if (
V.isAnalyzed()38.2M
) {
2476
7.78M
    // Recursion should always move up the dominator tree, so ValNo is not
2477
7.78M
    // supposed to reappear before it has been assigned.
2478
7.78M
    assert(Assignments[ValNo] != -1 && "Bad recursion?");
2479
7.78M
    return;
2480
7.78M
  }
2481
30.4M
  switch ((V.Resolution = analyzeValue(ValNo, Other))) {
2482
6.74M
  case CR_Erase:
2483
6.74M
  case CR_Merge:
2484
6.74M
    // Merge this ValNo into OtherVNI.
2485
6.74M
    assert(V.OtherVNI && "OtherVNI not assigned, can't merge.");
2486
6.74M
    assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion");
2487
6.74M
    Assignments[ValNo] = Other.Assignments[V.OtherVNI->id];
2488
6.74M
    DEBUG(dbgs() << "\t\tmerge " << PrintReg(Reg) << ':' << ValNo << '@'
2489
6.74M
                 << LR.getValNumInfo(ValNo)->def << " into "
2490
6.74M
                 << PrintReg(Other.Reg) << ':' << V.OtherVNI->id << '@'
2491
6.74M
                 << V.OtherVNI->def << " --> @"
2492
6.74M
                 << NewVNInfo[Assignments[ValNo]]->def << '\n');
2493
6.74M
    break;
2494
233k
  case CR_Replace:
2495
233k
  case CR_Unresolved: {
2496
233k
    // The other value is going to be pruned if this join is successful.
2497
233k
    assert(V.OtherVNI && "OtherVNI not assigned, can't prune");
2498
233k
    Val &OtherV = Other.Vals[V.OtherVNI->id];
2499
233k
    // We cannot erase an IMPLICIT_DEF if we don't have valid values for all
2500
233k
    // its lanes.
2501
233k
    if (
(OtherV.WriteLanes & ~V.ValidLanes).any() && 233k
TrackSubRegLiveness83.5k
)
2502
72.0k
      OtherV.ErasableImplicitDef = false;
2503
233k
    OtherV.Pruned = true;
2504
233k
    LLVM_FALLTHROUGH;
2505
233k
  }
2506
23.7M
  default:
2507
23.7M
    // This value number needs to go in the final joined live range.
2508
23.7M
    Assignments[ValNo] = NewVNInfo.size();
2509
23.7M
    NewVNInfo.push_back(LR.getValNumInfo(ValNo));
2510
23.7M
    break;
2511
30.4M
  }
2512
30.4M
}
2513
2514
12.5M
bool JoinVals::mapValues(JoinVals &Other) {
2515
42.2M
  for (unsigned i = 0, e = LR.getNumValNums(); 
i != e42.2M
;
++i29.7M
) {
2516
30.1M
    computeAssignment(i, Other);
2517
30.1M
    if (
Vals[i].Resolution == CR_Impossible30.1M
) {
2518
436k
      DEBUG(dbgs() << "\t\tinterference at " << PrintReg(Reg) << ':' << i
2519
436k
                   << '@' << LR.getValNumInfo(i)->def << '\n');
2520
436k
      return false;
2521
436k
    }
2522
30.1M
  }
2523
12.1M
  return true;
2524
12.5M
}
2525
2526
bool JoinVals::
2527
taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
2528
27.1k
            SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent) {
2529
27.1k
  VNInfo *VNI = LR.getValNumInfo(ValNo);
2530
27.1k
  MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
2531
27.1k
  SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB);
2532
27.1k
2533
27.1k
  // Scan Other.LR from VNI.def to MBBEnd.
2534
27.1k
  LiveInterval::iterator OtherI = Other.LR.find(VNI->def);
2535
27.1k
  assert(OtherI != Other.LR.end() && "No conflict?");
2536
56.9k
  do {
2537
56.9k
    // OtherI is pointing to a tainted value. Abort the join if the tainted
2538
56.9k
    // lanes escape the block.
2539
56.9k
    SlotIndex End = OtherI->end;
2540
56.9k
    if (
End >= MBBEnd56.9k
) {
2541
12
      DEBUG(dbgs() << "\t\ttaints global " << PrintReg(Other.Reg) << ':'
2542
12
                   << OtherI->valno->id << '@' << OtherI->start << '\n');
2543
12
      return false;
2544
12
    }
2545
56.9k
    
DEBUG56.9k
(dbgs() << "\t\ttaints local " << PrintReg(Other.Reg) << ':'
2546
56.9k
                 << OtherI->valno->id << '@' << OtherI->start
2547
56.9k
                 << " to " << End << '\n');
2548
56.9k
    // A dead def is not a problem.
2549
56.9k
    if (End.isDead())
2550
0
      break;
2551
56.9k
    TaintExtent.push_back(std::make_pair(End, TaintedLanes));
2552
56.9k
2553
56.9k
    // Check for another def in the MBB.
2554
56.9k
    if (
++OtherI == Other.LR.end() || 56.9k
OtherI->start >= MBBEnd33.5k
)
2555
23.6k
      break;
2556
33.3k
2557
33.3k
    // Lanes written by the new def are no longer tainted.
2558
33.3k
    const Val &OV = Other.Vals[OtherI->valno->id];
2559
33.3k
    TaintedLanes &= ~OV.WriteLanes;
2560
33.3k
    if (!OV.RedefVNI)
2561
1.32k
      break;
2562
31.9k
  } while (TaintedLanes.any());
2563
27.1k
  return true;
2564
27.1k
}
2565
2566
bool JoinVals::usesLanes(const MachineInstr &MI, unsigned Reg, unsigned SubIdx,
2567
547k
                         LaneBitmask Lanes) const {
2568
547k
  if (MI.isDebugValue())
2569
2
    return false;
2570
547k
  
for (const MachineOperand &MO : MI.operands()) 547k
{
2571
1.96M
    if (
!MO.isReg() || 1.96M
MO.isDef()1.45M
||
MO.getReg() != Reg913k
)
2572
1.85M
      continue;
2573
106k
    
if (106k
!MO.readsReg()106k
)
2574
4
      continue;
2575
106k
    unsigned S = TRI->composeSubRegIndices(SubIdx, MO.getSubReg());
2576
106k
    if ((Lanes & TRI->getSubRegIndexLaneMask(S)).any())
2577
21.2k
      return true;
2578
526k
  }
2579
526k
  return false;
2580
526k
}
2581
2582
11.9M
bool JoinVals::resolveConflicts(JoinVals &Other) {
2583
40.1M
  for (unsigned i = 0, e = LR.getNumValNums(); 
i != e40.1M
;
++i28.1M
) {
2584
28.2M
    Val &V = Vals[i];
2585
28.2M
    assert(V.Resolution != CR_Impossible && "Unresolvable conflict");
2586
28.2M
    if (V.Resolution != CR_Unresolved)
2587
28.1M
      continue;
2588
27.1k
    
DEBUG27.1k
(dbgs() << "\t\tconflict at " << PrintReg(Reg) << ':' << i
2589
27.1k
                 << '@' << LR.getValNumInfo(i)->def << '\n');
2590
27.1k
    if (SubRangeJoin)
2591
0
      return false;
2592
27.1k
2593
27.1k
    ++NumLaneConflicts;
2594
27.1k
    assert(V.OtherVNI && "Inconsistent conflict resolution.");
2595
27.1k
    VNInfo *VNI = LR.getValNumInfo(i);
2596
27.1k
    const Val &OtherV = Other.Vals[V.OtherVNI->id];
2597
27.1k
2598
27.1k
    // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the
2599
27.1k
    // join, those lanes will be tainted with a wrong value. Get the extent of
2600
27.1k
    // the tainted lanes.
2601
27.1k
    LaneBitmask TaintedLanes = V.WriteLanes & OtherV.ValidLanes;
2602
27.1k
    SmallVector<std::pair<SlotIndex, LaneBitmask>, 8> TaintExtent;
2603
27.1k
    if (!taintExtent(i, TaintedLanes, Other, TaintExtent))
2604
27.1k
      // Tainted lanes would extend beyond the basic block.
2605
12
      return false;
2606
27.1k
2607
27.1k
    assert(!TaintExtent.empty() && "There should be at least one conflict.");
2608
27.1k
2609
27.1k
    // Now look at the instructions from VNI->def to TaintExtent (inclusive).
2610
27.1k
    MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
2611
27.1k
    MachineBasicBlock::iterator MI = MBB->begin();
2612
27.1k
    if (
!VNI->isPHIDef()27.1k
) {
2613
27.1k
      MI = Indexes->getInstructionFromIndex(VNI->def);
2614
27.1k
      // No need to check the instruction defining VNI for reads.
2615
27.1k
      ++MI;
2616
27.1k
    }
2617
27.1k
    assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) &&
2618
27.1k
           "Interference ends on VNI->def. Should have been handled earlier");
2619
27.1k
    MachineInstr *LastMI =
2620
27.1k
      Indexes->getInstructionFromIndex(TaintExtent.front().first);
2621
27.1k
    assert(LastMI && "Range must end at a proper instruction");
2622
27.1k
    unsigned TaintNum = 0;
2623
547k
    while (
true547k
) {
2624
547k
      assert(MI != MBB->end() && "Bad LastMI");
2625
547k
      if (
usesLanes(*MI, Other.Reg, Other.SubIdx, TaintedLanes)547k
) {
2626
21.2k
        DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI);
2627
21.2k
        return false;
2628
21.2k
      }
2629
526k
      // LastMI is the last instruction to use the current value.
2630
526k
      
if (526k
&*MI == LastMI526k
) {
2631
35.2k
        if (++TaintNum == TaintExtent.size())
2632
5.87k
          break;
2633
29.3k
        LastMI = Indexes->getInstructionFromIndex(TaintExtent[TaintNum].first);
2634
29.3k
        assert(LastMI && "Range must end at a proper instruction");
2635
29.3k
        TaintedLanes = TaintExtent[TaintNum].second;
2636
29.3k
      }
2637
520k
      ++MI;
2638
520k
    }
2639
27.1k
2640
27.1k
    // The tainted lanes are unused.
2641
5.87k
    V.Resolution = CR_Replace;
2642
5.87k
    ++NumLaneResolves;
2643
5.87k
  }
2644
11.8M
  return true;
2645
11.9M
}
2646
2647
12.4M
bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) {
2648
12.4M
  Val &V = Vals[ValNo];
2649
12.4M
  if (
V.Pruned || 12.4M
V.PrunedComputed12.4M
)
2650
136k
    return V.Pruned;
2651
12.3M
2652
12.3M
  
if (12.3M
V.Resolution != CR_Erase && 12.3M
V.Resolution != CR_Merge6.10M
)
2653
6.10M
    return V.Pruned;
2654
6.24M
2655
6.24M
  // Follow copies up the dominator tree and check if any intermediate value
2656
6.24M
  // has been pruned.
2657
6.24M
  V.PrunedComputed = true;
2658
6.24M
  V.Pruned = Other.isPrunedValue(V.OtherVNI->id, *this);
2659
6.24M
  return V.Pruned;
2660
6.24M
}
2661
2662
void JoinVals::pruneValues(JoinVals &Other,
2663
                           SmallVectorImpl<SlotIndex> &EndPoints,
2664
11.8M
                           bool changeInstrs) {
2665
40.0M
  for (unsigned i = 0, e = LR.getNumValNums(); 
i != e40.0M
;
++i28.1M
) {
2666
28.1M
    SlotIndex Def = LR.getValNumInfo(i)->def;
2667
28.1M
    switch (Vals[i].Resolution) {
2668
21.8M
    case CR_Keep:
2669
21.8M
      break;
2670
83.6k
    case CR_Replace: {
2671
83.6k
      // This value takes precedence over the value in Other.LR.
2672
83.6k
      LIS->pruneValue(Other.LR, Def, &EndPoints);
2673
83.6k
      // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF
2674
83.6k
      // instructions are only inserted to provide a live-out value for PHI
2675
83.6k
      // predecessors, so the instruction should simply go away once its value
2676
83.6k
      // has been replaced.
2677
83.6k
      Val &OtherV = Other.Vals[Vals[i].OtherVNI->id];
2678
83.6k
      bool EraseImpDef = OtherV.ErasableImplicitDef &&
2679
216
                         OtherV.Resolution == CR_Keep;
2680
83.6k
      if (
!Def.isBlock()83.6k
) {
2681
83.0k
        if (
changeInstrs83.0k
) {
2682
83.0k
          // Remove <def,read-undef> flags. This def is now a partial redef.
2683
83.0k
          // Also remove <def,dead> flags since the joined live range will
2684
83.0k
          // continue past this instruction.
2685
83.0k
          for (MachineOperand &MO :
2686
356k
               Indexes->getInstructionFromIndex(Def)->operands()) {
2687
356k
            if (
MO.isReg() && 356k
MO.isDef()197k
&&
MO.getReg() == Reg87.0k
) {
2688
83.0k
              if (MO.getSubReg() != 0)
2689
49.3k
                MO.setIsUndef(EraseImpDef);
2690
83.0k
              MO.setIsDead(false);
2691
83.0k
            }
2692
356k
          }
2693
83.0k
        }
2694
83.0k
        // This value will reach instructions below, but we need to make sure
2695
83.0k
        // the live range also reaches the instruction at Def.
2696
83.0k
        if (!EraseImpDef)
2697
82.8k
          EndPoints.push_back(Def);
2698
83.0k
      }
2699
83.6k
      DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.Reg) << " at " << Def
2700
83.6k
                   << ": " << Other.LR << '\n');
2701
83.6k
      break;
2702
28.1M
    }
2703
6.24M
    case CR_Erase:
2704
6.24M
    case CR_Merge:
2705
6.24M
      if (
isPrunedValue(i, Other)6.24M
) {
2706
33.4k
        // This value is ultimately a copy of a pruned value in LR or Other.LR.
2707
33.4k
        // We can no longer trust the value mapping computed by
2708
33.4k
        // computeAssignment(), the value that was originally copied could have
2709
33.4k
        // been replaced.
2710
33.4k
        LIS->pruneValue(LR, Def, &EndPoints);
2711
33.4k
        DEBUG(dbgs() << "\t\tpruned all of " << PrintReg(Reg) << " at "
2712
33.4k
                     << Def << ": " << LR << '\n');
2713
33.4k
      }
2714
6.24M
      break;
2715
0
    case CR_Unresolved:
2716
0
    case CR_Impossible:
2717
0
      llvm_unreachable("Unresolved conflicts");
2718
28.1M
    }
2719
28.1M
  }
2720
11.8M
}
2721
2722
254k
void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) {
2723
254k
  // Look for values being erased.
2724
254k
  bool DidPrune = false;
2725
710k
  for (unsigned i = 0, e = LR.getNumValNums(); 
i != e710k
;
++i456k
) {
2726
456k
    // We should trigger in all cases in which eraseInstrs() does something.
2727
456k
    // match what eraseInstrs() is doing, print a message so
2728
456k
    if (Vals[i].Resolution != CR_Erase &&
2729
311k
        
(Vals[i].Resolution != CR_Keep || 311k
!Vals[i].ErasableImplicitDef235k
||
2730
67
         !Vals[i].Pruned))
2731
311k
      continue;
2732
144k
2733
144k
    // Check subranges at the point where the copy will be removed.
2734
144k
    SlotIndex Def = LR.getValNumInfo(i)->def;
2735
144k
    // Print message so mismatches with eraseInstrs() can be diagnosed.
2736
144k
    DEBUG(dbgs() << "\t\tExpecting instruction removal at " << Def << '\n');
2737
505k
    for (LiveInterval::SubRange &S : LI.subranges()) {
2738
505k
      LiveQueryResult Q = S.Query(Def);
2739
505k
2740
505k
      // If a subrange starts at the copy then an undefined value has been
2741
505k
      // copied and we must remove that subrange value as well.
2742
505k
      VNInfo *ValueOut = Q.valueOutOrDead();
2743
505k
      if (
ValueOut != nullptr && 505k
Q.valueIn() == nullptr376k
) {
2744
190
        DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask)
2745
190
                     << " at " << Def << "\n");
2746
190
        LIS->pruneValue(S, Def, nullptr);
2747
190
        DidPrune = true;
2748
190
        // Mark value number as unused.
2749
190
        ValueOut->markUnused();
2750
190
        continue;
2751
190
      }
2752
505k
      // If a subrange ends at the copy, then a value was copied but only
2753
505k
      // partially used later. Shrink the subregister range appropriately.
2754
505k
      
if (505k
Q.valueIn() != nullptr && 505k
Q.valueOut() == nullptr376k
) {
2755
41
        DEBUG(dbgs() << "\t\tDead uses at sublane " << PrintLaneMask(S.LaneMask)
2756
41
                     << " at " << Def << "\n");
2757
41
        ShrinkMask |= S.LaneMask;
2758
41
      }
2759
505k
    }
2760
456k
  }
2761
254k
  if (DidPrune)
2762
164
    LI.removeEmptySubRanges();
2763
254k
}
2764
2765
/// Check if any of the subranges of @p LI contain a definition at @p Def.
2766
169k
static bool isDefInSubRange(LiveInterval &LI, SlotIndex Def) {
2767
383k
  for (LiveInterval::SubRange &SR : LI.subranges()) {
2768
383k
    if (VNInfo *VNI = SR.Query(Def).valueOutOrDead())
2769
379k
      
if (379k
VNI->def == Def379k
)
2770
169k
        return true;
2771
0
  }
2772
0
  return false;
2773
0
}
2774
2775
127k
void JoinVals::pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange) {
2776
127k
  assert(&static_cast<LiveRange&>(LI) == &LR);
2777
127k
2778
440k
  for (unsigned i = 0, e = LR.getNumValNums(); 
i != e440k
;
++i313k
) {
2779
313k
    if (Vals[i].Resolution != CR_Keep)
2780
142k
      continue;
2781
170k
    VNInfo *VNI = LR.getValNumInfo(i);
2782
170k
    if (
VNI->isUnused() || 170k
VNI->isPHIDef()170k
||
isDefInSubRange(LI, VNI->def)169k
)
2783
170k
      continue;
2784
0
    Vals[i].Pruned = true;
2785
0
    ShrinkMainRange = true;
2786
0
  }
2787
127k
}
2788
2789
302k
void JoinVals::removeImplicitDefs() {
2790
620k
  for (unsigned i = 0, e = LR.getNumValNums(); 
i != e620k
;
++i317k
) {
2791
317k
    Val &V = Vals[i];
2792
317k
    if (
V.Resolution != CR_Keep || 317k
!V.ErasableImplicitDef169k
||
!V.Pruned74
)
2793
317k
      continue;
2794
1
2795
1
    VNInfo *VNI = LR.getValNumInfo(i);
2796
1
    VNI->markUnused();
2797
1
    LR.removeValNo(VNI);
2798
1
  }
2799
302k
}
2800
2801
void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
2802
                           SmallVectorImpl<unsigned> &ShrinkRegs,
2803
11.5M
                           LiveInterval *LI) {
2804
39.4M
  for (unsigned i = 0, e = LR.getNumValNums(); 
i != e39.4M
;
++i27.8M
) {
2805
27.8M
    // Get the def location before markUnused() below invalidates it.
2806
27.8M
    SlotIndex Def = LR.getValNumInfo(i)->def;
2807
27.8M
    switch (Vals[i].Resolution) {
2808
21.6M
    case CR_Keep: {
2809
21.6M
      // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
2810
21.6M
      // longer. The IMPLICIT_DEF instructions are only inserted by
2811
21.6M
      // PHIElimination to guarantee that all PHI predecessors have a value.
2812
21.6M
      if (
!Vals[i].ErasableImplicitDef || 21.6M
!Vals[i].Pruned12.2k
)
2813
21.6M
        break;
2814
215
      // Remove value number i from LR.
2815
215
      // For intervals with subranges, removing a segment from the main range
2816
215
      // may require extending the previous segment: for each definition of
2817
215
      // a subregister, there will be a corresponding def in the main range.
2818
215
      // That def may fall in the middle of a segment from another subrange.
2819
215
      // In such cases, removing this def from the main range must be
2820
215
      // complemented by extending the main range to account for the liveness
2821
215
      // of the other subrange.
2822
215
      VNInfo *VNI = LR.getValNumInfo(i);
2823
215
      SlotIndex Def = VNI->def;
2824
215
      // The new end point of the main range segment to be extended.
2825
215
      SlotIndex NewEnd;
2826
215
      if (
LI != nullptr215
) {
2827
213
        LiveRange::iterator I = LR.FindSegmentContaining(Def);
2828
213
        assert(I != LR.end());
2829
213
        // Do not extend beyond the end of the segment being removed.
2830
213
        // The segment may have been pruned in preparation for joining
2831
213
        // live ranges.
2832
213
        NewEnd = I->end;
2833
213
      }
2834
215
2835
215
      LR.removeValNo(VNI);
2836
215
      // Note that this VNInfo is reused and still referenced in NewVNInfo,
2837
215
      // make it appear like an unused value number.
2838
215
      VNI->markUnused();
2839
215
2840
215
      if (
LI != nullptr && 215
LI->hasSubRanges()213
) {
2841
0
        assert(static_cast<LiveRange*>(LI) == &LR);
2842
0
        // Determine the end point based on the subrange information:
2843
0
        // minimum of (earliest def of next segment,
2844
0
        //             latest end point of containing segment)
2845
0
        SlotIndex ED, LE;
2846
0
        for (LiveInterval::SubRange &SR : LI->subranges()) {
2847
0
          LiveRange::iterator I = SR.find(Def);
2848
0
          if (I == SR.end())
2849
0
            continue;
2850
0
          
if (0
I->start > Def0
)
2851
0
            
ED = ED.isValid() ? 0
std::min(ED, I->start)0
:
I->start0
;
2852
0
          else
2853
0
            
LE = LE.isValid() ? 0
std::max(LE, I->end)0
:
I->end0
;
2854
0
        }
2855
0
        if (LE.isValid())
2856
0
          NewEnd = std::min(NewEnd, LE);
2857
0
        if (ED.isValid())
2858
0
          NewEnd = std::min(NewEnd, ED);
2859
0
2860
0
        // We only want to do the extension if there was a subrange that
2861
0
        // was live across Def.
2862
0
        if (
LE.isValid()0
) {
2863
0
          LiveRange::iterator S = LR.find(Def);
2864
0
          if (S != LR.begin())
2865
0
            std::prev(S)->end = NewEnd;
2866
0
        }
2867
0
      }
2868
215
      DEBUG({
2869
215
        dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n';
2870
215
        if (LI != nullptr)
2871
215
          dbgs() << "\t\t  LHS = " << *LI << '\n';
2872
215
      });
2873
215
      LLVM_FALLTHROUGH;
2874
215
    }
2875
215
2876
6.08M
    case CR_Erase: {
2877
6.08M
      MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
2878
6.08M
      assert(MI && "No instruction to erase");
2879
6.08M
      if (
MI->isCopy()6.08M
) {
2880
5.51M
        unsigned Reg = MI->getOperand(1).getReg();
2881
5.51M
        if (TargetRegisterInfo::isVirtualRegister(Reg) &&
2882
5.51M
            
Reg != CP.getSrcReg()5.51M
&&
Reg != CP.getDstReg()1.87M
)
2883
7.59k
          ShrinkRegs.push_back(Reg);
2884
5.51M
      }
2885
6.08M
      ErasedInstrs.insert(MI);
2886
6.08M
      DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);
2887
6.08M
      LIS->RemoveMachineInstrFromMaps(*MI);
2888
6.08M
      MI->eraseFromParent();
2889
6.08M
      break;
2890
215
    }
2891
88.3k
    default:
2892
88.3k
      break;
2893
27.8M
    }
2894
27.8M
  }
2895
11.5M
}
2896
2897
void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
2898
                                         LaneBitmask LaneMask,
2899
151k
                                         const CoalescerPair &CP) {
2900
151k
  SmallVector<VNInfo*, 16> NewVNInfo;
2901
151k
  JoinVals RHSVals(RRange, CP.getSrcReg(), CP.getSrcIdx(), LaneMask,
2902
151k
                   NewVNInfo, CP, LIS, TRI, true, true);
2903
151k
  JoinVals LHSVals(LRange, CP.getDstReg(), CP.getDstIdx(), LaneMask,
2904
151k
                   NewVNInfo, CP, LIS, TRI, true, true);
2905
151k
2906
151k
  // Compute NewVNInfo and resolve conflicts (see also joinVirtRegs())
2907
151k
  // We should be able to resolve all conflicts here as we could successfully do
2908
151k
  // it on the mainrange already. There is however a problem when multiple
2909
151k
  // ranges get mapped to the "overflow" lane mask bit which creates unexpected
2910
151k
  // interferences.
2911
151k
  if (
!LHSVals.mapValues(RHSVals) || 151k
!RHSVals.mapValues(LHSVals)151k
) {
2912
0
    // We already determined that it is legal to merge the intervals, so this
2913
0
    // should never fail.
2914
0
    llvm_unreachable("*** Couldn't join subrange!\n");
2915
0
  }
2916
151k
  
if (151k
!LHSVals.resolveConflicts(RHSVals) ||
2917
151k
      
!RHSVals.resolveConflicts(LHSVals)151k
) {
2918
0
    // We already determined that it is legal to merge the intervals, so this
2919
0
    // should never fail.
2920
0
    llvm_unreachable("*** Couldn't join subrange!\n");
2921
0
  }
2922
151k
2923
151k
  // The merging algorithm in LiveInterval::join() can't handle conflicting
2924
151k
  // value mappings, so we need to remove any live ranges that overlap a
2925
151k
  // CR_Replace resolution. Collect a set of end points that can be used to
2926
151k
  // restore the live range after joining.
2927
151k
  SmallVector<SlotIndex, 8> EndPoints;
2928
151k
  LHSVals.pruneValues(RHSVals, EndPoints, false);
2929
151k
  RHSVals.pruneValues(LHSVals, EndPoints, false);
2930
151k
2931
151k
  LHSVals.removeImplicitDefs();
2932
151k
  RHSVals.removeImplicitDefs();
2933
151k
2934
151k
  LRange.verify();
2935
151k
  RRange.verify();
2936
151k
2937
151k
  // Join RRange into LHS.
2938
151k
  LRange.join(RRange, LHSVals.getAssignments(), RHSVals.getAssignments(),
2939
151k
              NewVNInfo);
2940
151k
2941
151k
  DEBUG(dbgs() << "\t\tjoined lanes: " << LRange << "\n");
2942
151k
  if (EndPoints.empty())
2943
151k
    return;
2944
1
2945
1
  // Recompute the parts of the live range we had to remove because of
2946
1
  // CR_Replace conflicts.
2947
1
  
DEBUG1
({
2948
1
    dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
2949
1
    for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
2950
1
      dbgs() << EndPoints[i];
2951
1
      if (i != n-1)
2952
1
        dbgs() << ',';
2953
1
    }
2954
1
    dbgs() << ":  " << LRange << '\n';
2955
1
  });
2956
1
  LIS->extendToIndices(LRange, EndPoints);
2957
1
}
2958
2959
void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
2960
                                          const LiveRange &ToMerge,
2961
                                          LaneBitmask LaneMask,
2962
150k
                                          CoalescerPair &CP) {
2963
150k
  BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
2964
150k
  LI.refineSubRanges(Allocator, LaneMask,
2965
151k
      [this,&Allocator,&ToMerge,&CP](LiveInterval::SubRange &SR) {
2966
151k
    if (
SR.empty()151k
) {
2967
215
      SR.assign(ToMerge, Allocator);
2968
151k
    } else {
2969
151k
      // joinSubRegRange() destroys the merged range, so we need a copy.
2970
151k
      LiveRange RangeCopy(ToMerge, Allocator);
2971
151k
      joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP);
2972
151k
    }
2973
151k
  });
2974
150k
}
2975
2976
6.24M
bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
2977
6.24M
  SmallVector<VNInfo*, 16> NewVNInfo;
2978
6.24M
  LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
2979
6.24M
  LiveInterval &LHS = LIS->getInterval(CP.getDstReg());
2980
6.24M
  bool TrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(*CP.getNewRC());
2981
6.24M
  JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), LaneBitmask::getNone(),
2982
6.24M
                   NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);
2983
6.24M
  JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), LaneBitmask::getNone(),
2984
6.24M
                   NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);
2985
6.24M
2986
6.24M
  DEBUG(dbgs() << "\t\tRHS = " << RHS
2987
6.24M
               << "\n\t\tLHS = " << LHS
2988
6.24M
               << '\n');
2989
6.24M
2990
6.24M
  // First compute NewVNInfo and the simple value mappings.
2991
6.24M
  // Detect impossible conflicts early.
2992
6.24M
  if (
!LHSVals.mapValues(RHSVals) || 6.24M
!RHSVals.mapValues(LHSVals)6.01M
)
2993
436k
    return false;
2994
5.81M
2995
5.81M
  // Some conflicts can only be resolved after all values have been mapped.
2996
5.81M
  
if (5.81M
!LHSVals.resolveConflicts(RHSVals) || 5.81M
!RHSVals.resolveConflicts(LHSVals)5.79M
)
2997
21.2k
    return false;
2998
5.79M
2999
5.79M
  // All clear, the live ranges can be merged.
3000
5.79M
  
if (5.79M
RHS.hasSubRanges() || 5.79M
LHS.hasSubRanges()5.77M
) {
3001
127k
    BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
3002
127k
3003
127k
    // Transform lanemasks from the LHS to masks in the coalesced register and
3004
127k
    // create initial subranges if necessary.
3005
127k
    unsigned DstIdx = CP.getDstIdx();
3006
127k
    if (
!LHS.hasSubRanges()127k
) {
3007
190
      LaneBitmask Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask()
3008
0
                                     : TRI->getSubRegIndexLaneMask(DstIdx);
3009
190
      // LHS must support subregs or we wouldn't be in this codepath.
3010
190
      assert(Mask.any());
3011
190
      LHS.createSubRangeFrom(Allocator, Mask, LHS);
3012
127k
    } else 
if (127k
DstIdx != 0127k
) {
3013
0
      // Transform LHS lanemasks to new register class if necessary.
3014
0
      for (LiveInterval::SubRange &R : LHS.subranges()) {
3015
0
        LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(DstIdx, R.LaneMask);
3016
0
        R.LaneMask = Mask;
3017
0
      }
3018
127k
    }
3019
127k
    DEBUG(dbgs() << "\t\tLHST = " << PrintReg(CP.getDstReg())
3020
127k
                 << ' ' << LHS << '\n');
3021
127k
3022
127k
    // Determine lanemasks of RHS in the coalesced register and merge subranges.
3023
127k
    unsigned SrcIdx = CP.getSrcIdx();
3024
127k
    if (
!RHS.hasSubRanges()127k
) {
3025
472
      LaneBitmask Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask()
3026
107k
                                     : TRI->getSubRegIndexLaneMask(SrcIdx);
3027
108k
      mergeSubRangeInto(LHS, RHS, Mask, CP);
3028
127k
    } else {
3029
19.2k
      // Pair up subranges and merge.
3030
42.6k
      for (LiveInterval::SubRange &R : RHS.subranges()) {
3031
42.6k
        LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask);
3032
42.6k
        mergeSubRangeInto(LHS, R, Mask, CP);
3033
42.6k
      }
3034
19.2k
    }
3035
127k
    DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
3036
127k
3037
127k
    // Pruning implicit defs from subranges may result in the main range
3038
127k
    // having stale segments.
3039
127k
    LHSVals.pruneMainSegments(LHS, ShrinkMainRange);
3040
127k
3041
127k
    LHSVals.pruneSubRegValues(LHS, ShrinkMask);
3042
127k
    RHSVals.pruneSubRegValues(LHS, ShrinkMask);
3043
127k
  }
3044
5.79M
3045
5.79M
  // The merging algorithm in LiveInterval::join() can't handle conflicting
3046
5.79M
  // value mappings, so we need to remove any live ranges that overlap a
3047
5.79M
  // CR_Replace resolution. Collect a set of end points that can be used to
3048
5.79M
  // restore the live range after joining.
3049
5.79M
  SmallVector<SlotIndex, 8> EndPoints;
3050
5.79M
  LHSVals.pruneValues(RHSVals, EndPoints, true);
3051
5.79M
  RHSVals.pruneValues(LHSVals, EndPoints, true);
3052
5.79M
3053
5.79M
  // Erase COPY and IMPLICIT_DEF instructions. This may cause some external
3054
5.79M
  // registers to require trimming.
3055
5.79M
  SmallVector<unsigned, 8> ShrinkRegs;
3056
5.79M
  LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs, &LHS);
3057
5.79M
  RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
3058
5.79M
  while (!ShrinkRegs.empty())
3059
7.59k
    shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val()));
3060
5.79M
3061
5.79M
  // Join RHS into LHS.
3062
5.79M
  LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo);
3063
5.79M
3064
5.79M
  // Kill flags are going to be wrong if the live ranges were overlapping.
3065
5.79M
  // Eventually, we should simply clear all kill flags when computing live
3066
5.79M
  // ranges. They are reinserted after register allocation.
3067
5.79M
  MRI->clearKillFlags(LHS.reg);
3068
5.79M
  MRI->clearKillFlags(RHS.reg);
3069
5.79M
3070
5.79M
  if (
!EndPoints.empty()5.79M
) {
3071
56.0k
    // Recompute the parts of the live range we had to remove because of
3072
56.0k
    // CR_Replace conflicts.
3073
56.0k
    DEBUG({
3074
56.0k
      dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
3075
56.0k
      for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
3076
56.0k
        dbgs() << EndPoints[i];
3077
56.0k
        if (i != n-1)
3078
56.0k
          dbgs() << ',';
3079
56.0k
      }
3080
56.0k
      dbgs() << ":  " << LHS << '\n';
3081
56.0k
    });
3082
56.0k
    LIS->extendToIndices((LiveRange&)LHS, EndPoints);
3083
56.0k
  }
3084
6.24M
3085
6.24M
  return true;
3086
6.24M
}
3087
3088
7.28M
bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
3089
7.28M
  return CP.isPhys() ? 
joinReservedPhysReg(CP)1.03M
:
joinVirtRegs(CP)6.24M
;
3090
7.28M
}
3091
3092
namespace {
3093
3094
/// Information concerning MBB coalescing priority.
3095
struct MBBPriorityInfo {
3096
  MachineBasicBlock *MBB;
3097
  unsigned Depth;
3098
  bool IsSplit;
3099
3100
  MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit)
3101
4.24M
    : MBB(mbb), Depth(depth), IsSplit(issplit) {}
3102
};
3103
3104
} // end anonymous namespace
3105
3106
/// C-style comparator that sorts first based on the loop depth of the basic
3107
/// block (the unsigned), and then on the MBB number.
3108
///
3109
/// EnableGlobalCopies assumes that the primary sort key is loop depth.
3110
static int compareMBBPriority(const MBBPriorityInfo *LHS,
3111
18.5M
                              const MBBPriorityInfo *RHS) {
3112
18.5M
  // Deeper loops first
3113
18.5M
  if (LHS->Depth != RHS->Depth)
3114
3.00M
    
return LHS->Depth > RHS->Depth ? 3.00M
-11.85M
:
11.14M
;
3115
15.5M
3116
15.5M
  // Try to unsplit critical edges next.
3117
15.5M
  
if (15.5M
LHS->IsSplit != RHS->IsSplit15.5M
)
3118
0
    
return LHS->IsSplit ? 0
-10
:
10
;
3119
15.5M
3120
15.5M
  // Prefer blocks that are more connected in the CFG. This takes care of
3121
15.5M
  // the most difficult copies first while intervals are short.
3122
15.5M
  unsigned cl = LHS->MBB->pred_size() + LHS->MBB->succ_size();
3123
15.5M
  unsigned cr = RHS->MBB->pred_size() + RHS->MBB->succ_size();
3124
15.5M
  if (cl != cr)
3125
5.28M
    
return cl > cr ? 5.28M
-12.31M
:
12.97M
;
3126
10.2M
3127
10.2M
  // As a last resort, sort by block number.
3128
10.2M
  
return LHS->MBB->getNumber() < RHS->MBB->getNumber() ? 10.2M
-14.88M
:
15.39M
;
3129
18.5M
}
3130
3131
/// \returns true if the given copy uses or defines a local live range.
3132
15.3M
static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) {
3133
15.3M
  if (!Copy->isCopy())
3134
568k
    return false;
3135
14.8M
3136
14.8M
  
if (14.8M
Copy->getOperand(1).isUndef()14.8M
)
3137
14
    return false;
3138
14.8M
3139
14.8M
  unsigned SrcReg = Copy->getOperand(1).getReg();
3140
14.8M
  unsigned DstReg = Copy->getOperand(0).getReg();
3141
14.8M
  if (TargetRegisterInfo::isPhysicalRegister(SrcReg)
3142
11.7M
      || TargetRegisterInfo::isPhysicalRegister(DstReg))
3143
8.72M
    return false;
3144
6.08M
3145
6.08M
  return LIS->intervalIsInOneMBB(LIS->getInterval(SrcReg))
3146
2.07M
    || LIS->intervalIsInOneMBB(LIS->getInterval(DstReg));
3147
15.3M
}
3148
3149
bool RegisterCoalescer::
3150
6.16M
copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
3151
6.16M
  bool Progress = false;
3152
26.7M
  for (unsigned i = 0, e = CurrList.size(); 
i != e26.7M
;
++i20.5M
) {
3153
20.5M
    if (!CurrList[i])
3154
1.92M
      continue;
3155
18.6M
    // Skip instruction pointers that have already been erased, for example by
3156
18.6M
    // dead code elimination.
3157
18.6M
    
if (18.6M
ErasedInstrs.count(CurrList[i])18.6M
) {
3158
129k
      CurrList[i] = nullptr;
3159
129k
      continue;
3160
129k
    }
3161
18.5M
    bool Again = false;
3162
18.5M
    bool Success = joinCopy(CurrList[i], Again);
3163
18.5M
    Progress |= Success;
3164
18.5M
    if (
Success || 18.5M
!Again10.9M
)
3165
13.3M
      CurrList[i] = nullptr;
3166
20.5M
  }
3167
6.16M
  return Progress;
3168
6.16M
}
3169
3170
/// Check if DstReg is a terminal node.
3171
/// I.e., it does not have any affinity other than \p Copy.
3172
static bool isTerminalReg(unsigned DstReg, const MachineInstr &Copy,
3173
21
                          const MachineRegisterInfo *MRI) {
3174
21
  assert(Copy.isCopyLike());
3175
21
  // Check if the destination of this copy as any other affinity.
3176
21
  for (const MachineInstr &MI : MRI->reg_nodbg_instructions(DstReg))
3177
48
    
if (48
&MI != &Copy && 48
MI.isCopyLike()33
)
3178
19
      return false;
3179
2
  return true;
3180
2
}
3181
3182
15.6M
bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
3183
15.6M
  assert(Copy.isCopyLike());
3184
15.6M
  if (!UseTerminalRule)
3185
15.6M
    return false;
3186
33
  unsigned DstReg, DstSubReg, SrcReg, SrcSubReg;
3187
33
  isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg);
3188
33
  // Check if the destination of this copy has any other affinity.
3189
33
  if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
3190
33
      // If SrcReg is a physical register, the copy won't be coalesced.
3191
33
      // Ignoring it may have other side effect (like missing
3192
33
      // rematerialization). So keep it.
3193
25
      TargetRegisterInfo::isPhysicalRegister(SrcReg) ||
3194
20
      !isTerminalReg(DstReg, Copy, MRI))
3195
31
    return false;
3196
2
3197
2
  // DstReg is a terminal node. Check if it interferes with any other
3198
2
  // copy involving SrcReg.
3199
2
  const MachineBasicBlock *OrigBB = Copy.getParent();
3200
2
  const LiveInterval &DstLI = LIS->getInterval(DstReg);
3201
6
  for (const MachineInstr &MI : MRI->reg_nodbg_instructions(SrcReg)) {
3202
6
    // Technically we should check if the weight of the new copy is
3203
6
    // interesting compared to the other one and update the weight
3204
6
    // of the copies accordingly. However, this would only work if
3205
6
    // we would gather all the copies first then coalesce, whereas
3206
6
    // right now we interleave both actions.
3207
6
    // For now, just consider the copies that are in the same block.
3208
6
    if (
&MI == &Copy || 6
!MI.isCopyLike()4
||
MI.getParent() != OrigBB1
)
3209
5
      continue;
3210
1
    unsigned OtherReg, OtherSubReg, OtherSrcReg, OtherSrcSubReg;
3211
1
    isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg,
3212
1
                OtherSubReg);
3213
1
    if (OtherReg == SrcReg)
3214
0
      OtherReg = OtherSrcReg;
3215
1
    // Check if OtherReg is a non-terminal.
3216
1
    if (TargetRegisterInfo::isPhysicalRegister(OtherReg) ||
3217
1
        isTerminalReg(OtherReg, MI, MRI))
3218
0
      continue;
3219
1
    // Check that OtherReg interfere with DstReg.
3220
1
    
if (1
LIS->getInterval(OtherReg).overlaps(DstLI)1
) {
3221
1
      DEBUG(dbgs() << "Apply terminal rule for: " << PrintReg(DstReg) << '\n');
3222
1
      return true;
3223
1
    }
3224
1
  }
3225
1
  return false;
3226
1
}
3227
3228
void
3229
4.24M
RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
3230
4.24M
  DEBUG(dbgs() << MBB->getName() << ":\n");
3231
4.24M
3232
4.24M
  // Collect all copy-like instructions in MBB. Don't start coalescing anything
3233
4.24M
  // yet, it might invalidate the iterator.
3234
4.24M
  const unsigned PrevSize = WorkList.size();
3235
4.24M
  if (
JoinGlobalCopies4.24M
) {
3236
4.16M
    SmallVector<MachineInstr*, 2> LocalTerminals;
3237
4.16M
    SmallVector<MachineInstr*, 2> GlobalTerminals;
3238
4.16M
    // Coalesce copies bottom-up to coalesce local defs before local uses. They
3239
4.16M
    // are not inherently easier to resolve, but slightly preferable until we
3240
4.16M
    // have local live range splitting. In particular this is required by
3241
4.16M
    // cmp+jmp macro fusion.
3242
4.16M
    for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
3243
42.0M
         
MII != E42.0M
;
++MII37.8M
) {
3244
37.8M
      if (!MII->isCopyLike())
3245
22.4M
        continue;
3246
15.3M
      bool ApplyTerminalRule = applyTerminalRule(*MII);
3247
15.3M
      if (
isLocalCopy(&(*MII), LIS)15.3M
) {
3248
5.09M
        if (ApplyTerminalRule)
3249
1
          LocalTerminals.push_back(&(*MII));
3250
5.09M
        else
3251
5.09M
          LocalWorkList.push_back(&(*MII));
3252
15.3M
      } else {
3253
10.2M
        if (ApplyTerminalRule)
3254
0
          GlobalTerminals.push_back(&(*MII));
3255
10.2M
        else
3256
10.2M
          WorkList.push_back(&(*MII));
3257
10.2M
      }
3258
37.8M
    }
3259
4.16M
    // Append the copies evicted by the terminal rule at the end of the list.
3260
4.16M
    LocalWorkList.append(LocalTerminals.begin(), LocalTerminals.end());
3261
4.16M
    WorkList.append(GlobalTerminals.begin(), GlobalTerminals.end());
3262
4.16M
  }
3263
80.0k
  else {
3264
80.0k
    SmallVector<MachineInstr*, 2> Terminals;
3265
80.0k
    for (MachineInstr &MII : *MBB)
3266
662k
      
if (662k
MII.isCopyLike()662k
) {
3267
248k
        if (applyTerminalRule(MII))
3268
0
          Terminals.push_back(&MII);
3269
248k
        else
3270
248k
          WorkList.push_back(&MII);
3271
662k
      }
3272
80.0k
    // Append the copies evicted by the terminal rule at the end of the list.
3273
80.0k
    WorkList.append(Terminals.begin(), Terminals.end());
3274
80.0k
  }
3275
4.24M
  // Try coalescing the collected copies immediately, and remove the nulls.
3276
4.24M
  // This prevents the WorkList from getting too large since most copies are
3277
4.24M
  // joinable on the first attempt.
3278
4.24M
  MutableArrayRef<MachineInstr*>
3279
4.24M
    CurrList(WorkList.begin() + PrevSize, WorkList.end());
3280
4.24M
  if (copyCoalesceWorkList(CurrList))
3281
1.76M
    WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(),
3282
1.76M
                               nullptr), WorkList.end());
3283
4.24M
}
3284
3285
1.31M
void RegisterCoalescer::coalesceLocals() {
3286
1.31M
  copyCoalesceWorkList(LocalWorkList);
3287
6.40M
  for (unsigned j = 0, je = LocalWorkList.size(); 
j != je6.40M
;
++j5.09M
) {
3288
5.09M
    if (LocalWorkList[j])
3289
87.7k
      WorkList.push_back(LocalWorkList[j]);
3290
5.09M
  }
3291
1.31M
  LocalWorkList.clear();
3292
1.31M
}
3293
3294
588k
void RegisterCoalescer::joinAllIntervals() {
3295
588k
  DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
3296
588k
  assert(WorkList.empty() && LocalWorkList.empty() && "Old data still around.");
3297
588k
3298
588k
  std::vector<MBBPriorityInfo> MBBs;
3299
588k
  MBBs.reserve(MF->size());
3300
4.83M
  for (MachineFunction::iterator I = MF->begin(), E = MF->end(); 
I != E4.83M
;
++I4.24M
) {
3301
4.24M
    MachineBasicBlock *MBB = &*I;
3302
4.24M
    MBBs.push_back(MBBPriorityInfo(MBB, Loops->getLoopDepth(MBB),
3303
0
                                   JoinSplitEdges && isSplitEdge(MBB)));
3304
4.24M
  }
3305
588k
  array_pod_sort(MBBs.begin(), MBBs.end(), compareMBBPriority);
3306
588k
3307
588k
  // Coalesce intervals in MBB priority order.
3308
588k
  unsigned CurrDepth = std::numeric_limits<unsigned>::max();
3309
4.83M
  for (unsigned i = 0, e = MBBs.size(); 
i != e4.83M
;
++i4.24M
) {
3310
4.24M
    // Try coalescing the collected local copies for deeper loops.
3311
4.24M
    if (
JoinGlobalCopies && 4.24M
MBBs[i].Depth < CurrDepth4.16M
) {
3312
724k
      coalesceLocals();
3313
724k
      CurrDepth = MBBs[i].Depth;
3314
724k
    }
3315
4.24M
    copyCoalesceInMBB(MBBs[i].MBB);
3316
4.24M
  }
3317
588k
  coalesceLocals();
3318
588k
3319
588k
  // Joining intervals can allow other intervals to be joined.  Iteratively join
3320
588k
  // until we make no progress.
3321
600k
  while (copyCoalesceWorkList(WorkList))
3322
11.7k
    /* empty */ ;
3323
588k
}
3324
3325
588k
void RegisterCoalescer::releaseMemory() {
3326
588k
  ErasedInstrs.clear();
3327
588k
  WorkList.clear();
3328
588k
  DeadDefs.clear();
3329
588k
  InflateRegs.clear();
3330
588k
}
3331
3332
588k
bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
3333
588k
  MF = &fn;
3334
588k
  MRI = &fn.getRegInfo();
3335
588k
  const TargetSubtargetInfo &STI = fn.getSubtarget();
3336
588k
  TRI = STI.getRegisterInfo();
3337
588k
  TII = STI.getInstrInfo();
3338
588k
  LIS = &getAnalysis<LiveIntervals>();
3339
588k
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
3340
588k
  Loops = &getAnalysis<MachineLoopInfo>();
3341
588k
  if (EnableGlobalCopies == cl::BOU_UNSET)
3342
588k
    JoinGlobalCopies = STI.enableJoinGlobalCopies();
3343
588k
  else
3344
4
    JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
3345
588k
3346
588k
  // The MachineScheduler does not currently require JoinSplitEdges. This will
3347
588k
  // either be enabled unconditionally or replaced by a more general live range
3348
588k
  // splitting optimization.
3349
588k
  JoinSplitEdges = EnableJoinSplits;
3350
588k
3351
588k
  DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
3352
588k
               << "********** Function: " << MF->getName() << '\n');
3353
588k
3354
588k
  if (VerifyCoalescing)
3355
46
    MF->verify(this, "Before register coalescing");
3356
588k
3357
588k
  RegClassInfo.runOnMachineFunction(fn);
3358
588k
3359
588k
  // Join (coalesce) intervals if requested.
3360
588k
  if (EnableJoining)
3361
588k
    joinAllIntervals();
3362
588k
3363
588k
  // After deleting a lot of copies, register classes may be less constrained.
3364
588k
  // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 ->
3365
588k
  // DPR inflation.
3366
588k
  array_pod_sort(InflateRegs.begin(), InflateRegs.end());
3367
588k
  InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()),
3368
588k
                    InflateRegs.end());
3369
588k
  DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size() << " regs.\n");
3370
597k
  for (unsigned i = 0, e = InflateRegs.size(); 
i != e597k
;
++i8.91k
) {
3371
8.91k
    unsigned Reg = InflateRegs[i];
3372
8.91k
    if (MRI->reg_nodbg_empty(Reg))
3373
1.17k
      continue;
3374
7.73k
    
if (7.73k
MRI->recomputeRegClass(Reg)7.73k
) {
3375
2.48k
      DEBUG(dbgs() << PrintReg(Reg) << " inflated to "
3376
2.48k
                   << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n');
3377
2.48k
      ++NumInflated;
3378
2.48k
3379
2.48k
      LiveInterval &LI = LIS->getInterval(Reg);
3380
2.48k
      if (
LI.hasSubRanges()2.48k
) {
3381
0
        // If the inflated register class does not support subregisters anymore
3382
0
        // remove the subranges.
3383
0
        if (
!MRI->shouldTrackSubRegLiveness(Reg)0
) {
3384
0
          LI.clearSubRanges();
3385
0
        } else {
3386
#ifndef NDEBUG
3387
          LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
3388
          // If subranges are still supported, then the same subregs
3389
          // should still be supported.
3390
          for (LiveInterval::SubRange &S : LI.subranges()) {
3391
            assert((S.LaneMask & ~MaxMask).none());
3392
          }
3393
#endif
3394
        }
3395
0
      }
3396
2.48k
    }
3397
8.91k
  }
3398
588k
3399
588k
  DEBUG(dump());
3400
588k
  if (VerifyCoalescing)
3401
46
    MF->verify(this, "After register coalescing");
3402
588k
  return true;
3403
588k
}
3404
3405
0
void RegisterCoalescer::print(raw_ostream &O, const Module* m) const {
3406
0
   LIS->print(O, m);
3407
0
}