Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/CodeGen/RegisterCoalescer.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface ------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements the generic RegisterCoalescer interface which
10
// is used as the common interface used by all clients and
11
// implementations of register coalescing.
12
//
13
//===----------------------------------------------------------------------===//
14
15
#include "RegisterCoalescer.h"
16
#include "llvm/ADT/ArrayRef.h"
17
#include "llvm/ADT/BitVector.h"
18
#include "llvm/ADT/DenseSet.h"
19
#include "llvm/ADT/STLExtras.h"
20
#include "llvm/ADT/SmallPtrSet.h"
21
#include "llvm/ADT/SmallVector.h"
22
#include "llvm/ADT/Statistic.h"
23
#include "llvm/Analysis/AliasAnalysis.h"
24
#include "llvm/CodeGen/LiveInterval.h"
25
#include "llvm/CodeGen/LiveIntervals.h"
26
#include "llvm/CodeGen/LiveRangeEdit.h"
27
#include "llvm/CodeGen/MachineBasicBlock.h"
28
#include "llvm/CodeGen/MachineFunction.h"
29
#include "llvm/CodeGen/MachineFunctionPass.h"
30
#include "llvm/CodeGen/MachineInstr.h"
31
#include "llvm/CodeGen/MachineInstrBuilder.h"
32
#include "llvm/CodeGen/MachineLoopInfo.h"
33
#include "llvm/CodeGen/MachineOperand.h"
34
#include "llvm/CodeGen/MachineRegisterInfo.h"
35
#include "llvm/CodeGen/Passes.h"
36
#include "llvm/CodeGen/RegisterClassInfo.h"
37
#include "llvm/CodeGen/SlotIndexes.h"
38
#include "llvm/CodeGen/TargetInstrInfo.h"
39
#include "llvm/CodeGen/TargetOpcodes.h"
40
#include "llvm/CodeGen/TargetRegisterInfo.h"
41
#include "llvm/CodeGen/TargetSubtargetInfo.h"
42
#include "llvm/IR/DebugLoc.h"
43
#include "llvm/MC/LaneBitmask.h"
44
#include "llvm/MC/MCInstrDesc.h"
45
#include "llvm/MC/MCRegisterInfo.h"
46
#include "llvm/Pass.h"
47
#include "llvm/Support/CommandLine.h"
48
#include "llvm/Support/Compiler.h"
49
#include "llvm/Support/Debug.h"
50
#include "llvm/Support/ErrorHandling.h"
51
#include "llvm/Support/raw_ostream.h"
52
#include <algorithm>
53
#include <cassert>
54
#include <iterator>
55
#include <limits>
56
#include <tuple>
57
#include <utility>
58
#include <vector>
59
60
using namespace llvm;
61
62
#define DEBUG_TYPE "regalloc"
63
64
STATISTIC(numJoins    , "Number of interval joins performed");
65
STATISTIC(numCrossRCs , "Number of cross class joins performed");
66
STATISTIC(numCommutes , "Number of instruction commuting performed");
67
STATISTIC(numExtends  , "Number of copies extended");
68
STATISTIC(NumReMats   , "Number of instructions re-materialized");
69
STATISTIC(NumInflated , "Number of register classes inflated");
70
STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested");
71
STATISTIC(NumLaneResolves,  "Number of dead lane conflicts resolved");
72
STATISTIC(NumShrinkToUses,  "Number of shrinkToUses called");
73
74
static cl::opt<bool> EnableJoining("join-liveintervals",
75
                                   cl::desc("Coalesce copies (default=true)"),
76
                                   cl::init(true), cl::Hidden);
77
78
static cl::opt<bool> UseTerminalRule("terminal-rule",
79
                                     cl::desc("Apply the terminal rule"),
80
                                     cl::init(false), cl::Hidden);
81
82
/// Temporary flag to test critical edge unsplitting.
83
static cl::opt<bool>
84
EnableJoinSplits("join-splitedges",
85
  cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden);
86
87
/// Temporary flag to test global copy optimization.
88
static cl::opt<cl::boolOrDefault>
89
EnableGlobalCopies("join-globalcopies",
90
  cl::desc("Coalesce copies that span blocks (default=subtarget)"),
91
  cl::init(cl::BOU_UNSET), cl::Hidden);
92
93
static cl::opt<bool>
94
VerifyCoalescing("verify-coalescing",
95
         cl::desc("Verify machine instrs before and after register coalescing"),
96
         cl::Hidden);
97
98
static cl::opt<unsigned> LateRematUpdateThreshold(
99
    "late-remat-update-threshold", cl::Hidden,
100
    cl::desc("During rematerialization for a copy, if the def instruction has "
101
             "many other copy uses to be rematerialized, delay the multiple "
102
             "separate live interval update work and do them all at once after "
103
             "all those rematerialization are done. It will save a lot of "
104
             "repeated work. "),
105
    cl::init(100));
106
107
static cl::opt<unsigned> LargeIntervalSizeThreshold(
108
    "large-interval-size-threshold", cl::Hidden,
109
    cl::desc("If the valnos size of an interval is larger than the threshold, "
110
             "it is regarded as a large interval. "),
111
    cl::init(100));
112
113
static cl::opt<unsigned> LargeIntervalFreqThreshold(
114
    "large-interval-freq-threshold", cl::Hidden,
115
    cl::desc("For a large interval, if it is coalesed with other live "
116
             "intervals many times more than the threshold, stop its "
117
             "coalescing to control the compile time. "),
118
    cl::init(100));
119
120
namespace {
121
122
  class RegisterCoalescer : public MachineFunctionPass,
123
                            private LiveRangeEdit::Delegate {
124
    MachineFunction* MF;
125
    MachineRegisterInfo* MRI;
126
    const TargetRegisterInfo* TRI;
127
    const TargetInstrInfo* TII;
128
    LiveIntervals *LIS;
129
    const MachineLoopInfo* Loops;
130
    AliasAnalysis *AA;
131
    RegisterClassInfo RegClassInfo;
132
133
    /// A LaneMask to remember on which subregister live ranges we need to call
134
    /// shrinkToUses() later.
135
    LaneBitmask ShrinkMask;
136
137
    /// True if the main range of the currently coalesced intervals should be
138
    /// checked for smaller live intervals.
139
    bool ShrinkMainRange;
140
141
    /// True if the coalescer should aggressively coalesce global copies
142
    /// in favor of keeping local copies.
143
    bool JoinGlobalCopies;
144
145
    /// True if the coalescer should aggressively coalesce fall-thru
146
    /// blocks exclusively containing copies.
147
    bool JoinSplitEdges;
148
149
    /// Copy instructions yet to be coalesced.
150
    SmallVector<MachineInstr*, 8> WorkList;
151
    SmallVector<MachineInstr*, 8> LocalWorkList;
152
153
    /// Set of instruction pointers that have been erased, and
154
    /// that may be present in WorkList.
155
    SmallPtrSet<MachineInstr*, 8> ErasedInstrs;
156
157
    /// Dead instructions that are about to be deleted.
158
    SmallVector<MachineInstr*, 8> DeadDefs;
159
160
    /// Virtual registers to be considered for register class inflation.
161
    SmallVector<unsigned, 8> InflateRegs;
162
163
    /// The collection of live intervals which should have been updated
164
    /// immediately after rematerialiation but delayed until
165
    /// lateLiveIntervalUpdate is called.
166
    DenseSet<unsigned> ToBeUpdated;
167
168
    /// Record how many times the large live interval with many valnos
169
    /// has been tried to join with other live interval.
170
    DenseMap<unsigned, unsigned long> LargeLIVisitCounter;
171
172
    /// Recursively eliminate dead defs in DeadDefs.
173
    void eliminateDeadDefs();
174
175
    /// LiveRangeEdit callback for eliminateDeadDefs().
176
    void LRE_WillEraseInstruction(MachineInstr *MI) override;
177
178
    /// Coalesce the LocalWorkList.
179
    void coalesceLocals();
180
181
    /// Join compatible live intervals
182
    void joinAllIntervals();
183
184
    /// Coalesce copies in the specified MBB, putting
185
    /// copies that cannot yet be coalesced into WorkList.
186
    void copyCoalesceInMBB(MachineBasicBlock *MBB);
187
188
    /// Tries to coalesce all copies in CurrList. Returns true if any progress
189
    /// was made.
190
    bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList);
191
192
    /// If one def has many copy like uses, and those copy uses are all
193
    /// rematerialized, the live interval update needed for those
194
    /// rematerializations will be delayed and done all at once instead
195
    /// of being done multiple times. This is to save compile cost because
196
    /// live interval update is costly.
197
    void lateLiveIntervalUpdate();
198
199
    /// Attempt to join intervals corresponding to SrcReg/DstReg, which are the
200
    /// src/dst of the copy instruction CopyMI.  This returns true if the copy
201
    /// was successfully coalesced away. If it is not currently possible to
202
    /// coalesce this interval, but it may be possible if other things get
203
    /// coalesced, then it returns true by reference in 'Again'.
204
    bool joinCopy(MachineInstr *CopyMI, bool &Again);
205
206
    /// Attempt to join these two intervals.  On failure, this
207
    /// returns false.  The output "SrcInt" will not have been modified, so we
208
    /// can use this information below to update aliases.
209
    bool joinIntervals(CoalescerPair &CP);
210
211
    /// Attempt joining two virtual registers. Return true on success.
212
    bool joinVirtRegs(CoalescerPair &CP);
213
214
    /// If a live interval has many valnos and is coalesced with other
215
    /// live intervals many times, we regard such live interval as having
216
    /// high compile time cost.
217
    bool isHighCostLiveInterval(LiveInterval &LI);
218
219
    /// Attempt joining with a reserved physreg.
220
    bool joinReservedPhysReg(CoalescerPair &CP);
221
222
    /// Add the LiveRange @p ToMerge as a subregister liverange of @p LI.
223
    /// Subranges in @p LI which only partially interfere with the desired
224
    /// LaneMask are split as necessary. @p LaneMask are the lanes that
225
    /// @p ToMerge will occupy in the coalescer register. @p LI has its subrange
226
    /// lanemasks already adjusted to the coalesced register.
227
    void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
228
                           LaneBitmask LaneMask, CoalescerPair &CP);
229
230
    /// Join the liveranges of two subregisters. Joins @p RRange into
231
    /// @p LRange, @p RRange may be invalid afterwards.
232
    void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
233
                          LaneBitmask LaneMask, const CoalescerPair &CP);
234
235
    /// We found a non-trivially-coalescable copy. If the source value number is
236
    /// defined by a copy from the destination reg see if we can merge these two
237
    /// destination reg valno# into a single value number, eliminating a copy.
238
    /// This returns true if an interval was modified.
239
    bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
240
241
    /// Return true if there are definitions of IntB
242
    /// other than BValNo val# that can reach uses of AValNo val# of IntA.
243
    bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
244
                              VNInfo *AValNo, VNInfo *BValNo);
245
246
    /// We found a non-trivially-coalescable copy.
247
    /// If the source value number is defined by a commutable instruction and
248
    /// its other operand is coalesced to the copy dest register, see if we
249
    /// can transform the copy into a noop by commuting the definition.
250
    /// This returns a pair of two flags:
251
    /// - the first element is true if an interval was modified,
252
    /// - the second element is true if the destination interval needs
253
    ///   to be shrunk after deleting the copy.
254
    std::pair<bool,bool> removeCopyByCommutingDef(const CoalescerPair &CP,
255
                                                  MachineInstr *CopyMI);
256
257
    /// We found a copy which can be moved to its less frequent predecessor.
258
    bool removePartialRedundancy(const CoalescerPair &CP, MachineInstr &CopyMI);
259
260
    /// If the source of a copy is defined by a
261
    /// trivial computation, replace the copy by rematerialize the definition.
262
    bool reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI,
263
                                 bool &IsDefCopy);
264
265
    /// Return true if a copy involving a physreg should be joined.
266
    bool canJoinPhys(const CoalescerPair &CP);
267
268
    /// Replace all defs and uses of SrcReg to DstReg and update the subregister
269
    /// number if it is not zero. If DstReg is a physical register and the
270
    /// existing subregister number of the def / use being updated is not zero,
271
    /// make sure to set it to the correct physical subregister.
272
    void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);
273
274
    /// If the given machine operand reads only undefined lanes add an undef
275
    /// flag.
276
    /// This can happen when undef uses were previously concealed by a copy
277
    /// which we coalesced. Example:
278
    ///    %0:sub0<def,read-undef> = ...
279
    ///    %1 = COPY %0           <-- Coalescing COPY reveals undef
280
    ///       = use %1:sub1       <-- hidden undef use
281
    void addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
282
                      MachineOperand &MO, unsigned SubRegIdx);
283
284
    /// Handle copies of undef values. If the undef value is an incoming
285
    /// PHI value, it will convert @p CopyMI to an IMPLICIT_DEF.
286
    /// Returns nullptr if @p CopyMI was not in any way eliminable. Otherwise,
287
    /// it returns @p CopyMI (which could be an IMPLICIT_DEF at this point).
288
    MachineInstr *eliminateUndefCopy(MachineInstr *CopyMI);
289
290
    /// Check whether or not we should apply the terminal rule on the
291
    /// destination (Dst) of \p Copy.
292
    /// When the terminal rule applies, Copy is not profitable to
293
    /// coalesce.
294
    /// Dst is terminal if it has exactly one affinity (Dst, Src) and
295
    /// at least one interference (Dst, Dst2). If Dst is terminal, the
296
    /// terminal rule consists in checking that at least one of
297
    /// interfering node, say Dst2, has an affinity of equal or greater
298
    /// weight with Src.
299
    /// In that case, Dst2 and Dst will not be able to be both coalesced
300
    /// with Src. Since Dst2 exposes more coalescing opportunities than
301
    /// Dst, we can drop \p Copy.
302
    bool applyTerminalRule(const MachineInstr &Copy) const;
303
304
    /// Wrapper method for \see LiveIntervals::shrinkToUses.
305
    /// This method does the proper fixing of the live-ranges when the afore
306
    /// mentioned method returns true.
307
    void shrinkToUses(LiveInterval *LI,
308
689k
                      SmallVectorImpl<MachineInstr * > *Dead = nullptr) {
309
689k
      NumShrinkToUses++;
310
689k
      if (LIS->shrinkToUses(LI, Dead)) {
311
62
        /// Check whether or not \p LI is composed by multiple connected
312
62
        /// components and if that is the case, fix that.
313
62
        SmallVector<LiveInterval*, 8> SplitLIs;
314
62
        LIS->splitSeparateComponents(*LI, SplitLIs);
315
62
      }
316
689k
    }
317
318
    /// Wrapper Method to do all the necessary work when an Instruction is
319
    /// deleted.
320
    /// Optimizations should use this to make sure that deleted instructions
321
    /// are always accounted for.
322
242k
    void deleteInstr(MachineInstr* MI) {
323
242k
      ErasedInstrs.insert(MI);
324
242k
      LIS->RemoveMachineInstrFromMaps(*MI);
325
242k
      MI->eraseFromParent();
326
242k
    }
327
328
  public:
329
    static char ID; ///< Class identification, replacement for typeinfo
330
331
34.5k
    RegisterCoalescer() : MachineFunctionPass(ID) {
332
34.5k
      initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
333
34.5k
    }
334
335
    void getAnalysisUsage(AnalysisUsage &AU) const override;
336
337
    void releaseMemory() override;
338
339
    /// This is the pass entry point.
340
    bool runOnMachineFunction(MachineFunction&) override;
341
342
    /// Implement the dump method.
343
    void print(raw_ostream &O, const Module* = nullptr) const override;
344
  };
345
346
} // end anonymous namespace
347
348
char RegisterCoalescer::ID = 0;
349
350
char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;
351
352
42.3k
INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
353
42.3k
                      "Simple Register Coalescing", false, false)
354
42.3k
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
355
42.3k
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
356
42.3k
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
357
42.3k
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
358
42.3k
INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
359
                    "Simple Register Coalescing", false, false)
360
361
LLVM_NODISCARD static bool isMoveInstr(const TargetRegisterInfo &tri,
362
                                       const MachineInstr *MI, unsigned &Src,
363
                                       unsigned &Dst, unsigned &SrcSub,
364
23.3M
                                       unsigned &DstSub) {
365
23.3M
  if (MI->isCopy()) {
366
19.4M
    Dst = MI->getOperand(0).getReg();
367
19.4M
    DstSub = MI->getOperand(0).getSubReg();
368
19.4M
    Src = MI->getOperand(1).getReg();
369
19.4M
    SrcSub = MI->getOperand(1).getSubReg();
370
19.4M
  } else 
if (3.86M
MI->isSubregToReg()3.86M
) {
371
952k
    Dst = MI->getOperand(0).getReg();
372
952k
    DstSub = tri.composeSubRegIndices(MI->getOperand(0).getSubReg(),
373
952k
                                      MI->getOperand(3).getImm());
374
952k
    Src = MI->getOperand(2).getReg();
375
952k
    SrcSub = MI->getOperand(2).getSubReg();
376
952k
  } else
377
2.91M
    return false;
378
20.4M
  return true;
379
20.4M
}
380
381
/// Return true if this block should be vacated by the coalescer to eliminate
382
/// branches. The important cases to handle in the coalescer are critical edges
383
/// split during phi elimination which contain only copies. Simple blocks that
384
/// contain non-branches should also be vacated, but this can be handled by an
385
/// earlier pass similar to early if-conversion.
386
0
static bool isSplitEdge(const MachineBasicBlock *MBB) {
387
0
  if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
388
0
    return false;
389
0
390
0
  for (const auto &MI : *MBB) {
391
0
    if (!MI.isCopyLike() && !MI.isUnconditionalBranch())
392
0
      return false;
393
0
  }
394
0
  return true;
395
0
}
396
397
11.3M
bool CoalescerPair::setRegisters(const MachineInstr *MI) {
398
11.3M
  SrcReg = DstReg = 0;
399
11.3M
  SrcIdx = DstIdx = 0;
400
11.3M
  NewRC = nullptr;
401
11.3M
  Flipped = CrossClass = false;
402
11.3M
403
11.3M
  unsigned Src, Dst, SrcSub, DstSub;
404
11.3M
  if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
405
137
    return false;
406
11.3M
  Partial = SrcSub || 
DstSub10.7M
;
407
11.3M
408
11.3M
  // If one register is a physreg, it must be Dst.
409
11.3M
  if (TargetRegisterInfo::isPhysicalRegister(Src)) {
410
2.06M
    if (TargetRegisterInfo::isPhysicalRegister(Dst))
411
87.1k
      return false;
412
1.97M
    std::swap(Src, Dst);
413
1.97M
    std::swap(SrcSub, DstSub);
414
1.97M
    Flipped = true;
415
1.97M
  }
416
11.3M
417
11.3M
  const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
418
11.2M
419
11.2M
  if (TargetRegisterInfo::isPhysicalRegister(Dst)) {
420
7.18M
    // Eliminate DstSub on a physreg.
421
7.18M
    if (DstSub) {
422
2
      Dst = TRI.getSubReg(Dst, DstSub);
423
2
      if (!Dst) 
return false0
;
424
2
      DstSub = 0;
425
2
    }
426
7.18M
427
7.18M
    // Eliminate SrcSub by picking a corresponding Dst superregister.
428
7.18M
    if (SrcSub) {
429
71.7k
      Dst = TRI.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src));
430
71.7k
      if (!Dst) 
return false1.79k
;
431
7.11M
    } else if (!MRI.getRegClass(Src)->contains(Dst)) {
432
25.3k
      return false;
433
25.3k
    }
434
4.10M
  } else {
435
4.10M
    // Both registers are virtual.
436
4.10M
    const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
437
4.10M
    const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
438
4.10M
439
4.10M
    // Both registers have subreg indices.
440
4.10M
    if (SrcSub && 
DstSub538k
) {
441
126k
      // Copies between different sub-registers are never coalescable.
442
126k
      if (Src == Dst && 
SrcSub != DstSub2.76k
)
443
2.76k
        return false;
444
124k
445
124k
      NewRC = TRI.getCommonSuperRegClass(SrcRC, SrcSub, DstRC, DstSub,
446
124k
                                         SrcIdx, DstIdx);
447
124k
      if (!NewRC)
448
6.31k
        return false;
449
3.97M
    } else if (DstSub) {
450
564k
      // SrcReg will be merged with a sub-register of DstReg.
451
564k
      SrcIdx = DstSub;
452
564k
      NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
453
3.41M
    } else if (SrcSub) {
454
412k
      // DstReg will be merged with a sub-register of SrcReg.
455
412k
      DstIdx = SrcSub;
456
412k
      NewRC = TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSub);
457
2.99M
    } else {
458
2.99M
      // This is a straight copy without sub-registers.
459
2.99M
      NewRC = TRI.getCommonSubClass(DstRC, SrcRC);
460
2.99M
    }
461
4.10M
462
4.10M
    // The combined constraint may be impossible to satisfy.
463
4.10M
    
if (4.09M
!NewRC4.09M
)
464
110k
      return false;
465
3.98M
466
3.98M
    // Prefer SrcReg to be a sub-register of DstReg.
467
3.98M
    // FIXME: Coalescer should support subregs symmetrically.
468
3.98M
    if (DstIdx && 
!SrcIdx399k
) {
469
397k
      std::swap(Src, Dst);
470
397k
      std::swap(SrcIdx, DstIdx);
471
397k
      Flipped = !Flipped;
472
397k
    }
473
3.98M
474
3.98M
    CrossClass = NewRC != DstRC || 
NewRC != SrcRC2.81M
;
475
3.98M
  }
476
11.2M
  // Check our invariants
477
11.2M
  assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual");
478
11.1M
  assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) &&
479
11.1M
         "Cannot have a physical SubIdx");
480
11.1M
  SrcReg = Src;
481
11.1M
  DstReg = Dst;
482
11.1M
  return true;
483
11.2M
}
484
485
999k
bool CoalescerPair::flip() {
486
999k
  if (TargetRegisterInfo::isPhysicalRegister(DstReg))
487
0
    return false;
488
999k
  std::swap(SrcReg, DstReg);
489
999k
  std::swap(SrcIdx, DstIdx);
490
999k
  Flipped = !Flipped;
491
999k
  return true;
492
999k
}
493
494
8.05M
bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
495
8.05M
  if (!MI)
496
84.8k
    return false;
497
7.97M
  unsigned Src, Dst, SrcSub, DstSub;
498
7.97M
  if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
499
2.91M
    return false;
500
5.05M
501
5.05M
  // Find the virtual register that is SrcReg.
502
5.05M
  if (Dst == SrcReg) {
503
1.71M
    std::swap(Src, Dst);
504
1.71M
    std::swap(SrcSub, DstSub);
505
3.34M
  } else if (Src != SrcReg) {
506
341k
    return false;
507
341k
  }
508
4.71M
509
4.71M
  // Now check that Dst matches DstReg.
510
4.71M
  if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
511
266k
    if (!TargetRegisterInfo::isPhysicalRegister(Dst))
512
9.71k
      return false;
513
256k
    assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state.");
514
256k
    // DstSub could be set for a physreg from INSERT_SUBREG.
515
256k
    if (DstSub)
516
0
      Dst = TRI.getSubReg(Dst, DstSub);
517
256k
    // Full copy of Src.
518
256k
    if (!SrcSub)
519
242k
      return DstReg == Dst;
520
14.1k
    // This is a partial register copy. Check that the parts match.
521
14.1k
    return TRI.getSubReg(DstReg, SrcSub) == Dst;
522
4.44M
  } else {
523
4.44M
    // DstReg is virtual.
524
4.44M
    if (DstReg != Dst)
525
139k
      return false;
526
4.30M
    // Registers match, do the subregisters line up?
527
4.30M
    return TRI.composeSubRegIndices(SrcIdx, SrcSub) ==
528
4.30M
           TRI.composeSubRegIndices(DstIdx, DstSub);
529
4.30M
  }
530
4.71M
}
531
532
34.2k
void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
533
34.2k
  AU.setPreservesCFG();
534
34.2k
  AU.addRequired<AAResultsWrapperPass>();
535
34.2k
  AU.addRequired<LiveIntervals>();
536
34.2k
  AU.addPreserved<LiveIntervals>();
537
34.2k
  AU.addPreserved<SlotIndexes>();
538
34.2k
  AU.addRequired<MachineLoopInfo>();
539
34.2k
  AU.addPreserved<MachineLoopInfo>();
540
34.2k
  AU.addPreservedID(MachineDominatorsID);
541
34.2k
  MachineFunctionPass::getAnalysisUsage(AU);
542
34.2k
}
543
544
350k
void RegisterCoalescer::eliminateDeadDefs() {
545
350k
  SmallVector<unsigned, 8> NewRegs;
546
350k
  LiveRangeEdit(nullptr, NewRegs, *MF, *LIS,
547
350k
                nullptr, this).eliminateDeadDefs(DeadDefs);
548
350k
}
549
550
350k
void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
551
350k
  // MI may be in WorkList. Make sure we don't visit it.
552
350k
  ErasedInstrs.insert(MI);
553
350k
}
554
555
bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
556
383k
                                             MachineInstr *CopyMI) {
557
383k
  assert(!CP.isPartial() && "This doesn't work for partial copies.");
558
383k
  assert(!CP.isPhys() && "This doesn't work for physreg copies.");
559
383k
560
383k
  LiveInterval &IntA =
561
383k
    LIS->getInterval(CP.isFlipped() ? 
CP.getDstReg()119k
:
CP.getSrcReg()264k
);
562
383k
  LiveInterval &IntB =
563
383k
    LIS->getInterval(CP.isFlipped() ? 
CP.getSrcReg()119k
:
CP.getDstReg()264k
);
564
383k
  SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
565
383k
566
383k
  // We have a non-trivially-coalescable copy with IntA being the source and
567
383k
  // IntB being the dest, thus this defines a value number in IntB.  If the
568
383k
  // source value number (in IntA) is defined by a copy from B, see if we can
569
383k
  // merge these two pieces of B into a single value number, eliminating a copy.
570
383k
  // For example:
571
383k
  //
572
383k
  //  A3 = B0
573
383k
  //    ...
574
383k
  //  B1 = A3      <- this copy
575
383k
  //
576
383k
  // In this case, B0 can be extended to where the B1 copy lives, allowing the
577
383k
  // B1 value number to be replaced with B0 (which simplifies the B
578
383k
  // liveinterval).
579
383k
580
383k
  // BValNo is a value number in B that is defined by a copy from A.  'B1' in
581
383k
  // the example above.
582
383k
  LiveInterval::iterator BS = IntB.FindSegmentContaining(CopyIdx);
583
383k
  if (BS == IntB.end()) 
return false0
;
584
383k
  VNInfo *BValNo = BS->valno;
585
383k
586
383k
  // Get the location that B is defined at.  Two options: either this value has
587
383k
  // an unknown definition point or it is defined at CopyIdx.  If unknown, we
588
383k
  // can't process it.
589
383k
  if (BValNo->def != CopyIdx) 
return false0
;
590
383k
591
383k
  // AValNo is the value number in A that defines the copy, A3 in the example.
592
383k
  SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true);
593
383k
  LiveInterval::iterator AS = IntA.FindSegmentContaining(CopyUseIdx);
594
383k
  // The live segment might not exist after fun with physreg coalescing.
595
383k
  if (AS == IntA.end()) 
return false0
;
596
383k
  VNInfo *AValNo = AS->valno;
597
383k
598
383k
  // If AValNo is defined as a copy from IntB, we can potentially process this.
599
383k
  // Get the instruction that defines this value number.
600
383k
  MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def);
601
383k
  // Don't allow any partial copies, even if isCoalescable() allows them.
602
383k
  if (!CP.isCoalescable(ACopyMI) || 
!ACopyMI->isFullCopy()464
)
603
383k
    return false;
604
463
605
463
  // Get the Segment in IntB that this value number starts with.
606
463
  LiveInterval::iterator ValS =
607
463
    IntB.FindSegmentContaining(AValNo->def.getPrevSlot());
608
463
  if (ValS == IntB.end())
609
0
    return false;
610
463
611
463
  // Make sure that the end of the live segment is inside the same block as
612
463
  // CopyMI.
613
463
  MachineInstr *ValSEndInst =
614
463
    LIS->getInstructionFromIndex(ValS->end.getPrevSlot());
615
463
  if (!ValSEndInst || ValSEndInst->getParent() != CopyMI->getParent())
616
364
    return false;
617
99
618
99
  // Okay, we now know that ValS ends in the same block that the CopyMI
619
99
  // live-range starts.  If there are no intervening live segments between them
620
99
  // in IntB, we can merge them.
621
99
  if (ValS+1 != BS) 
return false0
;
622
99
623
99
  LLVM_DEBUG(dbgs() << "Extending: " << printReg(IntB.reg, TRI));
624
99
625
99
  SlotIndex FillerStart = ValS->end, FillerEnd = BS->start;
626
99
  // We are about to delete CopyMI, so need to remove it as the 'instruction
627
99
  // that defines this value #'. Update the valnum with the new defining
628
99
  // instruction #.
629
99
  BValNo->def = FillerStart;
630
99
631
99
  // Okay, we can merge them.  We need to insert a new liverange:
632
99
  // [ValS.end, BS.begin) of either value number, then we merge the
633
99
  // two value numbers.
634
99
  IntB.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, BValNo));
635
99
636
99
  // Okay, merge "B1" into the same value number as "B0".
637
99
  if (BValNo != ValS->valno)
638
99
    IntB.MergeValueNumberInto(BValNo, ValS->valno);
639
99
640
99
  // Do the same for the subregister segments.
641
99
  for (LiveInterval::SubRange &S : IntB.subranges()) {
642
3
    // Check for SubRange Segments of the form [1234r,1234d:0) which can be
643
3
    // removed to prevent creating bogus SubRange Segments.
644
3
    LiveInterval::iterator SS = S.FindSegmentContaining(CopyIdx);
645
3
    if (SS != S.end() && SlotIndex::isSameInstr(SS->start, SS->end)) {
646
1
      S.removeSegment(*SS, true);
647
1
      continue;
648
1
    }
649
2
    VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
650
2
    S.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, SubBValNo));
651
2
    VNInfo *SubValSNo = S.getVNInfoAt(AValNo->def.getPrevSlot());
652
2
    if (SubBValNo != SubValSNo)
653
2
      S.MergeValueNumberInto(SubBValNo, SubValSNo);
654
2
  }
655
99
656
99
  LLVM_DEBUG(dbgs() << "   result = " << IntB << '\n');
657
99
658
99
  // If the source instruction was killing the source register before the
659
99
  // merge, unset the isKill marker given the live range has been extended.
660
99
  int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg, true);
661
99
  if (UIdx != -1) {
662
0
    ValSEndInst->getOperand(UIdx).setIsKill(false);
663
0
  }
664
99
665
99
  // Rewrite the copy.
666
99
  CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI);
667
99
  // If the copy instruction was killing the destination register or any
668
99
  // subrange before the merge trim the live range.
669
99
  bool RecomputeLiveRange = AS->end == CopyIdx;
670
99
  if (!RecomputeLiveRange) {
671
97
    for (LiveInterval::SubRange &S : IntA.subranges()) {
672
2
      LiveInterval::iterator SS = S.FindSegmentContaining(CopyUseIdx);
673
2
      if (SS != S.end() && SS->end == CopyIdx) {
674
1
        RecomputeLiveRange = true;
675
1
        break;
676
1
      }
677
2
    }
678
97
  }
679
99
  if (RecomputeLiveRange)
680
3
    shrinkToUses(&IntA);
681
99
682
99
  ++numExtends;
683
99
  return true;
684
99
}
685
686
bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
687
                                             LiveInterval &IntB,
688
                                             VNInfo *AValNo,
689
501
                                             VNInfo *BValNo) {
690
501
  // If AValNo has PHI kills, conservatively assume that IntB defs can reach
691
501
  // the PHI values.
692
501
  if (LIS->hasPHIKill(IntA, AValNo))
693
5
    return true;
694
496
695
1.93k
  
for (LiveRange::Segment &ASeg : IntA.segments)496
{
696
1.93k
    if (ASeg.valno != AValNo) 
continue1.41k
;
697
516
    LiveInterval::iterator BI = llvm::upper_bound(IntB, ASeg.start);
698
516
    if (BI != IntB.begin())
699
516
      --BI;
700
1.52k
    for (; BI != IntB.end() && 
ASeg.end >= BI->start1.44k
;
++BI1.01k
) {
701
1.01k
      if (BI->valno == BValNo)
702
516
        continue;
703
500
      if (BI->start <= ASeg.start && 
BI->end > ASeg.start496
)
704
0
        return true;
705
500
      if (BI->start > ASeg.start && 
BI->start < ASeg.end4
)
706
4
        return true;
707
500
    }
708
516
  }
709
496
  
return false492
;
710
496
}
711
712
/// Copy segments with value number @p SrcValNo from liverange @p Src to live
713
/// range @Dst and use value number @p DstValNo there.
714
static std::pair<bool,bool>
715
addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo, const LiveRange &Src,
716
496
                     const VNInfo *SrcValNo) {
717
496
  bool Changed = false;
718
496
  bool MergedWithDead = false;
719
1.92k
  for (const LiveRange::Segment &S : Src.segments) {
720
1.92k
    if (S.valno != SrcValNo)
721
1.40k
      continue;
722
514
    // This is adding a segment from Src that ends in a copy that is about
723
514
    // to be removed. This segment is going to be merged with a pre-existing
724
514
    // segment in Dst. This works, except in cases when the corresponding
725
514
    // segment in Dst is dead. For example: adding [192r,208r:1) from Src
726
514
    // to [208r,208d:1) in Dst would create [192r,208d:1) in Dst.
727
514
    // Recognized such cases, so that the segments can be shrunk.
728
514
    LiveRange::Segment Added = LiveRange::Segment(S.start, S.end, DstValNo);
729
514
    LiveRange::Segment &Merged = *Dst.addSegment(Added);
730
514
    if (Merged.end.isDead())
731
1
      MergedWithDead = true;
732
514
    Changed = true;
733
514
  }
734
496
  return std::make_pair(Changed, MergedWithDead);
735
496
}
736
737
std::pair<bool,bool>
738
RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
739
383k
                                            MachineInstr *CopyMI) {
740
383k
  assert(!CP.isPhys());
741
383k
742
383k
  LiveInterval &IntA =
743
383k
      LIS->getInterval(CP.isFlipped() ? 
CP.getDstReg()119k
:
CP.getSrcReg()263k
);
744
383k
  LiveInterval &IntB =
745
383k
      LIS->getInterval(CP.isFlipped() ? 
CP.getSrcReg()119k
:
CP.getDstReg()263k
);
746
383k
747
383k
  // We found a non-trivially-coalescable copy with IntA being the source and
748
383k
  // IntB being the dest, thus this defines a value number in IntB.  If the
749
383k
  // source value number (in IntA) is defined by a commutable instruction and
750
383k
  // its other operand is coalesced to the copy dest register, see if we can
751
383k
  // transform the copy into a noop by commuting the definition. For example,
752
383k
  //
753
383k
  //  A3 = op A2 killed B0
754
383k
  //    ...
755
383k
  //  B1 = A3      <- this copy
756
383k
  //    ...
757
383k
  //     = op A3   <- more uses
758
383k
  //
759
383k
  // ==>
760
383k
  //
761
383k
  //  B2 = op B0 killed A2
762
383k
  //    ...
763
383k
  //  B1 = B2      <- now an identity copy
764
383k
  //    ...
765
383k
  //     = op B2   <- more uses
766
383k
767
383k
  // BValNo is a value number in B that is defined by a copy from A. 'B1' in
768
383k
  // the example above.
769
383k
  SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
770
383k
  VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
771
383k
  assert(BValNo != nullptr && BValNo->def == CopyIdx);
772
383k
773
383k
  // AValNo is the value number in A that defines the copy, A3 in the example.
774
383k
  VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true));
775
383k
  assert(AValNo && !AValNo->isUnused() && "COPY source not live");
776
383k
  if (AValNo->isPHIDef())
777
84.8k
    return { false, false };
778
298k
  MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def);
779
298k
  if (!DefMI)
780
0
    return { false, false };
781
298k
  if (!DefMI->isCommutable())
782
282k
    return { false, false };
783
16.2k
  // If DefMI is a two-address instruction then commuting it will change the
784
16.2k
  // destination register.
785
16.2k
  int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg);
786
16.2k
  assert(DefIdx != -1);
787
16.2k
  unsigned UseOpIdx;
788
16.2k
  if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
789
958
    return { false, false };
790
15.2k
791
15.2k
  // FIXME: The code below tries to commute 'UseOpIdx' operand with some other
792
15.2k
  // commutable operand which is expressed by 'CommuteAnyOperandIndex'value
793
15.2k
  // passed to the method. That _other_ operand is chosen by
794
15.2k
  // the findCommutedOpIndices() method.
795
15.2k
  //
796
15.2k
  // That is obviously an area for improvement in case of instructions having
797
15.2k
  // more than 2 operands. For example, if some instruction has 3 commutable
798
15.2k
  // operands then all possible variants (i.e. op#1<->op#2, op#1<->op#3,
799
15.2k
  // op#2<->op#3) of commute transformation should be considered/tried here.
800
15.2k
  unsigned NewDstIdx = TargetInstrInfo::CommuteAnyOperandIndex;
801
15.2k
  if (!TII->findCommutedOpIndices(*DefMI, UseOpIdx, NewDstIdx))
802
184
    return { false, false };
803
15.0k
804
15.0k
  MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
805
15.0k
  unsigned NewReg = NewDstMO.getReg();
806
15.0k
  if (NewReg != IntB.reg || 
!IntB.Query(AValNo->def).isKill()566
)
807
14.5k
    return { false, false };
808
501
809
501
  // Make sure there are no other definitions of IntB that would reach the
810
501
  // uses which the new definition can reach.
811
501
  if (hasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
812
9
    return { false, false };
813
492
814
492
  // If some of the uses of IntA.reg is already coalesced away, return false.
815
492
  // It's not possible to determine whether it's safe to perform the coalescing.
816
1.86k
  
for (MachineOperand &MO : MRI->use_nodbg_operands(IntA.reg))492
{
817
1.86k
    MachineInstr *UseMI = MO.getParent();
818
1.86k
    unsigned OpNo = &MO - &UseMI->getOperand(0);
819
1.86k
    SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI);
820
1.86k
    LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx);
821
1.86k
    if (US == IntA.end() || US->valno != AValNo)
822
1.30k
      continue;
823
563
    // If this use is tied to a def, we can't rewrite the register.
824
563
    if (UseMI->isRegTiedToDefOperand(OpNo))
825
0
      return { false, false };
826
563
  }
827
492
828
492
  LLVM_DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t'
829
492
                    << *DefMI);
830
492
831
492
  // At this point we have decided that it is legal to do this
832
492
  // transformation.  Start by commuting the instruction.
833
492
  MachineBasicBlock *MBB = DefMI->getParent();
834
492
  MachineInstr *NewMI =
835
492
      TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx);
836
492
  if (!NewMI)
837
0
    return { false, false };
838
492
  if (TargetRegisterInfo::isVirtualRegister(IntA.reg) &&
839
492
      TargetRegisterInfo::isVirtualRegister(IntB.reg) &&
840
492
      !MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg)))
841
0
    return { false, false };
842
492
  if (NewMI != DefMI) {
843
0
    LIS->ReplaceMachineInstrInMaps(*DefMI, *NewMI);
844
0
    MachineBasicBlock::iterator Pos = DefMI;
845
0
    MBB->insert(Pos, NewMI);
846
0
    MBB->erase(DefMI);
847
0
  }
848
492
849
492
  // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g.
850
492
  // A = or A, B
851
492
  // ...
852
492
  // B = A
853
492
  // ...
854
492
  // C = killed A
855
492
  // ...
856
492
  //   = B
857
492
858
492
  // Update uses of IntA of the specific Val# with IntB.
859
492
  for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg),
860
492
                                         UE = MRI->use_end();
861
2.35k
       UI != UE; /* ++UI is below because of possible MI removal */) {
862
1.86k
    MachineOperand &UseMO = *UI;
863
1.86k
    ++UI;
864
1.86k
    if (UseMO.isUndef())
865
0
      continue;
866
1.86k
    MachineInstr *UseMI = UseMO.getParent();
867
1.86k
    if (UseMI->isDebugValue()) {
868
0
      // FIXME These don't have an instruction index.  Not clear we have enough
869
0
      // info to decide whether to do this replacement or not.  For now do it.
870
0
      UseMO.setReg(NewReg);
871
0
      continue;
872
0
    }
873
1.86k
    SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot(true);
874
1.86k
    LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx);
875
1.86k
    assert(US != IntA.end() && "Use must be live");
876
1.86k
    if (US->valno != AValNo)
877
1.30k
      continue;
878
563
    // Kill flags are no longer accurate. They are recomputed after RA.
879
563
    UseMO.setIsKill(false);
880
563
    if (TargetRegisterInfo::isPhysicalRegister(NewReg))
881
0
      UseMO.substPhysReg(NewReg, *TRI);
882
563
    else
883
563
      UseMO.setReg(NewReg);
884
563
    if (UseMI == CopyMI)
885
492
      continue;
886
71
    if (!UseMI->isCopy())
887
27
      continue;
888
44
    if (UseMI->getOperand(0).getReg() != IntB.reg ||
889
44
        
UseMI->getOperand(0).getSubReg()0
)
890
44
      continue;
891
0
892
0
    // This copy will become a noop. If it's defining a new val#, merge it into
893
0
    // BValNo.
894
0
    SlotIndex DefIdx = UseIdx.getRegSlot();
895
0
    VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
896
0
    if (!DVNI)
897
0
      continue;
898
0
    LLVM_DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
899
0
    assert(DVNI->def == DefIdx);
900
0
    BValNo = IntB.MergeValueNumberInto(DVNI, BValNo);
901
0
    for (LiveInterval::SubRange &S : IntB.subranges()) {
902
0
      VNInfo *SubDVNI = S.getVNInfoAt(DefIdx);
903
0
      if (!SubDVNI)
904
0
        continue;
905
0
      VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
906
0
      assert(SubBValNo->def == CopyIdx);
907
0
      S.MergeValueNumberInto(SubDVNI, SubBValNo);
908
0
    }
909
0
910
0
    deleteInstr(UseMI);
911
0
  }
912
492
913
492
  // Extend BValNo by merging in IntA live segments of AValNo. Val# definition
914
492
  // is updated.
915
492
  bool ShrinkB = false;
916
492
  BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
917
492
  if (IntA.hasSubRanges() || 
IntB.hasSubRanges()490
) {
918
3
    if (!IntA.hasSubRanges()) {
919
1
      LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg);
920
1
      IntA.createSubRangeFrom(Allocator, Mask, IntA);
921
2
    } else if (!IntB.hasSubRanges()) {
922
0
      LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntB.reg);
923
0
      IntB.createSubRangeFrom(Allocator, Mask, IntB);
924
0
    }
925
3
    SlotIndex AIdx = CopyIdx.getRegSlot(true);
926
3
    LaneBitmask MaskA;
927
3
    const SlotIndexes &Indexes = *LIS->getSlotIndexes();
928
4
    for (LiveInterval::SubRange &SA : IntA.subranges()) {
929
4
      VNInfo *ASubValNo = SA.getVNInfoAt(AIdx);
930
4
      // Even if we are dealing with a full copy, some lanes can
931
4
      // still be undefined.
932
4
      // E.g.,
933
4
      // undef A.subLow = ...
934
4
      // B = COPY A <== A.subHigh is undefined here and does
935
4
      //                not have a value number.
936
4
      if (!ASubValNo)
937
1
        continue;
938
3
      MaskA |= SA.LaneMask;
939
3
940
3
      IntB.refineSubRanges(
941
3
          Allocator, SA.LaneMask,
942
3
          [&Allocator, &SA, CopyIdx, ASubValNo,
943
4
           &ShrinkB](LiveInterval::SubRange &SR) {
944
4
            VNInfo *BSubValNo = SR.empty() ? 
SR.getNextValue(CopyIdx, Allocator)0
945
4
                                           : SR.getVNInfoAt(CopyIdx);
946
4
            assert(BSubValNo != nullptr);
947
4
            auto P = addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo);
948
4
            ShrinkB |= P.second;
949
4
            if (P.first)
950
4
              BSubValNo->def = ASubValNo->def;
951
4
          },
952
3
          Indexes, *TRI);
953
3
    }
954
3
    // Go over all subranges of IntB that have not been covered by IntA,
955
3
    // and delete the segments starting at CopyIdx. This can happen if
956
3
    // IntA has undef lanes that are defined in IntB.
957
6
    for (LiveInterval::SubRange &SB : IntB.subranges()) {
958
6
      if ((SB.LaneMask & MaskA).any())
959
4
        continue;
960
2
      if (LiveRange::Segment *S = SB.getSegmentContaining(CopyIdx))
961
2
        if (S->start.getBaseIndex() == CopyIdx.getBaseIndex())
962
2
          SB.removeSegment(*S, true);
963
2
    }
964
3
  }
965
492
966
492
  BValNo->def = AValNo->def;
967
492
  auto P = addSegmentsWithValNo(IntB, BValNo, IntA, AValNo);
968
492
  ShrinkB |= P.second;
969
492
  LLVM_DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
970
492
971
492
  LIS->removeVRegDefAt(IntA, AValNo->def);
972
492
973
492
  LLVM_DEBUG(dbgs() << "\t\ttrimmed:  " << IntA << '\n');
974
492
  ++numCommutes;
975
492
  return { true, ShrinkB };
976
492
}
977
978
/// For copy B = A in BB2, if A is defined by A = B in BB0 which is a
979
/// predecessor of BB2, and if B is not redefined on the way from A = B
980
/// in BB0 to B = A in BB2, B = A in BB2 is partially redundant if the
981
/// execution goes through the path from BB0 to BB2. We may move B = A
982
/// to the predecessor without such reversed copy.
983
/// So we will transform the program from:
984
///   BB0:
985
///      A = B;    BB1:
986
///       ...         ...
987
///     /     \      /
988
///             BB2:
989
///               ...
990
///               B = A;
991
///
992
/// to:
993
///
994
///   BB0:         BB1:
995
///      A = B;        ...
996
///       ...          B = A;
997
///     /     \       /
998
///             BB2:
999
///               ...
1000
///
1001
/// A special case is when BB0 and BB2 are the same BB which is the only
1002
/// BB in a loop:
1003
///   BB1:
1004
///        ...
1005
///   BB0/BB2:  ----
1006
///        B = A;   |
1007
///        ...      |
1008
///        A = B;   |
1009
///          |-------
1010
///          |
1011
/// We may hoist B = A from BB0/BB2 to BB1.
1012
///
1013
/// The major preconditions for correctness to remove such partial
1014
/// redundancy include:
1015
/// 1. A in B = A in BB2 is defined by a PHI in BB2, and one operand of
1016
///    the PHI is defined by the reversed copy A = B in BB0.
1017
/// 2. No B is referenced from the start of BB2 to B = A.
1018
/// 3. No B is defined from A = B to the end of BB0.
1019
/// 4. BB1 has only one successor.
1020
///
1021
/// 2 and 4 implicitly ensure B is not live at the end of BB1.
1022
/// 4 guarantees BB2 is hotter than BB1, so we can only move a copy to a
1023
/// colder place, which not only prevent endless loop, but also make sure
1024
/// the movement of copy is beneficial.
1025
bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
1026
382k
                                                MachineInstr &CopyMI) {
1027
382k
  assert(!CP.isPhys());
1028
382k
  if (!CopyMI.isFullCopy())
1029
0
    return false;
1030
382k
1031
382k
  MachineBasicBlock &MBB = *CopyMI.getParent();
1032
382k
  if (MBB.isEHPad())
1033
1.28k
    return false;
1034
381k
1035
381k
  if (MBB.pred_size() != 2)
1036
251k
    return false;
1037
129k
1038
129k
  LiveInterval &IntA =
1039
129k
      LIS->getInterval(CP.isFlipped() ? 
CP.getDstReg()35.7k
:
CP.getSrcReg()94.0k
);
1040
129k
  LiveInterval &IntB =
1041
129k
      LIS->getInterval(CP.isFlipped() ? 
CP.getSrcReg()35.7k
:
CP.getDstReg()94.0k
);
1042
129k
1043
129k
  // A is defined by PHI at the entry of MBB.
1044
129k
  SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true);
1045
129k
  VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx);
1046
129k
  assert(AValNo && !AValNo->isUnused() && "COPY source not live");
1047
129k
  if (!AValNo->isPHIDef())
1048
88.7k
    return false;
1049
41.0k
1050
41.0k
  // No B is referenced before CopyMI in MBB.
1051
41.0k
  if (IntB.overlaps(LIS->getMBBStartIdx(&MBB), CopyIdx))
1052
816
    return false;
1053
40.2k
1054
40.2k
  // MBB has two predecessors: one contains A = B so no copy will be inserted
1055
40.2k
  // for it. The other one will have a copy moved from MBB.
1056
40.2k
  bool FoundReverseCopy = false;
1057
40.2k
  MachineBasicBlock *CopyLeftBB = nullptr;
1058
80.4k
  for (MachineBasicBlock *Pred : MBB.predecessors()) {
1059
80.4k
    VNInfo *PVal = IntA.getVNInfoBefore(LIS->getMBBEndIdx(Pred));
1060
80.4k
    MachineInstr *DefMI = LIS->getInstructionFromIndex(PVal->def);
1061
80.4k
    if (!DefMI || 
!DefMI->isFullCopy()73.4k
) {
1062
39.1k
      CopyLeftBB = Pred;
1063
39.1k
      continue;
1064
39.1k
    }
1065
41.2k
    // Check DefMI is a reverse copy and it is in BB Pred.
1066
41.2k
    if (DefMI->getOperand(0).getReg() != IntA.reg ||
1067
41.2k
        DefMI->getOperand(1).getReg() != IntB.reg ||
1068
41.2k
        
DefMI->getParent() != Pred3.97k
) {
1069
37.3k
      CopyLeftBB = Pred;
1070
37.3k
      continue;
1071
37.3k
    }
1072
3.97k
    // If there is any other def of B after DefMI and before the end of Pred,
1073
3.97k
    // we need to keep the copy of B = A at the end of Pred if we remove
1074
3.97k
    // B = A from MBB.
1075
3.97k
    bool ValB_Changed = false;
1076
17.7k
    for (auto VNI : IntB.valnos) {
1077
17.7k
      if (VNI->isUnused())
1078
361
        continue;
1079
17.3k
      if (PVal->def < VNI->def && 
VNI->def < LIS->getMBBEndIdx(Pred)8.83k
) {
1080
0
        ValB_Changed = true;
1081
0
        break;
1082
0
      }
1083
17.3k
    }
1084
3.97k
    if (ValB_Changed) {
1085
0
      CopyLeftBB = Pred;
1086
0
      continue;
1087
0
    }
1088
3.97k
    FoundReverseCopy = true;
1089
3.97k
  }
1090
40.2k
1091
40.2k
  // If no reverse copy is found in predecessors, nothing to do.
1092
40.2k
  if (!FoundReverseCopy)
1093
36.2k
    return false;
1094
3.96k
1095
3.96k
  // If CopyLeftBB is nullptr, it means every predecessor of MBB contains
1096
3.96k
  // reverse copy, CopyMI can be removed trivially if only IntA/IntB is updated.
1097
3.96k
  // If CopyLeftBB is not nullptr, move CopyMI from MBB to CopyLeftBB and
1098
3.96k
  // update IntA/IntB.
1099
3.96k
  //
1100
3.96k
  // If CopyLeftBB is not nullptr, ensure CopyLeftBB has a single succ so
1101
3.96k
  // MBB is hotter than CopyLeftBB.
1102
3.96k
  if (CopyLeftBB && 
CopyLeftBB->succ_size() > 13.96k
)
1103
3.44k
    return false;
1104
520
1105
520
  // Now (almost sure it's) ok to move copy.
1106
520
  if (CopyLeftBB) {
1107
517
    // Position in CopyLeftBB where we should insert new copy.
1108
517
    auto InsPos = CopyLeftBB->getFirstTerminator();
1109
517
1110
517
    // Make sure that B isn't referenced in the terminators (if any) at the end
1111
517
    // of the predecessor since we're about to insert a new definition of B
1112
517
    // before them.
1113
517
    if (InsPos != CopyLeftBB->end()) {
1114
79
      SlotIndex InsPosIdx = LIS->getInstructionIndex(*InsPos).getRegSlot(true);
1115
79
      if (IntB.overlaps(InsPosIdx, LIS->getMBBEndIdx(CopyLeftBB)))
1116
1
        return false;
1117
516
    }
1118
516
1119
516
    LLVM_DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to "
1120
516
                      << printMBBReference(*CopyLeftBB) << '\t' << CopyMI);
1121
516
1122
516
    // Insert new copy to CopyLeftBB.
1123
516
    MachineInstr *NewCopyMI = BuildMI(*CopyLeftBB, InsPos, CopyMI.getDebugLoc(),
1124
516
                                      TII->get(TargetOpcode::COPY), IntB.reg)
1125
516
                                  .addReg(IntA.reg);
1126
516
    SlotIndex NewCopyIdx =
1127
516
        LIS->InsertMachineInstrInMaps(*NewCopyMI).getRegSlot();
1128
516
    IntB.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
1129
516
    for (LiveInterval::SubRange &SR : IntB.subranges())
1130
0
      SR.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
1131
516
1132
516
    // If the newly created Instruction has an address of an instruction that was
1133
516
    // deleted before (object recycled by the allocator) it needs to be removed from
1134
516
    // the deleted list.
1135
516
    ErasedInstrs.erase(NewCopyMI);
1136
516
  } else {
1137
3
    LLVM_DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from "
1138
3
                      << printMBBReference(MBB) << '\t' << CopyMI);
1139
3
  }
1140
520
1141
520
  // Remove CopyMI.
1142
520
  // Note: This is fine to remove the copy before updating the live-ranges.
1143
520
  // While updating the live-ranges, we only look at slot indices and
1144
520
  // never go back to the instruction.
1145
520
  // Mark instructions as deleted.
1146
520
  deleteInstr(&CopyMI);
1147
519
1148
519
  // Update the liveness.
1149
519
  SmallVector<SlotIndex, 8> EndPoints;
1150
519
  VNInfo *BValNo = IntB.Query(CopyIdx).valueOutOrDead();
1151
519
  LIS->pruneValue(*static_cast<LiveRange *>(&IntB), CopyIdx.getRegSlot(),
1152
519
                  &EndPoints);
1153
519
  BValNo->markUnused();
1154
519
  // Extend IntB to the EndPoints of its original live interval.
1155
519
  LIS->extendToIndices(IntB, EndPoints);
1156
519
1157
519
  // Now, do the same for its subranges.
1158
519
  for (LiveInterval::SubRange &SR : IntB.subranges()) {
1159
3
    EndPoints.clear();
1160
3
    VNInfo *BValNo = SR.Query(CopyIdx).valueOutOrDead();
1161
3
    assert(BValNo && "All sublanes should be live");
1162
3
    LIS->pruneValue(SR, CopyIdx.getRegSlot(), &EndPoints);
1163
3
    BValNo->markUnused();
1164
3
    // We can have a situation where the result of the original copy is live,
1165
3
    // but is immediately dead in this subrange, e.g. [336r,336d:0). That makes
1166
3
    // the copy appear as an endpoint from pruneValue(), but we don't want it
1167
3
    // to because the copy has been removed.  We can go ahead and remove that
1168
3
    // endpoint; there is no other situation here that there could be a use at
1169
3
    // the same place as we know that the copy is a full copy.
1170
9
    for (unsigned I = 0; I != EndPoints.size(); ) {
1171
6
      if (SlotIndex::isSameInstr(EndPoints[I], CopyIdx)) {
1172
2
        EndPoints[I] = EndPoints.back();
1173
2
        EndPoints.pop_back();
1174
2
        continue;
1175
2
      }
1176
4
      ++I;
1177
4
    }
1178
3
    LIS->extendToIndices(SR, EndPoints);
1179
3
  }
1180
519
  // If any dead defs were extended, truncate them.
1181
519
  shrinkToUses(&IntB);
1182
519
1183
519
  // Finally, update the live-range of IntA.
1184
519
  shrinkToUses(&IntA);
1185
519
  return true;
1186
520
}
1187
1188
/// Returns true if @p MI defines the full vreg @p Reg, as opposed to just
1189
/// defining a subregister.
1190
687k
static bool definesFullReg(const MachineInstr &MI, unsigned Reg) {
1191
687k
  assert(!TargetRegisterInfo::isPhysicalRegister(Reg) &&
1192
687k
         "This code cannot handle physreg aliasing");
1193
687k
  for (const MachineOperand &Op : MI.operands()) {
1194
687k
    if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg)
1195
0
      continue;
1196
687k
    // Return true if we define the full register or don't care about the value
1197
687k
    // inside other subregisters.
1198
687k
    if (Op.getSubReg() == 0 || 
Op.isUndef()44.0k
)
1199
687k
      return true;
1200
687k
  }
1201
18.4E
  return false;
1202
687k
}
1203
1204
bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
1205
                                                MachineInstr *CopyMI,
1206
7.46M
                                                bool &IsDefCopy) {
1207
7.46M
  IsDefCopy = false;
1208
7.46M
  unsigned SrcReg = CP.isFlipped() ? 
CP.getDstReg()1.86M
:
CP.getSrcReg()5.59M
;
1209
7.46M
  unsigned SrcIdx = CP.isFlipped() ? 
CP.getDstIdx()1.86M
:
CP.getSrcIdx()5.59M
;
1210
7.46M
  unsigned DstReg = CP.isFlipped() ? 
CP.getSrcReg()1.86M
:
CP.getDstReg()5.59M
;
1211
7.46M
  unsigned DstIdx = CP.isFlipped() ? 
CP.getSrcIdx()1.86M
:
CP.getDstIdx()5.59M
;
1212
7.46M
  if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
1213
1.70M
    return false;
1214
5.75M
1215
5.75M
  LiveInterval &SrcInt = LIS->getInterval(SrcReg);
1216
5.75M
  SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
1217
5.75M
  VNInfo *ValNo = SrcInt.Query(CopyIdx).valueIn();
1218
5.75M
  if (!ValNo)
1219
1
    return false;
1220
5.75M
  if (ValNo->isPHIDef() || 
ValNo->isUnused()5.47M
)
1221
280k
    return false;
1222
5.47M
  MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def);
1223
5.47M
  if (!DefMI)
1224
0
    return false;
1225
5.47M
  if (DefMI->isCopyLike()) {
1226
2.90M
    IsDefCopy = true;
1227
2.90M
    return false;
1228
2.90M
  }
1229
2.56M
  if (!TII->isAsCheapAsAMove(*DefMI))
1230
1.40M
    return false;
1231
1.16M
  if (!TII->isTriviallyReMaterializable(*DefMI, AA))
1232
479k
    return false;
1233
687k
  if (!definesFullReg(*DefMI, SrcReg))
1234
0
    return false;
1235
687k
  bool SawStore = false;
1236
687k
  if (!DefMI->isSafeToMove(AA, SawStore))
1237
0
    return false;
1238
687k
  const MCInstrDesc &MCID = DefMI->getDesc();
1239
687k
  if (MCID.getNumDefs() != 1)
1240
0
    return false;
1241
687k
  // Only support subregister destinations when the def is read-undef.
1242
687k
  MachineOperand &DstOperand = CopyMI->getOperand(0);
1243
687k
  unsigned CopyDstReg = DstOperand.getReg();
1244
687k
  if (DstOperand.getSubReg() && 
!DstOperand.isUndef()3.59k
)
1245
80
    return false;
1246
687k
1247
687k
  // If both SrcIdx and DstIdx are set, correct rematerialization would widen
1248
687k
  // the register substantially (beyond both source and dest size). This is bad
1249
687k
  // for performance since it can cascade through a function, introducing many
1250
687k
  // extra spills and fills (e.g. ARM can easily end up copying QQQQPR registers
1251
687k
  // around after a few subreg copies).
1252
687k
  if (SrcIdx && 
DstIdx3.29k
)
1253
0
    return false;
1254
687k
1255
687k
  const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF);
1256
687k
  if (!DefMI->isImplicitDef()) {
1257
687k
    if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
1258
582k
      unsigned NewDstReg = DstReg;
1259
582k
1260
582k
      unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(),
1261
582k
                                              DefMI->getOperand(0).getSubReg());
1262
582k
      if (NewDstIdx)
1263
39.1k
        NewDstReg = TRI->getSubReg(DstReg, NewDstIdx);
1264
582k
1265
582k
      // Finally, make sure that the physical subregister that will be
1266
582k
      // constructed later is permitted for the instruction.
1267
582k
      if (!DefRC->contains(NewDstReg))
1268
0
        return false;
1269
104k
    } else {
1270
104k
      // Theoretically, some stack frame reference could exist. Just make sure
1271
104k
      // it hasn't actually happened.
1272
104k
      assert(TargetRegisterInfo::isVirtualRegister(DstReg) &&
1273
104k
             "Only expect to deal with virtual or physical registers");
1274
104k
    }
1275
687k
  }
1276
687k
1277
687k
  DebugLoc DL = CopyMI->getDebugLoc();
1278
687k
  MachineBasicBlock *MBB = CopyMI->getParent();
1279
687k
  MachineBasicBlock::iterator MII =
1280
687k
    std::next(MachineBasicBlock::iterator(CopyMI));
1281
687k
  TII->reMaterialize(*MBB, MII, DstReg, SrcIdx, *DefMI, *TRI);
1282
687k
  MachineInstr &NewMI = *std::prev(MII);
1283
687k
  NewMI.setDebugLoc(DL);
1284
687k
1285
687k
  // In a situation like the following:
1286
687k
  //     %0:subreg = instr              ; DefMI, subreg = DstIdx
1287
687k
  //     %1        = copy %0:subreg ; CopyMI, SrcIdx = 0
1288
687k
  // instead of widening %1 to the register class of %0 simply do:
1289
687k
  //     %1 = instr
1290
687k
  const TargetRegisterClass *NewRC = CP.getNewRC();
1291
687k
  if (DstIdx != 0) {
1292
536
    MachineOperand &DefMO = NewMI.getOperand(0);
1293
536
    if (DefMO.getSubReg() == DstIdx) {
1294
357
      assert(SrcIdx == 0 && CP.isFlipped()
1295
357
             && "Shouldn't have SrcIdx+DstIdx at this point");
1296
357
      const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
1297
357
      const TargetRegisterClass *CommonRC =
1298
357
        TRI->getCommonSubClass(DefRC, DstRC);
1299
357
      if (CommonRC != nullptr) {
1300
357
        NewRC = CommonRC;
1301
357
        DstIdx = 0;
1302
357
        DefMO.setSubReg(0);
1303
357
        DefMO.setIsUndef(false); // Only subregs can have def+undef.
1304
357
      }
1305
357
    }
1306
536
  }
1307
687k
1308
687k
  // CopyMI may have implicit operands, save them so that we can transfer them
1309
687k
  // over to the newly materialized instruction after CopyMI is removed.
1310
687k
  SmallVector<MachineOperand, 4> ImplicitOps;
1311
687k
  ImplicitOps.reserve(CopyMI->getNumOperands() -
1312
687k
                      CopyMI->getDesc().getNumOperands());
1313
687k
  for (unsigned I = CopyMI->getDesc().getNumOperands(),
1314
687k
                E = CopyMI->getNumOperands();
1315
687k
       I != E; 
++I1
) {
1316
1
    MachineOperand &MO = CopyMI->getOperand(I);
1317
1
    if (MO.isReg()) {
1318
1
      assert(MO.isImplicit() && "No explicit operands after implicit operands.");
1319
1
      // Discard VReg implicit defs.
1320
1
      if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
1321
1
        ImplicitOps.push_back(MO);
1322
1
    }
1323
1
  }
1324
687k
1325
687k
  LIS->ReplaceMachineInstrInMaps(*CopyMI, NewMI);
1326
687k
  CopyMI->eraseFromParent();
1327
687k
  ErasedInstrs.insert(CopyMI);
1328
687k
1329
687k
  // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86).
1330
687k
  // We need to remember these so we can add intervals once we insert
1331
687k
  // NewMI into SlotIndexes.
1332
687k
  SmallVector<unsigned, 4> NewMIImplDefs;
1333
687k
  for (unsigned i = NewMI.getDesc().getNumOperands(),
1334
687k
                e = NewMI.getNumOperands();
1335
722k
       i != e; 
++i34.8k
) {
1336
34.8k
    MachineOperand &MO = NewMI.getOperand(i);
1337
34.8k
    if (MO.isReg() && MO.isDef()) {
1338
34.7k
      assert(MO.isImplicit() && MO.isDead() &&
1339
34.7k
             TargetRegisterInfo::isPhysicalRegister(MO.getReg()));
1340
34.7k
      NewMIImplDefs.push_back(MO.getReg());
1341
34.7k
    }
1342
34.8k
  }
1343
687k
1344
687k
  if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
1345
104k
    unsigned NewIdx = NewMI.getOperand(0).getSubReg();
1346
104k
1347
104k
    if (DefRC != nullptr) {
1348
104k
      if (NewIdx)
1349
7.81k
        NewRC = TRI->getMatchingSuperRegClass(NewRC, DefRC, NewIdx);
1350
97.0k
      else
1351
97.0k
        NewRC = TRI->getCommonSubClass(NewRC, DefRC);
1352
104k
      assert(NewRC && "subreg chosen for remat incompatible with instruction");
1353
104k
    }
1354
104k
    // Remap subranges to new lanemask and change register class.
1355
104k
    LiveInterval &DstInt = LIS->getInterval(DstReg);
1356
104k
    for (LiveInterval::SubRange &SR : DstInt.subranges()) {
1357
6
      SR.LaneMask = TRI->composeSubRegIndexLaneMask(DstIdx, SR.LaneMask);
1358
6
    }
1359
104k
    MRI->setRegClass(DstReg, NewRC);
1360
104k
1361
104k
    // Update machine operands and add flags.
1362
104k
    updateRegDefsUses(DstReg, DstReg, DstIdx);
1363
104k
    NewMI.getOperand(0).setSubReg(NewIdx);
1364
104k
    // updateRegDefUses can add an "undef" flag to the definition, since
1365
104k
    // it will replace DstReg with DstReg.DstIdx. If NewIdx is 0, make
1366
104k
    // sure that "undef" is not set.
1367
104k
    if (NewIdx == 0)
1368
97.0k
      NewMI.getOperand(0).setIsUndef(false);
1369
104k
    // Add dead subregister definitions if we are defining the whole register
1370
104k
    // but only part of it is live.
1371
104k
    // This could happen if the rematerialization instruction is rematerializing
1372
104k
    // more than actually is used in the register.
1373
104k
    // An example would be:
1374
104k
    // %1 = LOAD CONSTANTS 5, 8 ; Loading both 5 and 8 in different subregs
1375
104k
    // ; Copying only part of the register here, but the rest is undef.
1376
104k
    // %2:sub_16bit<def, read-undef> = COPY %1:sub_16bit
1377
104k
    // ==>
1378
104k
    // ; Materialize all the constants but only using one
1379
104k
    // %2 = LOAD_CONSTANTS 5, 8
1380
104k
    //
1381
104k
    // at this point for the part that wasn't defined before we could have
1382
104k
    // subranges missing the definition.
1383
104k
    if (NewIdx == 0 && 
DstInt.hasSubRanges()97.0k
) {
1384
3
      SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI);
1385
3
      SlotIndex DefIndex =
1386
3
          CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
1387
3
      LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(DstReg);
1388
3
      VNInfo::Allocator& Alloc = LIS->getVNInfoAllocator();
1389
4
      for (LiveInterval::SubRange &SR : DstInt.subranges()) {
1390
4
        if (!SR.liveAt(DefIndex))
1391
0
          SR.createDeadDef(DefIndex, Alloc);
1392
4
        MaxMask &= ~SR.LaneMask;
1393
4
      }
1394
3
      if (MaxMask.any()) {
1395
0
        LiveInterval::SubRange *SR = DstInt.createSubRange(Alloc, MaxMask);
1396
0
        SR->createDeadDef(DefIndex, Alloc);
1397
0
      }
1398
3
    }
1399
104k
1400
104k
    // Make sure that the subrange for resultant undef is removed
1401
104k
    // For example:
1402
104k
    //   %1:sub1<def,read-undef> = LOAD CONSTANT 1
1403
104k
    //   %2 = COPY %1
1404
104k
    // ==>
1405
104k
    //   %2:sub1<def, read-undef> = LOAD CONSTANT 1
1406
104k
    //     ; Correct but need to remove the subrange for %2:sub0
1407
104k
    //     ; as it is now undef
1408
104k
    if (NewIdx != 0 && 
DstInt.hasSubRanges()7.81k
) {
1409
2
      // The affected subregister segments can be removed.
1410
2
      SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI);
1411
2
      LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(NewIdx);
1412
2
      bool UpdatedSubRanges = false;
1413
4
      for (LiveInterval::SubRange &SR : DstInt.subranges()) {
1414
4
        if ((SR.LaneMask & DstMask).none()) {
1415
2
          LLVM_DEBUG(dbgs()
1416
2
                     << "Removing undefined SubRange "
1417
2
                     << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
1418
2
          // VNI is in ValNo - remove any segments in this SubRange that have this ValNo
1419
2
          if (VNInfo *RmValNo = SR.getVNInfoAt(CurrIdx.getRegSlot())) {
1420
1
            SR.removeValNo(RmValNo);
1421
1
            UpdatedSubRanges = true;
1422
1
          }
1423
2
        }
1424
4
      }
1425
2
      if (UpdatedSubRanges)
1426
1
        DstInt.removeEmptySubRanges();
1427
2
    }
1428
582k
  } else if (NewMI.getOperand(0).getReg() != CopyDstReg) {
1429
42.6k
    // The New instruction may be defining a sub-register of what's actually
1430
42.6k
    // been asked for. If so it must implicitly define the whole thing.
1431
42.6k
    assert(TargetRegisterInfo::isPhysicalRegister(DstReg) &&
1432
42.6k
           "Only expect virtual or physical registers in remat");
1433
42.6k
    NewMI.getOperand(0).setIsDead(true);
1434
42.6k
    NewMI.addOperand(MachineOperand::CreateReg(
1435
42.6k
        CopyDstReg, true /*IsDef*/, true /*IsImp*/, false /*IsKill*/));
1436
42.6k
    // Record small dead def live-ranges for all the subregisters
1437
42.6k
    // of the destination register.
1438
42.6k
    // Otherwise, variables that live through may miss some
1439
42.6k
    // interferences, thus creating invalid allocation.
1440
42.6k
    // E.g., i386 code:
1441
42.6k
    // %1 = somedef ; %1 GR8
1442
42.6k
    // %2 = remat ; %2 GR32
1443
42.6k
    // CL = COPY %2.sub_8bit
1444
42.6k
    // = somedef %1 ; %1 GR8
1445
42.6k
    // =>
1446
42.6k
    // %1 = somedef ; %1 GR8
1447
42.6k
    // dead ECX = remat ; implicit-def CL
1448
42.6k
    // = somedef %1 ; %1 GR8
1449
42.6k
    // %1 will see the interferences with CL but not with CH since
1450
42.6k
    // no live-ranges would have been created for ECX.
1451
42.6k
    // Fix that!
1452
42.6k
    SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
1453
42.6k
    for (MCRegUnitIterator Units(NewMI.getOperand(0).getReg(), TRI);
1454
129k
         Units.isValid(); 
++Units86.8k
)
1455
86.8k
      if (LiveRange *LR = LIS->getCachedRegUnit(*Units))
1456
19.9k
        LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
1457
42.6k
  }
1458
687k
1459
687k
  if (NewMI.getOperand(0).getSubReg())
1460
7.81k
    NewMI.getOperand(0).setIsUndef();
1461
687k
1462
687k
  // Transfer over implicit operands to the rematerialized instruction.
1463
687k
  for (MachineOperand &MO : ImplicitOps)
1464
1
    NewMI.addOperand(MO);
1465
687k
1466
687k
  SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
1467
722k
  for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; 
++i34.7k
) {
1468
34.7k
    unsigned Reg = NewMIImplDefs[i];
1469
69.4k
    for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); 
++Units34.7k
)
1470
34.7k
      if (LiveRange *LR = LIS->getCachedRegUnit(*Units))
1471
10
        LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
1472
34.7k
  }
1473
687k
1474
687k
  LLVM_DEBUG(dbgs() << "Remat: " << NewMI);
1475
687k
  ++NumReMats;
1476
687k
1477
687k
  // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs
1478
687k
  // to describe DstReg instead.
1479
687k
  if (MRI->use_nodbg_empty(SrcReg)) {
1480
348k
    for (MachineOperand &UseMO : MRI->use_operands(SrcReg)) {
1481
6
      MachineInstr *UseMI = UseMO.getParent();
1482
6
      if (UseMI->isDebugValue()) {
1483
6
        if (TargetRegisterInfo::isPhysicalRegister(DstReg))
1484
6
          UseMO.substPhysReg(DstReg, *TRI);
1485
0
        else
1486
0
          UseMO.setReg(DstReg);
1487
6
        // Move the debug value directly after the def of the rematerialized
1488
6
        // value in DstReg.
1489
6
        MBB->splice(std::next(NewMI.getIterator()), UseMI->getParent(), UseMI);
1490
6
        LLVM_DEBUG(dbgs() << "\t\tupdated: " << *UseMI);
1491
6
      }
1492
6
    }
1493
348k
  }
1494
687k
1495
687k
  if (ToBeUpdated.count(SrcReg))
1496
4.12k
    return true;
1497
683k
1498
683k
  unsigned NumCopyUses = 0;
1499
1.94M
  for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
1500
1.94M
    if (UseMO.getParent()->isCopyLike())
1501
1.33M
      NumCopyUses++;
1502
1.94M
  }
1503
683k
  if (NumCopyUses < LateRematUpdateThreshold) {
1504
683k
    // The source interval can become smaller because we removed a use.
1505
683k
    shrinkToUses(&SrcInt, &DeadDefs);
1506
683k
    if (!DeadDefs.empty())
1507
348k
      eliminateDeadDefs();
1508
683k
  } else {
1509
24
    ToBeUpdated.insert(SrcReg);
1510
24
  }
1511
683k
  return true;
1512
683k
}
1513
1514
3.97M
MachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
1515
3.97M
  // ProcessImplicitDefs may leave some copies of <undef> values, it only
1516
3.97M
  // removes local variables. When we have a copy like:
1517
3.97M
  //
1518
3.97M
  //   %1 = COPY undef %2
1519
3.97M
  //
1520
3.97M
  // We delete the copy and remove the corresponding value number from %1.
1521
3.97M
  // Any uses of that value number are marked as <undef>.
1522
3.97M
1523
3.97M
  // Note that we do not query CoalescerPair here but redo isMoveInstr as the
1524
3.97M
  // CoalescerPair may have a new register class with adjusted subreg indices
1525
3.97M
  // at this point.
1526
3.97M
  unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
1527
3.97M
  if(!isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
1528
0
    return nullptr;
1529
3.97M
1530
3.97M
  SlotIndex Idx = LIS->getInstructionIndex(*CopyMI);
1531
3.97M
  const LiveInterval &SrcLI = LIS->getInterval(SrcReg);
1532
3.97M
  // CopyMI is undef iff SrcReg is not live before the instruction.
1533
3.97M
  if (SrcSubIdx != 0 && 
SrcLI.hasSubRanges()504k
) {
1534
105k
    LaneBitmask SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx);
1535
301k
    for (const LiveInterval::SubRange &SR : SrcLI.subranges()) {
1536
301k
      if ((SR.LaneMask & SrcMask).none())
1537
195k
        continue;
1538
105k
      if (SR.liveAt(Idx))
1539
105k
        return nullptr;
1540
105k
    }
1541
3.87M
  } else if (SrcLI.liveAt(Idx))
1542
3.87M
    return nullptr;
1543
5
1544
5
  // If the undef copy defines a live-out value (i.e. an input to a PHI def),
1545
5
  // then replace it with an IMPLICIT_DEF.
1546
5
  LiveInterval &DstLI = LIS->getInterval(DstReg);
1547
5
  SlotIndex RegIndex = Idx.getRegSlot();
1548
5
  LiveRange::Segment *Seg = DstLI.getSegmentContaining(RegIndex);
1549
5
  assert(Seg != nullptr && "No segment for defining instruction");
1550
5
  if (VNInfo *V = DstLI.getVNInfoAt(Seg->end)) {
1551
3
    if (V->isPHIDef()) {
1552
3
      CopyMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
1553
9
      for (unsigned i = CopyMI->getNumOperands(); i != 0; 
--i6
) {
1554
6
        MachineOperand &MO = CopyMI->getOperand(i-1);
1555
6
        if (MO.isReg() && MO.isUse())
1556
3
          CopyMI->RemoveOperand(i-1);
1557
6
      }
1558
3
      LLVM_DEBUG(dbgs() << "\tReplaced copy of <undef> value with an "
1559
3
                           "implicit def\n");
1560
3
      return CopyMI;
1561
3
    }
1562
2
  }
1563
2
1564
2
  // Remove any DstReg segments starting at the instruction.
1565
2
  LLVM_DEBUG(dbgs() << "\tEliminating copy of <undef> value\n");
1566
2
1567
2
  // Remove value or merge with previous one in case of a subregister def.
1568
2
  if (VNInfo *PrevVNI = DstLI.getVNInfoAt(Idx)) {
1569
1
    VNInfo *VNI = DstLI.getVNInfoAt(RegIndex);
1570
1
    DstLI.MergeValueNumberInto(VNI, PrevVNI);
1571
1
1572
1
    // The affected subregister segments can be removed.
1573
1
    LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx);
1574
2
    for (LiveInterval::SubRange &SR : DstLI.subranges()) {
1575
2
      if ((SR.LaneMask & DstMask).none())
1576
1
        continue;
1577
1
1578
1
      VNInfo *SVNI = SR.getVNInfoAt(RegIndex);
1579
1
      assert(SVNI != nullptr && SlotIndex::isSameInstr(SVNI->def, RegIndex));
1580
1
      SR.removeValNo(SVNI);
1581
1
    }
1582
1
    DstLI.removeEmptySubRanges();
1583
1
  } else
1584
1
    LIS->removeVRegDefAt(DstLI, RegIndex);
1585
2
1586
2
  // Mark uses as undef.
1587
6
  for (MachineOperand &MO : MRI->reg_nodbg_operands(DstReg)) {
1588
6
    if (MO.isDef() /*|| MO.isUndef()*/)
1589
3
      continue;
1590
3
    const MachineInstr &MI = *MO.getParent();
1591
3
    SlotIndex UseIdx = LIS->getInstructionIndex(MI);
1592
3
    LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
1593
3
    bool isLive;
1594
3
    if (!UseMask.all() && 
DstLI.hasSubRanges()0
) {
1595
0
      isLive = false;
1596
0
      for (const LiveInterval::SubRange &SR : DstLI.subranges()) {
1597
0
        if ((SR.LaneMask & UseMask).none())
1598
0
          continue;
1599
0
        if (SR.liveAt(UseIdx)) {
1600
0
          isLive = true;
1601
0
          break;
1602
0
        }
1603
0
      }
1604
0
    } else
1605
3
      isLive = DstLI.liveAt(UseIdx);
1606
3
    if (isLive)
1607
2
      continue;
1608
1
    MO.setIsUndef(true);
1609
1
    LLVM_DEBUG(dbgs() << "\tnew undef: " << UseIdx << '\t' << MI);
1610
1
  }
1611
2
1612
2
  // A def of a subregister may be a use of the other subregisters, so
1613
2
  // deleting a def of a subregister may also remove uses. Since CopyMI
1614
2
  // is still part of the function (but about to be erased), mark all
1615
2
  // defs of DstReg in it as <undef>, so that shrinkToUses would
1616
2
  // ignore them.
1617
2
  for (MachineOperand &MO : CopyMI->operands())
1618
4
    if (MO.isReg() && MO.isDef() && 
MO.getReg() == DstReg2
)
1619
2
      MO.setIsUndef(true);
1620
2
  LIS->shrinkToUses(&DstLI);
1621
2
1622
2
  return CopyMI;
1623
2
}
1624
1625
void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
1626
700k
                                     MachineOperand &MO, unsigned SubRegIdx) {
1627
700k
  LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubRegIdx);
1628
700k
  if (MO.isDef())
1629
284k
    Mask = ~Mask;
1630
700k
  bool IsUndef = true;
1631
2.21M
  for (const LiveInterval::SubRange &S : Int.subranges()) {
1632
2.21M
    if ((S.LaneMask & Mask).none())
1633
1.37M
      continue;
1634
842k
    if (S.liveAt(UseIdx)) {
1635
700k
      IsUndef = false;
1636
700k
      break;
1637
700k
    }
1638
842k
  }
1639
700k
  if (IsUndef) {
1640
48
    MO.setIsUndef(true);
1641
48
    // We found out some subregister use is actually reading an undefined
1642
48
    // value. In some cases the whole vreg has become undefined at this
1643
48
    // point so we have to potentially shrink the main range if the
1644
48
    // use was ending a live segment there.
1645
48
    LiveQueryResult Q = Int.Query(UseIdx);
1646
48
    if (Q.valueOut() == nullptr)
1647
0
      ShrinkMainRange = true;
1648
48
  }
1649
700k
}
1650
1651
void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
1652
                                          unsigned DstReg,
1653
3.77M
                                          unsigned SubIdx) {
1654
3.77M
  bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
1655
3.77M
  LiveInterval *DstInt = DstIsPhys ? 
nullptr241k
:
&LIS->getInterval(DstReg)3.53M
;
1656
3.77M
1657
3.77M
  if (DstInt && 
DstInt->hasSubRanges()3.53M
&&
DstReg != SrcReg187k
) {
1658
864k
    for (MachineOperand &MO : MRI->reg_operands(DstReg)) {
1659
864k
      unsigned SubReg = MO.getSubReg();
1660
864k
      if (SubReg == 0 || 
MO.isUndef()644k
)
1661
259k
        continue;
1662
605k
      MachineInstr &MI = *MO.getParent();
1663
605k
      if (MI.isDebugValue())
1664
0
        continue;
1665
605k
      SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot(true);
1666
605k
      addUndefFlag(*DstInt, UseIdx, MO, SubReg);
1667
605k
    }
1668
187k
  }
1669
3.77M
1670
3.77M
  SmallPtrSet<MachineInstr*, 8> Visited;
1671
3.77M
  for (MachineRegisterInfo::reg_instr_iterator
1672
3.77M
       I = MRI->reg_instr_begin(SrcReg), E = MRI->reg_instr_end();
1673
11.8M
       I != E; ) {
1674
8.04M
    MachineInstr *UseMI = &*(I++);
1675
8.04M
1676
8.04M
    // Each instruction can only be rewritten once because sub-register
1677
8.04M
    // composition is not always idempotent. When SrcReg != DstReg, rewriting
1678
8.04M
    // the UseMI operands removes them from the SrcReg use-def chain, but when
1679
8.04M
    // SrcReg is DstReg we could encounter UseMI twice if it has multiple
1680
8.04M
    // operands mentioning the virtual register.
1681
8.04M
    if (SrcReg == DstReg && 
!Visited.insert(UseMI).second1.17M
)
1682
32.9k
      continue;
1683
8.01M
1684
8.01M
    SmallVector<unsigned,8> Ops;
1685
8.01M
    bool Reads, Writes;
1686
8.01M
    std::tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
1687
8.01M
1688
8.01M
    // If SrcReg wasn't read, it may still be the case that DstReg is live-in
1689
8.01M
    // because SrcReg is a sub-register.
1690
8.01M
    if (DstInt && 
!Reads7.69M
&&
SubIdx3.18M
&&
!UseMI->isDebugValue()576k
)
1691
576k
      Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
1692
8.01M
1693
8.01M
    // Replace SrcReg with DstReg in all UseMI operands.
1694
16.3M
    for (unsigned i = 0, e = Ops.size(); i != e; 
++i8.36M
) {
1695
8.36M
      MachineOperand &MO = UseMI->getOperand(Ops[i]);
1696
8.36M
1697
8.36M
      // Adjust <undef> flags in case of sub-register joins. We don't want to
1698
8.36M
      // turn a full def into a read-modify-write sub-register def and vice
1699
8.36M
      // versa.
1700
8.36M
      if (SubIdx && 
MO.isDef()1.30M
)
1701
609k
        MO.setIsUndef(!Reads);
1702
8.36M
1703
8.36M
      // A subreg use of a partially undef (super) register may be a complete
1704
8.36M
      // undef use now and then has to be marked that way.
1705
8.36M
      if (SubIdx != 0 && 
MO.isUse()1.30M
&&
MRI->shouldTrackSubRegLiveness(DstReg)695k
) {
1706
95.0k
        if (!DstInt->hasSubRanges()) {
1707
2
          BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
1708
2
          LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg);
1709
2
          DstInt->createSubRangeFrom(Allocator, Mask, *DstInt);
1710
2
        }
1711
95.0k
        SlotIndex MIIdx = UseMI->isDebugValue()
1712
95.0k
                              ? 
LIS->getSlotIndexes()->getIndexBefore(*UseMI)4
1713
95.0k
                              : 
LIS->getInstructionIndex(*UseMI)95.0k
;
1714
95.0k
        SlotIndex UseIdx = MIIdx.getRegSlot(true);
1715
95.0k
        addUndefFlag(*DstInt, UseIdx, MO, SubIdx);
1716
95.0k
      }
1717
8.36M
1718
8.36M
      if (DstIsPhys)
1719
311k
        MO.substPhysReg(DstReg, *TRI);
1720
8.05M
      else
1721
8.05M
        MO.substVirtReg(DstReg, SubIdx, *TRI);
1722
8.36M
    }
1723
8.01M
1724
8.01M
    LLVM_DEBUG({
1725
8.01M
      dbgs() << "\t\tupdated: ";
1726
8.01M
      if (!UseMI->isDebugValue())
1727
8.01M
        dbgs() << LIS->getInstructionIndex(*UseMI) << "\t";
1728
8.01M
      dbgs() << *UseMI;
1729
8.01M
    });
1730
8.01M
  }
1731
3.77M
}
1732
1733
7.15M
bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
1734
7.15M
  // Always join simple intervals that are defined by a single copy from a
1735
7.15M
  // reserved register. This doesn't increase register pressure, so it is
1736
7.15M
  // always beneficial.
1737
7.15M
  if (!MRI->isReserved(CP.getDstReg())) {
1738
6.89M
    LLVM_DEBUG(dbgs() << "\tCan only merge into reserved registers.\n");
1739
6.89M
    return false;
1740
6.89M
  }
1741
260k
1742
260k
  LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg());
1743
260k
  if (JoinVInt.containsOneValue())
1744
245k
    return true;
1745
14.9k
1746
14.9k
  LLVM_DEBUG(
1747
14.9k
      dbgs() << "\tCannot join complex intervals into reserved register.\n");
1748
14.9k
  return false;
1749
14.9k
}
1750
1751
11.3M
bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
1752
11.3M
  Again = false;
1753
11.3M
  LLVM_DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI);
1754
11.3M
1755
11.3M
  CoalescerPair CP(*TRI);
1756
11.3M
  if (!CP.setRegisters(CopyMI)) {
1757
234k
    LLVM_DEBUG(dbgs() << "\tNot coalescable.\n");
1758
234k
    return false;
1759
234k
  }
1760
11.1M
1761
11.1M
  if (CP.getNewRC()) {
1762
3.98M
    auto SrcRC = MRI->getRegClass(CP.getSrcReg());
1763
3.98M
    auto DstRC = MRI->getRegClass(CP.getDstReg());
1764
3.98M
    unsigned SrcIdx = CP.getSrcIdx();
1765
3.98M
    unsigned DstIdx = CP.getDstIdx();
1766
3.98M
    if (CP.isFlipped()) {
1767
397k
      std::swap(SrcIdx, DstIdx);
1768
397k
      std::swap(SrcRC, DstRC);
1769
397k
    }
1770
3.98M
    if (!TRI->shouldCoalesce(CopyMI, SrcRC, SrcIdx, DstRC, DstIdx,
1771
3.98M
                             CP.getNewRC(), *LIS)) {
1772
1.59k
      LLVM_DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n");
1773
1.59k
      return false;
1774
1.59k
    }
1775
11.1M
  }
1776
11.1M
1777
11.1M
  // Dead code elimination. This really should be handled by MachineDCE, but
1778
11.1M
  // sometimes dead copies slip through, and we can't generate invalid live
1779
11.1M
  // ranges.
1780
11.1M
  if (!CP.isPhys() && 
CopyMI->allDefsAreDead()3.98M
) {
1781
2.37k
    LLVM_DEBUG(dbgs() << "\tCopy is dead.\n");
1782
2.37k
    DeadDefs.push_back(CopyMI);
1783
2.37k
    eliminateDeadDefs();
1784
2.37k
    return true;
1785
2.37k
  }
1786
11.1M
1787
11.1M
  // Eliminate undefs.
1788
11.1M
  if (!CP.isPhys()) {
1789
3.97M
    // If this is an IMPLICIT_DEF, leave it alone, but don't try to coalesce.
1790
3.97M
    if (MachineInstr *UndefMI = eliminateUndefCopy(CopyMI)) {
1791
5
      if (UndefMI->isImplicitDef())
1792
3
        return false;
1793
2
      deleteInstr(CopyMI);
1794
2
      return false;  // Not coalescable.
1795
2
    }
1796
3.97M
  }
1797
11.1M
1798
11.1M
  // Coalesced copies are normally removed immediately, but transformations
1799
11.1M
  // like removeCopyByCommutingDef() can inadvertently create identity copies.
1800
11.1M
  // When that happens, just join the values and remove the copy.
1801
11.1M
  if (CP.getSrcReg() == CP.getDstReg()) {
1802
0
    LiveInterval &LI = LIS->getInterval(CP.getSrcReg());
1803
0
    LLVM_DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n');
1804
0
    const SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
1805
0
    LiveQueryResult LRQ = LI.Query(CopyIdx);
1806
0
    if (VNInfo *DefVNI = LRQ.valueDefined()) {
1807
0
      VNInfo *ReadVNI = LRQ.valueIn();
1808
0
      assert(ReadVNI && "No value before copy and no <undef> flag.");
1809
0
      assert(ReadVNI != DefVNI && "Cannot read and define the same value.");
1810
0
      LI.MergeValueNumberInto(DefVNI, ReadVNI);
1811
0
1812
0
      // Process subregister liveranges.
1813
0
      for (LiveInterval::SubRange &S : LI.subranges()) {
1814
0
        LiveQueryResult SLRQ = S.Query(CopyIdx);
1815
0
        if (VNInfo *SDefVNI = SLRQ.valueDefined()) {
1816
0
          VNInfo *SReadVNI = SLRQ.valueIn();
1817
0
          S.MergeValueNumberInto(SDefVNI, SReadVNI);
1818
0
        }
1819
0
      }
1820
0
      LLVM_DEBUG(dbgs() << "\tMerged values:          " << LI << '\n');
1821
0
    }
1822
0
    deleteInstr(CopyMI);
1823
0
    return true;
1824
0
  }
1825
11.1M
1826
11.1M
  // Enforce policies.
1827
11.1M
  if (CP.isPhys()) {
1828
7.15M
    LLVM_DEBUG(dbgs() << "\tConsidering merging "
1829
7.15M
                      << printReg(CP.getSrcReg(), TRI) << " with "
1830
7.15M
                      << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n');
1831
7.15M
    if (!canJoinPhys(CP)) {
1832
6.91M
      // Before giving up coalescing, if definition of source is defined by
1833
6.91M
      // trivial computation, try rematerializing it.
1834
6.91M
      bool IsDefCopy;
1835
6.91M
      if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
1836
582k
        return true;
1837
6.32M
      if (IsDefCopy)
1838
2.79M
        Again = true;  // May be possible to coalesce later.
1839
6.32M
      return false;
1840
6.32M
    }
1841
3.97M
  } else {
1842
3.97M
    // When possible, let DstReg be the larger interval.
1843
3.97M
    if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).size() >
1844
2.93M
                           LIS->getInterval(CP.getDstReg()).size())
1845
999k
      CP.flip();
1846
3.97M
1847
3.97M
    LLVM_DEBUG({
1848
3.97M
      dbgs() << "\tConsidering merging to "
1849
3.97M
             << TRI->getRegClassName(CP.getNewRC()) << " with ";
1850
3.97M
      if (CP.getDstIdx() && CP.getSrcIdx())
1851
3.97M
        dbgs() << printReg(CP.getDstReg()) << " in "
1852
3.97M
               << TRI->getSubRegIndexName(CP.getDstIdx()) << " and "
1853
3.97M
               << printReg(CP.getSrcReg()) << " in "
1854
3.97M
               << TRI->getSubRegIndexName(CP.getSrcIdx()) << '\n';
1855
3.97M
      else
1856
3.97M
        dbgs() << printReg(CP.getSrcReg(), TRI) << " in "
1857
3.97M
               << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n';
1858
3.97M
    });
1859
3.97M
  }
1860
11.1M
1861
11.1M
  ShrinkMask = LaneBitmask::getNone();
1862
4.22M
  ShrinkMainRange = false;
1863
4.22M
1864
4.22M
  // Okay, attempt to join these two intervals.  On failure, this returns false.
1865
4.22M
  // Otherwise, if one of the intervals being joined is a physreg, this method
1866
4.22M
  // always canonicalizes DstInt to be it.  The output "SrcInt" will not have
1867
4.22M
  // been modified, so we can use this information below to update aliases.
1868
4.22M
  if (!joinIntervals(CP)) {
1869
549k
    // Coalescing failed.
1870
549k
1871
549k
    // If definition of source is defined by trivial computation, try
1872
549k
    // rematerializing it.
1873
549k
    bool IsDefCopy;
1874
549k
    if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
1875
104k
      return true;
1876
444k
1877
444k
    // If we can eliminate the copy without merging the live segments, do so
1878
444k
    // now.
1879
444k
    if (!CP.isPartial() && 
!CP.isPhys()386k
) {
1880
383k
      bool Changed = adjustCopiesBackFrom(CP, CopyMI);
1881
383k
      bool Shrink = false;
1882
383k
      if (!Changed)
1883
383k
        std::tie(Changed, Shrink) = removeCopyByCommutingDef(CP, CopyMI);
1884
383k
      if (Changed) {
1885
591
        deleteInstr(CopyMI);
1886
591
        if (Shrink) {
1887
1
          unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : 
CP.getDstReg()0
;
1888
1
          LiveInterval &DstLI = LIS->getInterval(DstReg);
1889
1
          shrinkToUses(&DstLI);
1890
1
          LLVM_DEBUG(dbgs() << "\t\tshrunk:   " << DstLI << '\n');
1891
1
        }
1892
591
        LLVM_DEBUG(dbgs() << "\tTrivial!\n");
1893
591
        return true;
1894
591
      }
1895
444k
    }
1896
444k
1897
444k
    // Try and see if we can partially eliminate the copy by moving the copy to
1898
444k
    // its predecessor.
1899
444k
    if (!CP.isPartial() && 
!CP.isPhys()386k
)
1900
382k
      if (removePartialRedundancy(CP, *CopyMI))
1901
519
        return true;
1902
443k
1903
443k
    // Otherwise, we are unable to join the intervals.
1904
443k
    LLVM_DEBUG(dbgs() << "\tInterference!\n");
1905
443k
    Again = true;  // May be possible to coalesce later.
1906
443k
    return false;
1907
443k
  }
1908
3.67M
1909
3.67M
  // Coalescing to a virtual register that is of a sub-register class of the
1910
3.67M
  // other. Make sure the resulting register is set to the right register class.
1911
3.67M
  if (CP.isCrossClass()) {
1912
1.73M
    ++numCrossRCs;
1913
1.73M
    MRI->setRegClass(CP.getDstReg(), CP.getNewRC());
1914
1.73M
  }
1915
3.67M
1916
3.67M
  // Removing sub-register copies can ease the register class constraints.
1917
3.67M
  // Make sure we attempt to inflate the register class of DstReg.
1918
3.67M
  if (!CP.isPhys() && 
RegClassInfo.isProperSubClass(CP.getNewRC())3.43M
)
1919
18.6k
    InflateRegs.push_back(CP.getDstReg());
1920
3.67M
1921
3.67M
  // CopyMI has been erased by joinIntervals at this point. Remove it from
1922
3.67M
  // ErasedInstrs since copyCoalesceWorkList() won't add a successful join back
1923
3.67M
  // to the work list. This keeps ErasedInstrs from growing needlessly.
1924
3.67M
  ErasedInstrs.erase(CopyMI);
1925
3.67M
1926
3.67M
  // Rewrite all SrcReg operands to DstReg.
1927
3.67M
  // Also update DstReg operands to include DstIdx if it is set.
1928
3.67M
  if (CP.getDstIdx())
1929
140
    updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
1930
3.67M
  updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
1931
3.67M
1932
3.67M
  // Shrink subregister ranges if necessary.
1933
3.67M
  if (ShrinkMask.any()) {
1934
74
    LiveInterval &LI = LIS->getInterval(CP.getDstReg());
1935
197
    for (LiveInterval::SubRange &S : LI.subranges()) {
1936
197
      if ((S.LaneMask & ShrinkMask).none())
1937
110
        continue;
1938
87
      LLVM_DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask)
1939
87
                        << ")\n");
1940
87
      LIS->shrinkToUses(S, LI.reg);
1941
87
    }
1942
74
    LI.removeEmptySubRanges();
1943
74
  }
1944
3.67M
1945
3.67M
  // CP.getSrcReg()'s live interval has been merged into CP.getDstReg's live
1946
3.67M
  // interval. Since CP.getSrcReg() is in ToBeUpdated set and its live interval
1947
3.67M
  // is not up-to-date, need to update the merged live interval here.
1948
3.67M
  if (ToBeUpdated.count(CP.getSrcReg()))
1949
7
    ShrinkMainRange = true;
1950
3.67M
1951
3.67M
  if (ShrinkMainRange) {
1952
7
    LiveInterval &LI = LIS->getInterval(CP.getDstReg());
1953
7
    shrinkToUses(&LI);
1954
7
  }
1955
3.67M
1956
3.67M
  // SrcReg is guaranteed to be the register whose live interval that is
1957
3.67M
  // being merged.
1958
3.67M
  LIS->removeInterval(CP.getSrcReg());
1959
3.67M
1960
3.67M
  // Update regalloc hint.
1961
3.67M
  TRI->updateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);
1962
3.67M
1963
3.67M
  LLVM_DEBUG({
1964
3.67M
    dbgs() << "\tSuccess: " << printReg(CP.getSrcReg(), TRI, CP.getSrcIdx())
1965
3.67M
           << " -> " << printReg(CP.getDstReg(), TRI, CP.getDstIdx()) << '\n';
1966
3.67M
    dbgs() << "\tResult = ";
1967
3.67M
    if (CP.isPhys())
1968
3.67M
      dbgs() << printReg(CP.getDstReg(), TRI);
1969
3.67M
    else
1970
3.67M
      dbgs() << LIS->getInterval(CP.getDstReg());
1971
3.67M
    dbgs() << '\n';
1972
3.67M
  });
1973
3.67M
1974
3.67M
  ++numJoins;
1975
3.67M
  return true;
1976
3.67M
}
1977
1978
245k
bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
1979
245k
  unsigned DstReg = CP.getDstReg();
1980
245k
  unsigned SrcReg = CP.getSrcReg();
1981
245k
  assert(CP.isPhys() && "Must be a physreg copy");
1982
245k
  assert(MRI->isReserved(DstReg) && "Not a reserved register");
1983
245k
  LiveInterval &RHS = LIS->getInterval(SrcReg);
1984
245k
  LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n');
1985
245k
1986
245k
  assert(RHS.containsOneValue() && "Invalid join with reserved register");
1987
245k
1988
245k
  // Optimization for reserved registers like ESP. We can only merge with a
1989
245k
  // reserved physreg if RHS has a single value that is a copy of DstReg.
1990
245k
  // The live range of the reserved register will look like a set of dead defs
1991
245k
  // - we don't properly track the live range of reserved registers.
1992
245k
1993
245k
  // Deny any overlapping intervals.  This depends on all the reserved
1994
245k
  // register live ranges to look like dead defs.
1995
245k
  if (!MRI->isConstantPhysReg(DstReg)) {
1996
141k
    for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); 
++UI72.6k
) {
1997
75.8k
      // Abort if not all the regunits are reserved.
1998
151k
      for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); 
++RI75.8k
) {
1999
75.8k
        if (!MRI->isReserved(*RI))
2000
2
          return false;
2001
75.8k
      }
2002
75.8k
      
if (75.8k
RHS.overlaps(LIS->getRegUnit(*UI))75.8k
) {
2003
3.21k
        LLVM_DEBUG(dbgs() << "\t\tInterference: " << printRegUnit(*UI, TRI)
2004
3.21k
                          << '\n');
2005
3.21k
        return false;
2006
3.21k
      }
2007
75.8k
    }
2008
69.2k
2009
69.2k
    // We must also check for overlaps with regmask clobbers.
2010
69.2k
    BitVector RegMaskUsable;
2011
66.0k
    if (LIS->checkRegMaskInterference(RHS, RegMaskUsable) &&
2012
66.0k
        
!RegMaskUsable.test(DstReg)11
) {
2013
8
      LLVM_DEBUG(dbgs() << "\t\tRegMask interference\n");
2014
8
      return false;
2015
8
    }
2016
241k
  }
2017
241k
2018
241k
  // Skip any value computations, we are not adding new values to the
2019
241k
  // reserved register.  Also skip merging the live ranges, the reserved
2020
241k
  // register live range doesn't need to be accurate as long as all the
2021
241k
  // defs are there.
2022
241k
2023
241k
  // Delete the identity copy.
2024
241k
  MachineInstr *CopyMI;
2025
241k
  if (CP.isFlipped()) {
2026
241k
    // Physreg is copied into vreg
2027
241k
    //   %y = COPY %physreg_x
2028
241k
    //   ...  //< no other def of %physreg_x here
2029
241k
    //   use %y
2030
241k
    // =>
2031
241k
    //   ...
2032
241k
    //   use %physreg_x
2033
241k
    CopyMI = MRI->getVRegDef(SrcReg);
2034
241k
  } else {
2035
94
    // VReg is copied into physreg:
2036
94
    //   %y = def
2037
94
    //   ... //< no other def or use of %physreg_x here
2038
94
    //   %physreg_x = COPY %y
2039
94
    // =>
2040
94
    //   %physreg_x = def
2041
94
    //   ...
2042
94
    if (!MRI->hasOneNonDBGUse(SrcReg)) {
2043
4
      LLVM_DEBUG(dbgs() << "\t\tMultiple vreg uses!\n");
2044
4
      return false;
2045
4
    }
2046
90
2047
90
    if (!LIS->intervalIsInOneMBB(RHS)) {
2048
15
      LLVM_DEBUG(dbgs() << "\t\tComplex control flow!\n");
2049
15
      return false;
2050
15
    }
2051
75
2052
75
    MachineInstr &DestMI = *MRI->getVRegDef(SrcReg);
2053
75
    CopyMI = &*MRI->use_instr_nodbg_begin(SrcReg);
2054
75
    SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
2055
75
    SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot();
2056
75
2057
75
    if (!MRI->isConstantPhysReg(DstReg)) {
2058
74
      // We checked above that there are no interfering defs of the physical
2059
74
      // register. However, for this case, where we intend to move up the def of
2060
74
      // the physical register, we also need to check for interfering uses.
2061
74
      SlotIndexes *Indexes = LIS->getSlotIndexes();
2062
74
      for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx);
2063
124
           SI != CopyRegIdx; 
SI = Indexes->getNextNonNullIndex(SI)50
) {
2064
53
        MachineInstr *MI = LIS->getInstructionFromIndex(SI);
2065
53
        if (MI->readsRegister(DstReg, TRI)) {
2066
3
          LLVM_DEBUG(dbgs() << "\t\tInterference (read): " << *MI);
2067
3
          return false;
2068
3
        }
2069
53
      }
2070
74
    }
2071
75
2072
75
    // We're going to remove the copy which defines a physical reserved
2073
75
    // register, so remove its valno, etc.
2074
75
    
LLVM_DEBUG72
(dbgs() << "\t\tRemoving phys reg def of "
2075
72
                      << printReg(DstReg, TRI) << " at " << CopyRegIdx << "\n");
2076
72
2077
72
    LIS->removePhysRegDefAt(DstReg, CopyRegIdx);
2078
72
    // Create a new dead def at the new def location.
2079
191
    for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); 
++UI119
) {
2080
119
      LiveRange &LR = LIS->getRegUnit(*UI);
2081
119
      LR.createDeadDef(DestRegIdx, LIS->getVNInfoAllocator());
2082
119
    }
2083
72
  }
2084
241k
2085
241k
  deleteInstr(CopyMI);
2086
241k
2087
241k
  // We don't track kills for reserved registers.
2088
241k
  MRI->clearKillFlags(CP.getSrcReg());
2089
241k
2090
241k
  return true;
2091
241k
}
2092
2093
//===----------------------------------------------------------------------===//
//                 Interference checking and interval joining
//===----------------------------------------------------------------------===//
//
// In the easiest case, the two live ranges being joined are disjoint, and
// there is no interference to consider. It is quite common, though, to have
// overlapping live ranges, and we need to check if the interference can be
// resolved.
//
// The live range of a single SSA value forms a sub-tree of the dominator tree.
// This means that two SSA values overlap if and only if the def of one value
// is contained in the live range of the other value. As a special case, the
// overlapping values can be defined at the same index.
//
// The interference from an overlapping def can be resolved in these cases:
//
// 1. Coalescable copies. The value is defined by a copy that would become an
//    identity copy after joining SrcReg and DstReg. The copy instruction will
//    be removed, and the value will be merged with the source value.
//
//    There can be several copies back and forth, causing many values to be
//    merged into one. We compute a list of ultimate values in the joined live
//    range as well as mappings from the old value numbers.
//
// 2. IMPLICIT_DEF. This instruction is only inserted to ensure all PHI
//    predecessors have a live out value. It doesn't cause real interference,
//    and can be merged into the value it overlaps. Like a coalescable copy, it
//    can be erased after joining.
//
// 3. Copy of external value. The overlapping def may be a copy of a value that
//    is already in the other register. This is like a coalescable copy, but
//    the live range of the source register must be trimmed after erasing the
//    copy instruction:
//
//      %src = COPY %ext
//      %dst = COPY %ext  <-- Remove this COPY, trim the live range of %ext.
//
// 4. Clobbering undefined lanes. Vector registers are sometimes built by
//    defining one lane at a time:
//
//      %dst:ssub0<def,read-undef> = FOO
//      %src = BAR
//      %dst:ssub1 = COPY %src
//
//    The live range of %src overlaps the %dst value defined by FOO, but
//    merging %src into %dst:ssub1 is only going to clobber the ssub1 lane
//    which was undef anyway.
//
//    The value mapping is more complicated in this case. The final live range
//    will have different value numbers for both FOO and BAR, but there is no
//    simple mapping from old to new values. It may even be necessary to add
//    new PHI values.
//
// 5. Clobbering dead lanes. A def may clobber a lane of a vector register that
//    is live, but never read. This can happen because we don't compute
//    individual live ranges per lane.
//
//      %dst = FOO
//      %src = BAR
//      %dst:ssub1 = COPY %src
//
//    This kind of interference is only resolved locally. If the clobbered
//    lane value escapes the block, the join is aborted.
namespace {
2158
2159
/// Track information about values in a single virtual register about to be
2160
/// joined. Objects of this class are always created in pairs - one for each
2161
/// side of the CoalescerPair (or one for each lane of a side of the coalescer
2162
/// pair)
2163
class JoinVals {
2164
  /// Live range we work on.
2165
  LiveRange &LR;
2166
2167
  /// (Main) register we work on.
2168
  const unsigned Reg;
2169
2170
  /// Reg (and therefore the values in this liverange) will end up as
2171
  /// subregister SubIdx in the coalesced register. Either CP.DstIdx or
2172
  /// CP.SrcIdx.
2173
  const unsigned SubIdx;
2174
2175
  /// The LaneMask that this liverange will occupy the coalesced register. May
2176
  /// be smaller than the lanemask produced by SubIdx when merging subranges.
2177
  const LaneBitmask LaneMask;
2178
2179
  /// This is true when joining sub register ranges, false when joining main
2180
  /// ranges.
2181
  const bool SubRangeJoin;
2182
2183
  /// Whether the current LiveInterval tracks subregister liveness.
2184
  const bool TrackSubRegLiveness;
2185
2186
  /// Values that will be present in the final live range.
2187
  SmallVectorImpl<VNInfo*> &NewVNInfo;
2188
2189
  const CoalescerPair &CP;
2190
  LiveIntervals *LIS;
2191
  SlotIndexes *Indexes;
2192
  const TargetRegisterInfo *TRI;
2193
2194
  /// Value number assignments. Maps value numbers in LI to entries in
2195
  /// NewVNInfo. This is suitable for passing to LiveInterval::join().
2196
  SmallVector<int, 8> Assignments;
2197
2198
  /// Conflict resolution for overlapping values.
2199
  enum ConflictResolution {
2200
    /// No overlap, simply keep this value.
2201
    CR_Keep,
2202
2203
    /// Merge this value into OtherVNI and erase the defining instruction.
2204
    /// Used for IMPLICIT_DEF, coalescable copies, and copies from external
2205
    /// values.
2206
    CR_Erase,
2207
2208
    /// Merge this value into OtherVNI but keep the defining instruction.
2209
    /// This is for the special case where OtherVNI is defined by the same
2210
    /// instruction.
2211
    CR_Merge,
2212
2213
    /// Keep this value, and have it replace OtherVNI where possible. This
2214
    /// complicates value mapping since OtherVNI maps to two different values
2215
    /// before and after this def.
2216
    /// Used when clobbering undefined or dead lanes.
2217
    CR_Replace,
2218
2219
    /// Unresolved conflict. Visit later when all values have been mapped.
2220
    CR_Unresolved,
2221
2222
    /// Unresolvable conflict. Abort the join.
2223
    CR_Impossible
2224
  };
2225
2226
  /// Per-value info for LI. The lane bit masks are all relative to the final
2227
  /// joined register, so they can be compared directly between SrcReg and
2228
  /// DstReg.
2229
  struct Val {
2230
    ConflictResolution Resolution = CR_Keep;
2231
2232
    /// Lanes written by this def, 0 for unanalyzed values.
2233
    LaneBitmask WriteLanes;
2234
2235
    /// Lanes with defined values in this register. Other lanes are undef and
2236
    /// safe to clobber.
2237
    LaneBitmask ValidLanes;
2238
2239
    /// Value in LI being redefined by this def.
2240
    VNInfo *RedefVNI = nullptr;
2241
2242
    /// Value in the other live range that overlaps this def, if any.
2243
    VNInfo *OtherVNI = nullptr;
2244
2245
    /// Is this value an IMPLICIT_DEF that can be erased?
2246
    ///
2247
    /// IMPLICIT_DEF values should only exist at the end of a basic block that
2248
    /// is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be
2249
    /// safely erased if they are overlapping a live value in the other live
2250
    /// interval.
2251
    ///
2252
    /// Weird control flow graphs and incomplete PHI handling in
2253
    /// ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with
2254
    /// longer live ranges. Such IMPLICIT_DEF values should be treated like
2255
    /// normal values.
2256
    bool ErasableImplicitDef = false;
2257
2258
    /// True when the live range of this value will be pruned because of an
2259
    /// overlapping CR_Replace value in the other live range.
2260
    bool Pruned = false;
2261
2262
    /// True once Pruned above has been computed.
2263
    bool PrunedComputed = false;
2264
2265
    /// True if this value is determined to be identical to OtherVNI
2266
    /// (in valuesIdentical). This is used with CR_Erase where the erased
2267
    /// copy is redundant, i.e. the source value is already the same as
2268
    /// the destination. In such cases the subranges need to be updated
2269
    /// properly. See comment at pruneSubRegValues for more info.
2270
    bool Identical = false;
2271
2272
8.38M
    Val() = default;
2273
2274
26.0M
    bool isAnalyzed() const { return WriteLanes.any(); }
2275
  };
2276
2277
  /// One entry per value number in LI.
2278
  SmallVector<Val, 8> Vals;
2279
2280
  /// Compute the bitmask of lanes actually written by DefMI.
2281
  /// Set Redef if there are any partial register definitions that depend on the
2282
  /// previous value of the register.
2283
  LaneBitmask computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const;
2284
2285
  /// Find the ultimate value that VNI was copied from.
2286
  std::pair<const VNInfo*,unsigned> followCopyChain(const VNInfo *VNI) const;
2287
2288
  bool valuesIdentical(VNInfo *Value0, VNInfo *Value1, const JoinVals &Other) const;
2289
2290
  /// Analyze ValNo in this live range, and set all fields of Vals[ValNo].
2291
  /// Return a conflict resolution when possible, but leave the hard cases as
2292
  /// CR_Unresolved.
2293
  /// Recursively calls computeAssignment() on this and Other, guaranteeing that
2294
  /// both OtherVNI and RedefVNI have been analyzed and mapped before returning.
2295
  /// The recursion always goes upwards in the dominator tree, making loops
2296
  /// impossible.
2297
  ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other);
2298
2299
  /// Compute the value assignment for ValNo in RI.
2300
  /// This may be called recursively by analyzeValue(), but never for a ValNo on
2301
  /// the stack.
2302
  void computeAssignment(unsigned ValNo, JoinVals &Other);
2303
2304
  /// Assuming ValNo is going to clobber some valid lanes in Other.LR, compute
2305
  /// the extent of the tainted lanes in the block.
2306
  ///
2307
  /// Multiple values in Other.LR can be affected since partial redefinitions
2308
  /// can preserve previously tainted lanes.
2309
  ///
2310
  ///   1 %dst = VLOAD           <-- Define all lanes in %dst
2311
  ///   2 %src = FOO             <-- ValNo to be joined with %dst:ssub0
2312
  ///   3 %dst:ssub1 = BAR       <-- Partial redef doesn't clear taint in ssub0
2313
  ///   4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read
2314
  ///
2315
  /// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes)
2316
  /// entry to TaintedVals.
2317
  ///
2318
  /// Returns false if the tainted lanes extend beyond the basic block.
2319
  bool
2320
  taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
2321
              SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent);
2322
2323
  /// Return true if MI uses any of the given Lanes from Reg.
2324
  /// This does not include partial redefinitions of Reg.
2325
  bool usesLanes(const MachineInstr &MI, unsigned, unsigned, LaneBitmask) const;
2326
2327
  /// Determine if ValNo is a copy of a value number in LR or Other.LR that will
2328
  /// be pruned:
2329
  ///
2330
  ///   %dst = COPY %src
2331
  ///   %src = COPY %dst  <-- This value to be pruned.
2332
  ///   %dst = COPY %src  <-- This value is a copy of a pruned value.
2333
  bool isPrunedValue(unsigned ValNo, JoinVals &Other);
2334
2335
public:
2336
  JoinVals(LiveRange &LR, unsigned Reg, unsigned SubIdx, LaneBitmask LaneMask,
2337
           SmallVectorImpl<VNInfo*> &newVNInfo, const CoalescerPair &cp,
2338
           LiveIntervals *lis, const TargetRegisterInfo *TRI, bool SubRangeJoin,
2339
           bool TrackSubRegLiveness)
2340
    : LR(LR), Reg(Reg), SubIdx(SubIdx), LaneMask(LaneMask),
2341
      SubRangeJoin(SubRangeJoin), TrackSubRegLiveness(TrackSubRegLiveness),
2342
      NewVNInfo(newVNInfo), CP(cp), LIS(lis), Indexes(LIS->getSlotIndexes()),
2343
8.38M
      TRI(TRI), Assignments(LR.getNumValNums(), -1), Vals(LR.getNumValNums()) {}
2344
2345
  /// Analyze defs in LR and compute a value mapping in NewVNInfo.
2346
  /// Returns false if any conflicts were impossible to resolve.
2347
  bool mapValues(JoinVals &Other);
2348
2349
  /// Try to resolve conflicts that require all values to be mapped.
2350
  /// Returns false if any conflicts were impossible to resolve.
2351
  bool resolveConflicts(JoinVals &Other);
2352
2353
  /// Prune the live range of values in Other.LR where they would conflict with
2354
  /// CR_Replace values in LR. Collect end points for restoring the live range
2355
  /// after joining.
2356
  void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints,
2357
                   bool changeInstrs);
2358
2359
  /// Removes subranges starting at copies that get removed. This sometimes
2360
  /// happens when undefined subranges are copied around. These ranges contain
2361
  /// no useful information and can be removed.
2362
  void pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask);
2363
2364
  /// Pruning values in subranges can lead to removing segments in these
2365
  /// subranges started by IMPLICIT_DEFs. The corresponding segments in
2366
  /// the main range also need to be removed. This function will mark
2367
  /// the corresponding values in the main range as pruned, so that
2368
  /// eraseInstrs can do the final cleanup.
2369
  /// The parameter @p LI must be the interval whose main range is the
2370
  /// live range LR.
2371
  void pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange);
2372
2373
  /// Erase any machine instructions that have been coalesced away.
2374
  /// Add erased instructions to ErasedInstrs.
2375
  /// Add foreign virtual registers to ShrinkRegs if their live range ended at
2376
  /// the erased instrs.
2377
  void eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
2378
                   SmallVectorImpl<unsigned> &ShrinkRegs,
2379
                   LiveInterval *LI = nullptr);
2380
2381
  /// Remove liverange defs at places where implicit defs will be removed.
2382
  void removeImplicitDefs();
2383
2384
  /// Get the value assignments suitable for passing to LiveInterval::join.
2385
7.29M
  const int *getAssignments() const { return Assignments.data(); }
2386
};
2387
2388
} // end anonymous namespace
2389
2390
/// Compute the bitmask of lanes that DefMI actually writes to Reg, relative to
/// the final joined register. Sets \p Redef when any def operand also reads
/// the previous value (a partial redefinition without <read-undef>).
LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI,
                                        bool &Redef) const {
  LaneBitmask WrittenLanes;
  for (const MachineOperand &Op : DefMI->operands()) {
    // Only def operands of Reg contribute written lanes.
    if (!Op.isReg() || Op.getReg() != Reg || !Op.isDef())
      continue;
    // Translate the operand's subregister into the coalesced register's
    // lane numbering before accumulating.
    unsigned ComposedIdx = TRI->composeSubRegIndices(SubIdx, Op.getSubReg());
    WrittenLanes |= TRI->getSubRegIndexLaneMask(ComposedIdx);
    if (Op.readsReg())
      Redef = true;
  }
  return WrittenLanes;
}
std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain(
2405
279k
    const VNInfo *VNI) const {
2406
279k
  unsigned TrackReg = Reg;
2407
279k
2408
422k
  while (!VNI->isPHIDef()) {
2409
361k
    SlotIndex Def = VNI->def;
2410
361k
    MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
2411
361k
    assert(MI && "No defining instruction");
2412
361k
    if (!MI->isFullCopy())
2413
134k
      return std::make_pair(VNI, TrackReg);
2414
227k
    unsigned SrcReg = MI->getOperand(1).getReg();
2415
227k
    if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
2416
83.6k
      return std::make_pair(VNI, TrackReg);
2417
143k
2418
143k
    const LiveInterval &LI = LIS->getInterval(SrcReg);
2419
143k
    const VNInfo *ValueIn;
2420
143k
    // No subrange involved.
2421
143k
    if (!SubRangeJoin || 
!LI.hasSubRanges()77
) {
2422
143k
      LiveQueryResult LRQ = LI.Query(Def);
2423
143k
      ValueIn = LRQ.valueIn();
2424
143k
    } else {
2425
46
      // Query subranges. Ensure that all matching ones take us to the same def
2426
46
      // (allowing some of them to be undef).
2427
46
      ValueIn = nullptr;
2428
158
      for (const LiveInterval::SubRange &S : LI.subranges()) {
2429
158
        // Transform lanemask to a mask in the joined live interval.
2430
158
        LaneBitmask SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask);
2431
158
        if ((SMask & LaneMask).none())
2432
112
          continue;
2433
46
        LiveQueryResult LRQ = S.Query(Def);
2434
46
        if (!ValueIn) {
2435
44
          ValueIn = LRQ.valueIn();
2436
44
          continue;
2437
44
        }
2438
2
        if (LRQ.valueIn() && ValueIn != LRQ.valueIn())
2439
2
          return std::make_pair(VNI, TrackReg);
2440
2
      }
2441
46
    }
2442
143k
    
if (143k
ValueIn == nullptr143k
) {
2443
4
      // Reaching an undefined value is legitimate, for example:
2444
4
      //
2445
4
      // 1   undef %0.sub1 = ...  ;; %0.sub0 == undef
2446
4
      // 2   %1 = COPY %0         ;; %1 is defined here.
2447
4
      // 3   %0 = COPY %1         ;; Now %0.sub0 has a definition,
2448
4
      //                          ;; but it's equivalent to "undef".
2449
4
      return std::make_pair(nullptr, SrcReg);
2450
4
    }
2451
143k
    VNI = ValueIn;
2452
143k
    TrackReg = SrcReg;
2453
143k
  }
2454
279k
  
return std::make_pair(VNI, TrackReg)61.0k
;
2455
279k
}
2456
2457
bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1,
2458
140k
                               const JoinVals &Other) const {
2459
140k
  const VNInfo *Orig0;
2460
140k
  unsigned Reg0;
2461
140k
  std::tie(Orig0, Reg0) = followCopyChain(Value0);
2462
140k
  if (Orig0 == Value1 && 
Reg0 == Other.Reg2.48k
)
2463
2.48k
    return true;
2464
138k
2465
138k
  const VNInfo *Orig1;
2466
138k
  unsigned Reg1;
2467
138k
  std::tie(Orig1, Reg1) = Other.followCopyChain(Value1);
2468
138k
  // If both values are undefined, and the source registers are the same
2469
138k
  // register, the values are identical. Filter out cases where only one
2470
138k
  // value is defined.
2471
138k
  if (Orig0 == nullptr || 
Orig1 == nullptr138k
)
2472
2
    return Orig0 == Orig1 && Reg0 == Reg1;
2473
138k
2474
138k
  // The values are equal if they are defined at the same place and use the
2475
138k
  // same register. Note that we cannot compare VNInfos directly as some of
2476
138k
  // them might be from a copy created in mergeSubRangeInto()  while the other
2477
138k
  // is from the original LiveInterval.
2478
138k
  return Orig0->def == Orig1->def && 
Reg0 == Reg111.8k
;
2479
138k
}
2480
2481
/// Analyze value number ValNo of LR against Other's live range, fill in
/// Vals[ValNo], and classify the (possible) conflict. Hard cases are left as
/// CR_Unresolved for resolveConflicts(); truly unjoinable overlaps return
/// CR_Impossible. Recurses (via computeAssignment) only upwards in the
/// dominator tree, so the recursion terminates.
JoinVals::ConflictResolution
JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
  Val &V = Vals[ValNo];
  assert(!V.isAnalyzed() && "Value has already been analyzed!");
  VNInfo *VNI = LR.getValNumInfo(ValNo);
  if (VNI->isUnused()) {
    V.WriteLanes = LaneBitmask::getAll();
    return CR_Keep;
  }

  // Get the instruction defining this value, compute the lanes written.
  const MachineInstr *DefMI = nullptr;
  if (VNI->isPHIDef()) {
    // Conservatively assume that all lanes in a PHI are valid.
    LaneBitmask Lanes = SubRangeJoin ? LaneBitmask::getLane(0)
                                     : TRI->getSubRegIndexLaneMask(SubIdx);
    V.ValidLanes = V.WriteLanes = Lanes;
  } else {
    DefMI = Indexes->getInstructionFromIndex(VNI->def);
    assert(DefMI != nullptr);
    if (SubRangeJoin) {
      // We don't care about the lanes when joining subregister ranges.
      V.WriteLanes = V.ValidLanes = LaneBitmask::getLane(0);
      if (DefMI->isImplicitDef()) {
        V.ValidLanes = LaneBitmask::getNone();
        V.ErasableImplicitDef = true;
      }
    } else {
      bool Redef = false;
      V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef);

      // If this is a read-modify-write instruction, there may be more valid
      // lanes than the ones written by this instruction.
      // This only covers partial redef operands. DefMI may have normal use
      // operands reading the register. They don't contribute valid lanes.
      //
      // This adds ssub1 to the set of valid lanes in %src:
      //
      //   %src:ssub1 = FOO
      //
      // This leaves only ssub1 valid, making any other lanes undef:
      //
      //   %src:ssub1<def,read-undef> = FOO %src:ssub2
      //
      // The <read-undef> flag on the def operand means that old lane values are
      // not important.
      if (Redef) {
        V.RedefVNI = LR.Query(VNI->def).valueIn();
        assert((TrackSubRegLiveness || V.RedefVNI) &&
               "Instruction is reading nonexistent value");
        if (V.RedefVNI != nullptr) {
          computeAssignment(V.RedefVNI->id, Other);
          V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes;
        }
      }

      // An IMPLICIT_DEF writes undef values.
      if (DefMI->isImplicitDef()) {
        // We normally expect IMPLICIT_DEF values to be live only until the end
        // of their block. If the value is really live longer and gets pruned in
        // another block, this flag is cleared again.
        //
        // Clearing the valid lanes is deferred until it is sure this can be
        // erased.
        V.ErasableImplicitDef = true;
      }
    }
  }

  // Find the value in Other that overlaps VNI->def, if any.
  LiveQueryResult OtherLRQ = Other.LR.Query(VNI->def);

  // It is possible that both values are defined by the same instruction, or
  // the values are PHIs defined in the same block. When that happens, the two
  // values should be merged into one, but not into any preceding value.
  // The first value defined or visited gets CR_Keep, the other gets CR_Merge.
  if (VNInfo *OtherVNI = OtherLRQ.valueDefined()) {
    assert(SlotIndex::isSameInstr(VNI->def, OtherVNI->def) && "Broken LRQ");

    // One value stays, the other is merged. Keep the earlier one, or the first
    // one we see.
    if (OtherVNI->def < VNI->def)
      Other.computeAssignment(OtherVNI->id, *this);
    else if (VNI->def < OtherVNI->def && OtherLRQ.valueIn()) {
      // This is an early-clobber def overlapping a live-in value in the other
      // register. Not mergeable.
      V.OtherVNI = OtherLRQ.valueIn();
      return CR_Impossible;
    }
    V.OtherVNI = OtherVNI;
    Val &OtherV = Other.Vals[OtherVNI->id];
    // Keep this value, check for conflicts when analyzing OtherVNI.
    if (!OtherV.isAnalyzed())
      return CR_Keep;
    // Both sides have been analyzed now.
    // Allow overlapping PHI values. Any real interference would show up in a
    // predecessor, the PHI itself can't introduce any conflicts.
    if (VNI->isPHIDef())
      return CR_Merge;
    if ((V.ValidLanes & OtherV.ValidLanes).any())
      // Overlapping lanes can't be resolved.
      return CR_Impossible;
    else
      return CR_Merge;
  }

  // No simultaneous def. Is Other live at the def?
  V.OtherVNI = OtherLRQ.valueIn();
  if (!V.OtherVNI)
    // No overlap, no conflict.
    return CR_Keep;

  assert(!SlotIndex::isSameInstr(VNI->def, V.OtherVNI->def) && "Broken LRQ");

  // We have overlapping values, or possibly a kill of Other.
  // Recursively compute assignments up the dominator tree.
  Other.computeAssignment(V.OtherVNI->id, *this);
  Val &OtherV = Other.Vals[V.OtherVNI->id];

  if (OtherV.ErasableImplicitDef) {
    // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block.
    // This shouldn't normally happen, but ProcessImplicitDefs can leave such
    // IMPLICIT_DEF instructions behind, and there is nothing wrong with it
    // technically.
    //
    // When it happens, treat that IMPLICIT_DEF as a normal value, and don't try
    // to erase the IMPLICIT_DEF instruction.
    if (DefMI &&
        DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) {
      LLVM_DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
                 << " extends into "
                 << printMBBReference(*DefMI->getParent())
                 << ", keeping it.\n");
      OtherV.ErasableImplicitDef = false;
    } else {
      // We deferred clearing these lanes in case we needed to save them
      OtherV.ValidLanes &= ~OtherV.WriteLanes;
    }
  }

  // Allow overlapping PHI values. Any real interference would show up in a
  // predecessor, the PHI itself can't introduce any conflicts.
  if (VNI->isPHIDef())
    return CR_Replace;

  // Check for simple erasable conflicts.
  if (DefMI->isImplicitDef()) {
    // We need the def for the subregister if there is nothing else live at the
    // subrange at this point.
    if (TrackSubRegLiveness
        && (V.WriteLanes & (OtherV.ValidLanes | OtherV.WriteLanes)).none())
      return CR_Replace;
    return CR_Erase;
  }

  // Include the non-conflict where DefMI is a coalescable copy that kills
  // OtherVNI. We still want the copy erased and value numbers merged.
  if (CP.isCoalescable(DefMI)) {
    // Some of the lanes copied from OtherVNI may be undef, making them undef
    // here too.
    V.ValidLanes &= ~V.WriteLanes | OtherV.ValidLanes;
    return CR_Erase;
  }

  // This may not be a real conflict if DefMI simply kills Other and defines
  // VNI.
  if (OtherLRQ.isKill() && OtherLRQ.endPoint() <= VNI->def)
    return CR_Keep;

  // Handle the case where VNI and OtherVNI can be proven to be identical:
  //
  //   %other = COPY %ext
  //   %this  = COPY %ext <-- Erase this copy
  //
  if (DefMI->isFullCopy() && !CP.isPartial() &&
      valuesIdentical(VNI, V.OtherVNI, Other)) {
    V.Identical = true;
    return CR_Erase;
  }

  // The remaining checks apply to the lanes, which aren't tracked here.  This
  // was already decided to be OK via the following CR_Replace condition.
  // CR_Replace.
  if (SubRangeJoin)
    return CR_Replace;

  // If the lanes written by this instruction were all undef in OtherVNI, it is
  // still safe to join the live ranges. This can't be done with a simple value
  // mapping, though - OtherVNI will map to multiple values:
  //
  //   1 %dst:ssub0 = FOO                <-- OtherVNI
  //   2 %src = BAR                      <-- VNI
  //   3 %dst:ssub1 = COPY killed %src    <-- Eliminate this copy.
  //   4 BAZ killed %dst
  //   5 QUUX killed %src
  //
  // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace
  // handles this complex value mapping.
  if ((V.WriteLanes & OtherV.ValidLanes).none())
    return CR_Replace;

  // If the other live range is killed by DefMI and the live ranges are still
  // overlapping, it must be because we're looking at an early clobber def:
  //
  //   %dst<def,early-clobber> = ASM killed %src
  //
  // In this case, it is illegal to merge the two live ranges since the early
  // clobber def would clobber %src before it was read.
  if (OtherLRQ.isKill()) {
    // This case where the def doesn't overlap the kill is handled above.
    assert(VNI->def.isEarlyClobber() &&
           "Only early clobber defs can overlap a kill");
    return CR_Impossible;
  }

  // VNI is clobbering live lanes in OtherVNI, but there is still the
  // possibility that no instructions actually read the clobbered lanes.
  // If we're clobbering all the lanes in OtherVNI, at least one must be read.
  // Otherwise Other.RI wouldn't be live here.
  if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes).none())
    return CR_Impossible;

  // We need to verify that no instructions are reading the clobbered lanes. To
  // save compile time, we'll only check that locally. Don't allow the tainted
  // value to escape the basic block.
  MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
  if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB))
    return CR_Impossible;

  // There are still some things that could go wrong besides clobbered lanes
  // being read, for example OtherVNI may be only partially redefined in MBB,
  // and some clobbered lanes could escape the block. Save this analysis for
  // resolveConflicts() when all values have been mapped. We need to know
  // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute
  // that now - the recursive analyzeValue() calls must go upwards in the
  // dominator tree.
  return CR_Unresolved;
}
26.0M
void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
2721
26.0M
  Val &V = Vals[ValNo];
2722
26.0M
  if (V.isAnalyzed()) {
2723
5.67M
    // Recursion should always move up the dominator tree, so ValNo is not
2724
5.67M
    // supposed to reappear before it has been assigned.
2725
5.67M
    assert(Assignments[ValNo] != -1 && "Bad recursion?");
2726
5.67M
    return;
2727
5.67M
  }
2728
20.3M
  switch ((V.Resolution = analyzeValue(ValNo, Other))) {
2729
20.3M
  case CR_Erase:
2730
4.31M
  case CR_Merge:
2731
4.31M
    // Merge this ValNo into OtherVNI.
2732
4.31M
    assert(V.OtherVNI && "OtherVNI not assigned, can't merge.");
2733
4.31M
    assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion");
2734
4.31M
    Assignments[ValNo] = Other.Assignments[V.OtherVNI->id];
2735
4.31M
    LLVM_DEBUG(dbgs() << "\t\tmerge " << printReg(Reg) << ':' << ValNo << '@'
2736
4.31M
                      << LR.getValNumInfo(ValNo)->def << " into "
2737
4.31M
                      << printReg(Other.Reg) << ':' << V.OtherVNI->id << '@'
2738
4.31M
                      << V.OtherVNI->def << " --> @"
2739
4.31M
                      << NewVNInfo[Assignments[ValNo]]->def << '\n');
2740
4.31M
    break;
2741
4.31M
  case CR_Replace:
2742
352k
  case CR_Unresolved: {
2743
352k
    // The other value is going to be pruned if this join is successful.
2744
352k
    assert(V.OtherVNI && "OtherVNI not assigned, can't prune");
2745
352k
    Val &OtherV = Other.Vals[V.OtherVNI->id];
2746
352k
    // We cannot erase an IMPLICIT_DEF if we don't have valid values for all
2747
352k
    // its lanes.
2748
352k
    if (OtherV.ErasableImplicitDef &&
2749
352k
        
TrackSubRegLiveness377
&&
2750
352k
        
(OtherV.WriteLanes & ~V.ValidLanes).any()9
) {
2751
7
      LLVM_DEBUG(dbgs() << "Cannot erase implicit_def with missing values\n");
2752
7
2753
7
      OtherV.ErasableImplicitDef = false;
2754
7
      // The valid lanes written by the implicit_def were speculatively cleared
2755
7
      // before, so make this more conservative. It may be better to track this,
2756
7
      // I haven't found a testcase where it matters.
2757
7
      OtherV.ValidLanes = LaneBitmask::getAll();
2758
7
    }
2759
352k
2760
352k
    OtherV.Pruned = true;
2761
352k
    LLVM_FALLTHROUGH;
2762
352k
  }
2763
16.0M
  default:
2764
16.0M
    // This value number needs to go in the final joined live range.
2765
16.0M
    Assignments[ValNo] = NewVNInfo.size();
2766
16.0M
    NewVNInfo.push_back(LR.getValNumInfo(ValNo));
2767
16.0M
    break;
2768
20.3M
  }
2769
20.3M
}
2770
2771
8.08M
bool JoinVals::mapValues(JoinVals &Other) {
2772
27.5M
  for (unsigned i = 0, e = LR.getNumValNums(); i != e; 
++i19.4M
) {
2773
20.0M
    computeAssignment(i, Other);
2774
20.0M
    if (Vals[i].Resolution == CR_Impossible) {
2775
504k
      LLVM_DEBUG(dbgs() << "\t\tinterference at " << printReg(Reg) << ':' << i
2776
504k
                        << '@' << LR.getValNumInfo(i)->def << '\n');
2777
504k
      return false;
2778
504k
    }
2779
20.0M
  }
2780
8.08M
  
return true7.57M
;
2781
8.08M
}
2782
2783
/// Determine how far into the defining basic block the lanes in
/// \p TaintedLanes of value \p ValNo would remain tainted by conflicting
/// values in Other.LR. Each (segment end, still-tainted lanes) pair is
/// appended to \p TaintExtent. Returns false if the tainted lanes would
/// escape the basic block, in which case the join must be aborted.
bool JoinVals::
taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
            SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent) {
  VNInfo *VNI = LR.getValNumInfo(ValNo);
  MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
  SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB);

  // Scan Other.LR from VNI.def to MBBEnd.
  LiveInterval::iterator OtherI = Other.LR.find(VNI->def);
  assert(OtherI != Other.LR.end() && "No conflict?");
  do {
    // OtherI is pointing to a tainted value. Abort the join if the tainted
    // lanes escape the block.
    SlotIndex End = OtherI->end;
    if (End >= MBBEnd) {
      LLVM_DEBUG(dbgs() << "\t\ttaints global " << printReg(Other.Reg) << ':'
                        << OtherI->valno->id << '@' << OtherI->start << '\n');
      return false;
    }
    LLVM_DEBUG(dbgs() << "\t\ttaints local " << printReg(Other.Reg) << ':'
                      << OtherI->valno->id << '@' << OtherI->start << " to "
                      << End << '\n');
    // A dead def is not a problem.
    if (End.isDead())
      break;
    TaintExtent.push_back(std::make_pair(End, TaintedLanes));

    // Check for another def in the MBB.
    if (++OtherI == Other.LR.end() || OtherI->start >= MBBEnd)
      break;

    // Lanes written by the new def are no longer tainted.
    const Val &OV = Other.Vals[OtherI->valno->id];
    TaintedLanes &= ~OV.WriteLanes;
    // Without a RedefVNI the chain of redefinitions stops here; any lanes
    // still tainted past this point cannot be cleared.
    if (!OV.RedefVNI)
      break;
  } while (TaintedLanes.any());
  return true;
}
2822
2823
bool JoinVals::usesLanes(const MachineInstr &MI, unsigned Reg, unsigned SubIdx,
2824
567k
                         LaneBitmask Lanes) const {
2825
567k
  if (MI.isDebugInstr())
2826
0
    return false;
2827
2.02M
  
for (const MachineOperand &MO : MI.operands())567k
{
2828
2.02M
    if (!MO.isReg() || 
MO.isDef()1.47M
||
MO.getReg() != Reg934k
)
2829
1.87M
      continue;
2830
144k
    if (!MO.readsReg())
2831
0
      continue;
2832
144k
    unsigned S = TRI->composeSubRegIndices(SubIdx, MO.getSubReg());
2833
144k
    if ((Lanes & TRI->getSubRegIndexLaneMask(S)).any())
2834
38.1k
      return true;
2835
144k
  }
2836
567k
  
return false528k
;
2837
567k
}
2838
2839
7.34M
/// Attempt to resolve every CR_Unresolved lane conflict against \p Other by
/// proving that the tainted lanes are never read before being redefined.
/// Conflicts that can be proven harmless are downgraded to CR_Replace.
/// Returns false if any conflict remains, meaning the join must be aborted.
bool JoinVals::resolveConflicts(JoinVals &Other) {
  for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
    Val &V = Vals[i];
    assert(V.Resolution != CR_Impossible && "Unresolvable conflict");
    if (V.Resolution != CR_Unresolved)
      continue;
    LLVM_DEBUG(dbgs() << "\t\tconflict at " << printReg(Reg) << ':' << i << '@'
                      << LR.getValNumInfo(i)->def << '\n');
    // Lane-by-lane analysis is not performed when joining subranges.
    if (SubRangeJoin)
      return false;

    ++NumLaneConflicts;
    assert(V.OtherVNI && "Inconsistent conflict resolution.");
    VNInfo *VNI = LR.getValNumInfo(i);
    const Val &OtherV = Other.Vals[V.OtherVNI->id];

    // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the
    // join, those lanes will be tainted with a wrong value. Get the extent of
    // the tainted lanes.
    LaneBitmask TaintedLanes = V.WriteLanes & OtherV.ValidLanes;
    SmallVector<std::pair<SlotIndex, LaneBitmask>, 8> TaintExtent;
    if (!taintExtent(i, TaintedLanes, Other, TaintExtent))
      // Tainted lanes would extend beyond the basic block.
      return false;

    assert(!TaintExtent.empty() && "There should be at least one conflict.");

    // Now look at the instructions from VNI->def to TaintExtent (inclusive).
    MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
    MachineBasicBlock::iterator MI = MBB->begin();
    if (!VNI->isPHIDef()) {
      MI = Indexes->getInstructionFromIndex(VNI->def);
      // No need to check the instruction defining VNI for reads.
      ++MI;
    }
    assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) &&
           "Interference ends on VNI->def. Should have been handled earlier");
    MachineInstr *LastMI =
      Indexes->getInstructionFromIndex(TaintExtent.front().first);
    assert(LastMI && "Range must end at a proper instruction");
    unsigned TaintNum = 0;
    // Walk instructions in [MI, LastMI] for each taint-extent entry; any read
    // of a tainted lane kills the join.
    while (true) {
      assert(MI != MBB->end() && "Bad LastMI");
      if (usesLanes(*MI, Other.Reg, Other.SubIdx, TaintedLanes)) {
        LLVM_DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI);
        return false;
      }
      // LastMI is the last instruction to use the current value.
      if (&*MI == LastMI) {
        if (++TaintNum == TaintExtent.size())
          break;
        LastMI = Indexes->getInstructionFromIndex(TaintExtent[TaintNum].first);
        assert(LastMI && "Range must end at a proper instruction");
        TaintedLanes = TaintExtent[TaintNum].second;
      }
      ++MI;
    }

    // The tainted lanes are unused.
    V.Resolution = CR_Replace;
    ++NumLaneResolves;
  }
  return true;
}
2903
2904
7.68M
bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) {
2905
7.68M
  Val &V = Vals[ValNo];
2906
7.68M
  if (V.Pruned || 
V.PrunedComputed7.62M
)
2907
96.2k
    return V.Pruned;
2908
7.58M
2909
7.58M
  if (V.Resolution != CR_Erase && 
V.Resolution != CR_Merge3.74M
)
2910
3.74M
    return V.Pruned;
2911
3.84M
2912
3.84M
  // Follow copies up the dominator tree and check if any intermediate value
2913
3.84M
  // has been pruned.
2914
3.84M
  V.PrunedComputed = true;
2915
3.84M
  V.Pruned = Other.isPrunedValue(V.OtherVNI->id, *this);
2916
3.84M
  return V.Pruned;
2917
3.84M
}
2918
2919
/// Remove segments from this live range and from Other.LR that would overlap
/// after joining, so LiveInterval::join() only sees consistent mappings.
/// Points where the pruned liveness must later be restored are collected in
/// \p EndPoints. When \p changeInstrs is true, the undef/dead operand flags
/// of affected defining instructions are updated as well.
void JoinVals::pruneValues(JoinVals &Other,
                           SmallVectorImpl<SlotIndex> &EndPoints,
                           bool changeInstrs) {
  for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
    SlotIndex Def = LR.getValNumInfo(i)->def;
    switch (Vals[i].Resolution) {
    case CR_Keep:
      break;
    case CR_Replace: {
      // This value takes precedence over the value in Other.LR.
      LIS->pruneValue(Other.LR, Def, &EndPoints);
      // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF
      // instructions are only inserted to provide a live-out value for PHI
      // predecessors, so the instruction should simply go away once its value
      // has been replaced.
      Val &OtherV = Other.Vals[Vals[i].OtherVNI->id];
      bool EraseImpDef = OtherV.ErasableImplicitDef &&
                         OtherV.Resolution == CR_Keep;
      if (!Def.isBlock()) {
        if (changeInstrs) {
          // Remove <def,read-undef> flags. This def is now a partial redef.
          // Also remove dead flags since the joined live range will
          // continue past this instruction.
          for (MachineOperand &MO :
               Indexes->getInstructionFromIndex(Def)->operands()) {
            if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) {
              if (MO.getSubReg() != 0 && MO.isUndef() && !EraseImpDef)
                MO.setIsUndef(false);
              MO.setIsDead(false);
            }
          }
        }
        // This value will reach instructions below, but we need to make sure
        // the live range also reaches the instruction at Def.
        if (!EraseImpDef)
          EndPoints.push_back(Def);
      }
      LLVM_DEBUG(dbgs() << "\t\tpruned " << printReg(Other.Reg) << " at " << Def
                        << ": " << Other.LR << '\n');
      break;
    }
    case CR_Erase:
    case CR_Merge:
      if (isPrunedValue(i, Other)) {
        // This value is ultimately a copy of a pruned value in LR or Other.LR.
        // We can no longer trust the value mapping computed by
        // computeAssignment(), the value that was originally copied could have
        // been replaced.
        LIS->pruneValue(LR, Def, &EndPoints);
        LLVM_DEBUG(dbgs() << "\t\tpruned all of " << printReg(Reg) << " at "
                          << Def << ": " << LR << '\n');
      }
      break;
    case CR_Unresolved:
    case CR_Impossible:
      llvm_unreachable("Unresolved conflicts");
    }
  }
}
2978
2979
/// Consider the following situation when coalescing the copy between
2980
/// %31 and %45 at 800. (The vertical lines represent live range segments.)
2981
///
2982
///                              Main range         Subrange 0004 (sub2)
2983
///                              %31    %45           %31    %45
2984
///  544    %45 = COPY %28               +                    +
2985
///                                      | v1                 | v1
2986
///  560B bb.1:                          +                    +
2987
///  624        = %45.sub2               | v2                 | v2
2988
///  800    %31 = COPY %45        +      +             +      +
2989
///                               | v0                 | v0
2990
///  816    %31.sub1 = ...        +                    |
2991
///  880    %30 = COPY %31        | v1                 +
2992
///  928    %45 = COPY %30        |      +                    +
2993
///                               |      | v0                 | v0  <--+
2994
///  992B   ; backedge -> bb.1    |      +                    +        |
2995
/// 1040        = %31.sub0        +                                    |
2996
///                                                 This value must remain
2997
///                                                 live-out!
2998
///
2999
/// Assuming that %31 is coalesced into %45, the copy at 928 becomes
3000
/// redundant, since it copies the value from %45 back into it. The
3001
/// conflict resolution for the main range determines that %45.v0 is
3002
/// to be erased, which is ok since %31.v1 is identical to it.
3003
/// The problem happens with the subrange for sub2: it has to be live
3004
/// on exit from the block, but since 928 was actually a point of
3005
/// definition of %45.sub2, %45.sub2 was not live immediately prior
3006
/// to that definition. As a result, when 928 was erased, the value v0
3007
/// for %45.sub2 was pruned in pruneSubRegValues. Consequently, an
3008
/// IMPLICIT_DEF was inserted as a "backedge" definition for %45.sub2,
3009
/// providing an incorrect value to the use at 624.
3010
///
3011
/// Since the main-range values %31.v1 and %45.v0 were proved to be
3012
/// identical, the corresponding values in subranges must also be the
3013
/// same. A redundant copy is removed because it's not needed, and not
3014
/// because it copied an undefined value, so any liveness that originated
3015
/// from that copy cannot disappear. When pruning a value that started
3016
/// at the removed copy, the corresponding identical value must be
3017
/// extended to replace it.
3018
375k
/// For each value that eraseInstrs() will remove (CR_Erase, or a kept but
/// pruned erasable IMPLICIT_DEF), fix up the subranges of \p LI: prune
/// subrange values that started at the removed copy, and record in
/// \p ShrinkMask lanes whose subranges ended there and must be shrunk.
void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) {
  // Look for values being erased.
  bool DidPrune = false;
  for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
    Val &V = Vals[i];
    // We should trigger in all cases in which eraseInstrs() does something.
    // This condition must stay in sync with the one in eraseInstrs(); the
    // LLVM_DEBUG message below makes any mismatch diagnosable.
    if (V.Resolution != CR_Erase &&
        (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned))
      continue;

    // Check subranges at the point where the copy will be removed.
    SlotIndex Def = LR.getValNumInfo(i)->def;
    SlotIndex OtherDef;
    if (V.Identical)
      OtherDef = V.OtherVNI->def;

    // Print message so mismatches with eraseInstrs() can be diagnosed.
    LLVM_DEBUG(dbgs() << "\t\tExpecting instruction removal at " << Def
                      << '\n');
    for (LiveInterval::SubRange &S : LI.subranges()) {
      LiveQueryResult Q = S.Query(Def);

      // If a subrange starts at the copy then an undefined value has been
      // copied and we must remove that subrange value as well.
      VNInfo *ValueOut = Q.valueOutOrDead();
      if (ValueOut != nullptr && (Q.valueIn() == nullptr ||
                                  (V.Identical && V.Resolution == CR_Erase &&
                                   ValueOut->def == Def))) {
        LLVM_DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask)
                          << " at " << Def << "\n");
        SmallVector<SlotIndex,8> EndPoints;
        LIS->pruneValue(S, Def, &EndPoints);
        DidPrune = true;
        // Mark value number as unused.
        ValueOut->markUnused();

        if (V.Identical && S.Query(OtherDef).valueOutOrDead()) {
          // If V is identical to V.OtherVNI (and S was live at OtherDef),
          // then we can't simply prune V from S. V needs to be replaced
          // with V.OtherVNI.
          LIS->extendToIndices(S, EndPoints);
        }
        continue;
      }
      // If a subrange ends at the copy, then a value was copied but only
      // partially used later. Shrink the subregister range appropriately.
      if (Q.valueIn() != nullptr && Q.valueOut() == nullptr) {
        LLVM_DEBUG(dbgs() << "\t\tDead uses at sublane "
                          << PrintLaneMask(S.LaneMask) << " at " << Def
                          << "\n");
        ShrinkMask |= S.LaneMask;
      }
    }
  }
  // Pruning may have left subranges with no segments at all; drop them.
  if (DidPrune)
    LI.removeEmptySubRanges();
}
3076
3077
/// Check if any of the subranges of @p LI contain a definition at @p Def.
3078
287k
static bool isDefInSubRange(LiveInterval &LI, SlotIndex Def) {
3079
1.07M
  for (LiveInterval::SubRange &SR : LI.subranges()) {
3080
1.07M
    if (VNInfo *VNI = SR.Query(Def).valueOutOrDead())
3081
1.02M
      if (VNI->def == Def)
3082
287k
        return true;
3083
1.07M
  }
3084
287k
  
return false0
;
3085
287k
}
3086
3087
187k
/// Mark kept main-range values of \p LI as pruned when no subrange has a
/// matching definition — such main-range segments are stale and the caller
/// must shrink the main range (signalled via \p ShrinkMainRange).
void JoinVals::pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange) {
  // LI's main range must be the range this JoinVals operates on.
  assert(&static_cast<LiveRange&>(LI) == &LR);

  const unsigned NumVals = LR.getNumValNums();
  for (unsigned ValNo = 0; ValNo != NumVals; ++ValNo) {
    if (Vals[ValNo].Resolution != CR_Keep)
      continue;
    VNInfo *VNI = LR.getValNumInfo(ValNo);
    // Unused values and PHI defs are fine; so is any def mirrored in a
    // subrange.
    bool DefIsCovered =
        VNI->isUnused() || VNI->isPHIDef() || isDefInSubRange(LI, VNI->def);
    if (DefIsCovered)
      continue;
    Vals[ValNo].Pruned = true;
    ShrinkMainRange = true;
  }
}
3100
3101
431k
void JoinVals::removeImplicitDefs() {
3102
890k
  for (unsigned i = 0, e = LR.getNumValNums(); i != e; 
++i459k
) {
3103
459k
    Val &V = Vals[i];
3104
459k
    if (V.Resolution != CR_Keep || 
!V.ErasableImplicitDef248k
||
!V.Pruned342
)
3105
459k
      continue;
3106
1
3107
1
    VNInfo *VNI = LR.getValNumInfo(i);
3108
1
    VNI->markUnused();
3109
1
    LR.removeValNo(VNI);
3110
1
  }
3111
431k
}
3112
3113
void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
3114
                           SmallVectorImpl<unsigned> &ShrinkRegs,
3115
6.86M
                           LiveInterval *LI) {
3116
24.0M
  for (unsigned i = 0, e = LR.getNumValNums(); i != e; 
++i17.2M
) {
3117
17.2M
    // Get the def location before markUnused() below invalidates it.
3118
17.2M
    SlotIndex Def = LR.getValNumInfo(i)->def;
3119
17.2M
    switch (Vals[i].Resolution) {
3120
17.2M
    case CR_Keep: {
3121
13.4M
      // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
3122
13.4M
      // longer. The IMPLICIT_DEF instructions are only inserted by
3123
13.4M
      // PHIElimination to guarantee that all PHI predecessors have a value.
3124
13.4M
      if (!Vals[i].ErasableImplicitDef || 
!Vals[i].Pruned11.5k
)
3125
13.4M
        break;
3126
365
      // Remove value number i from LR.
3127
365
      // For intervals with subranges, removing a segment from the main range
3128
365
      // may require extending the previous segment: for each definition of
3129
365
      // a subregister, there will be a corresponding def in the main range.
3130
365
      // That def may fall in the middle of a segment from another subrange.
3131
365
      // In such cases, removing this def from the main range must be
3132
365
      // complemented by extending the main range to account for the liveness
3133
365
      // of the other subrange.
3134
365
      VNInfo *VNI = LR.getValNumInfo(i);
3135
365
      SlotIndex Def = VNI->def;
3136
365
      // The new end point of the main range segment to be extended.
3137
365
      SlotIndex NewEnd;
3138
365
      if (LI != nullptr) {
3139
359
        LiveRange::iterator I = LR.FindSegmentContaining(Def);
3140
359
        assert(I != LR.end());
3141
359
        // Do not extend beyond the end of the segment being removed.
3142
359
        // The segment may have been pruned in preparation for joining
3143
359
        // live ranges.
3144
359
        NewEnd = I->end;
3145
359
      }
3146
365
3147
365
      LR.removeValNo(VNI);
3148
365
      // Note that this VNInfo is reused and still referenced in NewVNInfo,
3149
365
      // make it appear like an unused value number.
3150
365
      VNI->markUnused();
3151
365
3152
365
      if (LI != nullptr && 
LI->hasSubRanges()359
) {
3153
0
        assert(static_cast<LiveRange*>(LI) == &LR);
3154
0
        // Determine the end point based on the subrange information:
3155
0
        // minimum of (earliest def of next segment,
3156
0
        //             latest end point of containing segment)
3157
0
        SlotIndex ED, LE;
3158
0
        for (LiveInterval::SubRange &SR : LI->subranges()) {
3159
0
          LiveRange::iterator I = SR.find(Def);
3160
0
          if (I == SR.end())
3161
0
            continue;
3162
0
          if (I->start > Def)
3163
0
            ED = ED.isValid() ? std::min(ED, I->start) : I->start;
3164
0
          else
3165
0
            LE = LE.isValid() ? std::max(LE, I->end) : I->end;
3166
0
        }
3167
0
        if (LE.isValid())
3168
0
          NewEnd = std::min(NewEnd, LE);
3169
0
        if (ED.isValid())
3170
0
          NewEnd = std::min(NewEnd, ED);
3171
0
3172
0
        // We only want to do the extension if there was a subrange that
3173
0
        // was live across Def.
3174
0
        if (LE.isValid()) {
3175
0
          LiveRange::iterator S = LR.find(Def);
3176
0
          if (S != LR.begin())
3177
0
            std::prev(S)->end = NewEnd;
3178
0
        }
3179
0
      }
3180
365
      LLVM_DEBUG({
3181
365
        dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n';
3182
365
        if (LI != nullptr)
3183
365
          dbgs() << "\t\t  LHS = " << *LI << '\n';
3184
365
      });
3185
365
      LLVM_FALLTHROUGH;
3186
365
    }
3187
365
3188
3.62M
    case CR_Erase: {
3189
3.62M
      MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
3190
3.62M
      assert(MI && "No instruction to erase");
3191
3.62M
      if (MI->isCopy()) {
3192
3.30M
        unsigned Reg = MI->getOperand(1).getReg();
3193
3.30M
        if (TargetRegisterInfo::isVirtualRegister(Reg) &&
3194
3.30M
            Reg != CP.getSrcReg() && 
Reg != CP.getDstReg()1.29M
)
3195
5.23k
          ShrinkRegs.push_back(Reg);
3196
3.30M
      }
3197
3.62M
      ErasedInstrs.insert(MI);
3198
3.62M
      LLVM_DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);
3199
3.62M
      LIS->RemoveMachineInstrFromMaps(*MI);
3200
3.62M
      MI->eraseFromParent();
3201
3.62M
      break;
3202
365
    }
3203
153k
    default:
3204
153k
      break;
3205
17.2M
    }
3206
17.2M
  }
3207
6.86M
}
3208
3209
/// Join the subregister live ranges \p LRange and \p RRange (covering the
/// lanes in \p LaneMask) into \p LRange, mirroring the main-range join done
/// in joinVirtRegs(). Because the main-range join already succeeded, value
/// mapping and conflict resolution are expected to succeed here as well.
void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
                                         LaneBitmask LaneMask,
                                         const CoalescerPair &CP) {
  SmallVector<VNInfo*, 16> NewVNInfo;
  // Both JoinVals are built in sub-range mode (SubRangeJoin = true).
  JoinVals RHSVals(RRange, CP.getSrcReg(), CP.getSrcIdx(), LaneMask,
                   NewVNInfo, CP, LIS, TRI, true, true);
  JoinVals LHSVals(LRange, CP.getDstReg(), CP.getDstIdx(), LaneMask,
                   NewVNInfo, CP, LIS, TRI, true, true);

  // Compute NewVNInfo and resolve conflicts (see also joinVirtRegs())
  // We should be able to resolve all conflicts here as we could successfully do
  // it on the mainrange already. There is however a problem when multiple
  // ranges get mapped to the "overflow" lane mask bit which creates unexpected
  // interferences.
  if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) {
    // We already determined that it is legal to merge the intervals, so this
    // should never fail.
    llvm_unreachable("*** Couldn't join subrange!\n");
  }
  if (!LHSVals.resolveConflicts(RHSVals) ||
      !RHSVals.resolveConflicts(LHSVals)) {
    // We already determined that it is legal to merge the intervals, so this
    // should never fail.
    llvm_unreachable("*** Couldn't join subrange!\n");
  }

  // The merging algorithm in LiveInterval::join() can't handle conflicting
  // value mappings, so we need to remove any live ranges that overlap a
  // CR_Replace resolution. Collect a set of end points that can be used to
  // restore the live range after joining.
  SmallVector<SlotIndex, 8> EndPoints;
  LHSVals.pruneValues(RHSVals, EndPoints, false);
  RHSVals.pruneValues(LHSVals, EndPoints, false);

  LHSVals.removeImplicitDefs();
  RHSVals.removeImplicitDefs();

  LRange.verify();
  RRange.verify();

  // Join RRange into LHS.
  LRange.join(RRange, LHSVals.getAssignments(), RHSVals.getAssignments(),
              NewVNInfo);

  LLVM_DEBUG(dbgs() << "\t\tjoined lanes: " << PrintLaneMask(LaneMask)
                    << ' ' << LRange << "\n");
  if (EndPoints.empty())
    return;

  // Recompute the parts of the live range we had to remove because of
  // CR_Replace conflicts.
  LLVM_DEBUG({
    dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
    for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
      dbgs() << EndPoints[i];
      if (i != n-1)
        dbgs() << ',';
    }
    dbgs() << ":  " << LRange << '\n';
  });
  LIS->extendToIndices(LRange, EndPoints);
}
3271
3272
/// Merge the live range \p ToMerge, which covers the lanes in \p LaneMask,
/// into the matching subranges of \p LI. refineSubRanges() splits LI's
/// subranges as needed so each callback sees a subrange wholly inside or
/// outside LaneMask; empty subranges are initialized from ToMerge, non-empty
/// ones are joined with it.
void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
                                          const LiveRange &ToMerge,
                                          LaneBitmask LaneMask,
                                          CoalescerPair &CP) {
  BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
  LI.refineSubRanges(
      Allocator, LaneMask,
      [this, &Allocator, &ToMerge, &CP](LiveInterval::SubRange &SR) {
        if (SR.empty()) {
          // Newly created subrange: take ToMerge's liveness wholesale.
          SR.assign(ToMerge, Allocator);
        } else {
          // joinSubRegRange() destroys the merged range, so we need a copy.
          LiveRange RangeCopy(ToMerge, Allocator);
          joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP);
        }
      },
      *LIS->getSlotIndexes(), *TRI);
}
3290
3291
7.95M
/// Return true when \p LI has so many value numbers that repeatedly trying to
/// join it would be too expensive. Large intervals are given a limited number
/// of attempts (tracked per register) before being rejected.
bool RegisterCoalescer::isHighCostLiveInterval(LiveInterval &LI) {
  // Small intervals are always cheap enough to analyze.
  if (LI.valnos.size() < LargeIntervalSizeThreshold)
    return false;
  // Large interval: permit only a bounded number of visits.
  auto &VisitCount = LargeLIVisitCounter[LI.reg];
  if (VisitCount >= LargeIntervalFreqThreshold)
    return true;
  ++VisitCount;
  return false;
}
3301
3302
3.97M
/// Join the live intervals of the two virtual registers described by \p CP
/// (RHS = source, LHS = destination). Returns true on success; false when
/// interference makes the join impossible or the intervals are too costly to
/// analyze repeatedly.
bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
  SmallVector<VNInfo*, 16> NewVNInfo;
  LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
  LiveInterval &LHS = LIS->getInterval(CP.getDstReg());
  bool TrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(*CP.getNewRC());
  JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), LaneBitmask::getNone(),
                   NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);
  JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), LaneBitmask::getNone(),
                   NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);

  LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << "\n\t\tLHS = " << LHS << '\n');

  // Bail out early on intervals that were already visited too often.
  if (isHighCostLiveInterval(LHS) || isHighCostLiveInterval(RHS))
    return false;

  // First compute NewVNInfo and the simple value mappings.
  // Detect impossible conflicts early.
  if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals))
    return false;

  // Some conflicts can only be resolved after all values have been mapped.
  if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals))
    return false;

  // All clear, the live ranges can be merged.
  if (RHS.hasSubRanges() || LHS.hasSubRanges()) {
    BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();

    // Transform lanemasks from the LHS to masks in the coalesced register and
    // create initial subranges if necessary.
    unsigned DstIdx = CP.getDstIdx();
    if (!LHS.hasSubRanges()) {
      LaneBitmask Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask()
                                     : TRI->getSubRegIndexLaneMask(DstIdx);
      // LHS must support subregs or we wouldn't be in this codepath.
      assert(Mask.any());
      LHS.createSubRangeFrom(Allocator, Mask, LHS);
    } else if (DstIdx != 0) {
      // Transform LHS lanemasks to new register class if necessary.
      for (LiveInterval::SubRange &R : LHS.subranges()) {
        LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(DstIdx, R.LaneMask);
        R.LaneMask = Mask;
      }
    }
    LLVM_DEBUG(dbgs() << "\t\tLHST = " << printReg(CP.getDstReg()) << ' ' << LHS
                      << '\n');

    // Determine lanemasks of RHS in the coalesced register and merge subranges.
    unsigned SrcIdx = CP.getSrcIdx();
    if (!RHS.hasSubRanges()) {
      LaneBitmask Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask()
                                     : TRI->getSubRegIndexLaneMask(SrcIdx);
      mergeSubRangeInto(LHS, RHS, Mask, CP);
    } else {
      // Pair up subranges and merge.
      for (LiveInterval::SubRange &R : RHS.subranges()) {
        LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask);
        mergeSubRangeInto(LHS, R, Mask, CP);
      }
    }
    LLVM_DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");

    // Pruning implicit defs from subranges may result in the main range
    // having stale segments.
    LHSVals.pruneMainSegments(LHS, ShrinkMainRange);

    LHSVals.pruneSubRegValues(LHS, ShrinkMask);
    RHSVals.pruneSubRegValues(LHS, ShrinkMask);
  }

  // The merging algorithm in LiveInterval::join() can't handle conflicting
  // value mappings, so we need to remove any live ranges that overlap a
  // CR_Replace resolution. Collect a set of end points that can be used to
  // restore the live range after joining.
  SmallVector<SlotIndex, 8> EndPoints;
  LHSVals.pruneValues(RHSVals, EndPoints, true);
  RHSVals.pruneValues(LHSVals, EndPoints, true);

  // Erase COPY and IMPLICIT_DEF instructions. This may cause some external
  // registers to require trimming.
  SmallVector<unsigned, 8> ShrinkRegs;
  LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs, &LHS);
  RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
  while (!ShrinkRegs.empty())
    shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val()));

  // Join RHS into LHS.
  LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo);

  // Kill flags are going to be wrong if the live ranges were overlapping.
  // Eventually, we should simply clear all kill flags when computing live
  // ranges. They are reinserted after register allocation.
  MRI->clearKillFlags(LHS.reg);
  MRI->clearKillFlags(RHS.reg);

  if (!EndPoints.empty()) {
    // Recompute the parts of the live range we had to remove because of
    // CR_Replace conflicts.
    LLVM_DEBUG({
      dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
      for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
        dbgs() << EndPoints[i];
        if (i != n-1)
          dbgs() << ',';
      }
      dbgs() << ":  " << LHS << '\n';
    });
    LIS->extendToIndices((LiveRange&)LHS, EndPoints);
  }

  return true;
}
3414
3415
4.22M
/// Attempt to join the intervals described by \p CP.
///
/// Dispatches to the reserved-physreg path when the pair involves a
/// physical register, and to the virtual-register join otherwise.
/// \returns true if the copy was coalesced.
bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
  if (CP.isPhys())
    return joinReservedPhysReg(CP);
  return joinVirtRegs(CP);
}
3418
3419
namespace {

/// Information concerning MBB coalescing priority.
///
/// joinAllIntervals() builds one of these per basic block and sorts them
/// with compareMBBPriority() so the most profitable blocks are coalesced
/// first.
struct MBBPriorityInfo {
  MachineBasicBlock *MBB; // The block being prioritized.
  unsigned Depth;         // Loop depth of MBB; deeper loops sort earlier.
  bool IsSplit;           // True when MBB is a split critical edge
                          // (only computed when JoinSplitEdges is set).

  MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit)
    : MBB(mbb), Depth(depth), IsSplit(issplit) {}
};

} // end anonymous namespace
3432
3433
/// C-style comparator that sorts first based on the loop depth of the basic
3434
/// block (the unsigned), and then on the MBB number.
3435
///
3436
/// EnableGlobalCopies assumes that the primary sort key is loop depth.
3437
static int compareMBBPriority(const MBBPriorityInfo *LHS,
3438
11.7M
                              const MBBPriorityInfo *RHS) {
3439
11.7M
  // Deeper loops first
3440
11.7M
  if (LHS->Depth != RHS->Depth)
3441
1.79M
    return LHS->Depth > RHS->Depth ? 
-11.06M
:
1728k
;
3442
9.90M
3443
9.90M
  // Try to unsplit critical edges next.
3444
9.90M
  if (LHS->IsSplit != RHS->IsSplit)
3445
0
    return LHS->IsSplit ? -1 : 1;
3446
9.90M
3447
9.90M
  // Prefer blocks that are more connected in the CFG. This takes care of
3448
9.90M
  // the most difficult copies first while intervals are short.
3449
9.90M
  unsigned cl = LHS->MBB->pred_size() + LHS->MBB->succ_size();
3450
9.90M
  unsigned cr = RHS->MBB->pred_size() + RHS->MBB->succ_size();
3451
9.90M
  if (cl != cr)
3452
3.33M
    return cl > cr ? 
-11.51M
:
11.81M
;
3453
6.57M
3454
6.57M
  // As a last resort, sort by block number.
3455
6.57M
  return LHS->MBB->getNumber() < RHS->MBB->getNumber() ? 
-13.01M
:
13.56M
;
3456
6.57M
}
3457
3458
/// \returns true if the given copy uses or defines a local live range.
3459
9.24M
static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) {
3460
9.24M
  if (!Copy->isCopy())
3461
317k
    return false;
3462
8.92M
3463
8.92M
  if (Copy->getOperand(1).isUndef())
3464
18
    return false;
3465
8.92M
3466
8.92M
  unsigned SrcReg = Copy->getOperand(1).getReg();
3467
8.92M
  unsigned DstReg = Copy->getOperand(0).getReg();
3468
8.92M
  if (TargetRegisterInfo::isPhysicalRegister(SrcReg)
3469
8.92M
      || 
TargetRegisterInfo::isPhysicalRegister(DstReg)7.11M
)
3470
5.34M
    return false;
3471
3.57M
3472
3.57M
  return LIS->intervalIsInOneMBB(LIS->getInterval(SrcReg))
3473
3.57M
    || 
LIS->intervalIsInOneMBB(LIS->getInterval(DstReg))1.42M
;
3474
3.57M
}
3475
3476
979k
/// Shrink the live intervals of the registers deferred in ToBeUpdated,
/// eliminating any dead definitions exposed by the shrinking.
void RegisterCoalescer::lateLiveIntervalUpdate() {
  for (unsigned reg : ToBeUpdated) {
    // The interval may have been removed entirely since it was queued.
    if (!LIS->hasInterval(reg))
      continue;
    LiveInterval &LI = LIS->getInterval(reg);
    // Shrinking may reveal defs with no remaining uses; collect them into
    // DeadDefs and delete them before processing the next register.
    shrinkToUses(&LI, &DeadDefs);
    if (!DeadDefs.empty())
      eliminateDeadDefs();
  }
  ToBeUpdated.clear();
}
3487
3488
bool RegisterCoalescer::
3489
4.38M
copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
3490
4.38M
  bool Progress = false;
3491
17.8M
  for (unsigned i = 0, e = CurrList.size(); i != e; 
++i13.4M
) {
3492
13.4M
    if (!CurrList[i])
3493
1.98M
      continue;
3494
11.4M
    // Skip instruction pointers that have already been erased, for example by
3495
11.4M
    // dead code elimination.
3496
11.4M
    if (ErasedInstrs.count(CurrList[i])) {
3497
79.0k
      CurrList[i] = nullptr;
3498
79.0k
      continue;
3499
79.0k
    }
3500
11.3M
    bool Again = false;
3501
11.3M
    bool Success = joinCopy(CurrList[i], Again);
3502
11.3M
    Progress |= Success;
3503
11.3M
    if (Success || 
!Again7.00M
)
3504
8.13M
      CurrList[i] = nullptr;
3505
11.3M
  }
3506
4.38M
  return Progress;
3507
4.38M
}
3508
3509
/// Check if DstReg is a terminal node.
/// I.e., it does not have any affinity other than \p Copy.
static bool isTerminalReg(unsigned DstReg, const MachineInstr &Copy,
                          const MachineRegisterInfo *MRI) {
  assert(Copy.isCopyLike());
  // Check if the destination of this copy has any other affinity: any
  // copy-like instruction other than \p Copy touching DstReg makes it
  // non-terminal.
  for (const MachineInstr &MI : MRI->reg_nodbg_instructions(DstReg))
    if (&MI != &Copy && MI.isCopyLike())
      return false;
  return true;
}
3520
3521
9.59M
bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
3522
9.59M
  assert(Copy.isCopyLike());
3523
9.59M
  if (!UseTerminalRule)
3524
9.59M
    return false;
3525
33
  unsigned DstReg, DstSubReg, SrcReg, SrcSubReg;
3526
33
  if (!isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg))
3527
0
    return false;
3528
33
  // Check if the destination of this copy has any other affinity.
3529
33
  if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
3530
33
      // If SrcReg is a physical register, the copy won't be coalesced.
3531
33
      // Ignoring it may have other side effect (like missing
3532
33
      // rematerialization). So keep it.
3533
33
      
TargetRegisterInfo::isPhysicalRegister(SrcReg)25
||
3534
33
      
!isTerminalReg(DstReg, Copy, MRI)20
)
3535
31
    return false;
3536
2
3537
2
  // DstReg is a terminal node. Check if it interferes with any other
3538
2
  // copy involving SrcReg.
3539
2
  const MachineBasicBlock *OrigBB = Copy.getParent();
3540
2
  const LiveInterval &DstLI = LIS->getInterval(DstReg);
3541
6
  for (const MachineInstr &MI : MRI->reg_nodbg_instructions(SrcReg)) {
3542
6
    // Technically we should check if the weight of the new copy is
3543
6
    // interesting compared to the other one and update the weight
3544
6
    // of the copies accordingly. However, this would only work if
3545
6
    // we would gather all the copies first then coalesce, whereas
3546
6
    // right now we interleave both actions.
3547
6
    // For now, just consider the copies that are in the same block.
3548
6
    if (&MI == &Copy || 
!MI.isCopyLike()4
||
MI.getParent() != OrigBB1
)
3549
5
      continue;
3550
1
    unsigned OtherReg, OtherSubReg, OtherSrcReg, OtherSrcSubReg;
3551
1
    if (!isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg,
3552
1
                OtherSubReg))
3553
0
      return false;
3554
1
    if (OtherReg == SrcReg)
3555
0
      OtherReg = OtherSrcReg;
3556
1
    // Check if OtherReg is a non-terminal.
3557
1
    if (TargetRegisterInfo::isPhysicalRegister(OtherReg) ||
3558
1
        isTerminalReg(OtherReg, MI, MRI))
3559
0
      continue;
3560
1
    // Check that OtherReg interfere with DstReg.
3561
1
    if (LIS->getInterval(OtherReg).overlaps(DstLI)) {
3562
1
      LLVM_DEBUG(dbgs() << "Apply terminal rule for: " << printReg(DstReg)
3563
1
                        << '\n');
3564
1
      return true;
3565
1
    }
3566
1
  }
3567
2
  
return false1
;
3568
2
}
3569
3570
/// Collect all copy-like instructions in \p MBB onto the work lists and
/// immediately try to coalesce them, keeping only the entries that need a
/// later retry.
void
RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
  LLVM_DEBUG(dbgs() << MBB->getName() << ":\n");

  // Collect all copy-like instructions in MBB. Don't start coalescing anything
  // yet, it might invalidate the iterator.
  const unsigned PrevSize = WorkList.size();
  if (JoinGlobalCopies) {
    // Copies hit by the terminal rule are segregated so they can be
    // appended after the ordinary copies.
    SmallVector<MachineInstr*, 2> LocalTerminals;
    SmallVector<MachineInstr*, 2> GlobalTerminals;
    // Coalesce copies bottom-up to coalesce local defs before local uses. They
    // are not inherently easier to resolve, but slightly preferable until we
    // have local live range splitting. In particular this is required by
    // cmp+jmp macro fusion.
    for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
         MII != E; ++MII) {
      if (!MII->isCopyLike())
        continue;
      bool ApplyTerminalRule = applyTerminalRule(*MII);
      // Local copies (whole interval in one block) go on the local list so
      // they are handled before global copies of the same loop depth.
      if (isLocalCopy(&(*MII), LIS)) {
        if (ApplyTerminalRule)
          LocalTerminals.push_back(&(*MII));
        else
          LocalWorkList.push_back(&(*MII));
      } else {
        if (ApplyTerminalRule)
          GlobalTerminals.push_back(&(*MII));
        else
          WorkList.push_back(&(*MII));
      }
    }
    // Append the copies evicted by the terminal rule at the end of the list.
    LocalWorkList.append(LocalTerminals.begin(), LocalTerminals.end());
    WorkList.append(GlobalTerminals.begin(), GlobalTerminals.end());
  }
  else {
    // No global/local distinction: everything goes on the main work list,
    // terminal-rule copies last.
    SmallVector<MachineInstr*, 2> Terminals;
    for (MachineInstr &MII : *MBB)
      if (MII.isCopyLike()) {
        if (applyTerminalRule(MII))
          Terminals.push_back(&MII);
        else
          WorkList.push_back(&MII);
      }
    // Append the copies evicted by the terminal rule at the end of the list.
    WorkList.append(Terminals.begin(), Terminals.end());
  }
  // Try coalescing the collected copies immediately, and remove the nulls.
  // This prevents the WorkList from getting too large since most copies are
  // joinable on the first attempt.
  MutableArrayRef<MachineInstr*>
    CurrList(WorkList.begin() + PrevSize, WorkList.end());
  if (copyCoalesceWorkList(CurrList))
    WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(),
                               nullptr), WorkList.end());
}
3626
3627
1.03M
void RegisterCoalescer::coalesceLocals() {
3628
1.03M
  copyCoalesceWorkList(LocalWorkList);
3629
3.84M
  for (unsigned j = 0, je = LocalWorkList.size(); j != je; 
++j2.81M
) {
3630
2.81M
    if (LocalWorkList[j])
3631
109k
      WorkList.push_back(LocalWorkList[j]);
3632
2.81M
  }
3633
1.03M
  LocalWorkList.clear();
3634
1.03M
}
3635
3636
489k
/// Drive interval joining over the whole function: visit blocks in
/// priority order (deepest loops first), coalescing local copies before
/// leaving each loop depth, then iterate the global work list to a fixed
/// point.
void RegisterCoalescer::joinAllIntervals() {
  LLVM_DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
  assert(WorkList.empty() && LocalWorkList.empty() && "Old data still around.");

  // Build the priority records for every block, then sort them with the
  // comparator above (depth, split-edge, connectivity, block number).
  std::vector<MBBPriorityInfo> MBBs;
  MBBs.reserve(MF->size());
  for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
    MachineBasicBlock *MBB = &*I;
    MBBs.push_back(MBBPriorityInfo(MBB, Loops->getLoopDepth(MBB),
                                   JoinSplitEdges && isSplitEdge(MBB)));
  }
  array_pod_sort(MBBs.begin(), MBBs.end(), compareMBBPriority);

  // Coalesce intervals in MBB priority order.
  unsigned CurrDepth = std::numeric_limits<unsigned>::max();
  for (unsigned i = 0, e = MBBs.size(); i != e; ++i) {
    // Try coalescing the collected local copies for deeper loops.
    // (Depths only decrease along the sorted order.)
    if (JoinGlobalCopies && MBBs[i].Depth < CurrDepth) {
      coalesceLocals();
      CurrDepth = MBBs[i].Depth;
    }
    copyCoalesceInMBB(MBBs[i].MBB);
  }
  lateLiveIntervalUpdate();
  coalesceLocals();

  // Joining intervals can allow other intervals to be joined.  Iteratively join
  // until we make no progress.
  while (copyCoalesceWorkList(WorkList))
    /* empty */ ;
  lateLiveIntervalUpdate();
}
3668
3669
489k
/// Drop all per-function state so the pass can run on another function.
void RegisterCoalescer::releaseMemory() {
  ErasedInstrs.clear();
  WorkList.clear();
  DeadDefs.clear();
  InflateRegs.clear();
  LargeLIVisitCounter.clear();
}
3676
3677
489k
/// Pass entry point: coalesce copies in \p fn, then try to inflate the
/// register classes of registers freed from sub-register constraints.
/// Always returns true since coalescing modifies the function.
bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
  // Cache the function, register info, and required analyses.
  MF = &fn;
  MRI = &fn.getRegInfo();
  const TargetSubtargetInfo &STI = fn.getSubtarget();
  TRI = STI.getRegisterInfo();
  TII = STI.getInstrInfo();
  LIS = &getAnalysis<LiveIntervals>();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  Loops = &getAnalysis<MachineLoopInfo>();
  // The command-line flag overrides the subtarget's preference when set.
  if (EnableGlobalCopies == cl::BOU_UNSET)
    JoinGlobalCopies = STI.enableJoinGlobalCopies();
  else
    JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);

  // The MachineScheduler does not currently require JoinSplitEdges. This will
  // either be enabled unconditionally or replaced by a more general live range
  // splitting optimization.
  JoinSplitEdges = EnableJoinSplits;

  LLVM_DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
                    << "********** Function: " << MF->getName() << '\n');

  if (VerifyCoalescing)
    MF->verify(this, "Before register coalescing");

  RegClassInfo.runOnMachineFunction(fn);

  // Join (coalesce) intervals if requested.
  if (EnableJoining)
    joinAllIntervals();

  // After deleting a lot of copies, register classes may be less constrained.
  // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 ->
  // DPR inflation.
  array_pod_sort(InflateRegs.begin(), InflateRegs.end());
  InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()),
                    InflateRegs.end());
  LLVM_DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size()
                    << " regs.\n");
  for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) {
    unsigned Reg = InflateRegs[i];
    // Skip registers with no remaining (non-debug) uses or defs.
    if (MRI->reg_nodbg_empty(Reg))
      continue;
    if (MRI->recomputeRegClass(Reg)) {
      LLVM_DEBUG(dbgs() << printReg(Reg) << " inflated to "
                        << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n');
      ++NumInflated;

      LiveInterval &LI = LIS->getInterval(Reg);
      if (LI.hasSubRanges()) {
        // If the inflated register class does not support subregisters anymore
        // remove the subranges.
        if (!MRI->shouldTrackSubRegLiveness(Reg)) {
          LI.clearSubRanges();
        } else {
#ifndef NDEBUG
          LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
          // If subranges are still supported, then the same subregs
          // should still be supported.
          for (LiveInterval::SubRange &S : LI.subranges()) {
            assert((S.LaneMask & ~MaxMask).none());
          }
#endif
        }
      }
    }
  }

  LLVM_DEBUG(dump());
  if (VerifyCoalescing)
    MF->verify(this, "After register coalescing");
  return true;
}
3750
3751
0
/// Print the pass's result by delegating to the LiveIntervals analysis.
void RegisterCoalescer::print(raw_ostream &O, const Module* m) const {
   LIS->print(O, m);
}