Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- HexagonExpandCondsets.cpp ------------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
// Replace mux instructions with the corresponding legal instructions.
10
// It is meant to work post-SSA, but still on virtual registers. It was
11
// originally placed between register coalescing and machine instruction
12
// scheduler.
13
// In this place in the optimization sequence, live interval analysis had
14
// been performed, and the live intervals should be preserved. A large part
15
// of the code deals with preserving the liveness information.
16
//
17
// Liveness tracking aside, the main functionality of this pass is divided
18
// into two steps. The first step is to replace an instruction
19
//   %0 = C2_mux %1, %2, %3
20
// with a pair of conditional transfers
21
//   %0 = A2_tfrt %1, %2
22
//   %0 = A2_tfrf %1, %3
23
// It is the intention that the execution of this pass could be terminated
24
// after this step, and the code generated would be functionally correct.
25
//
26
// If the uses of the source values %1 and %2 are kills, and their
27
// definitions are predicable, then in the second step, the conditional
28
// transfers will then be rewritten as predicated instructions. E.g.
29
//   %0 = A2_or %1, %2
30
//   %3 = A2_tfrt %99, killed %0
31
// will be rewritten as
32
//   %3 = A2_port %99, %1, %2
33
//
34
// This replacement has two variants: "up" and "down". Consider this case:
35
//   %0 = A2_or %1, %2
36
//   ... [intervening instructions] ...
37
//   %3 = A2_tfrt %99, killed %0
38
// variant "up":
39
//   %3 = A2_port %99, %1, %2
40
//   ... [intervening instructions, %0->vreg3] ...
41
//   [deleted]
42
// variant "down":
43
//   [deleted]
44
//   ... [intervening instructions] ...
45
//   %3 = A2_port %99, %1, %2
46
//
47
// Both, one or none of these variants may be valid, and checks are made
48
// to rule out inapplicable variants.
49
//
50
// As an additional optimization, before either of the two steps above is
51
// executed, the pass attempts to coalesce the target register with one of
52
// the source registers, e.g. given an instruction
53
//   %3 = C2_mux %0, %1, %2
54
// %3 will be coalesced with either %1 or %2. If this succeeds,
55
// the instruction would then be (for example)
56
//   %3 = C2_mux %0, %3, %2
57
// and, under certain circumstances, this could result in only one predicated
58
// instruction:
59
//   %3 = A2_tfrf %0, %2
60
//
61
62
// Splitting a definition of a register into two predicated transfers
63
// creates a complication in liveness tracking. Live interval computation
64
// will see both instructions as actual definitions, and will mark the
65
// first one as dead. The definition is not actually dead, and this
66
// situation will need to be fixed. For example:
67
//   dead %1 = A2_tfrt ...  ; marked as dead
68
//   %1 = A2_tfrf ...
69
//
70
// Since any of the individual predicated transfers may end up getting
71
// removed (in case it is an identity copy), some pre-existing def may
72
// be marked as dead after live interval recomputation:
73
//   dead %1 = ...          ; marked as dead
74
//   ...
75
//   %1 = A2_tfrf ...       ; if A2_tfrt is removed
76
// This case happens if %1 was used as a source in A2_tfrt, which means
77
// that is it actually live at the A2_tfrf, and so the now dead definition
78
// of %1 will need to be updated to non-dead at some point.
79
//
80
// This issue could be remedied by adding implicit uses to the predicated
81
// transfers, but this will create a problem with subsequent predication,
82
// since the transfers will no longer be possible to reorder. To avoid
83
// that, the initial splitting will not add any implicit uses. These
84
// implicit uses will be added later, after predication. The extra price,
85
// however, is that finding the locations where the implicit uses need
86
// to be added, and updating the live ranges will be more involved.
87
88
#include "HexagonInstrInfo.h"
89
#include "HexagonRegisterInfo.h"
90
#include "llvm/ADT/DenseMap.h"
91
#include "llvm/ADT/SetVector.h"
92
#include "llvm/ADT/SmallVector.h"
93
#include "llvm/ADT/StringRef.h"
94
#include "llvm/CodeGen/LiveInterval.h"
95
#include "llvm/CodeGen/LiveIntervals.h"
96
#include "llvm/CodeGen/MachineBasicBlock.h"
97
#include "llvm/CodeGen/MachineDominators.h"
98
#include "llvm/CodeGen/MachineFunction.h"
99
#include "llvm/CodeGen/MachineFunctionPass.h"
100
#include "llvm/CodeGen/MachineInstr.h"
101
#include "llvm/CodeGen/MachineInstrBuilder.h"
102
#include "llvm/CodeGen/MachineOperand.h"
103
#include "llvm/CodeGen/MachineRegisterInfo.h"
104
#include "llvm/CodeGen/SlotIndexes.h"
105
#include "llvm/CodeGen/TargetRegisterInfo.h"
106
#include "llvm/CodeGen/TargetSubtargetInfo.h"
107
#include "llvm/IR/DebugLoc.h"
108
#include "llvm/IR/Function.h"
109
#include "llvm/MC/LaneBitmask.h"
110
#include "llvm/Pass.h"
111
#include "llvm/Support/CommandLine.h"
112
#include "llvm/Support/Debug.h"
113
#include "llvm/Support/ErrorHandling.h"
114
#include "llvm/Support/raw_ostream.h"
115
#include <cassert>
116
#include <iterator>
117
#include <set>
118
#include <utility>
119
120
#define DEBUG_TYPE "expand-condsets"
121
122
using namespace llvm;
123
124
static cl::opt<unsigned> OptTfrLimit("expand-condsets-tfr-limit",
125
  cl::init(~0U), cl::Hidden, cl::desc("Max number of mux expansions"));
126
static cl::opt<unsigned> OptCoaLimit("expand-condsets-coa-limit",
127
  cl::init(~0U), cl::Hidden, cl::desc("Max number of segment coalescings"));
128
129
namespace llvm {
130
131
  void initializeHexagonExpandCondsetsPass(PassRegistry&);
132
  FunctionPass *createHexagonExpandCondsets();
133
134
} // end namespace llvm
135
136
namespace {
137
138
  class HexagonExpandCondsets : public MachineFunctionPass {
139
  public:
140
    static char ID;
141
142
855
    HexagonExpandCondsets() : MachineFunctionPass(ID) {
143
855
      if (OptCoaLimit.getPosition())
144
1
        CoaLimitActive = true, CoaLimit = OptCoaLimit;
145
855
      if (OptTfrLimit.getPosition())
146
0
        TfrLimitActive = true, TfrLimit = OptTfrLimit;
147
855
      initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry());
148
855
    }
149
150
4.21k
    StringRef getPassName() const override { return "Hexagon Expand Condsets"; }
151
152
855
    void getAnalysisUsage(AnalysisUsage &AU) const override {
153
855
      AU.addRequired<LiveIntervals>();
154
855
      AU.addPreserved<LiveIntervals>();
155
855
      AU.addPreserved<SlotIndexes>();
156
855
      AU.addRequired<MachineDominatorTree>();
157
855
      AU.addPreserved<MachineDominatorTree>();
158
855
      MachineFunctionPass::getAnalysisUsage(AU);
159
855
    }
160
161
    bool runOnMachineFunction(MachineFunction &MF) override;
162
163
  private:
164
    const HexagonInstrInfo *HII = nullptr;
165
    const TargetRegisterInfo *TRI = nullptr;
166
    MachineDominatorTree *MDT;
167
    MachineRegisterInfo *MRI = nullptr;
168
    LiveIntervals *LIS = nullptr;
169
    bool CoaLimitActive = false;
170
    bool TfrLimitActive = false;
171
    unsigned CoaLimit;
172
    unsigned TfrLimit;
173
    unsigned CoaCounter = 0;
174
    unsigned TfrCounter = 0;
175
176
    struct RegisterRef {
177
      RegisterRef(const MachineOperand &Op) : Reg(Op.getReg()),
178
9.29k
          Sub(Op.getSubReg()) {}
179
389
      RegisterRef(unsigned R = 0, unsigned S = 0) : Reg(R), Sub(S) {}
180
181
1.25k
      bool operator== (RegisterRef RR) const {
182
1.25k
        return Reg == RR.Reg && 
Sub == RR.Sub387
;
183
1.25k
      }
184
428
      bool operator!= (RegisterRef RR) const { return !operator==(RR); }
185
2.12k
      bool operator< (RegisterRef RR) const {
186
2.12k
        return Reg < RR.Reg || 
(1.61k
Reg == RR.Reg1.61k
&&
Sub < RR.Sub1.57k
);
187
2.12k
      }
188
189
      unsigned Reg, Sub;
190
    };
191
192
    using ReferenceMap = DenseMap<unsigned, unsigned>;
193
    enum { Sub_Low = 0x1, Sub_High = 0x2, Sub_None = (Sub_Low | Sub_High) };
194
    enum { Exec_Then = 0x10, Exec_Else = 0x20 };
195
196
    unsigned getMaskForSub(unsigned Sub);
197
    bool isCondset(const MachineInstr &MI);
198
    LaneBitmask getLaneMask(unsigned Reg, unsigned Sub);
199
200
    void addRefToMap(RegisterRef RR, ReferenceMap &Map, unsigned Exec);
201
    bool isRefInMap(RegisterRef, ReferenceMap &Map, unsigned Exec);
202
203
    void updateDeadsInRange(unsigned Reg, LaneBitmask LM, LiveRange &Range);
204
    void updateKillFlags(unsigned Reg);
205
    void updateDeadFlags(unsigned Reg);
206
    void recalculateLiveInterval(unsigned Reg);
207
    void removeInstr(MachineInstr &MI);
208
    void updateLiveness(std::set<unsigned> &RegSet, bool Recalc,
209
        bool UpdateKills, bool UpdateDeads);
210
211
    unsigned getCondTfrOpcode(const MachineOperand &SO, bool Cond);
212
    MachineInstr *genCondTfrFor(MachineOperand &SrcOp,
213
        MachineBasicBlock::iterator At, unsigned DstR,
214
        unsigned DstSR, const MachineOperand &PredOp, bool PredSense,
215
        bool ReadUndef, bool ImpUse);
216
    bool split(MachineInstr &MI, std::set<unsigned> &UpdRegs);
217
218
    bool isPredicable(MachineInstr *MI);
219
    MachineInstr *getReachingDefForPred(RegisterRef RD,
220
        MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond);
221
    bool canMoveOver(MachineInstr &MI, ReferenceMap &Defs, ReferenceMap &Uses);
222
    bool canMoveMemTo(MachineInstr &MI, MachineInstr &ToI, bool IsDown);
223
    void predicateAt(const MachineOperand &DefOp, MachineInstr &MI,
224
                     MachineBasicBlock::iterator Where,
225
                     const MachineOperand &PredOp, bool Cond,
226
                     std::set<unsigned> &UpdRegs);
227
    void renameInRange(RegisterRef RO, RegisterRef RN, unsigned PredR,
228
        bool Cond, MachineBasicBlock::iterator First,
229
        MachineBasicBlock::iterator Last);
230
    bool predicate(MachineInstr &TfrI, bool Cond, std::set<unsigned> &UpdRegs);
231
    bool predicateInBlock(MachineBasicBlock &B,
232
        std::set<unsigned> &UpdRegs);
233
234
    bool isIntReg(RegisterRef RR, unsigned &BW);
235
    bool isIntraBlocks(LiveInterval &LI);
236
    bool coalesceRegisters(RegisterRef R1, RegisterRef R2);
237
    bool coalesceSegments(const SmallVectorImpl<MachineInstr*> &Condsets,
238
                          std::set<unsigned> &UpdRegs);
239
  };
240
241
} // end anonymous namespace
242
243
char HexagonExpandCondsets::ID = 0;
244
245
namespace llvm {
246
247
  char &HexagonExpandCondsetsID = HexagonExpandCondsets::ID;
248
249
} // end namespace llvm
250
251
1.02k
INITIALIZE_PASS_BEGIN(HexagonExpandCondsets, "expand-condsets",
252
1.02k
  "Hexagon Expand Condsets", false, false)
253
1.02k
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
254
1.02k
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
255
1.02k
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
256
1.02k
INITIALIZE_PASS_END(HexagonExpandCondsets, "expand-condsets",
257
  "Hexagon Expand Condsets", false, false)
258
259
285
unsigned HexagonExpandCondsets::getMaskForSub(unsigned Sub) {
260
285
  switch (Sub) {
261
285
    case Hexagon::isub_lo:
262
5
    case Hexagon::vsub_lo:
263
5
      return Sub_Low;
264
5
    case Hexagon::isub_hi:
265
3
    case Hexagon::vsub_hi:
266
3
      return Sub_High;
267
277
    case Hexagon::NoSubRegister:
268
277
      return Sub_None;
269
0
  }
270
0
  llvm_unreachable("Invalid subregister");
271
0
}
272
273
36.6k
bool HexagonExpandCondsets::isCondset(const MachineInstr &MI) {
274
36.6k
  unsigned Opc = MI.getOpcode();
275
36.6k
  switch (Opc) {
276
36.6k
    case Hexagon::C2_mux:
277
345
    case Hexagon::C2_muxii:
278
345
    case Hexagon::C2_muxir:
279
345
    case Hexagon::C2_muxri:
280
345
    case Hexagon::PS_pselect:
281
345
        return true;
282
345
      
break0
;
283
36.2k
  }
284
36.2k
  return false;
285
36.2k
}
286
287
2.63k
LaneBitmask HexagonExpandCondsets::getLaneMask(unsigned Reg, unsigned Sub) {
288
2.63k
  assert(TargetRegisterInfo::isVirtualRegister(Reg));
289
2.63k
  return Sub != 0 ? 
TRI->getSubRegIndexLaneMask(Sub)28
290
2.63k
                  : 
MRI->getMaxLaneMaskForVReg(Reg)2.60k
;
291
2.63k
}
292
293
void HexagonExpandCondsets::addRefToMap(RegisterRef RR, ReferenceMap &Map,
294
247
      unsigned Exec) {
295
247
  unsigned Mask = getMaskForSub(RR.Sub) | Exec;
296
247
  ReferenceMap::iterator F = Map.find(RR.Reg);
297
247
  if (F == Map.end())
298
224
    Map.insert(std::make_pair(RR.Reg, Mask));
299
23
  else
300
23
    F->second |= Mask;
301
247
}
302
303
bool HexagonExpandCondsets::isRefInMap(RegisterRef RR, ReferenceMap &Map,
304
694
      unsigned Exec) {
305
694
  ReferenceMap::iterator F = Map.find(RR.Reg);
306
694
  if (F == Map.end())
307
656
    return false;
308
38
  unsigned Mask = getMaskForSub(RR.Sub) | Exec;
309
38
  if (Mask & F->second)
310
38
    return true;
311
0
  return false;
312
0
}
313
314
1.38k
void HexagonExpandCondsets::updateKillFlags(unsigned Reg) {
315
1.57k
  auto KillAt = [this,Reg] (SlotIndex K, LaneBitmask LM) -> void {
316
1.57k
    // Set the <kill> flag on a use of Reg whose lane mask is contained in LM.
317
1.57k
    MachineInstr *MI = LIS->getInstructionFromIndex(K);
318
4.31k
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; 
++i2.74k
) {
319
4.29k
      MachineOperand &Op = MI->getOperand(i);
320
4.29k
      if (!Op.isReg() || 
!Op.isUse()4.08k
||
Op.getReg() != Reg2.54k
||
321
4.29k
          
MI->isRegTiedToDefOperand(i)1.57k
)
322
2.74k
        continue;
323
1.55k
      LaneBitmask SLM = getLaneMask(Reg, Op.getSubReg());
324
1.55k
      if ((SLM & LM) == SLM) {
325
1.55k
        // Only set the kill flag on the first encountered use of Reg in this
326
1.55k
        // instruction.
327
1.55k
        Op.setIsKill(true);
328
1.55k
        break;
329
1.55k
      }
330
1.55k
    }
331
1.57k
  };
332
1.38k
333
1.38k
  LiveInterval &LI = LIS->getInterval(Reg);
334
3.65k
  for (auto I = LI.begin(), E = LI.end(); I != E; 
++I2.27k
) {
335
2.27k
    if (!I->end.isRegister())
336
315
      continue;
337
1.96k
    // Do not mark the end of the segment as <kill>, if the next segment
338
1.96k
    // starts with a predicated instruction.
339
1.96k
    auto NextI = std::next(I);
340
1.96k
    if (NextI != E && 
NextI->start.isRegister()756
) {
341
743
      MachineInstr *DefI = LIS->getInstructionFromIndex(NextI->start);
342
743
      if (HII->isPredicated(*DefI))
343
359
        continue;
344
1.60k
    }
345
1.60k
    bool WholeReg = true;
346
1.60k
    if (LI.hasSubRanges()) {
347
54
      auto EndsAtI = [I] (LiveInterval::SubRange &S) -> bool {
348
54
        LiveRange::iterator F = S.find(I->end);
349
54
        return F != S.end() && 
I->end == F->end36
;
350
54
      };
351
27
      // Check if all subranges end at I->end. If so, make sure to kill
352
27
      // the whole register.
353
54
      for (LiveInterval::SubRange &S : LI.subranges()) {
354
54
        if (EndsAtI(S))
355
0
          KillAt(I->end, S.LaneMask);
356
54
        else
357
54
          WholeReg = false;
358
54
      }
359
27
    }
360
1.60k
    if (WholeReg)
361
1.57k
      KillAt(I->end, MRI->getMaxLaneMaskForVReg(Reg));
362
1.60k
  }
363
1.38k
}
364
365
void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM,
366
786
      LiveRange &Range) {
367
786
  assert(TargetRegisterInfo::isVirtualRegister(Reg));
368
786
  if (Range.empty())
369
127
    return;
370
659
371
659
  // Return two booleans: { def-modifes-reg, def-covers-reg }.
372
3.26k
  
auto IsRegDef = [this,Reg,LM] (MachineOperand &Op) -> std::pair<bool,bool> 659
{
373
3.26k
    if (!Op.isReg() || 
!Op.isDef()2.65k
)
374
2.16k
      return { false, false };
375
1.09k
    unsigned DR = Op.getReg(), DSR = Op.getSubReg();
376
1.09k
    if (!TargetRegisterInfo::isVirtualRegister(DR) || 
DR != Reg1.08k
)
377
20
      return { false, false };
378
1.07k
    LaneBitmask SLM = getLaneMask(DR, DSR);
379
1.07k
    LaneBitmask A = SLM & LM;
380
1.07k
    return { A.any(), A == SLM };
381
1.07k
  };
382
659
383
659
  // The splitting step will create pairs of predicated definitions without
384
659
  // any implicit uses (since implicit uses would interfere with predication).
385
659
  // This can cause the reaching defs to become dead after live range
386
659
  // recomputation, even though they are not really dead.
387
659
  // We need to identify predicated defs that need implicit uses, and
388
659
  // dead defs that are not really dead, and correct both problems.
389
659
390
659
  auto Dominate = [this] (SetVector<MachineBasicBlock*> &Defs,
391
659
                          MachineBasicBlock *Dest) -> bool {
392
57
    for (MachineBasicBlock *D : Defs)
393
74
      if (D != Dest && 
MDT->dominates(D, Dest)30
)
394
13
        return true;
395
57
396
57
    MachineBasicBlock *Entry = &Dest->getParent()->front();
397
44
    SetVector<MachineBasicBlock*> Work(Dest->pred_begin(), Dest->pred_end());
398
307
    for (unsigned i = 0; i < Work.size(); 
++i263
) {
399
306
      MachineBasicBlock *B = Work[i];
400
306
      if (Defs.count(B))
401
7
        continue;
402
299
      if (B == Entry)
403
43
        return false;
404
256
      for (auto *P : B->predecessors())
405
373
        Work.insert(P);
406
256
    }
407
44
    
return true1
;
408
44
  };
409
659
410
659
  // First, try to extend live range within individual basic blocks. This
411
659
  // will leave us only with dead defs that do not reach any predicated
412
659
  // defs in the same block.
413
659
  SetVector<MachineBasicBlock*> Defs;
414
659
  SmallVector<SlotIndex,4> PredDefs;
415
1.17k
  for (auto &Seg : Range) {
416
1.17k
    if (!Seg.start.isRegister())
417
94
      continue;
418
1.07k
    MachineInstr *DefI = LIS->getInstructionFromIndex(Seg.start);
419
1.07k
    Defs.insert(DefI->getParent());
420
1.07k
    if (HII->isPredicated(*DefI))
421
538
      PredDefs.push_back(Seg.start);
422
1.07k
  }
423
659
424
659
  SmallVector<SlotIndex,8> Undefs;
425
659
  LiveInterval &LI = LIS->getInterval(Reg);
426
659
  LI.computeSubRangeUndefs(Undefs, LM, *MRI, *LIS->getSlotIndexes());
427
659
428
659
  for (auto &SI : PredDefs) {
429
538
    MachineBasicBlock *BB = LIS->getMBBFromIndex(SI);
430
538
    auto P = Range.extendInBlock(Undefs, LIS->getMBBStartIdx(BB), SI);
431
538
    if (P.first != nullptr || 
P.second197
)
432
342
      SI = SlotIndex();
433
538
  }
434
659
435
659
  // Calculate reachability for those predicated defs that were not handled
436
659
  // by the in-block extension.
437
659
  SmallVector<SlotIndex,4> ExtTo;
438
659
  for (auto &SI : PredDefs) {
439
538
    if (!SI.isValid())
440
342
      continue;
441
196
    MachineBasicBlock *BB = LIS->getMBBFromIndex(SI);
442
196
    if (BB->pred_empty())
443
139
      continue;
444
57
    // If the defs from this range reach SI via all predecessors, it is live.
445
57
    // It can happen that SI is reached by the defs through some paths, but
446
57
    // not all. In the IR coming into this optimization, SI would not be
447
57
    // considered live, since the defs would then not jointly dominate SI.
448
57
    // That means that SI is an overwriting def, and no implicit use is
449
57
    // needed at this point. Do not add SI to the extension points, since
450
57
    // extendToIndices will abort if there is no joint dominance.
451
57
    // If the abort was avoided by adding extra undefs added to Undefs,
452
57
    // extendToIndices could actually indicate that SI is live, contrary
453
57
    // to the original IR.
454
57
    if (Dominate(Defs, BB))
455
14
      ExtTo.push_back(SI);
456
57
  }
457
659
458
659
  if (!ExtTo.empty())
459
9
    LIS->extendToIndices(Range, ExtTo, Undefs);
460
659
461
659
  // Remove <dead> flags from all defs that are not dead after live range
462
659
  // extension, and collect all def operands. They will be used to generate
463
659
  // the necessary implicit uses.
464
659
  // At the same time, add <dead> flag to all defs that are actually dead.
465
659
  // This can happen, for example, when a mux with identical inputs is
466
659
  // replaced with a COPY: the use of the predicate register disappears and
467
659
  // the dead can become dead.
468
659
  std::set<RegisterRef> DefRegs;
469
1.18k
  for (auto &Seg : Range) {
470
1.18k
    if (!Seg.start.isRegister())
471
106
      continue;
472
1.07k
    MachineInstr *DefI = LIS->getInstructionFromIndex(Seg.start);
473
3.26k
    for (auto &Op : DefI->operands()) {
474
3.26k
      auto P = IsRegDef(Op);
475
3.26k
      if (P.second && 
Seg.end.isDead()1.06k
) {
476
5
        Op.setIsDead(true);
477
3.26k
      } else if (P.first) {
478
1.07k
        DefRegs.insert(Op);
479
1.07k
        Op.setIsDead(false);
480
1.07k
      }
481
3.26k
    }
482
1.07k
  }
483
659
484
659
  // Now, add implicit uses to each predicated def that is reached
485
659
  // by other defs.
486
1.18k
  for (auto &Seg : Range) {
487
1.18k
    if (!Seg.start.isRegister() || 
!Range.liveAt(Seg.start.getPrevSlot())1.07k
)
488
805
      continue;
489
380
    MachineInstr *DefI = LIS->getInstructionFromIndex(Seg.start);
490
380
    if (!HII->isPredicated(*DefI))
491
25
      continue;
492
355
    // Construct the set of all necessary implicit uses, based on the def
493
355
    // operands in the instruction. We need to tie the implicit uses to
494
355
    // the corresponding defs.
495
355
    std::map<RegisterRef,unsigned> ImpUses;
496
1.45k
    for (unsigned i = 0, e = DefI->getNumOperands(); i != e; 
++i1.10k
) {
497
1.10k
      MachineOperand &Op = DefI->getOperand(i);
498
1.10k
      if (!Op.isReg() || 
!DefRegs.count(Op)876
)
499
730
        continue;
500
374
      if (Op.isDef()) {
501
354
        // Tied defs will always have corresponding uses, so no extra
502
354
        // implicit uses are needed.
503
354
        if (!Op.isTied())
504
352
          ImpUses.insert({Op, i});
505
354
      } else {
506
20
        // This function can be called for the same register with different
507
20
        // lane masks. If the def in this instruction was for the whole
508
20
        // register, we can get here more than once. Avoid adding multiple
509
20
        // implicit uses (or adding an implicit use when an explicit one is
510
20
        // present).
511
20
        if (Op.isTied())
512
2
          ImpUses.erase(Op);
513
20
      }
514
374
    }
515
355
    if (ImpUses.empty())
516
3
      continue;
517
352
    MachineFunction &MF = *DefI->getParent()->getParent();
518
352
    for (std::pair<RegisterRef, unsigned> P : ImpUses) {
519
352
      RegisterRef R = P.first;
520
352
      MachineInstrBuilder(MF, DefI).addReg(R.Reg, RegState::Implicit, R.Sub);
521
352
      DefI->tieOperands(P.second, DefI->getNumOperands()-1);
522
352
    }
523
352
  }
524
659
}
525
526
776
void HexagonExpandCondsets::updateDeadFlags(unsigned Reg) {
527
776
  LiveInterval &LI = LIS->getInterval(Reg);
528
776
  if (LI.hasSubRanges()) {
529
20
    for (LiveInterval::SubRange &S : LI.subranges()) {
530
20
      updateDeadsInRange(Reg, S.LaneMask, S);
531
20
      LIS->shrinkToUses(S, Reg);
532
20
    }
533
10
    LI.clear();
534
10
    LIS->constructMainRangeFromSubranges(LI);
535
766
  } else {
536
766
    updateDeadsInRange(Reg, MRI->getMaxLaneMaskForVReg(Reg), LI);
537
766
  }
538
776
}
539
540
776
void HexagonExpandCondsets::recalculateLiveInterval(unsigned Reg) {
541
776
  LIS->removeInterval(Reg);
542
776
  LIS->createAndComputeVirtRegInterval(Reg);
543
776
}
544
545
656
void HexagonExpandCondsets::removeInstr(MachineInstr &MI) {
546
656
  LIS->RemoveMachineInstrFromMaps(MI);
547
656
  MI.eraseFromParent();
548
656
}
549
550
void HexagonExpandCondsets::updateLiveness(std::set<unsigned> &RegSet,
551
6.70k
      bool Recalc, bool UpdateKills, bool UpdateDeads) {
552
6.70k
  UpdateKills |= UpdateDeads;
553
6.70k
  for (unsigned R : RegSet) {
554
1.24k
    if (!TargetRegisterInfo::isVirtualRegister(R)) {
555
0
      assert(TargetRegisterInfo::isPhysicalRegister(R));
556
0
      // There shouldn't be any physical registers as operands, except
557
0
      // possibly reserved registers.
558
0
      assert(MRI->isReserved(R));
559
0
      continue;
560
0
    }
561
1.24k
    if (Recalc)
562
776
      recalculateLiveInterval(R);
563
1.24k
    if (UpdateKills)
564
1.24k
      MRI->clearKillFlags(R);
565
1.24k
    if (UpdateDeads)
566
776
      updateDeadFlags(R);
567
1.24k
    // Fixing <dead> flags may extend live ranges, so reset <kill> flags
568
1.24k
    // after that.
569
1.24k
    if (UpdateKills)
570
1.24k
      updateKillFlags(R);
571
1.24k
    LIS->getInterval(R).verify();
572
1.24k
  }
573
6.70k
}
574
575
/// Get the opcode for a conditional transfer of the value in SO (source
576
/// operand). The condition (true/false) is given in Cond.
577
unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO,
578
688
      bool IfTrue) {
579
688
  using namespace Hexagon;
580
688
581
688
  if (SO.isReg()) {
582
389
    unsigned PhysR;
583
389
    RegisterRef RS = SO;
584
389
    if (TargetRegisterInfo::isVirtualRegister(RS.Reg)) {
585
389
      const TargetRegisterClass *VC = MRI->getRegClass(RS.Reg);
586
389
      assert(VC->begin() != VC->end() && "Empty register class");
587
389
      PhysR = *VC->begin();
588
389
    } else {
589
0
      assert(TargetRegisterInfo::isPhysicalRegister(RS.Reg));
590
0
      PhysR = RS.Reg;
591
0
    }
592
389
    unsigned PhysS = (RS.Sub == 0) ? 
PhysR384
:
TRI->getSubReg(PhysR, RS.Sub)5
;
593
389
    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysS);
594
389
    switch (TRI->getRegSizeInBits(*RC)) {
595
389
      case 32:
596
385
        return IfTrue ? 
A2_tfrt218
:
A2_tfrf167
;
597
389
      case 64:
598
4
        return IfTrue ? 
A2_tfrpt2
:
A2_tfrpf2
;
599
0
    }
600
0
    llvm_unreachable("Invalid register operand");
601
0
  }
602
299
  switch (SO.getType()) {
603
299
    case MachineOperand::MO_Immediate:
604
299
    case MachineOperand::MO_FPImmediate:
605
299
    case MachineOperand::MO_ConstantPoolIndex:
606
299
    case MachineOperand::MO_TargetIndex:
607
299
    case MachineOperand::MO_JumpTableIndex:
608
299
    case MachineOperand::MO_ExternalSymbol:
609
299
    case MachineOperand::MO_GlobalAddress:
610
299
    case MachineOperand::MO_BlockAddress:
611
299
      return IfTrue ? 
C2_cmoveit124
:
C2_cmoveif175
;
612
299
    default:
613
0
      break;
614
0
  }
615
0
  llvm_unreachable("Unexpected source operand");
616
0
}
617
618
/// Generate a conditional transfer, copying the value SrcOp to the
619
/// destination register DstR:DstSR, and using the predicate register from
620
/// PredOp. The Cond argument specifies whether the predicate is to be
621
/// if(PredOp), or if(!PredOp).
622
MachineInstr *HexagonExpandCondsets::genCondTfrFor(MachineOperand &SrcOp,
623
      MachineBasicBlock::iterator At,
624
      unsigned DstR, unsigned DstSR, const MachineOperand &PredOp,
625
688
      bool PredSense, bool ReadUndef, bool ImpUse) {
626
688
  MachineInstr *MI = SrcOp.getParent();
627
688
  MachineBasicBlock &B = *At->getParent();
628
688
  const DebugLoc &DL = MI->getDebugLoc();
629
688
630
688
  // Don't avoid identity copies here (i.e. if the source and the destination
631
688
  // are the same registers). It is actually better to generate them here,
632
688
  // since this would cause the copy to potentially be predicated in the next
633
688
  // step. The predication will remove such a copy if it is unable to
634
688
  /// predicate.
635
688
636
688
  unsigned Opc = getCondTfrOpcode(SrcOp, PredSense);
637
688
  unsigned DstState = RegState::Define | (ReadUndef ? 
RegState::Undef16
:
0672
);
638
688
  unsigned PredState = getRegState(PredOp) & ~RegState::Kill;
639
688
  MachineInstrBuilder MIB;
640
688
641
688
  if (SrcOp.isReg()) {
642
389
    unsigned SrcState = getRegState(SrcOp);
643
389
    if (RegisterRef(SrcOp) == RegisterRef(DstR, DstSR))
644
154
      SrcState &= ~RegState::Kill;
645
389
    MIB = BuildMI(B, At, DL, HII->get(Opc))
646
389
          .addReg(DstR, DstState, DstSR)
647
389
          .addReg(PredOp.getReg(), PredState, PredOp.getSubReg())
648
389
          .addReg(SrcOp.getReg(), SrcState, SrcOp.getSubReg());
649
389
  } else {
650
299
    MIB = BuildMI(B, At, DL, HII->get(Opc))
651
299
              .addReg(DstR, DstState, DstSR)
652
299
              .addReg(PredOp.getReg(), PredState, PredOp.getSubReg())
653
299
              .add(SrcOp);
654
299
  }
655
688
656
688
  LLVM_DEBUG(dbgs() << "created an initial copy: " << *MIB);
657
688
  return &*MIB;
658
688
}
659
660
/// Replace a MUX instruction MI with a pair A2_tfrt/A2_tfrf. This function
661
/// performs all necessary changes to complete the replacement.
662
bool HexagonExpandCondsets::split(MachineInstr &MI,
663
345
                                  std::set<unsigned> &UpdRegs) {
664
345
  if (TfrLimitActive) {
665
0
    if (TfrCounter >= TfrLimit)
666
0
      return false;
667
0
    TfrCounter++;
668
0
  }
669
345
  LLVM_DEBUG(dbgs() << "\nsplitting " << printMBBReference(*MI.getParent())
670
345
                    << ": " << MI);
671
345
  MachineOperand &MD = MI.getOperand(0);  // Definition
672
345
  MachineOperand &MP = MI.getOperand(1);  // Predicate register
673
345
  assert(MD.isDef());
674
345
  unsigned DR = MD.getReg(), DSR = MD.getSubReg();
675
345
  bool ReadUndef = MD.isUndef();
676
345
  MachineBasicBlock::iterator At = MI;
677
345
678
345
  auto updateRegs = [&UpdRegs] (const MachineInstr &MI) -> void {
679
345
    for (auto &Op : MI.operands())
680
1.38k
      if (Op.isReg())
681
1.08k
        UpdRegs.insert(Op.getReg());
682
345
  };
683
345
684
345
  // If this is a mux of the same register, just replace it with COPY.
685
345
  // Ideally, this would happen earlier, so that register coalescing would
686
345
  // see it.
687
345
  MachineOperand &ST = MI.getOperand(2);
688
345
  MachineOperand &SF = MI.getOperand(3);
689
345
  if (ST.isReg() && 
SF.isReg()221
) {
690
129
    RegisterRef RT(ST);
691
129
    if (RT == RegisterRef(SF)) {
692
1
      // Copy regs to update first.
693
1
      updateRegs(MI);
694
1
      MI.setDesc(HII->get(TargetOpcode::COPY));
695
1
      unsigned S = getRegState(ST);
696
4
      while (MI.getNumOperands() > 1)
697
3
        MI.RemoveOperand(MI.getNumOperands()-1);
698
1
      MachineFunction &MF = *MI.getParent()->getParent();
699
1
      MachineInstrBuilder(MF, MI).addReg(RT.Reg, S, RT.Sub);
700
1
      return true;
701
1
    }
702
344
  }
703
344
704
344
  // First, create the two invididual conditional transfers, and add each
705
344
  // of them to the live intervals information. Do that first and then remove
706
344
  // the old instruction from live intervals.
707
344
  MachineInstr *TfrT =
708
344
      genCondTfrFor(ST, At, DR, DSR, MP, true, ReadUndef, false);
709
344
  MachineInstr *TfrF =
710
344
      genCondTfrFor(SF, At, DR, DSR, MP, false, ReadUndef, true);
711
344
  LIS->InsertMachineInstrInMaps(*TfrT);
712
344
  LIS->InsertMachineInstrInMaps(*TfrF);
713
344
714
344
  // Will need to recalculate live intervals for all registers in MI.
715
344
  updateRegs(MI);
716
344
717
344
  removeInstr(MI);
718
344
  return true;
719
344
}
720
721
169
bool HexagonExpandCondsets::isPredicable(MachineInstr *MI) {
722
169
  if (HII->isPredicated(*MI) || 
!HII->isPredicable(*MI)115
)
723
76
    return false;
724
93
  if (MI->hasUnmodeledSideEffects() || MI->mayStore())
725
0
    return false;
726
93
  // Reject instructions with multiple defs (e.g. post-increment loads).
727
93
  bool HasDef = false;
728
257
  for (auto &Op : MI->operands()) {
729
257
    if (!Op.isReg() || 
!Op.isDef()199
)
730
162
      continue;
731
95
    if (HasDef)
732
2
      return false;
733
93
    HasDef = true;
734
93
  }
735
93
  
for (auto &Mo : MI->memoperands())91
736
16
    if (Mo->isVolatile() || Mo->isAtomic())
737
0
      return false;
738
91
  return true;
739
91
}
740
741
/// Find the reaching definition for a predicated use of RD. The RD is used
742
/// under the conditions given by PredR and Cond, and this function will ignore
743
/// definitions that set RD under the opposite conditions.
744
MachineInstr *HexagonExpandCondsets::getReachingDefForPred(RegisterRef RD,
745
495
      MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond) {
746
495
  MachineBasicBlock &B = *UseIt->getParent();
747
495
  MachineBasicBlock::iterator I = UseIt, S = B.begin();
748
495
  if (I == S)
749
6
    return nullptr;
750
489
751
489
  bool PredValid = true;
752
3.06k
  do {
753
3.06k
    --I;
754
3.06k
    MachineInstr *MI = &*I;
755
3.06k
    // Check if this instruction can be ignored, i.e. if it is predicated
756
3.06k
    // on the complementary condition.
757
3.06k
    if (PredValid && 
HII->isPredicated(*MI)770
) {
758
124
      if (MI->readsRegister(PredR) && 
(Cond != HII->isPredicatedTrue(*MI))90
)
759
45
        continue;
760
3.02k
    }
761
3.02k
762
3.02k
    // Check the defs. If the PredR is defined, invalidate it. If RD is
763
3.02k
    // defined, return the instruction or 0, depending on the circumstances.
764
9.21k
    
for (auto &Op : MI->operands())3.02k
{
765
9.21k
      if (!Op.isReg() || 
!Op.isDef()8.22k
)
766
6.15k
        continue;
767
3.06k
      RegisterRef RR = Op;
768
3.06k
      if (RR.Reg == PredR) {
769
164
        PredValid = false;
770
164
        continue;
771
164
      }
772
2.89k
      if (RR.Reg != RD.Reg)
773
2.45k
        continue;
774
447
      // If the "Reg" part agrees, there is still the subregister to check.
775
447
      // If we are looking for %1:loreg, we can skip %1:hireg, but
776
447
      // not %1 (w/o subregisters).
777
447
      if (RR.Sub == RD.Sub)
778
444
        return MI;
779
3
      if (RR.Sub == 0 || 
RD.Sub == 00
)
780
3
        return nullptr;
781
3
      // We have different subregisters, so we can continue looking.
782
3
    }
783
3.02k
  } while (
I != S2.61k
);
784
489
785
489
  
return nullptr42
;
786
489
}
787
788
/// Check if the instruction MI can be safely moved over a set of instructions
789
/// whose side-effects (in terms of register defs and uses) are expressed in
790
/// the maps Defs and Uses. These maps reflect the conditional defs and uses
791
/// that depend on the same predicate register to allow moving instructions
792
/// over instructions predicated on the opposite condition.
793
bool HexagonExpandCondsets::canMoveOver(MachineInstr &MI, ReferenceMap &Defs,
794
164
                                        ReferenceMap &Uses) {
795
164
  // In order to be able to safely move MI over instructions that define
796
164
  // "Defs" and use "Uses", no def operand from MI can be defined or used
797
164
  // and no use operand can be defined.
798
425
  for (auto &Op : MI.operands()) {
799
425
    if (!Op.isReg())
800
48
      continue;
801
377
    RegisterRef RR = Op;
802
377
    // For physical register we would need to check register aliases, etc.
803
377
    // and we don't want to bother with that. It would be of little value
804
377
    // before the actual register rewriting (from virtual to physical).
805
377
    if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
806
0
      return false;
807
377
    // No redefs for any operand.
808
377
    if (isRefInMap(RR, Defs, Exec_Then))
809
34
      return false;
810
343
    // For defs, there cannot be uses.
811
343
    if (Op.isDef() && 
isRefInMap(RR, Uses, Exec_Then)145
)
812
0
      return false;
813
343
  }
814
164
  
return true130
;
815
164
}
816
817
/// Check if the instruction accessing memory (TheI) can be moved to the
818
/// location ToI.
819
bool HexagonExpandCondsets::canMoveMemTo(MachineInstr &TheI, MachineInstr &ToI,
820
14
                                         bool IsDown) {
821
14
  bool IsLoad = TheI.mayLoad(), IsStore = TheI.mayStore();
822
14
  if (!IsLoad && 
!IsStore0
)
823
0
    return true;
824
14
  if (HII->areMemAccessesTriviallyDisjoint(TheI, ToI))
825
0
    return true;
826
14
  if (TheI.hasUnmodeledSideEffects())
827
0
    return false;
828
14
829
14
  MachineBasicBlock::iterator StartI = IsDown ? TheI : 
ToI0
;
830
14
  MachineBasicBlock::iterator EndI = IsDown ? ToI : 
TheI0
;
831
14
  bool Ordered = TheI.hasOrderedMemoryRef();
832
14
833
14
  // Search for aliased memory reference in (StartI, EndI).
834
39
  for (MachineBasicBlock::iterator I = std::next(StartI); I != EndI; 
++I25
) {
835
26
    MachineInstr *MI = &*I;
836
26
    if (MI->hasUnmodeledSideEffects())
837
0
      return false;
838
26
    bool L = MI->mayLoad(), S = MI->mayStore();
839
26
    if (!L && !S)
840
25
      continue;
841
1
    if (Ordered && 
MI->hasOrderedMemoryRef()0
)
842
0
      return false;
843
1
844
1
    bool Conflict = (L && 
IsStore0
) || S;
845
1
    if (Conflict)
846
1
      return false;
847
1
  }
848
14
  
return true13
;
849
14
}
850
851
/// Generate a predicated version of MI (where the condition is given via
852
/// PredR and Cond) at the point indicated by Where.
853
void HexagonExpandCondsets::predicateAt(const MachineOperand &DefOp,
854
                                        MachineInstr &MI,
855
                                        MachineBasicBlock::iterator Where,
856
                                        const MachineOperand &PredOp, bool Cond,
857
80
                                        std::set<unsigned> &UpdRegs) {
858
80
  // The problem with updating live intervals is that we can move one def
859
80
  // past another def. In particular, this can happen when moving an A2_tfrt
860
80
  // over an A2_tfrf defining the same register. From the point of view of
861
80
  // live intervals, these two instructions are two separate definitions,
862
80
  // and each one starts another live segment. LiveIntervals's "handleMove"
863
80
  // does not allow such moves, so we need to handle it ourselves. To avoid
864
80
  // invalidating liveness data while we are using it, the move will be
865
80
  // implemented in 4 steps: (1) add a clone of the instruction MI at the
866
80
  // target location, (2) update liveness, (3) delete the old instruction,
867
80
  // and (4) update liveness again.
868
80
869
80
  MachineBasicBlock &B = *MI.getParent();
870
80
  DebugLoc DL = Where->getDebugLoc();  // "Where" points to an instruction.
871
80
  unsigned Opc = MI.getOpcode();
872
80
  unsigned PredOpc = HII->getCondOpcode(Opc, !Cond);
873
80
  MachineInstrBuilder MB = BuildMI(B, Where, DL, HII->get(PredOpc));
874
80
  unsigned Ox = 0, NP = MI.getNumOperands();
875
80
  // Skip all defs from MI first.
876
160
  while (Ox < NP) {
877
160
    MachineOperand &MO = MI.getOperand(Ox);
878
160
    if (!MO.isReg() || 
!MO.isDef()140
)
879
80
      break;
880
80
    Ox++;
881
80
  }
882
80
  // Add the new def, then the predicate register, then the rest of the
883
80
  // operands.
884
80
  MB.addReg(DefOp.getReg(), getRegState(DefOp), DefOp.getSubReg());
885
80
  MB.addReg(PredOp.getReg(), PredOp.isUndef() ? 
RegState::Undef0
: 0,
886
80
            PredOp.getSubReg());
887
225
  while (Ox < NP) {
888
145
    MachineOperand &MO = MI.getOperand(Ox);
889
145
    if (!MO.isReg() || 
!MO.isImplicit()98
)
890
145
      MB.add(MO);
891
145
    Ox++;
892
145
  }
893
80
  MB.cloneMemRefs(MI);
894
80
895
80
  MachineInstr *NewI = MB;
896
80
  NewI->clearKillInfo();
897
80
  LIS->InsertMachineInstrInMaps(*NewI);
898
80
899
80
  for (auto &Op : NewI->operands())
900
305
    if (Op.isReg())
901
258
      UpdRegs.insert(Op.getReg());
902
80
}
903
904
/// In the range [First, Last], rename all references to the "old" register RO
905
/// to the "new" register RN, but only in instructions predicated on the given
906
/// condition.
907
void HexagonExpandCondsets::renameInRange(RegisterRef RO, RegisterRef RN,
908
      unsigned PredR, bool Cond, MachineBasicBlock::iterator First,
909
80
      MachineBasicBlock::iterator Last) {
910
80
  MachineBasicBlock::iterator End = std::next(Last);
911
254
  for (MachineBasicBlock::iterator I = First; I != End; 
++I174
) {
912
174
    MachineInstr *MI = &*I;
913
174
    // Do not touch instructions that are not predicated, or are predicated
914
174
    // on the opposite condition.
915
174
    if (!HII->isPredicated(*MI))
916
43
      continue;
917
131
    if (!MI->readsRegister(PredR) || 
(Cond != HII->isPredicatedTrue(*MI))130
)
918
19
      continue;
919
112
920
369
    
for (auto &Op : MI->operands())112
{
921
369
      if (!Op.isReg() || 
RO != RegisterRef(Op)348
)
922
289
        continue;
923
80
      Op.setReg(RN.Reg);
924
80
      Op.setSubReg(RN.Sub);
925
80
      // In practice, this isn't supposed to see any defs.
926
80
      assert(!Op.isDef() && "Not expecting a def");
927
80
    }
928
112
  }
929
80
}
930
931
/// For a given conditional copy, predicate the definition of the source of
932
/// the copy under the given condition (using the same predicate register as
933
/// the copy).
934
bool HexagonExpandCondsets::predicate(MachineInstr &TfrI, bool Cond,
935
385
                                      std::set<unsigned> &UpdRegs) {
936
385
  // TfrI - A2_tfr[tf] Instruction (not A2_tfrsi).
937
385
  unsigned Opc = TfrI.getOpcode();
938
385
  (void)Opc;
939
385
  assert(Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf);
940
385
  LLVM_DEBUG(dbgs() << "\nattempt to predicate if-" << (Cond ? "true" : "false")
941
385
                    << ": " << TfrI);
942
385
943
385
  MachineOperand &MD = TfrI.getOperand(0);
944
385
  MachineOperand &MP = TfrI.getOperand(1);
945
385
  MachineOperand &MS = TfrI.getOperand(2);
946
385
  // The source operand should be a <kill>. This is not strictly necessary,
947
385
  // but it makes things a lot simpler. Otherwise, we would need to rename
948
385
  // some registers, which would complicate the transformation considerably.
949
385
  if (!MS.isKill())
950
212
    return false;
951
173
  // Avoid predicating instructions that define a subregister if subregister
952
173
  // liveness tracking is not enabled.
953
173
  if (MD.getSubReg() && 
!MRI->shouldTrackSubRegLiveness(MD.getReg())13
)
954
0
    return false;
955
173
956
173
  RegisterRef RT(MS);
957
173
  unsigned PredR = MP.getReg();
958
173
  MachineInstr *DefI = getReachingDefForPred(RT, TfrI, PredR, Cond);
959
173
  if (!DefI || 
!isPredicable(DefI)169
)
960
82
    return false;
961
91
962
91
  LLVM_DEBUG(dbgs() << "Source def: " << *DefI);
963
91
964
91
  // Collect the information about registers defined and used between the
965
91
  // DefI and the TfrI.
966
91
  // Map: reg -> bitmask of subregs
967
91
  ReferenceMap Uses, Defs;
968
91
  MachineBasicBlock::iterator DefIt = DefI, TfrIt = TfrI;
969
91
970
91
  // Check if the predicate register is valid between DefI and TfrI.
971
91
  // If it is, we can then ignore instructions predicated on the negated
972
91
  // conditions when collecting def and use information.
973
91
  bool PredValid = true;
974
330
  for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt; 
++I239
) {
975
261
    if (!I->modifiesRegister(PredR, nullptr))
976
239
      continue;
977
22
    PredValid = false;
978
22
    break;
979
22
  }
980
91
981
178
  for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt; 
++I87
) {
982
92
    MachineInstr *MI = &*I;
983
92
    // If this instruction is predicated on the same register, it could
984
92
    // potentially be ignored.
985
92
    // By default assume that the instruction executes on the same condition
986
92
    // as TfrI (Exec_Then), and also on the opposite one (Exec_Else).
987
92
    unsigned Exec = Exec_Then | Exec_Else;
988
92
    if (PredValid && 
HII->isPredicated(*MI)37
&&
MI->readsRegister(PredR)22
)
989
22
      Exec = (Cond == HII->isPredicatedTrue(*MI)) ? 
Exec_Then1
:
Exec_Else21
;
990
92
991
289
    for (auto &Op : MI->operands()) {
992
289
      if (!Op.isReg())
993
37
        continue;
994
252
      // We don't want to deal with physical registers. The reason is that
995
252
      // they can be aliased with other physical registers. Aliased virtual
996
252
      // registers must share the same register number, and can only differ
997
252
      // in the subregisters, which we are keeping track of. Physical
998
252
      // registers ters no longer have subregisters---their super- and
999
252
      // subregisters are other physical registers, and we are not checking
1000
252
      // that.
1001
252
      RegisterRef RR = Op;
1002
252
      if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
1003
5
        return false;
1004
247
1005
247
      ReferenceMap &Map = Op.isDef() ? 
Defs87
:
Uses160
;
1006
247
      if (Op.isDef() && 
Op.isUndef()87
) {
1007
6
        assert(RR.Sub && "Expecting a subregister on <def,read-undef>");
1008
6
        // If this is a <def,read-undef>, then it invalidates the non-written
1009
6
        // part of the register. For the purpose of checking the validity of
1010
6
        // the move, assume that it modifies the whole register.
1011
6
        RR.Sub = 0;
1012
6
      }
1013
247
      addRefToMap(RR, Map, Exec);
1014
247
    }
1015
92
  }
1016
91
1017
91
  // The situation:
1018
91
  //   RT = DefI
1019
91
  //   ...
1020
91
  //   RD = TfrI ..., RT
1021
91
1022
91
  // If the register-in-the-middle (RT) is used or redefined between
1023
91
  // DefI and TfrI, we may not be able proceed with this transformation.
1024
91
  // We can ignore a def that will not execute together with TfrI, and a
1025
91
  // use that will. If there is such a use (that does execute together with
1026
91
  // TfrI), we will not be able to move DefI down. If there is a use that
1027
91
  // executed if TfrI's condition is false, then RT must be available
1028
91
  // unconditionally (cannot be predicated).
1029
91
  // Essentially, we need to be able to rename RT to RD in this segment.
1030
91
  
if (86
isRefInMap(RT, Defs, Exec_Then)86
||
isRefInMap(RT, Uses, Exec_Else)86
)
1031
4
    return false;
1032
82
  RegisterRef RD = MD;
1033
82
  // If the predicate register is defined between DefI and TfrI, the only
1034
82
  // potential thing to do would be to move the DefI down to TfrI, and then
1035
82
  // predicate. The reaching def (DefI) must be movable down to the location
1036
82
  // of the TfrI.
1037
82
  // If the target register of the TfrI (RD) is not used or defined between
1038
82
  // DefI and TfrI, consider moving TfrI up to DefI.
1039
82
  bool CanUp =   canMoveOver(TfrI, Defs, Uses);
1040
82
  bool CanDown = canMoveOver(*DefI, Defs, Uses);
1041
82
  // The TfrI does not access memory, but DefI could. Check if it's safe
1042
82
  // to move DefI down to TfrI.
1043
82
  if (DefI->mayLoad() || 
DefI->mayStore()68
)
1044
14
    if (!canMoveMemTo(*DefI, TfrI, true))
1045
1
      CanDown = false;
1046
82
1047
82
  LLVM_DEBUG(dbgs() << "Can move up: " << (CanUp ? "yes" : "no")
1048
82
                    << ", can move down: " << (CanDown ? "yes\n" : "no\n"));
1049
82
  MachineBasicBlock::iterator PastDefIt = std::next(DefIt);
1050
82
  if (CanUp)
1051
49
    predicateAt(MD, *DefI, PastDefIt, MP, Cond, UpdRegs);
1052
33
  else if (CanDown)
1053
31
    predicateAt(MD, *DefI, TfrIt, MP, Cond, UpdRegs);
1054
2
  else
1055
2
    return false;
1056
80
1057
80
  if (RT != RD) {
1058
80
    renameInRange(RT, RD, PredR, Cond, PastDefIt, TfrIt);
1059
80
    UpdRegs.insert(RT.Reg);
1060
80
  }
1061
80
1062
80
  removeInstr(TfrI);
1063
80
  removeInstr(*DefI);
1064
80
  return true;
1065
80
}
1066
1067
/// Predicate all cases of conditional copies in the specified block.
1068
bool HexagonExpandCondsets::predicateInBlock(MachineBasicBlock &B,
1069
5.47k
      std::set<unsigned> &UpdRegs) {
1070
5.47k
  bool Changed = false;
1071
5.47k
  MachineBasicBlock::iterator I, E, NextI;
1072
42.4k
  for (I = B.begin(), E = B.end(); I != E; 
I = NextI36.9k
) {
1073
36.9k
    NextI = std::next(I);
1074
36.9k
    unsigned Opc = I->getOpcode();
1075
36.9k
    if (Opc == Hexagon::A2_tfrt || 
Opc == Hexagon::A2_tfrf36.7k
) {
1076
385
      bool Done = predicate(*I, (Opc == Hexagon::A2_tfrt), UpdRegs);
1077
385
      if (!Done) {
1078
305
        // If we didn't predicate I, we may need to remove it in case it is
1079
305
        // an "identity" copy, e.g.  %1 = A2_tfrt %2, %1.
1080
305
        if (RegisterRef(I->getOperand(0)) == RegisterRef(I->getOperand(2))) {
1081
152
          for (auto &Op : I->operands())
1082
456
            if (Op.isReg())
1083
456
              UpdRegs.insert(Op.getReg());
1084
152
          removeInstr(*I);
1085
152
        }
1086
305
      }
1087
385
      Changed |= Done;
1088
385
    }
1089
36.9k
  }
1090
5.47k
  return Changed;
1091
5.47k
}
1092
1093
402
bool HexagonExpandCondsets::isIntReg(RegisterRef RR, unsigned &BW) {
1094
402
  if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
1095
0
    return false;
1096
402
  const TargetRegisterClass *RC = MRI->getRegClass(RR.Reg);
1097
402
  if (RC == &Hexagon::IntRegsRegClass) {
1098
386
    BW = 32;
1099
386
    return true;
1100
386
  }
1101
16
  if (RC == &Hexagon::DoubleRegsRegClass) {
1102
16
    BW = (RR.Sub != 0) ? 
328
:
648
;
1103
16
    return true;
1104
16
  }
1105
0
  return false;
1106
0
}
1107
1108
159
bool HexagonExpandCondsets::isIntraBlocks(LiveInterval &LI) {
1109
314
  for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; 
++I155
) {
1110
173
    LiveRange::Segment &LR = *I;
1111
173
    // Range must start at a register...
1112
173
    if (!LR.start.isRegister())
1113
1
      return false;
1114
172
    // ...and end in a register or in a dead slot.
1115
172
    if (!LR.end.isRegister() && 
!LR.end.isDead()18
)
1116
17
      return false;
1117
172
  }
1118
159
  
return true141
;
1119
159
}
1120
1121
201
bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) {
1122
201
  if (CoaLimitActive) {
1123
0
    if (CoaCounter >= CoaLimit)
1124
0
      return false;
1125
0
    CoaCounter++;
1126
0
  }
1127
201
  unsigned BW1, BW2;
1128
201
  if (!isIntReg(R1, BW1) || !isIntReg(R2, BW2) || BW1 != BW2)
1129
0
    return false;
1130
201
  if (MRI->isLiveIn(R1.Reg))
1131
2
    return false;
1132
199
  if (MRI->isLiveIn(R2.Reg))
1133
13
    return false;
1134
186
1135
186
  LiveInterval &L1 = LIS->getInterval(R1.Reg);
1136
186
  LiveInterval &L2 = LIS->getInterval(R2.Reg);
1137
186
  if (L2.empty())
1138
2
    return false;
1139
184
  if (L1.hasSubRanges() || 
L2.hasSubRanges()183
)
1140
5
    return false;
1141
179
  bool Overlap = L1.overlaps(L2);
1142
179
1143
179
  LLVM_DEBUG(dbgs() << "compatible registers: ("
1144
179
                    << (Overlap ? "overlap" : "disjoint") << ")\n  "
1145
179
                    << printReg(R1.Reg, TRI, R1.Sub) << "  " << L1 << "\n  "
1146
179
                    << printReg(R2.Reg, TRI, R2.Sub) << "  " << L2 << "\n");
1147
179
  if (R1.Sub || R2.Sub)
1148
0
    return false;
1149
179
  if (Overlap)
1150
36
    return false;
1151
143
1152
143
  // Coalescing could have a negative impact on scheduling, so try to limit
1153
143
  // to some reasonable extent. Only consider coalescing segments, when one
1154
143
  // of them does not cross basic block boundaries.
1155
143
  if (!isIntraBlocks(L1) && 
!isIntraBlocks(L2)16
)
1156
2
    return false;
1157
141
1158
141
  MRI->replaceRegWith(R2.Reg, R1.Reg);
1159
141
1160
141
  // Move all live segments from L2 to L1.
1161
141
  using ValueInfoMap = DenseMap<VNInfo *, VNInfo *>;
1162
141
  ValueInfoMap VM;
1163
478
  for (LiveInterval::iterator I = L2.begin(), E = L2.end(); I != E; 
++I337
) {
1164
337
    VNInfo *NewVN, *OldVN = I->valno;
1165
337
    ValueInfoMap::iterator F = VM.find(OldVN);
1166
337
    if (F == VM.end()) {
1167
336
      NewVN = L1.getNextValue(I->valno->def, LIS->getVNInfoAllocator());
1168
336
      VM.insert(std::make_pair(OldVN, NewVN));
1169
336
    } else {
1170
1
      NewVN = F->second;
1171
1
    }
1172
337
    L1.addSegment(LiveRange::Segment(I->start, I->end, NewVN));
1173
337
  }
1174
478
  while (L2.begin() != L2.end())
1175
337
    L2.removeSegment(*L2.begin());
1176
141
  LIS->removeInterval(R2.Reg);
1177
141
1178
141
  updateKillFlags(R1.Reg);
1179
141
  LLVM_DEBUG(dbgs() << "coalesced: " << L1 << "\n");
1180
141
  L1.verify();
1181
141
1182
141
  return true;
1183
141
}
1184
1185
/// Attempt to coalesce one of the source registers to a MUX instruction with
1186
/// the destination register. This could lead to having only one predicated
1187
/// instruction in the end instead of two.
1188
bool HexagonExpandCondsets::coalesceSegments(
1189
      const SmallVectorImpl<MachineInstr*> &Condsets,
1190
3.35k
      std::set<unsigned> &UpdRegs) {
1191
3.35k
  SmallVector<MachineInstr*,16> TwoRegs;
1192
3.35k
  for (MachineInstr *MI : Condsets) {
1193
345
    MachineOperand &S1 = MI->getOperand(2), &S2 = MI->getOperand(3);
1194
345
    if (!S1.isReg() && 
!S2.isReg()124
)
1195
83
      continue;
1196
262
    TwoRegs.push_back(MI);
1197
262
  }
1198
3.35k
1199
3.35k
  bool Changed = false;
1200
3.35k
  for (MachineInstr *CI : TwoRegs) {
1201
262
    RegisterRef RD = CI->getOperand(0);
1202
262
    RegisterRef RP = CI->getOperand(1);
1203
262
    MachineOperand &S1 = CI->getOperand(2), &S2 = CI->getOperand(3);
1204
262
    bool Done = false;
1205
262
    // Consider this case:
1206
262
    //   %1 = instr1 ...
1207
262
    //   %2 = instr2 ...
1208
262
    //   %0 = C2_mux ..., %1, %2
1209
262
    // If %0 was coalesced with %1, we could end up with the following
1210
262
    // code:
1211
262
    //   %0 = instr1 ...
1212
262
    //   %2 = instr2 ...
1213
262
    //   %0 = A2_tfrf ..., %2
1214
262
    // which will later become:
1215
262
    //   %0 = instr1 ...
1216
262
    //   %0 = instr2_cNotPt ...
1217
262
    // i.e. there will be an unconditional definition (instr1) of %0
1218
262
    // followed by a conditional one. The output dependency was there before
1219
262
    // and it unavoidable, but if instr1 is predicable, we will no longer be
1220
262
    // able to predicate it here.
1221
262
    // To avoid this scenario, don't coalesce the destination register with
1222
262
    // a source register that is defined by a predicable instruction.
1223
262
    if (S1.isReg()) {
1224
221
      RegisterRef RS = S1;
1225
221
      MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, true);
1226
221
      if (!RDef || 
!HII->isPredicable(*RDef)201
) {
1227
131
        Done = coalesceRegisters(RD, RegisterRef(S1));
1228
131
        if (Done) {
1229
98
          UpdRegs.insert(RD.Reg);
1230
98
          UpdRegs.insert(S1.getReg());
1231
98
        }
1232
131
      }
1233
221
    }
1234
262
    if (!Done && 
S2.isReg()164
) {
1235
101
      RegisterRef RS = S2;
1236
101
      MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, false);
1237
101
      if (!RDef || 
!HII->isPredicable(*RDef)74
) {
1238
70
        Done = coalesceRegisters(RD, RegisterRef(S2));
1239
70
        if (Done) {
1240
43
          UpdRegs.insert(RD.Reg);
1241
43
          UpdRegs.insert(S2.getReg());
1242
43
        }
1243
70
      }
1244
101
    }
1245
262
    Changed |= Done;
1246
262
  }
1247
3.35k
  return Changed;
1248
3.35k
}
1249
1250
3.36k
bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) {
1251
3.36k
  if (skipFunction(MF.getFunction()))
1252
10
    return false;
1253
3.35k
1254
3.35k
  HII = static_cast<const HexagonInstrInfo*>(MF.getSubtarget().getInstrInfo());
1255
3.35k
  TRI = MF.getSubtarget().getRegisterInfo();
1256
3.35k
  MDT = &getAnalysis<MachineDominatorTree>();
1257
3.35k
  LIS = &getAnalysis<LiveIntervals>();
1258
3.35k
  MRI = &MF.getRegInfo();
1259
3.35k
1260
3.35k
  LLVM_DEBUG(LIS->print(dbgs() << "Before expand-condsets\n",
1261
3.35k
                        MF.getFunction().getParent()));
1262
3.35k
1263
3.35k
  bool Changed = false;
1264
3.35k
  std::set<unsigned> CoalUpd, PredUpd;
1265
3.35k
1266
3.35k
  SmallVector<MachineInstr*,16> Condsets;
1267
3.35k
  for (auto &B : MF)
1268
5.47k
    for (auto &I : B)
1269
36.6k
      if (isCondset(I))
1270
345
        Condsets.push_back(&I);
1271
3.35k
1272
3.35k
  // Try to coalesce the target of a mux with one of its sources.
1273
3.35k
  // This could eliminate a register copy in some circumstances.
1274
3.35k
  Changed |= coalesceSegments(Condsets, CoalUpd);
1275
3.35k
1276
3.35k
  // Update kill flags on all source operands. This is done here because
1277
3.35k
  // at this moment (when expand-condsets runs), there are no kill flags
1278
3.35k
  // in the IR (they have been removed by live range analysis).
1279
3.35k
  // Updating them right before we split is the easiest, because splitting
1280
3.35k
  // adds definitions which would interfere with updating kills afterwards.
1281
3.35k
  std::set<unsigned> KillUpd;
1282
3.35k
  for (MachineInstr *MI : Condsets)
1283
345
    for (MachineOperand &Op : MI->operands())
1284
1.38k
      if (Op.isReg() && 
Op.isUse()1.08k
)
1285
736
        if (!CoalUpd.count(Op.getReg()))
1286
581
          KillUpd.insert(Op.getReg());
1287
3.35k
  updateLiveness(KillUpd, false, true, false);
1288
3.35k
  LLVM_DEBUG(
1289
3.35k
      LIS->print(dbgs() << "After coalescing\n", MF.getFunction().getParent()));
1290
3.35k
1291
3.35k
  // First, simply split all muxes into a pair of conditional transfers
1292
3.35k
  // and update the live intervals to reflect the new arrangement. The
1293
3.35k
  // goal is to update the kill flags, since predication will rely on
1294
3.35k
  // them.
1295
3.35k
  for (MachineInstr *MI : Condsets)
1296
345
    Changed |= split(*MI, PredUpd);
1297
3.35k
  Condsets.clear(); // The contents of Condsets are invalid here anyway.
1298
3.35k
1299
3.35k
  // Do not update live ranges after splitting. Recalculation of live
1300
3.35k
  // intervals removes kill flags, which were preserved by splitting on
1301
3.35k
  // the source operands of condsets. These kill flags are needed by
1302
3.35k
  // predication, and after splitting they are difficult to recalculate
1303
3.35k
  // (because of predicated defs), so make sure they are left untouched.
1304
3.35k
  // Predication does not use live intervals.
1305
3.35k
  LLVM_DEBUG(
1306
3.35k
      LIS->print(dbgs() << "After splitting\n", MF.getFunction().getParent()));
1307
3.35k
1308
3.35k
  // Traverse all blocks and collapse predicable instructions feeding
1309
3.35k
  // conditional transfers into predicated instructions.
1310
3.35k
  // Walk over all the instructions again, so we may catch pre-existing
1311
3.35k
  // cases that were not created in the previous step.
1312
3.35k
  for (auto &B : MF)
1313
5.47k
    Changed |= predicateInBlock(B, PredUpd);
1314
3.35k
  LLVM_DEBUG(LIS->print(dbgs() << "After predicating\n",
1315
3.35k
                        MF.getFunction().getParent()));
1316
3.35k
1317
3.35k
  PredUpd.insert(CoalUpd.begin(), CoalUpd.end());
1318
3.35k
  updateLiveness(PredUpd, true, true, true);
1319
3.35k
1320
3.35k
  LLVM_DEBUG({
1321
3.35k
    if (Changed)
1322
3.35k
      LIS->print(dbgs() << "After expand-condsets\n",
1323
3.35k
                 MF.getFunction().getParent());
1324
3.35k
  });
1325
3.35k
1326
3.35k
  return Changed;
1327
3.35k
}
1328
1329
//===----------------------------------------------------------------------===//
1330
//                         Public Constructor Functions
1331
//===----------------------------------------------------------------------===//
1332
0
FunctionPass *llvm::createHexagonExpandCondsets() {
1333
0
  return new HexagonExpandCondsets();
1334
0
}