Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- HexagonExpandCondsets.cpp ------------------------------------------===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
10
// Replace mux instructions with the corresponding legal instructions.
11
// It is meant to work post-SSA, but still on virtual registers. It was
12
// originally placed between register coalescing and machine instruction
13
// scheduler.
14
// In this place in the optimization sequence, live interval analysis had
15
// been performed, and the live intervals should be preserved. A large part
16
// of the code deals with preserving the liveness information.
17
//
18
// Liveness tracking aside, the main functionality of this pass is divided
19
// into two steps. The first step is to replace an instruction
20
//   vreg0 = C2_mux vreg1, vreg2, vreg3
21
// with a pair of conditional transfers
22
//   vreg0 = A2_tfrt vreg1, vreg2
23
//   vreg0 = A2_tfrf vreg1, vreg3
24
// It is the intention that the execution of this pass could be terminated
25
// after this step, and the code generated would be functionally correct.
26
//
27
// If the uses of the source values vreg1 and vreg2 are kills, and their
28
// definitions are predicable, then in the second step, the conditional
29
// transfers will then be rewritten as predicated instructions. E.g.
30
//   vreg0 = A2_or vreg1, vreg2
31
//   vreg3 = A2_tfrt vreg99, vreg0<kill>
32
// will be rewritten as
33
//   vreg3 = A2_port vreg99, vreg1, vreg2
34
//
35
// This replacement has two variants: "up" and "down". Consider this case:
36
//   vreg0 = A2_or vreg1, vreg2
37
//   ... [intervening instructions] ...
38
//   vreg3 = A2_tfrt vreg99, vreg0<kill>
39
// variant "up":
40
//   vreg3 = A2_port vreg99, vreg1, vreg2
41
//   ... [intervening instructions, vreg0->vreg3] ...
42
//   [deleted]
43
// variant "down":
44
//   [deleted]
45
//   ... [intervening instructions] ...
46
//   vreg3 = A2_port vreg99, vreg1, vreg2
47
//
48
// Both, one or none of these variants may be valid, and checks are made
49
// to rule out inapplicable variants.
50
//
51
// As an additional optimization, before either of the two steps above is
52
// executed, the pass attempts to coalesce the target register with one of
53
// the source registers, e.g. given an instruction
54
//   vreg3 = C2_mux vreg0, vreg1, vreg2
55
// vreg3 will be coalesced with either vreg1 or vreg2. If this succeeds,
56
// the instruction would then be (for example)
57
//   vreg3 = C2_mux vreg0, vreg3, vreg2
58
// and, under certain circumstances, this could result in only one predicated
59
// instruction:
60
//   vreg3 = A2_tfrf vreg0, vreg2
61
//
62
63
// Splitting a definition of a register into two predicated transfers
64
// creates a complication in liveness tracking. Live interval computation
65
// will see both instructions as actual definitions, and will mark the
66
// first one as dead. The definition is not actually dead, and this
67
// situation will need to be fixed. For example:
68
//   vreg1<def,dead> = A2_tfrt ...  ; marked as dead
69
//   vreg1<def> = A2_tfrf ...
70
//
71
// Since any of the individual predicated transfers may end up getting
72
// removed (in case it is an identity copy), some pre-existing def may
73
// be marked as dead after live interval recomputation:
74
//   vreg1<def,dead> = ...          ; marked as dead
75
//   ...
76
//   vreg1<def> = A2_tfrf ...       ; if A2_tfrt is removed
77
// This case happens if vreg1 was used as a source in A2_tfrt, which means
78
// that is it actually live at the A2_tfrf, and so the now dead definition
79
// of vreg1 will need to be updated to non-dead at some point.
80
//
81
// This issue could be remedied by adding implicit uses to the predicated
82
// transfers, but this will create a problem with subsequent predication,
83
// since the transfers will no longer be possible to reorder. To avoid
84
// that, the initial splitting will not add any implicit uses. These
85
// implicit uses will be added later, after predication. The extra price,
86
// however, is that finding the locations where the implicit uses need
87
// to be added, and updating the live ranges will be more involved.
88
89
#include "HexagonInstrInfo.h"
90
#include "HexagonRegisterInfo.h"
91
#include "llvm/ADT/DenseMap.h"
92
#include "llvm/ADT/SetVector.h"
93
#include "llvm/ADT/SmallVector.h"
94
#include "llvm/ADT/StringRef.h"
95
#include "llvm/CodeGen/LiveInterval.h"
96
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
97
#include "llvm/CodeGen/MachineBasicBlock.h"
98
#include "llvm/CodeGen/MachineDominators.h"
99
#include "llvm/CodeGen/MachineFunction.h"
100
#include "llvm/CodeGen/MachineFunctionPass.h"
101
#include "llvm/CodeGen/MachineInstr.h"
102
#include "llvm/CodeGen/MachineInstrBuilder.h"
103
#include "llvm/CodeGen/MachineOperand.h"
104
#include "llvm/CodeGen/MachineRegisterInfo.h"
105
#include "llvm/CodeGen/SlotIndexes.h"
106
#include "llvm/IR/DebugLoc.h"
107
#include "llvm/IR/Function.h"
108
#include "llvm/MC/LaneBitmask.h"
109
#include "llvm/Pass.h"
110
#include "llvm/Support/CommandLine.h"
111
#include "llvm/Support/Debug.h"
112
#include "llvm/Support/ErrorHandling.h"
113
#include "llvm/Support/raw_ostream.h"
114
#include "llvm/Target/TargetRegisterInfo.h"
115
#include "llvm/Target/TargetSubtargetInfo.h"
116
#include <cassert>
117
#include <iterator>
118
#include <set>
119
#include <utility>
120
121
#define DEBUG_TYPE "expand-condsets"
122
123
using namespace llvm;
124
125
static cl::opt<unsigned> OptTfrLimit("expand-condsets-tfr-limit",
126
  cl::init(~0U), cl::Hidden, cl::desc("Max number of mux expansions"));
127
static cl::opt<unsigned> OptCoaLimit("expand-condsets-coa-limit",
128
  cl::init(~0U), cl::Hidden, cl::desc("Max number of segment coalescings"));
129
130
namespace llvm {
131
132
  void initializeHexagonExpandCondsetsPass(PassRegistry&);
133
  FunctionPass *createHexagonExpandCondsets();
134
135
} // end namespace llvm
136
137
namespace {
138
139
  class HexagonExpandCondsets : public MachineFunctionPass {
140
  public:
141
    static char ID;
142
143
401
    HexagonExpandCondsets() : MachineFunctionPass(ID) {
144
401
      if (OptCoaLimit.getPosition())
145
1
        CoaLimitActive = true, CoaLimit = OptCoaLimit;
146
401
      if (OptTfrLimit.getPosition())
147
0
        TfrLimitActive = true, TfrLimit = OptTfrLimit;
148
401
      initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry());
149
401
    }
150
151
401
    StringRef getPassName() const override { return "Hexagon Expand Condsets"; }
152
153
401
    void getAnalysisUsage(AnalysisUsage &AU) const override {
154
401
      AU.addRequired<LiveIntervals>();
155
401
      AU.addPreserved<LiveIntervals>();
156
401
      AU.addPreserved<SlotIndexes>();
157
401
      AU.addRequired<MachineDominatorTree>();
158
401
      AU.addPreserved<MachineDominatorTree>();
159
401
      MachineFunctionPass::getAnalysisUsage(AU);
160
401
    }
161
162
    bool runOnMachineFunction(MachineFunction &MF) override;
163
164
  private:
165
    const HexagonInstrInfo *HII = nullptr;
166
    const TargetRegisterInfo *TRI = nullptr;
167
    MachineDominatorTree *MDT;
168
    MachineRegisterInfo *MRI = nullptr;
169
    LiveIntervals *LIS = nullptr;
170
    bool CoaLimitActive = false;
171
    bool TfrLimitActive = false;
172
    unsigned CoaLimit;
173
    unsigned TfrLimit;
174
    unsigned CoaCounter = 0;
175
    unsigned TfrCounter = 0;
176
177
    struct RegisterRef {
178
      RegisterRef(const MachineOperand &Op) : Reg(Op.getReg()),
179
2.30k
          Sub(Op.getSubReg()) {}
180
114
      RegisterRef(unsigned R = 0, unsigned S = 0) : Reg(R), Sub(S) {}
181
182
396
      bool operator== (RegisterRef RR) const {
183
100
        return Reg == RR.Reg && Sub == RR.Sub;
184
396
      }
185
157
      bool operator!= (RegisterRef RR) const { return !operator==(RR); }
186
681
      bool operator< (RegisterRef RR) const {
187
513
        return Reg < RR.Reg || 
(Reg == RR.Reg && 513
Sub < RR.Sub507
);
188
681
      }
189
190
      unsigned Reg, Sub;
191
    };
192
193
    using ReferenceMap = DenseMap<unsigned, unsigned>;
194
    enum { Sub_Low = 0x1, Sub_High = 0x2, Sub_None = (Sub_Low | Sub_High) };
195
    enum { Exec_Then = 0x10, Exec_Else = 0x20 };
196
197
    unsigned getMaskForSub(unsigned Sub);
198
    bool isCondset(const MachineInstr &MI);
199
    LaneBitmask getLaneMask(unsigned Reg, unsigned Sub);
200
201
    void addRefToMap(RegisterRef RR, ReferenceMap &Map, unsigned Exec);
202
    bool isRefInMap(RegisterRef, ReferenceMap &Map, unsigned Exec);
203
204
    void updateDeadsInRange(unsigned Reg, LaneBitmask LM, LiveRange &Range);
205
    void updateKillFlags(unsigned Reg);
206
    void updateDeadFlags(unsigned Reg);
207
    void recalculateLiveInterval(unsigned Reg);
208
    void removeInstr(MachineInstr &MI);
209
    void updateLiveness(std::set<unsigned> &RegSet, bool Recalc,
210
        bool UpdateKills, bool UpdateDeads);
211
212
    unsigned getCondTfrOpcode(const MachineOperand &SO, bool Cond);
213
    MachineInstr *genCondTfrFor(MachineOperand &SrcOp,
214
        MachineBasicBlock::iterator At, unsigned DstR,
215
        unsigned DstSR, const MachineOperand &PredOp, bool PredSense,
216
        bool ReadUndef, bool ImpUse);
217
    bool split(MachineInstr &MI, std::set<unsigned> &UpdRegs);
218
219
    bool isPredicable(MachineInstr *MI);
220
    MachineInstr *getReachingDefForPred(RegisterRef RD,
221
        MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond);
222
    bool canMoveOver(MachineInstr &MI, ReferenceMap &Defs, ReferenceMap &Uses);
223
    bool canMoveMemTo(MachineInstr &MI, MachineInstr &ToI, bool IsDown);
224
    void predicateAt(const MachineOperand &DefOp, MachineInstr &MI,
225
                     MachineBasicBlock::iterator Where,
226
                     const MachineOperand &PredOp, bool Cond,
227
                     std::set<unsigned> &UpdRegs);
228
    void renameInRange(RegisterRef RO, RegisterRef RN, unsigned PredR,
229
        bool Cond, MachineBasicBlock::iterator First,
230
        MachineBasicBlock::iterator Last);
231
    bool predicate(MachineInstr &TfrI, bool Cond, std::set<unsigned> &UpdRegs);
232
    bool predicateInBlock(MachineBasicBlock &B,
233
        std::set<unsigned> &UpdRegs);
234
235
    bool isIntReg(RegisterRef RR, unsigned &BW);
236
    bool isIntraBlocks(LiveInterval &LI);
237
    bool coalesceRegisters(RegisterRef R1, RegisterRef R2);
238
    bool coalesceSegments(const SmallVectorImpl<MachineInstr*> &Condsets,
239
                          std::set<unsigned> &UpdRegs);
240
  };
241
242
} // end anonymous namespace
243
244
char HexagonExpandCondsets::ID = 0;
245
246
namespace llvm {
247
248
  char &HexagonExpandCondsetsID = HexagonExpandCondsets::ID;
249
250
} // end namespace llvm
251
252
488
INITIALIZE_PASS_BEGIN488
(HexagonExpandCondsets, "expand-condsets",
253
488
  "Hexagon Expand Condsets", false, false)
254
488
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
255
488
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
256
488
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
257
488
INITIALIZE_PASS_END(HexagonExpandCondsets, "expand-condsets",
258
  "Hexagon Expand Condsets", false, false)
259
260
91
unsigned HexagonExpandCondsets::getMaskForSub(unsigned Sub) {
261
91
  switch (Sub) {
262
7
    case Hexagon::isub_lo:
263
7
    case Hexagon::vsub_lo:
264
7
      return Sub_Low;
265
3
    case Hexagon::isub_hi:
266
3
    case Hexagon::vsub_hi:
267
3
      return Sub_High;
268
81
    case Hexagon::NoSubRegister:
269
81
      return Sub_None;
270
0
  }
271
0
  
llvm_unreachable0
("Invalid subregister");
272
0
}
273
274
13.3k
bool HexagonExpandCondsets::isCondset(const MachineInstr &MI) {
275
13.3k
  unsigned Opc = MI.getOpcode();
276
13.3k
  switch (Opc) {
277
107
    case Hexagon::C2_mux:
278
107
    case Hexagon::C2_muxii:
279
107
    case Hexagon::C2_muxir:
280
107
    case Hexagon::C2_muxri:
281
107
    case Hexagon::PS_pselect:
282
107
        return true;
283
0
      break;
284
13.2k
  }
285
13.2k
  return false;
286
13.2k
}
287
288
821
LaneBitmask HexagonExpandCondsets::getLaneMask(unsigned Reg, unsigned Sub) {
289
821
  assert(TargetRegisterInfo::isVirtualRegister(Reg));
290
33
  return Sub != 0 ? TRI->getSubRegIndexLaneMask(Sub)
291
788
                  : MRI->getMaxLaneMaskForVReg(Reg);
292
821
}
293
294
void HexagonExpandCondsets::addRefToMap(RegisterRef RR, ReferenceMap &Map,
295
77
      unsigned Exec) {
296
77
  unsigned Mask = getMaskForSub(RR.Sub) | Exec;
297
77
  ReferenceMap::iterator F = Map.find(RR.Reg);
298
77
  if (F == Map.end())
299
69
    Map.insert(std::make_pair(RR.Reg, Mask));
300
77
  else
301
8
    F->second |= Mask;
302
77
}
303
304
bool HexagonExpandCondsets::isRefInMap(RegisterRef RR, ReferenceMap &Map,
305
281
      unsigned Exec) {
306
281
  ReferenceMap::iterator F = Map.find(RR.Reg);
307
281
  if (F == Map.end())
308
267
    return false;
309
14
  unsigned Mask = getMaskForSub(RR.Sub) | Exec;
310
14
  if (Mask & F->second)
311
14
    return true;
312
0
  return false;
313
0
}
314
315
468
void HexagonExpandCondsets::updateKillFlags(unsigned Reg) {
316
458
  auto KillAt = [this,Reg] (SlotIndex K, LaneBitmask LM) -> void {
317
458
    // Set the <kill> flag on a use of Reg whose lane mask is contained in LM.
318
458
    MachineInstr *MI = LIS->getInstructionFromIndex(K);
319
1.15k
    for (auto &Op : MI->operands()) {
320
1.15k
      if (
!Op.isReg() || 1.15k
!Op.isUse()1.14k
||
Op.getReg() != Reg700
)
321
699
        continue;
322
458
      LaneBitmask SLM = getLaneMask(Reg, Op.getSubReg());
323
458
      if (
(SLM & LM) == SLM458
) {
324
458
        // Only set the kill flag on the first encountered use of Reg in this
325
458
        // instruction.
326
458
        Op.setIsKill(true);
327
458
        break;
328
458
      }
329
458
    }
330
458
  };
331
468
332
468
  LiveInterval &LI = LIS->getInterval(Reg);
333
1.15k
  for (auto I = LI.begin(), E = LI.end(); 
I != E1.15k
;
++I691
) {
334
691
    if (!I->end.isRegister())
335
74
      continue;
336
617
    // Do not mark the end of the segment as <kill>, if the next segment
337
617
    // starts with a predicated instruction.
338
617
    auto NextI = std::next(I);
339
617
    if (
NextI != E && 617
NextI->start.isRegister()241
) {
340
233
      MachineInstr *DefI = LIS->getInstructionFromIndex(NextI->start);
341
233
      if (HII->isPredicated(*DefI))
342
120
        continue;
343
497
    }
344
497
    bool WholeReg = true;
345
497
    if (
LI.hasSubRanges()497
) {
346
78
      auto EndsAtI = [I] (LiveInterval::SubRange &S) -> bool {
347
78
        LiveRange::iterator F = S.find(I->end);
348
49
        return F != S.end() && I->end == F->end;
349
78
      };
350
39
      // Check if all subranges end at I->end. If so, make sure to kill
351
39
      // the whole register.
352
78
      for (LiveInterval::SubRange &S : LI.subranges()) {
353
78
        if (EndsAtI(S))
354
0
          KillAt(I->end, S.LaneMask);
355
78
        else
356
78
          WholeReg = false;
357
78
      }
358
39
    }
359
497
    if (WholeReg)
360
458
      KillAt(I->end, MRI->getMaxLaneMaskForVReg(Reg));
361
691
  }
362
468
}
363
364
void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM,
365
285
      LiveRange &Range) {
366
285
  assert(TargetRegisterInfo::isVirtualRegister(Reg));
367
285
  if (Range.empty())
368
59
    return;
369
226
370
226
  // Return two booleans: { def-modifes-reg, def-covers-reg }.
371
226
  
auto IsRegDef = [this,Reg,LM] (MachineOperand &Op) -> std::pair<bool,bool> 226
{
372
1.06k
    if (
!Op.isReg() || 1.06k
!Op.isDef()858
)
373
689
      return { false, false };
374
376
    unsigned DR = Op.getReg(), DSR = Op.getSubReg();
375
376
    if (
!TargetRegisterInfo::isVirtualRegister(DR) || 376
DR != Reg365
)
376
13
      return { false, false };
377
363
    LaneBitmask SLM = getLaneMask(DR, DSR);
378
363
    LaneBitmask A = SLM & LM;
379
363
    return { A.any(), A == SLM };
380
363
  };
381
226
382
226
  // The splitting step will create pairs of predicated definitions without
383
226
  // any implicit uses (since implicit uses would interfere with predication).
384
226
  // This can cause the reaching defs to become dead after live range
385
226
  // recomputation, even though they are not really dead.
386
226
  // We need to identify predicated defs that need implicit uses, and
387
226
  // dead defs that are not really dead, and correct both problems.
388
226
389
226
  auto Dominate = [this] (SetVector<MachineBasicBlock*> &Defs,
390
19
                          MachineBasicBlock *Dest) -> bool {
391
19
    for (MachineBasicBlock *D : Defs)
392
21
      
if (21
D != Dest && 21
MDT->dominates(D, Dest)6
)
393
4
        return true;
394
15
395
15
    MachineBasicBlock *Entry = &Dest->getParent()->front();
396
15
    SetVector<MachineBasicBlock*> Work(Dest->pred_begin(), Dest->pred_end());
397
70
    for (unsigned i = 0; 
i < Work.size()70
;
++i55
) {
398
69
      MachineBasicBlock *B = Work[i];
399
69
      if (Defs.count(B))
400
2
        continue;
401
67
      
if (67
B == Entry67
)
402
14
        return false;
403
53
      for (auto *P : B->predecessors())
404
71
        Work.insert(P);
405
69
    }
406
1
    return true;
407
19
  };
408
226
409
226
  // First, try to extend live range within individual basic blocks. This
410
226
  // will leave us only with dead defs that do not reach any predicated
411
226
  // defs in the same block.
412
226
  SetVector<MachineBasicBlock*> Defs;
413
226
  SmallVector<SlotIndex,4> PredDefs;
414
388
  for (auto &Seg : Range) {
415
388
    if (!Seg.start.isRegister())
416
25
      continue;
417
363
    MachineInstr *DefI = LIS->getInstructionFromIndex(Seg.start);
418
363
    Defs.insert(DefI->getParent());
419
363
    if (HII->isPredicated(*DefI))
420
183
      PredDefs.push_back(Seg.start);
421
388
  }
422
226
423
226
  SmallVector<SlotIndex,8> Undefs;
424
226
  LiveInterval &LI = LIS->getInterval(Reg);
425
226
  LI.computeSubRangeUndefs(Undefs, LM, *MRI, *LIS->getSlotIndexes());
426
226
427
183
  for (auto &SI : PredDefs) {
428
183
    MachineBasicBlock *BB = LIS->getMBBFromIndex(SI);
429
183
    auto P = Range.extendInBlock(Undefs, LIS->getMBBStartIdx(BB), SI);
430
183
    if (
P.first != nullptr || 183
P.second76
)
431
112
      SI = SlotIndex();
432
183
  }
433
226
434
226
  // Calculate reachability for those predicated defs that were not handled
435
226
  // by the in-block extension.
436
226
  SmallVector<SlotIndex,4> ExtTo;
437
183
  for (auto &SI : PredDefs) {
438
183
    if (!SI.isValid())
439
112
      continue;
440
71
    MachineBasicBlock *BB = LIS->getMBBFromIndex(SI);
441
71
    if (BB->pred_empty())
442
52
      continue;
443
19
    // If the defs from this range reach SI via all predecessors, it is live.
444
19
    // It can happen that SI is reached by the defs through some paths, but
445
19
    // not all. In the IR coming into this optimization, SI would not be
446
19
    // considered live, since the defs would then not jointly dominate SI.
447
19
    // That means that SI is an overwriting def, and no implicit use is
448
19
    // needed at this point. Do not add SI to the extension points, since
449
19
    // extendToIndices will abort if there is no joint dominance.
450
19
    // If the abort was avoided by adding extra undefs added to Undefs,
451
19
    // extendToIndices could actually indicate that SI is live, contrary
452
19
    // to the original IR.
453
19
    
if (19
Dominate(Defs, BB)19
)
454
5
      ExtTo.push_back(SI);
455
183
  }
456
226
457
226
  if (!ExtTo.empty())
458
3
    LIS->extendToIndices(Range, ExtTo, Undefs);
459
226
460
226
  // Remove <dead> flags from all defs that are not dead after live range
461
226
  // extension, and collect all def operands. They will be used to generate
462
226
  // the necessary implicit uses.
463
226
  // At the same time, add <dead> flag to all defs that are actually dead.
464
226
  // This can happen, for example, when a mux with identical inputs is
465
226
  // replaced with a COPY: the use of the predicate register disappears and
466
226
  // the dead can become dead.
467
226
  std::set<RegisterRef> DefRegs;
468
389
  for (auto &Seg : Range) {
469
389
    if (!Seg.start.isRegister())
470
26
      continue;
471
363
    MachineInstr *DefI = LIS->getInstructionFromIndex(Seg.start);
472
1.06k
    for (auto &Op : DefI->operands()) {
473
1.06k
      auto P = IsRegDef(Op);
474
1.06k
      if (
P.second && 1.06k
Seg.end.isDead()349
) {
475
4
        Op.setIsDead(true);
476
1.06k
      } else 
if (1.06k
P.first1.06k
) {
477
359
        DefRegs.insert(Op);
478
359
        Op.setIsDead(false);
479
359
      }
480
1.06k
    }
481
389
  }
482
226
483
226
  // Now, add implicit uses to each predicated def that is reached
484
226
  // by other defs.
485
389
  for (auto &Seg : Range) {
486
389
    if (
!Seg.start.isRegister() || 389
!Range.liveAt(Seg.start.getPrevSlot())363
)
487
269
      continue;
488
120
    MachineInstr *DefI = LIS->getInstructionFromIndex(Seg.start);
489
120
    if (!HII->isPredicated(*DefI))
490
10
      continue;
491
110
    // Construct the set of all necessary implicit uses, based on the def
492
110
    // operands in the instruction. We need to tie the implicit uses to
493
110
    // the corresponding defs.
494
110
    std::map<RegisterRef,unsigned> ImpUses;
495
456
    for (unsigned i = 0, e = DefI->getNumOperands(); 
i != e456
;
++i346
) {
496
346
      MachineOperand &Op = DefI->getOperand(i);
497
346
      if (
!Op.isReg() || 346
!DefRegs.count(Op)279
)
498
231
        continue;
499
115
      
if (115
Op.isDef()115
) {
500
109
        ImpUses.insert({Op, i});
501
115
      } else {
502
6
        // This function can be called for the same register with different
503
6
        // lane masks. If the def in this instruction was for the whole
504
6
        // register, we can get here more than once. Avoid adding multiple
505
6
        // implicit uses (or adding an implicit use when an explicit one is
506
6
        // present).
507
6
        ImpUses.erase(Op);
508
6
      }
509
346
    }
510
110
    if (ImpUses.empty())
511
7
      continue;
512
103
    MachineFunction &MF = *DefI->getParent()->getParent();
513
103
    for (std::pair<RegisterRef, unsigned> P : ImpUses) {
514
103
      RegisterRef R = P.first;
515
103
      MachineInstrBuilder(MF, DefI).addReg(R.Reg, RegState::Implicit, R.Sub);
516
103
      DefI->tieOperands(P.second, DefI->getNumOperands()-1);
517
103
    }
518
389
  }
519
285
}
520
521
275
void HexagonExpandCondsets::updateDeadFlags(unsigned Reg) {
522
275
  LiveInterval &LI = LIS->getInterval(Reg);
523
275
  if (
LI.hasSubRanges()275
) {
524
20
    for (LiveInterval::SubRange &S : LI.subranges()) {
525
20
      updateDeadsInRange(Reg, S.LaneMask, S);
526
20
      LIS->shrinkToUses(S, Reg);
527
20
    }
528
10
    LI.clear();
529
10
    LIS->constructMainRangeFromSubranges(LI);
530
275
  } else {
531
265
    updateDeadsInRange(Reg, MRI->getMaxLaneMaskForVReg(Reg), LI);
532
265
  }
533
275
}
534
535
275
void HexagonExpandCondsets::recalculateLiveInterval(unsigned Reg) {
536
275
  LIS->removeInterval(Reg);
537
275
  LIS->createAndComputeVirtRegInterval(Reg);
538
275
}
539
540
203
void HexagonExpandCondsets::removeInstr(MachineInstr &MI) {
541
203
  LIS->RemoveMachineInstrFromMaps(MI);
542
203
  MI.eraseFromParent();
543
203
}
544
545
void HexagonExpandCondsets::updateLiveness(std::set<unsigned> &RegSet,
546
1.72k
      bool Recalc, bool UpdateKills, bool UpdateDeads) {
547
1.72k
  UpdateKills |= UpdateDeads;
548
441
  for (auto R : RegSet) {
549
441
    if (Recalc)
550
275
      recalculateLiveInterval(R);
551
441
    if (UpdateKills)
552
441
      MRI->clearKillFlags(R);
553
441
    if (UpdateDeads)
554
275
      updateDeadFlags(R);
555
441
    // Fixing <dead> flags may extend live ranges, so reset <kill> flags
556
441
    // after that.
557
441
    if (UpdateKills)
558
441
      updateKillFlags(R);
559
441
    LIS->getInterval(R).verify();
560
441
  }
561
1.72k
}
562
563
/// Get the opcode for a conditional transfer of the value in SO (source
564
/// operand). The condition (true/false) is given in Cond.
565
unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO,
566
212
      bool IfTrue) {
567
212
  using namespace Hexagon;
568
212
569
212
  if (
SO.isReg()212
) {
570
114
    unsigned PhysR;
571
114
    RegisterRef RS = SO;
572
114
    if (
TargetRegisterInfo::isVirtualRegister(RS.Reg)114
) {
573
114
      const TargetRegisterClass *VC = MRI->getRegClass(RS.Reg);
574
114
      assert(VC->begin() != VC->end() && "Empty register class");
575
114
      PhysR = *VC->begin();
576
114
    } else {
577
0
      assert(TargetRegisterInfo::isPhysicalRegister(RS.Reg));
578
0
      PhysR = RS.Reg;
579
0
    }
580
114
    unsigned PhysS = (RS.Sub == 0) ? 
PhysR100
:
TRI->getSubReg(PhysR, RS.Sub)14
;
581
114
    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysS);
582
114
    switch (TRI->getRegSizeInBits(*RC)) {
583
110
      case 32:
584
110
        return IfTrue ? 
A2_tfrt61
:
A2_tfrf49
;
585
4
      case 64:
586
4
        return IfTrue ? 
A2_tfrpt2
:
A2_tfrpf2
;
587
0
    }
588
0
    
llvm_unreachable0
("Invalid register operand");
589
0
  }
590
98
  switch (SO.getType()) {
591
98
    case MachineOperand::MO_Immediate:
592
98
    case MachineOperand::MO_FPImmediate:
593
98
    case MachineOperand::MO_ConstantPoolIndex:
594
98
    case MachineOperand::MO_TargetIndex:
595
98
    case MachineOperand::MO_JumpTableIndex:
596
98
    case MachineOperand::MO_ExternalSymbol:
597
98
    case MachineOperand::MO_GlobalAddress:
598
98
    case MachineOperand::MO_BlockAddress:
599
98
      return IfTrue ? 
C2_cmoveit43
:
C2_cmoveif55
;
600
0
    default:
601
0
      break;
602
0
  }
603
0
  
llvm_unreachable0
("Unexpected source operand");
604
0
}
605
606
/// Generate a conditional transfer, copying the value SrcOp to the
607
/// destination register DstR:DstSR, and using the predicate register from
608
/// PredOp. The Cond argument specifies whether the predicate is to be
609
/// if(PredOp), or if(!PredOp).
610
MachineInstr *HexagonExpandCondsets::genCondTfrFor(MachineOperand &SrcOp,
611
      MachineBasicBlock::iterator At,
612
      unsigned DstR, unsigned DstSR, const MachineOperand &PredOp,
613
212
      bool PredSense, bool ReadUndef, bool ImpUse) {
614
212
  MachineInstr *MI = SrcOp.getParent();
615
212
  MachineBasicBlock &B = *At->getParent();
616
212
  const DebugLoc &DL = MI->getDebugLoc();
617
212
618
212
  // Don't avoid identity copies here (i.e. if the source and the destination
619
212
  // are the same registers). It is actually better to generate them here,
620
212
  // since this would cause the copy to potentially be predicated in the next
621
212
  // step. The predication will remove such a copy if it is unable to
622
212
  /// predicate.
623
212
624
212
  unsigned Opc = getCondTfrOpcode(SrcOp, PredSense);
625
212
  unsigned DstState = RegState::Define | (ReadUndef ? 
RegState::Undef4
:
0208
);
626
212
  unsigned PredState = getRegState(PredOp) & ~RegState::Kill;
627
212
  MachineInstrBuilder MIB;
628
212
629
212
  if (
SrcOp.isReg()212
) {
630
114
    unsigned SrcState = getRegState(SrcOp);
631
114
    if (RegisterRef(SrcOp) == RegisterRef(DstR, DstSR))
632
33
      SrcState &= ~RegState::Kill;
633
114
    MIB = BuildMI(B, At, DL, HII->get(Opc))
634
114
          .addReg(DstR, DstState, DstSR)
635
114
          .addReg(PredOp.getReg(), PredState, PredOp.getSubReg())
636
114
          .addReg(SrcOp.getReg(), SrcState, SrcOp.getSubReg());
637
212
  } else {
638
98
    MIB = BuildMI(B, At, DL, HII->get(Opc))
639
98
              .addReg(DstR, DstState, DstSR)
640
98
              .addReg(PredOp.getReg(), PredState, PredOp.getSubReg())
641
98
              .add(SrcOp);
642
98
  }
643
212
644
212
  DEBUG(dbgs() << "created an initial copy: " << *MIB);
645
212
  return &*MIB;
646
212
}
647
648
/// Replace a MUX instruction MI with a pair A2_tfrt/A2_tfrf. This function
649
/// performs all necessary changes to complete the replacement.
650
bool HexagonExpandCondsets::split(MachineInstr &MI,
651
107
                                  std::set<unsigned> &UpdRegs) {
652
107
  if (
TfrLimitActive107
) {
653
0
    if (TfrCounter >= TfrLimit)
654
0
      return false;
655
0
    TfrCounter++;
656
0
  }
657
107
  
DEBUG107
(dbgs() << "\nsplitting BB#" << MI.getParent()->getNumber() << ": "
658
107
               << MI);
659
107
  MachineOperand &MD = MI.getOperand(0);  // Definition
660
107
  MachineOperand &MP = MI.getOperand(1);  // Predicate register
661
107
  assert(MD.isDef());
662
107
  unsigned DR = MD.getReg(), DSR = MD.getSubReg();
663
107
  bool ReadUndef = MD.isUndef();
664
107
  MachineBasicBlock::iterator At = MI;
665
107
666
107
  auto updateRegs = [&UpdRegs] (const MachineInstr &MI) -> void {
667
107
    for (auto &Op : MI.operands())
668
428
      
if (428
Op.isReg()428
)
669
330
        UpdRegs.insert(Op.getReg());
670
107
  };
671
107
672
107
  // If this is a mux of the same register, just replace it with COPY.
673
107
  // Ideally, this would happen earlier, so that register coalescing would
674
107
  // see it.
675
107
  MachineOperand &ST = MI.getOperand(2);
676
107
  MachineOperand &SF = MI.getOperand(3);
677
107
  if (
ST.isReg() && 107
SF.isReg()64
) {
678
48
    RegisterRef RT(ST);
679
48
    if (
RT == RegisterRef(SF)48
) {
680
1
      // Copy regs to update first.
681
1
      updateRegs(MI);
682
1
      MI.setDesc(HII->get(TargetOpcode::COPY));
683
1
      unsigned S = getRegState(ST);
684
4
      while (MI.getNumOperands() > 1)
685
3
        MI.RemoveOperand(MI.getNumOperands()-1);
686
1
      MachineFunction &MF = *MI.getParent()->getParent();
687
1
      MachineInstrBuilder(MF, MI).addReg(RT.Reg, S, RT.Sub);
688
1
      return true;
689
1
    }
690
106
  }
691
106
692
106
  // First, create the two invididual conditional transfers, and add each
693
106
  // of them to the live intervals information. Do that first and then remove
694
106
  // the old instruction from live intervals.
695
106
  MachineInstr *TfrT =
696
106
      genCondTfrFor(ST, At, DR, DSR, MP, true, ReadUndef, false);
697
106
  MachineInstr *TfrF =
698
106
      genCondTfrFor(SF, At, DR, DSR, MP, false, ReadUndef, true);
699
106
  LIS->InsertMachineInstrInMaps(*TfrT);
700
106
  LIS->InsertMachineInstrInMaps(*TfrF);
701
106
702
106
  // Will need to recalculate live intervals for all registers in MI.
703
106
  updateRegs(MI);
704
106
705
106
  removeInstr(MI);
706
106
  return true;
707
106
}
708
709
50
bool HexagonExpandCondsets::isPredicable(MachineInstr *MI) {
710
50
  if (
HII->isPredicated(*MI) || 50
!HII->isPredicable(*MI)43
)
711
13
    return false;
712
37
  
if (37
MI->hasUnmodeledSideEffects() || 37
MI->mayStore()37
)
713
0
    return false;
714
37
  // Reject instructions with multiple defs (e.g. post-increment loads).
715
37
  bool HasDef = false;
716
97
  for (auto &Op : MI->operands()) {
717
97
    if (
!Op.isReg() || 97
!Op.isDef()73
)
718
60
      continue;
719
37
    
if (37
HasDef37
)
720
0
      return false;
721
37
    HasDef = true;
722
37
  }
723
37
  for (auto &Mo : MI->memoperands())
724
2
    
if (2
Mo->isVolatile()2
)
725
0
      return false;
726
37
  return true;
727
37
}
728
729
/// Find the reaching definition for a predicated use of RD. The RD is used
730
/// under the conditions given by PredR and Cond, and this function will ignore
731
/// definitions that set RD under the opposite conditions.
732
MachineInstr *HexagonExpandCondsets::getReachingDefForPred(RegisterRef RD,
733
157
      MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond) {
734
157
  MachineBasicBlock &B = *UseIt->getParent();
735
157
  MachineBasicBlock::iterator I = UseIt, S = B.begin();
736
157
  if (I == S)
737
6
    return nullptr;
738
151
739
151
  bool PredValid = true;
740
349
  do {
741
349
    --I;
742
349
    MachineInstr *MI = &*I;
743
349
    // Check if this instruction can be ignored, i.e. if it is predicated
744
349
    // on the complementary condition.
745
349
    if (
PredValid && 349
HII->isPredicated(*MI)259
) {
746
45
      if (
MI->readsRegister(PredR) && 45
(Cond != HII->isPredicatedTrue(*MI))28
)
747
21
        continue;
748
328
    }
749
328
750
328
    // Check the defs. If the PredR is defined, invalidate it. If RD is
751
328
    // defined, return the instruction or 0, depending on the circumstances.
752
328
    
for (auto &Op : MI->operands()) 328
{
753
689
      if (
!Op.isReg() || 689
!Op.isDef()594
)
754
351
        continue;
755
338
      RegisterRef RR = Op;
756
338
      if (
RR.Reg == PredR338
) {
757
39
        PredValid = false;
758
39
        continue;
759
39
      }
760
299
      
if (299
RR.Reg != RD.Reg299
)
761
163
        continue;
762
136
      // If the "Reg" part agrees, there is still the subregister to check.
763
136
      // If we are looking for vreg1:loreg, we can skip vreg1:hireg, but
764
136
      // not vreg1 (w/o subregisters).
765
136
      
if (136
RR.Sub == RD.Sub136
)
766
131
        return MI;
767
5
      
if (5
RR.Sub == 0 || 5
RD.Sub == 02
)
768
3
        return nullptr;
769
215
      // We have different subregisters, so we can continue looking.
770
215
    }
771
215
  } while (I != S);
772
151
773
17
  return nullptr;
774
157
}
775
776
/// Check if the instruction MI can be safely moved over a set of instructions
777
/// whose side-effects (in terms of register defs and uses) are expressed in
778
/// the maps Defs and Uses. These maps reflect the conditional defs and uses
779
/// that depend on the same predicate register to allow moving instructions
780
/// over instructions predicated on the opposite condition.
781
bool HexagonExpandCondsets::canMoveOver(MachineInstr &MI, ReferenceMap &Defs,
782
68
                                        ReferenceMap &Uses) {
783
68
  // In order to be able to safely move MI over instructions that define
784
68
  // "Defs" and use "Uses", no def operand from MI can be defined or used
785
68
  // and no use operand can be defined.
786
169
  for (auto &Op : MI.operands()) {
787
169
    if (!Op.isReg())
788
20
      continue;
789
149
    RegisterRef RR = Op;
790
149
    // For physical register we would need to check register aliases, etc.
791
149
    // and we don't want to bother with that. It would be of little value
792
149
    // before the actual register rewriting (from virtual to physical).
793
149
    if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
794
0
      return false;
795
149
    // No redefs for any operand.
796
149
    
if (149
isRefInMap(RR, Defs, Exec_Then)149
)
797
11
      return false;
798
138
    // For defs, there cannot be uses.
799
138
    
if (138
Op.isDef() && 138
isRefInMap(RR, Uses, Exec_Then)58
)
800
0
      return false;
801
57
  }
802
57
  return true;
803
57
}
804
805
/// Check if the instruction accessing memory (TheI) can be moved to the
806
/// location ToI.
807
bool HexagonExpandCondsets::canMoveMemTo(MachineInstr &TheI, MachineInstr &ToI,
808
1
                                         bool IsDown) {
809
1
  bool IsLoad = TheI.mayLoad(), IsStore = TheI.mayStore();
810
1
  if (
!IsLoad && 1
!IsStore0
)
811
0
    return true;
812
1
  
if (1
HII->areMemAccessesTriviallyDisjoint(TheI, ToI)1
)
813
0
    return true;
814
1
  
if (1
TheI.hasUnmodeledSideEffects()1
)
815
0
    return false;
816
1
817
1
  
MachineBasicBlock::iterator StartI = IsDown ? 1
TheI1
:
ToI0
;
818
1
  MachineBasicBlock::iterator EndI = IsDown ? 
ToI1
:
TheI0
;
819
1
  bool Ordered = TheI.hasOrderedMemoryRef();
820
1
821
1
  // Search for aliased memory reference in (StartI, EndI).
822
1
  for (MachineBasicBlock::iterator I = std::next(StartI); 
I != EndI1
;
++I0
) {
823
0
    MachineInstr *MI = &*I;
824
0
    if (MI->hasUnmodeledSideEffects())
825
0
      return false;
826
0
    bool L = MI->mayLoad(), S = MI->mayStore();
827
0
    if (
!L && 0
!S0
)
828
0
      continue;
829
0
    
if (0
Ordered && 0
MI->hasOrderedMemoryRef()0
)
830
0
      return false;
831
0
832
0
    
bool Conflict = (L && 0
IsStore0
) ||
S0
;
833
0
    if (Conflict)
834
0
      return false;
835
0
  }
836
1
  return true;
837
1
}
838
839
/// Generate a predicated version of MI (where the condition is given via
840
/// PredR and Cond) at the point indicated by Where.
841
void HexagonExpandCondsets::predicateAt(const MachineOperand &DefOp,
842
                                        MachineInstr &MI,
843
                                        MachineBasicBlock::iterator Where,
844
                                        const MachineOperand &PredOp, bool Cond,
845
33
                                        std::set<unsigned> &UpdRegs) {
846
33
  // The problem with updating live intervals is that we can move one def
847
33
  // past another def. In particular, this can happen when moving an A2_tfrt
848
33
  // over an A2_tfrf defining the same register. From the point of view of
849
33
  // live intervals, these two instructions are two separate definitions,
850
33
  // and each one starts another live segment. LiveIntervals's "handleMove"
851
33
  // does not allow such moves, so we need to handle it ourselves. To avoid
852
33
  // invalidating liveness data while we are using it, the move will be
853
33
  // implemented in 4 steps: (1) add a clone of the instruction MI at the
854
33
  // target location, (2) update liveness, (3) delete the old instruction,
855
33
  // and (4) update liveness again.
856
33
857
33
  MachineBasicBlock &B = *MI.getParent();
858
33
  DebugLoc DL = Where->getDebugLoc();  // "Where" points to an instruction.
859
33
  unsigned Opc = MI.getOpcode();
860
33
  unsigned PredOpc = HII->getCondOpcode(Opc, !Cond);
861
33
  MachineInstrBuilder MB = BuildMI(B, Where, DL, HII->get(PredOpc));
862
33
  unsigned Ox = 0, NP = MI.getNumOperands();
863
33
  // Skip all defs from MI first.
864
66
  while (
Ox < NP66
) {
865
66
    MachineOperand &MO = MI.getOperand(Ox);
866
66
    if (
!MO.isReg() || 66
!MO.isDef()56
)
867
33
      break;
868
33
    Ox++;
869
33
  }
870
33
  // Add the new def, then the predicate register, then the rest of the
871
33
  // operands.
872
33
  MB.addReg(DefOp.getReg(), getRegState(DefOp), DefOp.getSubReg());
873
33
  MB.addReg(PredOp.getReg(), PredOp.isUndef() ? 
RegState::Undef2
:
031
,
874
33
            PredOp.getSubReg());
875
85
  while (
Ox < NP85
) {
876
52
    MachineOperand &MO = MI.getOperand(Ox);
877
52
    if (
!MO.isReg() || 52
!MO.isImplicit()32
)
878
52
      MB.add(MO);
879
52
    Ox++;
880
52
  }
881
33
882
33
  MachineFunction &MF = *B.getParent();
883
33
  MachineInstr::mmo_iterator I = MI.memoperands_begin();
884
33
  unsigned NR = std::distance(I, MI.memoperands_end());
885
33
  MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(NR);
886
34
  for (unsigned i = 0; 
i < NR34
;
++i1
)
887
1
    MemRefs[i] = *I++;
888
33
  MB.setMemRefs(MemRefs, MemRefs+NR);
889
33
890
33
  MachineInstr *NewI = MB;
891
33
  NewI->clearKillInfo();
892
33
  LIS->InsertMachineInstrInMaps(*NewI);
893
33
894
33
  for (auto &Op : NewI->operands())
895
118
    
if (118
Op.isReg()118
)
896
98
      UpdRegs.insert(Op.getReg());
897
33
}
898
899
/// In the range [First, Last], rename all references to the "old" register RO
900
/// to the "new" register RN, but only in instructions predicated on the given
901
/// condition.
902
void HexagonExpandCondsets::renameInRange(RegisterRef RO, RegisterRef RN,
903
      unsigned PredR, bool Cond, MachineBasicBlock::iterator First,
904
33
      MachineBasicBlock::iterator Last) {
905
33
  MachineBasicBlock::iterator End = std::next(Last);
906
97
  for (MachineBasicBlock::iterator I = First; 
I != End97
;
++I64
) {
907
64
    MachineInstr *MI = &*I;
908
64
    // Do not touch instructions that are not predicated, or are predicated
909
64
    // on the opposite condition.
910
64
    if (!HII->isPredicated(*MI))
911
14
      continue;
912
50
    
if (50
!MI->readsRegister(PredR) || 50
(Cond != HII->isPredicatedTrue(*MI))50
)
913
8
      continue;
914
42
915
42
    
for (auto &Op : MI->operands()) 42
{
916
130
      if (
!Op.isReg() || 130
RO != RegisterRef(Op)124
)
917
97
        continue;
918
33
      Op.setReg(RN.Reg);
919
33
      Op.setSubReg(RN.Sub);
920
33
      // In practice, this isn't supposed to see any defs.
921
33
      assert(!Op.isDef() && "Not expecting a def");
922
33
    }
923
64
  }
924
33
}
925
926
/// For a given conditional copy, predicate the definition of the source of
927
/// the copy under the given condition (using the same predicate register as
928
/// the copy).
929
bool HexagonExpandCondsets::predicate(MachineInstr &TfrI, bool Cond,
930
110
                                      std::set<unsigned> &UpdRegs) {
931
110
  // TfrI - A2_tfr[tf] Instruction (not A2_tfrsi).
932
110
  unsigned Opc = TfrI.getOpcode();
933
110
  (void)Opc;
934
110
  assert(Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf);
935
110
  DEBUG(dbgs() << "\nattempt to predicate if-" << (Cond ? "true" : "false")
936
110
               << ": " << TfrI);
937
110
938
110
  MachineOperand &MD = TfrI.getOperand(0);
939
110
  MachineOperand &MP = TfrI.getOperand(1);
940
110
  MachineOperand &MS = TfrI.getOperand(2);
941
110
  // The source operand should be a <kill>. This is not strictly necessary,
942
110
  // but it makes things a lot simpler. Otherwise, we would need to rename
943
110
  // some registers, which would complicate the transformation considerably.
944
110
  if (!MS.isKill())
945
57
    return false;
946
53
  // Avoid predicating instructions that define a subregister if subregister
947
53
  // liveness tracking is not enabled.
948
53
  
if (53
MD.getSubReg() && 53
!MRI->shouldTrackSubRegLiveness(MD.getReg())9
)
949
0
    return false;
950
53
951
53
  RegisterRef RT(MS);
952
53
  unsigned PredR = MP.getReg();
953
53
  MachineInstr *DefI = getReachingDefForPred(RT, TfrI, PredR, Cond);
954
53
  if (
!DefI || 53
!isPredicable(DefI)50
)
955
16
    return false;
956
37
957
37
  
DEBUG37
(dbgs() << "Source def: " << *DefI);
958
37
959
37
  // Collect the information about registers defined and used between the
960
37
  // DefI and the TfrI.
961
37
  // Map: reg -> bitmask of subregs
962
37
  ReferenceMap Uses, Defs;
963
37
  MachineBasicBlock::iterator DefIt = DefI, TfrIt = TfrI;
964
37
965
37
  // Check if the predicate register is valid between DefI and TfrI.
966
37
  // If it is, we can then ignore instructions predicated on the negated
967
37
  // conditions when collecting def and use information.
968
37
  bool PredValid = true;
969
59
  for (MachineBasicBlock::iterator I = std::next(DefIt); 
I != TfrIt59
;
++I22
) {
970
26
    if (!I->modifiesRegister(PredR, nullptr))
971
22
      continue;
972
4
    PredValid = false;
973
4
    break;
974
4
  }
975
37
976
69
  for (MachineBasicBlock::iterator I = std::next(DefIt); 
I != TfrIt69
;
++I32
) {
977
32
    MachineInstr *MI = &*I;
978
32
    // If this instruction is predicated on the same register, it could
979
32
    // potentially be ignored.
980
32
    // By default assume that the instruction executes on the same condition
981
32
    // as TfrI (Exec_Then), and also on the opposite one (Exec_Else).
982
32
    unsigned Exec = Exec_Then | Exec_Else;
983
32
    if (
PredValid && 32
HII->isPredicated(*MI)21
&&
MI->readsRegister(PredR)8
)
984
8
      
Exec = (Cond == HII->isPredicatedTrue(*MI)) ? 8
Exec_Then0
:
Exec_Else8
;
985
32
986
95
    for (auto &Op : MI->operands()) {
987
95
      if (!Op.isReg())
988
18
        continue;
989
77
      // We don't want to deal with physical registers. The reason is that
990
77
      // they can be aliased with other physical registers. Aliased virtual
991
77
      // registers must share the same register number, and can only differ
992
77
      // in the subregisters, which we are keeping track of. Physical
993
77
      // registers ters no longer have subregisters---their super- and
994
77
      // subregisters are other physical registers, and we are not checking
995
77
      // that.
996
77
      RegisterRef RR = Op;
997
77
      if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
998
0
        return false;
999
77
1000
77
      
ReferenceMap &Map = Op.isDef() ? 77
Defs31
:
Uses46
;
1001
77
      if (
Op.isDef() && 77
Op.isUndef()31
) {
1002
5
        assert(RR.Sub && "Expecting a subregister on <def,read-undef>");
1003
5
        // If this is a <def,read-undef>, then it invalidates the non-written
1004
5
        // part of the register. For the purpose of checking the validity of
1005
5
        // the move, assume that it modifies the whole register.
1006
5
        RR.Sub = 0;
1007
5
      }
1008
95
      addRefToMap(RR, Map, Exec);
1009
95
    }
1010
32
  }
1011
37
1012
37
  // The situation:
1013
37
  //   RT = DefI
1014
37
  //   ...
1015
37
  //   RD = TfrI ..., RT
1016
37
1017
37
  // If the register-in-the-middle (RT) is used or redefined between
1018
37
  // DefI and TfrI, we may not be able proceed with this transformation.
1019
37
  // We can ignore a def that will not execute together with TfrI, and a
1020
37
  // use that will. If there is such a use (that does execute together with
1021
37
  // TfrI), we will not be able to move DefI down. If there is a use that
1022
37
  // executed if TfrI's condition is false, then RT must be available
1023
37
  // unconditionally (cannot be predicated).
1024
37
  // Essentially, we need to be able to rename RT to RD in this segment.
1025
37
  
if (37
isRefInMap(RT, Defs, Exec_Then) || 37
isRefInMap(RT, Uses, Exec_Else)37
)
1026
3
    return false;
1027
34
  RegisterRef RD = MD;
1028
34
  // If the predicate register is defined between DefI and TfrI, the only
1029
34
  // potential thing to do would be to move the DefI down to TfrI, and then
1030
34
  // predicate. The reaching def (DefI) must be movable down to the location
1031
34
  // of the TfrI.
1032
34
  // If the target register of the TfrI (RD) is not used or defined between
1033
34
  // DefI and TfrI, consider moving TfrI up to DefI.
1034
34
  bool CanUp =   canMoveOver(TfrI, Defs, Uses);
1035
34
  bool CanDown = canMoveOver(*DefI, Defs, Uses);
1036
34
  // The TfrI does not access memory, but DefI could. Check if it's safe
1037
34
  // to move DefI down to TfrI.
1038
34
  if (
DefI->mayLoad() || 34
DefI->mayStore()33
)
1039
1
    
if (1
!canMoveMemTo(*DefI, TfrI, true)1
)
1040
0
      CanDown = false;
1041
34
1042
34
  DEBUG(dbgs() << "Can move up: " << (CanUp ? "yes" : "no")
1043
34
               << ", can move down: " << (CanDown ? "yes\n" : "no\n"));
1044
34
  MachineBasicBlock::iterator PastDefIt = std::next(DefIt);
1045
34
  if (CanUp)
1046
24
    predicateAt(MD, *DefI, PastDefIt, MP, Cond, UpdRegs);
1047
10
  else 
if (10
CanDown10
)
1048
9
    predicateAt(MD, *DefI, TfrIt, MP, Cond, UpdRegs);
1049
10
  else
1050
1
    return false;
1051
33
1052
33
  
if (33
RT != RD33
) {
1053
33
    renameInRange(RT, RD, PredR, Cond, PastDefIt, TfrIt);
1054
33
    UpdRegs.insert(RT.Reg);
1055
33
  }
1056
110
1057
110
  removeInstr(TfrI);
1058
110
  removeInstr(*DefI);
1059
110
  return true;
1060
110
}
1061
1062
/// Predicate all cases of conditional copies in the specified block.
1063
bool HexagonExpandCondsets::predicateInBlock(MachineBasicBlock &B,
1064
2.00k
      std::set<unsigned> &UpdRegs) {
1065
2.00k
  bool Changed = false;
1066
2.00k
  MachineBasicBlock::iterator I, E, NextI;
1067
15.4k
  for (I = B.begin(), E = B.end(); 
I != E15.4k
;
I = NextI13.4k
) {
1068
13.4k
    NextI = std::next(I);
1069
13.4k
    unsigned Opc = I->getOpcode();
1070
13.4k
    if (
Opc == Hexagon::A2_tfrt || 13.4k
Opc == Hexagon::A2_tfrf13.3k
) {
1071
110
      bool Done = predicate(*I, (Opc == Hexagon::A2_tfrt), UpdRegs);
1072
110
      if (
!Done110
) {
1073
77
        // If we didn't predicate I, we may need to remove it in case it is
1074
77
        // an "identity" copy, e.g.  vreg1 = A2_tfrt vreg2, vreg1.
1075
77
        if (
RegisterRef(I->getOperand(0)) == RegisterRef(I->getOperand(2))77
) {
1076
31
          for (auto &Op : I->operands())
1077
93
            
if (93
Op.isReg()93
)
1078
93
              UpdRegs.insert(Op.getReg());
1079
31
          removeInstr(*I);
1080
31
        }
1081
77
      }
1082
110
      Changed |= Done;
1083
110
    }
1084
13.4k
  }
1085
2.00k
  return Changed;
1086
2.00k
}
1087
1088
124
bool HexagonExpandCondsets::isIntReg(RegisterRef RR, unsigned &BW) {
1089
124
  if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
1090
0
    return false;
1091
124
  const TargetRegisterClass *RC = MRI->getRegClass(RR.Reg);
1092
124
  if (
RC == &Hexagon::IntRegsRegClass124
) {
1093
92
    BW = 32;
1094
92
    return true;
1095
92
  }
1096
32
  
if (32
RC == &Hexagon::DoubleRegsRegClass32
) {
1097
32
    BW = (RR.Sub != 0) ? 
3224
:
648
;
1098
32
    return true;
1099
32
  }
1100
0
  return false;
1101
0
}
1102
1103
32
bool HexagonExpandCondsets::isIntraBlocks(LiveInterval &LI) {
1104
66
  for (LiveInterval::iterator I = LI.begin(), E = LI.end(); 
I != E66
;
++I34
) {
1105
39
    LiveRange::Segment &LR = *I;
1106
39
    // Range must start at a register...
1107
39
    if (!LR.start.isRegister())
1108
1
      return false;
1109
38
    // ...and end in a register or in a dead slot.
1110
38
    
if (38
!LR.end.isRegister() && 38
!LR.end.isDead()4
)
1111
4
      return false;
1112
39
  }
1113
27
  return true;
1114
32
}
1115
1116
62
bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) {
1117
62
  if (
CoaLimitActive62
) {
1118
0
    if (CoaCounter >= CoaLimit)
1119
0
      return false;
1120
0
    CoaCounter++;
1121
0
  }
1122
62
  unsigned BW1, BW2;
1123
62
  if (
!isIntReg(R1, BW1) || 62
!isIntReg(R2, BW2)62
||
BW1 != BW262
)
1124
0
    return false;
1125
62
  
if (62
MRI->isLiveIn(R1.Reg)62
)
1126
2
    return false;
1127
60
  
if (60
MRI->isLiveIn(R2.Reg)60
)
1128
8
    return false;
1129
52
1130
52
  LiveInterval &L1 = LIS->getInterval(R1.Reg);
1131
52
  LiveInterval &L2 = LIS->getInterval(R2.Reg);
1132
52
  if (L2.empty())
1133
6
    return false;
1134
46
  
if (46
L1.hasSubRanges() || 46
L2.hasSubRanges()44
)
1135
6
    return false;
1136
40
  bool Overlap = L1.overlaps(L2);
1137
40
1138
40
  DEBUG(dbgs() << "compatible registers: ("
1139
40
               << (Overlap ? "overlap" : "disjoint") << ")\n  "
1140
40
               << PrintReg(R1.Reg, TRI, R1.Sub) << "  " << L1 << "\n  "
1141
40
               << PrintReg(R2.Reg, TRI, R2.Sub) << "  " << L2 << "\n");
1142
40
  if (
R1.Sub || 40
R2.Sub40
)
1143
0
    return false;
1144
40
  
if (40
Overlap40
)
1145
12
    return false;
1146
28
1147
28
  // Coalescing could have a negative impact on scheduling, so try to limit
1148
28
  // to some reasonable extent. Only consider coalescing segments, when one
1149
28
  // of them does not cross basic block boundaries.
1150
28
  
if (28
!isIntraBlocks(L1) && 28
!isIntraBlocks(L2)4
)
1151
1
    return false;
1152
27
1153
27
  MRI->replaceRegWith(R2.Reg, R1.Reg);
1154
27
1155
27
  // Move all live segments from L2 to L1.
1156
27
  using ValueInfoMap = DenseMap<VNInfo *, VNInfo *>;
1157
27
  ValueInfoMap VM;
1158
109
  for (LiveInterval::iterator I = L2.begin(), E = L2.end(); 
I != E109
;
++I82
) {
1159
82
    VNInfo *NewVN, *OldVN = I->valno;
1160
82
    ValueInfoMap::iterator F = VM.find(OldVN);
1161
82
    if (
F == VM.end()82
) {
1162
81
      NewVN = L1.getNextValue(I->valno->def, LIS->getVNInfoAllocator());
1163
81
      VM.insert(std::make_pair(OldVN, NewVN));
1164
82
    } else {
1165
1
      NewVN = F->second;
1166
1
    }
1167
82
    L1.addSegment(LiveRange::Segment(I->start, I->end, NewVN));
1168
82
  }
1169
109
  while (L2.begin() != L2.end())
1170
82
    L2.removeSegment(*L2.begin());
1171
27
  LIS->removeInterval(R2.Reg);
1172
27
1173
27
  updateKillFlags(R1.Reg);
1174
27
  DEBUG(dbgs() << "coalesced: " << L1 << "\n");
1175
62
  L1.verify();
1176
62
1177
62
  return true;
1178
62
}
1179
1180
/// Attempt to coalesce one of the source registers to a MUX instruction with
1181
/// the destination register. This could lead to having only one predicated
1182
/// instruction in the end instead of two.
1183
bool HexagonExpandCondsets::coalesceSegments(
1184
      const SmallVectorImpl<MachineInstr*> &Condsets,
1185
860
      std::set<unsigned> &UpdRegs) {
1186
860
  SmallVector<MachineInstr*,16> TwoRegs;
1187
107
  for (MachineInstr *MI : Condsets) {
1188
107
    MachineOperand &S1 = MI->getOperand(2), &S2 = MI->getOperand(3);
1189
107
    if (
!S1.isReg() && 107
!S2.isReg()43
)
1190
39
      continue;
1191
68
    TwoRegs.push_back(MI);
1192
68
  }
1193
860
1194
860
  bool Changed = false;
1195
68
  for (MachineInstr *CI : TwoRegs) {
1196
68
    RegisterRef RD = CI->getOperand(0);
1197
68
    RegisterRef RP = CI->getOperand(1);
1198
68
    MachineOperand &S1 = CI->getOperand(2), &S2 = CI->getOperand(3);
1199
68
    bool Done = false;
1200
68
    // Consider this case:
1201
68
    //   vreg1 = instr1 ...
1202
68
    //   vreg2 = instr2 ...
1203
68
    //   vreg0 = C2_mux ..., vreg1, vreg2
1204
68
    // If vreg0 was coalesced with vreg1, we could end up with the following
1205
68
    // code:
1206
68
    //   vreg0 = instr1 ...
1207
68
    //   vreg2 = instr2 ...
1208
68
    //   vreg0 = A2_tfrf ..., vreg2
1209
68
    // which will later become:
1210
68
    //   vreg0 = instr1 ...
1211
68
    //   vreg0 = instr2_cNotPt ...
1212
68
    // i.e. there will be an unconditional definition (instr1) of vreg0
1213
68
    // followed by a conditional one. The output dependency was there before
1214
68
    // and it unavoidable, but if instr1 is predicable, we will no longer be
1215
68
    // able to predicate it here.
1216
68
    // To avoid this scenario, don't coalesce the destination register with
1217
68
    // a source register that is defined by a predicable instruction.
1218
68
    if (
S1.isReg()68
) {
1219
64
      RegisterRef RS = S1;
1220
64
      MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, true);
1221
64
      if (
!RDef || 64
!HII->isPredicable(*RDef)50
) {
1222
41
        Done = coalesceRegisters(RD, RegisterRef(S1));
1223
41
        if (
Done41
) {
1224
20
          UpdRegs.insert(RD.Reg);
1225
20
          UpdRegs.insert(S1.getReg());
1226
20
        }
1227
41
      }
1228
64
    }
1229
68
    if (
!Done && 68
S2.isReg()48
) {
1230
40
      RegisterRef RS = S2;
1231
40
      MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, false);
1232
40
      if (
!RDef || 40
!HII->isPredicable(*RDef)31
) {
1233
21
        Done = coalesceRegisters(RD, RegisterRef(S2));
1234
21
        if (
Done21
) {
1235
7
          UpdRegs.insert(RD.Reg);
1236
7
          UpdRegs.insert(S2.getReg());
1237
7
        }
1238
21
      }
1239
40
    }
1240
68
    Changed |= Done;
1241
68
  }
1242
860
  return Changed;
1243
860
}
1244
1245
860
bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) {
1246
860
  if (skipFunction(*MF.getFunction()))
1247
0
    return false;
1248
860
1249
860
  HII = static_cast<const HexagonInstrInfo*>(MF.getSubtarget().getInstrInfo());
1250
860
  TRI = MF.getSubtarget().getRegisterInfo();
1251
860
  MDT = &getAnalysis<MachineDominatorTree>();
1252
860
  LIS = &getAnalysis<LiveIntervals>();
1253
860
  MRI = &MF.getRegInfo();
1254
860
1255
860
  DEBUG(LIS->print(dbgs() << "Before expand-condsets\n",
1256
860
                   MF.getFunction()->getParent()));
1257
860
1258
860
  bool Changed = false;
1259
860
  std::set<unsigned> CoalUpd, PredUpd;
1260
860
1261
860
  SmallVector<MachineInstr*,16> Condsets;
1262
860
  for (auto &B : MF)
1263
2.00k
    for (auto &I : B)
1264
13.3k
      
if (13.3k
isCondset(I)13.3k
)
1265
107
        Condsets.push_back(&I);
1266
860
1267
860
  // Try to coalesce the target of a mux with one of its sources.
1268
860
  // This could eliminate a register copy in some circumstances.
1269
860
  Changed |= coalesceSegments(Condsets, CoalUpd);
1270
860
1271
860
  // Update kill flags on all source operands. This is done here because
1272
860
  // at this moment (when expand-condsets runs), there are no kill flags
1273
860
  // in the IR (they have been removed by live range analysis).
1274
860
  // Updating them right before we split is the easiest, because splitting
1275
860
  // adds definitions which would interfere with updating kills afterwards.
1276
860
  std::set<unsigned> KillUpd;
1277
860
  for (MachineInstr *MI : Condsets)
1278
107
    for (MachineOperand &Op : MI->operands())
1279
428
      
if (428
Op.isReg() && 428
Op.isUse()330
)
1280
223
        
if (223
!CoalUpd.count(Op.getReg())223
)
1281
184
          KillUpd.insert(Op.getReg());
1282
860
  updateLiveness(KillUpd, false, true, false);
1283
860
  DEBUG(LIS->print(dbgs() << "After coalescing\n",
1284
860
                   MF.getFunction()->getParent()));
1285
860
1286
860
  // First, simply split all muxes into a pair of conditional transfers
1287
860
  // and update the live intervals to reflect the new arrangement. The
1288
860
  // goal is to update the kill flags, since predication will rely on
1289
860
  // them.
1290
860
  for (MachineInstr *MI : Condsets)
1291
107
    Changed |= split(*MI, PredUpd);
1292
860
  Condsets.clear(); // The contents of Condsets are invalid here anyway.
1293
860
1294
860
  // Do not update live ranges after splitting. Recalculation of live
1295
860
  // intervals removes kill flags, which were preserved by splitting on
1296
860
  // the source operands of condsets. These kill flags are needed by
1297
860
  // predication, and after splitting they are difficult to recalculate
1298
860
  // (because of predicated defs), so make sure they are left untouched.
1299
860
  // Predication does not use live intervals.
1300
860
  DEBUG(LIS->print(dbgs() << "After splitting\n",
1301
860
                   MF.getFunction()->getParent()));
1302
860
1303
860
  // Traverse all blocks and collapse predicable instructions feeding
1304
860
  // conditional transfers into predicated instructions.
1305
860
  // Walk over all the instructions again, so we may catch pre-existing
1306
860
  // cases that were not created in the previous step.
1307
860
  for (auto &B : MF)
1308
2.00k
    Changed |= predicateInBlock(B, PredUpd);
1309
860
  DEBUG(LIS->print(dbgs() << "After predicating\n",
1310
860
                   MF.getFunction()->getParent()));
1311
860
1312
860
  PredUpd.insert(CoalUpd.begin(), CoalUpd.end());
1313
860
  updateLiveness(PredUpd, true, true, true);
1314
860
1315
860
  DEBUG({
1316
860
    if (Changed)
1317
860
      LIS->print(dbgs() << "After expand-condsets\n",
1318
860
                 MF.getFunction()->getParent());
1319
860
  });
1320
860
1321
860
  return Changed;
1322
860
}
1323
1324
//===----------------------------------------------------------------------===//
1325
//                         Public Constructor Functions
1326
//===----------------------------------------------------------------------===//
1327
1328
0
FunctionPass *llvm::createHexagonExpandCondsets() {
1329
0
  return new HexagonExpandCondsets();
1330
0
}