Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- HexagonHardwareLoops.cpp - Identify and generate hardware loops ----===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// This pass identifies loops where we can generate the Hexagon hardware
11
// loop instruction.  The hardware loop can perform loop branches with a
12
// zero-cycle overhead.
13
//
14
// The pattern that defines the induction variable can changed depending on
15
// prior optimizations.  For example, the IndVarSimplify phase run by 'opt'
16
// normalizes induction variables, and the Loop Strength Reduction pass
17
// run by 'llc' may also make changes to the induction variable.
18
// The pattern detected by this phase is due to running Strength Reduction.
19
//
20
// Criteria for hardware loops:
21
//  - Countable loops (w/ ind. var for a trip count)
22
//  - Assumes loops are normalized by IndVarSimplify
23
//  - Try inner-most loops first
24
//  - No function calls in loops.
25
//
26
//===----------------------------------------------------------------------===//
27
28
#include "HexagonInstrInfo.h"
29
#include "HexagonSubtarget.h"
30
#include "llvm/ADT/ArrayRef.h"
31
#include "llvm/ADT/STLExtras.h"
32
#include "llvm/ADT/SmallSet.h"
33
#include "llvm/ADT/SmallVector.h"
34
#include "llvm/ADT/Statistic.h"
35
#include "llvm/ADT/StringRef.h"
36
#include "llvm/CodeGen/MachineBasicBlock.h"
37
#include "llvm/CodeGen/MachineDominators.h"
38
#include "llvm/CodeGen/MachineFunction.h"
39
#include "llvm/CodeGen/MachineFunctionPass.h"
40
#include "llvm/CodeGen/MachineInstr.h"
41
#include "llvm/CodeGen/MachineInstrBuilder.h"
42
#include "llvm/CodeGen/MachineLoopInfo.h"
43
#include "llvm/CodeGen/MachineOperand.h"
44
#include "llvm/CodeGen/MachineRegisterInfo.h"
45
#include "llvm/IR/Constants.h"
46
#include "llvm/IR/DebugLoc.h"
47
#include "llvm/Pass.h"
48
#include "llvm/Support/CommandLine.h"
49
#include "llvm/Support/Debug.h"
50
#include "llvm/Support/ErrorHandling.h"
51
#include "llvm/Support/MathExtras.h"
52
#include "llvm/Support/raw_ostream.h"
53
#include "llvm/Target/TargetRegisterInfo.h"
54
#include <cassert>
55
#include <cstdint>
56
#include <cstdlib>
57
#include <iterator>
58
#include <map>
59
#include <set>
60
#include <string>
61
#include <utility>
62
#include <vector>
63
64
using namespace llvm;
65
66
#define DEBUG_TYPE "hwloops"
67
68
#ifndef NDEBUG
69
static cl::opt<int> HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1));
70
71
// Option to create preheader only for a specific function.
72
static cl::opt<std::string> PHFn("hexagon-hwloop-phfn", cl::Hidden,
73
                                 cl::init(""));
74
#endif
75
76
// Option to create a preheader if one doesn't exist.
77
static cl::opt<bool> HWCreatePreheader("hexagon-hwloop-preheader",
78
    cl::Hidden, cl::init(true),
79
    cl::desc("Add a preheader to a hardware loop if one doesn't exist"));
80
81
// Turn it off by default. If a preheader block is not created here, the
82
// software pipeliner may be unable to find a block suitable to serve as
83
// a preheader. In that case SWP will not run.
84
static cl::opt<bool> SpecPreheader("hwloop-spec-preheader", cl::init(false),
85
  cl::Hidden, cl::ZeroOrMore, cl::desc("Allow speculation of preheader "
86
  "instructions"));
87
88
STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
89
90
namespace llvm {
91
92
  FunctionPass *createHexagonHardwareLoops();
93
  void initializeHexagonHardwareLoopsPass(PassRegistry&);
94
95
} // end namespace llvm
96
97
namespace {
98
99
  class CountValue;
100
101
  struct HexagonHardwareLoops : public MachineFunctionPass {
102
    MachineLoopInfo            *MLI;
103
    MachineRegisterInfo        *MRI;
104
    MachineDominatorTree       *MDT;
105
    const HexagonInstrInfo     *TII;
106
    const HexagonRegisterInfo  *TRI;
107
#ifndef NDEBUG
108
    static int Counter;
109
#endif
110
111
  public:
112
    static char ID;
113
114
405
    HexagonHardwareLoops() : MachineFunctionPass(ID) {
115
405
      initializeHexagonHardwareLoopsPass(*PassRegistry::getPassRegistry());
116
405
    }
117
118
    bool runOnMachineFunction(MachineFunction &MF) override;
119
120
401
    StringRef getPassName() const override { return "Hexagon Hardware Loops"; }
121
122
401
    void getAnalysisUsage(AnalysisUsage &AU) const override {
123
401
      AU.addRequired<MachineDominatorTree>();
124
401
      AU.addRequired<MachineLoopInfo>();
125
401
      MachineFunctionPass::getAnalysisUsage(AU);
126
401
    }
127
128
  private:
129
    using LoopFeederMap = std::map<unsigned, MachineInstr *>;
130
131
    /// Kinds of comparisons in the compare instructions.
132
    struct Comparison {
133
      enum Kind {
134
        EQ  = 0x01,
135
        NE  = 0x02,
136
        L   = 0x04,
137
        G   = 0x08,
138
        U   = 0x40,
139
        LTs = L,
140
        LEs = L | EQ,
141
        GTs = G,
142
        GEs = G | EQ,
143
        LTu = L      | U,
144
        LEu = L | EQ | U,
145
        GTu = G      | U,
146
        GEu = G | EQ | U
147
      };
148
149
25
      static Kind getSwappedComparison(Kind Cmp) {
150
25
        assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator");
151
25
        if (
(Cmp & L) || 25
(Cmp & G)24
)
152
23
          return (Kind)(Cmp ^ (L|G));
153
2
        return Cmp;
154
2
      }
155
156
122
      static Kind getNegatedComparison(Kind Cmp) {
157
122
        if (
(Cmp & L) || 122
(Cmp & G)122
)
158
40
          return (Kind)((Cmp ^ (L | G)) ^ EQ);
159
82
        
if (82
(Cmp & NE) || 82
(Cmp & EQ)82
)
160
82
          return (Kind)(Cmp ^ (EQ | NE));
161
0
        return (Kind)0;
162
0
      }
163
164
32
      static bool isSigned(Kind Cmp) {
165
29
        return (Cmp & (L | G) && !(Cmp & U));
166
32
      }
167
168
3
      static bool isUnsigned(Kind Cmp) {
169
3
        return (Cmp & U);
170
3
      }
171
    };
172
173
    /// \brief Find the register that contains the loop controlling
174
    /// induction variable.
175
    /// If successful, it will return true and set the \p Reg, \p IVBump
176
    /// and \p IVOp arguments.  Otherwise it will return false.
177
    /// The returned induction register is the register R that follows the
178
    /// following induction pattern:
179
    /// loop:
180
    ///   R = phi ..., [ R.next, LatchBlock ]
181
    ///   R.next = R + #bump
182
    ///   if (R.next < #N) goto loop
183
    /// IVBump is the immediate value added to R, and IVOp is the instruction
184
    /// "R.next = R + #bump".
185
    bool findInductionRegister(MachineLoop *L, unsigned &Reg,
186
                               int64_t &IVBump, MachineInstr *&IVOp) const;
187
188
    /// \brief Return the comparison kind for the specified opcode.
189
    Comparison::Kind getComparisonKind(unsigned CondOpc,
190
                                       MachineOperand *InitialValue,
191
                                       const MachineOperand *Endvalue,
192
                                       int64_t IVBump) const;
193
194
    /// \brief Analyze the statements in a loop to determine if the loop
195
    /// has a computable trip count and, if so, return a value that represents
196
    /// the trip count expression.
197
    CountValue *getLoopTripCount(MachineLoop *L,
198
                                 SmallVectorImpl<MachineInstr *> &OldInsts);
199
200
    /// \brief Return the expression that represents the number of times
201
    /// a loop iterates.  The function takes the operands that represent the
202
    /// loop start value, loop end value, and induction value.  Based upon
203
    /// these operands, the function attempts to compute the trip count.
204
    /// If the trip count is not directly available (as an immediate value,
205
    /// or a register), the function will attempt to insert computation of it
206
    /// to the loop's preheader.
207
    CountValue *computeCount(MachineLoop *Loop, const MachineOperand *Start,
208
                             const MachineOperand *End, unsigned IVReg,
209
                             int64_t IVBump, Comparison::Kind Cmp) const;
210
211
    /// \brief Return true if the instruction is not valid within a hardware
212
    /// loop.
213
    bool isInvalidLoopOperation(const MachineInstr *MI,
214
                                bool IsInnerHWLoop) const;
215
216
    /// \brief Return true if the loop contains an instruction that inhibits
217
    /// using the hardware loop.
218
    bool containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const;
219
220
    /// \brief Given a loop, check if we can convert it to a hardware loop.
221
    /// If so, then perform the conversion and return true.
222
    bool convertToHardwareLoop(MachineLoop *L, bool &L0used, bool &L1used);
223
224
    /// \brief Return true if the instruction is now dead.
225
    bool isDead(const MachineInstr *MI,
226
                SmallVectorImpl<MachineInstr *> &DeadPhis) const;
227
228
    /// \brief Remove the instruction if it is now dead.
229
    void removeIfDead(MachineInstr *MI);
230
231
    /// \brief Make sure that the "bump" instruction executes before the
232
    /// compare.  We need that for the IV fixup, so that the compare
233
    /// instruction would not use a bumped value that has not yet been
234
    /// defined.  If the instructions are out of order, try to reorder them.
235
    bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI);
236
237
    /// \brief Return true if MO and MI pair is visited only once. If visited
238
    /// more than once, this indicates there is recursion. In such a case,
239
    /// return false.
240
    bool isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, MachineInstr *MI,
241
                      const MachineOperand *MO,
242
                      LoopFeederMap &LoopFeederPhi) const;
243
244
    /// \brief Return true if the Phi may generate a value that may underflow,
245
    /// or may wrap.
246
    bool phiMayWrapOrUnderflow(MachineInstr *Phi, const MachineOperand *EndVal,
247
                               MachineBasicBlock *MBB, MachineLoop *L,
248
                               LoopFeederMap &LoopFeederPhi) const;
249
250
    /// \brief Return true if the induction variable may underflow an unsigned
251
    /// value in the first iteration.
252
    bool loopCountMayWrapOrUnderFlow(const MachineOperand *InitVal,
253
                                     const MachineOperand *EndVal,
254
                                     MachineBasicBlock *MBB, MachineLoop *L,
255
                                     LoopFeederMap &LoopFeederPhi) const;
256
257
    /// \brief Check if the given operand has a compile-time known constant
258
    /// value. Return true if yes, and false otherwise. When returning true, set
259
    /// Val to the corresponding constant value.
260
    bool checkForImmediate(const MachineOperand &MO, int64_t &Val) const;
261
262
    /// \brief Check if the operand has a compile-time known constant value.
263
198
    bool isImmediate(const MachineOperand &MO) const {
264
198
      int64_t V;
265
198
      return checkForImmediate(MO, V);
266
198
    }
267
268
    /// \brief Return the immediate for the specified operand.
269
2
    int64_t getImmediate(const MachineOperand &MO) const {
270
2
      int64_t V;
271
2
      if (!checkForImmediate(MO, V))
272
0
        llvm_unreachable("Invalid operand");
273
2
      return V;
274
2
    }
275
276
    /// \brief Reset the given machine operand to now refer to a new immediate
277
    /// value.  Assumes that the operand was already referencing an immediate
278
    /// value, either directly, or via a register.
279
    void setImmediate(MachineOperand &MO, int64_t Val);
280
281
    /// \brief Fix the data flow of the induction variable.
282
    /// The desired flow is: phi ---> bump -+-> comparison-in-latch.
283
    ///                                     |
284
    ///                                     +-> back to phi
285
    /// where "bump" is the increment of the induction variable:
286
    ///   iv = iv + #const.
287
    /// Due to some prior code transformations, the actual flow may look
288
    /// like this:
289
    ///   phi -+-> bump ---> back to phi
290
    ///        |
291
    ///        +-> comparison-in-latch (against upper_bound-bump),
292
    /// i.e. the comparison that controls the loop execution may be using
293
    /// the value of the induction variable from before the increment.
294
    ///
295
    /// Return true if the loop's flow is the desired one (i.e. it's
296
    /// either been fixed, or no fixing was necessary).
297
    /// Otherwise, return false.  This can happen if the induction variable
298
    /// couldn't be identified, or if the value in the latch's comparison
299
    /// cannot be adjusted to reflect the post-bump value.
300
    bool fixupInductionVariable(MachineLoop *L);
301
302
    /// \brief Given a loop, if it does not have a preheader, create one.
303
    /// Return the block that is the preheader.
304
    MachineBasicBlock *createPreheaderForLoop(MachineLoop *L);
305
  };
306
307
  char HexagonHardwareLoops::ID = 0;
308
#ifndef NDEBUG
309
  int HexagonHardwareLoops::Counter = 0;
310
#endif
311
312
  /// \brief Abstraction for a trip count of a loop. A smaller version
313
  /// of the MachineOperand class without the concerns of changing the
314
  /// operand representation.
315
  class CountValue {
316
  public:
317
    enum CountValueType {
318
      CV_Register,
319
      CV_Immediate
320
    };
321
322
  private:
323
    CountValueType Kind;
324
    union Values {
325
      struct {
326
        unsigned Reg;
327
        unsigned Sub;
328
      } R;
329
      unsigned ImmVal;
330
    } Contents;
331
332
  public:
333
132
    explicit CountValue(CountValueType t, unsigned v, unsigned u = 0) {
334
132
      Kind = t;
335
132
      if (
Kind == CV_Register132
) {
336
88
        Contents.R.Reg = v;
337
88
        Contents.R.Sub = u;
338
132
      } else {
339
44
        Contents.ImmVal = v;
340
44
      }
341
132
    }
342
343
264
    bool isReg() const { return Kind == CV_Register; }
344
0
    bool isImm() const { return Kind == CV_Immediate; }
345
346
176
    unsigned getReg() const {
347
176
      assert(isReg() && "Wrong CountValue accessor");
348
176
      return Contents.R.Reg;
349
176
    }
350
351
88
    unsigned getSubReg() const {
352
88
      assert(isReg() && "Wrong CountValue accessor");
353
88
      return Contents.R.Sub;
354
88
    }
355
356
44
    unsigned getImm() const {
357
44
      assert(isImm() && "Wrong CountValue accessor");
358
44
      return Contents.ImmVal;
359
44
    }
360
361
0
    void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const {
362
0
      if (isReg()) { OS << PrintReg(Contents.R.Reg, TRI, Contents.R.Sub); }
363
0
      if (isImm()) { OS << Contents.ImmVal; }
364
0
    }
365
  };
366
367
} // end anonymous namespace
368
369
405
INITIALIZE_PASS_BEGIN405
(HexagonHardwareLoops, "hwloops",
370
405
                      "Hexagon Hardware Loops", false, false)
371
405
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
372
405
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
373
405
INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops",
374
                    "Hexagon Hardware Loops", false, false)
375
376
405
FunctionPass *llvm::createHexagonHardwareLoops() {
377
405
  return new HexagonHardwareLoops();
378
405
}
379
380
860
bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
381
860
  DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
382
860
  if (skipFunction(*MF.getFunction()))
383
0
    return false;
384
860
385
860
  bool Changed = false;
386
860
387
860
  MLI = &getAnalysis<MachineLoopInfo>();
388
860
  MRI = &MF.getRegInfo();
389
860
  MDT = &getAnalysis<MachineDominatorTree>();
390
860
  const HexagonSubtarget &HST = MF.getSubtarget<HexagonSubtarget>();
391
860
  TII = HST.getInstrInfo();
392
860
  TRI = HST.getRegisterInfo();
393
860
394
860
  for (auto &L : *MLI)
395
194
    
if (194
!L->getParentLoop()194
) {
396
194
      bool L0Used = false;
397
194
      bool L1Used = false;
398
194
      Changed |= convertToHardwareLoop(L, L0Used, L1Used);
399
194
    }
400
860
401
860
  return Changed;
402
860
}
403
404
bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
405
                                                 unsigned &Reg,
406
                                                 int64_t &IVBump,
407
                                                 MachineInstr *&IVOp
408
136
                                                 ) const {
409
136
  MachineBasicBlock *Header = L->getHeader();
410
136
  MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
411
136
  MachineBasicBlock *Latch = L->getLoopLatch();
412
136
  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
413
136
  if (
!Header || 136
!Preheader136
||
!Latch136
||
!ExitingBlock136
)
414
0
    return false;
415
136
416
136
  // This pair represents an induction register together with an immediate
417
136
  // value that will be added to it in each loop iteration.
418
136
  using RegisterBump = std::pair<unsigned, int64_t>;
419
136
420
136
  // Mapping:  R.next -> (R, bump), where R, R.next and bump are derived
421
136
  // from an induction operation
422
136
  //   R.next = R + bump
423
136
  // where bump is an immediate value.
424
136
  using InductionMap = std::map<unsigned, RegisterBump>;
425
136
426
136
  InductionMap IndMap;
427
136
428
136
  using instr_iterator = MachineBasicBlock::instr_iterator;
429
136
430
136
  for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
431
436
       
I != E && 436
I->isPHI()436
;
++I300
) {
432
300
    MachineInstr *Phi = &*I;
433
300
434
300
    // Have a PHI instruction.  Get the operand that corresponds to the
435
300
    // latch block, and see if is a result of an addition of form "reg+imm",
436
300
    // where the "reg" is defined by the PHI node we are looking at.
437
900
    for (unsigned i = 1, n = Phi->getNumOperands(); 
i < n900
;
i += 2600
) {
438
600
      if (Phi->getOperand(i+1).getMBB() != Latch)
439
300
        continue;
440
300
441
300
      unsigned PhiOpReg = Phi->getOperand(i).getReg();
442
300
      MachineInstr *DI = MRI->getVRegDef(PhiOpReg);
443
300
444
300
      if (
DI->getDesc().isAdd()300
) {
445
168
        // If the register operand to the add is the PHI we're looking at, this
446
168
        // meets the induction pattern.
447
168
        unsigned IndReg = DI->getOperand(1).getReg();
448
168
        MachineOperand &Opnd2 = DI->getOperand(2);
449
168
        int64_t V;
450
168
        if (
MRI->getVRegDef(IndReg) == Phi && 168
checkForImmediate(Opnd2, V)167
) {
451
167
          unsigned UpdReg = DI->getOperand(0).getReg();
452
167
          IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
453
167
        }
454
168
      }
455
600
    }  // for (i)
456
300
  }  // for (instr)
457
136
458
136
  SmallVector<MachineOperand,2> Cond;
459
136
  MachineBasicBlock *TB = nullptr, *FB = nullptr;
460
136
  bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
461
136
  if (NotAnalyzed)
462
0
    return false;
463
136
464
136
  unsigned PredR, PredPos, PredRegFlags;
465
136
  if (!TII->getPredReg(Cond, PredR, PredPos, PredRegFlags))
466
0
    return false;
467
136
468
136
  MachineInstr *PredI = MRI->getVRegDef(PredR);
469
136
  if (!PredI->isCompare())
470
0
    return false;
471
136
472
136
  unsigned CmpReg1 = 0, CmpReg2 = 0;
473
136
  int CmpImm = 0, CmpMask = 0;
474
136
  bool CmpAnalyzed =
475
136
      TII->analyzeCompare(*PredI, CmpReg1, CmpReg2, CmpMask, CmpImm);
476
136
  // Fail if the compare was not analyzed, or it's not comparing a register
477
136
  // with an immediate value.  Not checking the mask here, since we handle
478
136
  // the individual compare opcodes (including A4_cmpb*) later on.
479
136
  if (!CmpAnalyzed)
480
0
    return false;
481
136
482
136
  // Exactly one of the input registers to the comparison should be among
483
136
  // the induction registers.
484
136
  InductionMap::iterator IndMapEnd = IndMap.end();
485
136
  InductionMap::iterator F = IndMapEnd;
486
136
  if (
CmpReg1 != 0136
) {
487
136
    InductionMap::iterator F1 = IndMap.find(CmpReg1);
488
136
    if (F1 != IndMapEnd)
489
111
      F = F1;
490
136
  }
491
136
  if (
CmpReg2 != 0136
) {
492
55
    InductionMap::iterator F2 = IndMap.find(CmpReg2);
493
55
    if (
F2 != IndMapEnd55
) {
494
25
      if (F != IndMapEnd)
495
0
        return false;
496
25
      F = F2;
497
25
    }
498
55
  }
499
136
  
if (136
F == IndMapEnd136
)
500
0
    return false;
501
136
502
136
  Reg = F->second.first;
503
136
  IVBump = F->second.second;
504
136
  IVOp = MRI->getVRegDef(F->first);
505
136
  return true;
506
136
}
507
508
// Return the comparison kind for the specified opcode.
509
HexagonHardwareLoops::Comparison::Kind
510
HexagonHardwareLoops::getComparisonKind(unsigned CondOpc,
511
                                        MachineOperand *InitialValue,
512
                                        const MachineOperand *EndValue,
513
171
                                        int64_t IVBump) const {
514
171
  Comparison::Kind Cmp = (Comparison::Kind)0;
515
171
  switch (CondOpc) {
516
84
  case Hexagon::C2_cmpeqi:
517
84
  case Hexagon::C2_cmpeq:
518
84
  case Hexagon::C2_cmpeqp:
519
84
    Cmp = Comparison::EQ;
520
84
    break;
521
0
  case Hexagon::C4_cmpneq:
522
0
  case Hexagon::C4_cmpneqi:
523
0
    Cmp = Comparison::NE;
524
0
    break;
525
11
  case Hexagon::C4_cmplte:
526
11
    Cmp = Comparison::LEs;
527
11
    break;
528
0
  case Hexagon::C4_cmplteu:
529
0
    Cmp = Comparison::LEu;
530
0
    break;
531
2
  case Hexagon::C2_cmpgtui:
532
2
  case Hexagon::C2_cmpgtu:
533
2
  case Hexagon::C2_cmpgtup:
534
2
    Cmp = Comparison::GTu;
535
2
    break;
536
74
  case Hexagon::C2_cmpgti:
537
74
  case Hexagon::C2_cmpgt:
538
74
  case Hexagon::C2_cmpgtp:
539
74
    Cmp = Comparison::GTs;
540
74
    break;
541
0
  default:
542
0
    return (Comparison::Kind)0;
543
171
  }
544
171
  return Cmp;
545
171
}
546
547
/// \brief Analyze the statements in a loop to determine if the loop has
548
/// a computable trip count and, if so, return a value that represents
549
/// the trip count expression.
550
///
551
/// This function iterates over the phi nodes in the loop to check for
552
/// induction variable patterns that are used in the calculation for
553
/// the number of time the loop is executed.
554
CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
555
137
    SmallVectorImpl<MachineInstr *> &OldInsts) {
556
137
  MachineBasicBlock *TopMBB = L->getTopBlock();
557
137
  MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
558
137
  assert(PI != TopMBB->pred_end() &&
559
137
         "Loop must have more than one incoming edge!");
560
137
  MachineBasicBlock *Backedge = *PI++;
561
137
  if (PI == TopMBB->pred_end())  // dead loop?
562
1
    return nullptr;
563
136
  MachineBasicBlock *Incoming = *PI++;
564
136
  if (PI != TopMBB->pred_end())  // multiple backedges?
565
0
    return nullptr;
566
136
567
136
  // Make sure there is one incoming and one backedge and determine which
568
136
  // is which.
569
136
  
if (136
L->contains(Incoming)136
) {
570
134
    if (L->contains(Backedge))
571
0
      return nullptr;
572
134
    std::swap(Incoming, Backedge);
573
136
  } else 
if (2
!L->contains(Backedge)2
)
574
0
    return nullptr;
575
136
576
136
  // Look for the cmp instruction to determine if we can get a useful trip
577
136
  // count.  The trip count can be either a register or an immediate.  The
578
136
  // location of the value depends upon the type (reg or imm).
579
136
  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
580
136
  if (!ExitingBlock)
581
0
    return nullptr;
582
136
583
136
  unsigned IVReg = 0;
584
136
  int64_t IVBump = 0;
585
136
  MachineInstr *IVOp;
586
136
  bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp);
587
136
  if (!FoundIV)
588
0
    return nullptr;
589
136
590
136
  MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
591
136
592
136
  MachineOperand *InitialValue = nullptr;
593
136
  MachineInstr *IV_Phi = MRI->getVRegDef(IVReg);
594
136
  MachineBasicBlock *Latch = L->getLoopLatch();
595
408
  for (unsigned i = 1, n = IV_Phi->getNumOperands(); 
i < n408
;
i += 2272
) {
596
272
    MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB();
597
272
    if (MBB == Preheader)
598
136
      InitialValue = &IV_Phi->getOperand(i);
599
136
    else 
if (136
MBB == Latch136
)
600
136
      IVReg = IV_Phi->getOperand(i).getReg();  // Want IV reg after bump.
601
272
  }
602
136
  if (!InitialValue)
603
0
    return nullptr;
604
136
605
136
  SmallVector<MachineOperand,2> Cond;
606
136
  MachineBasicBlock *TB = nullptr, *FB = nullptr;
607
136
  bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
608
136
  if (NotAnalyzed)
609
0
    return nullptr;
610
136
611
136
  MachineBasicBlock *Header = L->getHeader();
612
136
  // TB must be non-null.  If FB is also non-null, one of them must be
613
136
  // the header.  Otherwise, branch to TB could be exiting the loop, and
614
136
  // the fall through can go to the header.
615
136
  assert (TB && "Exit block without a branch?");
616
136
  if (
ExitingBlock != Latch && 136
(TB == Latch || 0
FB == Latch0
)) {
617
0
    MachineBasicBlock *LTB = nullptr, *LFB = nullptr;
618
0
    SmallVector<MachineOperand,2> LCond;
619
0
    bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false);
620
0
    if (NotAnalyzed)
621
0
      return nullptr;
622
0
    
if (0
TB == Latch0
)
623
0
      
TB = (LTB == Header) ? 0
LTB0
:
LFB0
;
624
0
    else
625
0
      
FB = (LTB == Header) ? 0
LTB0
:
LFB0
;
626
0
  }
627
136
  assert ((!FB || TB == Header || FB == Header) && "Branches not to header?");
628
136
  if (
!TB || 136
(FB && 136
TB != Header135
&&
FB != Header6
))
629
0
    return nullptr;
630
136
631
136
  // Branches of form "if (!P) ..." cause HexagonInstrInfo::AnalyzeBranch
632
136
  // to put imm(0), followed by P in the vector Cond.
633
136
  // If TB is not the header, it means that the "not-taken" path must lead
634
136
  // to the header.
635
136
  bool Negated = TII->predOpcodeHasNot(Cond) ^ (TB != Header);
636
136
  unsigned PredReg, PredPos, PredRegFlags;
637
136
  if (!TII->getPredReg(Cond, PredReg, PredPos, PredRegFlags))
638
0
    return nullptr;
639
136
  MachineInstr *CondI = MRI->getVRegDef(PredReg);
640
136
  unsigned CondOpc = CondI->getOpcode();
641
136
642
136
  unsigned CmpReg1 = 0, CmpReg2 = 0;
643
136
  int Mask = 0, ImmValue = 0;
644
136
  bool AnalyzedCmp =
645
136
      TII->analyzeCompare(*CondI, CmpReg1, CmpReg2, Mask, ImmValue);
646
136
  if (!AnalyzedCmp)
647
0
    return nullptr;
648
136
649
136
  // The comparison operator type determines how we compute the loop
650
136
  // trip count.
651
136
  OldInsts.push_back(CondI);
652
136
  OldInsts.push_back(IVOp);
653
136
654
136
  // Sadly, the following code gets information based on the position
655
136
  // of the operands in the compare instruction.  This has to be done
656
136
  // this way, because the comparisons check for a specific relationship
657
136
  // between the operands (e.g. is-less-than), rather than to find out
658
136
  // what relationship the operands are in (as on PPC).
659
136
  Comparison::Kind Cmp;
660
136
  bool isSwapped = false;
661
136
  const MachineOperand &Op1 = CondI->getOperand(1);
662
136
  const MachineOperand &Op2 = CondI->getOperand(2);
663
136
  const MachineOperand *EndValue = nullptr;
664
136
665
136
  if (
Op1.isReg()136
) {
666
136
    if (
Op2.isImm() || 136
Op1.getReg() == IVReg55
)
667
111
      EndValue = &Op2;
668
25
    else {
669
25
      EndValue = &Op1;
670
25
      isSwapped = true;
671
25
    }
672
136
  }
673
136
674
136
  if (!EndValue)
675
0
    return nullptr;
676
136
677
136
  Cmp = getComparisonKind(CondOpc, InitialValue, EndValue, IVBump);
678
136
  if (!Cmp)
679
0
    return nullptr;
680
136
  
if (136
Negated136
)
681
102
    Cmp = Comparison::getNegatedComparison(Cmp);
682
136
  if (isSwapped)
683
25
    Cmp = Comparison::getSwappedComparison(Cmp);
684
136
685
136
  if (
InitialValue->isReg()136
) {
686
136
    unsigned R = InitialValue->getReg();
687
136
    MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
688
136
    if (!MDT->properlyDominates(DefBB, Header))
689
0
      return nullptr;
690
136
    OldInsts.push_back(MRI->getVRegDef(R));
691
136
  }
692
136
  
if (136
EndValue->isReg()136
) {
693
55
    unsigned R = EndValue->getReg();
694
55
    MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
695
55
    if (!MDT->properlyDominates(DefBB, Header))
696
1
      return nullptr;
697
54
    OldInsts.push_back(MRI->getVRegDef(R));
698
54
  }
699
136
700
135
  return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp);
701
137
}
702
703
/// \brief Helper function that returns the expression that represents the
704
/// number of times a loop iterates.  The function takes the operands that
705
/// represent the loop start value, loop end value, and induction value.
706
/// Based upon these operands, the function attempts to compute the trip count.
707
CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
708
                                               const MachineOperand *Start,
709
                                               const MachineOperand *End,
710
                                               unsigned IVReg,
711
                                               int64_t IVBump,
712
135
                                               Comparison::Kind Cmp) const {
713
135
  // Cannot handle comparison EQ, i.e. while (A == B).
714
135
  if (Cmp == Comparison::EQ)
715
0
    return nullptr;
716
135
717
135
  // Check if either the start or end values are an assignment of an immediate.
718
135
  // If so, use the immediate value rather than the register.
719
135
  
if (135
Start->isReg()135
) {
720
135
    const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg());
721
135
    if (
StartValInstr && 135
(StartValInstr->getOpcode() == Hexagon::A2_tfrsi ||
722
67
                          StartValInstr->getOpcode() == Hexagon::A2_tfrpi))
723
68
      Start = &StartValInstr->getOperand(1);
724
135
  }
725
135
  if (
End->isReg()135
) {
726
54
    const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
727
54
    if (
EndValInstr && 54
(EndValInstr->getOpcode() == Hexagon::A2_tfrsi ||
728
38
                        EndValInstr->getOpcode() == Hexagon::A2_tfrpi))
729
16
      End = &EndValInstr->getOperand(1);
730
54
  }
731
135
732
135
  if (
!Start->isReg() && 135
!Start->isImm()68
)
733
0
    return nullptr;
734
135
  
if (135
!End->isReg() && 135
!End->isImm()97
)
735
0
    return nullptr;
736
135
737
135
  bool CmpLess =     Cmp & Comparison::L;
738
135
  bool CmpGreater =  Cmp & Comparison::G;
739
135
  bool CmpHasEqual = Cmp & Comparison::EQ;
740
135
741
135
  // Avoid certain wrap-arounds.  This doesn't detect all wrap-arounds.
742
135
  if (
CmpLess && 135
IVBump < 054
)
743
135
    // Loop going while iv is "less" with the iv value going down.  Must wrap.
744
0
    return nullptr;
745
135
746
135
  
if (135
CmpGreater && 135
IVBump > 02
)
747
135
    // Loop going while iv is "greater" with the iv value going up.  Must wrap.
748
0
    return nullptr;
749
135
750
135
  // Phis that may feed into the loop.
751
135
  LoopFeederMap LoopFeederPhi;
752
135
753
135
  // Check if the initial value may be zero and can be decremented in the first
754
135
  // iteration. If the value is zero, the endloop instruction will not decrement
755
135
  // the loop counter, so we shouldn't generate a hardware loop in this case.
756
135
  if (loopCountMayWrapOrUnderFlow(Start, End, Loop->getLoopPreheader(), Loop,
757
135
                                  LoopFeederPhi))
758
3
      return nullptr;
759
132
760
132
  
if (132
Start->isImm() && 132
End->isImm()68
) {
761
44
    // Both, start and end are immediates.
762
44
    int64_t StartV = Start->getImm();
763
44
    int64_t EndV = End->getImm();
764
44
    int64_t Dist = EndV - StartV;
765
44
    if (Dist == 0)
766
0
      return nullptr;
767
44
768
44
    bool Exact = (Dist % IVBump) == 0;
769
44
770
44
    if (
Cmp == Comparison::NE44
) {
771
30
      if (!Exact)
772
0
        return nullptr;
773
30
      
if (30
(Dist < 0) ^ (IVBump < 0)30
)
774
0
        return nullptr;
775
44
    }
776
44
777
44
    // For comparisons that include the final value (i.e. include equality
778
44
    // with the final value), we need to increase the distance by 1.
779
44
    
if (44
CmpHasEqual44
)
780
13
      
Dist = Dist > 0 ? 13
Dist+113
:
Dist-10
;
781
44
782
44
    // For the loop to iterate, CmpLess should imply Dist > 0.  Similarly,
783
44
    // CmpGreater should imply Dist < 0.  These conditions could actually
784
44
    // fail, for example, in unreachable code (which may still appear to be
785
44
    // reachable in the CFG).
786
44
    if (
(CmpLess && 44
Dist < 014
) ||
(CmpGreater && 44
Dist > 00
))
787
0
      return nullptr;
788
44
789
44
    // "Normalized" distance, i.e. with the bump set to +-1.
790
44
    
int64_t Dist1 = (IVBump > 0) ? 44
(Dist + (IVBump - 1)) / IVBump43
791
1
                                 : (-Dist + (-IVBump - 1)) / (-IVBump);
792
44
    assert (Dist1 > 0 && "Fishy thing.  Both operands have the same sign.");
793
44
794
44
    uint64_t Count = Dist1;
795
44
796
44
    if (Count > 0xFFFFFFFFULL)
797
0
      return nullptr;
798
44
799
44
    return new CountValue(CountValue::CV_Immediate, Count);
800
44
  }
801
88
802
88
  // A general case: Start and End are some values, but the actual
803
88
  // iteration count may not be available.  If it is not, insert
804
88
  // a computation of it into the preheader.
805
88
806
88
  // If the induction variable bump is not a power of 2, quit.
807
88
  // Othwerise we'd need a general integer division.
808
88
  
if (88
!isPowerOf2_64(std::abs(IVBump))88
)
809
0
    return nullptr;
810
88
811
88
  MachineBasicBlock *PH = MLI->findLoopPreheader(Loop, SpecPreheader);
812
88
  assert (PH && "Should have a preheader by now");
813
88
  MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator();
814
88
  DebugLoc DL;
815
88
  if (InsertPos != PH->end())
816
8
    DL = InsertPos->getDebugLoc();
817
88
818
88
  // If Start is an immediate and End is a register, the trip count
819
88
  // will be "reg - imm".  Hexagon's "subtract immediate" instruction
820
88
  // is actually "reg + -imm".
821
88
822
88
  // If the loop IV is going downwards, i.e. if the bump is negative,
823
88
  // then the iteration count (computed as End-Start) will need to be
824
88
  // negated.  To avoid the negation, just swap Start and End.
825
88
  if (
IVBump < 088
) {
826
36
    std::swap(Start, End);
827
36
    IVBump = -IVBump;
828
36
  }
829
88
  // Cmp may now have a wrong direction, e.g.  LEs may now be GEs.
830
88
  // Signedness, and "including equality" are preserved.
831
88
832
29
  bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm)
833
29
  bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg)
834
88
835
88
  int64_t StartV = 0, EndV = 0;
836
88
  if (Start->isImm())
837
59
    StartV = Start->getImm();
838
88
  if (End->isImm())
839
15
    EndV = End->getImm();
840
88
841
88
  int64_t AdjV = 0;
842
88
  // To compute the iteration count, we would need this computation:
843
88
  //   Count = (End - Start + (IVBump-1)) / IVBump
844
88
  // or, when CmpHasEqual:
845
88
  //   Count = (End - Start + (IVBump-1)+1) / IVBump
846
88
  // The "IVBump-1" part is the adjustment (AdjV).  We can avoid
847
88
  // generating an instruction specifically to add it if we can adjust
848
88
  // the immediate values for Start or End.
849
88
850
88
  if (
CmpHasEqual88
) {
851
21
    // Need to add 1 to the total iteration count.
852
21
    if (Start->isImm())
853
5
      StartV--;
854
16
    else 
if (16
End->isImm()16
)
855
10
      EndV++;
856
16
    else
857
6
      AdjV += 1;
858
21
  }
859
88
860
88
  if (
Cmp != Comparison::NE88
) {
861
42
    if (Start->isImm())
862
19
      StartV -= (IVBump-1);
863
23
    else 
if (23
End->isImm()23
)
864
10
      EndV += (IVBump-1);
865
23
    else
866
13
      AdjV += (IVBump-1);
867
42
  }
868
88
869
88
  unsigned R = 0, SR = 0;
870
88
  if (
Start->isReg()88
) {
871
29
    R = Start->getReg();
872
29
    SR = Start->getSubReg();
873
88
  } else {
874
59
    R = End->getReg();
875
59
    SR = End->getSubReg();
876
59
  }
877
88
  const TargetRegisterClass *RC = MRI->getRegClass(R);
878
88
  // Hardware loops cannot handle 64-bit registers.  If it's a double
879
88
  // register, it has to have a subregister.
880
88
  if (
!SR && 88
RC == &Hexagon::DoubleRegsRegClass88
)
881
0
    return nullptr;
882
88
  const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass;
883
88
884
88
  // Compute DistR (register with the distance between Start and End).
885
88
  unsigned DistR, DistSR;
886
88
887
88
  // Avoid special case, where the start value is an imm(0).
888
88
  if (
Start->isImm() && 88
StartV == 059
) {
889
42
    DistR = End->getReg();
890
42
    DistSR = End->getSubReg();
891
88
  } else {
892
14
    const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) :
893
32
                              
(RegToImm ? 32
TII->get(Hexagon::A2_subri)15
:
894
32
                                          TII->get(Hexagon::A2_addi));
895
46
    if (
RegToReg || 46
RegToImm32
) {
896
29
      unsigned SubR = MRI->createVirtualRegister(IntRC);
897
29
      MachineInstrBuilder SubIB =
898
29
        BuildMI(*PH, InsertPos, DL, SubD, SubR);
899
29
900
29
      if (RegToReg)
901
14
        SubIB.addReg(End->getReg(), 0, End->getSubReg())
902
14
          .addReg(Start->getReg(), 0, Start->getSubReg());
903
29
      else
904
15
        SubIB.addImm(EndV)
905
15
          .addReg(Start->getReg(), 0, Start->getSubReg());
906
29
      DistR = SubR;
907
46
    } else {
908
17
      // If the loop has been unrolled, we should use the original loop count
909
17
      // instead of recalculating the value. This will avoid additional
910
17
      // 'Add' instruction.
911
17
      const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
912
17
      if (EndValInstr->getOpcode() == Hexagon::A2_addi &&
913
17
          
EndValInstr->getOperand(2).getImm() == StartV1
) {
914
1
        DistR = EndValInstr->getOperand(1).getReg();
915
17
      } else {
916
16
        unsigned SubR = MRI->createVirtualRegister(IntRC);
917
16
        MachineInstrBuilder SubIB =
918
16
          BuildMI(*PH, InsertPos, DL, SubD, SubR);
919
16
        SubIB.addReg(End->getReg(), 0, End->getSubReg())
920
16
             .addImm(-StartV);
921
16
        DistR = SubR;
922
16
      }
923
17
    }
924
46
    DistSR = 0;
925
46
  }
926
88
927
88
  // From DistR, compute AdjR (register with the adjusted distance).
928
88
  unsigned AdjR, AdjSR;
929
88
930
88
  if (
AdjV == 088
) {
931
77
    AdjR = DistR;
932
77
    AdjSR = DistSR;
933
88
  } else {
934
11
    // Generate CountR = ADD DistR, AdjVal
935
11
    unsigned AddR = MRI->createVirtualRegister(IntRC);
936
11
    MCInstrDesc const &AddD = TII->get(Hexagon::A2_addi);
937
11
    BuildMI(*PH, InsertPos, DL, AddD, AddR)
938
11
      .addReg(DistR, 0, DistSR)
939
11
      .addImm(AdjV);
940
11
941
11
    AdjR = AddR;
942
11
    AdjSR = 0;
943
11
  }
944
88
945
88
  // From AdjR, compute CountR (register with the final count).
946
88
  unsigned CountR, CountSR;
947
88
948
88
  if (
IVBump == 188
) {
949
43
    CountR = AdjR;
950
43
    CountSR = AdjSR;
951
88
  } else {
952
45
    // The IV bump is a power of two. Log_2(IV bump) is the shift amount.
953
45
    unsigned Shift = Log2_32(IVBump);
954
45
955
45
    // Generate NormR = LSR DistR, Shift.
956
45
    unsigned LsrR = MRI->createVirtualRegister(IntRC);
957
45
    const MCInstrDesc &LsrD = TII->get(Hexagon::S2_lsr_i_r);
958
45
    BuildMI(*PH, InsertPos, DL, LsrD, LsrR)
959
45
      .addReg(AdjR, 0, AdjSR)
960
45
      .addImm(Shift);
961
45
962
45
    CountR = LsrR;
963
45
    CountSR = 0;
964
45
  }
965
135
966
135
  return new CountValue(CountValue::CV_Register, CountR, CountSR);
967
135
}
968
969
/// \brief Return true if the operation is invalid within hardware loop.
970
bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI,
971
2.96k
                                                  bool IsInnerHWLoop) const {
972
2.96k
  // Call is not allowed because the callee may use a hardware loop except for
973
2.96k
  // the case when the call never returns.
974
2.96k
  if (MI->getDesc().isCall())
975
15
    return !TII->doesNotReturn(*MI);
976
2.95k
977
2.95k
  // Check if the instruction defines a hardware loop register.
978
2.95k
  using namespace Hexagon;
979
2.95k
980
2.95k
  static const unsigned Regs01[] = { LC0, SA0, LC1, SA1 };
981
2.95k
  static const unsigned Regs1[]  = { LC1, SA1 };
982
2.72k
  auto CheckRegs = IsInnerHWLoop ? makeArrayRef(Regs01, array_lengthof(Regs01))
983
223
                                 : makeArrayRef(Regs1, array_lengthof(Regs1));
984
2.95k
  for (unsigned R : CheckRegs)
985
11.3k
    
if (11.3k
MI->modifiesRegister(R, TRI)11.3k
)
986
0
      return true;
987
2.95k
988
2.95k
  return false;
989
2.95k
}
990
991
/// \brief Return true if the loop contains an instruction that inhibits
992
/// the use of the hardware loop instruction.
993
bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L,
994
208
    bool IsInnerHWLoop) const {
995
208
  const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
996
208
  DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber(););
997
473
  for (unsigned i = 0, e = Blocks.size(); 
i != e473
;
++i265
) {
998
278
    MachineBasicBlock *MBB = Blocks[i];
999
278
    for (MachineBasicBlock::iterator
1000
3.23k
           MII = MBB->begin(), E = MBB->end(); 
MII != E3.23k
;
++MII2.95k
) {
1001
2.96k
      const MachineInstr *MI = &*MII;
1002
2.96k
      if (
isInvalidLoopOperation(MI, IsInnerHWLoop)2.96k
) {
1003
13
        DEBUG(dbgs()<< "\nCannot convert to hw_loop due to:"; MI->dump(););
1004
13
        return true;
1005
13
      }
1006
2.96k
    }
1007
278
  }
1008
195
  return false;
1009
208
}
1010
1011
/// \brief Returns true if the instruction is dead.  This was essentially
1012
/// copied from DeadMachineInstructionElim::isDead, but with special cases
1013
/// for inline asm, physical registers and instructions with side effects
1014
/// removed.
1015
bool HexagonHardwareLoops::isDead(const MachineInstr *MI,
1016
450
                              SmallVectorImpl<MachineInstr *> &DeadPhis) const {
1017
450
  // Examine each operand.
1018
1.08k
  for (unsigned i = 0, e = MI->getNumOperands(); 
i != e1.08k
;
++i634
) {
1019
862
    const MachineOperand &MO = MI->getOperand(i);
1020
862
    if (
!MO.isReg() || 862
!MO.isDef()694
)
1021
412
      continue;
1022
450
1023
450
    unsigned Reg = MO.getReg();
1024
450
    if (MRI->use_nodbg_empty(Reg))
1025
164
      continue;
1026
286
1027
286
    using use_nodbg_iterator = MachineRegisterInfo::use_nodbg_iterator;
1028
286
1029
286
    // This instruction has users, but if the only user is the phi node for the
1030
286
    // parent block, and the only use of that phi node is this instruction, then
1031
286
    // this instruction is dead: both it (and the phi node) can be removed.
1032
286
    use_nodbg_iterator I = MRI->use_nodbg_begin(Reg);
1033
286
    use_nodbg_iterator End = MRI->use_nodbg_end();
1034
286
    if (
std::next(I) != End || 286
!I->getParent()->isPHI()188
)
1035
123
      return false;
1036
163
1037
163
    MachineInstr *OnePhi = I->getParent();
1038
453
    for (unsigned j = 0, f = OnePhi->getNumOperands(); 
j != f453
;
++j290
) {
1039
395
      const MachineOperand &OPO = OnePhi->getOperand(j);
1040
395
      if (
!OPO.isReg() || 395
!OPO.isDef()279
)
1041
232
        continue;
1042
163
1043
163
      unsigned OPReg = OPO.getReg();
1044
163
      use_nodbg_iterator nextJ;
1045
163
      for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg);
1046
266
           
J != End266
;
J = nextJ103
) {
1047
208
        nextJ = std::next(J);
1048
208
        MachineOperand &Use = *J;
1049
208
        MachineInstr *UseMI = Use.getParent();
1050
208
1051
208
        // If the phi node has a user that is not MI, bail.
1052
208
        if (MI != UseMI)
1053
105
          return false;
1054
208
      }
1055
395
    }
1056
58
    DeadPhis.push_back(OnePhi);
1057
58
  }
1058
450
1059
450
  // If there are no defs with uses, the instruction is dead.
1060
222
  return true;
1061
450
}
1062
1063
450
void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
1064
450
  // This procedure was essentially copied from DeadMachineInstructionElim.
1065
450
1066
450
  SmallVector<MachineInstr*, 1> DeadPhis;
1067
450
  if (
isDead(MI, DeadPhis)450
) {
1068
222
    DEBUG(dbgs() << "HW looping will remove: " << *MI);
1069
222
1070
222
    // It is possible that some DBG_VALUE instructions refer to this
1071
222
    // instruction.  Examine each def operand for such references;
1072
222
    // if found, mark the DBG_VALUE as undef (but don't delete it).
1073
856
    for (unsigned i = 0, e = MI->getNumOperands(); 
i != e856
;
++i634
) {
1074
634
      const MachineOperand &MO = MI->getOperand(i);
1075
634
      if (
!MO.isReg() || 634
!MO.isDef()466
)
1076
412
        continue;
1077
222
      unsigned Reg = MO.getReg();
1078
222
      MachineRegisterInfo::use_iterator nextI;
1079
222
      for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
1080
281
           E = MRI->use_end(); 
I != E281
;
I = nextI59
) {
1081
59
        nextI = std::next(I);  // I is invalidated by the setReg
1082
59
        MachineOperand &Use = *I;
1083
59
        MachineInstr *UseMI = I->getParent();
1084
59
        if (UseMI == MI)
1085
0
          continue;
1086
59
        
if (59
Use.isDebug()59
)
1087
1
          UseMI->getOperand(0).setReg(0U);
1088
59
      }
1089
634
    }
1090
222
1091
222
    MI->eraseFromParent();
1092
280
    for (unsigned i = 0; 
i < DeadPhis.size()280
;
++i58
)
1093
58
      DeadPhis[i]->eraseFromParent();
1094
222
  }
1095
450
}
1096
1097
/// \brief Check if the loop is a candidate for converting to a hardware
1098
/// loop.  If so, then perform the transformation.
1099
///
1100
/// This function works on innermost loops first.  A loop can be converted
1101
/// if it is a counting loop; either a register value or an immediate.
1102
///
1103
/// The code makes several assumptions about the representation of the loop
1104
/// in llvm.
1105
bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L,
1106
                                                 bool &RecL0used,
1107
210
                                                 bool &RecL1used) {
1108
210
  // This is just for sanity.
1109
210
  assert(L->getHeader() && "Loop without a header?");
1110
210
1111
210
  bool Changed = false;
1112
210
  bool L0Used = false;
1113
210
  bool L1Used = false;
1114
210
1115
210
  // Process nested loops first.
1116
226
  for (MachineLoop::iterator I = L->begin(), E = L->end(); 
I != E226
;
++I16
) {
1117
16
    Changed |= convertToHardwareLoop(*I, RecL0used, RecL1used);
1118
16
    L0Used |= RecL0used;
1119
16
    L1Used |= RecL1used;
1120
16
  }
1121
210
1122
210
  // If a nested loop has been converted, then we can't convert this loop.
1123
210
  if (
Changed && 210
L0Used9
&&
L1Used9
)
1124
2
    return Changed;
1125
208
1126
208
  unsigned LOOP_i;
1127
208
  unsigned LOOP_r;
1128
208
  unsigned ENDLOOP;
1129
208
1130
208
  // Flag used to track loopN instruction:
1131
208
  // 1 - Hardware loop is being generated for the inner most loop.
1132
208
  // 0 - Hardware loop is being generated for the outer loop.
1133
208
  unsigned IsInnerHWLoop = 1;
1134
208
1135
208
  if (
L0Used208
) {
1136
7
    LOOP_i = Hexagon::J2_loop1i;
1137
7
    LOOP_r = Hexagon::J2_loop1r;
1138
7
    ENDLOOP = Hexagon::ENDLOOP1;
1139
7
    IsInnerHWLoop = 0;
1140
208
  } else {
1141
201
    LOOP_i = Hexagon::J2_loop0i;
1142
201
    LOOP_r = Hexagon::J2_loop0r;
1143
201
    ENDLOOP = Hexagon::ENDLOOP0;
1144
201
  }
1145
208
1146
#ifndef NDEBUG
1147
  // Stop trying after reaching the limit (if any).
1148
  int Limit = HWLoopLimit;
1149
  if (Limit >= 0) {
1150
    if (Counter >= HWLoopLimit)
1151
      return false;
1152
    Counter++;
1153
  }
1154
#endif
1155
1156
208
  // Does the loop contain any invalid instructions?
1157
208
  if (containsInvalidInstruction(L, IsInnerHWLoop))
1158
13
    return false;
1159
195
1160
195
  MachineBasicBlock *LastMBB = L->findLoopControlBlock();
1161
195
  // Don't generate hw loop if the loop has more than one exit.
1162
195
  if (!LastMBB)
1163
13
    return false;
1164
182
1165
182
  MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
1166
182
  if (LastI == LastMBB->end())
1167
0
    return false;
1168
182
1169
182
  // Is the induction variable bump feeding the latch condition?
1170
182
  
if (182
!fixupInductionVariable(L)182
)
1171
45
    return false;
1172
137
1173
137
  // Ensure the loop has a preheader: the loop instruction will be
1174
137
  // placed there.
1175
137
  MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
1176
137
  if (
!Preheader137
) {
1177
0
    Preheader = createPreheaderForLoop(L);
1178
0
    if (!Preheader)
1179
0
      return false;
1180
137
  }
1181
137
1182
137
  MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
1183
137
1184
137
  SmallVector<MachineInstr*, 2> OldInsts;
1185
137
  // Are we able to determine the trip count for the loop?
1186
137
  CountValue *TripCount = getLoopTripCount(L, OldInsts);
1187
137
  if (!TripCount)
1188
5
    return false;
1189
132
1190
132
  // Is the trip count available in the preheader?
1191
132
  
if (132
TripCount->isReg()132
) {
1192
88
    // There will be a use of the register inserted into the preheader,
1193
88
    // so make sure that the register is actually defined at that point.
1194
88
    MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg());
1195
88
    MachineBasicBlock *BBDef = TCDef->getParent();
1196
88
    if (!MDT->dominates(BBDef, Preheader))
1197
0
      return false;
1198
132
  }
1199
132
1200
132
  // Determine the loop start.
1201
132
  MachineBasicBlock *TopBlock = L->getTopBlock();
1202
132
  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1203
132
  MachineBasicBlock *LoopStart = nullptr;
1204
132
  if (
ExitingBlock != L->getLoopLatch()132
) {
1205
0
    MachineBasicBlock *TB = nullptr, *FB = nullptr;
1206
0
    SmallVector<MachineOperand, 2> Cond;
1207
0
1208
0
    if (TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false))
1209
0
      return false;
1210
0
1211
0
    
if (0
L->contains(TB)0
)
1212
0
      LoopStart = TB;
1213
0
    else 
if (0
L->contains(FB)0
)
1214
0
      LoopStart = FB;
1215
0
    else
1216
0
      return false;
1217
132
  }
1218
132
  else
1219
132
    LoopStart = TopBlock;
1220
132
1221
132
  // Convert the loop to a hardware loop.
1222
132
  
DEBUG132
(dbgs() << "Change to hardware loop at "; L->dump());
1223
132
  DebugLoc DL;
1224
132
  if (InsertPos != Preheader->end())
1225
21
    DL = InsertPos->getDebugLoc();
1226
132
1227
132
  if (
TripCount->isReg()132
) {
1228
88
    // Create a copy of the loop count register.
1229
88
    unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
1230
88
    BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg)
1231
88
      .addReg(TripCount->getReg(), 0, TripCount->getSubReg());
1232
88
    // Add the Loop instruction to the beginning of the loop.
1233
88
    BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)).addMBB(LoopStart)
1234
88
      .addReg(CountReg);
1235
132
  } else {
1236
44
    assert(TripCount->isImm() && "Expecting immediate value for trip count");
1237
44
    // Add the Loop immediate instruction to the beginning of the loop,
1238
44
    // if the immediate fits in the instructions.  Otherwise, we need to
1239
44
    // create a new virtual register.
1240
44
    int64_t CountImm = TripCount->getImm();
1241
44
    if (
!TII->isValidOffset(LOOP_i, CountImm, TRI)44
) {
1242
10
      unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
1243
10
      BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg)
1244
10
        .addImm(CountImm);
1245
10
      BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r))
1246
10
        .addMBB(LoopStart).addReg(CountReg);
1247
10
    } else
1248
34
      BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_i))
1249
34
        .addMBB(LoopStart).addImm(CountImm);
1250
44
  }
1251
132
1252
132
  // Make sure the loop start always has a reference in the CFG.  We need
1253
132
  // to create a BlockAddress operand to get this mechanism to work both the
1254
132
  // MachineBasicBlock and BasicBlock objects need the flag set.
1255
132
  LoopStart->setHasAddressTaken();
1256
132
  // This line is needed to set the hasAddressTaken flag on the BasicBlock
1257
132
  // object.
1258
132
  BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
1259
132
1260
132
  // Replace the loop branch with an endloop instruction.
1261
132
  DebugLoc LastIDL = LastI->getDebugLoc();
1262
132
  BuildMI(*LastMBB, LastI, LastIDL, TII->get(ENDLOOP)).addMBB(LoopStart);
1263
132
1264
132
  // The loop ends with either:
1265
132
  //  - a conditional branch followed by an unconditional branch, or
1266
132
  //  - a conditional branch to the loop start.
1267
132
  if (LastI->getOpcode() == Hexagon::J2_jumpt ||
1268
132
      
LastI->getOpcode() == Hexagon::J2_jumpf93
) {
1269
132
    // Delete one and change/add an uncond. branch to out of the loop.
1270
132
    MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
1271
132
    LastI = LastMBB->erase(LastI);
1272
132
    if (
!L->contains(BranchTarget)132
) {
1273
6
      if (LastI != LastMBB->end())
1274
6
        LastI = LastMBB->erase(LastI);
1275
6
      SmallVector<MachineOperand, 0> Cond;
1276
6
      TII->insertBranch(*LastMBB, BranchTarget, nullptr, Cond, LastIDL);
1277
6
    }
1278
0
  } else {
1279
0
    // Conditional branch to loop start; just delete it.
1280
0
    LastMBB->erase(LastI);
1281
0
  }
1282
132
  delete TripCount;
1283
132
1284
132
  // The induction operation and the comparison may now be
1285
132
  // unneeded. If these are unneeded, then remove them.
1286
582
  for (unsigned i = 0; 
i < OldInsts.size()582
;
++i450
)
1287
450
    removeIfDead(OldInsts[i]);
1288
132
1289
132
  ++NumHWLoops;
1290
132
1291
132
  // Set RecL1used and RecL0used only after hardware loop has been
1292
132
  // successfully generated. Doing it earlier can cause wrong loop instruction
1293
132
  // to be used.
1294
132
  if (L0Used) // Loop0 was already used. So, the correct loop must be loop1.
1295
4
    RecL1used = true;
1296
132
  else
1297
128
    RecL0used = true;
1298
132
1299
132
  return true;
1300
210
}
1301
1302
bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
1303
3
                                            MachineInstr *CmpI) {
1304
3
  assert (BumpI != CmpI && "Bump and compare in the same instruction?");
1305
3
1306
3
  MachineBasicBlock *BB = BumpI->getParent();
1307
3
  if (CmpI->getParent() != BB)
1308
2
    return false;
1309
1
1310
1
  using instr_iterator = MachineBasicBlock::instr_iterator;
1311
1
1312
1
  // Check if things are in order to begin with.
1313
2
  for (instr_iterator I(BumpI), E = BB->instr_end(); 
I != E2
;
++I1
)
1314
2
    
if (2
&*I == CmpI2
)
1315
1
      return true;
1316
1
1317
1
  // Out of order.
1318
0
  unsigned PredR = CmpI->getOperand(0).getReg();
1319
0
  bool FoundBump = false;
1320
0
  instr_iterator CmpIt = CmpI->getIterator(), NextIt = std::next(CmpIt);
1321
0
  for (instr_iterator I = NextIt, E = BB->instr_end(); 
I != E0
;
++I0
) {
1322
0
    MachineInstr *In = &*I;
1323
0
    for (unsigned i = 0, n = In->getNumOperands(); 
i < n0
;
++i0
) {
1324
0
      MachineOperand &MO = In->getOperand(i);
1325
0
      if (
MO.isReg() && 0
MO.isUse()0
) {
1326
0
        if (MO.getReg() == PredR)  // Found an intervening use of PredR.
1327
0
          return false;
1328
0
      }
1329
0
    }
1330
0
1331
0
    
if (0
In == BumpI0
) {
1332
0
      BB->splice(++BumpI->getIterator(), BB, CmpI->getIterator());
1333
0
      FoundBump = true;
1334
0
      break;
1335
0
    }
1336
0
  }
1337
0
  assert (FoundBump && "Cannot determine instruction order");
1338
0
  return FoundBump;
1339
3
}
1340
1341
/// This function is required to break recursion. Visiting phis in a loop may
1342
/// result in recursion during compilation. We break the recursion by making
1343
/// sure that we visit a MachineOperand and its definition in a
1344
/// MachineInstruction only once. If we attempt to visit more than once, then
1345
/// there is recursion, and will return false.
1346
bool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A,
1347
                                        MachineInstr *MI,
1348
                                        const MachineOperand *MO,
1349
3
                                        LoopFeederMap &LoopFeederPhi) const {
1350
3
  if (
LoopFeederPhi.find(MO->getReg()) == LoopFeederPhi.end()3
) {
1351
3
    const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
1352
3
    DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber(););
1353
3
    // Ignore all BBs that form Loop.
1354
8
    for (unsigned i = 0, e = Blocks.size(); 
i != e8
;
++i5
) {
1355
5
      MachineBasicBlock *MBB = Blocks[i];
1356
5
      if (A == MBB)
1357
0
        return false;
1358
5
    }
1359
3
    MachineInstr *Def = MRI->getVRegDef(MO->getReg());
1360
3
    LoopFeederPhi.insert(std::make_pair(MO->getReg(), Def));
1361
3
    return true;
1362
3
  } else
1363
3
    // Already visited node.
1364
0
    return false;
1365
0
}
1366
1367
/// Return true if a Phi may generate a value that can underflow.
1368
/// This function calls loopCountMayWrapOrUnderFlow for each Phi operand.
1369
bool HexagonHardwareLoops::phiMayWrapOrUnderflow(
1370
    MachineInstr *Phi, const MachineOperand *EndVal, MachineBasicBlock *MBB,
1371
2
    MachineLoop *L, LoopFeederMap &LoopFeederPhi) const {
1372
2
  assert(Phi->isPHI() && "Expecting a Phi.");
1373
2
  // Walk through each Phi, and its used operands. Make sure that
1374
2
  // if there is recursion in Phi, we won't generate hardware loops.
1375
3
  for (int i = 1, n = Phi->getNumOperands(); 
i < n3
;
i += 21
)
1376
3
    
if (3
isLoopFeeder(L, MBB, Phi, &(Phi->getOperand(i)), LoopFeederPhi)3
)
1377
3
      
if (3
loopCountMayWrapOrUnderFlow(&(Phi->getOperand(i)), EndVal,
1378
3
                                      Phi->getParent(), L, LoopFeederPhi))
1379
2
        return true;
1380
0
  return false;
1381
2
}
1382
1383
/// Return true if the induction variable can underflow in the first iteration.
1384
/// An example, is an initial unsigned value that is 0 and is decrement in the
1385
/// first itertion of a do-while loop.  In this case, we cannot generate a
1386
/// hardware loop because the endloop instruction does not decrement the loop
1387
/// counter if it is <= 1. We only need to perform this analysis if the
1388
/// initial value is a register.
1389
///
1390
/// This function assumes the initial value may underfow unless proven
1391
/// otherwise. If the type is signed, then we don't care because signed
1392
/// underflow is undefined. We attempt to prove the initial value is not
1393
/// zero by perfoming a crude analysis of the loop counter. This function
1394
/// checks if the initial value is used in any comparison prior to the loop
1395
/// and, if so, assumes the comparison is a range check. This is inexact,
1396
/// but will catch the simple cases.
1397
bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow(
1398
    const MachineOperand *InitVal, const MachineOperand *EndVal,
1399
    MachineBasicBlock *MBB, MachineLoop *L,
1400
168
    LoopFeederMap &LoopFeederPhi) const {
1401
168
  // Only check register values since they are unknown.
1402
168
  if (!InitVal->isReg())
1403
68
    return false;
1404
100
1405
100
  
if (100
!EndVal->isImm()100
)
1406
14
    return false;
1407
86
1408
86
  // A register value that is assigned an immediate is a known value, and it
1409
86
  // won't underflow in the first iteration.
1410
86
  int64_t Imm;
1411
86
  if (checkForImmediate(*InitVal, Imm))
1412
3
    return (EndVal->getImm() == Imm);
1413
83
1414
83
  unsigned Reg = InitVal->getReg();
1415
83
1416
83
  // We don't know the value of a physical register.
1417
83
  if (!TargetRegisterInfo::isVirtualRegister(Reg))
1418
30
    return true;
1419
53
1420
53
  MachineInstr *Def = MRI->getVRegDef(Reg);
1421
53
  if (!Def)
1422
0
    return true;
1423
53
1424
53
  // If the initial value is a Phi or copy and the operands may not underflow,
1425
53
  // then the definition cannot be underflow either.
1426
53
  
if (53
Def->isPHI() && 53
!phiMayWrapOrUnderflow(Def, EndVal, Def->getParent(),
1427
2
                                             L, LoopFeederPhi))
1428
0
    return false;
1429
53
  
if (53
Def->isCopy() && 53
!loopCountMayWrapOrUnderFlow(&(Def->getOperand(1)),
1430
30
                                                    EndVal, Def->getParent(),
1431
30
                                                    L, LoopFeederPhi))
1432
0
    return false;
1433
53
1434
53
  // Iterate over the uses of the initial value. If the initial value is used
1435
53
  // in a compare, then we assume this is a range check that ensures the loop
1436
53
  // doesn't underflow. This is not an exact test and should be improved.
1437
53
  for (MachineRegisterInfo::use_instr_nodbg_iterator I = MRI->use_instr_nodbg_begin(Reg),
1438
78
         E = MRI->use_instr_nodbg_end(); 
I != E78
;
++I25
) {
1439
57
    MachineInstr *MI = &*I;
1440
57
    unsigned CmpReg1 = 0, CmpReg2 = 0;
1441
57
    int CmpMask = 0, CmpValue = 0;
1442
57
1443
57
    if (!TII->analyzeCompare(*MI, CmpReg1, CmpReg2, CmpMask, CmpValue))
1444
25
      continue;
1445
32
1446
32
    MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1447
32
    SmallVector<MachineOperand, 2> Cond;
1448
32
    if (TII->analyzeBranch(*MI->getParent(), TBB, FBB, Cond, false))
1449
0
      continue;
1450
32
1451
32
    Comparison::Kind Cmp =
1452
32
        getComparisonKind(MI->getOpcode(), nullptr, nullptr, 0);
1453
32
    if (Cmp == 0)
1454
0
      continue;
1455
32
    
if (32
TII->predOpcodeHasNot(Cond) ^ (TBB != MBB)32
)
1456
20
      Cmp = Comparison::getNegatedComparison(Cmp);
1457
32
    if (
CmpReg2 != 0 && 32
CmpReg2 == Reg15
)
1458
0
      Cmp = Comparison::getSwappedComparison(Cmp);
1459
32
1460
32
    // Signed underflow is undefined.
1461
32
    if (Comparison::isSigned(Cmp))
1462
29
      return false;
1463
3
1464
3
    // Check if there is a comparison of the initial value. If the initial value
1465
3
    // is greater than or not equal to another value, then assume this is a
1466
3
    // range check.
1467
3
    
if (3
(Cmp & Comparison::G) || 3
Cmp == Comparison::NE3
)
1468
3
      return false;
1469
57
  }
1470
53
1471
53
  // OK - this is a hack that needs to be improved. We really need to analyze
1472
53
  // the instructions performed on the initial value. This works on the simplest
1473
53
  // cases only.
1474
21
  
if (21
!Def->isCopy() && 21
!Def->isPHI()20
)
1475
18
    return false;
1476
3
1477
3
  return true;
1478
3
}
1479
1480
bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO,
1481
713
                                             int64_t &Val) const {
1482
713
  if (
MO.isImm()713
) {
1483
367
    Val = MO.getImm();
1484
367
    return true;
1485
367
  }
1486
346
  
if (346
!MO.isReg()346
)
1487
0
    return false;
1488
346
1489
346
  // MO is a register. Check whether it is defined as an immediate value,
1490
346
  // and if so, get the value of it in TV. That value will then need to be
1491
346
  // processed to handle potential subregisters in MO.
1492
346
  int64_t TV;
1493
346
1494
346
  unsigned R = MO.getReg();
1495
346
  if (!TargetRegisterInfo::isVirtualRegister(R))
1496
92
    return false;
1497
254
  MachineInstr *DI = MRI->getVRegDef(R);
1498
254
  unsigned DOpc = DI->getOpcode();
1499
254
  switch (DOpc) {
1500
81
    case TargetOpcode::COPY:
1501
81
    case Hexagon::A2_tfrsi:
1502
81
    case Hexagon::A2_tfrpi:
1503
81
    case Hexagon::CONST32:
1504
81
    case Hexagon::CONST64:
1505
81
      // Call recursively to avoid an extra check whether operand(1) is
1506
81
      // indeed an immediate (it could be a global address, for example),
1507
81
      // plus we can handle COPY at the same time.
1508
81
      if (!checkForImmediate(DI->getOperand(1), TV))
1509
62
        return false;
1510
19
      break;
1511
0
    case Hexagon::A2_combineii:
1512
0
    case Hexagon::A4_combineir:
1513
0
    case Hexagon::A4_combineii:
1514
0
    case Hexagon::A4_combineri:
1515
0
    case Hexagon::A2_combinew: {
1516
0
      const MachineOperand &S1 = DI->getOperand(1);
1517
0
      const MachineOperand &S2 = DI->getOperand(2);
1518
0
      int64_t V1, V2;
1519
0
      if (
!checkForImmediate(S1, V1) || 0
!checkForImmediate(S2, V2)0
)
1520
0
        return false;
1521
0
      TV = V2 | (static_cast<uint64_t>(V1) << 32);
1522
0
      break;
1523
0
    }
1524
0
    case TargetOpcode::REG_SEQUENCE: {
1525
0
      const MachineOperand &S1 = DI->getOperand(1);
1526
0
      const MachineOperand &S3 = DI->getOperand(3);
1527
0
      int64_t V1, V3;
1528
0
      if (
!checkForImmediate(S1, V1) || 0
!checkForImmediate(S3, V3)0
)
1529
0
        return false;
1530
0
      unsigned Sub2 = DI->getOperand(2).getImm();
1531
0
      unsigned Sub4 = DI->getOperand(4).getImm();
1532
0
      if (
Sub2 == Hexagon::isub_lo && 0
Sub4 == Hexagon::isub_hi0
)
1533
0
        TV = V1 | (V3 << 32);
1534
0
      else 
if (0
Sub2 == Hexagon::isub_hi && 0
Sub4 == Hexagon::isub_lo0
)
1535
0
        TV = V3 | (V1 << 32);
1536
0
      else
1537
0
        llvm_unreachable("Unexpected form of REG_SEQUENCE");
1538
0
      break;
1539
0
    }
1540
0
1541
173
    default:
1542
173
      return false;
1543
19
  }
1544
19
1545
19
  // By now, we should have successfully obtained the immediate value defining
1546
19
  // the register referenced in MO. Handle a potential use of a subregister.
1547
19
  switch (MO.getSubReg()) {
1548
0
    case Hexagon::isub_lo:
1549
0
      Val = TV & 0xFFFFFFFFULL;
1550
0
      break;
1551
0
    case Hexagon::isub_hi:
1552
0
      Val = (TV >> 32) & 0xFFFFFFFFULL;
1553
0
      break;
1554
19
    default:
1555
19
      Val = TV;
1556
19
      break;
1557
19
  }
1558
19
  return true;
1559
19
}
1560
1561
0
void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) {
1562
0
  if (
MO.isImm()0
) {
1563
0
    MO.setImm(Val);
1564
0
    return;
1565
0
  }
1566
0
1567
0
  assert(MO.isReg());
1568
0
  unsigned R = MO.getReg();
1569
0
  MachineInstr *DI = MRI->getVRegDef(R);
1570
0
1571
0
  const TargetRegisterClass *RC = MRI->getRegClass(R);
1572
0
  unsigned NewR = MRI->createVirtualRegister(RC);
1573
0
  MachineBasicBlock &B = *DI->getParent();
1574
0
  DebugLoc DL = DI->getDebugLoc();
1575
0
  BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR).addImm(Val);
1576
0
  MO.setReg(NewR);
1577
0
}
1578
1579
2
static bool isImmValidForOpcode(unsigned CmpOpc, int64_t Imm) {
1580
2
  // These two instructions are not extendable.
1581
2
  if (CmpOpc == Hexagon::A4_cmpbeqi)
1582
0
    return isUInt<8>(Imm);
1583
2
  
if (2
CmpOpc == Hexagon::A4_cmpbgti2
)
1584
0
    return isInt<8>(Imm);
1585
2
  // The rest of the comparison-with-immediate instructions are extendable.
1586
2
  return true;
1587
2
}
1588
1589
182
bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
1590
182
  MachineBasicBlock *Header = L->getHeader();
1591
182
  MachineBasicBlock *Latch = L->getLoopLatch();
1592
182
  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1593
182
1594
182
  if (
!(Header && 182
Latch182
&&
ExitingBlock182
))
1595
0
    return false;
1596
182
1597
182
  // These data structures follow the same concept as the corresponding
1598
182
  // ones in findInductionRegister (where some comments are).
1599
182
  using RegisterBump = std::pair<unsigned, int64_t>;
1600
182
  using RegisterInduction = std::pair<unsigned, RegisterBump>;
1601
182
  using RegisterInductionSet = std::set<RegisterInduction>;
1602
182
1603
182
  // Register candidates for induction variables, with their associated bumps.
1604
182
  RegisterInductionSet IndRegs;
1605
182
1606
182
  // Look for induction patterns:
1607
182
  //   vreg1 = PHI ..., [ latch, vreg2 ]
1608
182
  //   vreg2 = ADD vreg1, imm
1609
182
  using instr_iterator = MachineBasicBlock::instr_iterator;
1610
182
1611
182
  for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1612
518
       
I != E && 518
I->isPHI()517
;
++I336
) {
1613
336
    MachineInstr *Phi = &*I;
1614
336
1615
336
    // Have a PHI instruction.
1616
1.00k
    for (unsigned i = 1, n = Phi->getNumOperands(); 
i < n1.00k
;
i += 2672
) {
1617
672
      if (Phi->getOperand(i+1).getMBB() != Latch)
1618
336
        continue;
1619
336
1620
336
      unsigned PhiReg = Phi->getOperand(i).getReg();
1621
336
      MachineInstr *DI = MRI->getVRegDef(PhiReg);
1622
336
1623
336
      if (
DI->getDesc().isAdd()336
) {
1624
183
        // If the register operand to the add/sub is the PHI we are looking
1625
183
        // at, this meets the induction pattern.
1626
183
        unsigned IndReg = DI->getOperand(1).getReg();
1627
183
        MachineOperand &Opnd2 = DI->getOperand(2);
1628
183
        int64_t V;
1629
183
        if (
MRI->getVRegDef(IndReg) == Phi && 183
checkForImmediate(Opnd2, V)179
) {
1630
179
          unsigned UpdReg = DI->getOperand(0).getReg();
1631
179
          IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
1632
179
        }
1633
183
      }
1634
672
    }  // for (i)
1635
336
  }  // for (instr)
1636
182
1637
182
  if (IndRegs.empty())
1638
39
    return false;
1639
143
1640
143
  MachineBasicBlock *TB = nullptr, *FB = nullptr;
1641
143
  SmallVector<MachineOperand,2> Cond;
1642
143
  // AnalyzeBranch returns true if it fails to analyze branch.
1643
143
  bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
1644
143
  if (
NotAnalyzed || 143
Cond.empty()143
)
1645
0
    return false;
1646
143
1647
143
  
if (143
ExitingBlock != Latch && 143
(TB == Latch || 2
FB == Latch1
)) {
1648
2
    MachineBasicBlock *LTB = nullptr, *LFB = nullptr;
1649
2
    SmallVector<MachineOperand,2> LCond;
1650
2
    bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false);
1651
2
    if (NotAnalyzed)
1652
0
      return false;
1653
2
1654
2
    // Since latch is not the exiting block, the latch branch should be an
1655
2
    // unconditional branch to the loop header.
1656
2
    
if (2
TB == Latch2
)
1657
1
      
TB = (LTB == Header) ? 1
LTB1
:
LFB0
;
1658
2
    else
1659
1
      
FB = (LTB == Header) ? 1
LTB1
:
LFB0
;
1660
2
  }
1661
143
  
if (143
TB != Header143
) {
1662
9
    if (
FB != Header9
) {
1663
0
      // The latch/exit block does not go back to the header.
1664
0
      return false;
1665
0
    }
1666
9
    // FB is the header (i.e., uncond. jump to branch header)
1667
9
    // In this case, the LoopBody -> TB should not be a back edge otherwise
1668
9
    // it could result in an infinite loop after conversion to hw_loop.
1669
9
    // This case can happen when the Latch has two jumps like this:
1670
9
    // Jmp_c OuterLoopHeader <-- TB
1671
9
    // Jmp   InnerLoopHeader <-- FB
1672
9
    
if (9
MDT->dominates(TB, FB)9
)
1673
0
      return false;
1674
143
  }
1675
143
1676
143
  // Expecting a predicate register as a condition.  It won't be a hardware
1677
143
  // predicate register at this point yet, just a vreg.
1678
143
  // HexagonInstrInfo::AnalyzeBranch for negated branches inserts imm(0)
1679
143
  // into Cond, followed by the predicate register.  For non-negated branches
1680
143
  // it's just the register.
1681
143
  unsigned CSz = Cond.size();
1682
143
  if (
CSz != 1 && 143
CSz != 2143
)
1683
0
    return false;
1684
143
1685
143
  
if (143
!Cond[CSz-1].isReg()143
)
1686
0
    return false;
1687
143
1688
143
  unsigned P = Cond[CSz-1].getReg();
1689
143
  MachineInstr *PredDef = MRI->getVRegDef(P);
1690
143
1691
143
  if (!PredDef->isCompare())
1692
1
    return false;
1693
142
1694
142
  SmallSet<unsigned,2> CmpRegs;
1695
142
  MachineOperand *CmpImmOp = nullptr;
1696
142
1697
142
  // Go over all operands to the compare and look for immediate and register
1698
142
  // operands.  Assume that if the compare has a single register use and a
1699
142
  // single immediate operand, then the register is being compared with the
1700
142
  // immediate value.
1701
568
  for (unsigned i = 0, n = PredDef->getNumOperands(); 
i < n568
;
++i426
) {
1702
426
    MachineOperand &MO = PredDef->getOperand(i);
1703
426
    if (
MO.isReg()426
) {
1704
340
      // Skip all implicit references.  In one case there was:
1705
340
      //   %vreg140<def> = FCMPUGT32_rr %vreg138, %vreg139, %USR<imp-use>
1706
340
      if (MO.isImplicit())
1707
0
        continue;
1708
340
      
if (340
MO.isUse()340
) {
1709
198
        if (
!isImmediate(MO)198
) {
1710
182
          CmpRegs.insert(MO.getReg());
1711
182
          continue;
1712
182
        }
1713
16
        // Consider the register to be the "immediate" operand.
1714
16
        
if (16
CmpImmOp16
)
1715
0
          return false;
1716
16
        CmpImmOp = &MO;
1717
16
      }
1718
426
    } else 
if (86
MO.isImm()86
) {
1719
86
      if (CmpImmOp)    // A second immediate argument?  Confusing.  Bail out.
1720
0
        return false;
1721
86
      CmpImmOp = &MO;
1722
86
    }
1723
426
  }
1724
142
1725
142
  
if (142
CmpRegs.empty()142
)
1726
0
    return false;
1727
142
1728
142
  // Check if the compared register follows the order we want.  Fix if needed.
1729
142
  for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end();
1730
148
       
I != E148
;
++I6
) {
1731
146
    // This is a success.  If the register used in the comparison is one that
1732
146
    // we have identified as a bumped (updated) induction register, there is
1733
146
    // nothing to do.
1734
146
    if (CmpRegs.count(I->first))
1735
136
      return true;
1736
10
1737
10
    // Otherwise, if the register being compared comes out of a PHI node,
1738
10
    // and has been recognized as following the induction pattern, and is
1739
10
    // compared against an immediate, we can fix it.
1740
10
    const RegisterBump &RB = I->second;
1741
10
    if (
CmpRegs.count(RB.first)10
) {
1742
4
      if (
!CmpImmOp4
) {
1743
1
        // If both operands to the compare instruction are registers, see if
1744
1
        // it can be changed to use induction register as one of the operands.
1745
1
        MachineInstr *IndI = nullptr;
1746
1
        MachineInstr *nonIndI = nullptr;
1747
1
        MachineOperand *IndMO = nullptr;
1748
1
        MachineOperand *nonIndMO = nullptr;
1749
1
1750
3
        for (unsigned i = 1, n = PredDef->getNumOperands(); 
i < n3
;
++i2
) {
1751
2
          MachineOperand &MO = PredDef->getOperand(i);
1752
2
          if (
MO.isReg() && 2
MO.getReg() == RB.first2
) {
1753
1
            DEBUG(dbgs() << "\n DefMI(" << i << ") = "
1754
1
                         << *(MRI->getVRegDef(I->first)));
1755
1
            if (IndI)
1756
0
              return false;
1757
1
1758
1
            IndI = MRI->getVRegDef(I->first);
1759
1
            IndMO = &MO;
1760
2
          } else 
if (1
MO.isReg()1
) {
1761
1
            DEBUG(dbgs() << "\n DefMI(" << i << ") = "
1762
1
                         << *(MRI->getVRegDef(MO.getReg())));
1763
1
            if (nonIndI)
1764
0
              return false;
1765
1
1766
1
            nonIndI = MRI->getVRegDef(MO.getReg());
1767
1
            nonIndMO = &MO;
1768
1
          }
1769
2
        }
1770
1
        
if (1
IndI && 1
nonIndI1
&&
1771
1
            nonIndI->getOpcode() == Hexagon::A2_addi &&
1772
1
            nonIndI->getOperand(2).isImm() &&
1773
1
            
nonIndI->getOperand(2).getImm() == - RB.second1
) {
1774
1
          bool Order = orderBumpCompare(IndI, PredDef);
1775
1
          if (
Order1
) {
1776
1
            IndMO->setReg(I->first);
1777
1
            nonIndMO->setReg(nonIndI->getOperand(1).getReg());
1778
1
            return true;
1779
1
          }
1780
0
        }
1781
0
        return false;
1782
0
      }
1783
3
1784
3
      // It is not valid to do this transformation on an unsigned comparison
1785
3
      // because it may underflow.
1786
3
      Comparison::Kind Cmp =
1787
3
          getComparisonKind(PredDef->getOpcode(), nullptr, nullptr, 0);
1788
3
      if (
!Cmp || 3
Comparison::isUnsigned(Cmp)3
)
1789
1
        return false;
1790
2
1791
2
      // If the register is being compared against an immediate, try changing
1792
2
      // the compare instruction to use induction register and adjust the
1793
2
      // immediate operand.
1794
2
      int64_t CmpImm = getImmediate(*CmpImmOp);
1795
2
      int64_t V = RB.second;
1796
2
      // Handle Overflow (64-bit).
1797
2
      if (
((V > 0) && 2
(CmpImm > INT64_MAX - V)1
) ||
1798
2
          
((V < 0) && 2
(CmpImm < INT64_MIN - V)1
))
1799
0
        return false;
1800
2
      CmpImm += V;
1801
2
      // Most comparisons of register against an immediate value allow
1802
2
      // the immediate to be constant-extended. There are some exceptions
1803
2
      // though. Make sure the new combination will work.
1804
2
      if (CmpImmOp->isImm())
1805
2
        
if (2
!isImmValidForOpcode(PredDef->getOpcode(), CmpImm)2
)
1806
0
          return false;
1807
2
1808
2
      // Make sure that the compare happens after the bump.  Otherwise,
1809
2
      // after the fixup, the compare would use a yet-undefined register.
1810
2
      MachineInstr *BumpI = MRI->getVRegDef(I->first);
1811
2
      bool Order = orderBumpCompare(BumpI, PredDef);
1812
2
      if (!Order)
1813
2
        return false;
1814
0
1815
0
      // Finally, fix the compare instruction.
1816
0
      setImmediate(*CmpImmOp, CmpImm);
1817
0
      for (unsigned i = 0, n = PredDef->getNumOperands(); 
i < n0
;
++i0
) {
1818
0
        MachineOperand &MO = PredDef->getOperand(i);
1819
0
        if (
MO.isReg() && 0
MO.getReg() == RB.first0
) {
1820
0
          MO.setReg(I->first);
1821
0
          return true;
1822
0
        }
1823
0
      }
1824
4
    }
1825
146
  }
1826
142
1827
2
  return false;
1828
182
}
1829
1830
/// createPreheaderForLoop - Create a preheader for a given loop.
1831
MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
1832
0
      MachineLoop *L) {
1833
0
  if (MachineBasicBlock *TmpPH = MLI->findLoopPreheader(L, SpecPreheader))
1834
0
    return TmpPH;
1835
0
  
if (0
!HWCreatePreheader0
)
1836
0
    return nullptr;
1837
0
1838
0
  MachineBasicBlock *Header = L->getHeader();
1839
0
  MachineBasicBlock *Latch = L->getLoopLatch();
1840
0
  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1841
0
  MachineFunction *MF = Header->getParent();
1842
0
  DebugLoc DL;
1843
0
1844
#ifndef NDEBUG
1845
  if ((!PHFn.empty()) && (PHFn != MF->getName()))
1846
    return nullptr;
1847
#endif
1848
1849
0
  if (
!Latch || 0
!ExitingBlock0
||
Header->hasAddressTaken()0
)
1850
0
    return nullptr;
1851
0
1852
0
  using instr_iterator = MachineBasicBlock::instr_iterator;
1853
0
1854
0
  // Verify that all existing predecessors have analyzable branches
1855
0
  // (or no branches at all).
1856
0
  using MBBVector = std::vector<MachineBasicBlock *>;
1857
0
1858
0
  MBBVector Preds(Header->pred_begin(), Header->pred_end());
1859
0
  SmallVector<MachineOperand,2> Tmp1;
1860
0
  MachineBasicBlock *TB = nullptr, *FB = nullptr;
1861
0
1862
0
  if (TII->analyzeBranch(*ExitingBlock, TB, FB, Tmp1, false))
1863
0
    return nullptr;
1864
0
1865
0
  
for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); 0
I != E0
;
++I0
) {
1866
0
    MachineBasicBlock *PB = *I;
1867
0
    bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp1, false);
1868
0
    if (NotAnalyzed)
1869
0
      return nullptr;
1870
0
  }
1871
0
1872
0
  MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock();
1873
0
  MF->insert(Header->getIterator(), NewPH);
1874
0
1875
0
  if (
Header->pred_size() > 20
) {
1876
0
    // Ensure that the header has only two predecessors: the preheader and
1877
0
    // the loop latch.  Any additional predecessors of the header should
1878
0
    // join at the newly created preheader. Inspect all PHI nodes from the
1879
0
    // header and create appropriate corresponding PHI nodes in the preheader.
1880
0
1881
0
    for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1882
0
         
I != E && 0
I->isPHI()0
;
++I0
) {
1883
0
      MachineInstr *PN = &*I;
1884
0
1885
0
      const MCInstrDesc &PD = TII->get(TargetOpcode::PHI);
1886
0
      MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL);
1887
0
      NewPH->insert(NewPH->end(), NewPN);
1888
0
1889
0
      unsigned PR = PN->getOperand(0).getReg();
1890
0
      const TargetRegisterClass *RC = MRI->getRegClass(PR);
1891
0
      unsigned NewPR = MRI->createVirtualRegister(RC);
1892
0
      NewPN->addOperand(MachineOperand::CreateReg(NewPR, true));
1893
0
1894
0
      // Copy all non-latch operands of a header's PHI node to the newly
1895
0
      // created PHI node in the preheader.
1896
0
      for (unsigned i = 1, n = PN->getNumOperands(); 
i < n0
;
i += 20
) {
1897
0
        unsigned PredR = PN->getOperand(i).getReg();
1898
0
        unsigned PredRSub = PN->getOperand(i).getSubReg();
1899
0
        MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
1900
0
        if (PredB == Latch)
1901
0
          continue;
1902
0
1903
0
        MachineOperand MO = MachineOperand::CreateReg(PredR, false);
1904
0
        MO.setSubReg(PredRSub);
1905
0
        NewPN->addOperand(MO);
1906
0
        NewPN->addOperand(MachineOperand::CreateMBB(PredB));
1907
0
      }
1908
0
1909
0
      // Remove copied operands from the old PHI node and add the value
1910
0
      // coming from the preheader's PHI.
1911
0
      for (int i = PN->getNumOperands()-2; 
i > 00
;
i -= 20
) {
1912
0
        MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
1913
0
        if (
PredB != Latch0
) {
1914
0
          PN->RemoveOperand(i+1);
1915
0
          PN->RemoveOperand(i);
1916
0
        }
1917
0
      }
1918
0
      PN->addOperand(MachineOperand::CreateReg(NewPR, false));
1919
0
      PN->addOperand(MachineOperand::CreateMBB(NewPH));
1920
0
    }
1921
0
  } else {
1922
0
    assert(Header->pred_size() == 2);
1923
0
1924
0
    // The header has only two predecessors, but the non-latch predecessor
1925
0
    // is not a preheader (e.g. it has other successors, etc.)
1926
0
    // In such a case we don't need any extra PHI nodes in the new preheader,
1927
0
    // all we need is to adjust existing PHIs in the header to now refer to
1928
0
    // the new preheader.
1929
0
    for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1930
0
         
I != E && 0
I->isPHI()0
;
++I0
) {
1931
0
      MachineInstr *PN = &*I;
1932
0
      for (unsigned i = 1, n = PN->getNumOperands(); 
i < n0
;
i += 20
) {
1933
0
        MachineOperand &MO = PN->getOperand(i+1);
1934
0
        if (MO.getMBB() != Latch)
1935
0
          MO.setMBB(NewPH);
1936
0
      }
1937
0
    }
1938
0
  }
1939
0
1940
0
  // "Reroute" the CFG edges to link in the new preheader.
1941
0
  // If any of the predecessors falls through to the header, insert a branch
1942
0
  // to the new preheader in that place.
1943
0
  SmallVector<MachineOperand,1> Tmp2;
1944
0
  SmallVector<MachineOperand,1> EmptyCond;
1945
0
1946
0
  TB = FB = nullptr;
1947
0
1948
0
  for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); 
I != E0
;
++I0
) {
1949
0
    MachineBasicBlock *PB = *I;
1950
0
    if (
PB != Latch0
) {
1951
0
      Tmp2.clear();
1952
0
      bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp2, false);
1953
0
      (void)NotAnalyzed; // suppress compiler warning
1954
0
      assert (!NotAnalyzed && "Should be analyzable!");
1955
0
      if (
TB != Header && 0
(Tmp2.empty() || 0
FB != Header0
))
1956
0
        TII->insertBranch(*PB, NewPH, nullptr, EmptyCond, DL);
1957
0
      PB->ReplaceUsesOfBlockWith(Header, NewPH);
1958
0
    }
1959
0
  }
1960
0
1961
0
  // It can happen that the latch block will fall through into the header.
1962
0
  // Insert an unconditional branch to the header.
1963
0
  TB = FB = nullptr;
1964
0
  bool LatchNotAnalyzed = TII->analyzeBranch(*Latch, TB, FB, Tmp2, false);
1965
0
  (void)LatchNotAnalyzed; // suppress compiler warning
1966
0
  assert (!LatchNotAnalyzed && "Should be analyzable!");
1967
0
  if (
!TB && 0
!FB0
)
1968
0
    TII->insertBranch(*Latch, Header, nullptr, EmptyCond, DL);
1969
0
1970
0
  // Finally, the branch from the preheader to the header.
1971
0
  TII->insertBranch(*NewPH, Header, nullptr, EmptyCond, DL);
1972
0
  NewPH->addSuccessor(Header);
1973
0
1974
0
  MachineLoop *ParentLoop = L->getParentLoop();
1975
0
  if (ParentLoop)
1976
0
    ParentLoop->addBasicBlockToLoop(NewPH, MLI->getBase());
1977
0
1978
0
  // Update the dominator information with the new preheader.
1979
0
  if (
MDT0
) {
1980
0
    if (MachineDomTreeNode *
HN0
= MDT->getNode(Header)) {
1981
0
      if (MachineDomTreeNode *
DHN0
= HN->getIDom()) {
1982
0
        MDT->addNewBlock(NewPH, DHN->getBlock());
1983
0
        MDT->changeImmediateDominator(Header, NewPH);
1984
0
      }
1985
0
    }
1986
0
  }
1987
0
1988
0
  return NewPH;
1989
0
}