Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- HexagonVectorLoopCarriedReuse.cpp ----------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This pass removes the computation of provably redundant expressions that have
10
// been computed earlier in a previous iteration. It relies on the use of PHIs
11
// to identify loop carried dependences. This is scalar replacement for vector
12
// types.
13
//
14
//-----------------------------------------------------------------------------
15
// Motivation: Consider the case where we have the following loop structure.
16
//
17
// Loop:
18
//  t0 = a[i];
19
//  t1 = f(t0);
20
//  t2 = g(t1);
21
//  ...
22
//  t3 = a[i+1];
23
//  t4 = f(t3);
24
//  t5 = g(t4);
25
//  t6 = op(t2, t5)
26
//  cond_branch <Loop>
27
//
28
// This can be converted to
29
//  t00 = a[0];
30
//  t10 = f(t00);
31
//  t20 = g(t10);
32
// Loop:
33
//  t2 = t20;
34
//  t3 = a[i+1];
35
//  t4 = f(t3);
36
//  t5 = g(t4);
37
//  t6 = op(t2, t5)
38
//  t20 = t5
39
//  cond_branch <Loop>
40
//
41
// SROA does a good job of reusing a[i+1] as a[i] in the next iteration.
42
// Such a loop comes to this pass in the following form.
43
//
44
// LoopPreheader:
45
//  X0 = a[0];
46
// Loop:
47
//  X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
48
//  t1 = f(X2)   <-- I1
49
//  t2 = g(t1)
50
//  ...
51
//  X1 = a[i+1]
52
//  t4 = f(X1)   <-- I2
53
//  t5 = g(t4)
54
//  t6 = op(t2, t5)
55
//  cond_branch <Loop>
56
//
57
// In this pass, we look for PHIs such as X2 whose incoming values come only
58
// from the Loop Preheader and over the backedge and additionally, both these
59
// values are the results of the same operation in terms of opcode. We call such
60
// a PHI node a dependence chain or DepChain. In this case, the dependence of X2
61
// over X1 is carried over only one iteration and so the DepChain is only one
62
// PHI node long.
63
//
64
// Then, we traverse the uses of the PHI (X2) and the uses of the value of the
65
// PHI coming  over the backedge (X1). We stop at the first pair of such users
66
// I1 (of X2) and I2 (of X1) that meet the following conditions.
67
// 1. I1 and I2 are the same operation, but with different operands.
68
// 2. X2 and X1 are used at the same operand number in the two instructions.
69
// 3. All other operands Op1 of I1 and Op2 of I2 are also such that there is a
70
//    DepChain from Op1 to Op2 of the same length as that between X2 and X1.
71
//
72
// We then make the following transformation
73
// LoopPreheader:
74
//  X0 = a[0];
75
//  Y0 = f(X0);
76
// Loop:
77
//  X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
78
//  Y2 = PHI<(Y0, LoopPreheader), (t4, Loop)>
79
//  t1 = f(X2)   <-- Will be removed by DCE.
80
//  t2 = g(Y2)
81
//  ...
82
//  X1 = a[i+1]
83
//  t4 = f(X1)
84
//  t5 = g(t4)
85
//  t6 = op(t2, t5)
86
//  cond_branch <Loop>
87
//
88
// We proceed until we cannot find any more such instructions I1 and I2.
89
//
90
// --- DepChains & Loop carried dependences ---
91
// Consider a single basic block loop such as
92
//
93
// LoopPreheader:
94
//  X0 = ...
95
//  Y0 = ...
96
// Loop:
97
//  X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
98
//  Y2 = PHI<(Y0, LoopPreheader), (X2, Loop)>
99
//  ...
100
//  X1 = ...
101
//  ...
102
//  cond_branch <Loop>
103
//
104
// Then there is a dependence between X2 and X1 that goes back one iteration,
105
// i.e. X1 is used as X2 in the very next iteration. We represent this as a
106
// DepChain from X2 to X1 (X2->X1).
107
// Similarly, there is a dependence between Y2 and X1 that goes back two
108
// iterations. X1 is used as Y2 two iterations after it is computed. This is
109
// represented by a DepChain as (Y2->X2->X1).
110
//
111
// A DepChain has the following properties.
112
// 1. Number of Instructions in DepChain = Num of edges in DepChain + 1 = Number
113
//    of iterations of carried dependence + 1.
114
// 2. All instructions in the DepChain except the last are PHIs.
115
//
116
//===----------------------------------------------------------------------===//
117
118
#include "llvm/ADT/SetVector.h"
119
#include "llvm/ADT/SmallVector.h"
120
#include "llvm/ADT/Statistic.h"
121
#include "llvm/Analysis/LoopInfo.h"
122
#include "llvm/Analysis/LoopPass.h"
123
#include "llvm/IR/BasicBlock.h"
124
#include "llvm/IR/DerivedTypes.h"
125
#include "llvm/IR/IRBuilder.h"
126
#include "llvm/IR/Instruction.h"
127
#include "llvm/IR/Instructions.h"
128
#include "llvm/IR/IntrinsicInst.h"
129
#include "llvm/IR/Intrinsics.h"
130
#include "llvm/IR/Use.h"
131
#include "llvm/IR/User.h"
132
#include "llvm/IR/Value.h"
133
#include "llvm/Pass.h"
134
#include "llvm/Support/Casting.h"
135
#include "llvm/Support/CommandLine.h"
136
#include "llvm/Support/Compiler.h"
137
#include "llvm/Support/Debug.h"
138
#include "llvm/Support/raw_ostream.h"
139
#include "llvm/Transforms/Scalar.h"
140
#include "llvm/Transforms/Utils.h"
141
#include <algorithm>
142
#include <cassert>
143
#include <cstddef>
144
#include <map>
145
#include <memory>
146
#include <set>
147
148
using namespace llvm;
149
150
#define DEBUG_TYPE "hexagon-vlcr"
151
152
// Statistic counter; incremented once per replacement performed by
// reuseValue().
STATISTIC(HexagonNumVectorLoopCarriedReuse,
153
          "Number of values that were reused from a previous iteration.");
154
155
// Hidden command-line option (default 2). findValueToReuse() skips every
// DepChain whose iterations() exceeds this value.
static cl::opt<int> HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim",
156
    cl::Hidden,
157
    cl::desc("Maximum distance of loop carried dependences that are handled"),
158
    cl::init(2), cl::ZeroOrMore);
159
160
// Forward declarations of the pass registration hook (called from the pass
// constructor) and of the factory function defined at the end of this file.
namespace llvm {
161
162
void initializeHexagonVectorLoopCarriedReusePass(PassRegistry&);
163
Pass *createHexagonVectorLoopCarriedReusePass();
164
165
} // end namespace llvm
166
167
namespace {
168
169
  // See info about DepChain in the comments at the top of this file.
170
  using ChainOfDependences = SmallVector<Instruction *, 4>;
171
172
  class DepChain {
173
    ChainOfDependences Chain;
174
175
  public:
176
0
    bool isIdentical(DepChain &Other) const {
177
0
      if (Other.size() != size())
178
0
        return false;
179
0
      ChainOfDependences &OtherChain = Other.getChain();
180
0
      for (int i = 0; i < size(); ++i) {
181
0
        if (Chain[i] != OtherChain[i])
182
0
          return false;
183
0
      }
184
0
      return true;
185
0
    }
186
187
0
    ChainOfDependences &getChain() {
188
0
      return Chain;
189
0
    }
190
191
81
    int size() const {
192
81
      return Chain.size();
193
81
    }
194
195
0
    void clear() {
196
0
      Chain.clear();
197
0
    }
198
199
56
    void push_back(Instruction *I) {
200
56
      Chain.push_back(I);
201
56
    }
202
203
53
    int iterations() const {
204
53
      return size() - 1;
205
53
    }
206
207
73
    Instruction *front() const {
208
73
      return Chain.front();
209
73
    }
210
211
35
    Instruction *back() const {
212
35
      return Chain.back();
213
35
    }
214
215
8
    Instruction *&operator[](const int index) {
216
8
      return Chain[index];
217
8
    }
218
219
   friend raw_ostream &operator<< (raw_ostream &OS, const DepChain &D);
220
  };
221
222
  /// Debug printer for a DepChain. Emits a start banner, then one instruction
  /// per line with " -->" after every instruction except the last. An empty
  /// chain prints only the banner.
  LLVM_ATTRIBUTE_UNUSED
  raw_ostream &operator<<(raw_ostream &OS, const DepChain &D) {
    const ChainOfDependences &CD = D.Chain;
    OS << "**DepChain Start::**\n";
    // Guard the empty case: there is no last element to index.
    if (CD.empty())
      return OS;
    const int Last = static_cast<int>(CD.size()) - 1;
    for (int i = 0; i < Last; ++i)
      OS << *(CD[i]) << " -->\n";
    OS << *CD[Last] << "\n";
    return OS;
  }
233
234
  struct ReuseValue {
235
    Instruction *Inst2Replace = nullptr;
236
237
    // In the new PHI node that we'll construct this is the value that'll be
238
    // used over the backedge. This is teh value that gets reused from a
239
    // previous iteration.
240
    Instruction *BackedgeInst = nullptr;
241
    std::map<Instruction *, DepChain *> DepChains;
242
    int Iterations = -1;
243
244
16
    ReuseValue() = default;
245
246
9
    void reset() {
247
9
      Inst2Replace = nullptr;
248
9
      BackedgeInst = nullptr;
249
9
      DepChains.clear();
250
9
      Iterations = -1;
251
9
    }
252
9
    bool isDefined() { return Inst2Replace != nullptr; }
253
  };
254
255
  /// Debug printer for a ReuseValue. Both instruction pointers are
  /// dereferenced, so the candidate must be defined when this is called.
  LLVM_ATTRIBUTE_UNUSED
  raw_ostream &operator<<(raw_ostream &OS, const ReuseValue &RU) {
    const Instruction &ToReplace = *RU.Inst2Replace;
    const Instruction &OverBackedge = *RU.BackedgeInst;
    return OS << "** ReuseValue ***\n"
              << "Instruction to Replace: " << ToReplace << "\n"
              << "Backedge Instruction: " << OverBackedge << "\n";
  }
262
263
  class HexagonVectorLoopCarriedReuse : public LoopPass {
264
  public:
265
    static char ID;
266
267
16
    explicit HexagonVectorLoopCarriedReuse() : LoopPass(ID) {
268
16
      PassRegistry *PR = PassRegistry::getPassRegistry();
269
16
      initializeHexagonVectorLoopCarriedReusePass(*PR);
270
16
    }
271
272
5
    StringRef getPassName() const override {
273
5
      return "Hexagon-specific loop carried reuse for HVX vectors";
274
5
    }
275
276
16
    void getAnalysisUsage(AnalysisUsage &AU) const override {
277
16
      AU.addRequired<LoopInfoWrapperPass>();
278
16
      AU.addRequiredID(LoopSimplifyID);
279
16
      AU.addRequiredID(LCSSAID);
280
16
      AU.addPreservedID(LCSSAID);
281
16
      AU.setPreservesCFG();
282
16
    }
283
284
    bool runOnLoop(Loop *L, LPPassManager &LPM) override;
285
286
  private:
287
    SetVector<DepChain *> Dependences;
288
    std::set<Instruction *> ReplacedInsts;
289
    Loop *CurLoop;
290
    ReuseValue ReuseCandidate;
291
292
    bool doVLCR();
293
    void findLoopCarriedDeps();
294
    void findValueToReuse();
295
    void findDepChainFromPHI(Instruction *I, DepChain &D);
296
    void reuseValue();
297
    Value *findValueInBlock(Value *Op, BasicBlock *BB);
298
    DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2, int Iters);
299
    bool isEquivalentOperation(Instruction *I1, Instruction *I2);
300
    bool canReplace(Instruction *I);
301
    bool isCallInstCommutative(CallInst *C);
302
  };
303
304
} // end anonymous namespace
305
306
// Pass identifier handed to the LoopPass base class by the constructor.
char HexagonVectorLoopCarriedReuse::ID = 0;
307
308
101k
// Pass registration under the "hexagon-vlcr" argument, with LoopInfo,
// LoopSimplify and LCSSA listed as dependencies.
// NOTE(review): the description here says "predictive commoning" while
// getPassName() says "loop carried reuse" - confirm which wording is wanted.
INITIALIZE_PASS_BEGIN(HexagonVectorLoopCarriedReuse, "hexagon-vlcr",
309
101k
    "Hexagon-specific predictive commoning for HVX vectors", false, false)
310
101k
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
311
101k
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
312
101k
INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
313
101k
INITIALIZE_PASS_END(HexagonVectorLoopCarriedReuse, "hexagon-vlcr",
314
    "Hexagon-specific predictive commoning for HVX vectors", false, false)
315
316
5
/// Pass entry point. Rejects loops the pass cannot handle and otherwise runs
/// doVLCR() on \p L. Returns true when the loop was modified.
bool HexagonVectorLoopCarriedReuse::runOnLoop(Loop *L, LPPassManager &LPM) {
  if (skipLoop(L))
    return false;

  // Only innermost, single-basic-block loops that have a preheader are
  // handled.
  const bool Eligible = L->getLoopPreheader() && L->getSubLoops().empty() &&
                        L->getNumBlocks() == 1;
  if (!Eligible)
    return false;

  CurLoop = L;
  return doVLCR();
}
335
336
7
/// Returns true when \p C calls one of the Hexagon HVX intrinsics that this
/// pass treats as commutative. Calls without a statically known callee (for
/// example indirect calls) are never considered commutative.
bool HexagonVectorLoopCarriedReuse::isCallInstCommutative(CallInst *C) {
  // getCalledFunction() yields null when the callee is not a direct function
  // reference, so it must be checked before asking for the intrinsic ID.
  const Function *Callee = C->getCalledFunction();
  if (!Callee)
    return false;

  switch (Callee->getIntrinsicID()) {
    case Intrinsic::hexagon_V6_vaddb:
    case Intrinsic::hexagon_V6_vaddb_128B:
    case Intrinsic::hexagon_V6_vaddh:
    case Intrinsic::hexagon_V6_vaddh_128B:
    case Intrinsic::hexagon_V6_vaddw:
    case Intrinsic::hexagon_V6_vaddw_128B:
    case Intrinsic::hexagon_V6_vaddubh:
    case Intrinsic::hexagon_V6_vaddubh_128B:
    case Intrinsic::hexagon_V6_vadduhw:
    case Intrinsic::hexagon_V6_vadduhw_128B:
    case Intrinsic::hexagon_V6_vaddhw:
    case Intrinsic::hexagon_V6_vaddhw_128B:
    case Intrinsic::hexagon_V6_vmaxb:
    case Intrinsic::hexagon_V6_vmaxb_128B:
    case Intrinsic::hexagon_V6_vmaxh:
    case Intrinsic::hexagon_V6_vmaxh_128B:
    case Intrinsic::hexagon_V6_vmaxw:
    case Intrinsic::hexagon_V6_vmaxw_128B:
    case Intrinsic::hexagon_V6_vmaxub:
    case Intrinsic::hexagon_V6_vmaxub_128B:
    case Intrinsic::hexagon_V6_vmaxuh:
    case Intrinsic::hexagon_V6_vmaxuh_128B:
    case Intrinsic::hexagon_V6_vminub:
    case Intrinsic::hexagon_V6_vminub_128B:
    case Intrinsic::hexagon_V6_vminuh:
    case Intrinsic::hexagon_V6_vminuh_128B:
    case Intrinsic::hexagon_V6_vminb:
    case Intrinsic::hexagon_V6_vminb_128B:
    case Intrinsic::hexagon_V6_vminh:
    case Intrinsic::hexagon_V6_vminh_128B:
    case Intrinsic::hexagon_V6_vminw:
    case Intrinsic::hexagon_V6_vminw_128B:
    case Intrinsic::hexagon_V6_vmpyub:
    case Intrinsic::hexagon_V6_vmpyub_128B:
    case Intrinsic::hexagon_V6_vmpyuh:
    case Intrinsic::hexagon_V6_vmpyuh_128B:
    case Intrinsic::hexagon_V6_vavgub:
    case Intrinsic::hexagon_V6_vavgub_128B:
    case Intrinsic::hexagon_V6_vavgh:
    case Intrinsic::hexagon_V6_vavgh_128B:
    case Intrinsic::hexagon_V6_vavguh:
    case Intrinsic::hexagon_V6_vavguh_128B:
    case Intrinsic::hexagon_V6_vavgw:
    case Intrinsic::hexagon_V6_vavgw_128B:
    case Intrinsic::hexagon_V6_vavgb:
    case Intrinsic::hexagon_V6_vavgb_128B:
    case Intrinsic::hexagon_V6_vavguw:
    case Intrinsic::hexagon_V6_vavguw_128B:
    case Intrinsic::hexagon_V6_vabsdiffh:
    case Intrinsic::hexagon_V6_vabsdiffh_128B:
    case Intrinsic::hexagon_V6_vabsdiffub:
    case Intrinsic::hexagon_V6_vabsdiffub_128B:
    case Intrinsic::hexagon_V6_vabsdiffuh:
    case Intrinsic::hexagon_V6_vabsdiffuh_128B:
    case Intrinsic::hexagon_V6_vabsdiffw:
    case Intrinsic::hexagon_V6_vabsdiffw_128B:
      return true;
    default:
      return false;
  }
}
399
400
bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1,
401
16
                                                          Instruction *I2) {
402
16
  if (!I1->isSameOperationAs(I2))
403
6
    return false;
404
10
  // This check is in place specifically for intrinsics. isSameOperationAs will
405
10
  // return two for any two hexagon intrinsics because they are essentially the
406
10
  // same instruciton (CallInst). We need to scratch the surface to see if they
407
10
  // are calls to the same function.
408
10
  if (CallInst *C1 = dyn_cast<CallInst>(I1)) {
409
9
    if (CallInst *C2 = dyn_cast<CallInst>(I2)) {
410
9
      if (C1->getCalledFunction() != C2->getCalledFunction())
411
0
        return false;
412
10
    }
413
9
  }
414
10
415
10
  // If both the Instructions are of Vector Type and any of the element
416
10
  // is integer constant, check their values too for equivalence.
417
10
  if (I1->getType()->isVectorTy() && I2->getType()->isVectorTy()) {
418
10
    unsigned NumOperands = I1->getNumOperands();
419
40
    for (unsigned i = 0; i < NumOperands; 
++i30
) {
420
32
      ConstantInt *C1 = dyn_cast<ConstantInt>(I1->getOperand(i));
421
32
      ConstantInt *C2 = dyn_cast<ConstantInt>(I2->getOperand(i));
422
32
      if(!C1) 
continue28
;
423
4
      assert(C2);
424
4
      if (C1->getSExtValue() != C2->getSExtValue())
425
2
        return false;
426
4
    }
427
10
  }
428
10
429
10
  
return true8
;
430
10
}
431
432
10
/// Returns false for the V6_hi / V6_lo intrinsics (64B and 128B variants),
/// which this pass never considers for reuse; returns true for every other
/// instruction, intrinsic or not.
bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *I) {
  if (const auto *Intr = dyn_cast<IntrinsicInst>(I)) {
    const Intrinsic::ID IID = Intr->getIntrinsicID();
    const bool IsHiOrLo = IID == Intrinsic::hexagon_V6_hi ||
                          IID == Intrinsic::hexagon_V6_lo ||
                          IID == Intrinsic::hexagon_V6_hi_128B ||
                          IID == Intrinsic::hexagon_V6_lo_128B;
    if (IsHiOrLo) {
      LLVM_DEBUG(dbgs() << "Not considering for reuse: " << *Intr << "\n");
      return false;
    }
  }
  return true;
}
448
9
void HexagonVectorLoopCarriedReuse::findValueToReuse() {
449
22
  for (auto *D : Dependences) {
450
22
    LLVM_DEBUG(dbgs() << "Processing dependence " << *(D->front()) << "\n");
451
22
    if (D->iterations() > HexagonVLCRIterationLim) {
452
0
      LLVM_DEBUG(
453
0
          dbgs()
454
0
          << ".. Skipping because number of iterations > than the limit\n");
455
0
      continue;
456
0
    }
457
22
458
22
    PHINode *PN = cast<PHINode>(D->front());
459
22
    Instruction *BEInst = D->back();
460
22
    int Iters = D->iterations();
461
22
    BasicBlock *BB = PN->getParent();
462
22
    LLVM_DEBUG(dbgs() << "Checking if any uses of " << *PN
463
22
                      << " can be reused\n");
464
22
465
22
    SmallVector<Instruction *, 4> PNUsers;
466
44
    for (auto UI = PN->use_begin(), E = PN->use_end(); UI != E; 
++UI22
) {
467
22
      Use &U = *UI;
468
22
      Instruction *User = cast<Instruction>(U.getUser());
469
22
470
22
      if (User->getParent() != BB)
471
0
        continue;
472
22
      if (ReplacedInsts.count(User)) {
473
12
        LLVM_DEBUG(dbgs() << *User
474
12
                          << " has already been replaced. Skipping...\n");
475
12
        continue;
476
12
      }
477
10
      if (isa<PHINode>(User))
478
0
        continue;
479
10
      if (User->mayHaveSideEffects())
480
0
        continue;
481
10
      if (!canReplace(User))
482
0
        continue;
483
10
484
10
      PNUsers.push_back(User);
485
10
    }
486
22
    LLVM_DEBUG(dbgs() << PNUsers.size() << " use(s) of the PHI in the block\n");
487
22
488
22
    // For each interesting use I of PN, find an Instruction BEUser that
489
22
    // performs the same operation as I on BEInst and whose other operands,
490
22
    // if any, can also be rematerialized in OtherBB. We stop when we find the
491
22
    // first such Instruction BEUser. This is because once BEUser is
492
22
    // rematerialized in OtherBB, we may find more such "fixup" opportunities
493
22
    // in this block. So, we'll start over again.
494
22
    for (Instruction *I : PNUsers) {
495
22
      for (auto UI = BEInst->use_begin(), E = BEInst->use_end(); UI != E;
496
16
           
++UI12
) {
497
16
        Use &U = *UI;
498
16
        Instruction *BEUser = cast<Instruction>(U.getUser());
499
16
500
16
        if (BEUser->getParent() != BB)
501
0
          continue;
502
16
        if (!isEquivalentOperation(I, BEUser))
503
8
          continue;
504
8
505
8
        int NumOperands = I->getNumOperands();
506
8
507
8
        // Take operands of each PNUser one by one and try to find DepChain
508
8
        // with every operand of the BEUser. If any of the operands of BEUser
509
8
        // has DepChain with current operand of the PNUser, break the matcher
510
8
        // loop. Keep doing this for Every PNUser operand. If PNUser operand
511
8
        // does not have DepChain with any of the BEUser operand, break the
512
8
        // outer matcher loop, mark the BEUser as null and reset the ReuseCandidate.
513
8
        // This ensures that DepChain exist for all the PNUser operand with
514
8
        // BEUser operand. This also ensures that DepChains are independent of
515
8
        // the positions in PNUser and BEUser.
516
8
        std::map<Instruction *, DepChain *> DepChains;
517
8
        CallInst *C1 = dyn_cast<CallInst>(I);
518
8
        if ((I && I->isCommutative()) || (C1 && 
isCallInstCommutative(C1)7
)) {
519
5
          bool Found = false;
520
17
          for (int OpNo = 0; OpNo < NumOperands; 
++OpNo12
) {
521
13
            Value *Op = I->getOperand(OpNo);
522
13
            Instruction *OpInst = dyn_cast<Instruction>(Op);
523
13
            Found = false;
524
28
            for (int T = 0; T < NumOperands; 
++T15
) {
525
27
              Value *BEOp = BEUser->getOperand(T);
526
27
              Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
527
27
              if (!OpInst && 
!BEOpInst12
) {
528
4
                if (Op == BEOp) {
529
4
                  Found = true;
530
4
                  break;
531
4
                }
532
23
              }
533
23
534
23
              if ((OpInst && 
!BEOpInst15
) ||
(22
!OpInst22
&&
BEOpInst8
))
535
9
                continue;
536
14
537
14
              DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);
538
14
539
14
              if (D) {
540
8
                Found = true;
541
8
                DepChains[OpInst] = D;
542
8
                break;
543
8
              }
544
14
            }
545
13
            if (!Found) {
546
1
              BEUser = nullptr;
547
1
              break;
548
1
            }
549
13
          }
550
5
        } else {
551
3
552
5
          for (int OpNo = 0; OpNo < NumOperands; 
++OpNo2
) {
553
5
            Value *Op = I->getOperand(OpNo);
554
5
            Value *BEOp = BEUser->getOperand(OpNo);
555
5
556
5
            Instruction *OpInst = dyn_cast<Instruction>(Op);
557
5
            if (!OpInst) {
558
2
              if (Op == BEOp)
559
1
                continue;
560
1
              // Do not allow reuse to occur when the operands may be different
561
1
              // values.
562
1
              BEUser = nullptr;
563
1
              break;
564
1
            }
565
3
566
3
            Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
567
3
            DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);
568
3
569
3
            if (D) {
570
1
              DepChains[OpInst] = D;
571
2
            } else {
572
2
              BEUser = nullptr;
573
2
              break;
574
2
            }
575
3
          }
576
3
        }
577
8
        if (BEUser) {
578
4
          LLVM_DEBUG(dbgs() << "Found Value for reuse.\n");
579
4
          ReuseCandidate.Inst2Replace = I;
580
4
          ReuseCandidate.BackedgeInst = BEUser;
581
4
          ReuseCandidate.DepChains = DepChains;
582
4
          ReuseCandidate.Iterations = Iters;
583
4
          return;
584
4
        }
585
4
        ReuseCandidate.reset();
586
4
      }
587
10
    }
588
22
  }
589
9
  ReuseCandidate.reset();
590
5
}
591
592
/// Returns the value that the PHI \p Op receives from predecessor \p BB.
/// \p Op must be a PHINode; this is checked by an assertion only.
Value *HexagonVectorLoopCarriedReuse::findValueInBlock(Value *Op,
                                                       BasicBlock *BB) {
  return cast<PHINode>(Op)->getIncomingValueForBlock(BB);
}
599
600
4
void HexagonVectorLoopCarriedReuse::reuseValue() {
601
4
  LLVM_DEBUG(dbgs() << ReuseCandidate);
602
4
  Instruction *Inst2Replace = ReuseCandidate.Inst2Replace;
603
4
  Instruction *BEInst = ReuseCandidate.BackedgeInst;
604
4
  int NumOperands = Inst2Replace->getNumOperands();
605
4
  std::map<Instruction *, DepChain *> &DepChains = ReuseCandidate.DepChains;
606
4
  int Iterations = ReuseCandidate.Iterations;
607
4
  BasicBlock *LoopPH = CurLoop->getLoopPreheader();
608
4
  assert(!DepChains.empty() && "No DepChains");
609
4
  LLVM_DEBUG(dbgs() << "reuseValue is making the following changes\n");
610
4
611
4
  SmallVector<Instruction *, 4> InstsInPreheader;
612
8
  for (int i = 0; i < Iterations; 
++i4
) {
613
4
    Instruction *InstInPreheader = Inst2Replace->clone();
614
4
    SmallVector<Value *, 4> Ops;
615
16
    for (int j = 0; j < NumOperands; 
++j12
) {
616
12
      Instruction *I = dyn_cast<Instruction>(Inst2Replace->getOperand(j));
617
12
      if (!I)
618
4
        continue;
619
8
      // Get the DepChain corresponding to this operand.
620
8
      DepChain &D = *DepChains[I];
621
8
      // Get the PHI for the iteration number and find
622
8
      // the incoming value from the Loop Preheader for
623
8
      // that PHI.
624
8
      Value *ValInPreheader = findValueInBlock(D[i], LoopPH);
625
8
      InstInPreheader->setOperand(j, ValInPreheader);
626
8
    }
627
4
    InstsInPreheader.push_back(InstInPreheader);
628
4
    InstInPreheader->setName(Inst2Replace->getName() + ".hexagon.vlcr");
629
4
    InstInPreheader->insertBefore(LoopPH->getTerminator());
630
4
    LLVM_DEBUG(dbgs() << "Added " << *InstInPreheader << " to "
631
4
                      << LoopPH->getName() << "\n");
632
4
  }
633
4
  BasicBlock *BB = BEInst->getParent();
634
4
  IRBuilder<> IRB(BB);
635
4
  IRB.SetInsertPoint(BB->getFirstNonPHI());
636
4
  Value *BEVal = BEInst;
637
4
  PHINode *NewPhi;
638
8
  for (int i = Iterations-1; i >=0 ; 
--i4
) {
639
4
    Instruction *InstInPreheader = InstsInPreheader[i];
640
4
    NewPhi = IRB.CreatePHI(InstInPreheader->getType(), 2);
641
4
    NewPhi->addIncoming(InstInPreheader, LoopPH);
642
4
    NewPhi->addIncoming(BEVal, BB);
643
4
    LLVM_DEBUG(dbgs() << "Adding " << *NewPhi << " to " << BB->getName()
644
4
                      << "\n");
645
4
    BEVal = NewPhi;
646
4
  }
647
4
  // We are in LCSSA form. So, a value defined inside the Loop is used only
648
4
  // inside the loop. So, the following is safe.
649
4
  Inst2Replace->replaceAllUsesWith(NewPhi);
650
4
  ReplacedInsts.insert(Inst2Replace);
651
4
  ++HexagonNumVectorLoopCarriedReuse;
652
4
}
653
654
5
bool HexagonVectorLoopCarriedReuse::doVLCR() {
655
5
  assert(CurLoop->getSubLoops().empty() &&
656
5
         "Can do VLCR on the innermost loop only");
657
5
  assert((CurLoop->getNumBlocks() == 1) &&
658
5
         "Can do VLCR only on single block loops");
659
5
660
5
  bool Changed = false;
661
5
  bool Continue;
662
5
663
5
  LLVM_DEBUG(dbgs() << "Working on Loop: " << *CurLoop->getHeader() << "\n");
664
9
  do {
665
9
    // Reset datastructures.
666
9
    Dependences.clear();
667
9
    Continue = false;
668
9
669
9
    findLoopCarriedDeps();
670
9
    findValueToReuse();
671
9
    if (ReuseCandidate.isDefined()) {
672
4
      reuseValue();
673
4
      Changed = true;
674
4
      Continue = true;
675
4
    }
676
9
    llvm::for_each(Dependences, std::default_delete<DepChain>());
677
9
  } while (Continue);
678
5
  return Changed;
679
5
}
680
681
void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I,
682
56
                                                        DepChain &D) {
683
56
  PHINode *PN = dyn_cast<PHINode>(I);
684
56
  if (!PN) {
685
28
    D.push_back(I);
686
28
    return;
687
28
  } else {
688
28
    auto NumIncomingValues = PN->getNumIncomingValues();
689
28
    if (NumIncomingValues != 2) {
690
0
      D.clear();
691
0
      return;
692
0
    }
693
28
694
28
    BasicBlock *BB = PN->getParent();
695
28
    if (BB != CurLoop->getHeader()) {
696
0
      D.clear();
697
0
      return;
698
0
    }
699
28
700
28
    Value *BEVal = PN->getIncomingValueForBlock(BB);
701
28
    Instruction *BEInst = dyn_cast<Instruction>(BEVal);
702
28
    // This is a single block loop with a preheader, so at least
703
28
    // one value should come over the backedge.
704
28
    assert(BEInst && "There should be a value over the backedge");
705
28
706
28
    Value *PreHdrVal =
707
28
      PN->getIncomingValueForBlock(CurLoop->getLoopPreheader());
708
28
    if(!PreHdrVal || !isa<Instruction>(PreHdrVal)) {
709
0
      D.clear();
710
0
      return;
711
0
    }
712
28
    D.push_back(PN);
713
28
    findDepChainFromPHI(BEInst, D);
714
28
  }
715
56
}
716
717
/// Looks up the recorded DepChain that starts at \p I1, ends at \p I2 and
/// spans exactly \p Iters iterations. Returns nullptr when there is none.
DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1,
                                                         Instruction *I2,
                                                         int Iters) {
  for (DepChain *Candidate : Dependences) {
    if (Candidate->front() != I1)
      continue;
    if (Candidate->back() != I2)
      continue;
    if (Candidate->iterations() != Iters)
      continue;
    return Candidate;
  }
  return nullptr;
}
726
727
9
void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() {
728
9
  BasicBlock *BB = CurLoop->getHeader();
729
73
  for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); 
++I64
) {
730
64
    auto *PN = cast<PHINode>(I);
731
64
    if (!isa<VectorType>(PN->getType()))
732
36
      continue;
733
28
734
28
    DepChain *D = new DepChain();
735
28
    findDepChainFromPHI(PN, *D);
736
28
    if (D->size() != 0)
737
28
      Dependences.insert(D);
738
0
    else
739
0
      delete D;
740
28
  }
741
9
  LLVM_DEBUG(dbgs() << "Found " << Dependences.size() << " dependences\n");
742
9
  LLVM_DEBUG(for (size_t i = 0; i < Dependences.size();
743
9
                  ++i) { dbgs() << *Dependences[i] << "\n"; });
744
9
}
745
746
12
/// Factory for the pass; the caller receives a newly allocated instance.
Pass *llvm::createHexagonVectorLoopCarriedReusePass() {
  Pass *NewPass = new HexagonVectorLoopCarriedReuse();
  return NewPass;
}