Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/CodeGen/HardwareLoops.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- HardwareLoops.cpp - Target Independent Hardware Loops --*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
/// \file
9
/// Insert hardware loop intrinsics into loops which are deemed profitable by
10
/// the target, by querying TargetTransformInfo. A hardware loop comprises of
11
/// two intrinsics: one, outside the loop, to set the loop iteration count and
12
/// another, in the exit block, to decrement the counter. The decremented value
13
/// can either be carried through the loop via a phi or handled in some opaque
14
/// way by the target.
15
///
16
//===----------------------------------------------------------------------===//
17
18
#include "llvm/Pass.h"
19
#include "llvm/PassRegistry.h"
20
#include "llvm/PassSupport.h"
21
#include "llvm/ADT/Statistic.h"
22
#include "llvm/Analysis/AssumptionCache.h"
23
#include "llvm/Analysis/LoopInfo.h"
24
#include "llvm/Analysis/ScalarEvolution.h"
25
#include "llvm/Analysis/ScalarEvolutionExpander.h"
26
#include "llvm/Analysis/TargetTransformInfo.h"
27
#include "llvm/CodeGen/Passes.h"
28
#include "llvm/CodeGen/TargetPassConfig.h"
29
#include "llvm/IR/BasicBlock.h"
30
#include "llvm/IR/DataLayout.h"
31
#include "llvm/IR/Dominators.h"
32
#include "llvm/IR/Constants.h"
33
#include "llvm/IR/IRBuilder.h"
34
#include "llvm/IR/Instructions.h"
35
#include "llvm/IR/IntrinsicInst.h"
36
#include "llvm/IR/Value.h"
37
#include "llvm/Support/Debug.h"
38
#include "llvm/Transforms/Scalar.h"
39
#include "llvm/Transforms/Utils.h"
40
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
41
#include "llvm/Transforms/Utils/Local.h"
42
#include "llvm/Transforms/Utils/LoopUtils.h"
43
44
#define DEBUG_TYPE "hardware-loops"
45
46
#define HW_LOOPS_NAME "Hardware Loop Insertion"
47
48
using namespace llvm;
49
50
static cl::opt<bool>
51
ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false),
52
                   cl::desc("Force hardware loops intrinsics to be inserted"));
53
54
static cl::opt<bool>
55
ForceHardwareLoopPHI(
56
  "force-hardware-loop-phi", cl::Hidden, cl::init(false),
57
  cl::desc("Force hardware loop counter to be updated through a phi"));
58
59
static cl::opt<bool>
60
ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false),
61
                cl::desc("Force allowance of nested hardware loops"));
62
63
static cl::opt<unsigned>
64
LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1),
65
            cl::desc("Set the loop decrement value"));
66
67
static cl::opt<unsigned>
68
CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32),
69
                cl::desc("Set the loop counter bitwidth"));
70
71
static cl::opt<bool>
72
ForceGuardLoopEntry(
73
  "force-hardware-loop-guard", cl::Hidden, cl::init(false),
74
  cl::desc("Force generation of loop guard intrinsic"));
75
76
STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
77
78
namespace {
79
80
  using TTI = TargetTransformInfo;
81
82
  class HardwareLoops : public FunctionPass {
83
  public:
84
    static char ID;
85
86
6.60k
    HardwareLoops() : FunctionPass(ID) {
87
6.60k
      initializeHardwareLoopsPass(*PassRegistry::getPassRegistry());
88
6.60k
    }
89
90
    bool runOnFunction(Function &F) override;
91
92
6.57k
    void getAnalysisUsage(AnalysisUsage &AU) const override {
93
6.57k
      AU.addRequired<LoopInfoWrapperPass>();
94
6.57k
      AU.addPreserved<LoopInfoWrapperPass>();
95
6.57k
      AU.addRequired<DominatorTreeWrapperPass>();
96
6.57k
      AU.addPreserved<DominatorTreeWrapperPass>();
97
6.57k
      AU.addRequired<ScalarEvolutionWrapperPass>();
98
6.57k
      AU.addRequired<AssumptionCacheTracker>();
99
6.57k
      AU.addRequired<TargetTransformInfoWrapperPass>();
100
6.57k
    }
101
102
    // Try to convert the given Loop into a hardware loop.
103
    bool TryConvertLoop(Loop *L);
104
105
    // Given that the target believes the loop to be profitable, try to
106
    // convert it.
107
    bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);
108
109
  private:
110
    ScalarEvolution *SE = nullptr;
111
    LoopInfo *LI = nullptr;
112
    const DataLayout *DL = nullptr;
113
    const TargetTransformInfo *TTI = nullptr;
114
    DominatorTree *DT = nullptr;
115
    bool PreserveLCSSA = false;
116
    AssumptionCache *AC = nullptr;
117
    TargetLibraryInfo *LibInfo = nullptr;
118
    Module *M = nullptr;
119
    bool MadeChange = false;
120
  };
121
122
  class HardwareLoop {
123
    // Expand the trip count scev into a value that we can use.
124
    Value *InitLoopCount();
125
126
    // Insert the set_loop_iteration intrinsic.
127
    void InsertIterationSetup(Value *LoopCountInit);
128
129
    // Insert the loop_decrement intrinsic.
130
    void InsertLoopDec();
131
132
    // Insert the loop_decrement_reg intrinsic.
133
    Instruction *InsertLoopRegDec(Value *EltsRem);
134
135
    // If the target requires the counter value to be updated in the loop,
136
    // insert a phi to hold the value. The intended purpose is for use by
137
    // loop_decrement_reg.
138
    PHINode *InsertPHICounter(Value *NumElts, Value *EltsRem);
139
140
    // Create a new cmp, that checks the returned value of loop_decrement*,
141
    // and update the exit branch to use it.
142
    void UpdateBranch(Value *EltsRem);
143
144
  public:
145
    HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
146
                 const DataLayout &DL) :
147
      SE(SE), DL(DL), L(Info.L), M(L->getHeader()->getModule()),
148
      ExitCount(Info.ExitCount),
149
      CountType(Info.CountType),
150
      ExitBranch(Info.ExitBranch),
151
      LoopDecrement(Info.LoopDecrement),
152
      UsePHICounter(Info.CounterInReg),
153
331
      UseLoopGuard(Info.PerformEntryTest) { }
154
155
    void Create();
156
157
  private:
158
    ScalarEvolution &SE;
159
    const DataLayout &DL;
160
    Loop *L                 = nullptr;
161
    Module *M               = nullptr;
162
    const SCEV *ExitCount   = nullptr;
163
    Type *CountType         = nullptr;
164
    BranchInst *ExitBranch  = nullptr;
165
    Value *LoopDecrement    = nullptr;
166
    bool UsePHICounter      = false;
167
    bool UseLoopGuard       = false;
168
    BasicBlock *BeginBB     = nullptr;
169
  };
170
}
171
172
char HardwareLoops::ID = 0;
173
174
35.9k
bool HardwareLoops::runOnFunction(Function &F) {
175
35.9k
  if (skipFunction(F))
176
12
    return false;
177
35.9k
178
35.9k
  LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n");
179
35.9k
180
35.9k
  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
181
35.9k
  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
182
35.9k
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
183
35.9k
  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
184
35.9k
  DL = &F.getParent()->getDataLayout();
185
35.9k
  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
186
35.9k
  LibInfo = TLIP ? &TLIP->getTLI() : 
nullptr0
;
187
35.9k
  PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
188
35.9k
  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
189
35.9k
  M = F.getParent();
190
35.9k
191
40.3k
  for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; 
++I4.42k
) {
192
4.42k
    Loop *L = *I;
193
4.42k
    if (!L->getParentLoop())
194
4.42k
      TryConvertLoop(L);
195
4.42k
  }
196
35.9k
197
35.9k
  return MadeChange;
198
35.9k
}
199
200
// Return true if the search should stop, which will be when an inner loop is
201
// converted and the parent loop doesn't support containing a hardware loop.
202
5.30k
bool HardwareLoops::TryConvertLoop(Loop *L) {
203
5.30k
  // Process nested loops first.
204
6.11k
  for (Loop::iterator I = L->begin(), E = L->end(); I != E; 
++I811
)
205
879
    if (TryConvertLoop(*I))
206
68
      return true; // Stop search.
207
5.30k
208
5.30k
  HardwareLoopInfo HWLoopInfo(L);
209
5.23k
  if (!HWLoopInfo.canAnalyze(*LI))
210
0
    return false;
211
5.23k
212
5.23k
  if (TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo) ||
213
5.23k
      
ForceHardwareLoops4.85k
) {
214
458
215
458
    // Allow overriding of the counter width and loop decrement value.
216
458
    if (CounterBitWidth.getNumOccurrences())
217
82
      HWLoopInfo.CountType =
218
82
        IntegerType::get(M->getContext(), CounterBitWidth);
219
458
220
458
    if (LoopDecrement.getNumOccurrences())
221
82
      HWLoopInfo.LoopDecrement =
222
82
        ConstantInt::get(HWLoopInfo.CountType, LoopDecrement);
223
458
224
458
    MadeChange |= TryConvertLoop(HWLoopInfo);
225
458
    return MadeChange && 
(368
!HWLoopInfo.IsNestingLegal368
&&
!ForceNestedLoop368
);
226
458
  }
227
4.77k
228
4.77k
  return false;
229
4.77k
}
230
231
458
bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
232
458
233
458
  Loop *L = HWLoopInfo.L;
234
458
  LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
235
458
236
458
  if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop,
237
458
                                          ForceHardwareLoopPHI))
238
127
    return false;
239
331
240
331
  assert(
241
331
      (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) &&
242
331
      "Hardware Loop must have set exit info.");
243
331
244
331
  BasicBlock *Preheader = L->getLoopPreheader();
245
331
246
331
  // If we don't have a preheader, then insert one.
247
331
  if (!Preheader)
248
64
    Preheader = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA);
249
331
  if (!Preheader)
250
0
    return false;
251
331
252
331
  HardwareLoop HWLoop(HWLoopInfo, *SE, *DL);
253
331
  HWLoop.Create();
254
331
  ++NumHWLoops;
255
331
  return true;
256
331
}
257
258
331
void HardwareLoop::Create() {
259
331
  LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n");
260
331
 
261
331
  Value *LoopCountInit = InitLoopCount();
262
331
  if (!LoopCountInit)
263
0
    return;
264
331
265
331
  InsertIterationSetup(LoopCountInit);
266
331
267
331
  if (UsePHICounter || 
ForceHardwareLoopPHI243
) {
268
111
    Instruction *LoopDec = InsertLoopRegDec(LoopCountInit);
269
111
    Value *EltsRem = InsertPHICounter(LoopCountInit, LoopDec);
270
111
    LoopDec->setOperand(0, EltsRem);
271
111
    UpdateBranch(LoopDec);
272
111
  } else
273
220
    InsertLoopDec();
274
331
275
331
  // Run through the basic blocks of the loop and see if any of them have dead
276
331
  // PHIs that can be removed.
277
331
  for (auto I : L->blocks())
278
376
    DeleteDeadPHIs(I);
279
331
}
280
281
116
static bool CanGenerateTest(Loop *L, Value *Count) {
282
116
  BasicBlock *Preheader = L->getLoopPreheader();
283
116
  if (!Preheader->getSinglePredecessor())
284
47
    return false;
285
69
286
69
  BasicBlock *Pred = Preheader->getSinglePredecessor();
287
69
  if (!isa<BranchInst>(Pred->getTerminator()))
288
0
    return false;
289
69
290
69
  auto *BI = cast<BranchInst>(Pred->getTerminator());
291
69
  if (BI->isUnconditional() || !isa<ICmpInst>(BI->getCondition()))
292
18
    return false;
293
51
294
51
  // Check that the icmp is checking for equality of Count and zero and that
295
51
  // a non-zero value results in entering the loop.
296
51
  auto ICmp = cast<ICmpInst>(BI->getCondition());
297
51
  LLVM_DEBUG(dbgs() << " - Found condition: " << *ICmp << "\n");
298
51
  if (!ICmp->isEquality())
299
18
    return false;
300
33
301
66
  
auto IsCompareZero = [](ICmpInst *ICmp, Value *Count, unsigned OpIdx) 33
{
302
66
    if (auto *Const = dyn_cast<ConstantInt>(ICmp->getOperand(OpIdx)))
303
33
      return Const->isZero() && ICmp->getOperand(OpIdx ^ 1) == Count;
304
33
    return false;
305
33
  };
306
33
307
33
  if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1))
308
5
    return false;
309
28
310
28
  unsigned SuccIdx = ICmp->getPredicate() == ICmpInst::ICMP_NE ? 
012
:
116
;
311
28
  if (BI->getSuccessor(SuccIdx) != Preheader)
312
0
    return false;
313
28
314
28
  return true;
315
28
}
316
317
331
Value *HardwareLoop::InitLoopCount() {
318
331
  LLVM_DEBUG(dbgs() << "HWLoops: Initialising loop counter value:\n");
319
331
  // Can we replace a conditional branch with an intrinsic that sets the
320
331
  // loop counter and tests that is not zero?
321
331
322
331
  SCEVExpander SCEVE(SE, DL, "loopcnt");
323
331
  if (!ExitCount->getType()->isPointerTy() &&
324
331
      ExitCount->getType() != CountType)
325
56
    ExitCount = SE.getZeroExtendExpr(ExitCount, CountType);
326
331
327
331
  ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType));
328
331
329
331
  // If we're trying to use the 'test and set' form of the intrinsic, we need
330
331
  // to replace a conditional branch that is controlling entry to the loop. It
331
331
  // is likely (guaranteed?) that the preheader has an unconditional branch to
332
331
  // the loop header, so also check if it has a single predecessor.
333
331
  if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount,
334
331
                                  SE.getZero(ExitCount->getType()))) {
335
287
    LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
336
287
    UseLoopGuard |= ForceGuardLoopEntry;
337
287
  } else
338
44
    UseLoopGuard = false;
339
331
340
331
  BasicBlock *BB = L->getLoopPreheader();
341
331
  if (UseLoopGuard && 
BB->getSinglePredecessor()116
&&
342
331
      
cast<BranchInst>(BB->getTerminator())->isUnconditional()69
)
343
69
    BB = BB->getSinglePredecessor();
344
331
345
331
  if (!isSafeToExpandAt(ExitCount, BB->getTerminator(), SE)) {
346
0
    LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount "
347
0
               << *ExitCount << "\n");
348
0
    return nullptr;
349
0
  }
350
331
351
331
  Value *Count = SCEVE.expandCodeFor(ExitCount, CountType,
352
331
                                     BB->getTerminator());
353
331
354
331
  // FIXME: We've expanded Count where we hope to insert the counter setting
355
331
  // intrinsic. But, in the case of the 'test and set' form, we may fallback to
356
331
  // the just 'set' form and in which case the insertion block is most likely
357
331
  // different. It means there will be instruction(s) in a block that possibly
358
331
  // aren't needed. The isLoopEntryGuardedByCond is trying to avoid this issue,
359
331
  // but it's doesn't appear to work in all cases.
360
331
361
331
  UseLoopGuard = UseLoopGuard && 
CanGenerateTest(L, Count)116
;
362
331
  BeginBB = UseLoopGuard ? 
BB28
:
L->getLoopPreheader()303
;
363
331
  LLVM_DEBUG(dbgs() << " - Loop Count: " << *Count << "\n"
364
331
             << " - Expanded Count in " << BB->getName() << "\n"
365
331
             << " - Will insert set counter intrinsic into: "
366
331
             << BeginBB->getName() << "\n");
367
331
  return Count;
368
331
}
369
370
331
void HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
371
331
  IRBuilder<> Builder(BeginBB->getTerminator());
372
331
  Type *Ty = LoopCountInit->getType();
373
331
  Intrinsic::ID ID = UseLoopGuard ?
374
303
    
Intrinsic::test_set_loop_iterations28
: Intrinsic::set_loop_iterations;
375
331
  Function *LoopIter = Intrinsic::getDeclaration(M, ID, Ty);
376
331
  Value *SetCount = Builder.CreateCall(LoopIter, LoopCountInit);
377
331
378
331
  // Use the return value of the intrinsic to control the entry of the loop.
379
331
  if (UseLoopGuard) {
380
28
    assert((isa<BranchInst>(BeginBB->getTerminator()) &&
381
28
            cast<BranchInst>(BeginBB->getTerminator())->isConditional()) &&
382
28
           "Expected conditional branch");
383
28
    auto *LoopGuard = cast<BranchInst>(BeginBB->getTerminator());
384
28
    LoopGuard->setCondition(SetCount);
385
28
    if (LoopGuard->getSuccessor(0) != L->getLoopPreheader())
386
16
      LoopGuard->swapSuccessors();
387
28
  }
388
331
  LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: "
389
331
             << *SetCount << "\n");
390
331
}
391
392
220
void HardwareLoop::InsertLoopDec() {
393
220
  IRBuilder<> CondBuilder(ExitBranch);
394
220
395
220
  Function *DecFunc =
396
220
    Intrinsic::getDeclaration(M, Intrinsic::loop_decrement,
397
220
                              LoopDecrement->getType());
398
220
  Value *Ops[] = { LoopDecrement };
399
220
  Value *NewCond = CondBuilder.CreateCall(DecFunc, Ops);
400
220
  Value *OldCond = ExitBranch->getCondition();
401
220
  ExitBranch->setCondition(NewCond);
402
220
403
220
  // The false branch must exit the loop.
404
220
  if (!L->contains(ExitBranch->getSuccessor(0)))
405
163
    ExitBranch->swapSuccessors();
406
220
407
220
  // The old condition may be dead now, and may have even created a dead PHI
408
220
  // (the original induction variable).
409
220
  RecursivelyDeleteTriviallyDeadInstructions(OldCond);
410
220
411
220
  LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *NewCond << "\n");
412
220
}
413
414
111
Instruction* HardwareLoop::InsertLoopRegDec(Value *EltsRem) {
415
111
  IRBuilder<> CondBuilder(ExitBranch);
416
111
417
111
  Function *DecFunc =
418
111
      Intrinsic::getDeclaration(M, Intrinsic::loop_decrement_reg,
419
111
                                { EltsRem->getType(), EltsRem->getType(),
420
111
                                  LoopDecrement->getType()
421
111
                                });
422
111
  Value *Ops[] = { EltsRem, LoopDecrement };
423
111
  Value *Call = CondBuilder.CreateCall(DecFunc, Ops);
424
111
425
111
  LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *Call << "\n");
426
111
  return cast<Instruction>(Call);
427
111
}
428
429
111
PHINode* HardwareLoop::InsertPHICounter(Value *NumElts, Value *EltsRem) {
430
111
  BasicBlock *Preheader = L->getLoopPreheader();
431
111
  BasicBlock *Header = L->getHeader();
432
111
  BasicBlock *Latch = ExitBranch->getParent();
433
111
  IRBuilder<> Builder(Header->getFirstNonPHI());
434
111
  PHINode *Index = Builder.CreatePHI(NumElts->getType(), 2);
435
111
  Index->addIncoming(NumElts, Preheader);
436
111
  Index->addIncoming(EltsRem, Latch);
437
111
  LLVM_DEBUG(dbgs() << "HWLoops: PHI Counter: " << *Index << "\n");
438
111
  return Index;
439
111
}
440
441
111
void HardwareLoop::UpdateBranch(Value *EltsRem) {
442
111
  IRBuilder<> CondBuilder(ExitBranch);
443
111
  Value *NewCond =
444
111
    CondBuilder.CreateICmpNE(EltsRem, ConstantInt::get(EltsRem->getType(), 0));
445
111
  Value *OldCond = ExitBranch->getCondition();
446
111
  ExitBranch->setCondition(NewCond);
447
111
448
111
  // The false branch must exit the loop.
449
111
  if (!L->contains(ExitBranch->getSuccessor(0)))
450
38
    ExitBranch->swapSuccessors();
451
111
452
111
  // The old condition may be dead now, and may have even created a dead PHI
453
111
  // (the original induction variable).
454
111
  RecursivelyDeleteTriviallyDeadInstructions(OldCond);
455
111
}
456
457
49.1k
INITIALIZE_PASS_BEGIN(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
458
49.1k
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
459
49.1k
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
460
49.1k
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
461
49.1k
INITIALIZE_PASS_END(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
462
463
6.57k
FunctionPass *llvm::createHardwareLoopsPass() { return new HardwareLoops(); }