Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- SpeculativeExecution.cpp ---------------------------------*- C++ -*-===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// This pass hoists instructions to enable speculative execution on
11
// targets where branches are expensive. This is aimed at GPUs. It
12
// currently works on simple if-then and if-then-else
13
// patterns.
14
//
15
// Removing branches is not the only motivation for this
16
// pass. E.g. consider this code and assume that there is no
17
// addressing mode for multiplying by sizeof(*a):
18
//
19
//   if (b > 0)
20
//     c = a[i + 1]
21
//   if (d > 0)
22
//     e = a[i + 2]
23
//
24
// turns into
25
//
26
//   p = &a[i + 1];
27
//   if (b > 0)
28
//     c = *p;
29
//   q = &a[i + 2];
30
//   if (d > 0)
31
//     e = *q;
32
//
33
// which could later be optimized to
34
//
35
//   r = &a[i];
36
//   if (b > 0)
37
//     c = r[1];
38
//   if (d > 0)
39
//     e = r[2];
40
//
41
// Later passes sink back much of the speculated code that did not enable
42
// further optimization.
43
//
44
// This pass is more aggressive than the function SpeculativeyExecuteBB in
45
// SimplifyCFG. SimplifyCFG will not speculate if no selects are introduced and
46
// it will speculate at most one instruction. It also will not speculate if
47
// there is a value defined in the if-block that is only used in the then-block.
48
// These restrictions make sense since the speculation in SimplifyCFG seems
49
// aimed at introducing cheap selects, while this pass is intended to do more
50
// aggressive speculation while counting on later passes to either capitalize on
51
// that or clean it up.
52
//
53
// If the pass was created by calling
54
// createSpeculativeExecutionIfHasBranchDivergencePass or the
55
// -spec-exec-only-if-divergent-target option is present, this pass only has an
56
// effect on targets where TargetTransformInfo::hasBranchDivergence() is true;
57
// on other targets, it is a nop.
58
//
59
// This lets you include this pass unconditionally in the IR pass pipeline, but
60
// only enable it for relevant targets.
61
//
62
//===----------------------------------------------------------------------===//
63
64
#include "llvm/Transforms/Scalar/SpeculativeExecution.h"
65
#include "llvm/ADT/SmallSet.h"
66
#include "llvm/Analysis/GlobalsModRef.h"
67
#include "llvm/Analysis/ValueTracking.h"
68
#include "llvm/IR/Instructions.h"
69
#include "llvm/IR/Module.h"
70
#include "llvm/IR/Operator.h"
71
#include "llvm/Support/CommandLine.h"
72
#include "llvm/Support/Debug.h"
73
74
using namespace llvm;
75
76
#define DEBUG_TYPE "speculative-execution"
77
78
// The risk that speculation will not pay off increases with the
79
// number of instructions speculated, so we put a limit on that.
80
static cl::opt<unsigned> SpecExecMaxSpeculationCost(
81
    "spec-exec-max-speculation-cost", cl::init(7), cl::Hidden,
82
    cl::desc("Speculative execution is not applied to basic blocks where "
83
             "the cost of the instructions to speculatively execute "
84
             "exceeds this limit."));
85
86
// Speculating just a few instructions from a larger block tends not
87
// to be profitable and this limit prevents that. A reason for that is
88
// that small basic blocks are more likely to be candidates for
89
// further optimization.
90
static cl::opt<unsigned> SpecExecMaxNotHoisted(
91
    "spec-exec-max-not-hoisted", cl::init(5), cl::Hidden,
92
    cl::desc("Speculative execution is not applied to basic blocks where the "
93
             "number of instructions that would not be speculatively executed "
94
             "exceeds this limit."));
95
96
static cl::opt<bool> SpecExecOnlyIfDivergentTarget(
97
    "spec-exec-only-if-divergent-target", cl::init(false), cl::Hidden,
98
    cl::desc("Speculative execution is applied only to targets with divergent "
99
             "branches, even if the pass was configured to apply only to all "
100
             "targets."));
101
102
namespace {
103
104
class SpeculativeExecutionLegacyPass : public FunctionPass {
105
public:
106
  static char ID;
107
  explicit SpeculativeExecutionLegacyPass(bool OnlyIfDivergentTarget = false)
108
      : FunctionPass(ID), OnlyIfDivergentTarget(OnlyIfDivergentTarget ||
109
                                                SpecExecOnlyIfDivergentTarget),
110
19.1k
        Impl(OnlyIfDivergentTarget) {}
111
112
  void getAnalysisUsage(AnalysisUsage &AU) const override;
113
  bool runOnFunction(Function &F) override;
114
115
51
  StringRef getPassName() const override {
116
51
    if (OnlyIfDivergentTarget)
117
51
      return "Speculatively execute instructions if target has divergent "
118
51
             "branches";
119
0
    return "Speculatively execute instructions";
120
0
  }
121
122
private:
123
  // Variable preserved purely for correct name printing.
124
  const bool OnlyIfDivergentTarget;
125
126
  SpeculativeExecutionPass Impl;
127
};
128
} // namespace
129
130
char SpeculativeExecutionLegacyPass::ID = 0;
131
24.6k
INITIALIZE_PASS_BEGIN24.6k
(SpeculativeExecutionLegacyPass, "speculative-execution",
132
24.6k
                      "Speculatively execute instructions", false, false)
133
24.6k
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
134
24.6k
INITIALIZE_PASS_END(SpeculativeExecutionLegacyPass, "speculative-execution",
135
                    "Speculatively execute instructions", false, false)
136
137
19.1k
void SpeculativeExecutionLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
138
19.1k
  AU.addRequired<TargetTransformInfoWrapperPass>();
139
19.1k
  AU.addPreserved<GlobalsAAWrapperPass>();
140
19.1k
}
141
142
533k
bool SpeculativeExecutionLegacyPass::runOnFunction(Function &F) {
143
533k
  if (skipFunction(F))
144
58
    return false;
145
533k
146
533k
  auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
147
533k
  return Impl.runImpl(F, TTI);
148
533k
}
149
150
namespace llvm {
151
152
533k
bool SpeculativeExecutionPass::runImpl(Function &F, TargetTransformInfo *TTI) {
153
533k
  if (
OnlyIfDivergentTarget && 533k
!TTI->hasBranchDivergence()516k
) {
154
515k
    DEBUG(dbgs() << "Not running SpeculativeExecution because "
155
515k
                    "TTI->hasBranchDivergence() is false.\n");
156
515k
    return false;
157
515k
  }
158
18.5k
159
18.5k
  this->TTI = TTI;
160
18.5k
  bool Changed = false;
161
20.9k
  for (auto& B : F) {
162
20.9k
    Changed |= runOnBasicBlock(B);
163
20.9k
  }
164
533k
  return Changed;
165
533k
}
166
167
20.9k
bool SpeculativeExecutionPass::runOnBasicBlock(BasicBlock &B) {
168
20.9k
  BranchInst *BI = dyn_cast<BranchInst>(B.getTerminator());
169
20.9k
  if (BI == nullptr)
170
18.6k
    return false;
171
2.30k
172
2.30k
  
if (2.30k
BI->getNumSuccessors() != 22.30k
)
173
1.21k
    return false;
174
1.09k
  BasicBlock &Succ0 = *BI->getSuccessor(0);
175
1.09k
  BasicBlock &Succ1 = *BI->getSuccessor(1);
176
1.09k
177
1.09k
  if (
&B == &Succ0 || 1.09k
&B == &Succ11.07k
||
&Succ0 == &Succ1988
) {
178
106
    return false;
179
106
  }
180
988
181
988
  // Hoist from if-then (triangle).
182
988
  
if (988
Succ0.getSinglePredecessor() != nullptr &&
183
988
      
Succ0.getSingleSuccessor() == &Succ1550
) {
184
229
    return considerHoistingFromTo(Succ0, B);
185
229
  }
186
759
187
759
  // Hoist from if-else (triangle).
188
759
  
if (759
Succ1.getSinglePredecessor() != nullptr &&
189
759
      
Succ1.getSingleSuccessor() == &Succ0620
) {
190
292
    return considerHoistingFromTo(Succ1, B);
191
292
  }
192
467
193
467
  // Hoist from if-then-else (diamond), but only if it is equivalent to
194
467
  // an if-else or if-then due to one of the branches doing nothing.
195
467
  
if (467
Succ0.getSinglePredecessor() != nullptr &&
196
321
      Succ1.getSinglePredecessor() != nullptr &&
197
240
      Succ1.getSingleSuccessor() != nullptr &&
198
154
      Succ1.getSingleSuccessor() != &B &&
199
467
      
Succ1.getSingleSuccessor() == Succ0.getSingleSuccessor()133
) {
200
130
    // If a block has only one instruction, then that is a terminator
201
130
    // instruction so that the block does nothing. This does happen.
202
130
    if (Succ1.size() == 1) // equivalent to if-then
203
25
      return considerHoistingFromTo(Succ0, B);
204
105
    
if (105
Succ0.size() == 1105
) // equivalent to if-else
205
4
      return considerHoistingFromTo(Succ1, B);
206
438
  }
207
438
208
438
  return false;
209
438
}
210
211
static unsigned ComputeSpeculationCost(const Instruction *I,
212
1.37k
                                       const TargetTransformInfo &TTI) {
213
1.37k
  switch (Operator::getOpcode(I)) {
214
540
    case Instruction::GetElementPtr:
215
540
    case Instruction::Add:
216
540
    case Instruction::Mul:
217
540
    case Instruction::And:
218
540
    case Instruction::Or:
219
540
    case Instruction::Select:
220
540
    case Instruction::Shl:
221
540
    case Instruction::Sub:
222
540
    case Instruction::LShr:
223
540
    case Instruction::AShr:
224
540
    case Instruction::Xor:
225
540
    case Instruction::ZExt:
226
540
    case Instruction::SExt:
227
540
    case Instruction::Call:
228
540
    case Instruction::BitCast:
229
540
    case Instruction::PtrToInt:
230
540
    case Instruction::IntToPtr:
231
540
    case Instruction::AddrSpaceCast:
232
540
    case Instruction::FPToUI:
233
540
    case Instruction::FPToSI:
234
540
    case Instruction::UIToFP:
235
540
    case Instruction::SIToFP:
236
540
    case Instruction::FPExt:
237
540
    case Instruction::FPTrunc:
238
540
    case Instruction::FAdd:
239
540
    case Instruction::FSub:
240
540
    case Instruction::FMul:
241
540
    case Instruction::FDiv:
242
540
    case Instruction::FRem:
243
540
    case Instruction::ICmp:
244
540
    case Instruction::FCmp:
245
540
      return TTI.getUserCost(I);
246
540
247
839
    default:
248
839
      return UINT_MAX; // Disallow anything not whitelisted.
249
0
  }
250
0
}
251
252
bool SpeculativeExecutionPass::considerHoistingFromTo(
253
550
    BasicBlock &FromBlock, BasicBlock &ToBlock) {
254
550
  SmallSet<const Instruction *, 8> NotHoisted;
255
414
  const auto AllPrecedingUsesFromBlockHoisted = [&NotHoisted](User *U) {
256
729
    for (Value* V : U->operand_values()) {
257
729
      if (Instruction *
I729
= dyn_cast<Instruction>(V)) {
258
434
        if (NotHoisted.count(I) > 0)
259
81
          return false;
260
333
      }
261
729
    }
262
333
    return true;
263
333
  };
264
550
265
550
  unsigned TotalSpeculationCost = 0;
266
1.37k
  for (auto& I : FromBlock) {
267
1.37k
    const unsigned Cost = ComputeSpeculationCost(&I, *TTI);
268
1.37k
    if (
Cost != UINT_MAX && 1.37k
isSafeToSpeculativelyExecute(&I)540
&&
269
1.37k
        
AllPrecedingUsesFromBlockHoisted(&I)414
) {
270
333
      TotalSpeculationCost += Cost;
271
333
      if (TotalSpeculationCost > SpecExecMaxSpeculationCost)
272
18
        return false;  // too much to hoist
273
1.04k
    } else {
274
1.04k
      NotHoisted.insert(&I);
275
1.04k
      if (NotHoisted.size() > SpecExecMaxNotHoisted)
276
8
        return false; // too much left behind
277
524
    }
278
1.37k
  }
279
524
280
524
  
if (524
TotalSpeculationCost == 0524
)
281
389
    return false; // nothing to hoist
282
135
283
504
  
for (auto I = FromBlock.begin(); 135
I != FromBlock.end()504
;) {
284
369
    // We have to increment I before moving Current as moving Current
285
369
    // changes the list that I is iterating through.
286
369
    auto Current = I;
287
369
    ++I;
288
369
    if (
!NotHoisted.count(&*Current)369
) {
289
199
      Current->moveBefore(ToBlock.getTerminator());
290
199
    }
291
369
  }
292
550
  return true;
293
550
}
294
295
1.88k
FunctionPass *createSpeculativeExecutionPass() {
296
1.88k
  return new SpeculativeExecutionLegacyPass();
297
1.88k
}
298
299
17.2k
FunctionPass *createSpeculativeExecutionIfHasBranchDivergencePass() {
300
17.2k
  return new SpeculativeExecutionLegacyPass(/* OnlyIfDivergentTarget = */ true);
301
17.2k
}
302
303
SpeculativeExecutionPass::SpeculativeExecutionPass(bool OnlyIfDivergentTarget)
304
    : OnlyIfDivergentTarget(OnlyIfDivergentTarget ||
305
19.2k
                            SpecExecOnlyIfDivergentTarget) {}
306
307
PreservedAnalyses SpeculativeExecutionPass::run(Function &F,
308
118
                                                FunctionAnalysisManager &AM) {
309
118
  auto *TTI = &AM.getResult<TargetIRAnalysis>(F);
310
118
311
118
  bool Changed = runImpl(F, TTI);
312
118
313
118
  if (!Changed)
314
113
    return PreservedAnalyses::all();
315
5
  PreservedAnalyses PA;
316
5
  PA.preserve<GlobalsAA>();
317
5
  return PA;
318
5
}
319
}  // namespace llvm