Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Analysis/DivergenceAnalysis.cpp
//===- DivergenceAnalysis.cpp --------- Divergence Analysis Implementation -==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a general divergence analysis for loop vectorization
// and GPU programs. It determines which branches and values in a loop or GPU
// program are divergent. It can help branch optimizations such as jump
// threading and loop unswitching to make better decisions.
//
// GPU programs typically use the SIMD execution model, where multiple threads
// in the same execution group have to execute in lock-step. Therefore, if the
// code contains divergent branches (i.e., threads in a group do not agree on
// which path of the branch to take), the group of threads has to execute all
// the paths from that branch with different subsets of threads enabled until
// they re-converge.
//
// Due to this execution model, some optimizations such as jump
// threading and loop unswitching can interfere with thread re-convergence.
// Therefore, an analysis that computes which branches in a GPU program are
// divergent can help the compiler to selectively run these optimizations.
//
// This implementation is derived from the Vectorization Analysis of the
// Region Vectorizer (RV). That implementation in turn is based on the approach
// described in
//
//   Improving Performance of OpenCL on CPUs
//   Ralf Karrenberg and Sebastian Hack
//   CC '12
//
// This DivergenceAnalysis implementation is generic in the sense that it does
// not itself identify original sources of divergence.
// Instead, specialized adapter classes, LoopDivergenceAnalysis for loops and
// GPUDivergenceAnalysis for GPU programs, identify the sources of divergence
// (e.g., special variables that hold the thread ID or the iteration variable).
//
// The generic implementation propagates divergence to variables that are data
// or sync dependent on a source of divergence.
//
// While data dependency is a well-known concept, the notion of sync dependency
// is worth more explanation. Sync dependence characterizes the control flow
// aspect of the propagation of branch divergence. For example,
//
//   %cond = icmp slt i32 %tid, 10
//   br i1 %cond, label %then, label %else
// then:
//   br label %merge
// else:
//   br label %merge
// merge:
//   %a = phi i32 [ 0, %then ], [ 1, %else ]
//
// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
// because %tid is not on its use-def chains, %a is sync dependent on %tid
// because the branch "br i1 %cond" depends on %tid and affects which value %a
// is assigned to.
//
// The sync dependence detection (which branch induces divergence in which join
// points) is implemented in the SyncDependenceAnalysis.
//
// The current DivergenceAnalysis implementation has the following limitations:
// 1. intra-procedural. It conservatively considers the arguments of a
//    non-kernel-entry function and the return value of a function call as
//    divergent.
// 2. memory as black box. It conservatively considers values loaded from
//    generic or local address as divergent. This can be improved by leveraging
//    pointer analysis and/or by modelling non-escaping memory objects in SSA
//    as done in RV.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "divergence-analysis"

// class DivergenceAnalysis
DivergenceAnalysis::DivergenceAnalysis(
    const Function &F, const Loop *RegionLoop, const DominatorTree &DT,
    const LoopInfo &LI, SyncDependenceAnalysis &SDA, bool IsLCSSAForm)
    : F(F), RegionLoop(RegionLoop), DT(DT), LI(LI), SDA(SDA),
      IsLCSSAForm(IsLCSSAForm) {}

void DivergenceAnalysis::markDivergent(const Value &DivVal) {
  assert(isa<Instruction>(DivVal) || isa<Argument>(DivVal));
  assert(!isAlwaysUniform(DivVal) && "cannot be a divergent");
  DivergentValues.insert(&DivVal);
}

void DivergenceAnalysis::addUniformOverride(const Value &UniVal) {
  UniformOverrides.insert(&UniVal);
}

bool DivergenceAnalysis::updateTerminator(const Instruction &Term) const {
  if (Term.getNumSuccessors() <= 1)
    return false;
  if (auto *BranchTerm = dyn_cast<BranchInst>(&Term)) {
    assert(BranchTerm->isConditional());
    return isDivergent(*BranchTerm->getCondition());
  }
  if (auto *SwitchTerm = dyn_cast<SwitchInst>(&Term)) {
    return isDivergent(*SwitchTerm->getCondition());
  }
  if (isa<InvokeInst>(Term)) {
    return false; // ignore abnormal executions through landingpad
  }

  llvm_unreachable("unexpected terminator");
}

bool DivergenceAnalysis::updateNormalInstruction(const Instruction &I) const {
  // TODO function calls with side effects, etc
  for (const auto &Op : I.operands()) {
    if (isDivergent(*Op))
      return true;
  }
  return false;
}

bool DivergenceAnalysis::isTemporalDivergent(const BasicBlock &ObservingBlock,
                                             const Value &Val) const {
  const auto *Inst = dyn_cast<const Instruction>(&Val);
  if (!Inst)
    return false;
  // check whether any divergent loop carrying Val terminates before control
  // proceeds to ObservingBlock
  for (const auto *Loop = LI.getLoopFor(Inst->getParent());
       Loop != RegionLoop && !Loop->contains(&ObservingBlock);
       Loop = Loop->getParentLoop()) {
    if (DivergentLoops.find(Loop) != DivergentLoops.end())
      return true;
  }

  return false;
}
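
// For illustration (a minimal IR sketch, not part of the original source):
// assume %tid holds the thread ID. The exit branch below is divergent, so
// threads leave the loop in different iterations; %inc is uniform inside the
// loop, yet the value of %inc observed in %exit differs per thread, i.e. it
// is temporally divergent there.
//
//   loop:
//     %iv  = phi i32 [ 0, %entry ], [ %inc, %loop ]
//     %inc = add i32 %iv, 1
//     %cmp = icmp ult i32 %inc, %tid   ; divergent exit condition
//     br i1 %cmp, label %loop, label %exit
//   exit:
//     %use = add i32 %inc, 1           ; %inc is temporally divergent here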

bool DivergenceAnalysis::updatePHINode(const PHINode &Phi) const {
  // joining divergent disjoint paths in Phi parent block
  if (!Phi.hasConstantOrUndefValue() && isJoinDivergent(*Phi.getParent())) {
    return true;
  }

  // An incoming value could be divergent by itself.
  // Otherwise, an incoming value could be uniform within the loop
  // that carries its definition but it may appear divergent
  // from outside the loop. This happens when divergent loop exits
  // drop definitions of that uniform value in different iterations.
  //
  // for (int i = 0; i < n; ++i) { // 'i' is uniform inside the loop
  //   if (i % thread_id == 0) break;    // divergent loop exit
  // }
  // int divI = i;                 // divI is divergent
  for (size_t i = 0; i < Phi.getNumIncomingValues(); ++i) {
    const auto *InVal = Phi.getIncomingValue(i);
    if (isDivergent(*Phi.getIncomingValue(i)) ||
        isTemporalDivergent(*Phi.getParent(), *InVal)) {
      return true;
    }
  }
  return false;
}

bool DivergenceAnalysis::inRegion(const Instruction &I) const {
  return I.getParent() && inRegion(*I.getParent());
}

bool DivergenceAnalysis::inRegion(const BasicBlock &BB) const {
  return (!RegionLoop && BB.getParent() == &F) || RegionLoop->contains(&BB);
}

// marks all users of loop-carried values of the loop headed by LoopHeader as
// divergent
void DivergenceAnalysis::taintLoopLiveOuts(const BasicBlock &LoopHeader) {
  auto *DivLoop = LI.getLoopFor(&LoopHeader);
  assert(DivLoop && "loopHeader is not actually part of a loop");

  SmallVector<BasicBlock *, 8> TaintStack;
  DivLoop->getExitBlocks(TaintStack);

  // Otherwise potential users of loop-carried values could be anywhere in the
  // dominance region of DivLoop (including its fringes for phi nodes)
  DenseSet<const BasicBlock *> Visited;
  for (auto *Block : TaintStack) {
    Visited.insert(Block);
  }
  Visited.insert(&LoopHeader);

  while (!TaintStack.empty()) {
    auto *UserBlock = TaintStack.back();
    TaintStack.pop_back();

    // don't spread divergence beyond the region
    if (!inRegion(*UserBlock))
      continue;

    assert(!DivLoop->contains(UserBlock) &&
           "irreducible control flow detected");

    // phi nodes at the fringes of the dominance region
    if (!DT.dominates(&LoopHeader, UserBlock)) {
      // all PHI nodes of UserBlock become divergent
      for (auto &Phi : UserBlock->phis()) {
        Worklist.push_back(&Phi);
      }
      continue;
    }

    // taint outside users of values carried by DivLoop
    for (auto &I : *UserBlock) {
      if (isAlwaysUniform(I))
        continue;
      if (isDivergent(I))
        continue;

      for (auto &Op : I.operands()) {
        auto *OpInst = dyn_cast<Instruction>(&Op);
        if (!OpInst)
          continue;
        if (DivLoop->contains(OpInst->getParent())) {
          markDivergent(I);
          pushUsers(I);
          break;
        }
      }
    }

    // visit all blocks in the dominance region
    for (auto *SuccBlock : successors(UserBlock)) {
      if (!Visited.insert(SuccBlock).second) {
        continue;
      }
      TaintStack.push_back(SuccBlock);
    }
  }
}

void DivergenceAnalysis::pushPHINodes(const BasicBlock &Block) {
  for (const auto &Phi : Block.phis()) {
    if (isDivergent(Phi))
      continue;
    Worklist.push_back(&Phi);
  }
}

void DivergenceAnalysis::pushUsers(const Value &V) {
  for (const auto *User : V.users()) {
    const auto *UserInst = dyn_cast<const Instruction>(User);
    if (!UserInst)
      continue;

    if (isDivergent(*UserInst))
      continue;

    // only compute divergence inside the region
    if (!inRegion(*UserInst))
      continue;
    Worklist.push_back(UserInst);
  }
}

bool DivergenceAnalysis::propagateJoinDivergence(const BasicBlock &JoinBlock,
                                                 const Loop *BranchLoop) {
  LLVM_DEBUG(dbgs() << "\tpropJoinDiv " << JoinBlock.getName() << "\n");

  // ignore divergence outside the region
  if (!inRegion(JoinBlock)) {
    return false;
  }

  // push non-divergent phi nodes in JoinBlock to the worklist
  pushPHINodes(JoinBlock);

  // JoinBlock is a divergent loop exit
  if (BranchLoop && !BranchLoop->contains(&JoinBlock)) {
    return true;
  }

  // disjoint-paths divergent at JoinBlock
  markBlockJoinDivergent(JoinBlock);
  return false;
}

void DivergenceAnalysis::propagateBranchDivergence(const Instruction &Term) {
  LLVM_DEBUG(dbgs() << "propBranchDiv " << Term.getParent()->getName() << "\n");

  markDivergent(Term);

  const auto *BranchLoop = LI.getLoopFor(Term.getParent());

  // whether there is a divergent loop exit from BranchLoop (if any)
  bool IsBranchLoopDivergent = false;

  // iterate over all blocks reachable by disjoint paths from Term within the
  // loop; also iterates over loop exits that become divergent due to Term.
  for (const auto *JoinBlock : SDA.join_blocks(Term)) {
    IsBranchLoopDivergent |= propagateJoinDivergence(*JoinBlock, BranchLoop);
  }

  // Branch loop is a divergent loop due to the divergent branch in Term
  if (IsBranchLoopDivergent) {
    assert(BranchLoop);
    if (!DivergentLoops.insert(BranchLoop).second) {
      return;
    }
    propagateLoopDivergence(*BranchLoop);
  }
}

void DivergenceAnalysis::propagateLoopDivergence(const Loop &ExitingLoop) {
  LLVM_DEBUG(dbgs() << "propLoopDiv " << ExitingLoop.getName() << "\n");

  // don't propagate beyond region
  if (!inRegion(*ExitingLoop.getHeader()))
    return;

  const auto *BranchLoop = ExitingLoop.getParentLoop();

  // Uses of loop-carried values could occur anywhere
  // within the dominance region of the definition. All loop-carried
  // definitions are dominated by the loop header (reducible control).
  // Thus all users have to be in the dominance region of the loop header,
  // except PHI nodes that can also live at the fringes of the dom region
  // (incoming defining value).
  if (!IsLCSSAForm)
    taintLoopLiveOuts(*ExitingLoop.getHeader());

  // whether there is a divergent loop exit from BranchLoop (if any)
  bool IsBranchLoopDivergent = false;

  // iterate over all blocks reachable by disjoint paths from exits of
  // ExitingLoop; also iterates over loop exits (of BranchLoop) that in turn
  // become divergent.
  for (const auto *JoinBlock : SDA.join_blocks(ExitingLoop)) {
    IsBranchLoopDivergent |= propagateJoinDivergence(*JoinBlock, BranchLoop);
  }

  // BranchLoop is divergent due to a divergent loop exit in ExitingLoop
  if (IsBranchLoopDivergent) {
    assert(BranchLoop);
    if (!DivergentLoops.insert(BranchLoop).second) {
      return;
    }
    propagateLoopDivergence(*BranchLoop);
  }
}

// Propagates divergence from the seeded sources to a fixed point.
void DivergenceAnalysis::compute() {
  for (auto *DivVal : DivergentValues) {
    pushUsers(*DivVal);
  }

  // propagate divergence
  while (!Worklist.empty()) {
    const Instruction &I = *Worklist.back();
    Worklist.pop_back();

    // maintain uniformity of overrides
    if (isAlwaysUniform(I))
      continue;

    bool WasDivergent = isDivergent(I);
    if (WasDivergent)
      continue;

    // propagate divergence caused by terminator
    if (I.isTerminator()) {
      if (updateTerminator(I)) {
        // propagate control divergence to affected instructions
        propagateBranchDivergence(I);
        continue;
      }
    }

    // update divergence of I due to divergent operands
    bool DivergentUpd = false;
    const auto *Phi = dyn_cast<const PHINode>(&I);
    if (Phi) {
      DivergentUpd = updatePHINode(*Phi);
    } else {
      DivergentUpd = updateNormalInstruction(I);
    }

    // propagate value divergence to users
    if (DivergentUpd) {
      markDivergent(I);
      pushUsers(I);
    }
  }
}
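
// Worked illustration (a sketch based on the example in the file header, not
// part of the original source): with %tid seeded divergent, compute() proceeds
// roughly as
//   1. pushUsers(%tid) enqueues %cond;
//   2. %cond has the divergent operand %tid, so it is marked divergent and its
//      user, the conditional branch, is enqueued;
//   3. updateTerminator finds the branch condition divergent, and
//      propagateBranchDivergence marks %merge as join divergent via
//      SyncDependenceAnalysis;
//   4. updatePHINode then marks the phi %a divergent.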

bool DivergenceAnalysis::isAlwaysUniform(const Value &V) const {
  return UniformOverrides.find(&V) != UniformOverrides.end();
}

bool DivergenceAnalysis::isDivergent(const Value &V) const {
  return DivergentValues.find(&V) != DivergentValues.end();
}

void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
  if (DivergentValues.empty())
    return;
  // iterate instructions using instructions() to ensure a deterministic order.
  for (auto &I : instructions(F)) {
    if (isDivergent(I))
      OS << "DIVERGENT:" << I << '\n';
  }
}

// class GPUDivergenceAnalysis
GPUDivergenceAnalysis::GPUDivergenceAnalysis(Function &F,
                                             const DominatorTree &DT,
                                             const PostDominatorTree &PDT,
                                             const LoopInfo &LI,
                                             const TargetTransformInfo &TTI)
    : SDA(DT, PDT, LI), DA(F, nullptr, DT, LI, SDA, false) {
  for (auto &I : instructions(F)) {
    if (TTI.isSourceOfDivergence(&I)) {
      DA.markDivergent(I);
    } else if (TTI.isAlwaysUniform(&I)) {
      DA.addUniformOverride(I);
    }
  }
  for (auto &Arg : F.args()) {
    if (TTI.isSourceOfDivergence(&Arg)) {
      DA.markDivergent(Arg);
    }
  }

  DA.compute();
}

bool GPUDivergenceAnalysis::isDivergent(const Value &val) const {
  return DA.isDivergent(val);
}

void GPUDivergenceAnalysis::print(raw_ostream &OS, const Module *mod) const {
  OS << "Divergence of kernel " << DA.getFunction().getName() << " {\n";
  DA.print(OS, mod);
  OS << "}\n";
}