Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- ControlHeightReduction.cpp - Control Height Reduction -------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This pass merges conditional blocks of code and reduces the number of
10
// conditional branches in the hot paths based on profiles.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
15
#include "llvm/ADT/DenseMap.h"
16
#include "llvm/ADT/DenseSet.h"
17
#include "llvm/ADT/SmallVector.h"
18
#include "llvm/ADT/StringSet.h"
19
#include "llvm/Analysis/BlockFrequencyInfo.h"
20
#include "llvm/Analysis/GlobalsModRef.h"
21
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
22
#include "llvm/Analysis/ProfileSummaryInfo.h"
23
#include "llvm/Analysis/RegionInfo.h"
24
#include "llvm/Analysis/RegionIterator.h"
25
#include "llvm/Analysis/ValueTracking.h"
26
#include "llvm/IR/CFG.h"
27
#include "llvm/IR/Dominators.h"
28
#include "llvm/IR/IRBuilder.h"
29
#include "llvm/IR/MDBuilder.h"
30
#include "llvm/Support/BranchProbability.h"
31
#include "llvm/Support/MemoryBuffer.h"
32
#include "llvm/Transforms/Utils.h"
33
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
34
#include "llvm/Transforms/Utils/Cloning.h"
35
#include "llvm/Transforms/Utils/ValueMapper.h"
36
37
#include <set>
38
#include <sstream>
39
40
using namespace llvm;
41
42
0
#define DEBUG_TYPE "chr"
43
44
5.84k
#define CHR_DEBUG(X) LLVM_DEBUG(X)
45
46
static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden,
47
                              cl::desc("Apply CHR for all functions"));
48
49
static cl::opt<double> CHRBiasThreshold(
50
    "chr-bias-threshold", cl::init(0.99), cl::Hidden,
51
    cl::desc("CHR considers a branch bias greater than this ratio as biased"));
52
53
static cl::opt<unsigned> CHRMergeThreshold(
54
    "chr-merge-threshold", cl::init(2), cl::Hidden,
55
    cl::desc("CHR merges a group of N branches/selects where N >= this value"));
56
57
static cl::opt<std::string> CHRModuleList(
58
    "chr-module-list", cl::init(""), cl::Hidden,
59
    cl::desc("Specify file to retrieve the list of modules to apply CHR to"));
60
61
static cl::opt<std::string> CHRFunctionList(
62
    "chr-function-list", cl::init(""), cl::Hidden,
63
    cl::desc("Specify file to retrieve the list of functions to apply CHR to"));
64
65
static StringSet<> CHRModules;
66
static StringSet<> CHRFunctions;
67
68
2
static void parseCHRFilterFiles() {
69
2
  if (!CHRModuleList.empty()) {
70
0
    auto FileOrErr = MemoryBuffer::getFile(CHRModuleList);
71
0
    if (!FileOrErr) {
72
0
      errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n";
73
0
      std::exit(1);
74
0
    }
75
0
    StringRef Buf = FileOrErr->get()->getBuffer();
76
0
    SmallVector<StringRef, 0> Lines;
77
0
    Buf.split(Lines, '\n');
78
0
    for (StringRef Line : Lines) {
79
0
      Line = Line.trim();
80
0
      if (!Line.empty())
81
0
        CHRModules.insert(Line);
82
0
    }
83
0
  }
84
2
  if (!CHRFunctionList.empty()) {
85
0
    auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList);
86
0
    if (!FileOrErr) {
87
0
      errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n";
88
0
      std::exit(1);
89
0
    }
90
0
    StringRef Buf = FileOrErr->get()->getBuffer();
91
0
    SmallVector<StringRef, 0> Lines;
92
0
    Buf.split(Lines, '\n');
93
0
    for (StringRef Line : Lines) {
94
0
      Line = Line.trim();
95
0
      if (!Line.empty())
96
0
        CHRFunctions.insert(Line);
97
0
    }
98
0
  }
99
2
}
100
101
namespace {
102
class ControlHeightReductionLegacyPass : public FunctionPass {
103
public:
104
  static char ID;
105
106
1
  ControlHeightReductionLegacyPass() : FunctionPass(ID) {
107
1
    initializeControlHeightReductionLegacyPassPass(
108
1
        *PassRegistry::getPassRegistry());
109
1
    parseCHRFilterFiles();
110
1
  }
111
112
  bool runOnFunction(Function &F) override;
113
1
  void getAnalysisUsage(AnalysisUsage &AU) const override {
114
1
    AU.addRequired<BlockFrequencyInfoWrapperPass>();
115
1
    AU.addRequired<DominatorTreeWrapperPass>();
116
1
    AU.addRequired<ProfileSummaryInfoWrapperPass>();
117
1
    AU.addRequired<RegionInfoPass>();
118
1
    AU.addPreserved<GlobalsAAWrapperPass>();
119
1
  }
120
};
121
} // end anonymous namespace
122
123
char ControlHeightReductionLegacyPass::ID = 0;
124
125
11.0k
INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass,
126
11.0k
                      "chr",
127
11.0k
                      "Reduce control height in the hot paths",
128
11.0k
                      false, false)
129
11.0k
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
130
11.0k
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
131
11.0k
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
132
11.0k
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
133
11.0k
INITIALIZE_PASS_END(ControlHeightReductionLegacyPass,
134
                    "chr",
135
                    "Reduce control height in the hot paths",
136
                    false, false)
137
138
0
FunctionPass *llvm::createControlHeightReductionLegacyPass() {
139
0
  return new ControlHeightReductionLegacyPass();
140
0
}
141
142
namespace {
143
144
struct CHRStats {
145
  CHRStats() : NumBranches(0), NumBranchesDelta(0),
146
48
               WeightedNumBranchesDelta(0) {}
147
0
  void print(raw_ostream &OS) const {
148
0
    OS << "CHRStats: NumBranches " << NumBranches
149
0
       << " NumBranchesDelta " << NumBranchesDelta
150
0
       << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta;
151
0
  }
152
  uint64_t NumBranches;       // The original number of conditional branches /
153
                              // selects
154
  uint64_t NumBranchesDelta;  // The decrease of the number of conditional
155
                              // branches / selects in the hot paths due to CHR.
156
  uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile
157
                                     // count at the scope entry.
158
};
159
160
// RegInfo - some properties of a Region.
161
struct RegInfo {
162
0
  RegInfo() : R(nullptr), HasBranch(false) {}
163
100
  RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {}
164
  Region *R;
165
  bool HasBranch;
166
  SmallVector<SelectInst *, 8> Selects;
167
};
168
169
typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy;
170
171
// CHRScope - a sequence of regions to CHR together. It corresponds to a
172
// sequence of conditional blocks. It can have subscopes which correspond to
173
// nested conditional blocks. Nested CHRScopes form a tree.
174
class CHRScope {
175
 public:
176
100
  CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) {
177
100
    assert(RI.R && "Null RegionIn");
178
100
    RegInfos.push_back(RI);
179
100
  }
180
181
0
  Region *getParentRegion() {
182
0
    assert(RegInfos.size() > 0 && "Empty CHRScope");
183
0
    Region *Parent = RegInfos[0].R->getParent();
184
0
    assert(Parent && "Unexpected to call this on the top-level region");
185
0
    return Parent;
186
0
  }
187
188
38
  BasicBlock *getEntryBlock() {
189
38
    assert(RegInfos.size() > 0 && "Empty CHRScope");
190
38
    return RegInfos.front().R->getEntry();
191
38
  }
192
193
38
  BasicBlock *getExitBlock() {
194
38
    assert(RegInfos.size() > 0 && "Empty CHRScope");
195
38
    return RegInfos.back().R->getExit();
196
38
  }
197
198
38
  bool appendable(CHRScope *Next) {
199
38
    // The next scope is appendable only if this scope is directly connected to
200
38
    // it (which implies it post-dominates this scope) and this scope dominates
201
38
    // it (no edge to the next scope outside this scope).
202
38
    BasicBlock *NextEntry = Next->getEntryBlock();
203
38
    if (getExitBlock() != NextEntry)
204
2
      // Not directly connected.
205
2
      return false;
206
36
    Region *LastRegion = RegInfos.back().R;
207
36
    for (BasicBlock *Pred : predecessors(NextEntry))
208
74
      if (!LastRegion->contains(Pred))
209
2
        // There's an edge going into the entry of the next scope from outside
210
2
        // of this scope.
211
2
        return false;
212
36
    
return true34
;
213
36
  }
214
215
34
  void append(CHRScope *Next) {
216
34
    assert(RegInfos.size() > 0 && "Empty CHRScope");
217
34
    assert(Next->RegInfos.size() > 0 && "Empty CHRScope");
218
34
    assert(getParentRegion() == Next->getParentRegion() &&
219
34
           "Must be siblings");
220
34
    assert(getExitBlock() == Next->getEntryBlock() &&
221
34
           "Must be adjacent");
222
34
    for (RegInfo &RI : Next->RegInfos)
223
34
      RegInfos.push_back(RI);
224
34
    for (CHRScope *Sub : Next->Subs)
225
0
      Subs.push_back(Sub);
226
34
  }
227
228
16
  void addSub(CHRScope *SubIn) {
229
#ifndef NDEBUG
230
    bool IsChild = false;
231
    for (RegInfo &RI : RegInfos)
232
      if (RI.R == SubIn->getParentRegion()) {
233
        IsChild = true;
234
        break;
235
      }
236
    assert(IsChild && "Must be a child");
237
#endif
238
    Subs.push_back(SubIn);
239
16
  }
240
241
  // Split this scope at the boundary region into two, which will belong to the
242
  // tail and returns the tail.
243
6
  CHRScope *split(Region *Boundary) {
244
6
    assert(Boundary && "Boundary null");
245
6
    assert(RegInfos.begin()->R != Boundary &&
246
6
           "Can't be split at beginning");
247
6
    auto BoundaryIt = std::find_if(RegInfos.begin(), RegInfos.end(),
248
14
                                   [&Boundary](const RegInfo& RI) {
249
14
                                     return Boundary == RI.R;
250
14
                                   });
251
6
    if (BoundaryIt == RegInfos.end())
252
0
      return nullptr;
253
6
    SmallVector<RegInfo, 8> TailRegInfos;
254
6
    SmallVector<CHRScope *, 8> TailSubs;
255
6
    TailRegInfos.insert(TailRegInfos.begin(), BoundaryIt, RegInfos.end());
256
6
    RegInfos.resize(BoundaryIt - RegInfos.begin());
257
6
    DenseSet<Region *> TailRegionSet;
258
6
    for (RegInfo &RI : TailRegInfos)
259
8
      TailRegionSet.insert(RI.R);
260
6
    for (auto It = Subs.begin(); It != Subs.end(); ) {
261
0
      CHRScope *Sub = *It;
262
0
      assert(Sub && "null Sub");
263
0
      Region *Parent = Sub->getParentRegion();
264
0
      if (TailRegionSet.count(Parent)) {
265
0
        TailSubs.push_back(Sub);
266
0
        It = Subs.erase(It);
267
0
      } else {
268
0
        assert(std::find_if(RegInfos.begin(), RegInfos.end(),
269
0
                            [&Parent](const RegInfo& RI) {
270
0
                              return Parent == RI.R;
271
0
                            }) != RegInfos.end() &&
272
0
               "Must be in head");
273
0
        ++It;
274
0
      }
275
0
    }
276
6
    assert(HoistStopMap.empty() && "MapHoistStops must be empty");
277
6
    return new CHRScope(TailRegInfos, TailSubs);
278
6
  }
279
280
0
  bool contains(Instruction *I) const {
281
0
    BasicBlock *Parent = I->getParent();
282
0
    for (const RegInfo &RI : RegInfos)
283
0
      if (RI.R->contains(Parent))
284
0
        return true;
285
0
    return false;
286
0
  }
287
288
  void print(raw_ostream &OS) const;
289
290
  SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope
291
  SmallVector<CHRScope *, 8> Subs;  // Subscopes.
292
293
  // The instruction at which to insert the CHR conditional branch (and hoist
294
  // the dependent condition values).
295
  Instruction *BranchInsertPoint;
296
297
  // True-biased and false-biased regions (conditional blocks),
298
  // respectively. Used only for the outermost scope and includes regions in
299
  // subscopes. The rest are unbiased.
300
  DenseSet<Region *> TrueBiasedRegions;
301
  DenseSet<Region *> FalseBiasedRegions;
302
  // Among the biased regions, the regions that get CHRed.
303
  SmallVector<RegInfo, 8> CHRRegions;
304
305
  // True-biased and false-biased selects, respectively. Used only for the
306
  // outermost scope and includes ones in subscopes.
307
  DenseSet<SelectInst *> TrueBiasedSelects;
308
  DenseSet<SelectInst *> FalseBiasedSelects;
309
310
  // Map from one of the above regions to the instructions to stop
311
  // hoisting instructions at through use-def chains.
312
  HoistStopMapTy HoistStopMap;
313
314
 private:
315
  CHRScope(SmallVector<RegInfo, 8> &RegInfosIn,
316
           SmallVector<CHRScope *, 8> &SubsIn)
317
6
    : RegInfos(RegInfosIn), Subs(SubsIn), BranchInsertPoint(nullptr) {}
318
};
319
320
class CHR {
321
 public:
322
  CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin,
323
      ProfileSummaryInfo &PSIin, RegionInfo &RIin,
324
      OptimizationRemarkEmitter &OREin)
325
48
      : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {}
326
327
48
  ~CHR() {
328
106
    for (CHRScope *Scope : Scopes) {
329
106
      delete Scope;
330
106
    }
331
48
  }
332
333
  bool run();
334
335
 private:
336
  // See the comments in CHR::run() for the high level flow of the algorithm and
337
  // what the following functions do.
338
339
48
  void findScopes(SmallVectorImpl<CHRScope *> &Output) {
340
48
    Region *R = RI.getTopLevelRegion();
341
48
    CHRScope *Scope = findScopes(R, nullptr, nullptr, Output);
342
48
    if (Scope) {
343
4
      Output.push_back(Scope);
344
4
    }
345
48
  }
346
  CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
347
                        SmallVectorImpl<CHRScope *> &Scopes);
348
  CHRScope *findScope(Region *R);
349
  void checkScopeHoistable(CHRScope *Scope);
350
351
  void splitScopes(SmallVectorImpl<CHRScope *> &Input,
352
                   SmallVectorImpl<CHRScope *> &Output);
353
  SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope,
354
                                        CHRScope *Outer,
355
                                        DenseSet<Value *> *OuterConditionValues,
356
                                        Instruction *OuterInsertPoint,
357
                                        SmallVectorImpl<CHRScope *> &Output,
358
                                        DenseSet<Instruction *> &Unhoistables);
359
360
  void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes);
361
  void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope);
362
363
  void filterScopes(SmallVectorImpl<CHRScope *> &Input,
364
                    SmallVectorImpl<CHRScope *> &Output);
365
366
  void setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
367
                     SmallVectorImpl<CHRScope *> &Output);
368
  void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope);
369
370
  void sortScopes(SmallVectorImpl<CHRScope *> &Input,
371
                  SmallVectorImpl<CHRScope *> &Output);
372
373
  void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes);
374
  void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs);
375
  void cloneScopeBlocks(CHRScope *Scope,
376
                        BasicBlock *PreEntryBlock,
377
                        BasicBlock *ExitBlock,
378
                        Region *LastRegion,
379
                        ValueToValueMapTy &VMap);
380
  BranchInst *createMergedBranch(BasicBlock *PreEntryBlock,
381
                                 BasicBlock *EntryBlock,
382
                                 BasicBlock *NewEntryBlock,
383
                                 ValueToValueMapTy &VMap);
384
  void fixupBranchesAndSelects(CHRScope *Scope,
385
                               BasicBlock *PreEntryBlock,
386
                               BranchInst *MergedBR,
387
                               uint64_t ProfileCount);
388
  void fixupBranch(Region *R,
389
                   CHRScope *Scope,
390
                   IRBuilder<> &IRB,
391
                   Value *&MergedCondition, BranchProbability &CHRBranchBias);
392
  void fixupSelect(SelectInst* SI,
393
                   CHRScope *Scope,
394
                   IRBuilder<> &IRB,
395
                   Value *&MergedCondition, BranchProbability &CHRBranchBias);
396
  void addToMergedCondition(bool IsTrueBiased, Value *Cond,
397
                            Instruction *BranchOrSelect,
398
                            CHRScope *Scope,
399
                            IRBuilder<> &IRB,
400
                            Value *&MergedCondition);
401
402
  Function &F;
403
  BlockFrequencyInfo &BFI;
404
  DominatorTree &DT;
405
  ProfileSummaryInfo &PSI;
406
  RegionInfo &RI;
407
  OptimizationRemarkEmitter &ORE;
408
  CHRStats Stats;
409
410
  // All the true-biased regions in the function
411
  DenseSet<Region *> TrueBiasedRegionsGlobal;
412
  // All the false-biased regions in the function
413
  DenseSet<Region *> FalseBiasedRegionsGlobal;
414
  // All the true-biased selects in the function
415
  DenseSet<SelectInst *> TrueBiasedSelectsGlobal;
416
  // All the false-biased selects in the function
417
  DenseSet<SelectInst *> FalseBiasedSelectsGlobal;
418
  // A map from biased regions to their branch bias
419
  DenseMap<Region *, BranchProbability> BranchBiasMap;
420
  // A map from biased selects to their branch bias
421
  DenseMap<SelectInst *, BranchProbability> SelectBiasMap;
422
  // All the scopes.
423
  DenseSet<CHRScope *> Scopes;
424
};
425
426
} // end anonymous namespace
427
428
static inline
429
raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS,
430
0
                                              const CHRStats &Stats) {
431
0
  Stats.print(OS);
432
0
  return OS;
433
0
}
434
435
static inline
436
0
raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) {
437
0
  Scope.print(OS);
438
0
  return OS;
439
0
}
440
441
48
static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) {
442
48
  if (ForceCHR)
443
0
    return true;
444
48
445
48
  if (!CHRModuleList.empty() || !CHRFunctionList.empty()) {
446
0
    if (CHRModules.count(F.getParent()->getName()))
447
0
      return true;
448
0
    return CHRFunctions.count(F.getName());
449
0
  }
450
48
451
48
  assert(PSI.hasProfileSummary() && "Empty PSI?");
452
48
  return PSI.isFunctionEntryHot(&F);
453
48
}
454
455
static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label,
456
0
                                         CHRStats *Stats) {
457
0
  StringRef FuncName = F.getName();
458
0
  StringRef ModuleName = F.getParent()->getName();
459
0
  (void)(FuncName); // Unused in release build.
460
0
  (void)(ModuleName); // Unused in release build.
461
0
  CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " "
462
0
            << FuncName);
463
0
  if (Stats)
464
0
    CHR_DEBUG(dbgs() << " " << *Stats);
465
0
  CHR_DEBUG(dbgs() << "\n");
466
0
  CHR_DEBUG(F.dump());
467
0
}
468
469
0
void CHRScope::print(raw_ostream &OS) const {
470
0
  assert(RegInfos.size() > 0 && "Empty CHRScope");
471
0
  OS << "CHRScope[";
472
0
  OS << RegInfos.size() << ", Regions[";
473
0
  for (const RegInfo &RI : RegInfos) {
474
0
    OS << RI.R->getNameStr();
475
0
    if (RI.HasBranch)
476
0
      OS << " B";
477
0
    if (RI.Selects.size() > 0)
478
0
      OS << " S" << RI.Selects.size();
479
0
    OS << ", ";
480
0
  }
481
0
  if (RegInfos[0].R->getParent()) {
482
0
    OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr();
483
0
  } else {
484
0
    // top level region
485
0
    OS << "]";
486
0
  }
487
0
  OS << ", Subs[";
488
0
  for (CHRScope *Sub : Subs) {
489
0
    OS << *Sub << ", ";
490
0
  }
491
0
  OS << "]]";
492
0
}
493
494
// Return true if the given instruction type can be hoisted by CHR.
495
1.83k
static bool isHoistableInstructionType(Instruction *I) {
496
1.83k
  return isa<BinaryOperator>(I) || 
isa<CastInst>(I)397
||
isa<SelectInst>(I)383
||
497
1.83k
      
isa<GetElementPtrInst>(I)383
||
isa<CmpInst>(I)383
||
498
1.83k
      
isa<InsertElementInst>(I)100
||
isa<ExtractElementInst>(I)100
||
499
1.83k
      
isa<ShuffleVectorInst>(I)100
||
isa<ExtractValueInst>(I)100
||
500
1.83k
      
isa<InsertValueInst>(I)100
;
501
1.83k
}
502
503
// Return true if the given instruction can be hoisted by CHR.
504
1.83k
static bool isHoistable(Instruction *I, DominatorTree &DT) {
505
1.83k
  if (!isHoistableInstructionType(I))
506
100
    return false;
507
1.73k
  return isSafeToSpeculativelyExecute(I, nullptr, &DT);
508
1.73k
}
509
510
// Recursively traverse the use-def chains of the given value and return a set
511
// of the unhoistable base values defined within the scope (excluding the
512
// first-region entry block) or the (hoistable or unhoistable) base values that
513
// are defined outside (including the first-region entry block) of the
514
// scope. The returned set doesn't include constants.
515
static std::set<Value *> getBaseValues(Value *V,
516
494
                                       DominatorTree &DT) {
517
494
  std::set<Value *> Result;
518
494
  if (auto *I = dyn_cast<Instruction>(V)) {
519
284
    // We don't stop at a block that's not in the Scope because we would miss some
520
284
    // instructions that are based on the same base values if we stop there.
521
284
    if (!isHoistable(I, DT)) {
522
86
      Result.insert(I);
523
86
      return Result;
524
86
    }
525
198
    // I is hoistable above the Scope.
526
390
    
for (Value *Op : I->operands())198
{
527
390
      std::set<Value *> OpResult = getBaseValues(Op, DT);
528
390
      Result.insert(OpResult.begin(), OpResult.end());
529
390
    }
530
198
    return Result;
531
198
  }
532
210
  if (isa<Argument>(V)) {
533
30
    Result.insert(V);
534
30
    return Result;
535
30
  }
536
180
  // We don't include others like constants because those won't lead to any
537
180
  // chance of folding of conditions (eg two bit checks merged into one check)
538
180
  // after CHR.
539
180
  return Result;  // empty
540
180
}
541
542
// Return true if V is already hoisted or can be hoisted (along with its
543
// operands) above the insert point. When it returns true and HoistStops is
544
// non-null, the instructions to stop hoisting at through the use-def chains are
545
// inserted into HoistStops.
546
static bool
547
checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT,
548
                DenseSet<Instruction *> &Unhoistables,
549
                DenseSet<Instruction *> *HoistStops,
550
3.28k
                DenseMap<Instruction *, bool> &Visited) {
551
3.28k
  assert(InsertPoint && "Null InsertPoint");
552
3.28k
  if (auto *I = dyn_cast<Instruction>(V)) {
553
2.95k
    if (Visited.count(I)) {
554
1.19k
      return Visited[I];
555
1.19k
    }
556
1.76k
    assert(DT.getNode(I->getParent()) && "DT must contain I's parent block");
557
1.76k
    assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination");
558
1.76k
    if (Unhoistables.count(I)) {
559
4
      // Don't hoist if they are not to be hoisted.
560
4
      Visited[I] = false;
561
4
      return false;
562
4
    }
563
1.75k
    if (DT.dominates(I, InsertPoint)) {
564
209
      // We are already above the insert point. Stop here.
565
209
      if (HoistStops)
566
205
        HoistStops->insert(I);
567
209
      Visited[I] = true;
568
209
      return true;
569
209
    }
570
1.54k
    // We aren't not above the insert point, check if we can hoist it above the
571
1.54k
    // insert point.
572
1.54k
    if (isHoistable(I, DT)) {
573
1.53k
      // Check operands first.
574
1.53k
      DenseSet<Instruction *> OpsHoistStops;
575
1.53k
      bool AllOpsHoisted = true;
576
3.04k
      for (Value *Op : I->operands()) {
577
3.04k
        if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops,
578
3.04k
                             Visited)) {
579
24
          AllOpsHoisted = false;
580
24
          break;
581
24
        }
582
3.04k
      }
583
1.53k
      if (AllOpsHoisted) {
584
1.51k
        CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n");
585
1.51k
        if (HoistStops)
586
1.41k
          HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end());
587
1.51k
        Visited[I] = true;
588
1.51k
        return true;
589
1.51k
      }
590
38
    }
591
38
    Visited[I] = false;
592
38
    return false;
593
38
  }
594
327
  // Non-instructions are considered hoistable.
595
327
  return true;
596
327
}
597
598
// Returns true and sets the true probability and false probability of an
599
// MD_prof metadata if it's well-formed.
600
static bool checkMDProf(MDNode *MD, BranchProbability &TrueProb,
601
154
                        BranchProbability &FalseProb) {
602
154
  if (!MD) 
return false6
;
603
148
  MDString *MDName = cast<MDString>(MD->getOperand(0));
604
148
  if (MDName->getString() != "branch_weights" ||
605
148
      MD->getNumOperands() != 3)
606
0
    return false;
607
148
  ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1));
608
148
  ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2));
609
148
  if (!TrueWeight || !FalseWeight)
610
0
    return false;
611
148
  uint64_t TrueWt = TrueWeight->getValue().getZExtValue();
612
148
  uint64_t FalseWt = FalseWeight->getValue().getZExtValue();
613
148
  uint64_t SumWt = TrueWt + FalseWt;
614
148
615
148
  assert(SumWt >= TrueWt && SumWt >= FalseWt &&
616
148
         "Overflow calculating branch probabilities.");
617
148
618
148
  TrueProb = BranchProbability::getBranchProbability(TrueWt, SumWt);
619
148
  FalseProb = BranchProbability::getBranchProbability(FalseWt, SumWt);
620
148
  return true;
621
148
}
622
623
148
static BranchProbability getCHRBiasThreshold() {
624
148
  return BranchProbability::getBranchProbability(
625
148
      static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000);
626
148
}
627
628
// A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >=
629
// CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >=
630
// CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return
631
// false.
632
template <typename K, typename S, typename M>
633
static bool checkBias(K *Key, BranchProbability TrueProb,
634
                      BranchProbability FalseProb, S &TrueSet, S &FalseSet,
635
148
                      M &BiasMap) {
636
148
  BranchProbability Threshold = getCHRBiasThreshold();
637
148
  if (TrueProb >= Threshold) {
638
80
    TrueSet.insert(Key);
639
80
    BiasMap[Key] = TrueProb;
640
80
    return true;
641
80
  } else 
if (68
FalseProb >= Threshold68
) {
642
64
    FalseSet.insert(Key);
643
64
    BiasMap[Key] = FalseProb;
644
64
    return true;
645
64
  }
646
4
  return false;
647
4
}
ControlHeightReduction.cpp:bool checkBias<llvm::Region, llvm::DenseSet<llvm::Region*, llvm::DenseMapInfo<llvm::Region*> >, llvm::DenseMap<llvm::Region*, llvm::BranchProbability, llvm::DenseMapInfo<llvm::Region*>, llvm::detail::DenseMapPair<llvm::Region*, llvm::BranchProbability> > >(llvm::Region*, llvm::BranchProbability, llvm::BranchProbability, llvm::DenseSet<llvm::Region*, llvm::DenseMapInfo<llvm::Region*> >&, llvm::DenseSet<llvm::Region*, llvm::DenseMapInfo<llvm::Region*> >&, llvm::DenseMap<llvm::Region*, llvm::BranchProbability, llvm::DenseMapInfo<llvm::Region*>, llvm::detail::DenseMapPair<llvm::Region*, llvm::BranchProbability> >&)
Line
Count
Source
635
90
                      M &BiasMap) {
636
90
  BranchProbability Threshold = getCHRBiasThreshold();
637
90
  if (TrueProb >= Threshold) {
638
80
    TrueSet.insert(Key);
639
80
    BiasMap[Key] = TrueProb;
640
80
    return true;
641
80
  } else 
if (10
FalseProb >= Threshold10
) {
642
6
    FalseSet.insert(Key);
643
6
    BiasMap[Key] = FalseProb;
644
6
    return true;
645
6
  }
646
4
  return false;
647
4
}
ControlHeightReduction.cpp:bool checkBias<llvm::SelectInst, llvm::DenseSet<llvm::SelectInst*, llvm::DenseMapInfo<llvm::SelectInst*> >, llvm::DenseMap<llvm::SelectInst*, llvm::BranchProbability, llvm::DenseMapInfo<llvm::SelectInst*>, llvm::detail::DenseMapPair<llvm::SelectInst*, llvm::BranchProbability> > >(llvm::SelectInst*, llvm::BranchProbability, llvm::BranchProbability, llvm::DenseSet<llvm::SelectInst*, llvm::DenseMapInfo<llvm::SelectInst*> >&, llvm::DenseSet<llvm::SelectInst*, llvm::DenseMapInfo<llvm::SelectInst*> >&, llvm::DenseMap<llvm::SelectInst*, llvm::BranchProbability, llvm::DenseMapInfo<llvm::SelectInst*>, llvm::detail::DenseMapPair<llvm::SelectInst*, llvm::BranchProbability> >&)
Line
Count
Source
635
58
                      M &BiasMap) {
636
58
  BranchProbability Threshold = getCHRBiasThreshold();
637
58
  if (TrueProb >= Threshold) {
638
0
    TrueSet.insert(Key);
639
0
    BiasMap[Key] = TrueProb;
640
0
    return true;
641
58
  } else if (FalseProb >= Threshold) {
642
58
    FalseSet.insert(Key);
643
58
    BiasMap[Key] = FalseProb;
644
58
    return true;
645
58
  }
646
0
  return false;
647
0
}
648
649
// Returns true and insert a region into the right biased set and the map if the
650
// branch of the region is biased.
651
static bool checkBiasedBranch(BranchInst *BI, Region *R,
652
                              DenseSet<Region *> &TrueBiasedRegionsGlobal,
653
                              DenseSet<Region *> &FalseBiasedRegionsGlobal,
654
96
                              DenseMap<Region *, BranchProbability> &BranchBiasMap) {
655
96
  if (!BI->isConditional())
656
0
    return false;
657
96
  BranchProbability ThenProb, ElseProb;
658
96
  if (!checkMDProf(BI->getMetadata(LLVMContext::MD_prof),
659
96
                   ThenProb, ElseProb))
660
6
    return false;
661
90
  BasicBlock *IfThen = BI->getSuccessor(0);
662
90
  BasicBlock *IfElse = BI->getSuccessor(1);
663
90
  assert((IfThen == R->getExit() || IfElse == R->getExit()) &&
664
90
         IfThen != IfElse &&
665
90
         "Invariant from findScopes");
666
90
  if (IfThen == R->getExit()) {
667
84
    // Swap them so that IfThen/ThenProb means going into the conditional code
668
84
    // and IfElse/ElseProb means skipping it.
669
84
    std::swap(IfThen, IfElse);
670
84
    std::swap(ThenProb, ElseProb);
671
84
  }
672
90
  CHR_DEBUG(dbgs() << "BI " << *BI << " ");
673
90
  CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " ");
674
90
  CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n");
675
90
  return checkBias(R, ThenProb, ElseProb,
676
90
                   TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
677
90
                   BranchBiasMap);
678
90
}
679
680
// Returns true and insert a select into the right biased set and the map if the
681
// select is biased.
682
static bool checkBiasedSelect(
683
    SelectInst *SI, Region *R,
684
    DenseSet<SelectInst *> &TrueBiasedSelectsGlobal,
685
    DenseSet<SelectInst *> &FalseBiasedSelectsGlobal,
686
58
    DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) {
687
58
  BranchProbability TrueProb, FalseProb;
688
58
  if (!checkMDProf(SI->getMetadata(LLVMContext::MD_prof),
689
58
                   TrueProb, FalseProb))
690
0
    return false;
691
58
  CHR_DEBUG(dbgs() << "SI " << *SI << " ");
692
58
  CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " ");
693
58
  CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n");
694
58
  return checkBias(SI, TrueProb, FalseProb,
695
58
                   TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal,
696
58
                   SelectBiasMap);
697
58
}
698
699
// Returns the instruction at which to hoist the dependent condition values and
700
// insert the CHR branch for a region. This is the terminator branch in the
701
// entry block or the first select in the entry block, if any.
702
292
static Instruction* getBranchInsertPoint(RegInfo &RI) {
703
292
  Region *R = RI.R;
704
292
  BasicBlock *EntryBB = R->getEntry();
705
292
  // The hoist point is by default the terminator of the entry block, which is
706
292
  // the same as the branch instruction if RI.HasBranch is true.
707
292
  Instruction *HoistPoint = EntryBB->getTerminator();
708
292
  for (SelectInst *SI : RI.Selects) {
709
84
    if (SI->getParent() == EntryBB) {
710
82
      // Pick the first select in Selects in the entry block.  Note Selects is
711
82
      // sorted in the instruction order within a block (asserted below).
712
82
      HoistPoint = SI;
713
82
      break;
714
82
    }
715
84
  }
716
292
  assert(HoistPoint && "Null HoistPoint");
717
#ifndef NDEBUG
718
  // Check that HoistPoint is the first one in Selects in the entry block,
719
  // if any.
720
  DenseSet<Instruction *> EntryBlockSelectSet;
721
  for (SelectInst *SI : RI.Selects) {
722
    if (SI->getParent() == EntryBB) {
723
      EntryBlockSelectSet.insert(SI);
724
    }
725
  }
726
  for (Instruction &I : *EntryBB) {
727
    if (EntryBlockSelectSet.count(&I) > 0) {
728
      assert(&I == HoistPoint &&
729
             "HoistPoint must be the first one in Selects");
730
      break;
731
    }
732
  }
733
#endif
734
  return HoistPoint;
735
292
}
736
737
// Find a CHR scope in the given region.
738
148
CHRScope * CHR::findScope(Region *R) {
739
148
  CHRScope *Result = nullptr;
740
148
  BasicBlock *Entry = R->getEntry();
741
148
  BasicBlock *Exit = R->getExit();  // null if top level.
742
148
  assert(Entry && "Entry must not be null");
743
148
  assert((Exit == nullptr) == (R->isTopLevelRegion()) &&
744
148
         "Only top level region has a null exit");
745
148
  if (Entry)
746
148
    CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n");
747
148
  else
748
148
    CHR_DEBUG(dbgs() << "Entry null\n");
749
148
  if (Exit)
750
148
    CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n");
751
148
  else
752
148
    CHR_DEBUG(dbgs() << "Exit null\n");
753
148
  // Exclude cases where Entry is part of a subregion (hence it doesn't belong
754
148
  // to this region).
755
148
  bool EntryInSubregion = RI.getRegionFor(Entry) != R;
756
148
  if (EntryInSubregion)
757
44
    return nullptr;
758
104
  // Exclude loops
759
104
  for (BasicBlock *Pred : predecessors(Entry))
760
96
    if (R->contains(Pred))
761
0
      return nullptr;
762
104
  if (Exit) {
763
98
    // Try to find an if-then block (check if R is an if-then).
764
98
    // if (cond) {
765
98
    //  ...
766
98
    // }
767
98
    auto *BI = dyn_cast<BranchInst>(Entry->getTerminator());
768
98
    if (BI)
769
98
      CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n");
770
98
    else
771
98
      CHR_DEBUG(dbgs() << "BI null\n");
772
98
    if (BI && BI->isConditional()) {
773
98
      BasicBlock *S0 = BI->getSuccessor(0);
774
98
      BasicBlock *S1 = BI->getSuccessor(1);
775
98
      CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n");
776
98
      CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n");
777
98
      if (S0 != S1 && (S0 == Exit || 
S1 == Exit10
)) {
778
96
        RegInfo RI(R);
779
96
        RI.HasBranch = checkBiasedBranch(
780
96
            BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
781
96
            BranchBiasMap);
782
96
        Result = new CHRScope(RI);
783
96
        Scopes.insert(Result);
784
96
        CHR_DEBUG(dbgs() << "Found a region with a branch\n");
785
96
        ++Stats.NumBranches;
786
96
        if (!RI.HasBranch) {
787
10
          ORE.emit([&]() {
788
0
            return OptimizationRemarkMissed(DEBUG_TYPE, "BranchNotBiased", BI)
789
0
                << "Branch not biased";
790
0
          });
791
10
        }
792
96
      }
793
98
    }
794
98
  }
795
104
  {
796
104
    // Try to look for selects in the direct child blocks (as opposed to in
797
104
    // subregions) of R.
798
104
    // ...
799
104
    // if (..) { // Some subregion
800
104
    //   ...
801
104
    // }
802
104
    // if (..) { // Some subregion
803
104
    //   ...
804
104
    // }
805
104
    // ...
806
104
    // a = cond ? b : c;
807
104
    // ...
808
104
    SmallVector<SelectInst *, 8> Selects;
809
230
    for (RegionNode *E : R->elements()) {
810
230
      if (E->isSubRegion())
811
28
        continue;
812
202
      // This returns the basic block of E if E is a direct child of R (not a
813
202
      // subregion.)
814
202
      BasicBlock *BB = E->getEntry();
815
202
      // Need to push in the order to make it easier to find the first Select
816
202
      // later.
817
1.36k
      for (Instruction &I : *BB) {
818
1.36k
        if (auto *SI = dyn_cast<SelectInst>(&I)) {
819
58
          Selects.push_back(SI);
820
58
          ++Stats.NumBranches;
821
58
        }
822
1.36k
      }
823
202
    }
824
104
    if (Selects.size() > 0) {
825
30
      auto AddSelects = [&](RegInfo &RI) {
826
30
        for (auto *SI : Selects)
827
58
          if (checkBiasedSelect(SI, RI.R,
828
58
                                TrueBiasedSelectsGlobal,
829
58
                                FalseBiasedSelectsGlobal,
830
58
                                SelectBiasMap))
831
58
            RI.Selects.push_back(SI);
832
0
          else
833
0
            ORE.emit([&]() {
834
0
              return OptimizationRemarkMissed(DEBUG_TYPE, "SelectNotBiased", SI)
835
0
                  << "Select not biased";
836
0
            });
837
30
      };
838
30
      if (!Result) {
839
4
        CHR_DEBUG(dbgs() << "Found a select-only region\n");
840
4
        RegInfo RI(R);
841
4
        AddSelects(RI);
842
4
        Result = new CHRScope(RI);
843
4
        Scopes.insert(Result);
844
26
      } else {
845
26
        CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n");
846
26
        AddSelects(Result->RegInfos[0]);
847
26
      }
848
30
    }
849
104
  }
850
104
851
104
  if (Result) {
852
100
    checkScopeHoistable(Result);
853
100
  }
854
104
  return Result;
855
104
}
856
857
// Check that any of the branch and the selects in the region could be
858
// hoisted above the the CHR branch insert point (the most dominating of
859
// them, either the branch (at the end of the first block) or the first
860
// select in the first block). If the branch can't be hoisted, drop the
861
// selects in the first blocks.
862
//
863
// For example, for the following scope/region with selects, we want to insert
864
// the merged branch right before the first select in the first/entry block by
865
// hoisting c1, c2, c3, and c4.
866
//
867
// // Branch insert point here.
868
// a = c1 ? b : c; // Select 1
869
// d = c2 ? e : f; // Select 2
870
// if (c3) { // Branch
871
//   ...
872
//   c4 = foo() // A call.
873
//   g = c4 ? h : i; // Select 3
874
// }
875
//
876
// But suppose we can't hoist c4 because it's dependent on the preceding
877
// call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop
878
// Select 2. If we can't hoist c3, we drop Selects 1 & 2.
879
100
void CHR::checkScopeHoistable(CHRScope *Scope) {
880
100
  RegInfo &RI = Scope->RegInfos[0];
881
100
  Region *R = RI.R;
882
100
  BasicBlock *EntryBB = R->getEntry();
883
100
  auto *Branch = RI.HasBranch ?
884
86
                 cast<BranchInst>(EntryBB->getTerminator()) : 
nullptr14
;
885
100
  SmallVector<SelectInst *, 8> &Selects = RI.Selects;
886
100
  if (RI.HasBranch || 
!Selects.empty()14
) {
887
96
    Instruction *InsertPoint = getBranchInsertPoint(RI);
888
96
    CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
889
96
    // Avoid a data dependence from a select or a branch to a(nother)
890
96
    // select. Note no instruction can't data-depend on a branch (a branch
891
96
    // instruction doesn't produce a value).
892
96
    DenseSet<Instruction *> Unhoistables;
893
96
    // Initialize Unhoistables with the selects.
894
96
    for (SelectInst *SI : Selects) {
895
58
      Unhoistables.insert(SI);
896
58
    }
897
96
    // Remove Selects that can't be hoisted.
898
154
    for (auto it = Selects.begin(); it != Selects.end(); ) {
899
58
      SelectInst *SI = *it;
900
58
      if (SI == InsertPoint) {
901
30
        ++it;
902
30
        continue;
903
30
      }
904
28
      DenseMap<Instruction *, bool> Visited;
905
28
      bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint,
906
28
                                         DT, Unhoistables, nullptr, Visited);
907
28
      if (!IsHoistable) {
908
2
        CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n");
909
2
        ORE.emit([&]() {
910
0
          return OptimizationRemarkMissed(DEBUG_TYPE,
911
0
                                          "DropUnhoistableSelect", SI)
912
0
              << "Dropped unhoistable select";
913
0
        });
914
2
        it = Selects.erase(it);
915
2
        // Since we are dropping the select here, we also drop it from
916
2
        // Unhoistables.
917
2
        Unhoistables.erase(SI);
918
2
      } else
919
26
        ++it;
920
28
    }
921
96
    // Update InsertPoint after potentially removing selects.
922
96
    InsertPoint = getBranchInsertPoint(RI);
923
96
    CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
924
96
    if (RI.HasBranch && 
InsertPoint != Branch86
) {
925
20
      DenseMap<Instruction *, bool> Visited;
926
20
      bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint,
927
20
                                         DT, Unhoistables, nullptr, Visited);
928
20
      if (!IsHoistable) {
929
8
        // If the branch isn't hoistable, drop the selects in the entry
930
8
        // block, preferring the branch, which makes the branch the hoist
931
8
        // point.
932
8
        assert(InsertPoint != Branch && "Branch must not be the hoist point");
933
8
        CHR_DEBUG(dbgs() << "Dropping selects in entry block \n");
934
8
        CHR_DEBUG(
935
8
            for (SelectInst *SI : Selects) {
936
8
              dbgs() << "SI " << *SI << "\n";
937
8
            });
938
12
        for (SelectInst *SI : Selects) {
939
12
          ORE.emit([&]() {
940
0
            return OptimizationRemarkMissed(DEBUG_TYPE,
941
0
                                            "DropSelectUnhoistableBranch", SI)
942
0
                << "Dropped select due to unhoistable branch";
943
0
          });
944
12
        }
945
8
        Selects.erase(std::remove_if(Selects.begin(), Selects.end(),
946
12
                                     [EntryBB](SelectInst *SI) {
947
12
                                       return SI->getParent() == EntryBB;
948
12
                                     }), Selects.end());
949
8
        Unhoistables.clear();
950
8
        InsertPoint = Branch;
951
8
      }
952
20
    }
953
96
    CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
954
#ifndef NDEBUG
955
    if (RI.HasBranch) {
956
      assert(!DT.dominates(Branch, InsertPoint) &&
957
             "Branch can't be already above the hoist point");
958
      DenseMap<Instruction *, bool> Visited;
959
      assert(checkHoistValue(Branch->getCondition(), InsertPoint,
960
                             DT, Unhoistables, nullptr, Visited) &&
961
             "checkHoistValue for branch");
962
    }
963
    for (auto *SI : Selects) {
964
      assert(!DT.dominates(SI, InsertPoint) &&
965
             "SI can't be already above the hoist point");
966
      DenseMap<Instruction *, bool> Visited;
967
      assert(checkHoistValue(SI->getCondition(), InsertPoint, DT,
968
                             Unhoistables, nullptr, Visited) &&
969
             "checkHoistValue for selects");
970
    }
971
    CHR_DEBUG(dbgs() << "Result\n");
972
    if (RI.HasBranch) {
973
      CHR_DEBUG(dbgs() << "BI " << *Branch << "\n");
974
    }
975
    for (auto *SI : Selects) {
976
      CHR_DEBUG(dbgs() << "SI " << *SI << "\n");
977
    }
978
#endif
979
  }
980
100
}
981
982
// Traverse the region tree, find all nested scopes and merge them if possible.
983
CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
984
148
                           SmallVectorImpl<CHRScope *> &Scopes) {
985
148
  CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n");
986
148
  CHRScope *Result = findScope(R);
987
148
  // Visit subscopes.
988
148
  CHRScope *ConsecutiveSubscope = nullptr;
989
148
  SmallVector<CHRScope *, 8> Subscopes;
990
248
  for (auto It = R->begin(); It != R->end(); 
++It100
) {
991
100
    const std::unique_ptr<Region> &SubR = *It;
992
100
    auto NextIt = std::next(It);
993
100
    Region *NextSubR = NextIt != R->end() ? 
NextIt->get()38
:
nullptr62
;
994
100
    CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr()
995
100
              << "\n");
996
100
    CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes);
997
100
    if (SubCHRScope) {
998
96
      CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n");
999
96
    } else {
1000
4
      CHR_DEBUG(dbgs() << "Subregion Scope null\n");
1001
4
    }
1002
100
    if (SubCHRScope) {
1003
96
      if (!ConsecutiveSubscope)
1004
58
        ConsecutiveSubscope = SubCHRScope;
1005
38
      else if (!ConsecutiveSubscope->appendable(SubCHRScope)) {
1006
4
        Subscopes.push_back(ConsecutiveSubscope);
1007
4
        ConsecutiveSubscope = SubCHRScope;
1008
4
      } else
1009
34
        ConsecutiveSubscope->append(SubCHRScope);
1010
96
    } else {
1011
4
      if (ConsecutiveSubscope) {
1012
0
        Subscopes.push_back(ConsecutiveSubscope);
1013
0
      }
1014
4
      ConsecutiveSubscope = nullptr;
1015
4
    }
1016
100
  }
1017
148
  if (ConsecutiveSubscope) {
1018
58
    Subscopes.push_back(ConsecutiveSubscope);
1019
58
  }
1020
148
  for (CHRScope *Sub : Subscopes) {
1021
62
    if (Result) {
1022
16
      // Combine it with the parent.
1023
16
      Result->addSub(Sub);
1024
46
    } else {
1025
46
      // Push Subscopes as they won't be combined with the parent.
1026
46
      Scopes.push_back(Sub);
1027
46
    }
1028
62
  }
1029
148
  return Result;
1030
148
}
1031
1032
100
static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) {
1033
100
  DenseSet<Value *> ConditionValues;
1034
100
  if (RI.HasBranch) {
1035
86
    auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator());
1036
86
    ConditionValues.insert(BI->getCondition());
1037
86
  }
1038
100
  for (SelectInst *SI : RI.Selects) {
1039
46
    ConditionValues.insert(SI->getCondition());
1040
46
  }
1041
100
  return ConditionValues;
1042
100
}
1043
1044
1045
// Determine whether to split a scope depending on the sets of the branch
1046
// condition values of the previous region and the current region. We split
1047
// (return true) it if 1) the condition values of the inner/lower scope can't be
1048
// hoisted up to the outer/upper scope, or 2) the two sets of the condition
1049
// values have an empty intersection (because the combined branch conditions
1050
// won't probably lead to a simpler combined condition).
1051
static bool shouldSplit(Instruction *InsertPoint,
1052
                        DenseSet<Value *> &PrevConditionValues,
1053
                        DenseSet<Value *> &ConditionValues,
1054
                        DominatorTree &DT,
1055
50
                        DenseSet<Instruction *> &Unhoistables) {
1056
50
  CHR_DEBUG(
1057
50
      dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues ";
1058
50
      for (Value *V : PrevConditionValues) {
1059
50
        dbgs() << *V << ", ";
1060
50
      }
1061
50
      dbgs() << " ConditionValues ";
1062
50
      for (Value *V : ConditionValues) {
1063
50
        dbgs() << *V << ", ";
1064
50
      }
1065
50
      dbgs() << "\n");
1066
50
  assert(InsertPoint && "Null InsertPoint");
1067
50
  // If any of Bases isn't hoistable to the hoist point, split.
1068
67
  for (Value *V : ConditionValues) {
1069
67
    DenseMap<Instruction *, bool> Visited;
1070
67
    if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) {
1071
8
      CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n");
1072
8
      return true; // Not hoistable, split.
1073
8
    }
1074
67
  }
1075
50
  // If PrevConditionValues or ConditionValues is empty, don't split to avoid
1076
50
  // unnecessary splits at scopes with no branch/selects.  If
1077
50
  // PrevConditionValues and ConditionValues don't intersect at all, split.
1078
50
  
if (42
!PrevConditionValues.empty()42
&&
!ConditionValues.empty()40
) {
1079
40
    // Use std::set as DenseSet doesn't work with set_intersection.
1080
40
    std::set<Value *> PrevBases, Bases;
1081
46
    for (Value *V : PrevConditionValues) {
1082
46
      std::set<Value *> BaseValues = getBaseValues(V, DT);
1083
46
      PrevBases.insert(BaseValues.begin(), BaseValues.end());
1084
46
    }
1085
58
    for (Value *V : ConditionValues) {
1086
58
      std::set<Value *> BaseValues = getBaseValues(V, DT);
1087
58
      Bases.insert(BaseValues.begin(), BaseValues.end());
1088
58
    }
1089
40
    CHR_DEBUG(
1090
40
        dbgs() << "PrevBases ";
1091
40
        for (Value *V : PrevBases) {
1092
40
          dbgs() << *V << ", ";
1093
40
        }
1094
40
        dbgs() << " Bases ";
1095
40
        for (Value *V : Bases) {
1096
40
          dbgs() << *V << ", ";
1097
40
        }
1098
40
        dbgs() << "\n");
1099
40
    std::set<Value *> Intersection;
1100
40
    std::set_intersection(PrevBases.begin(), PrevBases.end(),
1101
40
                          Bases.begin(), Bases.end(),
1102
40
                          std::inserter(Intersection, Intersection.begin()));
1103
40
    if (Intersection.empty()) {
1104
2
      // Empty intersection, split.
1105
2
      CHR_DEBUG(dbgs() << "Split. Intersection empty\n");
1106
2
      return true;
1107
2
    }
1108
40
  }
1109
40
  CHR_DEBUG(dbgs() << "No split\n");
1110
40
  return false;  // Don't split.
1111
40
}
1112
1113
static void getSelectsInScope(CHRScope *Scope,
1114
154
                              DenseSet<Instruction *> &Output) {
1115
154
  for (RegInfo &RI : Scope->RegInfos)
1116
222
    for (SelectInst *SI : RI.Selects)
1117
116
      Output.insert(SI);
1118
154
  for (CHRScope *Sub : Scope->Subs)
1119
32
    getSelectsInScope(Sub, Output);
1120
154
}
1121
1122
void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input,
1123
48
                      SmallVectorImpl<CHRScope *> &Output) {
1124
50
  for (CHRScope *Scope : Input) {
1125
50
    assert(!Scope->BranchInsertPoint &&
1126
50
           "BranchInsertPoint must not be set");
1127
50
    DenseSet<Instruction *> Unhoistables;
1128
50
    getSelectsInScope(Scope, Unhoistables);
1129
50
    splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables);
1130
50
  }
1131
#ifndef NDEBUG
1132
  for (CHRScope *Scope : Output) {
1133
    assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set");
1134
  }
1135
#endif
1136
}
1137
1138
SmallVector<CHRScope *, 8> CHR::splitScope(
1139
    CHRScope *Scope,
1140
    CHRScope *Outer,
1141
    DenseSet<Value *> *OuterConditionValues,
1142
    Instruction *OuterInsertPoint,
1143
    SmallVectorImpl<CHRScope *> &Output,
1144
66
    DenseSet<Instruction *> &Unhoistables) {
1145
66
  if (Outer) {
1146
16
    assert(OuterConditionValues && "Null OuterConditionValues");
1147
16
    assert(OuterInsertPoint && "Null OuterInsertPoint");
1148
16
  }
1149
66
  bool PrevSplitFromOuter = true;
1150
66
  DenseSet<Value *> PrevConditionValues;
1151
66
  Instruction *PrevInsertPoint = nullptr;
1152
66
  SmallVector<CHRScope *, 8> Splits;
1153
66
  SmallVector<bool, 8> SplitsSplitFromOuter;
1154
66
  SmallVector<DenseSet<Value *>, 8> SplitsConditionValues;
1155
66
  SmallVector<Instruction *, 8> SplitsInsertPoints;
1156
66
  SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos);  // Copy
1157
100
  for (RegInfo &RI : RegInfos) {
1158
100
    Instruction *InsertPoint = getBranchInsertPoint(RI);
1159
100
    DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI);
1160
100
    CHR_DEBUG(
1161
100
        dbgs() << "ConditionValues ";
1162
100
        for (Value *V : ConditionValues) {
1163
100
          dbgs() << *V << ", ";
1164
100
        }
1165
100
        dbgs() << "\n");
1166
100
    if (RI.R == RegInfos[0].R) {
1167
66
      // First iteration. Check to see if we should split from the outer.
1168
66
      if (Outer) {
1169
16
        CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n");
1170
16
        CHR_DEBUG(dbgs() << "Should split from outer at "
1171
16
                  << RI.R->getNameStr() << "\n");
1172
16
        if (shouldSplit(OuterInsertPoint, *OuterConditionValues,
1173
16
                        ConditionValues, DT, Unhoistables)) {
1174
4
          PrevConditionValues = ConditionValues;
1175
4
          PrevInsertPoint = InsertPoint;
1176
4
          ORE.emit([&]() {
1177
0
            return OptimizationRemarkMissed(DEBUG_TYPE,
1178
0
                                            "SplitScopeFromOuter",
1179
0
                                            RI.R->getEntry()->getTerminator())
1180
0
                << "Split scope from outer due to unhoistable branch/select "
1181
0
                << "and/or lack of common condition values";
1182
0
          });
1183
12
        } else {
1184
12
          // Not splitting from the outer. Use the outer bases and insert
1185
12
          // point. Union the bases.
1186
12
          PrevSplitFromOuter = false;
1187
12
          PrevConditionValues = *OuterConditionValues;
1188
12
          PrevConditionValues.insert(ConditionValues.begin(),
1189
12
                                     ConditionValues.end());
1190
12
          PrevInsertPoint = OuterInsertPoint;
1191
12
        }
1192
50
      } else {
1193
50
        CHR_DEBUG(dbgs() << "Outer null\n");
1194
50
        PrevConditionValues = ConditionValues;
1195
50
        PrevInsertPoint = InsertPoint;
1196
50
      }
1197
66
    } else {
1198
34
      CHR_DEBUG(dbgs() << "Should split from prev at "
1199
34
                << RI.R->getNameStr() << "\n");
1200
34
      if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues,
1201
34
                      DT, Unhoistables)) {
1202
6
        CHRScope *Tail = Scope->split(RI.R);
1203
6
        Scopes.insert(Tail);
1204
6
        Splits.push_back(Scope);
1205
6
        SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1206
6
        SplitsConditionValues.push_back(PrevConditionValues);
1207
6
        SplitsInsertPoints.push_back(PrevInsertPoint);
1208
6
        Scope = Tail;
1209
6
        PrevConditionValues = ConditionValues;
1210
6
        PrevInsertPoint = InsertPoint;
1211
6
        PrevSplitFromOuter = true;
1212
6
        ORE.emit([&]() {
1213
0
          return OptimizationRemarkMissed(DEBUG_TYPE,
1214
0
                                          "SplitScopeFromPrev",
1215
0
                                          RI.R->getEntry()->getTerminator())
1216
0
              << "Split scope from previous due to unhoistable branch/select "
1217
0
              << "and/or lack of common condition values";
1218
0
        });
1219
28
      } else {
1220
28
        // Not splitting. Union the bases. Keep the hoist point.
1221
28
        PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end());
1222
28
      }
1223
34
    }
1224
100
  }
1225
66
  Splits.push_back(Scope);
1226
66
  SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1227
66
  SplitsConditionValues.push_back(PrevConditionValues);
1228
66
  assert(PrevInsertPoint && "Null PrevInsertPoint");
1229
66
  SplitsInsertPoints.push_back(PrevInsertPoint);
1230
66
  assert(Splits.size() == SplitsConditionValues.size() &&
1231
66
         Splits.size() == SplitsSplitFromOuter.size() &&
1232
66
         Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes");
1233
138
  for (size_t I = 0; I < Splits.size(); 
++I72
) {
1234
72
    CHRScope *Split = Splits[I];
1235
72
    DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I];
1236
72
    Instruction *SplitInsertPoint = SplitsInsertPoints[I];
1237
72
    SmallVector<CHRScope *, 8> NewSubs;
1238
72
    DenseSet<Instruction *> SplitUnhoistables;
1239
72
    getSelectsInScope(Split, SplitUnhoistables);
1240
72
    for (CHRScope *Sub : Split->Subs) {
1241
16
      SmallVector<CHRScope *, 8> SubSplits = splitScope(
1242
16
          Sub, Split, &SplitConditionValues, SplitInsertPoint, Output,
1243
16
          SplitUnhoistables);
1244
16
      NewSubs.insert(NewSubs.end(), SubSplits.begin(), SubSplits.end());
1245
16
    }
1246
72
    Split->Subs = NewSubs;
1247
72
  }
1248
66
  SmallVector<CHRScope *, 8> Result;
1249
138
  for (size_t I = 0; I < Splits.size(); 
++I72
) {
1250
72
    CHRScope *Split = Splits[I];
1251
72
    if (SplitsSplitFromOuter[I]) {
1252
60
      // Split from the outer.
1253
60
      Output.push_back(Split);
1254
60
      Split->BranchInsertPoint = SplitsInsertPoints[I];
1255
60
      CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I]
1256
60
                << "\n");
1257
60
    } else {
1258
12
      // Connected to the outer.
1259
12
      Result.push_back(Split);
1260
12
    }
1261
72
  }
1262
66
  if (!Outer)
1263
66
    assert(Result.empty() &&
1264
66
           "If no outer (top-level), must return no nested ones");
1265
66
  return Result;
1266
66
}
1267
1268
48
void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) {
1269
60
  for (CHRScope *Scope : Scopes) {
1270
60
    assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty");
1271
60
    classifyBiasedScopes(Scope, Scope);
1272
60
    CHR_DEBUG(
1273
60
        dbgs() << "classifyBiasedScopes " << *Scope << "\n";
1274
60
        dbgs() << "TrueBiasedRegions ";
1275
60
        for (Region *R : Scope->TrueBiasedRegions) {
1276
60
          dbgs() << R->getNameStr() << ", ";
1277
60
        }
1278
60
        dbgs() << "\n";
1279
60
        dbgs() << "FalseBiasedRegions ";
1280
60
        for (Region *R : Scope->FalseBiasedRegions) {
1281
60
          dbgs() << R->getNameStr() << ", ";
1282
60
        }
1283
60
        dbgs() << "\n";
1284
60
        dbgs() << "TrueBiasedSelects ";
1285
60
        for (SelectInst *SI : Scope->TrueBiasedSelects) {
1286
60
          dbgs() << *SI << ", ";
1287
60
        }
1288
60
        dbgs() << "\n";
1289
60
        dbgs() << "FalseBiasedSelects ";
1290
60
        for (SelectInst *SI : Scope->FalseBiasedSelects) {
1291
60
          dbgs() << *SI << ", ";
1292
60
        }
1293
60
        dbgs() << "\n";);
1294
60
  }
1295
48
}
1296
1297
72
void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) {
1298
100
  for (RegInfo &RI : Scope->RegInfos) {
1299
100
    if (RI.HasBranch) {
1300
86
      Region *R = RI.R;
1301
86
      if (TrueBiasedRegionsGlobal.count(R) > 0)
1302
80
        OutermostScope->TrueBiasedRegions.insert(R);
1303
6
      else if (FalseBiasedRegionsGlobal.count(R) > 0)
1304
6
        OutermostScope->FalseBiasedRegions.insert(R);
1305
6
      else
1306
6
        
llvm_unreachable0
("Must be biased");
1307
86
    }
1308
100
    for (SelectInst *SI : RI.Selects) {
1309
46
      if (TrueBiasedSelectsGlobal.count(SI) > 0)
1310
0
        OutermostScope->TrueBiasedSelects.insert(SI);
1311
46
      else if (FalseBiasedSelectsGlobal.count(SI) > 0)
1312
46
        OutermostScope->FalseBiasedSelects.insert(SI);
1313
46
      else
1314
46
        
llvm_unreachable0
("Must be biased");
1315
46
    }
1316
100
  }
1317
72
  for (CHRScope *Sub : Scope->Subs) {
1318
12
    classifyBiasedScopes(Sub, OutermostScope);
1319
12
  }
1320
72
}
1321
1322
60
static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) {
1323
60
  unsigned NumBiased = Scope->TrueBiasedRegions.size() +
1324
60
                       Scope->FalseBiasedRegions.size() +
1325
60
                       Scope->TrueBiasedSelects.size() +
1326
60
                       Scope->FalseBiasedSelects.size();
1327
60
  return NumBiased >= CHRMergeThreshold;
1328
60
}
1329
1330
void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input,
1331
48
                       SmallVectorImpl<CHRScope *> &Output) {
1332
60
  for (CHRScope *Scope : Input) {
1333
60
    // Filter out the ones with only one region and no subs.
1334
60
    if (!hasAtLeastTwoBiasedBranches(Scope)) {
1335
14
      CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions "
1336
14
                << Scope->TrueBiasedRegions.size()
1337
14
                << " falsy-regions " << Scope->FalseBiasedRegions.size()
1338
14
                << " true-selects " << Scope->TrueBiasedSelects.size()
1339
14
                << " false-selects " << Scope->FalseBiasedSelects.size() << "\n");
1340
14
      ORE.emit([&]() {
1341
0
        return OptimizationRemarkMissed(
1342
0
            DEBUG_TYPE,
1343
0
            "DropScopeWithOneBranchOrSelect",
1344
0
            Scope->RegInfos[0].R->getEntry()->getTerminator())
1345
0
            << "Drop scope with < "
1346
0
            << ore::NV("CHRMergeThreshold", CHRMergeThreshold)
1347
0
            << " biased branch(es) or select(s)";
1348
0
      });
1349
14
      continue;
1350
14
    }
1351
46
    Output.push_back(Scope);
1352
46
  }
1353
48
}
1354
1355
void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
1356
48
                        SmallVectorImpl<CHRScope *> &Output) {
1357
48
  for (CHRScope *Scope : Input) {
1358
46
    assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() &&
1359
46
           "Empty");
1360
46
    setCHRRegions(Scope, Scope);
1361
46
    Output.push_back(Scope);
1362
46
    CHR_DEBUG(
1363
46
        dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n";
1364
46
        for (auto pair : Scope->HoistStopMap) {
1365
46
          Region *R = pair.first;
1366
46
          dbgs() << "Region " << R->getNameStr() << "\n";
1367
46
          for (Instruction *I : pair.second) {
1368
46
            dbgs() << "HoistStop " << *I << "\n";
1369
46
          }
1370
46
        }
1371
46
        dbgs() << "CHRRegions" << "\n";
1372
46
        for (RegInfo &RI : Scope->CHRRegions) {
1373
46
          dbgs() << RI.R->getNameStr() << "\n";
1374
46
        });
1375
46
  }
1376
48
}
1377
1378
58
void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
1379
58
  DenseSet<Instruction *> Unhoistables;
1380
58
  // Put the biased selects in Unhoistables because they should stay where they
1381
58
  // are and constant-folded after CHR (in case one biased select or a branch
1382
58
  // can depend on another biased select.)
1383
84
  for (RegInfo &RI : Scope->RegInfos) {
1384
84
    for (SelectInst *SI : RI.Selects) {
1385
44
      Unhoistables.insert(SI);
1386
44
    }
1387
84
  }
1388
58
  Instruction *InsertPoint = OutermostScope->BranchInsertPoint;
1389
84
  for (RegInfo &RI : Scope->RegInfos) {
1390
84
    Region *R = RI.R;
1391
84
    DenseSet<Instruction *> HoistStops;
1392
84
    bool IsHoisted = false;
1393
84
    if (RI.HasBranch) {
1394
76
      assert((OutermostScope->TrueBiasedRegions.count(R) > 0 ||
1395
76
              OutermostScope->FalseBiasedRegions.count(R) > 0) &&
1396
76
             "Must be truthy or falsy");
1397
76
      auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1398
76
      // Note checkHoistValue fills in HoistStops.
1399
76
      DenseMap<Instruction *, bool> Visited;
1400
76
      bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT,
1401
76
                                         Unhoistables, &HoistStops, Visited);
1402
76
      assert(IsHoistable && "Must be hoistable");
1403
76
      (void)(IsHoistable);  // Unused in release build
1404
76
      IsHoisted = true;
1405
76
    }
1406
84
    for (SelectInst *SI : RI.Selects) {
1407
44
      assert((OutermostScope->TrueBiasedSelects.count(SI) > 0 ||
1408
44
              OutermostScope->FalseBiasedSelects.count(SI) > 0) &&
1409
44
             "Must be true or false biased");
1410
44
      // Note checkHoistValue fills in HoistStops.
1411
44
      DenseMap<Instruction *, bool> Visited;
1412
44
      bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT,
1413
44
                                         Unhoistables, &HoistStops, Visited);
1414
44
      assert(IsHoistable && "Must be hoistable");
1415
44
      (void)(IsHoistable);  // Unused in release build
1416
44
      IsHoisted = true;
1417
44
    }
1418
84
    if (IsHoisted) {
1419
84
      OutermostScope->CHRRegions.push_back(RI);
1420
84
      OutermostScope->HoistStopMap[R] = HoistStops;
1421
84
    }
1422
84
  }
1423
58
  for (CHRScope *Sub : Scope->Subs)
1424
12
    setCHRRegions(Sub, OutermostScope);
1425
58
}
1426
1427
6
bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) {
1428
6
  return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth();
1429
6
}
1430
1431
void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
1432
48
                     SmallVectorImpl<CHRScope *> &Output) {
1433
48
  Output.resize(Input.size());
1434
48
  llvm::copy(Input, Output.begin());
1435
48
  llvm::stable_sort(Output, CHRScopeSorter);
1436
48
}
1437
1438
// Return true if V is already hoisted or was hoisted (along with its operands)
1439
// to the insert point.
1440
static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
1441
                       HoistStopMapTy &HoistStopMap,
1442
                       DenseSet<Instruction *> &HoistedSet,
1443
                       DenseSet<PHINode *> &TrivialPHIs,
1444
1.56k
                       DominatorTree &DT) {
1445
1.56k
  auto IT = HoistStopMap.find(R);
1446
1.56k
  assert(IT != HoistStopMap.end() && "Region must be in hoist stop map");
1447
1.56k
  DenseSet<Instruction *> &HoistStops = IT->second;
1448
1.56k
  if (auto *I = dyn_cast<Instruction>(V)) {
1449
1.43k
    if (I == HoistPoint)
1450
0
      return;
1451
1.43k
    if (HoistStops.count(I))
1452
112
      return;
1453
1.32k
    if (auto *PN = dyn_cast<PHINode>(I))
1454
2
      if (TrivialPHIs.count(PN))
1455
2
        // The trivial phi inserted by the previous CHR scope could replace a
1456
2
        // non-phi in HoistStops. Note that since this phi is at the exit of a
1457
2
        // previous CHR scope, which dominates this scope, it's safe to stop
1458
2
        // hoisting there.
1459
2
        return;
1460
1.32k
    if (HoistedSet.count(I))
1461
596
      // Already hoisted, return.
1462
596
      return;
1463
728
    assert(isHoistableInstructionType(I) && "Unhoistable instruction type");
1464
728
    assert(DT.getNode(I->getParent()) && "DT must contain I's block");
1465
728
    assert(DT.getNode(HoistPoint->getParent()) &&
1466
728
           "DT must contain HoistPoint block");
1467
728
    if (DT.dominates(I, HoistPoint))
1468
2
      // We are already above the hoist point. Stop here. This may be necessary
1469
2
      // when multiple scopes would independently hoist the same
1470
2
      // instruction. Since an outer (dominating) scope would hoist it to its
1471
2
      // entry before an inner (dominated) scope would to its entry, the inner
1472
2
      // scope may see the instruction already hoisted, in which case it
1473
2
      // potentially wrong for the inner scope to hoist it and could cause bad
1474
2
      // IR (non-dominating def), but safe to skip hoisting it instead because
1475
2
      // it's already in a block that dominates the inner scope.
1476
2
      return;
1477
1.44k
    
for (Value *Op : I->operands())726
{
1478
1.44k
      hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT);
1479
1.44k
    }
1480
726
    I->moveBefore(HoistPoint);
1481
726
    HoistedSet.insert(I);
1482
726
    CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n");
1483
726
  }
1484
1.56k
}
1485
1486
// Hoist the dependent condition values of the branches and the selects in the
1487
// scope to the insert point.
1488
static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
1489
                                 DenseSet<PHINode *> &TrivialPHIs,
1490
46
                                 DominatorTree &DT) {
1491
46
  DenseSet<Instruction *> HoistedSet;
1492
84
  for (const RegInfo &RI : Scope->CHRRegions) {
1493
84
    Region *R = RI.R;
1494
84
    bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1495
84
    bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1496
84
    if (RI.HasBranch && 
(76
IsTrueBiased76
||
IsFalseBiased4
)) {
1497
76
      auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1498
76
      hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1499
76
                 HoistedSet, TrivialPHIs, DT);
1500
76
    }
1501
84
    for (SelectInst *SI : RI.Selects) {
1502
44
      bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1503
44
      bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1504
44
      if (!(IsTrueBiased || IsFalseBiased))
1505
0
        continue;
1506
44
      hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1507
44
                 HoistedSet, TrivialPHIs, DT);
1508
44
    }
1509
84
  }
1510
46
}
1511
1512
// Negate the predicate if an ICmp if it's used only by branches or selects by
1513
// swapping the operands of the branches or the selects. Returns true if success.
1514
static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp,
1515
                                                 Instruction *ExcludedUser,
1516
112
                                                 CHRScope *Scope) {
1517
164
  for (User *U : ICmp->users()) {
1518
164
    if (U == ExcludedUser)
1519
112
      continue;
1520
52
    if (isa<BranchInst>(U) && 
cast<BranchInst>(U)->isConditional()36
)
1521
36
      continue;
1522
16
    if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp)
1523
16
      continue;
1524
0
    return false;
1525
0
  }
1526
164
  
for (User *U : ICmp->users())112
{
1527
164
    if (U == ExcludedUser)
1528
112
      continue;
1529
52
    if (auto *BI = dyn_cast<BranchInst>(U)) {
1530
36
      assert(BI->isConditional() && "Must be conditional");
1531
36
      BI->swapSuccessors();
1532
36
      // Don't need to swap this in terms of
1533
36
      // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based
1534
36
      // mean whehter the branch is likely go into the if-then rather than
1535
36
      // successor0/successor1 and because we can tell which edge is the then or
1536
36
      // the else one by comparing the destination to the region exit block.
1537
36
      continue;
1538
36
    }
1539
16
    if (auto *SI = dyn_cast<SelectInst>(U)) {
1540
16
      // Swap operands
1541
16
      Value *TrueValue = SI->getTrueValue();
1542
16
      Value *FalseValue = SI->getFalseValue();
1543
16
      SI->setTrueValue(FalseValue);
1544
16
      SI->setFalseValue(TrueValue);
1545
16
      SI->swapProfMetadata();
1546
16
      if (Scope->TrueBiasedSelects.count(SI)) {
1547
0
        assert(Scope->FalseBiasedSelects.count(SI) == 0 &&
1548
0
               "Must not be already in");
1549
0
        Scope->FalseBiasedSelects.insert(SI);
1550
16
      } else if (Scope->FalseBiasedSelects.count(SI)) {
1551
4
        assert(Scope->TrueBiasedSelects.count(SI) == 0 &&
1552
4
               "Must not be already in");
1553
4
        Scope->TrueBiasedSelects.insert(SI);
1554
4
      }
1555
16
      continue;
1556
16
    }
1557
0
    llvm_unreachable("Must be a branch or a select");
1558
0
  }
1559
112
  ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate()));
1560
112
  return true;
1561
112
}
1562
1563
// A helper for transformScopes. Insert a trivial phi at the scope exit block
1564
// for a value that's defined in the scope but used outside it (meaning it's
1565
// alive at the exit block).
1566
static void insertTrivialPHIs(CHRScope *Scope,
1567
                              BasicBlock *EntryBlock, BasicBlock *ExitBlock,
1568
42
                              DenseSet<PHINode *> &TrivialPHIs) {
1569
42
  DenseSet<BasicBlock *> BlocksInScopeSet;
1570
42
  SmallVector<BasicBlock *, 8> BlocksInScopeVec;
1571
64
  for (RegInfo &RI : Scope->RegInfos) {
1572
166
    for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1573
166
                                            // sub-Scopes.
1574
166
      BlocksInScopeSet.insert(BB);
1575
166
      BlocksInScopeVec.push_back(BB);
1576
166
    }
1577
64
  }
1578
42
  CHR_DEBUG(
1579
42
      dbgs() << "Inserting redudant phis\n";
1580
42
      for (BasicBlock *BB : BlocksInScopeVec) {
1581
42
        dbgs() << "BlockInScope " << BB->getName() << "\n";
1582
42
      });
1583
166
  for (BasicBlock *BB : BlocksInScopeVec) {
1584
608
    for (Instruction &I : *BB) {
1585
608
      SmallVector<Instruction *, 8> Users;
1586
608
      for (User *U : I.users()) {
1587
492
        if (auto *UI = dyn_cast<Instruction>(U)) {
1588
492
          if (BlocksInScopeSet.count(UI->getParent()) == 0 &&
1589
492
              // Unless there's already a phi for I at the exit block.
1590
492
              
!(48
isa<PHINode>(UI)48
&&
UI->getParent() == ExitBlock24
)) {
1591
24
            CHR_DEBUG(dbgs() << "V " << I << "\n");
1592
24
            CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n");
1593
24
            Users.push_back(UI);
1594
468
          } else if (UI->getParent() == EntryBlock && 
isa<PHINode>(UI)184
) {
1595
2
            // There's a loop backedge from a block that's dominated by this
1596
2
            // scope to the entry block.
1597
2
            CHR_DEBUG(dbgs() << "V " << I << "\n");
1598
2
            CHR_DEBUG(dbgs()
1599
2
                      << "Used at entry block (for a back edge) by a phi user "
1600
2
                      << *UI << "\n");
1601
2
            Users.push_back(UI);
1602
2
          }
1603
492
        }
1604
492
      }
1605
608
      if (Users.size() > 0) {
1606
16
        // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at
1607
16
        // ExitBlock. Replace I with the new phi in UI unless UI is another
1608
16
        // phi at ExitBlock.
1609
16
        unsigned PredCount = std::distance(pred_begin(ExitBlock),
1610
16
                                           pred_end(ExitBlock));
1611
16
        PHINode *PN = PHINode::Create(I.getType(), PredCount, "",
1612
16
                                      &ExitBlock->front());
1613
32
        for (BasicBlock *Pred : predecessors(ExitBlock)) {
1614
32
          PN->addIncoming(&I, Pred);
1615
32
        }
1616
16
        TrivialPHIs.insert(PN);
1617
16
        CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n");
1618
26
        for (Instruction *UI : Users) {
1619
78
          for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; 
++J52
) {
1620
52
            if (UI->getOperand(J) == &I) {
1621
26
              UI->setOperand(J, PN);
1622
26
            }
1623
52
          }
1624
26
          CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n");
1625
26
        }
1626
16
      }
1627
608
    }
1628
166
  }
1629
42
}
1630
1631
// Assert that all the CHR regions of the scope have a biased branch or select.
1632
static void LLVM_ATTRIBUTE_UNUSED
1633
0
assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) {
1634
0
#ifndef NDEBUG
1635
0
  auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) {
1636
0
    if (Scope->TrueBiasedRegions.count(RI.R) ||
1637
0
        Scope->FalseBiasedRegions.count(RI.R))
1638
0
      return true;
1639
0
    for (SelectInst *SI : RI.Selects)
1640
0
      if (Scope->TrueBiasedSelects.count(SI) ||
1641
0
          Scope->FalseBiasedSelects.count(SI))
1642
0
        return true;
1643
0
    return false;
1644
0
  };
1645
0
  for (RegInfo &RI : Scope->CHRRegions) {
1646
0
    assert(HasBiasedBranchOrSelect(RI, Scope) &&
1647
0
           "Must have biased branch or select");
1648
0
  }
1649
0
#endif
1650
0
}
1651
1652
// Assert that all the condition values of the biased branches and selects have
1653
// been hoisted to the pre-entry block or outside of the scope.
1654
static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted(
1655
0
    CHRScope *Scope, BasicBlock *PreEntryBlock) {
1656
0
  CHR_DEBUG(dbgs() << "Biased regions condition values \n");
1657
0
  for (RegInfo &RI : Scope->CHRRegions) {
1658
0
    Region *R = RI.R;
1659
0
    bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1660
0
    bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1661
0
    if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1662
0
      auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1663
0
      Value *V = BI->getCondition();
1664
0
      CHR_DEBUG(dbgs() << *V << "\n");
1665
0
      if (auto *I = dyn_cast<Instruction>(V)) {
1666
0
        (void)(I); // Unused in release build.
1667
0
        assert((I->getParent() == PreEntryBlock ||
1668
0
                !Scope->contains(I)) &&
1669
0
               "Must have been hoisted to PreEntryBlock or outside the scope");
1670
0
      }
1671
0
    }
1672
0
    for (SelectInst *SI : RI.Selects) {
1673
0
      bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1674
0
      bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1675
0
      if (!(IsTrueBiased || IsFalseBiased))
1676
0
        continue;
1677
0
      Value *V = SI->getCondition();
1678
0
      CHR_DEBUG(dbgs() << *V << "\n");
1679
0
      if (auto *I = dyn_cast<Instruction>(V)) {
1680
0
        (void)(I); // Unused in release build.
1681
0
        assert((I->getParent() == PreEntryBlock ||
1682
0
                !Scope->contains(I)) &&
1683
0
               "Must have been hoisted to PreEntryBlock or outside the scope");
1684
0
      }
1685
0
    }
1686
0
  }
1687
0
}
1688
1689
46
void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
1690
46
  CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n");
1691
46
1692
46
  assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region");
1693
46
  Region *FirstRegion = Scope->RegInfos[0].R;
1694
46
  BasicBlock *EntryBlock = FirstRegion->getEntry();
1695
46
  Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R;
1696
46
  BasicBlock *ExitBlock = LastRegion->getExit();
1697
46
  Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock);
1698
46
1699
46
  if (ExitBlock) {
1700
42
    // Insert a trivial phi at the exit block (where the CHR hot path and the
1701
42
    // cold path merges) for a value that's defined in the scope but used
1702
42
    // outside it (meaning it's alive at the exit block). We will add the
1703
42
    // incoming values for the CHR cold paths to it below. Without this, we'd
1704
42
    // miss updating phi's for such values unless there happens to already be a
1705
42
    // phi for that value there.
1706
42
    insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
1707
42
  }
1708
46
1709
46
  // Split the entry block of the first region. The new block becomes the new
1710
46
  // entry block of the first region. The old entry block becomes the block to
1711
46
  // insert the CHR branch into. Note DT gets updated. Since DT gets updated
1712
46
  // through the split, we update the entry of the first region after the split,
1713
46
  // and Region only points to the entry and the exit blocks, rather than
1714
46
  // keeping everything in a list or set, the blocks membership and the
1715
46
  // entry/exit blocks of the region are still valid after the split.
1716
46
  CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName()
1717
46
            << " at " << *Scope->BranchInsertPoint << "\n");
1718
46
  BasicBlock *NewEntryBlock =
1719
46
      SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT);
1720
46
  assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1721
46
         "NewEntryBlock's only pred must be EntryBlock");
1722
46
  FirstRegion->replaceEntryRecursive(NewEntryBlock);
1723
46
  BasicBlock *PreEntryBlock = EntryBlock;
1724
46
1725
46
  ValueToValueMapTy VMap;
1726
46
  // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a
1727
46
  // hot path (originals) and a cold path (clones) and update the PHIs at the
1728
46
  // exit block.
1729
46
  cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap);
1730
46
1731
46
  // Replace the old (placeholder) branch with the new (merged) conditional
1732
46
  // branch.
1733
46
  BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock,
1734
46
                                            NewEntryBlock, VMap);
1735
46
1736
#ifndef NDEBUG
1737
  assertCHRRegionsHaveBiasedBranchOrSelect(Scope);
1738
#endif
1739
1740
46
  // Hoist the conditional values of the branches/selects.
1741
46
  hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT);
1742
46
1743
#ifndef NDEBUG
1744
  assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock);
1745
#endif
1746
1747
46
  // Create the combined branch condition and constant-fold the branches/selects
1748
46
  // in the hot path.
1749
46
  fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr,
1750
46
                          ProfileCount ? ProfileCount.getValue() : 
00
);
1751
46
}
1752
1753
// A helper for transformScopes. Clone the blocks in the scope (excluding the
1754
// PreEntryBlock) to split into a hot path and a cold path and update the PHIs
1755
// at the exit block.
1756
void CHR::cloneScopeBlocks(CHRScope *Scope,
1757
                           BasicBlock *PreEntryBlock,
1758
                           BasicBlock *ExitBlock,
1759
                           Region *LastRegion,
1760
46
                           ValueToValueMapTy &VMap) {
1761
46
  // Clone all the blocks. The original blocks will be the hot-path
1762
46
  // CHR-optimized code and the cloned blocks will be the original unoptimized
1763
46
  // code. This is so that the block pointers from the
1764
46
  // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code
1765
46
  // which CHR should apply to.
1766
46
  SmallVector<BasicBlock*, 8> NewBlocks;
1767
46
  for (RegInfo &RI : Scope->RegInfos)
1768
170
    
for (BasicBlock *BB : RI.R->blocks())68
{ // This includes the blocks in the
1769
170
                                            // sub-Scopes.
1770
170
      assert(BB != PreEntryBlock && "Don't copy the preetntry block");
1771
170
      BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F);
1772
170
      NewBlocks.push_back(NewBB);
1773
170
      VMap[BB] = NewBB;
1774
170
    }
1775
46
1776
46
  // Place the cloned blocks right after the original blocks (right before the
1777
46
  // exit block of.)
1778
46
  if (ExitBlock)
1779
42
    F.getBasicBlockList().splice(ExitBlock->getIterator(),
1780
42
                                 F.getBasicBlockList(),
1781
42
                                 NewBlocks[0]->getIterator(), F.end());
1782
46
1783
46
  // Update the cloned blocks/instructions to refer to themselves.
1784
216
  for (unsigned i = 0, e = NewBlocks.size(); i != e; 
++i170
)
1785
170
    for (Instruction &I : *NewBlocks[i])
1786
1.06k
      RemapInstruction(&I, VMap,
1787
1.06k
                       RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1788
46
1789
46
  // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for
1790
46
  // the top-level region but we don't need to add PHIs. The trivial PHIs
1791
46
  // inserted above will be updated here.
1792
46
  if (ExitBlock)
1793
42
    for (PHINode &PN : ExitBlock->phis())
1794
96
      
for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); 32
I < NumOps;
1795
64
           ++I) {
1796
64
        BasicBlock *Pred = PN.getIncomingBlock(I);
1797
64
        if (LastRegion->contains(Pred)) {
1798
64
          Value *V = PN.getIncomingValue(I);
1799
64
          auto It = VMap.find(V);
1800
64
          if (It != VMap.end()) 
V = It->second38
;
1801
64
          assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned");
1802
64
          PN.addIncoming(V, cast<BasicBlock>(VMap[Pred]));
1803
64
        }
1804
64
      }
1805
46
}
1806
1807
// A helper for transformScope. Replace the old (placeholder) branch with the
1808
// new (merged) conditional branch.
1809
BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock,
1810
                                    BasicBlock *EntryBlock,
1811
                                    BasicBlock *NewEntryBlock,
1812
46
                                    ValueToValueMapTy &VMap) {
1813
46
  BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator());
1814
46
  assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock &&
1815
46
         "SplitBlock did not work correctly!");
1816
46
  assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1817
46
         "NewEntryBlock's only pred must be EntryBlock");
1818
46
  assert(VMap.find(NewEntryBlock) != VMap.end() &&
1819
46
         "NewEntryBlock must have been copied");
1820
46
  OldBR->dropAllReferences();
1821
46
  OldBR->eraseFromParent();
1822
46
  // The true predicate is a placeholder. It will be replaced later in
1823
46
  // fixupBranchesAndSelects().
1824
46
  BranchInst *NewBR = BranchInst::Create(NewEntryBlock,
1825
46
                                         cast<BasicBlock>(VMap[NewEntryBlock]),
1826
46
                                         ConstantInt::getTrue(F.getContext()));
1827
46
  PreEntryBlock->getInstList().push_back(NewBR);
1828
46
  assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1829
46
         "NewEntryBlock's only pred must be EntryBlock");
1830
46
  return NewBR;
1831
46
}
1832
1833
// A helper for transformScopes. Create the combined branch condition and
1834
// constant-fold the branches/selects in the hot path.
1835
void CHR::fixupBranchesAndSelects(CHRScope *Scope,
1836
                                  BasicBlock *PreEntryBlock,
1837
                                  BranchInst *MergedBR,
1838
46
                                  uint64_t ProfileCount) {
1839
46
  Value *MergedCondition = ConstantInt::getTrue(F.getContext());
1840
46
  BranchProbability CHRBranchBias(1, 1);
1841
46
  uint64_t NumCHRedBranches = 0;
1842
46
  IRBuilder<> IRB(PreEntryBlock->getTerminator());
1843
84
  for (RegInfo &RI : Scope->CHRRegions) {
1844
84
    Region *R = RI.R;
1845
84
    if (RI.HasBranch) {
1846
76
      fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias);
1847
76
      ++NumCHRedBranches;
1848
76
    }
1849
84
    for (SelectInst *SI : RI.Selects) {
1850
44
      fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias);
1851
44
      ++NumCHRedBranches;
1852
44
    }
1853
84
  }
1854
46
  Stats.NumBranchesDelta += NumCHRedBranches - 1;
1855
46
  Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount;
1856
46
  ORE.emit([&]() {
1857
0
    return OptimizationRemark(DEBUG_TYPE,
1858
0
                              "CHR",
1859
0
                              // Refer to the hot (original) path
1860
0
                              MergedBR->getSuccessor(0)->getTerminator())
1861
0
        << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches)
1862
0
        << " branches or selects";
1863
0
  });
1864
46
  MergedBR->setCondition(MergedCondition);
1865
46
  SmallVector<uint32_t, 2> Weights;
1866
46
  Weights.push_back(static_cast<uint32_t>(CHRBranchBias.scale(1000)));
1867
46
  Weights.push_back(static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)));
1868
46
  MDBuilder MDB(F.getContext());
1869
46
  MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
1870
46
  CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1]
1871
46
            << "\n");
1872
46
}
1873
1874
// A helper for fixupBranchesAndSelects. Add to the combined branch condition
1875
// and constant-fold a branch in the hot path.
1876
void CHR::fixupBranch(Region *R, CHRScope *Scope,
1877
                      IRBuilder<> &IRB,
1878
                      Value *&MergedCondition,
1879
76
                      BranchProbability &CHRBranchBias) {
1880
76
  bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1881
76
  assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) &&
1882
76
         "Must be truthy or falsy");
1883
76
  auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1884
76
  assert(BranchBiasMap.find(R) != BranchBiasMap.end() &&
1885
76
         "Must be in the bias map");
1886
76
  BranchProbability Bias = BranchBiasMap[R];
1887
76
  assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1888
76
  // Take the min.
1889
76
  if (CHRBranchBias > Bias)
1890
0
    CHRBranchBias = Bias;
1891
76
  BasicBlock *IfThen = BI->getSuccessor(1);
1892
76
  BasicBlock *IfElse = BI->getSuccessor(0);
1893
76
  BasicBlock *RegionExitBlock = R->getExit();
1894
76
  assert(RegionExitBlock && "Null ExitBlock");
1895
76
  assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) &&
1896
76
         IfThen != IfElse && "Invariant from findScopes");
1897
76
  if (IfThen == RegionExitBlock) {
1898
6
    // Swap them so that IfThen means going into it and IfElse means skipping
1899
6
    // it.
1900
6
    std::swap(IfThen, IfElse);
1901
6
  }
1902
76
  CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName()
1903
76
            << " IfElse " << IfElse->getName() << "\n");
1904
76
  Value *Cond = BI->getCondition();
1905
76
  BasicBlock *HotTarget = IsTrueBiased ? 
IfThen72
:
IfElse4
;
1906
76
  bool ConditionTrue = HotTarget == BI->getSuccessor(0);
1907
76
  addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB,
1908
76
                       MergedCondition);
1909
76
  // Constant-fold the branch at ClonedEntryBlock.
1910
76
  assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) &&
1911
76
         "The successor shouldn't change");
1912
76
  Value *NewCondition = ConditionTrue ?
1913
2
                        ConstantInt::getTrue(F.getContext()) :
1914
76
                        
ConstantInt::getFalse(F.getContext())74
;
1915
76
  BI->setCondition(NewCondition);
1916
76
}
1917
1918
// A helper for fixupBranchesAndSelects. Add to the combined branch condition
1919
// and constant-fold a select in the hot path.
1920
void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope,
1921
                      IRBuilder<> &IRB,
1922
                      Value *&MergedCondition,
1923
44
                      BranchProbability &CHRBranchBias) {
1924
44
  bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1925
44
  assert((IsTrueBiased ||
1926
44
          Scope->FalseBiasedSelects.count(SI)) && "Must be biased");
1927
44
  assert(SelectBiasMap.find(SI) != SelectBiasMap.end() &&
1928
44
         "Must be in the bias map");
1929
44
  BranchProbability Bias = SelectBiasMap[SI];
1930
44
  assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1931
44
  // Take the min.
1932
44
  if (CHRBranchBias > Bias)
1933
0
    CHRBranchBias = Bias;
1934
44
  Value *Cond = SI->getCondition();
1935
44
  addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB,
1936
44
                       MergedCondition);
1937
44
  Value *NewCondition = IsTrueBiased ?
1938
4
                        ConstantInt::getTrue(F.getContext()) :
1939
44
                        
ConstantInt::getFalse(F.getContext())40
;
1940
44
  SI->setCondition(NewCondition);
1941
44
}
1942
1943
// A helper for fixupBranch/fixupSelect. Add a branch condition to the merged
1944
// condition.
1945
void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond,
1946
                               Instruction *BranchOrSelect,
1947
                               CHRScope *Scope,
1948
                               IRBuilder<> &IRB,
1949
120
                               Value *&MergedCondition) {
1950
120
  if (IsTrueBiased) {
1951
6
    MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
1952
114
  } else {
1953
114
    // If Cond is an icmp and all users of V except for BranchOrSelect is a
1954
114
    // branch, negate the icmp predicate and swap the branch targets and avoid
1955
114
    // inserting an Xor to negate Cond.
1956
114
    bool Done = false;
1957
114
    if (auto *ICmp = dyn_cast<ICmpInst>(Cond))
1958
112
      if (negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) {
1959
112
        MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
1960
112
        Done = true;
1961
112
      }
1962
114
    if (!Done) {
1963
2
      Value *Negate = IRB.CreateXor(
1964
2
          ConstantInt::getTrue(F.getContext()), Cond);
1965
2
      MergedCondition = IRB.CreateAnd(MergedCondition, Negate);
1966
2
    }
1967
114
  }
1968
120
}
1969
1970
40
void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) {
1971
40
  unsigned I = 0;
1972
40
  DenseSet<PHINode *> TrivialPHIs;
1973
46
  for (CHRScope *Scope : CHRScopes) {
1974
46
    transformScopes(Scope, TrivialPHIs);
1975
46
    CHR_DEBUG(
1976
46
        std::ostringstream oss;
1977
46
        oss << " after transformScopes " << I++;
1978
46
        dumpIR(F, oss.str().c_str(), nullptr));
1979
46
    (void)I;
1980
46
  }
1981
40
}
1982
1983
static void LLVM_ATTRIBUTE_UNUSED
1984
0
dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) {
1985
0
  dbgs() << Label << " " << Scopes.size() << "\n";
1986
0
  for (CHRScope *Scope : Scopes) {
1987
0
    dbgs() << *Scope << "\n";
1988
0
  }
1989
0
}
1990
1991
48
bool CHR::run() {
1992
48
  if (!shouldApply(F, PSI))
1993
0
    return false;
1994
48
1995
48
  CHR_DEBUG(dumpIR(F, "before", nullptr));
1996
48
1997
48
  bool Changed = false;
1998
48
  {
1999
48
    CHR_DEBUG(
2000
48
        dbgs() << "RegionInfo:\n";
2001
48
        RI.print(dbgs()));
2002
48
2003
48
    // Recursively traverse the region tree and find regions that have biased
2004
48
    // branches and/or selects and create scopes.
2005
48
    SmallVector<CHRScope *, 8> AllScopes;
2006
48
    findScopes(AllScopes);
2007
48
    CHR_DEBUG(dumpScopes(AllScopes, "All scopes"));
2008
48
2009
48
    // Split the scopes if 1) the conditiona values of the biased
2010
48
    // branches/selects of the inner/lower scope can't be hoisted up to the
2011
48
    // outermost/uppermost scope entry, or 2) the condition values of the biased
2012
48
    // branches/selects in a scope (including subscopes) don't share at least
2013
48
    // one common value.
2014
48
    SmallVector<CHRScope *, 8> SplitScopes;
2015
48
    splitScopes(AllScopes, SplitScopes);
2016
48
    CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes"));
2017
48
2018
48
    // After splitting, set the biased regions and selects of a scope (a tree
2019
48
    // root) that include those of the subscopes.
2020
48
    classifyBiasedScopes(SplitScopes);
2021
48
    CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n");
2022
48
2023
48
    // Filter out the scopes that has only one biased region or select (CHR
2024
48
    // isn't useful in such a case).
2025
48
    SmallVector<CHRScope *, 8> FilteredScopes;
2026
48
    filterScopes(SplitScopes, FilteredScopes);
2027
48
    CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes"));
2028
48
2029
48
    // Set the regions to be CHR'ed and their hoist stops for each scope.
2030
48
    SmallVector<CHRScope *, 8> SetScopes;
2031
48
    setCHRRegions(FilteredScopes, SetScopes);
2032
48
    CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions"));
2033
48
2034
48
    // Sort CHRScopes by the depth so that outer CHRScopes comes before inner
2035
48
    // ones. We need to apply CHR from outer to inner so that we apply CHR only
2036
48
    // to the hot path, rather than both hot and cold paths.
2037
48
    SmallVector<CHRScope *, 8> SortedScopes;
2038
48
    sortScopes(SetScopes, SortedScopes);
2039
48
    CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes"));
2040
48
2041
48
    CHR_DEBUG(
2042
48
        dbgs() << "RegionInfo:\n";
2043
48
        RI.print(dbgs()));
2044
48
2045
48
    // Apply the CHR transformation.
2046
48
    if (!SortedScopes.empty()) {
2047
40
      transformScopes(SortedScopes);
2048
40
      Changed = true;
2049
40
    }
2050
48
  }
2051
48
2052
48
  if (Changed) {
2053
40
    CHR_DEBUG(dumpIR(F, "after", &Stats));
2054
40
    ORE.emit([&]() {
2055
0
      return OptimizationRemark(DEBUG_TYPE, "Stats", &F)
2056
0
          << ore::NV("Function", &F) << " "
2057
0
          << "Reduced the number of branches in hot paths by "
2058
0
          << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta)
2059
0
          << " (static) and "
2060
0
          << ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta)
2061
0
          << " (weighted by PGO count)";
2062
0
    });
2063
40
  }
2064
48
2065
48
  return Changed;
2066
48
}
2067
2068
24
bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) {
2069
24
  BlockFrequencyInfo &BFI =
2070
24
      getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
2071
24
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2072
24
  ProfileSummaryInfo &PSI =
2073
24
      getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
2074
24
  RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
2075
24
  std::unique_ptr<OptimizationRemarkEmitter> OwnedORE =
2076
24
      llvm::make_unique<OptimizationRemarkEmitter>(&F);
2077
24
  return CHR(F, BFI, DT, PSI, RI, *OwnedORE.get()).run();
2078
24
}
2079
2080
namespace llvm {
2081
2082
1
ControlHeightReductionPass::ControlHeightReductionPass() {
2083
1
  parseCHRFilterFiles();
2084
1
}
2085
2086
PreservedAnalyses ControlHeightReductionPass::run(
2087
    Function &F,
2088
24
    FunctionAnalysisManager &FAM) {
2089
24
  auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
2090
24
  auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
2091
24
  auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
2092
24
  auto &MAM = MAMProxy.getManager();
2093
24
  auto &PSI = *MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
2094
24
  auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
2095
24
  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
2096
24
  bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run();
2097
24
  if (!Changed)
2098
4
    return PreservedAnalyses::all();
2099
20
  auto PA = PreservedAnalyses();
2100
20
  PA.preserve<GlobalsAA>();
2101
20
  return PA;
2102
20
}
2103
2104
} // namespace llvm