Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Transforms/Utils/LoopSimplify.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- LoopSimplify.cpp - Loop Canonicalization Pass ----------------------===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// This pass performs several transformations to transform natural loops into a
11
// simpler form, which makes subsequent analyses and transformations simpler and
12
// more effective.
13
//
14
// Loop pre-header insertion guarantees that there is a single, non-critical
15
// entry edge from outside of the loop to the loop header.  This simplifies a
16
// number of analyses and transformations, such as LICM.
17
//
18
// Loop exit-block insertion guarantees that all exit blocks from the loop
19
// (blocks which are outside of the loop that have predecessors inside of the
20
// loop) only have predecessors from inside of the loop (and are thus dominated
21
// by the loop header).  This simplifies transformations such as store-sinking
22
// that are built into LICM.
23
//
24
// This pass also guarantees that loops will have exactly one backedge.
25
//
26
// Indirectbr instructions introduce several complications. If the loop
27
// contains or is entered by an indirectbr instruction, it may not be possible
28
// to transform the loop and make these guarantees. Client code should check
29
// that these conditions are true before relying on them.
30
//
31
// Note that the simplifycfg pass will clean up blocks which are split out but
32
// end up being unnecessary, so usage of this pass should not pessimize
33
// generated code.
34
//
35
// This pass obviously modifies the CFG, but updates loop information and
36
// dominator information.
37
//
38
//===----------------------------------------------------------------------===//
39
40
#include "llvm/Transforms/Utils/LoopSimplify.h"
41
#include "llvm/ADT/DepthFirstIterator.h"
42
#include "llvm/ADT/SetOperations.h"
43
#include "llvm/ADT/SetVector.h"
44
#include "llvm/ADT/SmallVector.h"
45
#include "llvm/ADT/Statistic.h"
46
#include "llvm/Analysis/AliasAnalysis.h"
47
#include "llvm/Analysis/AssumptionCache.h"
48
#include "llvm/Analysis/BasicAliasAnalysis.h"
49
#include "llvm/Analysis/DependenceAnalysis.h"
50
#include "llvm/Analysis/GlobalsModRef.h"
51
#include "llvm/Analysis/InstructionSimplify.h"
52
#include "llvm/Analysis/LoopInfo.h"
53
#include "llvm/Analysis/ScalarEvolution.h"
54
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
55
#include "llvm/IR/CFG.h"
56
#include "llvm/IR/Constants.h"
57
#include "llvm/IR/DataLayout.h"
58
#include "llvm/IR/Dominators.h"
59
#include "llvm/IR/Function.h"
60
#include "llvm/IR/Instructions.h"
61
#include "llvm/IR/IntrinsicInst.h"
62
#include "llvm/IR/LLVMContext.h"
63
#include "llvm/IR/Module.h"
64
#include "llvm/IR/Type.h"
65
#include "llvm/Support/Debug.h"
66
#include "llvm/Support/raw_ostream.h"
67
#include "llvm/Transforms/Scalar.h"
68
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
69
#include "llvm/Transforms/Utils/Local.h"
70
#include "llvm/Transforms/Utils/LoopUtils.h"
71
using namespace llvm;
72
73
#define DEBUG_TYPE "loop-simplify"
74
75
STATISTIC(NumNested  , "Number of nested loops split out");
76
77
// If the block isn't already, move the new block to right after some 'outside
78
// block' block.  This prevents the preheader from being placed inside the loop
79
// body, e.g. when the loop hasn't been rotated.
80
static void placeSplitBlockCarefully(BasicBlock *NewBB,
81
                                     SmallVectorImpl<BasicBlock *> &SplitPreds,
82
159k
                                     Loop *L) {
83
159k
  // Check to see if NewBB is already well placed.
84
159k
  Function::iterator BBI = --NewBB->getIterator();
85
250k
  for (unsigned i = 0, e = SplitPreds.size(); 
i != e250k
;
++i91.7k
) {
86
181k
    if (&*BBI == SplitPreds[i])
87
89.6k
      return;
88
181k
  }
89
159k
90
159k
  // If it isn't already after an outside block, move it after one.  This is
91
159k
  // always good as it makes the uncond branch from the outside block into a
92
159k
  // fall-through.
93
159k
94
159k
  // Figure out *which* outside block to put this after.  Prefer an outside
95
159k
  // block that neighbors a BB actually in the loop.
96
69.4k
  BasicBlock *FoundBB = nullptr;
97
150k
  for (unsigned i = 0, e = SplitPreds.size(); 
i != e150k
;
++i81.1k
) {
98
85.3k
    Function::iterator BBI = SplitPreds[i]->getIterator();
99
85.3k
    if (
++BBI != NewBB->getParent()->end() && 85.3k
L->contains(&*BBI)85.3k
) {
100
4.27k
      FoundBB = SplitPreds[i];
101
4.27k
      break;
102
4.27k
    }
103
85.3k
  }
104
69.4k
105
69.4k
  // If our heuristic for a *good* bb to place this after doesn't find
106
69.4k
  // anything, just pick something.  It's likely better than leaving it within
107
69.4k
  // the loop.
108
69.4k
  if (!FoundBB)
109
65.1k
    FoundBB = SplitPreds[0];
110
159k
  NewBB->moveAfter(FoundBB);
111
159k
}
112
113
/// InsertPreheaderForLoop - Once we discover that a loop doesn't have a
114
/// preheader, this method is called to insert one.  This method has two phases:
115
/// preheader insertion and analysis updating.
116
///
117
BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT,
118
158k
                                         LoopInfo *LI, bool PreserveLCSSA) {
119
158k
  BasicBlock *Header = L->getHeader();
120
158k
121
158k
  // Compute the set of predecessors of the loop that are not in the loop.
122
158k
  SmallVector<BasicBlock*, 8> OutsideBlocks;
123
158k
  for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
124
507k
       
PI != PE507k
;
++PI349k
) {
125
349k
    BasicBlock *P = *PI;
126
349k
    if (
!L->contains(P)349k
) { // Coming in from outside the loop?
127
181k
      // If the loop is branched to from an indirect branch, we won't
128
181k
      // be able to fully transform the loop, because it prohibits
129
181k
      // edge splitting.
130
181k
      if (
isa<IndirectBrInst>(P->getTerminator())181k
)
return nullptr42
;
131
181k
132
181k
      // Keep track of it.
133
181k
      OutsideBlocks.push_back(P);
134
181k
    }
135
349k
  }
136
158k
137
158k
  // Split out the loop pre-header.
138
158k
  BasicBlock *PreheaderBB;
139
158k
  PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", DT,
140
158k
                                       LI, PreserveLCSSA);
141
158k
  if (!PreheaderBB)
142
0
    return nullptr;
143
158k
144
158k
  
DEBUG158k
(dbgs() << "LoopSimplify: Creating pre-header "
145
158k
               << PreheaderBB->getName() << "\n");
146
158k
147
158k
  // Make sure that NewBB is put someplace intelligent, which doesn't mess up
148
158k
  // code layout too horribly.
149
158k
  placeSplitBlockCarefully(PreheaderBB, OutsideBlocks, L);
150
158k
151
158k
  return PreheaderBB;
152
158k
}
153
154
/// Add the specified block, and all of its predecessors, to the specified set,
155
/// if it's not already in there.  Stop predecessor traversal when we reach
156
/// StopBlock.
157
static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
158
633
                                  std::set<BasicBlock*> &Blocks) {
159
633
  SmallVector<BasicBlock *, 8> Worklist;
160
633
  Worklist.push_back(InputBB);
161
12.2k
  do {
162
12.2k
    BasicBlock *BB = Worklist.pop_back_val();
163
12.2k
    if (
Blocks.insert(BB).second && 12.2k
BB != StopBlock8.49k
)
164
12.2k
      // If BB is not already processed and it is not a stop block then
165
12.2k
      // insert its predecessor in the work list
166
19.6k
      
for (pred_iterator I = pred_begin(BB), E = pred_end(BB); 8.02k
I != E19.6k
;
++I11.5k
) {
167
11.5k
        BasicBlock *WBB = *I;
168
11.5k
        Worklist.push_back(WBB);
169
11.5k
      }
170
12.2k
  } while (!Worklist.empty());
171
633
}
172
173
/// \brief The first part of loop-nestification is to find a PHI node that tells
174
/// us how to partition the loops.
175
static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT,
176
4.56k
                                        AssumptionCache *AC) {
177
4.56k
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
178
6.82k
  for (BasicBlock::iterator I = L->getHeader()->begin(); 
isa<PHINode>(I)6.82k
; ) {
179
2.74k
    PHINode *PN = cast<PHINode>(I);
180
2.74k
    ++I;
181
2.74k
    if (Value *
V2.74k
= SimplifyInstruction(PN, {DL, nullptr, DT, AC})) {
182
187
      // This is a degenerate PHI already, don't modify it!
183
187
      PN->replaceAllUsesWith(V);
184
187
      PN->eraseFromParent();
185
187
      continue;
186
187
    }
187
2.55k
188
2.55k
    // Scan this PHI node looking for a use of the PHI node by itself.
189
10.3k
    
for (unsigned i = 0, e = PN->getNumIncomingValues(); 2.55k
i != e10.3k
;
++i7.77k
)
190
8.25k
      
if (8.25k
PN->getIncomingValue(i) == PN &&
191
472
          L->contains(PN->getIncomingBlock(i)))
192
8.25k
        // We found something tasty to remove.
193
472
        return PN;
194
2.74k
  }
195
4.08k
  return nullptr;
196
4.56k
}
197
198
/// \brief If this loop has multiple backedges, try to pull one of them out into
199
/// a nested loop.
200
///
201
/// This is important for code that looks like
202
/// this:
203
///
204
///  Loop:
205
///     ...
206
///     br cond, Loop, Next
207
///     ...
208
///     br cond2, Loop, Out
209
///
210
/// To identify this common case, we look at the PHI nodes in the header of the
211
/// loop.  PHI nodes with unchanging values on one backedge correspond to values
212
/// that change in the "outer" loop, but not in the "inner" loop.
213
///
214
/// If we are able to separate out a loop, return the new outer loop that was
215
/// created.
216
///
217
static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
218
                                DominatorTree *DT, LoopInfo *LI,
219
                                ScalarEvolution *SE, bool PreserveLCSSA,
220
4.56k
                                AssumptionCache *AC) {
221
4.56k
  // Don't try to separate loops without a preheader.
222
4.56k
  if (!Preheader)
223
1
    return nullptr;
224
4.56k
225
4.56k
  // The header is not a landing pad; preheader insertion should ensure this.
226
4.56k
  BasicBlock *Header = L->getHeader();
227
4.56k
  assert(!Header->isEHPad() && "Can't insert backedge to EH pad");
228
4.56k
229
4.56k
  PHINode *PN = findPHIToPartitionLoops(L, DT, AC);
230
4.56k
  if (
!PN4.56k
)
return nullptr4.08k
; // No known way to partition.
231
472
232
472
  // Pull out all predecessors that have varying values in the loop.  This
233
472
  // handles the case when a PHI node has multiple instances of itself as
234
472
  // arguments.
235
472
  SmallVector<BasicBlock*, 8> OuterLoopPreds;
236
2.23k
  for (unsigned i = 0, e = PN->getNumIncomingValues(); 
i != e2.23k
;
++i1.76k
) {
237
1.76k
    if (PN->getIncomingValue(i) != PN ||
238
1.76k
        
!L->contains(PN->getIncomingBlock(i))633
) {
239
1.13k
      // We can't split indirectbr edges.
240
1.13k
      if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator()))
241
1
        return nullptr;
242
1.13k
      OuterLoopPreds.push_back(PN->getIncomingBlock(i));
243
1.13k
    }
244
1.76k
  }
245
471
  
DEBUG471
(dbgs() << "LoopSimplify: Splitting out a new outer loop\n");
246
471
247
471
  // If ScalarEvolution is around and knows anything about values in
248
471
  // this loop, tell it to forget them, because we're about to
249
471
  // substantially change it.
250
471
  if (SE)
251
1
    SE->forgetLoop(L);
252
471
253
471
  BasicBlock *NewBB = SplitBlockPredecessors(Header, OuterLoopPreds, ".outer",
254
471
                                             DT, LI, PreserveLCSSA);
255
471
256
471
  // Make sure that NewBB is put someplace intelligent, which doesn't mess up
257
471
  // code layout too horribly.
258
471
  placeSplitBlockCarefully(NewBB, OuterLoopPreds, L);
259
471
260
471
  // Create the new outer loop.
261
471
  Loop *NewOuter = LI->AllocateLoop();
262
471
263
471
  // Change the parent loop to use the outer loop as its child now.
264
471
  if (Loop *Parent = L->getParentLoop())
265
122
    Parent->replaceChildLoopWith(L, NewOuter);
266
471
  else
267
349
    LI->changeTopLevelLoop(L, NewOuter);
268
471
269
471
  // L is now a subloop of our outer loop.
270
471
  NewOuter->addChildLoop(L);
271
471
272
471
  for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
273
21.1k
       
I != E21.1k
;
++I20.6k
)
274
20.6k
    NewOuter->addBlockEntry(*I);
275
471
276
471
  // Now reset the header in L, which had been moved by
277
471
  // SplitBlockPredecessors for the outer loop.
278
471
  L->moveToHeader(Header);
279
471
280
471
  // Determine which blocks should stay in L and which should be moved out to
281
471
  // the Outer loop now.
282
471
  std::set<BasicBlock*> BlocksInL;
283
1.57k
  for (pred_iterator PI=pred_begin(Header), E = pred_end(Header); 
PI!=E1.57k
;
++PI1.10k
) {
284
1.10k
    BasicBlock *P = *PI;
285
1.10k
    if (DT->dominates(Header, P))
286
633
      addBlockAndPredsToSet(P, Header, BlocksInL);
287
1.10k
  }
288
471
289
471
  // Scan all of the loop children of L, moving them to OuterLoop if they are
290
471
  // not part of the inner loop.
291
471
  const std::vector<Loop*> &SubLoops = L->getSubLoops();
292
1.41k
  for (size_t I = 0; I != SubLoops.size(); )
293
940
    
if (940
BlocksInL.count(SubLoops[I]->getHeader())940
)
294
351
      ++I;   // Loop remains in L
295
940
    else
296
589
      NewOuter->addChildLoop(L->removeChildLoop(SubLoops.begin() + I));
297
471
298
471
  SmallVector<BasicBlock *, 8> OuterLoopBlocks;
299
471
  OuterLoopBlocks.push_back(NewBB);
300
471
  // Now that we know which blocks are in L and which need to be moved to
301
471
  // OuterLoop, move any blocks that need it.
302
21.1k
  for (unsigned i = 0; 
i != L->getBlocks().size()21.1k
;
++i20.6k
) {
303
20.6k
    BasicBlock *BB = L->getBlocks()[i];
304
20.6k
    if (
!BlocksInL.count(BB)20.6k
) {
305
12.1k
      // Move this block to the parent, updating the exit blocks sets
306
12.1k
      L->removeBlockFromLoop(BB);
307
12.1k
      if (
(*LI)[BB] == L12.1k
) {
308
7.70k
        LI->changeLoopFor(BB, NewOuter);
309
7.70k
        OuterLoopBlocks.push_back(BB);
310
7.70k
      }
311
12.1k
      --i;
312
12.1k
    }
313
20.6k
  }
314
471
315
471
  // Split edges to exit blocks from the inner loop, if they emerged in the
316
471
  // process of separating the outer one.
317
471
  formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA);
318
471
319
471
  if (
PreserveLCSSA471
) {
320
4
    // Fix LCSSA form for L. Some values, which previously were only used inside
321
4
    // L, can now be used in NewOuter loop. We need to insert phi-nodes for them
322
4
    // in corresponding exit blocks.
323
4
    // We don't need to form LCSSA recursively, because there cannot be uses
324
4
    // inside a newly created loop of defs from inner loops as those would
325
4
    // already be a use of an LCSSA phi node.
326
4
    formLCSSA(*L, *DT, LI, SE);
327
4
328
4
    assert(NewOuter->isRecursivelyLCSSAForm(*DT, *LI) &&
329
4
           "LCSSA is broken after separating nested loops!");
330
4
  }
331
471
332
471
  return NewOuter;
333
4.56k
}
334
335
/// \brief This method is called when the specified loop has more than one
336
/// backedge in it.
337
///
338
/// If this occurs, revector all of these backedges to target a new basic block
339
/// and have that block branch to the loop header.  This ensures that loops
340
/// have exactly one backedge.
341
static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
342
4.16k
                                             DominatorTree *DT, LoopInfo *LI) {
343
4.16k
  assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");
344
4.16k
345
4.16k
  // Get information about the loop
346
4.16k
  BasicBlock *Header = L->getHeader();
347
4.16k
  Function *F = Header->getParent();
348
4.16k
349
4.16k
  // Unique backedge insertion currently depends on having a preheader.
350
4.16k
  if (!Preheader)
351
1
    return nullptr;
352
4.16k
353
4.16k
  // The header is not an EH pad; preheader insertion should ensure this.
354
4.16k
  assert(!Header->isEHPad() && "Can't insert backedge to EH pad");
355
4.16k
356
4.16k
  // Figure out which basic blocks contain back-edges to the loop header.
357
4.16k
  std::vector<BasicBlock*> BackedgeBlocks;
358
23.9k
  for (pred_iterator I = pred_begin(Header), E = pred_end(Header); 
I != E23.9k
;
++I19.7k
){
359
19.7k
    BasicBlock *P = *I;
360
19.7k
361
19.7k
    // Indirectbr edges cannot be split, so we must fail if we find one.
362
19.7k
    if (isa<IndirectBrInst>(P->getTerminator()))
363
4
      return nullptr;
364
19.7k
365
19.7k
    
if (19.7k
P != Preheader19.7k
)
BackedgeBlocks.push_back(P)15.5k
;
366
19.7k
  }
367
4.16k
368
4.16k
  // Create and insert the new backedge block...
369
4.15k
  BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(),
370
4.15k
                                           Header->getName() + ".backedge", F);
371
4.15k
  BranchInst *BETerminator = BranchInst::Create(Header, BEBlock);
372
4.15k
  BETerminator->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc());
373
4.15k
374
4.15k
  DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block "
375
4.15k
               << BEBlock->getName() << "\n");
376
4.15k
377
4.15k
  // Move the new backedge block to right after the last backedge block.
378
4.15k
  Function::iterator InsertPos = ++BackedgeBlocks.back()->getIterator();
379
4.15k
  F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock);
380
4.15k
381
4.15k
  // Now that the block has been inserted into the function, create PHI nodes in
382
4.15k
  // the backedge block which correspond to any PHI nodes in the header block.
383
6.15k
  for (BasicBlock::iterator I = Header->begin(); 
isa<PHINode>(I)6.15k
;
++I2.00k
) {
384
2.00k
    PHINode *PN = cast<PHINode>(I);
385
2.00k
    PHINode *NewPN = PHINode::Create(PN->getType(), BackedgeBlocks.size(),
386
2.00k
                                     PN->getName()+".be", BETerminator);
387
2.00k
388
2.00k
    // Loop over the PHI node, moving all entries except the one for the
389
2.00k
    // preheader over to the new PHI node.
390
2.00k
    unsigned PreheaderIdx = ~0U;
391
2.00k
    bool HasUniqueIncomingValue = true;
392
2.00k
    Value *UniqueValue = nullptr;
393
11.1k
    for (unsigned i = 0, e = PN->getNumIncomingValues(); 
i != e11.1k
;
++i9.19k
) {
394
9.19k
      BasicBlock *IBB = PN->getIncomingBlock(i);
395
9.19k
      Value *IV = PN->getIncomingValue(i);
396
9.19k
      if (
IBB == Preheader9.19k
) {
397
2.00k
        PreheaderIdx = i;
398
9.19k
      } else {
399
7.19k
        NewPN->addIncoming(IV, IBB);
400
7.19k
        if (
HasUniqueIncomingValue7.19k
) {
401
4.80k
          if (!UniqueValue)
402
2.00k
            UniqueValue = IV;
403
2.80k
          else 
if (2.80k
UniqueValue != IV2.80k
)
404
1.22k
            HasUniqueIncomingValue = false;
405
4.80k
        }
406
7.19k
      }
407
9.19k
    }
408
2.00k
409
2.00k
    // Delete all of the incoming values from the old PN except the preheader's
410
2.00k
    assert(PreheaderIdx != ~0U && "PHI has no preheader entry??");
411
2.00k
    if (
PreheaderIdx != 02.00k
) {
412
668
      PN->setIncomingValue(0, PN->getIncomingValue(PreheaderIdx));
413
668
      PN->setIncomingBlock(0, PN->getIncomingBlock(PreheaderIdx));
414
668
    }
415
2.00k
    // Nuke all entries except the zero'th.
416
9.19k
    for (unsigned i = 0, e = PN->getNumIncomingValues()-1; 
i != e9.19k
;
++i7.19k
)
417
7.19k
      PN->removeIncomingValue(e-i, false);
418
2.00k
419
2.00k
    // Finally, add the newly constructed PHI node as the entry for the BEBlock.
420
2.00k
    PN->addIncoming(NewPN, BEBlock);
421
2.00k
422
2.00k
    // As an optimization, if all incoming values in the new PhiNode (which is a
423
2.00k
    // subset of the incoming values of the old PHI node) have the same value,
424
2.00k
    // eliminate the PHI Node.
425
2.00k
    if (
HasUniqueIncomingValue2.00k
) {
426
772
      NewPN->replaceAllUsesWith(UniqueValue);
427
772
      BEBlock->getInstList().erase(NewPN);
428
772
    }
429
2.00k
  }
430
4.15k
431
4.15k
  // Now that all of the PHI nodes have been inserted and adjusted, modify the
432
4.15k
  // backedge blocks to jump to the BEBlock instead of the header.
433
4.15k
  // If one of the backedges has llvm.loop metadata attached, we remove
434
4.15k
  // it from the backedge and add it to BEBlock.
435
4.15k
  unsigned LoopMDKind = BEBlock->getContext().getMDKindID("llvm.loop");
436
4.15k
  MDNode *LoopMD = nullptr;
437
19.7k
  for (unsigned i = 0, e = BackedgeBlocks.size(); 
i != e19.7k
;
++i15.5k
) {
438
15.5k
    TerminatorInst *TI = BackedgeBlocks[i]->getTerminator();
439
15.5k
    if (!LoopMD)
440
15.5k
      LoopMD = TI->getMetadata(LoopMDKind);
441
15.5k
    TI->setMetadata(LoopMDKind, nullptr);
442
40.5k
    for (unsigned Op = 0, e = TI->getNumSuccessors(); 
Op != e40.5k
;
++Op24.9k
)
443
24.9k
      
if (24.9k
TI->getSuccessor(Op) == Header24.9k
)
444
15.5k
        TI->setSuccessor(Op, BEBlock);
445
15.5k
  }
446
4.15k
  BEBlock->getTerminator()->setMetadata(LoopMDKind, LoopMD);
447
4.15k
448
4.15k
  //===--- Update all analyses which we must preserve now -----------------===//
449
4.15k
450
4.15k
  // Update Loop Information - we know that this block is now in the current
451
4.15k
  // loop and all parent loops.
452
4.15k
  L->addBasicBlockToLoop(BEBlock, *LI);
453
4.15k
454
4.15k
  // Update dominator information
455
4.15k
  DT->splitBlock(BEBlock);
456
4.15k
457
4.15k
  return BEBlock;
458
4.16k
}
459
460
/// \brief Simplify one loop and queue further loops for simplification.
461
static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
462
                            DominatorTree *DT, LoopInfo *LI,
463
                            ScalarEvolution *SE, AssumptionCache *AC,
464
3.31M
                            bool PreserveLCSSA) {
465
3.31M
  bool Changed = false;
466
3.31M
ReprocessLoop:
467
3.31M
468
3.31M
  // Check to see that no blocks (other than the header) in this loop have
469
3.31M
  // predecessors that are not in the loop.  This is not valid for natural
470
3.31M
  // loops, but can occur if the blocks are unreachable.  Since they are
471
3.31M
  // unreachable we can just shamelessly delete those CFG edges!
472
3.31M
  for (Loop::block_iterator BB = L->block_begin(), E = L->block_end();
473
18.0M
       
BB != E18.0M
;
++BB14.7M
) {
474
14.7M
    if (
*BB == L->getHeader()14.7M
)
continue3.31M
;
475
11.4M
476
11.4M
    SmallPtrSet<BasicBlock*, 4> BadPreds;
477
11.4M
    for (pred_iterator PI = pred_begin(*BB),
478
28.7M
         PE = pred_end(*BB); 
PI != PE28.7M
;
++PI17.3M
) {
479
17.3M
      BasicBlock *P = *PI;
480
17.3M
      if (!L->contains(P))
481
10
        BadPreds.insert(P);
482
17.3M
    }
483
11.4M
484
11.4M
    // Delete each unique out-of-loop (and thus dead) predecessor.
485
10
    for (BasicBlock *P : BadPreds) {
486
10
487
10
      DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "
488
10
                   << P->getName() << "\n");
489
10
490
10
      // Zap the dead pred's terminator and replace it with unreachable.
491
10
      TerminatorInst *TI = P->getTerminator();
492
10
      changeToUnreachable(TI, /*UseLLVMTrap=*/false, PreserveLCSSA);
493
10
      Changed = true;
494
10
    }
495
14.7M
  }
496
3.31M
497
3.31M
  // If there are exiting blocks with branches on undef, resolve the undef in
498
3.31M
  // the direction which will exit the loop. This will help simplify loop
499
3.31M
  // trip count computations.
500
3.31M
  SmallVector<BasicBlock*, 8> ExitingBlocks;
501
3.31M
  L->getExitingBlocks(ExitingBlocks);
502
3.31M
  for (BasicBlock *ExitingBlock : ExitingBlocks)
503
4.37M
    
if (BranchInst *4.37M
BI4.37M
= dyn_cast<BranchInst>(ExitingBlock->getTerminator()))
504
4.27M
      
if (4.27M
BI->isConditional()4.27M
) {
505
4.27M
        if (UndefValue *
Cond4.27M
= dyn_cast<UndefValue>(BI->getCondition())) {
506
634
507
634
          DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in "
508
634
                       << ExitingBlock->getName() << "\n");
509
634
510
634
          BI->setCondition(ConstantInt::get(Cond->getType(),
511
634
                                            !L->contains(BI->getSuccessor(0))));
512
634
513
634
          // This may make the loop analyzable, force SCEV recomputation.
514
634
          if (SE)
515
27
            SE->forgetLoop(L);
516
634
517
634
          Changed = true;
518
634
        }
519
4.37M
      }
520
3.31M
521
3.31M
  // Does the loop already have a preheader?  If so, don't insert one.
522
3.31M
  BasicBlock *Preheader = L->getLoopPreheader();
523
3.31M
  if (
!Preheader3.31M
) {
524
158k
    Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
525
158k
    if (Preheader)
526
158k
      Changed = true;
527
158k
  }
528
3.31M
529
3.31M
  // Next, check to make sure that all exit nodes of the loop only have
530
3.31M
  // predecessors that are inside of the loop.  This check guarantees that the
531
3.31M
  // loop preheader/header will dominate the exit blocks.  If the exit block has
532
3.31M
  // predecessors from outside of the loop, split the edge now.
533
3.31M
  if (formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA))
534
1.25M
    Changed = true;
535
3.31M
536
3.31M
  // If the header has more than two predecessors at this point (from the
537
3.31M
  // preheader and from multiple backedges), we must adjust the loop.
538
3.31M
  BasicBlock *LoopLatch = L->getLoopLatch();
539
3.31M
  if (
!LoopLatch3.31M
) {
540
4.63k
    // If this is really a nested loop, rip it out into a child loop.  Don't do
541
4.63k
    // this for loops with a giant number of backedges, just factor them into a
542
4.63k
    // common backedge instead.
543
4.63k
    if (
L->getNumBackEdges() < 84.63k
) {
544
4.56k
      if (Loop *OuterL =
545
471
              separateNestedLoop(L, Preheader, DT, LI, SE, PreserveLCSSA, AC)) {
546
471
        ++NumNested;
547
471
        // Enqueue the outer loop as it should be processed next in our
548
471
        // depth-first nest walk.
549
471
        Worklist.push_back(OuterL);
550
471
551
471
        // This is a big restructuring change, reprocess the whole loop.
552
471
        Changed = true;
553
471
        // GCC doesn't tail recursion eliminate this.
554
471
        // FIXME: It isn't clear we can't rely on LLVM to TRE this.
555
471
        goto ReprocessLoop;
556
471
      }
557
4.16k
    }
558
4.16k
559
4.16k
    // If we either couldn't, or didn't want to, identify nesting of the loops,
560
4.16k
    // insert a new block that all backedges target, then make it jump to the
561
4.16k
    // loop header.
562
4.16k
    LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI);
563
4.16k
    if (LoopLatch)
564
4.15k
      Changed = true;
565
4.63k
  }
566
3.31M
567
3.31M
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
568
3.31M
569
3.31M
  // Scan over the PHI nodes in the loop header.  Since they now have only two
570
3.31M
  // incoming values (the loop is canonicalized), we may have simplified the PHI
571
3.31M
  // down to 'X = phi [X, Y]', which should be replaced with 'Y'.
572
3.31M
  PHINode *PN;
573
3.31M
  for (BasicBlock::iterator I = L->getHeader()->begin();
574
7.73M
       (PN = dyn_cast<PHINode>(I++)); )
575
4.41M
    
if (Value *4.41M
V4.41M
= SimplifyInstruction(PN, {DL, nullptr, DT, AC})) {
576
1.39k
      if (
SE1.39k
)
SE->forgetValue(PN)388
;
577
1.39k
      if (
!PreserveLCSSA || 1.39k
LI->replacementPreservesLCSSAForm(PN, V)391
) {
578
1.39k
        PN->replaceAllUsesWith(V);
579
1.39k
        PN->eraseFromParent();
580
1.39k
      }
581
4.41M
    }
582
3.31M
583
3.31M
  // If this loop has multiple exits and the exits all go to the same
584
3.31M
  // block, attempt to merge the exits. This helps several passes, such
585
3.31M
  // as LoopRotation, which do not support loops with multiple exits.
586
3.31M
  // SimplifyCFG also does this (and this code uses the same utility
587
3.31M
  // function), however this code is loop-aware, where SimplifyCFG is
588
3.31M
  // not. That gives it the advantage of being able to hoist
589
3.31M
  // loop-invariant instructions out of the way to open up more
590
3.31M
  // opportunities, and the disadvantage of having the responsibility
591
3.31M
  // to preserve dominator information.
592
3.31M
  auto HasUniqueExitBlock = [&]() {
593
3.31M
    BasicBlock *UniqueExit = nullptr;
594
3.31M
    for (auto *ExitingBB : ExitingBlocks)
595
4.18M
      
for (auto *SuccBB : successors(ExitingBB)) 4.18M
{
596
8.11M
        if (L->contains(SuccBB))
597
3.90M
          continue;
598
4.20M
599
4.20M
        
if (4.20M
!UniqueExit4.20M
)
600
3.31M
          UniqueExit = SuccBB;
601
889k
        else 
if (889k
UniqueExit != SuccBB889k
)
602
655k
          return false;
603
2.66M
      }
604
2.66M
605
2.66M
    return true;
606
2.66M
  };
607
3.31M
  if (
HasUniqueExitBlock()3.31M
) {
608
5.52M
    for (unsigned i = 0, e = ExitingBlocks.size(); 
i != e5.52M
;
++i2.86M
) {
609
2.86M
      BasicBlock *ExitingBlock = ExitingBlocks[i];
610
2.86M
      if (
!ExitingBlock->getSinglePredecessor()2.86M
)
continue2.48M
;
611
373k
      BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
612
373k
      if (
!BI || 373k
!BI->isConditional()373k
)
continue886
;
613
373k
      CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
614
373k
      if (
!CI || 373k
CI->getParent() != ExitingBlock368k
)
continue4.41k
;
615
368k
616
368k
      // Attempt to hoist out all instructions except for the
617
368k
      // comparison and the branch.
618
368k
      bool AllInvariant = true;
619
368k
      bool AnyInvariant = false;
620
400k
      for (BasicBlock::iterator I = ExitingBlock->begin(); 
&*I != BI400k
; ) {
621
369k
        Instruction *Inst = &*I++;
622
369k
        // Skip debug info intrinsics.
623
369k
        if (isa<DbgInfoIntrinsic>(Inst))
624
0
          continue;
625
369k
        
if (369k
Inst == CI369k
)
626
30.6k
          continue;
627
339k
        
if (339k
!L->makeLoopInvariant(Inst, AnyInvariant,
628
339k
                                  Preheader ? Preheader->getTerminator()
629
339k
                                            : 
nullptr0
)) {
630
338k
          AllInvariant = false;
631
338k
          break;
632
338k
        }
633
369k
      }
634
368k
      if (
AnyInvariant368k
) {
635
903
        Changed = true;
636
903
        // The loop disposition of all SCEV expressions that depend on any
637
903
        // hoisted values have also changed.
638
903
        if (SE)
639
145
          SE->forgetLoopDispositions(L);
640
903
      }
641
368k
      if (
!AllInvariant368k
)
continue338k
;
642
30.4k
643
30.4k
      // The block has now been cleared of all instructions except for
644
30.4k
      // a comparison and a conditional branch. SimplifyCFG may be able
645
30.4k
      // to fold it now.
646
30.4k
      
if (30.4k
!FoldBranchToCommonDest(BI)30.4k
)
647
30.3k
        continue;
648
67
649
67
      // Success. The block is now dead, so remove it from the loop,
650
67
      // update the dominator tree and delete it.
651
67
      
DEBUG67
(dbgs() << "LoopSimplify: Eliminating exiting block "
652
67
                   << ExitingBlock->getName() << "\n");
653
67
654
67
      // Notify ScalarEvolution before deleting this block. Currently assume the
655
67
      // parent loop doesn't change (spliting edges doesn't count). If blocks,
656
67
      // CFG edges, or other values in the parent loop change, then we need call
657
67
      // to forgetLoop() for the parent instead.
658
67
      if (SE)
659
0
        SE->forgetLoop(L);
660
67
661
67
      assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
662
67
      Changed = true;
663
67
      LI->removeBlock(ExitingBlock);
664
67
665
67
      DomTreeNode *Node = DT->getNode(ExitingBlock);
666
67
      const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =
667
67
        Node->getChildren();
668
74
      while (
!Children.empty()74
) {
669
7
        DomTreeNode *Child = Children.front();
670
7
        DT->changeImmediateDominator(Child, Node->getIDom());
671
7
      }
672
2.86M
      DT->eraseNode(ExitingBlock);
673
2.86M
674
2.86M
      BI->getSuccessor(0)->removePredecessor(
675
2.86M
          ExitingBlock, /* DontDeleteUselessPHIs */ PreserveLCSSA);
676
2.86M
      BI->getSuccessor(1)->removePredecessor(
677
2.86M
          ExitingBlock, /* DontDeleteUselessPHIs */ PreserveLCSSA);
678
2.86M
      ExitingBlock->eraseFromParent();
679
2.86M
    }
680
2.66M
  }
681
3.31M
682
3.31M
  return Changed;
683
3.31M
}
684
685
bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
686
                        ScalarEvolution *SE, AssumptionCache *AC,
687
2.37M
                        bool PreserveLCSSA) {
688
2.37M
  bool Changed = false;
689
2.37M
690
#ifndef NDEBUG
691
  // If we're asked to preserve LCSSA, the loop nest needs to start in LCSSA
692
  // form.
693
  if (PreserveLCSSA) {
694
    assert(DT && "DT not available.");
695
    assert(LI && "LI not available.");
696
    assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
697
           "Requested to preserve LCSSA, but it's already broken.");
698
  }
699
#endif
700
701
2.37M
  // Worklist maintains our depth-first queue of loops in this nest to process.
702
2.37M
  SmallVector<Loop *, 4> Worklist;
703
2.37M
  Worklist.push_back(L);
704
2.37M
705
2.37M
  // Walk the worklist from front to back, pushing newly found sub loops onto
706
2.37M
  // the back. This will let us process loops from back to front in depth-first
707
2.37M
  // order. We can use this simple process because loops form a tree.
708
5.69M
  for (unsigned Idx = 0; 
Idx != Worklist.size()5.69M
;
++Idx3.31M
) {
709
3.31M
    Loop *L2 = Worklist[Idx];
710
3.31M
    Worklist.append(L2->begin(), L2->end());
711
3.31M
  }
712
2.37M
713
5.69M
  while (!Worklist.empty())
714
3.31M
    Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, DT, LI, SE,
715
3.31M
                               AC, PreserveLCSSA);
716
2.37M
717
2.37M
  return Changed;
718
2.37M
}
719
720
namespace {
721
  struct LoopSimplify : public FunctionPass {
722
    static char ID; // Pass identification, replacement for typeid
723
171k
    LoopSimplify() : FunctionPass(ID) {
724
171k
      initializeLoopSimplifyPass(*PassRegistry::getPassRegistry());
725
171k
    }
726
727
    bool runOnFunction(Function &F) override;
728
729
171k
    void getAnalysisUsage(AnalysisUsage &AU) const override {
730
171k
      AU.addRequired<AssumptionCacheTracker>();
731
171k
732
171k
      // We need loop information to identify the loops...
733
171k
      AU.addRequired<DominatorTreeWrapperPass>();
734
171k
      AU.addPreserved<DominatorTreeWrapperPass>();
735
171k
736
171k
      AU.addRequired<LoopInfoWrapperPass>();
737
171k
      AU.addPreserved<LoopInfoWrapperPass>();
738
171k
739
171k
      AU.addPreserved<BasicAAWrapperPass>();
740
171k
      AU.addPreserved<AAResultsWrapperPass>();
741
171k
      AU.addPreserved<GlobalsAAWrapperPass>();
742
171k
      AU.addPreserved<ScalarEvolutionWrapperPass>();
743
171k
      AU.addPreserved<SCEVAAWrapperPass>();
744
171k
      AU.addPreservedID(LCSSAID);
745
171k
      AU.addPreserved<DependenceAnalysisWrapperPass>();
746
171k
      AU.addPreservedID(BreakCriticalEdgesID);  // No critical edges added.
747
171k
    }
748
749
    /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
750
    void verifyAnalysis() const override;
751
  };
752
}
753
754
char LoopSimplify::ID = 0;
755
90.1k
INITIALIZE_PASS_BEGIN90.1k
(LoopSimplify, "loop-simplify",
756
90.1k
                "Canonicalize natural loops", false, false)
757
90.1k
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
758
90.1k
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
759
90.1k
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
760
90.1k
INITIALIZE_PASS_END(LoopSimplify, "loop-simplify",
761
                "Canonicalize natural loops", false, false)
762
763
// Publicly exposed interface to pass...
764
char &llvm::LoopSimplifyID = LoopSimplify::ID;
765
0
Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
766
767
/// runOnFunction - Run down all loops in the CFG (recursively, but we could do
768
/// it in any convenient order) inserting preheaders...
769
///
770
4.44M
bool LoopSimplify::runOnFunction(Function &F) {
771
4.44M
  bool Changed = false;
772
4.44M
  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
773
4.44M
  DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
774
4.44M
  auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
775
4.44M
  ScalarEvolution *SE = SEWP ? 
&SEWP->getSE()462k
:
nullptr3.98M
;
776
4.44M
  AssumptionCache *AC =
777
4.44M
      &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
778
4.44M
779
4.44M
  bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
780
4.44M
781
4.44M
  // Simplify each loop nest in the function.
782
6.59M
  for (LoopInfo::iterator I = LI->begin(), E = LI->end(); 
I != E6.59M
;
++I2.14M
)
783
2.14M
    Changed |= simplifyLoop(*I, DT, LI, SE, AC, PreserveLCSSA);
784
4.44M
785
#ifndef NDEBUG
786
  if (PreserveLCSSA) {
787
    bool InLCSSA = all_of(
788
        *LI, [&](Loop *L) { return L->isRecursivelyLCSSAForm(*DT, *LI); });
789
    assert(InLCSSA && "LCSSA is broken after loop-simplify.");
790
  }
791
#endif
792
  return Changed;
793
4.44M
}
794
795
PreservedAnalyses LoopSimplifyPass::run(Function &F,
796
837
                                        FunctionAnalysisManager &AM) {
797
837
  bool Changed = false;
798
837
  LoopInfo *LI = &AM.getResult<LoopAnalysis>(F);
799
837
  DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
800
837
  ScalarEvolution *SE = AM.getCachedResult<ScalarEvolutionAnalysis>(F);
801
837
  AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F);
802
837
803
837
  // Note that we don't preserve LCSSA in the new PM, if you need it run LCSSA
804
837
  // after simplifying the loops.
805
1.34k
  for (LoopInfo::iterator I = LI->begin(), E = LI->end(); 
I != E1.34k
;
++I508
)
806
508
    Changed |= simplifyLoop(*I, DT, LI, SE, AC, /*PreserveLCSSA*/ false);
807
837
808
837
  if (!Changed)
809
759
    return PreservedAnalyses::all();
810
78
811
78
  PreservedAnalyses PA;
812
78
  PA.preserve<DominatorTreeAnalysis>();
813
78
  PA.preserve<LoopAnalysis>();
814
78
  PA.preserve<BasicAA>();
815
78
  PA.preserve<GlobalsAA>();
816
78
  PA.preserve<SCEVAA>();
817
78
  PA.preserve<ScalarEvolutionAnalysis>();
818
78
  PA.preserve<DependenceAnalysis>();
819
78
  return PA;
820
78
}
821
822
// FIXME: Restore this code when we re-enable verification in verifyAnalysis
823
// below.
824
#if 0
825
static void verifyLoop(Loop *L) {
826
  // Verify subloops.
827
  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
828
    verifyLoop(*I);
829
830
  // It used to be possible to just assert L->isLoopSimplifyForm(), however
831
  // with the introduction of indirectbr, there are now cases where it's
832
  // not possible to transform a loop as necessary. We can at least check
833
  // that there is an indirectbr near any time there's trouble.
834
835
  // Indirectbr can interfere with preheader and unique backedge insertion.
836
  if (!L->getLoopPreheader() || !L->getLoopLatch()) {
837
    bool HasIndBrPred = false;
838
    for (pred_iterator PI = pred_begin(L->getHeader()),
839
         PE = pred_end(L->getHeader()); PI != PE; ++PI)
840
      if (isa<IndirectBrInst>((*PI)->getTerminator())) {
841
        HasIndBrPred = true;
842
        break;
843
      }
844
    assert(HasIndBrPred &&
845
           "LoopSimplify has no excuse for missing loop header info!");
846
    (void)HasIndBrPred;
847
  }
848
849
  // Indirectbr can interfere with exit block canonicalization.
850
  if (!L->hasDedicatedExits()) {
851
    bool HasIndBrExiting = false;
852
    SmallVector<BasicBlock*, 8> ExitingBlocks;
853
    L->getExitingBlocks(ExitingBlocks);
854
    for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
855
      if (isa<IndirectBrInst>((ExitingBlocks[i])->getTerminator())) {
856
        HasIndBrExiting = true;
857
        break;
858
      }
859
    }
860
861
    assert(HasIndBrExiting &&
862
           "LoopSimplify has no excuse for missing exit block info!");
863
    (void)HasIndBrExiting;
864
  }
865
}
866
#endif
867
868
0
void LoopSimplify::verifyAnalysis() const {
869
0
  // FIXME: This routine is being called mid-way through the loop pass manager
870
0
  // as loop passes destroy this analysis. That's actually fine, but we have no
871
0
  // way of expressing that here. Once all of the passes that destroy this are
872
0
  // hoisted out of the loop pass manager we can add back verification here.
873
#if 0
874
  for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
875
    verifyLoop(*I);
876
#endif
877
}