Coverage Report

Created: 2018-02-20 23:11

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/polly/lib/Analysis/ScopBuilder.cpp
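
Each source line below is shown with its line number, the execution count recorded for that line (blank where no counter applies), and the source text; counts are abbreviated (e.g. 2.37k). A listing of this form is typically produced with llvm-cov from an instrumented build's merged profile data; a minimal sketch, where the binary and profile file names are placeholders, not taken from this report:

    llvm-profdata merge -o merged.profdata default-*.profraw
    llvm-cov show ./bin/clang -instr-profile=merged.profdata \
        llvm/tools/polly/lib/Analysis/ScopBuilder.cpp
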
Line
Count
Source
1
//===- ScopBuilder.cpp ----------------------------------------------------===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// Create a polyhedral description for a static control flow region.
11
//
12
// The pass creates a polyhedral description of the Scops detected by the SCoP
13
// detection derived from their LLVM-IR code.
14
//
15
//===----------------------------------------------------------------------===//
16
17
#include "polly/ScopBuilder.h"
18
#include "polly/Options.h"
19
#include "polly/ScopDetection.h"
20
#include "polly/ScopDetectionDiagnostic.h"
21
#include "polly/ScopInfo.h"
22
#include "polly/Support/GICHelper.h"
23
#include "polly/Support/SCEVValidator.h"
24
#include "polly/Support/ScopHelper.h"
25
#include "polly/Support/VirtualInstruction.h"
26
#include "llvm/ADT/APInt.h"
27
#include "llvm/ADT/ArrayRef.h"
28
#include "llvm/ADT/DenseMap.h"
29
#include "llvm/ADT/EquivalenceClasses.h"
30
#include "llvm/ADT/SetVector.h"
31
#include "llvm/ADT/Statistic.h"
32
#include "llvm/Analysis/AliasAnalysis.h"
33
#include "llvm/Analysis/LoopInfo.h"
34
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
35
#include "llvm/Analysis/RegionInfo.h"
36
#include "llvm/Analysis/RegionIterator.h"
37
#include "llvm/Analysis/ScalarEvolution.h"
38
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
39
#include "llvm/IR/BasicBlock.h"
40
#include "llvm/IR/Constants.h"
41
#include "llvm/IR/DataLayout.h"
42
#include "llvm/IR/DebugLoc.h"
43
#include "llvm/IR/DerivedTypes.h"
44
#include "llvm/IR/DiagnosticInfo.h"
45
#include "llvm/IR/Dominators.h"
46
#include "llvm/IR/Function.h"
47
#include "llvm/IR/InstrTypes.h"
48
#include "llvm/IR/Instruction.h"
49
#include "llvm/IR/Instructions.h"
50
#include "llvm/IR/IntrinsicInst.h"
51
#include "llvm/IR/Operator.h"
52
#include "llvm/IR/Type.h"
53
#include "llvm/IR/Use.h"
54
#include "llvm/IR/Value.h"
55
#include "llvm/Support/Casting.h"
56
#include "llvm/Support/CommandLine.h"
57
#include "llvm/Support/Compiler.h"
58
#include "llvm/Support/Debug.h"
59
#include "llvm/Support/ErrorHandling.h"
60
#include "llvm/Support/raw_ostream.h"
61
#include <cassert>
62
#include <string>
63
#include <tuple>
64
#include <vector>
65
66
using namespace llvm;
67
using namespace polly;
68
69
2.37k
#define DEBUG_TYPE "polly-scops"
70
71
STATISTIC(ScopFound, "Number of valid Scops");
72
STATISTIC(RichScopFound, "Number of Scops containing a loop");
73
STATISTIC(InfeasibleScops,
74
          "Number of SCoPs with statically infeasible context.");
75
76
bool polly::ModelReadOnlyScalars;
77
78
static cl::opt<bool, true> XModelReadOnlyScalars(
79
    "polly-analyze-read-only-scalars",
80
    cl::desc("Model read-only scalar values in the scop description"),
81
    cl::location(ModelReadOnlyScalars), cl::Hidden, cl::ZeroOrMore,
82
    cl::init(true), cl::cat(PollyCategory));
83
84
static cl::opt<bool> UnprofitableScalarAccs(
85
    "polly-unprofitable-scalar-accs",
86
    cl::desc("Count statements with scalar accesses as not optimizable"),
87
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));
88
89
static cl::opt<bool> DetectFortranArrays(
90
    "polly-detect-fortran-arrays",
91
    cl::desc("Detect Fortran arrays and use this for code generation"),
92
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));
93
94
static cl::opt<bool> DetectReductions("polly-detect-reductions",
95
                                      cl::desc("Detect and exploit reductions"),
96
                                      cl::Hidden, cl::ZeroOrMore,
97
                                      cl::init(true), cl::cat(PollyCategory));
98
99
// Multiplicative reductions can be disabled separately as these kind of
100
// operations can overflow easily. Additive reductions and bit operations
101
// are in contrast pretty stable.
102
static cl::opt<bool> DisableMultiplicativeReductions(
103
    "polly-disable-multiplicative-reductions",
104
    cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore,
105
    cl::init(false), cl::cat(PollyCategory));
106
107
enum class GranularityChoice { BasicBlocks, ScalarIndependence, Stores };
108
109
static cl::opt<GranularityChoice> StmtGranularity(
110
    "polly-stmt-granularity",
111
    cl::desc(
112
        "Algorithm to use for splitting basic blocks into multiple statements"),
113
    cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb",
114
                          "One statement per basic block"),
115
               clEnumValN(GranularityChoice::ScalarIndependence, "scalar-indep",
116
                          "Scalar independence heuristic"),
117
               clEnumValN(GranularityChoice::Stores, "store",
118
                          "Store-level granularity")),
119
    cl::init(GranularityChoice::ScalarIndependence), cl::cat(PollyCategory));
120
121
void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
122
                                   Region *NonAffineSubRegion,
123
286
                                   bool IsExitBlock) {
124
286
  // PHI nodes that are in the exit block of the region, hence if IsExitBlock is
125
286
  // true, are not modeled as ordinary PHI nodes as they are not part of the
126
286
  // region. However, we model the operands in the predecessor blocks that are
127
286
  // part of the region as regular scalar accesses.
128
286
129
286
  // If we can synthesize a PHI we can skip it, however only if it is in
130
286
  // the region. If it is not it can only be in the exit block of the region.
131
286
  // In this case we model the operands but not the PHI itself.
132
286
  auto *Scope = LI.getLoopFor(PHI->getParent());
133
286
  if (!IsExitBlock && canSynthesize(PHI, *scop, &SE, Scope))
134
7
    return;
135
279
136
279
  // PHI nodes are modeled as if they had been demoted prior to the SCoP
137
279
  // detection. Hence, the PHI is a load of a new memory location in which the
138
279
  // incoming value was written at the end of the incoming basic block.
139
279
  bool OnlyNonAffineSubRegionOperands = true;
140
869
  for (unsigned u = 0; u < PHI->getNumIncomingValues(); u++) {
141
590
    Value *Op = PHI->getIncomingValue(u);
142
590
    BasicBlock *OpBB = PHI->getIncomingBlock(u);
143
590
    ScopStmt *OpStmt = scop->getIncomingStmtFor(PHI->getOperandUse(u));
144
590
145
590
    // Do not build PHI dependences inside a non-affine subregion, but make
146
590
    // sure that the necessary scalar values are still made available.
147
590
    if (NonAffineSubRegion && NonAffineSubRegion->contains(OpBB)) {
148
20
      auto *OpInst = dyn_cast<Instruction>(Op);
149
20
      if (!OpInst || !NonAffineSubRegion->contains(OpInst))
150
12
        ensureValueRead(Op, OpStmt);
151
20
      continue;
152
20
    }
153
570
154
570
    OnlyNonAffineSubRegionOperands = false;
155
570
    ensurePHIWrite(PHI, OpStmt, OpBB, Op, IsExitBlock);
156
570
  }
157
279
158
279
  if (!OnlyNonAffineSubRegionOperands && !IsExitBlock) {
159
228
    addPHIReadAccess(PHIStmt, PHI);
160
228
  }
161
279
}
162
163
void ScopBuilder::buildScalarDependences(ScopStmt *UserStmt,
164
7.79k
                                         Instruction *Inst) {
165
7.79k
  assert(!isa<PHINode>(Inst));
166
7.79k
167
7.79k
  // Pull-in required operands.
168
7.79k
  for (Use &Op : Inst->operands())
169
13.8k
    ensureValueRead(Op.get(), UserStmt);
170
7.79k
}
171
172
21.6k
void ScopBuilder::buildEscapingDependences(Instruction *Inst) {
173
21.6k
  // Check for uses of this instruction outside the scop. Because we do not
174
21.6k
  // iterate over such instructions and therefore did not "ensure" the existence
175
21.6k
  // of a write, we must determine such use here.
176
21.6k
  if (scop->isEscaping(Inst))
177
92
    ensureValueWrite(Inst);
178
21.6k
}
179
180
/// Check that a value is a Fortran Array descriptor.
181
///
182
/// We check if V has the following structure:
183
/// %"struct.array1_real(kind=8)" = type { i8*, i<zz>, i<zz>,
184
///                                   [<num> x %struct.descriptor_dimension] }
185
///
186
///
187
/// %struct.descriptor_dimension = type { i<zz>, i<zz>, i<zz> }
188
///
189
/// 1. V's type name starts with "struct.array"
190
/// 2. V's type has layout as shown.
191
/// 3. Final member of V's type has name "struct.descriptor_dimension",
192
/// 4. "struct.descriptor_dimension" has layout as shown.
193
/// 5. Consistent use of i<zz> where <zz> is some fixed integer number.
194
///
195
/// We are interested in such types since this is the code that dragonegg
196
/// generates for Fortran array descriptors.
197
///
198
/// @param V the Value to be checked.
199
///
200
/// @returns True if V is a Fortran array descriptor, False otherwise.
201
10
bool isFortranArrayDescriptor(Value *V) {
202
10
  PointerType *PTy = dyn_cast<PointerType>(V->getType());
203
10
204
10
  if (!PTy)
205
0
    return false;
206
10
207
10
  Type *Ty = PTy->getElementType();
208
10
  assert(Ty && "Ty expected to be initialized");
209
10
  auto *StructArrTy = dyn_cast<StructType>(Ty);
210
10
211
10
  if (!(StructArrTy && StructArrTy->hasName()))
212
0
    return false;
213
10
214
10
  if (!StructArrTy->getName().startswith("struct.array"))
215
0
    return false;
216
10
217
10
  if (StructArrTy->getNumElements() != 4)
218
0
    return false;
219
10
220
10
  const ArrayRef<Type *> ArrMemberTys = StructArrTy->elements();
221
10
222
10
  // i8* match
223
10
  if (ArrMemberTys[0] != Type::getInt8PtrTy(V->getContext()))
224
0
    return false;
225
10
226
10
  // Get a reference to the int type and check that all the members
227
10
  // share the same int type
228
10
  Type *IntTy = ArrMemberTys[1];
229
10
  if (ArrMemberTys[2] != IntTy)
230
0
    return false;
231
10
232
10
  // type: [<num> x %struct.descriptor_dimension]
233
10
  ArrayType *DescriptorDimArrayTy = dyn_cast<ArrayType>(ArrMemberTys[3]);
234
10
  if (!DescriptorDimArrayTy)
235
0
    return false;
236
10
237
10
  // type: %struct.descriptor_dimension := type { ixx, ixx, ixx }
238
10
  StructType *DescriptorDimTy =
239
10
      dyn_cast<StructType>(DescriptorDimArrayTy->getElementType());
240
10
241
10
  if (!(DescriptorDimTy && DescriptorDimTy->hasName()))
242
0
    return false;
243
10
244
10
  if (DescriptorDimTy->getName() != "struct.descriptor_dimension")
245
0
    return false;
246
10
247
10
  if (DescriptorDimTy->getNumElements() != 3)
248
0
    return false;
249
10
250
30
  
  for (auto MemberTy : DescriptorDimTy->elements()) {
251
30
    if (MemberTy != IntTy)
252
0
      return false;
253
30
  }
254
10
255
10
  return true;
256
10
}
257
258
5
Value *ScopBuilder::findFADAllocationVisible(MemAccInst Inst) {
259
5
  // match: 4.1 & 4.2 store/load
260
5
  if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst))
261
0
    return nullptr;
262
5
263
5
  // match: 4
264
5
  if (Inst.getAlignment() != 8)
265
1
    return nullptr;
266
4
267
4
  Value *Address = Inst.getPointerOperand();
268
4
269
4
  const BitCastInst *Bitcast = nullptr;
270
4
  // [match: 3]
271
4
  if (auto *Slot = dyn_cast<GetElementPtrInst>(Address)) {
272
3
    Value *TypedMem = Slot->getPointerOperand();
273
3
    // match: 2
274
3
    Bitcast = dyn_cast<BitCastInst>(TypedMem);
275
3
  } else {
276
1
    // match: 2
277
1
    Bitcast = dyn_cast<BitCastInst>(Address);
278
1
  }
279
4
280
4
  if (!Bitcast)
281
1
    return nullptr;
282
3
283
3
  auto *MallocMem = Bitcast->getOperand(0);
284
3
285
3
  // match: 1
286
3
  auto *MallocCall = dyn_cast<CallInst>(MallocMem);
287
3
  if (!MallocCall)
288
1
    return nullptr;
289
2
290
2
  Function *MallocFn = MallocCall->getCalledFunction();
291
2
  if (!(MallocFn && MallocFn->hasName() && MallocFn->getName() == "malloc"))
292
0
    return nullptr;
293
2
294
2
  // Find all uses of the malloc'd memory.
295
2
  // We are looking for a "store" into a struct with the type being the Fortran
296
2
  // descriptor type
297
4
  
  for (auto user : MallocMem->users()) {
298
4
    /// match: 5
299
4
    auto *MallocStore = dyn_cast<StoreInst>(user);
300
4
    if (!MallocStore)
301
2
      continue;
302
2
303
2
    auto *DescriptorGEP =
304
2
        dyn_cast<GEPOperator>(MallocStore->getPointerOperand());
305
2
    if (!DescriptorGEP)
306
0
      continue;
307
2
308
2
    // match: 5
309
2
    auto DescriptorType =
310
2
        dyn_cast<StructType>(DescriptorGEP->getSourceElementType());
311
2
    if (!(DescriptorType && DescriptorType->hasName()))
312
0
      continue;
313
2
314
2
    Value *Descriptor = dyn_cast<Value>(DescriptorGEP->getPointerOperand());
315
2
316
2
    if (!Descriptor)
317
0
      continue;
318
2
319
2
    if (!isFortranArrayDescriptor(Descriptor))
320
0
      continue;
321
2
322
2
    return Descriptor;
323
2
  }
324
2
325
2
  
  return nullptr;
326
2
}
327
328
13
Value *ScopBuilder::findFADAllocationInvisible(MemAccInst Inst) {
329
13
  // match: 3
330
13
  if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst))
331
0
    return nullptr;
332
13
333
13
  Value *Slot = Inst.getPointerOperand();
334
13
335
13
  LoadInst *MemLoad = nullptr;
336
13
  // [match: 2]
337
13
  if (auto *SlotGEP = dyn_cast<GetElementPtrInst>(Slot)) {
338
11
    // match: 1
339
11
    MemLoad = dyn_cast<LoadInst>(SlotGEP->getPointerOperand());
340
11
  } else {
341
2
    // match: 1
342
2
    MemLoad = dyn_cast<LoadInst>(Slot);
343
2
  }
344
13
345
13
  if (!MemLoad)
346
5
    return nullptr;
347
8
348
8
  auto *BitcastOperator =
349
8
      dyn_cast<BitCastOperator>(MemLoad->getPointerOperand());
350
8
  if (!BitcastOperator)
351
0
    return nullptr;
352
8
353
8
  Value *Descriptor = dyn_cast<Value>(BitcastOperator->getOperand(0));
354
8
  if (!Descriptor)
355
0
    return nullptr;
356
8
357
8
  if (!isFortranArrayDescriptor(Descriptor))
358
0
    return nullptr;
359
8
360
8
  return Descriptor;
361
8
}
362
363
3.59k
bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt) {
364
3.59k
  Value *Val = Inst.getValueOperand();
365
3.59k
  Type *ElementType = Val->getType();
366
3.59k
  Value *Address = Inst.getPointerOperand();
367
3.59k
  const SCEV *AccessFunction =
368
3.59k
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
369
3.59k
  const SCEVUnknown *BasePointer =
370
3.59k
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
371
3.59k
  enum MemoryAccess::AccessType AccType =
372
3.59k
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
373
3.59k
374
3.59k
  if (auto *BitCast = dyn_cast<BitCastInst>(Address)) {
375
104
    auto *Src = BitCast->getOperand(0);
376
104
    auto *SrcTy = Src->getType();
377
104
    auto *DstTy = BitCast->getType();
378
104
    // Do not try to delinearize non-sized (opaque) pointers.
379
104
    if ((SrcTy->isPointerTy() && !SrcTy->getPointerElementType()->isSized()) ||
380
104
        
        (DstTy->isPointerTy() && !DstTy->getPointerElementType()->isSized())) {
381
1
      return false;
382
1
    }
383
103
    if (SrcTy->isPointerTy() && DstTy->isPointerTy() &&
384
103
        DL.getTypeAllocSize(SrcTy->getPointerElementType()) ==
385
103
            DL.getTypeAllocSize(DstTy->getPointerElementType()))
386
60
      Address = Src;
387
103
  }
388
3.59k
389
3.59k
  auto *GEP = dyn_cast<GetElementPtrInst>(Address);
390
3.59k
  if (!GEP)
391
878
    return false;
392
2.71k
393
2.71k
  std::vector<const SCEV *> Subscripts;
394
2.71k
  std::vector<int> Sizes;
395
2.71k
  std::tie(Subscripts, Sizes) = getIndexExpressionsFromGEP(GEP, SE);
396
2.71k
  auto *BasePtr = GEP->getOperand(0);
397
2.71k
398
2.71k
  if (auto *BasePtrCast = dyn_cast<BitCastInst>(BasePtr))
399
18
    BasePtr = BasePtrCast->getOperand(0);
400
2.71k
401
2.71k
  // Check for identical base pointers to ensure that we do not miss index
402
2.71k
  // offsets that have been added before this GEP is applied.
403
2.71k
  if (BasePtr != BasePointer->getValue())
404
83
    return false;
405
2.63k
406
2.63k
  std::vector<const SCEV *> SizesSCEV;
407
2.63k
408
2.63k
  const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
409
2.63k
410
2.63k
  Loop *SurroundingLoop = Stmt->getSurroundingLoop();
411
2.74k
  for (auto *Subscript : Subscripts) {
412
2.74k
    InvariantLoadsSetTy AccessILS;
413
2.74k
    if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, Subscript, SE,
414
2.74k
                      &AccessILS))
415
153
      return false;
416
2.59k
417
2.59k
    for (LoadInst *LInst : AccessILS)
418
47
      if (!ScopRIL.count(LInst))
419
3
        return false;
420
2.59k
  }
421
2.63k
422
2.63k
  
  if (Sizes.empty())
423
2.24k
    return false;
424
238
425
238
  SizesSCEV.push_back(nullptr);
426
238
427
238
  for (auto V : Sizes)
428
257
    SizesSCEV.push_back(SE.getSCEV(
429
257
        ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V)));
430
238
431
238
  addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
432
238
                 true, Subscripts, SizesSCEV, Val);
433
238
  return true;
434
238
}
435
436
3.36k
bool ScopBuilder::buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt) {
437
3.36k
  if (!PollyDelinearize)
438
6
    return false;
439
3.35k
440
3.35k
  Value *Address = Inst.getPointerOperand();
441
3.35k
  Value *Val = Inst.getValueOperand();
442
3.35k
  Type *ElementType = Val->getType();
443
3.35k
  unsigned ElementSize = DL.getTypeAllocSize(ElementType);
444
3.35k
  enum MemoryAccess::AccessType AccType =
445
3.35k
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
446
3.35k
447
3.35k
  const SCEV *AccessFunction =
448
3.35k
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
449
3.35k
  const SCEVUnknown *BasePointer =
450
3.35k
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
451
3.35k
452
3.35k
  assert(BasePointer && "Could not find base pointer");
453
3.35k
454
3.35k
  auto &InsnToMemAcc = scop->getInsnToMemAccMap();
455
3.35k
  auto AccItr = InsnToMemAcc.find(Inst);
456
3.35k
  if (AccItr == InsnToMemAcc.end())
457
3.17k
    return false;
458
177
459
177
  std::vector<const SCEV *> Sizes = {nullptr};
460
177
461
177
  Sizes.insert(Sizes.end(), AccItr->second.Shape->DelinearizedSizes.begin(),
462
177
               AccItr->second.Shape->DelinearizedSizes.end());
463
177
464
177
  // In case only the element size is contained in the 'Sizes' array, the
465
177
  // access does not access a real multi-dimensional array. Hence, we allow
466
177
  // the normal single-dimensional access construction to handle this.
467
177
  if (Sizes.size() == 1)
468
1
    return false;
469
176
470
176
  // Remove the element size. This information is already provided by the
471
176
  // ElementSize parameter. In case the element size of this access and the
472
176
  // element size used for delinearization differs the delinearization is
473
176
  // incorrect. Hence, we invalidate the scop.
474
176
  //
475
176
  // TODO: Handle delinearization with differing element sizes.
476
176
  auto DelinearizedSize =
477
176
      cast<SCEVConstant>(Sizes.back())->getAPInt().getSExtValue();
478
176
  Sizes.pop_back();
479
176
  if (ElementSize != DelinearizedSize)
480
2
    scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent());
481
176
482
176
  addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
483
176
                 true, AccItr->second.DelinearizedSubscripts, Sizes, Val);
484
176
  return true;
485
176
}
486
487
3.64k
bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt) {
488
3.64k
  auto *MemIntr = dyn_cast_or_null<MemIntrinsic>(Inst);
489
3.64k
490
3.64k
  if (MemIntr == nullptr)
491
3.62k
    return false;
492
19
493
19
  auto *L = LI.getLoopFor(Inst->getParent());
494
19
  auto *LengthVal = SE.getSCEVAtScope(MemIntr->getLength(), L);
495
19
  assert(LengthVal);
496
19
497
19
  // Check if the length val is actually affine or if we overapproximate it
498
19
  InvariantLoadsSetTy AccessILS;
499
19
  const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
500
19
501
19
  Loop *SurroundingLoop = Stmt->getSurroundingLoop();
502
19
  bool LengthIsAffine = isAffineExpr(&scop->getRegion(), SurroundingLoop,
503
19
                                     LengthVal, SE, &AccessILS);
504
19
  for (LoadInst *LInst : AccessILS)
505
0
    if (!ScopRIL.count(LInst))
506
0
      LengthIsAffine = false;
507
19
  if (!LengthIsAffine)
508
2
    LengthVal = nullptr;
509
19
510
19
  auto *DestPtrVal = MemIntr->getDest();
511
19
  assert(DestPtrVal);
512
19
513
19
  auto *DestAccFunc = SE.getSCEVAtScope(DestPtrVal, L);
514
19
  assert(DestAccFunc);
515
19
  // Ignore accesses to "NULL".
516
19
  // TODO: We could use this to optimize the region further, e.g., intersect
517
19
  //       the context with
518
19
  //          isl_set_complement(isl_set_params(getDomain()))
519
19
  //       as we know it would be undefined to execute this instruction anyway.
520
19
  if (DestAccFunc->isZero())
521
2
    return true;
522
17
523
17
  auto *DestPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(DestAccFunc));
524
17
  assert(DestPtrSCEV);
525
17
  DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV);
526
17
  addArrayAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(),
527
17
                 IntegerType::getInt8Ty(DestPtrVal->getContext()),
528
17
                 LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr},
529
17
                 Inst.getValueOperand());
530
17
531
17
  auto *MemTrans = dyn_cast<MemTransferInst>(MemIntr);
532
17
  if (!MemTrans)
533
10
    return true;
534
7
535
7
  auto *SrcPtrVal = MemTrans->getSource();
536
7
  assert(SrcPtrVal);
537
7
538
7
  auto *SrcAccFunc = SE.getSCEVAtScope(SrcPtrVal, L);
539
7
  assert(SrcAccFunc);
540
7
  // Ignore accesses to "NULL".
541
7
  // TODO: See above TODO
542
7
  if (SrcAccFunc->isZero())
543
0
    return true;
544
7
545
7
  auto *SrcPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(SrcAccFunc));
546
7
  assert(SrcPtrSCEV);
547
7
  SrcAccFunc = SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV);
548
7
  addArrayAccess(Stmt, Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(),
549
7
                 IntegerType::getInt8Ty(SrcPtrVal->getContext()),
550
7
                 LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr},
551
7
                 Inst.getValueOperand());
552
7
553
7
  return true;
554
7
}
555
556
3.62k
bool ScopBuilder::buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt) {
557
3.62k
  auto *CI = dyn_cast_or_null<CallInst>(Inst);
558
3.62k
559
3.62k
  if (CI == nullptr)
560
3.59k
    return false;
561
25
562
25
  if (CI->doesNotAccessMemory() || isIgnoredIntrinsic(CI))
563
11
    return true;
564
14
565
14
  bool ReadOnly = false;
566
14
  auto *AF = SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0);
567
14
  auto *CalledFunction = CI->getCalledFunction();
568
14
  switch (AA.getModRefBehavior(CalledFunction)) {
569
14
  case FMRB_UnknownModRefBehavior:
570
0
    llvm_unreachable("Unknown mod ref behaviour cannot be represented.");
571
14
  case FMRB_DoesNotAccessMemory:
572
0
    return true;
573
14
  case FMRB_DoesNotReadMemory:
574
0
  case FMRB_OnlyAccessesInaccessibleMem:
575
0
  case FMRB_OnlyAccessesInaccessibleOrArgMem:
576
0
    return false;
577
8
  case FMRB_OnlyReadsMemory:
578
8
    GlobalReads.emplace_back(Stmt, CI);
579
8
    return true;
580
4
  case FMRB_OnlyReadsArgumentPointees:
581
4
    ReadOnly = true;
582
4
  // Fall through
583
6
  case FMRB_OnlyAccessesArgumentPointees: {
584
6
    auto AccType = ReadOnly ? MemoryAccess::READ : MemoryAccess::MAY_WRITE;
585
6
    Loop *L = LI.getLoopFor(Inst->getParent());
586
16
    for (const auto &Arg : CI->arg_operands()) {
587
16
      if (!Arg->getType()->isPointerTy())
588
6
        continue;
589
10
590
10
      auto *ArgSCEV = SE.getSCEVAtScope(Arg, L);
591
10
      if (ArgSCEV->isZero())
592
4
        continue;
593
6
594
6
      auto *ArgBasePtr = cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
595
6
      addArrayAccess(Stmt, Inst, AccType, ArgBasePtr->getValue(),
596
6
                     ArgBasePtr->getType(), false, {AF}, {nullptr}, CI);
597
6
    }
598
6
    return true;
599
0
  }
600
0
  }
601
0
602
0
  return true;
603
0
}
604
605
3.18k
void ScopBuilder::buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt) {
606
3.18k
  Value *Address = Inst.getPointerOperand();
607
3.18k
  Value *Val = Inst.getValueOperand();
608
3.18k
  Type *ElementType = Val->getType();
609
3.18k
  enum MemoryAccess::AccessType AccType =
610
3.18k
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
611
3.18k
612
3.18k
  const SCEV *AccessFunction =
613
3.18k
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
614
3.18k
  const SCEVUnknown *BasePointer =
615
3.18k
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
616
3.18k
617
3.18k
  assert(BasePointer && "Could not find base pointer");
618
3.18k
  AccessFunction = SE.getMinusSCEV(AccessFunction, BasePointer);
619
3.18k
620
3.18k
  // Check if the access depends on a loop contained in a non-affine subregion.
621
3.18k
  bool isVariantInNonAffineLoop = false;
622
3.18k
  SetVector<const Loop *> Loops;
623
3.18k
  findLoops(AccessFunction, Loops);
624
3.18k
  for (const Loop *L : Loops)
625
1.77k
    if (Stmt->contains(L)) {
626
8
      isVariantInNonAffineLoop = true;
627
8
      break;
628
8
    }
629
3.18k
630
3.18k
  InvariantLoadsSetTy AccessILS;
631
3.18k
632
3.18k
  Loop *SurroundingLoop = Stmt->getSurroundingLoop();
633
3.18k
  bool IsAffine = !isVariantInNonAffineLoop &&
634
3.18k
                  isAffineExpr(&scop->getRegion(), SurroundingLoop,
635
3.17k
                               AccessFunction, SE, &AccessILS);
636
3.18k
637
3.18k
  const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
638
3.18k
  for (LoadInst *LInst : AccessILS)
639
51
    if (!ScopRIL.count(LInst))
640
7
      IsAffine = false;
641
3.18k
642
3.18k
  if (!IsAffine && AccType == MemoryAccess::MUST_WRITE)
643
27
    AccType = MemoryAccess::MAY_WRITE;
644
3.18k
645
3.18k
  addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
646
3.18k
                 IsAffine, {AccessFunction}, {nullptr}, Val);
647
3.18k
}
648
649
3.64k
void ScopBuilder::buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt) {
650
3.64k
  if (buildAccessMemIntrinsic(Inst, Stmt))
651
19
    return;
652
3.62k
653
3.62k
  if (buildAccessCallInst(Inst, Stmt))
654
25
    return;
655
3.59k
656
3.59k
  if (buildAccessMultiDimFixed(Inst, Stmt))
657
238
    return;
658
3.36k
659
3.36k
  if (buildAccessMultiDimParam(Inst, Stmt))
660
176
    return;
661
3.18k
662
3.18k
  buildAccessSingleDim(Inst, Stmt);
663
3.18k
}
664
665
1.18k
void ScopBuilder::buildAccessFunctions() {
666
9.17k
  for (auto &Stmt : *scop) {
667
9.17k
    if (Stmt.isBlockStmt()) {
668
9.05k
      buildAccessFunctions(&Stmt, *Stmt.getBasicBlock());
669
9.05k
      continue;
670
9.05k
    }
671
121
672
121
    Region *R = Stmt.getRegion();
673
121
    for (BasicBlock *BB : R->blocks())
674
336
      buildAccessFunctions(&Stmt, *BB, R);
675
121
  }
676
1.18k
677
1.18k
  // Build write accesses for values that are used after the SCoP.
678
1.18k
  // The instructions defining them might be synthesizable and therefore not
679
1.18k
  // contained in any statement, hence we iterate over the original instructions
680
1.18k
  // to identify all escaping values.
681
5.92k
  for (BasicBlock *BB : scop->getRegion().blocks()) {
682
5.92k
    for (Instruction &Inst : *BB)
683
21.6k
      buildEscapingDependences(&Inst);
684
5.92k
  }
685
1.18k
}
686
687
21.0k
bool ScopBuilder::shouldModelInst(Instruction *Inst, Loop *L) {
688
21.0k
  return !isa<TerminatorInst>(Inst) && !isIgnoredIntrinsic(Inst) &&
689
21.0k
         !canSynthesize(Inst, *scop, &SE, L);
690
21.0k
}
691
692
/// Generate a name for a statement.
693
///
694
/// @param BB     The basic block the statement will represent.
695
/// @param BBIdx  The index of the @p BB relative to other BBs/regions.
696
/// @param Count  The index of the created statement in @p BB.
697
/// @param IsMain Whether this is the main of all statements for @p BB. If true,
698
///               no suffix will be added.
699
/// @param IsLast Uses a special indicator for the last statement of a BB.
700
static std::string makeStmtName(BasicBlock *BB, long BBIdx, int Count,
701
9.05k
                                bool IsMain, bool IsLast = false) {
702
9.05k
  std::string Suffix;
703
9.05k
  if (!IsMain) {
704
3.46k
    if (UseInstructionNames)
705
3.45k
      Suffix = '_';
706
3.46k
    if (IsLast)
707
2.82k
      Suffix += "last";
708
642
    else if (Count < 26)
709
638
      Suffix += 'a' + Count;
710
4
    else
711
4
      Suffix += std::to_string(Count);
712
3.46k
  }
713
9.05k
  return getIslCompatibleName("Stmt", BB, BBIdx, Suffix, UseInstructionNames);
714
9.05k
}
715
716
/// Generate a name for a statement that represents a non-affine subregion.
717
///
718
/// @param R    The region the statement will represent.
719
/// @param RIdx The index of the @p R relative to other BBs/regions.
720
121
static std::string makeStmtName(Region *R, long RIdx) {
721
121
  return getIslCompatibleName("Stmt", R->getNameStr(), RIdx, "",
722
121
                              UseInstructionNames);
723
121
}
724
725
700
void ScopBuilder::buildSequentialBlockStmts(BasicBlock *BB, bool SplitOnStore) {
726
700
  Loop *SurroundingLoop = LI.getLoopFor(BB);
727
700
728
700
  int Count = 0;
729
700
  long BBIdx = scop->getNextStmtIdx();
730
700
  std::vector<Instruction *> Instructions;
731
2.61k
  for (Instruction &Inst : *BB) {
732
2.61k
    if (shouldModelInst(&Inst, SurroundingLoop))
733
1.00k
      Instructions.push_back(&Inst);
734
2.61k
    if (Inst.getMetadata("polly_split_after") ||
735
2.61k
        
        (SplitOnStore && isa<StoreInst>(Inst))) {
736
5
      std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
737
5
      scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
738
5
      Count++;
739
5
      Instructions.clear();
740
5
    }
741
2.61k
  }
742
700
743
700
  std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
744
700
  scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
745
700
}
746
747
/// Is @p Inst an ordered instruction?
748
///
749
/// An unordered instruction is an instruction, such that a sequence of
750
/// unordered instructions can be permuted without changing semantics. Any
751
/// instruction for which this is not always the case is ordered.
752
5.98k
static bool isOrderedInstruction(Instruction *Inst) {
753
5.98k
  return Inst->mayHaveSideEffects() || Inst->mayReadOrWriteMemory();
754
5.98k
}
755
756
/// Join instructions to the same statement if one uses the scalar result of the
757
/// other.
758
static void joinOperandTree(EquivalenceClasses<Instruction *> &UnionFind,
759
4.89k
                            ArrayRef<Instruction *> ModeledInsts) {
760
5.98k
  for (Instruction *Inst : ModeledInsts) {
761
5.98k
    if (isa<PHINode>(Inst))
762
125
      continue;
763
5.86k
764
10.2k
    
    for (Use &Op : Inst->operands()) {
765
10.2k
      Instruction *OpInst = dyn_cast<Instruction>(Op.get());
766
10.2k
      if (!OpInst)
767
3.20k
        continue;
768
7.09k
769
7.09k
      // Check if OpInst is in the BB and is a modeled instruction.
770
7.09k
      auto OpVal = UnionFind.findValue(OpInst);
771
7.09k
      if (OpVal == UnionFind.end())
772
4.56k
        continue;
773
2.53k
774
2.53k
      UnionFind.unionSets(Inst, OpInst);
775
2.53k
    }
776
5.86k
  }
777
4.89k
}
778
779
/// Ensure that the order of ordered instructions does not change.
780
///
781
/// If we encounter an ordered instruction enclosed in instructions belonging to
782
/// a different statement (which might as well contain ordered instructions, but
783
/// this is not tested here), join them.
784
static void
785
joinOrderedInstructions(EquivalenceClasses<Instruction *> &UnionFind,
786
4.89k
                        ArrayRef<Instruction *> ModeledInsts) {
787
4.89k
  SetVector<Instruction *> SeenLeaders;
788
5.98k
  for (Instruction *Inst : ModeledInsts) {
789
5.98k
    if (!isOrderedInstruction(Inst))
790
3.26k
      continue;
791
2.72k
792
2.72k
    Instruction *Leader = UnionFind.getLeaderValue(Inst);
793
2.72k
    bool Inserted = SeenLeaders.insert(Leader);
794
2.72k
    if (Inserted)
795
1.60k
      continue;
796
1.12k
797
1.12k
    // Merge statements to close holes. Say, we have already seen statements A
798
1.12k
    // and B, in this order. Then we see an instruction of A again and we would
799
1.12k
    // see the pattern "A B A". This function joins all statements until the
800
1.12k
    // only seen occurrence of A.
801
1.14k
    
    for (Instruction *Prev : reverse(SeenLeaders)) {
802
1.14k
      // Items added to 'SeenLeaders' are leaders, but may have lost their
803
1.14k
      // leadership status when merged into another statement.
804
1.14k
      Instruction *PrevLeader = UnionFind.getLeaderValue(SeenLeaders.back());
805
1.14k
      if (PrevLeader == Leader)
806
1.12k
        break;
807
25
      UnionFind.unionSets(Prev, Leader);
808
25
    }
809
1.12k
  }
810
4.89k
}
811
812
/// If the BasicBlock has an edge from itself, ensure that the PHI WRITEs for
813
/// the incoming values from this block are executed after the PHI READ.
814
///
815
/// Otherwise it could overwrite the incoming value from before the BB with the
816
/// value for the next execution. This can happen if the PHI WRITE is added to
817
/// the statement with the instruction that defines the incoming value (instead
818
/// of the last statement of the same BB). To ensure that the PHI READ and WRITE
819
/// are in order, we put both into the statement. PHI WRITEs are always executed
820
/// after PHI READs when they are in the same statement.
821
///
822
/// TODO: This is an overpessimization. We only have to ensure that the PHI
823
/// WRITE is not put into a statement containing the PHI itself. That could also
824
/// be done by
825
/// - having all (strongly connected) PHIs in a single statement,
826
/// - unite only the PHIs in the operand tree of the PHI WRITE (because it only
827
///   has a chance of being lifted before a PHI by being in a statement with a
828
///   PHI that comes before in the basic block), or
829
/// - when uniting statements, ensure that no (relevant) PHIs are overtaken.
830
static void joinOrderedPHIs(EquivalenceClasses<Instruction *> &UnionFind,
831
4.89k
                            ArrayRef<Instruction *> ModeledInsts) {
832
5.98k
  for (Instruction *Inst : ModeledInsts) {
833
5.98k
    PHINode *PHI = dyn_cast<PHINode>(Inst);
834
5.98k
    if (!PHI)
835
5.86k
      continue;
836
125
837
125
    int Idx = PHI->getBasicBlockIndex(PHI->getParent());
838
125
    if (Idx < 0)
839
103
      continue;
840
22
841
22
    Instruction *IncomingVal =
842
22
        dyn_cast<Instruction>(PHI->getIncomingValue(Idx));
843
22
    if (!IncomingVal)
844
5
      continue;
845
17
846
17
    UnionFind.unionSets(PHI, IncomingVal);
847
17
  }
848
4.89k
}
849
850
4.89k
void ScopBuilder::buildEqivClassBlockStmts(BasicBlock *BB) {
851
4.89k
  Loop *L = LI.getLoopFor(BB);
852
4.89k
853
4.89k
  // Extracting out modeled instructions saves us from checking
854
4.89k
  // shouldModelInst() repeatedly.
855
4.89k
  SmallVector<Instruction *, 32> ModeledInsts;
856
4.89k
  EquivalenceClasses<Instruction *> UnionFind;
857
4.89k
  Instruction *MainInst = nullptr;
858
17.7k
  for (Instruction &Inst : *BB) {
859
17.7k
    if (!shouldModelInst(&Inst, L))
860
11.7k
      continue;
861
5.98k
    ModeledInsts.push_back(&Inst);
862
5.98k
    UnionFind.insert(&Inst);
863
5.98k
864
5.98k
    // When a BB is split into multiple statements, the main statement is the
865
5.98k
    // one containing the 'main' instruction. We select the first instruction
866
5.98k
    // that is unlikely to be removed (because it has side-effects) as the main
867
5.98k
    // one. It is used to ensure that at least one statement from the bb has the
868
5.98k
    // same name as with -polly-stmt-granularity=bb.
869
5.98k
    if (!MainInst && (isa<StoreInst>(Inst) ||
870
5.10k
                      (isa<CallInst>(Inst) && !isa<IntrinsicInst>(Inst))))
871
1.34k
      MainInst = &Inst;
872
5.98k
  }
873
4.89k
874
4.89k
  joinOperandTree(UnionFind, ModeledInsts);
875
4.89k
  joinOrderedInstructions(UnionFind, ModeledInsts);
876
4.89k
  joinOrderedPHIs(UnionFind, ModeledInsts);
877
4.89k
878
4.89k
  // The list of instructions for statement (statement represented by the leader
879
4.89k
  // instruction). The order of statements instructions is reversed such that
880
4.89k
  // the epilogue is first. This makes it easier to ensure that the epilogue is
881
4.89k
  // the last statement.
882
4.89k
  MapVector<Instruction *, std::vector<Instruction *>> LeaderToInstList;
883
4.89k
884
4.89k
  // Collect the instructions of all leaders. UnionFind's member iterator
885
4.89k
  // unfortunately are not in any specific order.
886
17.7k
  for (Instruction &Inst : reverse(*BB)) {
887
17.7k
    auto LeaderIt = UnionFind.findLeader(&Inst);
888
17.7k
    if (LeaderIt == UnionFind.member_end())
889
11.7k
      continue;
890
5.99k
891
5.99k
    std::vector<Instruction *> &InstList = LeaderToInstList[*LeaderIt];
892
5.99k
    InstList.push_back(&Inst);
893
5.99k
  }
894
4.89k
895
4.89k
  // Finally build the statements.
896
4.89k
  int Count = 0;
897
4.89k
  long BBIdx = scop->getNextStmtIdx();
898
4.89k
  bool MainFound = false;
899
4.89k
  for (auto &Instructions : reverse(LeaderToInstList)) {
900
3.45k
    std::vector<Instruction *> &InstList = Instructions.second;
901
3.45k
902
3.45k
    // If there is no main instruction, make the first statement the main.
903
3.45k
    bool IsMain;
904
3.45k
    if (MainInst)
905
1.88k
      IsMain = std::find(InstList.begin(), InstList.end(), MainInst) !=
906
1.88k
               InstList.end();
907
1.57k
    else
908
1.57k
      IsMain = (Count == 0);
909
3.45k
    if (IsMain)
910
2.82k
      MainFound = true;
911
3.45k
912
3.45k
    std::reverse(InstList.begin(), InstList.end());
913
3.45k
    std::string Name = makeStmtName(BB, BBIdx, Count, IsMain);
914
3.45k
    scop->addScopStmt(BB, Name, L, std::move(InstList));
915
3.45k
    Count += 1;
916
3.45k
  }
917
4.89k
918
4.89k
  // Unconditionally add an epilogue (last statement). It contains no
919
4.89k
  // instructions, but holds the PHI write accesses for successor basic blocks,
920
4.89k
  // if the incoming value is not defined in another statement if the same BB.
921
4.89k
  // The epilogue will be removed if no PHIWrite is added to it.
922
4.89k
  std::string EpilogueName = makeStmtName(BB, BBIdx, Count, !MainFound, true);
923
4.89k
  scop->addScopStmt(BB, EpilogueName, L, {});
924
4.89k
}
925
926
2.53k
void ScopBuilder::buildStmts(Region &SR) {
927
2.53k
  if (scop->isNonAffineSubRegion(&SR)) {
928
121
    std::vector<Instruction *> Instructions;
929
121
    Loop *SurroundingLoop =
930
121
        getFirstNonBoxedLoopFor(SR.getEntry(), LI, scop->getBoxedLoops());
931
121
    for (Instruction &Inst : *SR.getEntry())
932
713
      if (shouldModelInst(&Inst, SurroundingLoop))
933
443
        Instructions.push_back(&Inst);
934
121
    long RIdx = scop->getNextStmtIdx();
935
121
    std::string Name = makeStmtName(&SR, RIdx);
936
121
    scop->addScopStmt(&SR, Name, SurroundingLoop, Instructions);
937
121
    return;
938
121
  }
939
2.41k
940
9.36k
  
  for (auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I)
941
6.94k
    if (I->isSubRegion())
942
1.35k
      buildStmts(*I->getNodeAs<Region>());
943
5.59k
    else {
944
5.59k
      BasicBlock *BB = I->getNodeAs<BasicBlock>();
945
5.59k
      switch (StmtGranularity) {
946
5.59k
      case GranularityChoice::BasicBlocks:
947
696
        buildSequentialBlockStmts(BB);
948
696
        break;
949
5.59k
      case GranularityChoice::ScalarIndependence:
950
4.89k
        buildEqivClassBlockStmts(BB);
951
4.89k
        break;
952
5.59k
      case GranularityChoice::Stores:
953
4
        buildSequentialBlockStmts(BB, true);
954
4
        break;
955
5.59k
      }
956
5.59k
    }
957
2.41k
}
958
959
void ScopBuilder::buildAccessFunctions(ScopStmt *Stmt, BasicBlock &BB,
960
9.38k
                                       Region *NonAffineSubRegion) {
961
9.38k
  assert(
962
9.38k
      Stmt &&
963
9.38k
      "The exit BB is the only one that cannot be represented by a statement");
964
9.38k
  assert(Stmt->represents(&BB));
965
9.38k
966
9.38k
  // We do not build access functions for error blocks, as they may contain
967
9.38k
  // instructions we can not model.
968
9.38k
  if (isErrorBlock(BB, scop->getRegion(), LI, DT))
969
80
    return;
970
9.30k
971
9.30k
  auto BuildAccessesForInst = [this, Stmt,
972
9.30k
                               NonAffineSubRegion](Instruction *Inst) {
973
8.04k
    PHINode *PHI = dyn_cast<PHINode>(Inst);
974
8.04k
    if (PHI)
975
242
      buildPHIAccesses(Stmt, PHI, NonAffineSubRegion, false);
976
8.04k
977
8.04k
    if (auto MemInst = MemAccInst::dyn_cast(*Inst)) {
978
3.36k
      assert(Stmt && "Cannot build access function in non-existing statement");
979
3.36k
      buildMemoryAccess(MemInst, Stmt);
980
3.36k
    }
981
8.04k
982
8.04k
    // PHI nodes have already been modeled above and TerminatorInsts that are
983
8.04k
    // not part of a non-affine subregion are fully modeled and regenerated
984
8.04k
    // from the polyhedral domains. Hence, they do not need to be modeled as
985
8.04k
    // explicit data dependences.
986
8.04k
    if (!PHI)
987
7.79k
      buildScalarDependences(Stmt, Inst);
988
8.04k
  };
989
9.30k
990
9.30k
  const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
991
9.30k
  bool IsEntryBlock = (Stmt->getEntryBlock() == &BB);
992
9.30k
  if (IsEntryBlock) {
993
9.10k
    for (Instruction *Inst : Stmt->getInstructions())
994
7.38k
      BuildAccessesForInst(Inst);
995
9.10k
    if (Stmt->isRegionStmt())
996
119
      BuildAccessesForInst(BB.getTerminator());
997
9.10k
  } else {
998
535
    for (Instruction &Inst : BB) {
999
535
      if (isIgnoredIntrinsic(&Inst))
1000
0
        continue;
1001
535
1002
535
      // Invariant loads already have been processed.
1003
535
      if (isa<LoadInst>(Inst) && RIL.count(cast<LoadInst>(&Inst)))
1004
2
        continue;
1005
533
1006
533
      BuildAccessesForInst(&Inst);
1007
533
    }
1008
206
  }
1009
9.30k
}
1010
1011
MemoryAccess *ScopBuilder::addMemoryAccess(
1012
    ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType,
1013
    Value *BaseAddress, Type *ElementType, bool Affine, Value *AccessValue,
1014
    ArrayRef<const SCEV *> Subscripts, ArrayRef<const SCEV *> Sizes,
1015
5.01k
    MemoryKind Kind) {
1016
5.01k
  bool isKnownMustAccess = false;
1017
5.01k
1018
5.01k
  // Accesses in single-basic block statements are always executed.
1019
5.01k
  if (Stmt->isBlockStmt())
1020
4.59k
    isKnownMustAccess = true;
1021
5.01k
1022
5.01k
  if (Stmt->isRegionStmt()) {
1023
417
    // Accesses that dominate the exit block of a non-affine region are always
1024
417
    // executed. In non-affine regions there may exist MemoryKind::Values that
1025
417
    // do not dominate the exit. MemoryKind::Values will always dominate the
1026
417
    // exit and MemoryKind::PHIs only if there is at most one PHI_WRITE in the
1027
417
    // non-affine region.
1028
417
    if (Inst && DT.dominates(Inst->getParent(), Stmt->getRegion()->getExit()))
1029
235
      isKnownMustAccess = true;
1030
417
  }
1031
5.01k
1032
5.01k
  // Non-affine PHI writes do not "happen" at a particular instruction, but
1033
5.01k
  // after exiting the statement. Therefore they are guaranteed to execute and
1034
5.01k
  // overwrite the old value.
1035
5.01k
  if (Kind == MemoryKind::PHI || Kind == MemoryKind::ExitPHI)
1036
676
    isKnownMustAccess = true;
1037
5.01k
1038
5.01k
  if (!isKnownMustAccess && AccType == MemoryAccess::MUST_WRITE)
1039
82
    AccType = MemoryAccess::MAY_WRITE;
1040
5.01k
1041
5.01k
  auto *Access = new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType,
1042
5.01k
                                  Affine, Subscripts, Sizes, AccessValue, Kind);
1043
5.01k
1044
5.01k
  scop->addAccessFunction(Access);
1045
5.01k
  Stmt->addAccess(Access);
1046
5.01k
  return Access;
1047
5.01k
}
1048
1049
void ScopBuilder::addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst,
1050
                                 MemoryAccess::AccessType AccType,
1051
                                 Value *BaseAddress, Type *ElementType,
1052
                                 bool IsAffine,
1053
                                 ArrayRef<const SCEV *> Subscripts,
1054
                                 ArrayRef<const SCEV *> Sizes,
1055
3.64k
                                 Value *AccessValue) {
1056
3.64k
  ArrayBasePointers.insert(BaseAddress);
1057
3.64k
  auto *MemAccess = addMemoryAccess(Stmt, MemAccInst, AccType, BaseAddress,
1058
3.64k
                                    ElementType, IsAffine, AccessValue,
1059
3.64k
                                    Subscripts, Sizes, MemoryKind::Array);
1060
3.64k
1061
3.64k
  if (!DetectFortranArrays)
1062
3.63k
    return;
1063
13
1064
13
  if (Value *FAD = findFADAllocationInvisible(MemAccInst))
1065
8
    MemAccess->setFortranArrayDescriptor(FAD);
1066
5
  else if (Value *FAD = findFADAllocationVisible(MemAccInst))
1067
2
    MemAccess->setFortranArrayDescriptor(FAD);
1068
13
}
1069
1070
404
void ScopBuilder::ensureValueWrite(Instruction *Inst) {
1071
404
  // Find the statement that defines the value of Inst. That statement has to
1072
404
  // write the value to make it available to those statements that read it.
1073
404
  ScopStmt *Stmt = scop->getStmtFor(Inst);
1074
404
1075
404
  // It is possible that the value is synthesizable within a loop (such that it
1076
404
  // is not part of any statement), but not after the loop (where you need the
1077
404
  // number of loop round-trips to synthesize it). In LCSSA-form a PHI node will
1078
404
  // avoid this. In case the IR has no such PHI, use the last statement (where
1079
404
  // the value is synthesizable) to write the value.
1080
404
  if (!Stmt)
1081
44
    Stmt = scop->getLastStmtFor(Inst->getParent());
1082
404
1083
404
  // Inst not defined within this SCoP.
1084
404
  if (!Stmt)
1085
0
    return;
1086
404
1087
404
  // Do not process further if the instruction is already written.
1088
404
  if (Stmt->lookupValueWriteOf(Inst))
1089
65
    return;
1090
339
1091
339
  addMemoryAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, Inst, Inst->getType(),
1092
339
                  true, Inst, ArrayRef<const SCEV *>(),
1093
339
                  ArrayRef<const SCEV *>(), MemoryKind::Value);
1094
339
}
1095
1096
14.3k
void ScopBuilder::ensureValueRead(Value *V, ScopStmt *UserStmt) {
1097
14.3k
  // TODO: Make ScopStmt::ensureValueRead(Value*) offer the same functionality
1098
14.3k
  // to be able to replace this one. Currently, there is a split responsibility.
1099
14.3k
  // In a first step, the MemoryAccess is created, but without the
1100
14.3k
  // AccessRelation. In the second step by ScopStmt::buildAccessRelations(), the
1101
14.3k
  // AccessRelation is created. At least for scalar accesses, there is no new
1102
14.3k
  // information available at ScopStmt::buildAccessRelations(), so we could
1103
14.3k
  // create the AccessRelation right away. This is what
1104
14.3k
  // ScopStmt::ensureValueRead(Value*) does.
1105
14.3k
1106
14.3k
  auto *Scope = UserStmt->getSurroundingLoop();
1107
14.3k
  auto VUse = VirtualUse::create(scop.get(), UserStmt, Scope, V, false);
1108
14.3k
  switch (VUse.getKind()) {
1109
14.3k
  case VirtualUse::Constant:
1110
13.9k
  case VirtualUse::Block:
1111
13.9k
  case VirtualUse::Synthesizable:
1112
13.9k
  case VirtualUse::Hoisted:
1113
13.9k
  case VirtualUse::Intra:
1114
13.9k
    // Uses of these kinds do not need a MemoryAccess.
1115
13.9k
    break;
1116
13.9k
1117
13.9k
  case VirtualUse::ReadOnly:
1118
62
    // Add MemoryAccess for invariant values only if requested.
1119
62
    if (!ModelReadOnlyScalars)
1120
4
      break;
1121
58
1122
58
    LLVM_FALLTHROUGH;
1123
387
  case VirtualUse::Inter:
1124
387
1125
387
    // Do not create another MemoryAccess for reloading the value if one already
1126
387
    // exists.
1127
387
    if (UserStmt->lookupValueReadOf(V))
1128
30
      break;
1129
357
1130
357
    addMemoryAccess(UserStmt, nullptr, MemoryAccess::READ, V, V->getType(),
1131
357
                    true, V, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
1132
357
                    MemoryKind::Value);
1133
357
1134
357
    // Inter-statement uses need to write the value in their defining statement.
1135
357
    if (VUse.isInter())
1136
312
      ensureValueWrite(cast<Instruction>(V));
1137
357
    break;
1138
14.3k
  }
1139
14.3k
}
1140
1141
void ScopBuilder::ensurePHIWrite(PHINode *PHI, ScopStmt *IncomingStmt,
1142
                                 BasicBlock *IncomingBlock,
1143
570
                                 Value *IncomingValue, bool IsExitBlock) {
1144
570
  // As the incoming block might turn out to be an error statement ensure we
1145
570
  // will create an exit PHI SAI object. It is needed during code generation
1146
570
  // and would be created later anyway.
1147
570
  if (IsExitBlock)
1148
142
    scop->getOrCreateScopArrayInfo(PHI, PHI->getType(), {},
1149
142
                                   MemoryKind::ExitPHI);
1150
570
1151
570
  // This is possible if PHI is in the SCoP's entry block. The incoming blocks
1152
570
  // from outside the SCoP's region have no statement representation.
1153
570
  if (!IncomingStmt)
1154
69
    return;
1155
501
1156
501
  // Take care for the incoming value being available in the incoming block.
1157
501
  // This must be done before the check for multiple PHI writes because multiple
1158
501
  // exiting edges from subregion each can be the effective written value of the
1159
501
  // subregion. As such, all of them must be made available in the subregion
1160
501
  // statement.
1161
501
  ensureValueRead(IncomingValue, IncomingStmt);
1162
501
1163
501
  // Do not add more than one MemoryAccess per PHINode and ScopStmt.
1164
501
  if (MemoryAccess *Acc = IncomingStmt->lookupPHIWriteOf(PHI)) {
1165
53
    assert(Acc->getAccessInstruction() == PHI);
1166
53
    Acc->addIncoming(IncomingBlock, IncomingValue);
1167
53
    return;
1168
53
  }
1169
448
1170
448
  MemoryAccess *Acc = addMemoryAccess(
1171
448
      IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, PHI->getType(), true,
1172
448
      PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
1173
448
      IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI);
1174
448
  assert(Acc);
1175
448
  Acc->addIncoming(IncomingBlock, IncomingValue);
1176
448
}
1177
1178
228
void ScopBuilder::addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI) {
1179
228
  addMemoryAccess(PHIStmt, PHI, MemoryAccess::READ, PHI, PHI->getType(), true,
1180
228
                  PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
1181
228
                  MemoryKind::PHI);
1182
228
}
1183
1184
2.28k
void ScopBuilder::buildDomain(ScopStmt &Stmt) {
1185
2.28k
  isl::id Id = isl::id::alloc(scop->getIslCtx(), Stmt.getBaseName(), &Stmt);
1186
2.28k
1187
2.28k
  Stmt.Domain = scop->getDomainConditions(&Stmt);
1188
2.28k
  Stmt.Domain = Stmt.Domain.set_tuple_id(Id);
1189
2.28k
}
1190
1191
2.28k
void ScopBuilder::collectSurroundingLoops(ScopStmt &Stmt) {
1192
2.28k
  isl::set Domain = Stmt.getDomain();
1193
4.81k
  for (unsigned u = 0, e = Domain.dim(isl::dim::set); u < e; u++) {
1194
2.53k
    isl::id DimId = Domain.get_dim_id(isl::dim::set, u);
1195
2.53k
    Stmt.NestLoops.push_back(static_cast<Loop *>(DimId.get_user()));
1196
2.53k
  }
1197
2.28k
}
1198
1199
/// Return the reduction type for a given binary operator.
1200
static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp,
1201
294
                                                    const Instruction *Load) {
1202
294
  if (!BinOp)
1203
0
    return MemoryAccess::RT_NONE;
1204
294
  switch (BinOp->getOpcode()) {
1205
294
  case Instruction::FAdd:
1206
1
    if (!BinOp->isFast())
1207
0
      return MemoryAccess::RT_NONE;
1208
1
    // Fall through
1209
271
  case Instruction::Add:
1210
271
    return MemoryAccess::RT_ADD;
1211
1
  case Instruction::Or:
1212
1
    return MemoryAccess::RT_BOR;
1213
3
  case Instruction::Xor:
1214
3
    return MemoryAccess::RT_BXOR;
1215
1
  case Instruction::And:
1216
1
    return MemoryAccess::RT_BAND;
1217
1
  case Instruction::FMul:
1218
0
    if (!BinOp->isFast())
1219
0
      return MemoryAccess::RT_NONE;
1220
0
    // Fall through
1221
18
  case Instruction::Mul:
1222
18
    if (DisableMultiplicativeReductions)
1223
0
      return MemoryAccess::RT_NONE;
1224
18
    return MemoryAccess::RT_MUL;
1225
18
  default:
1226
0
    return MemoryAccess::RT_NONE;
1227
0
  }
1228
0
}
1229
1230
2.28k
void ScopBuilder::checkForReductions(ScopStmt &Stmt) {
1231
2.28k
  SmallVector<MemoryAccess *, 2> Loads;
1232
2.28k
  SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates;
1233
2.28k
1234
2.28k
  // First collect candidate load-store reduction chains by iterating over all
1235
2.28k
  // stores and collecting possible reduction loads.
1236
4.69k
  for (MemoryAccess *StoreMA : Stmt) {
1237
4.69k
    if (StoreMA->isRead())
1238
2.22k
      continue;
1239
2.46k
1240
2.46k
    Loads.clear();
1241
2.46k
    collectCandidateReductionLoads(StoreMA, Loads);
1242
2.46k
    for (MemoryAccess *LoadMA : Loads)
1243
418
      Candidates.push_back(std::make_pair(LoadMA, StoreMA));
1244
2.46k
  }
1245
2.28k
1246
2.28k
  // Then check each possible candidate pair.
1247
2.28k
  for (const auto &CandidatePair : Candidates) {
1248
418
    bool Valid = true;
1249
418
    isl::map LoadAccs = CandidatePair.first->getAccessRelation();
1250
418
    isl::map StoreAccs = CandidatePair.second->getAccessRelation();
1251
418
1252
418
    // Skip those with obviously unequal base addresses.
1253
418
    if (!LoadAccs.has_equal_space(StoreAccs)) {
1254
75
      continue;
1255
75
    }
1256
343
1257
343
    // And check the remaining accesses for overlap with other memory accesses.
1258
343
    isl::map AllAccsRel = LoadAccs.unite(StoreAccs);
1259
343
    AllAccsRel = AllAccsRel.intersect_domain(Stmt.getDomain());
1260
343
    isl::set AllAccs = AllAccsRel.range();
1261
343
1262
971
    for (MemoryAccess *MA : Stmt) {
1263
971
      if (MA == CandidatePair.first || MA == CandidatePair.second)
1264
686
        continue;
1265
285
1266
285
      isl::map AccRel =
1267
285
          MA->getAccessRelation().intersect_domain(Stmt.getDomain());
1268
285
      isl::set Accs = AccRel.range();
1269
285
1270
285
      if (AllAccs.has_equal_space(Accs)) {
1271
68
        isl::set OverlapAccs = Accs.intersect(AllAccs);
1272
68
        Valid = Valid && OverlapAccs.is_empty();
1273
68
      }
1274
285
    }
1275
343
1276
343
    if (!Valid)
1277
49
      continue;
1278
294
1279
294
    const LoadInst *Load =
1280
294
        dyn_cast<const LoadInst>(CandidatePair.first->getAccessInstruction());
1281
294
    MemoryAccess::ReductionType RT =
1282
294
        getReductionType(dyn_cast<BinaryOperator>(Load->user_back()), Load);
1283
294
1284
294
    // If no overlapping access was found we mark the load and store as
1285
294
    // reduction like.
1286
294
    CandidatePair.first->markAsReductionLike(RT);
1287
294
    CandidatePair.second->markAsReductionLike(RT);
1288
294
  }
1289
2.28k
}
1290
1291
void ScopBuilder::collectCandidateReductionLoads(
1292
2.46k
    MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) {
1293
2.46k
  ScopStmt *Stmt = StoreMA->getStatement();
1294
2.46k
1295
2.46k
  auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction());
1296
2.46k
  if (!Store)
1297
755
    return;
1298
1.71k
1299
1.71k
  // Skip if there is not one binary operator between the load and the store
1300
1.71k
  auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand());
1301
1.71k
  if (!BinOp)
1302
1.05k
    return;
1303
662
1304
662
  // Skip if the binary operator has multiple uses
1305
662
  if (BinOp->getNumUses() != 1)
1306
47
    return;
1307
615
1308
615
  // Skip if the opcode of the binary operator is not commutative/associative
1309
615
  if (!BinOp->isCommutative() || !BinOp->isAssociative())
1310
240
    return;
1311
375
1312
375
  // Skip if the binary operator is outside the current SCoP
1313
375
  if (BinOp->getParent() != Store->getParent())
1314
1
    return;
1315
374
1316
374
  // Skip if it is a multiplicative reduction and we disabled them
1317
374
  if (DisableMultiplicativeReductions &&
1318
374
      (BinOp->getOpcode() == Instruction::Mul ||
1319
2
       BinOp->getOpcode() == Instruction::FMul))
1320
1
    return;
1321
373
1322
373
  // Check the binary operator operands for a candidate load
1323
373
  auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0));
1324
373
  auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1));
1325
373
  if (!PossibleLoad0 && !PossibleLoad1)
1326
9
    return;
1327
364
1328
364
  // A load is only a candidate if it cannot escape (thus has only this use)
1329
364
  if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1)
1330
347
    if (PossibleLoad0->getParent() == Store->getParent())
1331
343
      Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad0));
1332
364
  if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1)
1333
76
    if (PossibleLoad1->getParent() == Store->getParent())
1334
75
      Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad1));
1335
364
}
1336
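collectCandidateReductionLoads looks for a load feeding exactly one commutative, associative binary operator whose single use is the store, all within one basic block. A hedged sketch of what does and does not qualify (assumed example code):

void reduction_chains(int *S, int *A, int *Copy, int N) {
  for (int i = 0; i < N; i++)
    S[0] = S[0] + A[i];    // qualifies: load of S[0] -> single add -> store

  for (int i = 0; i < N; i++) {
    int T = S[0] + A[i];   // does not qualify: the add has two uses
    Copy[i] = T;           // (the store to Copy[i] and the store to S[0]),
    S[0] = T;              // so the getNumUses() != 1 check rejects it
  }

  for (int i = 0; i < N; i++)
    S[0] = S[0] - A[i];    // does not qualify: sub is not commutative
}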
1337
2.28k
void ScopBuilder::buildAccessRelations(ScopStmt &Stmt) {
1338
4.69k
  for (MemoryAccess *Access : Stmt.MemAccs) {
1339
4.69k
    Type *ElementType = Access->getElementType();
1340
4.69k
1341
4.69k
    MemoryKind Ty;
1342
4.69k
    if (Access->isPHIKind())
1343
546
      Ty = MemoryKind::PHI;
1344
4.14k
    else if (Access->isExitPHIKind())
1345
85
      Ty = MemoryKind::ExitPHI;
1346
4.06k
    else if (Access->isValueKind())
1347
670
      Ty = MemoryKind::Value;
1348
3.39k
    else
1349
3.39k
      Ty = MemoryKind::Array;
1350
4.69k
1351
4.69k
    auto *SAI = scop->getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(),
1352
4.69k
                                               ElementType, Access->Sizes, Ty);
1353
4.69k
    Access->buildAccessRelation(SAI);
1354
4.69k
    scop->addAccessData(Access);
1355
4.69k
  }
1356
2.28k
}
1357
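For orientation, the four MemoryKind cases distinguished above roughly correspond to the following situations; this is a hedged, assumed example, and the exact classification depends on how statements are formed:

void memory_kinds(float *A, float *Out, int N) {
  float last = 0.0f;
  for (int i = 0; i < N; i++) {
    A[i] = A[i] * 2.0f;   // explicit load/store of A[i] -> MemoryKind::Array
    if (A[i] > 1.0f)
      last = A[i];        // 'last' written here and read after the loop
  }                       // -> MemoryKind::Value accesses; the PHI node
                          // merging 'last' at the loop header (when it is
                          // not synthesizable) -> MemoryKind::PHI
  *Out = last;            // a PHI node sitting in the SCoP's exit block and
                          // merging values produced inside the region would
                          // be modeled as MemoryKind::ExitPHI
}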
1358
#ifndef NDEBUG
1359
static void verifyUse(Scop *S, Use &Op, LoopInfo &LI) {
1360
  auto PhysUse = VirtualUse::create(S, Op, &LI, false);
1361
  auto VirtUse = VirtualUse::create(S, Op, &LI, true);
1362
  assert(PhysUse.getKind() == VirtUse.getKind());
1363
}
1364
1365
/// Check the consistency of every statement's MemoryAccesses.
1366
///
1367
/// The check is carried out by expecting the "physical" kind of use (derived
1368
/// from the BasicBlock the instruction resides in) to be the same as the "virtual"
1369
/// kind of use (derived from a statement's MemoryAccess).
1370
///
1371
/// The "physical" uses are taken by ensureValueRead to determine whether to
1372
/// create MemoryAccesses. When done, the kind of scalar access should be the
1373
/// same no matter which way it was derived.
1374
///
1375
/// The MemoryAccesses might be changed by later SCoP-modifying passes and hence
1376
/// can intentionally influence the kind of uses (not corresponding to the
1377
/// "physical" anymore, hence called "virtual"). The CodeGenerator therefore has
1378
/// to pick up the virtual uses. But at this point in the ScopBuilder, this has not
1379
/// happened yet, such that virtual and physical uses are equivalent.
1380
static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT) {
1381
  for (auto *BB : S->getRegion().blocks()) {
1382
    for (auto &Inst : *BB) {
1383
      auto *Stmt = S->getStmtFor(&Inst);
1384
      if (!Stmt)
1385
        continue;
1386
1387
      if (isIgnoredIntrinsic(&Inst))
1388
        continue;
1389
1390
      // Branch conditions are encoded in the statement domains.
1391
      if (isa<TerminatorInst>(&Inst) && Stmt->isBlockStmt())
1392
        continue;
1393
1394
      // Verify all uses.
1395
      for (auto &Op : Inst.operands())
1396
        verifyUse(S, Op, LI);
1397
1398
      // Stores do not produce values used by other statements.
1399
      if (isa<StoreInst>(Inst))
1400
        continue;
1401
1402
      // For every value defined in the block, also check that a use of that
1403
      // value in the same statement would not be an inter-statement use. It can
1404
      // still be synthesizable or load-hoisted, but these kind of instructions
1405
      // are not directly copied in code-generation.
1406
      auto VirtDef =
1407
          VirtualUse::create(S, Stmt, Stmt->getSurroundingLoop(), &Inst, true);
1408
      assert(VirtDef.getKind() == VirtualUse::Synthesizable ||
1409
             VirtDef.getKind() == VirtualUse::Intra ||
1410
             VirtDef.getKind() == VirtualUse::Hoisted);
1411
    }
1412
  }
1413
1414
  if (S->hasSingleExitEdge())
1415
    return;
1416
1417
  // PHINodes in the SCoP region's exit block are also uses to be checked.
1418
  if (!S->getRegion().isTopLevelRegion()) {
1419
    for (auto &Inst : *S->getRegion().getExit()) {
1420
      if (!isa<PHINode>(Inst))
1421
        break;
1422
1423
      for (auto &Op : Inst.operands())
1424
        verifyUse(S, Op, LI);
1425
    }
1426
  }
1427
}
1428
#endif
1429
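The distinction verified above can be pictured with a small assumed example: a value used inside the statement that defines it needs no scalar MemoryAccess, while a value crossing statement boundaries must be backed by one, and verifyUses asserts that the IR-derived ("physical") and MemoryAccess-derived ("virtual") classifications agree. Whether the guarded store below really becomes its own statement depends on statement splitting; this is a hedged sketch:

void use_kinds(float *A, float *B, int N) {
  for (int i = 0; i < N; i++) {
    float t = A[i] + 1.0f;  // 't' is used in the same statement right below:
    A[i] = t * t;           // an intra-statement use, no MemoryAccess needed
  }
  for (int i = 0; i < N; i++) {
    float u = A[i];         // if the guarded store becomes a separate
    if (u > 0.0f)           // statement, the use of 'u' there is an
      B[i] = u;             // inter-statement use and must be backed by a
  }                         // scalar (MemoryKind::Value) access
}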
1430
/// Return the block that is the representing block for @p RN.
1431
115
static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
1432
115
  return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
1433
115
                           : RN->getNodeAs<BasicBlock>();
1434
115
}
1435
1436
void ScopBuilder::buildScop(Region &R, AssumptionCache &AC,
1437
1.18k
                            OptimizationRemarkEmitter &ORE) {
1438
1.18k
  scop.reset(new Scop(R, SE, LI, DT, *SD.getDetectionContext(&R), ORE));
1439
1.18k
1440
1.18k
  buildStmts(R);
1441
1.18k
1442
1.18k
  // Create all invariant load instructions first. These are categorized as
1443
1.18k
  // 'synthesizable', therefore are not part of any ScopStmt but need to be
1444
1.18k
  // created somewhere.
1445
1.18k
  const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
1446
5.92k
  for (BasicBlock *BB : scop->getRegion().blocks()) {
1447
5.92k
    if (isErrorBlock(*BB, scop->getRegion(), LI, DT))
1448
45
      continue;
1449
5.88k
1450
21.5k
    for (Instruction &Inst : *BB) {
1451
21.5k
      LoadInst *Load = dyn_cast<LoadInst>(&Inst);
1452
21.5k
      if (!Load)
1453
19.7k
        continue;
1454
1.78k
1455
1.78k
      if (!RIL.count(Load))
1456
1.50k
        continue;
1457
282
1458
282
      // Invariant loads require a MemoryAccess to be created in some statement.
1459
282
      // It is not important to which statement the MemoryAccess is added
1460
282
      // because it will later be removed from the ScopStmt again. We choose the
1461
282
      // first statement of the basic block the LoadInst is in.
1462
282
      ArrayRef<ScopStmt *> List = scop->getStmtListFor(BB);
1463
282
      assert(!List.empty());
1464
282
      ScopStmt *RILStmt = List.front();
1465
282
      buildMemoryAccess(Load, RILStmt);
1466
282
    }
1467
5.88k
  }
1468
1.18k
  buildAccessFunctions();
1469
1.18k
1470
1.18k
  // In case the region does not have an exiting block we will later (during
1471
1.18k
  // code generation) split the exit block. This will move potential PHI nodes
1472
1.18k
  // from the current exit block into the new region exiting block. Hence, PHI
1473
1.18k
  // nodes that are not part of the region at this point will become part of it.
1474
1.18k
  // To handle these PHI nodes later we will now model their operands as scalar
1475
1.18k
  // accesses. Note that we do not model anything in the exit block if we have
1476
1.18k
  // an exiting block in the region, as there will not be any splitting later.
1477
1.18k
  if (!R.isTopLevelRegion() && !scop->hasSingleExitEdge()) {
1478
208
    for (Instruction &Inst : *R.getExit()) {
1479
208
      PHINode *PHI = dyn_cast<PHINode>(&Inst);
1480
208
      if (!PHI)
1481
164
        break;
1482
44
1483
44
      buildPHIAccesses(nullptr, PHI, nullptr, true);
1484
44
    }
1485
164
  }
1486
1.18k
1487
1.18k
  // Create memory accesses for global reads since all arrays are now known.
1488
1.18k
  auto *AF = SE.getConstant(IntegerType::getInt64Ty(SE.getContext()), 0);
1489
1.18k
  for (auto GlobalReadPair : GlobalReads) {
1490
8
    ScopStmt *GlobalReadStmt = GlobalReadPair.first;
1491
8
    Instruction *GlobalRead = GlobalReadPair.second;
1492
8
    for (auto *BP : ArrayBasePointers)
1493
16
      addArrayAccess(GlobalReadStmt, MemAccInst(GlobalRead), MemoryAccess::READ,
1494
16
                     BP, BP->getType(), false, {AF}, {nullptr}, GlobalRead);
1495
8
  }
1496
1.18k
1497
1.18k
  scop->buildInvariantEquivalenceClasses();
1498
1.18k
1499
1.18k
  /// A map from basic blocks to their invalid domains.
1500
1.18k
  DenseMap<BasicBlock *, isl::set> InvalidDomainMap;
1501
1.18k
1502
1.18k
  if (!scop->buildDomains(&R, DT, LI, InvalidDomainMap)) {
1503
7
    DEBUG(dbgs() << "Bailing-out because buildDomains encountered problems\n");
1504
7
    return;
1505
7
  }
1506
1.17k
1507
1.17k
  scop->addUserAssumptions(AC, DT, LI, InvalidDomainMap);
1508
1.17k
1509
1.17k
  // Initialize the invalid domain.
1510
1.17k
  for (ScopStmt &Stmt : scop->Stmts)
1511
9.02k
    if (Stmt.isBlockStmt())
1512
8.91k
      Stmt.setInvalidDomain(InvalidDomainMap[Stmt.getEntryBlock()]);
1513
115
    else
1514
115
      Stmt.setInvalidDomain(InvalidDomainMap[getRegionNodeBasicBlock(
1515
115
          Stmt.getRegion()->getNode())]);
1516
1.17k
1517
1.17k
  // Remove empty statements.
1518
1.17k
  // Exit early in case there are no executable statements left in this scop.
1519
1.17k
  scop->removeStmtNotInDomainMap();
1520
1.17k
  scop->simplifySCoP(false);
1521
1.17k
  if (scop->isEmpty()) {
1522
28
    DEBUG(dbgs() << "Bailing-out because SCoP is empty\n");
1523
28
    return;
1524
28
  }
1525
1.15k
1526
1.15k
  // The ScopStmts now have enough information to initialize themselves.
1527
2.28k
  for (ScopStmt &Stmt : *scop) {
1528
2.28k
    buildDomain(Stmt);
1529
2.28k
    collectSurroundingLoops(Stmt);
1530
2.28k
    buildAccessRelations(Stmt);
1531
2.28k
1532
2.28k
    if (DetectReductions)
1533
2.28k
      checkForReductions(Stmt);
1534
2.28k
  }
1535
1.15k
1536
1.15k
  // Check early for a feasible runtime context.
1537
1.15k
  if (!scop->hasFeasibleRuntimeContext()) {
1538
3
    DEBUG(dbgs() << "Bailing-out because of unfeasible context (early)\n");
1539
3
    return;
1540
3
  }
1541
1.14k
1542
1.14k
  // Check early for profitability. Afterwards it cannot change anymore,
1543
1.14k
  // only the runtime context could become infeasible.
1544
1.14k
  if (!scop->isProfitable(UnprofitableScalarAccs)) {
1545
2
    scop->invalidate(PROFITABLE, DebugLoc());
1546
2
    DEBUG(dbgs() << "Bailing-out because SCoP is not considered profitable\n");
1547
2
    return;
1548
2
  }
1549
1.14k
1550
1.14k
  scop->buildSchedule(LI);
1551
1.14k
1552
1.14k
  scop->finalizeAccesses();
1553
1.14k
1554
1.14k
  scop->realignParams();
1555
1.14k
  scop->addUserContext();
1556
1.14k
1557
1.14k
  // After the context was fully constructed, thus all our knowledge about
1558
1.14k
  // the parameters is in there, we add all recorded assumptions to the
1559
1.14k
  // assumed/invalid context.
1560
1.14k
  scop->addRecordedAssumptions();
1561
1.14k
1562
1.14k
  scop->simplifyContexts();
1563
1.14k
  if (!scop->buildAliasChecks(AA)) {
1564
6
    DEBUG(dbgs() << "Bailing-out because could not build alias checks\n");
1565
6
    return;
1566
6
  }
1567
1.14k
1568
1.14k
  scop->hoistInvariantLoads();
1569
1.14k
  scop->canonicalizeDynamicBasePtrs();
1570
1.14k
  scop->verifyInvariantLoads();
1571
1.14k
  scop->simplifySCoP(true);
1572
1.14k
1573
1.14k
  // Check late for a feasible runtime context because profitability did not
1574
1.14k
  // change.
1575
1.14k
  if (!scop->hasFeasibleRuntimeContext()) {
1576
17
    DEBUG(dbgs() << "Bailing-out because of unfeasible context (late)\n");
1577
17
    return;
1578
17
  }
1579
1.12k
1580
#ifndef NDEBUG
1581
  verifyUses(scop.get(), LI, DT);
1582
#endif
1583
}
1584
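The exit-block PHI handling near the top of buildScop covers regions without a single exit edge. A hedged, assumed example of the shape of code that triggers it (whether Polly selects exactly this region depends on SCoP detection):

float exit_phi(float *A, int N, int UseMax) {
  float r = 0.0f;
  if (UseMax) {                    // if the detected region covers the
    for (int i = 0; i < N; i++)    // if/else, it has two exiting edges and
      if (A[i] > r)                // the value of 'r' is merged by a PHI
        r = A[i];                  // node in the region's exit block
  } else {
    for (int i = 0; i < N; i++)
      r += A[i];
  }
  return r;                        // the exit-block PHI's operands are the
                                   // values modeled by buildPHIAccesses above
}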
1585
ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AliasAnalysis &AA,
1586
                         const DataLayout &DL, DominatorTree &DT, LoopInfo &LI,
1587
                         ScopDetection &SD, ScalarEvolution &SE,
1588
                         OptimizationRemarkEmitter &ORE)
1589
1.18k
    : AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE) {
1590
1.18k
  DebugLoc Beg, End;
1591
1.18k
  auto P = getBBPairForRegion(R);
1592
1.18k
  getDebugLocations(P, Beg, End);
1593
1.18k
1594
1.18k
  std::string Msg = "SCoP begins here.";
1595
1.18k
  ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEntry", Beg, P.first)
1596
1.18k
           << Msg);
1597
1.18k
1598
1.18k
  buildScop(*R, AC, ORE);
1599
1.18k
1600
1.18k
  DEBUG(dbgs() << *scop);
1601
1.18k
1602
1.18k
  if (!scop->hasFeasibleRuntimeContext()) {
1603
63
    InfeasibleScops++;
1604
63
    Msg = "SCoP ends here but was dismissed.";
1605
63
    DEBUG(dbgs() << "SCoP detected but dismissed\n");
1606
63
    scop.reset();
1607
1.12k
  } else {
1608
1.12k
    Msg = "SCoP ends here.";
1609
1.12k
    ++ScopFound;
1610
1.12k
    if (scop->getMaxLoopDepth() > 0)
1611
1.05k
      ++RichScopFound;
1612
1.12k
  }
1613
1.18k
1614
1.18k
  if (R->isTopLevelRegion())
1615
4
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.first)
1616
4
             << Msg);
1617
1.18k
  else
1618
1.18k
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.second)
1619
1.18k
             << Msg);
1620
1.18k
}
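For context, the constructor above does all the work; a driver only has to instantiate ScopBuilder and take ownership of the result. A minimal sketch, assuming a getScop() accessor as declared in ScopBuilder.h and analyses already provided by the pass manager:

// Hypothetical driver snippet (pass boilerplate and error handling omitted).
std::unique_ptr<Scop> buildScopFor(Region *R, AssumptionCache &AC,
                                   AliasAnalysis &AA, const DataLayout &DL,
                                   DominatorTree &DT, LoopInfo &LI,
                                   ScopDetection &SD, ScalarEvolution &SE,
                                   OptimizationRemarkEmitter &ORE) {
  ScopBuilder SB(R, AC, AA, DL, DT, LI, SD, SE, ORE);
  // The constructor already emitted the "SCoP begins/ends here" remarks and
  // dropped the Scop again if its runtime context turned out infeasible.
  return SB.getScop();  // may be null when the SCoP was dismissed
}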