Coverage Report

Created: 2019-04-21 11:35

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/polly/lib/Analysis/ScopBuilder.cpp
//===- ScopBuilder.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Create a polyhedral description for a static control flow region.
//
// The pass creates a polyhedral description of the Scops detected by the SCoP
// detection derived from their LLVM-IR code.
//
//===----------------------------------------------------------------------===//

#include "polly/ScopBuilder.h"
#include "polly/Options.h"
#include "polly/ScopDetection.h"
#include "polly/ScopInfo.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "polly/Support/VirtualInstruction.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace llvm;
using namespace polly;

#define DEBUG_TYPE "polly-scops"
STATISTIC(ScopFound, "Number of valid Scops");
STATISTIC(RichScopFound, "Number of Scops containing a loop");
STATISTIC(InfeasibleScops,
          "Number of SCoPs with statically infeasible context.");

bool polly::ModelReadOnlyScalars;

static cl::opt<bool, true> XModelReadOnlyScalars(
    "polly-analyze-read-only-scalars",
    cl::desc("Model read-only scalar values in the scop description"),
    cl::location(ModelReadOnlyScalars), cl::Hidden, cl::ZeroOrMore,
    cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool> UnprofitableScalarAccs(
    "polly-unprofitable-scalar-accs",
    cl::desc("Count statements with scalar accesses as not optimizable"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool> DetectFortranArrays(
    "polly-detect-fortran-arrays",
    cl::desc("Detect Fortran arrays and use this for code generation"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool> DetectReductions("polly-detect-reductions",
                                      cl::desc("Detect and exploit reductions"),
                                      cl::Hidden, cl::ZeroOrMore,
                                      cl::init(true), cl::cat(PollyCategory));

// Multiplicative reductions can be disabled separately as these kinds of
// operations can overflow easily. Additive reductions and bit operations are,
// in contrast, pretty stable.
static cl::opt<bool> DisableMultiplicativeReductions(
    "polly-disable-multiplicative-reductions",
    cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore,
    cl::init(false), cl::cat(PollyCategory));

enum class GranularityChoice { BasicBlocks, ScalarIndependence, Stores };

static cl::opt<GranularityChoice> StmtGranularity(
    "polly-stmt-granularity",
    cl::desc(
        "Algorithm to use for splitting basic blocks into multiple statements"),
    cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb",
                          "One statement per basic block"),
               clEnumValN(GranularityChoice::ScalarIndependence, "scalar-indep",
                          "Scalar independence heuristic"),
               clEnumValN(GranularityChoice::Stores, "store",
                          "Store-level granularity")),
    cl::init(GranularityChoice::ScalarIndependence), cl::cat(PollyCategory));
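// Illustrative usage (assumed invocation, not part of this file): the
// granularity is selected on the opt command line, e.g.
//   opt -polly-scops -polly-stmt-granularity=bb ...
// where "bb" restores the classic one-statement-per-basic-block modeling,
// while the default "scalar-indep" heuristic may split a block into several
// statements that only communicate through memory.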
void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
                                   Region *NonAffineSubRegion,
                                   bool IsExitBlock) {
  // PHI nodes that are in the exit block of the region, hence if IsExitBlock
  // is true, are not modeled as ordinary PHI nodes as they are not part of the
  // region. However, we model the operands in the predecessor blocks that are
  // part of the region as regular scalar accesses.

  // If we can synthesize a PHI we can skip it, however only if it is in
  // the region. If it is not it can only be in the exit block of the region.
  // In this case we model the operands but not the PHI itself.
  auto *Scope = LI.getLoopFor(PHI->getParent());
  if (!IsExitBlock && canSynthesize(PHI, *scop, &SE, Scope))
    return;

  // PHI nodes are modeled as if they had been demoted prior to the SCoP
  // detection. Hence, the PHI is a load of a new memory location in which the
  // incoming value was written at the end of the incoming basic block.
  bool OnlyNonAffineSubRegionOperands = true;
  for (unsigned u = 0; u < PHI->getNumIncomingValues(); u++) {
    Value *Op = PHI->getIncomingValue(u);
    BasicBlock *OpBB = PHI->getIncomingBlock(u);
    ScopStmt *OpStmt = scop->getIncomingStmtFor(PHI->getOperandUse(u));

    // Do not build PHI dependences inside a non-affine subregion, but make
    // sure that the necessary scalar values are still made available.
    if (NonAffineSubRegion && NonAffineSubRegion->contains(OpBB)) {
      auto *OpInst = dyn_cast<Instruction>(Op);
      if (!OpInst || !NonAffineSubRegion->contains(OpInst))
        ensureValueRead(Op, OpStmt);
      continue;
    }

    OnlyNonAffineSubRegionOperands = false;
    ensurePHIWrite(PHI, OpStmt, OpBB, Op, IsExitBlock);
  }

  if (!OnlyNonAffineSubRegionOperands && !IsExitBlock) {
    addPHIReadAccess(PHIStmt, PHI);
  }
}
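// Illustrative example (assumed IR, not from this file): for
//   %phi = phi i32 [ %a, %pre ], [ %b, %latch ]
// the builder emits a PHI WRITE of %a in the statement for %pre, a PHI WRITE
// of %b in the statement for %latch, and a PHI READ in the statement
// containing %phi -- mirroring what a demotion of the PHI to memory would
// produce.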
void ScopBuilder::buildScalarDependences(ScopStmt *UserStmt,
                                         Instruction *Inst) {
  assert(!isa<PHINode>(Inst));

  // Pull-in required operands.
  for (Use &Op : Inst->operands())
    ensureValueRead(Op.get(), UserStmt);
}

void ScopBuilder::buildEscapingDependences(Instruction *Inst) {
  // Check for uses of this instruction outside the scop. Because we do not
  // iterate over such instructions and therefore did not "ensure" the
  // existence of a write, we must determine such use here.
  if (scop->isEscaping(Inst))
    ensureValueWrite(Inst);
}
/// Check that a value is a Fortran Array descriptor.
///
/// We check if V has the following structure:
/// %"struct.array1_real(kind=8)" = type { i8*, i<zz>, i<zz>,
///                                   [<num> x %struct.descriptor_dimension] }
///
///
/// %struct.descriptor_dimension = type { i<zz>, i<zz>, i<zz> }
///
/// 1. V's type name starts with "struct.array"
/// 2. V's type has layout as shown.
/// 3. Final member of V's type has name "struct.descriptor_dimension",
/// 4. "struct.descriptor_dimension" has layout as shown.
/// 5. Consistent use of i<zz> where <zz> is some fixed integer number.
///
/// We are interested in such types since this is the code that dragonegg
/// generates for Fortran array descriptors.
///
/// @param V the Value to be checked.
///
/// @returns True if V is a Fortran array descriptor, False otherwise.
bool isFortranArrayDescriptor(Value *V) {
  PointerType *PTy = dyn_cast<PointerType>(V->getType());

  if (!PTy)
    return false;

  Type *Ty = PTy->getElementType();
  assert(Ty && "Ty expected to be initialized");
  auto *StructArrTy = dyn_cast<StructType>(Ty);

  if (!(StructArrTy && StructArrTy->hasName()))
    return false;

  if (!StructArrTy->getName().startswith("struct.array"))
    return false;

  if (StructArrTy->getNumElements() != 4)
    return false;

  const ArrayRef<Type *> ArrMemberTys = StructArrTy->elements();

  // i8* match
  if (ArrMemberTys[0] != Type::getInt8PtrTy(V->getContext()))
    return false;

  // Get a reference to the int type and check that all the members
  // share the same int type
  Type *IntTy = ArrMemberTys[1];
  if (ArrMemberTys[2] != IntTy)
    return false;

  // type: [<num> x %struct.descriptor_dimension]
  ArrayType *DescriptorDimArrayTy = dyn_cast<ArrayType>(ArrMemberTys[3]);
  if (!DescriptorDimArrayTy)
    return false;

  // type: %struct.descriptor_dimension := type { ixx, ixx, ixx }
  StructType *DescriptorDimTy =
      dyn_cast<StructType>(DescriptorDimArrayTy->getElementType());

  if (!(DescriptorDimTy && DescriptorDimTy->hasName()))
    return false;

  if (DescriptorDimTy->getName() != "struct.descriptor_dimension")
    return false;

  if (DescriptorDimTy->getNumElements() != 3)
    return false;

  for (auto MemberTy : DescriptorDimTy->elements()) {
    if (MemberTy != IntTy)
      return false;
  }

  return true;
}
Value *ScopBuilder::findFADAllocationVisible(MemAccInst Inst) {
  // match: 4.1 & 4.2 store/load
  if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst))
    return nullptr;

  // match: 4
  if (Inst.getAlignment() != 8)
    return nullptr;

  Value *Address = Inst.getPointerOperand();

  const BitCastInst *Bitcast = nullptr;
  // [match: 3]
  if (auto *Slot = dyn_cast<GetElementPtrInst>(Address)) {
    Value *TypedMem = Slot->getPointerOperand();
    // match: 2
    Bitcast = dyn_cast<BitCastInst>(TypedMem);
  } else {
    // match: 2
    Bitcast = dyn_cast<BitCastInst>(Address);
  }

  if (!Bitcast)
    return nullptr;

  auto *MallocMem = Bitcast->getOperand(0);

  // match: 1
  auto *MallocCall = dyn_cast<CallInst>(MallocMem);
  if (!MallocCall)
    return nullptr;

  Function *MallocFn = MallocCall->getCalledFunction();
  if (!(MallocFn && MallocFn->hasName() && MallocFn->getName() == "malloc"))
    return nullptr;

  // Find all uses of the malloc'd memory.
  // We are looking for a "store" into a struct with the type being the
  // Fortran descriptor type.
  for (auto user : MallocMem->users()) {
    // match: 5
    auto *MallocStore = dyn_cast<StoreInst>(user);
    if (!MallocStore)
      continue;

    auto *DescriptorGEP =
        dyn_cast<GEPOperator>(MallocStore->getPointerOperand());
    if (!DescriptorGEP)
      continue;

    // match: 5
    auto DescriptorType =
        dyn_cast<StructType>(DescriptorGEP->getSourceElementType());
    if (!(DescriptorType && DescriptorType->hasName()))
      continue;

    Value *Descriptor = dyn_cast<Value>(DescriptorGEP->getPointerOperand());

    if (!Descriptor)
      continue;

    if (!isFortranArrayDescriptor(Descriptor))
      continue;

    return Descriptor;
  }

  return nullptr;
}
Value *ScopBuilder::findFADAllocationInvisible(MemAccInst Inst) {
  // match: 3
  if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst))
    return nullptr;

  Value *Slot = Inst.getPointerOperand();

  LoadInst *MemLoad = nullptr;
  // [match: 2]
  if (auto *SlotGEP = dyn_cast<GetElementPtrInst>(Slot)) {
    // match: 1
    MemLoad = dyn_cast<LoadInst>(SlotGEP->getPointerOperand());
  } else {
    // match: 1
    MemLoad = dyn_cast<LoadInst>(Slot);
  }

  if (!MemLoad)
    return nullptr;

  auto *BitcastOperator =
      dyn_cast<BitCastOperator>(MemLoad->getPointerOperand());
  if (!BitcastOperator)
    return nullptr;

  Value *Descriptor = dyn_cast<Value>(BitcastOperator->getOperand(0));
  if (!Descriptor)
    return nullptr;

  if (!isFortranArrayDescriptor(Descriptor))
    return nullptr;

  return Descriptor;
}
bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt) {
  Value *Val = Inst.getValueOperand();
  Type *ElementType = Val->getType();
  Value *Address = Inst.getPointerOperand();
  const SCEV *AccessFunction =
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
  const SCEVUnknown *BasePointer =
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
  enum MemoryAccess::AccessType AccType =
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;

  if (auto *BitCast = dyn_cast<BitCastInst>(Address)) {
    auto *Src = BitCast->getOperand(0);
    auto *SrcTy = Src->getType();
    auto *DstTy = BitCast->getType();
    // Do not try to delinearize non-sized (opaque) pointers.
    if ((SrcTy->isPointerTy() && !SrcTy->getPointerElementType()->isSized()) ||
        (DstTy->isPointerTy() && !DstTy->getPointerElementType()->isSized())) {
      return false;
    }
    if (SrcTy->isPointerTy() && DstTy->isPointerTy() &&
        DL.getTypeAllocSize(SrcTy->getPointerElementType()) ==
            DL.getTypeAllocSize(DstTy->getPointerElementType()))
      Address = Src;
  }

  auto *GEP = dyn_cast<GetElementPtrInst>(Address);
  if (!GEP)
    return false;

  std::vector<const SCEV *> Subscripts;
  std::vector<int> Sizes;
  std::tie(Subscripts, Sizes) = getIndexExpressionsFromGEP(GEP, SE);
  auto *BasePtr = GEP->getOperand(0);

  if (auto *BasePtrCast = dyn_cast<BitCastInst>(BasePtr))
    BasePtr = BasePtrCast->getOperand(0);

  // Check for identical base pointers to ensure that we do not miss index
  // offsets that have been added before this GEP is applied.
  if (BasePtr != BasePointer->getValue())
    return false;

  std::vector<const SCEV *> SizesSCEV;

  const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();

  Loop *SurroundingLoop = Stmt->getSurroundingLoop();
  for (auto *Subscript : Subscripts) {
    InvariantLoadsSetTy AccessILS;
    if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, Subscript, SE,
                      &AccessILS))
      return false;

    for (LoadInst *LInst : AccessILS)
      if (!ScopRIL.count(LInst))
        return false;
  }

  if (Sizes.empty())
    return false;

  SizesSCEV.push_back(nullptr);

  for (auto V : Sizes)
    SizesSCEV.push_back(SE.getSCEV(
        ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V)));

  addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
                 true, Subscripts, SizesSCEV, Val);
  return true;
}
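// Illustrative example (assumed source, not from this file): for a C access
// A[i][j] into "float A[32][64]", the GEP yields Subscripts = {i, j} together
// with the constant inner dimension size 64, so the access can be modeled
// multi-dimensionally as
//   Stmt[i, j] -> MemRef_A[i, j]
// rather than as the flattened offset 64 * i + j.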
bool ScopBuilder::buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt) {
  if (!PollyDelinearize)
    return false;

  Value *Address = Inst.getPointerOperand();
  Value *Val = Inst.getValueOperand();
  Type *ElementType = Val->getType();
  unsigned ElementSize = DL.getTypeAllocSize(ElementType);
  enum MemoryAccess::AccessType AccType =
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;

  const SCEV *AccessFunction =
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
  const SCEVUnknown *BasePointer =
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));

  assert(BasePointer && "Could not find base pointer");

  auto &InsnToMemAcc = scop->getInsnToMemAccMap();
  auto AccItr = InsnToMemAcc.find(Inst);
  if (AccItr == InsnToMemAcc.end())
    return false;

  std::vector<const SCEV *> Sizes = {nullptr};

  Sizes.insert(Sizes.end(), AccItr->second.Shape->DelinearizedSizes.begin(),
               AccItr->second.Shape->DelinearizedSizes.end());

  // In case only the element size is contained in the 'Sizes' array, the
  // access does not access a real multi-dimensional array. Hence, we allow
  // the normal single-dimensional access construction to handle this.
  if (Sizes.size() == 1)
    return false;

  // Remove the element size. This information is already provided by the
  // ElementSize parameter. In case the element size of this access and the
  // element size used for delinearization differ, the delinearization is
  // incorrect. Hence, we invalidate the scop.
  //
  // TODO: Handle delinearization with differing element sizes.
  auto DelinearizedSize =
      cast<SCEVConstant>(Sizes.back())->getAPInt().getSExtValue();
  Sizes.pop_back();
  if (ElementSize != DelinearizedSize)
    scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent());

  addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
                 true, AccItr->second.DelinearizedSubscripts, Sizes, Val);
  return true;
}
bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt) {
  auto *MemIntr = dyn_cast_or_null<MemIntrinsic>(Inst);

  if (MemIntr == nullptr)
    return false;

  auto *L = LI.getLoopFor(Inst->getParent());
  auto *LengthVal = SE.getSCEVAtScope(MemIntr->getLength(), L);
  assert(LengthVal);

  // Check if the length val is actually affine or if we overapproximate it
  InvariantLoadsSetTy AccessILS;
  const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();

  Loop *SurroundingLoop = Stmt->getSurroundingLoop();
  bool LengthIsAffine = isAffineExpr(&scop->getRegion(), SurroundingLoop,
                                     LengthVal, SE, &AccessILS);
  for (LoadInst *LInst : AccessILS)
    if (!ScopRIL.count(LInst))
      LengthIsAffine = false;
  if (!LengthIsAffine)
    LengthVal = nullptr;

  auto *DestPtrVal = MemIntr->getDest();
  assert(DestPtrVal);

  auto *DestAccFunc = SE.getSCEVAtScope(DestPtrVal, L);
  assert(DestAccFunc);
  // Ignore accesses to "NULL".
  // TODO: We could use this to optimize the region further, e.g., intersect
  //       the context with
  //          isl_set_complement(isl_set_params(getDomain()))
  //       as we know it would be undefined to execute this instruction anyway.
  if (DestAccFunc->isZero())
    return true;

  auto *DestPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(DestAccFunc));
  assert(DestPtrSCEV);
  DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV);
  addArrayAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(),
                 IntegerType::getInt8Ty(DestPtrVal->getContext()),
                 LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr},
                 Inst.getValueOperand());

  auto *MemTrans = dyn_cast<MemTransferInst>(MemIntr);
  if (!MemTrans)
    return true;

  auto *SrcPtrVal = MemTrans->getSource();
  assert(SrcPtrVal);

  auto *SrcAccFunc = SE.getSCEVAtScope(SrcPtrVal, L);
  assert(SrcAccFunc);
  // Ignore accesses to "NULL".
  // TODO: See above TODO
  if (SrcAccFunc->isZero())
    return true;

  auto *SrcPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(SrcAccFunc));
  assert(SrcPtrSCEV);
  SrcAccFunc = SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV);
  addArrayAccess(Stmt, Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(),
                 IntegerType::getInt8Ty(SrcPtrVal->getContext()),
                 LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr},
                 Inst.getValueOperand());

  return true;
}
bool ScopBuilder::buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt) {
  auto *CI = dyn_cast_or_null<CallInst>(Inst);

  if (CI == nullptr)
    return false;

  if (CI->doesNotAccessMemory() || isIgnoredIntrinsic(CI) || isDebugCall(CI))
    return true;

  bool ReadOnly = false;
  auto *AF = SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0);
  auto *CalledFunction = CI->getCalledFunction();
  switch (AA.getModRefBehavior(CalledFunction)) {
  case FMRB_UnknownModRefBehavior:
    llvm_unreachable("Unknown mod ref behaviour cannot be represented.");
  case FMRB_DoesNotAccessMemory:
    return true;
  case FMRB_DoesNotReadMemory:
  case FMRB_OnlyAccessesInaccessibleMem:
  case FMRB_OnlyAccessesInaccessibleOrArgMem:
    return false;
  case FMRB_OnlyReadsMemory:
    GlobalReads.emplace_back(Stmt, CI);
    return true;
  case FMRB_OnlyReadsArgumentPointees:
    ReadOnly = true;
    LLVM_FALLTHROUGH;
  case FMRB_OnlyAccessesArgumentPointees: {
    auto AccType = ReadOnly ? MemoryAccess::READ : MemoryAccess::MAY_WRITE;
    Loop *L = LI.getLoopFor(Inst->getParent());
    for (const auto &Arg : CI->arg_operands()) {
      if (!Arg->getType()->isPointerTy())
        continue;

      auto *ArgSCEV = SE.getSCEVAtScope(Arg, L);
      if (ArgSCEV->isZero())
        continue;

      auto *ArgBasePtr = cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
      addArrayAccess(Stmt, Inst, AccType, ArgBasePtr->getValue(),
                     ArgBasePtr->getType(), false, {AF}, {nullptr}, CI);
    }
    return true;
  }
  }

  return true;
}
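// Note on the FMRB_OnlyReadsMemory case above (context added for exposition):
// such calls, e.g. functions marked 'readonly', cannot be attributed to a
// particular array at this point, so they are queued in GlobalReads and are
// later modeled conservatively as reads of the collected array base pointers;
// that handling lives outside this excerpt.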
void ScopBuilder::buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt) {
  Value *Address = Inst.getPointerOperand();
  Value *Val = Inst.getValueOperand();
  Type *ElementType = Val->getType();
  enum MemoryAccess::AccessType AccType =
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;

  const SCEV *AccessFunction =
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
  const SCEVUnknown *BasePointer =
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));

  assert(BasePointer && "Could not find base pointer");
  AccessFunction = SE.getMinusSCEV(AccessFunction, BasePointer);

  // Check if the access depends on a loop contained in a non-affine subregion.
  bool isVariantInNonAffineLoop = false;
  SetVector<const Loop *> Loops;
  findLoops(AccessFunction, Loops);
  for (const Loop *L : Loops)
    if (Stmt->contains(L)) {
      isVariantInNonAffineLoop = true;
      break;
    }

  InvariantLoadsSetTy AccessILS;

  Loop *SurroundingLoop = Stmt->getSurroundingLoop();
  bool IsAffine = !isVariantInNonAffineLoop &&
                  isAffineExpr(&scop->getRegion(), SurroundingLoop,
                               AccessFunction, SE, &AccessILS);

  const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
  for (LoadInst *LInst : AccessILS)
    if (!ScopRIL.count(LInst))
      IsAffine = false;

  if (!IsAffine && AccType == MemoryAccess::MUST_WRITE)
    AccType = MemoryAccess::MAY_WRITE;

  addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
                 IsAffine, {AccessFunction}, {nullptr}, Val);
}
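// Illustrative example (assumed source, not from this file): a store such as
// "A[i * i] = 0;" has a non-affine subscript, so IsAffine is false and the
// access is downgraded to a MAY_WRITE that is overapproximated to touch the
// whole array A.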
void ScopBuilder::buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt) {
  if (buildAccessMemIntrinsic(Inst, Stmt))
    return;

  if (buildAccessCallInst(Inst, Stmt))
    return;

  if (buildAccessMultiDimFixed(Inst, Stmt))
    return;

  if (buildAccessMultiDimParam(Inst, Stmt))
    return;

  buildAccessSingleDim(Inst, Stmt);
}
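// The dispatch above proceeds from most specific to most general: memory
// intrinsics, then calls, then fixed-size and parametric multi-dimensional
// views; buildAccessSingleDim cannot fail and serves as the final fallback
// that models the access with a single flattened subscript.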
void ScopBuilder::buildAccessFunctions() {
  for (auto &Stmt : *scop) {
    if (Stmt.isBlockStmt()) {
      buildAccessFunctions(&Stmt, *Stmt.getBasicBlock());
      continue;
    }

    Region *R = Stmt.getRegion();
    for (BasicBlock *BB : R->blocks())
      buildAccessFunctions(&Stmt, *BB, R);
  }

  // Build write accesses for values that are used after the SCoP.
  // The instructions defining them might be synthesizable and therefore not
  // contained in any statement, hence we iterate over the original
  // instructions to identify all escaping values.
  for (BasicBlock *BB : scop->getRegion().blocks()) {
    for (Instruction &Inst : *BB)
      buildEscapingDependences(&Inst);
  }
}
bool ScopBuilder::shouldModelInst(Instruction *Inst, Loop *L) {
  return !Inst->isTerminator() && !isIgnoredIntrinsic(Inst) &&
         !canSynthesize(Inst, *scop, &SE, L);
}
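// Example (added for exposition; assumed IR): an induction-variable update
// such as
//   %i.next = add nuw nsw i64 %i, 1
// is synthesizable from its SCEV expression, so shouldModelInst() returns
// false and the instruction is regenerated at code generation time instead of
// being copied into a statement.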
/// Generate a name for a statement.
///
/// @param BB     The basic block the statement will represent.
/// @param BBIdx  The index of the @p BB relative to other BBs/regions.
/// @param Count  The index of the created statement in @p BB.
/// @param IsMain Whether this is the main of all statements for @p BB. If
///               true, no suffix will be added.
/// @param IsLast Uses a special indicator for the last statement of a BB.
static std::string makeStmtName(BasicBlock *BB, long BBIdx, int Count,
                                bool IsMain, bool IsLast = false) {
  std::string Suffix;
  if (!IsMain) {
    if (UseInstructionNames)
      Suffix = '_';
    if (IsLast)
      Suffix += "last";
    else if (Count < 26)
      Suffix += 'a' + Count;
    else
      Suffix += std::to_string(Count);
  }
  return getIslCompatibleName("Stmt", BB, BBIdx, Suffix, UseInstructionNames);
}

/// Generate a name for a statement that represents a non-affine subregion.
///
/// @param R    The region the statement will represent.
/// @param RIdx The index of the @p R relative to other BBs/regions.
static std::string makeStmtName(Region *R, long RIdx) {
  return getIslCompatibleName("Stmt", R->getNameStr(), RIdx, "",
                              UseInstructionNames);
}
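// Illustrative naming (assumed block names, not taken from this file): when
// LLVM value names are used, a block %for.body split into several statements
// may yield "Stmt_for_body" (the main statement), "Stmt_for_body_b" (a
// split-off part), and "Stmt_for_body_last" (the epilogue); without value
// names the numeric BB/region index is used instead.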
void ScopBuilder::buildSequentialBlockStmts(BasicBlock *BB, bool SplitOnStore) {
  Loop *SurroundingLoop = LI.getLoopFor(BB);

  int Count = 0;
  long BBIdx = scop->getNextStmtIdx();
  std::vector<Instruction *> Instructions;
  for (Instruction &Inst : *BB) {
    if (shouldModelInst(&Inst, SurroundingLoop))
      Instructions.push_back(&Inst);
    if (Inst.getMetadata("polly_split_after") ||
        (SplitOnStore && isa<StoreInst>(Inst))) {
      std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
      scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
      Count++;
      Instructions.clear();
    }
  }

  std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
  scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
}
/// Is @p Inst an ordered instruction?
///
/// An unordered instruction is an instruction such that a sequence of
/// unordered instructions can be permuted without changing semantics. Any
/// instruction for which this is not always the case is ordered.
static bool isOrderedInstruction(Instruction *Inst) {
  return Inst->mayHaveSideEffects() || Inst->mayReadOrWriteMemory();
}
/// Join instructions into the same statement if one uses the scalar result of
/// the other.
static void joinOperandTree(EquivalenceClasses<Instruction *> &UnionFind,
                            ArrayRef<Instruction *> ModeledInsts) {
  for (Instruction *Inst : ModeledInsts) {
    if (isa<PHINode>(Inst))
      continue;

    for (Use &Op : Inst->operands()) {
      Instruction *OpInst = dyn_cast<Instruction>(Op.get());
      if (!OpInst)
        continue;

      // Check if OpInst is in the BB and is a modeled instruction.
      auto OpVal = UnionFind.findValue(OpInst);
      if (OpVal == UnionFind.end())
        continue;

      UnionFind.unionSets(Inst, OpInst);
    }
  }
}
/// Ensure that the order of ordered instructions does not change.
///
/// If we encounter an ordered instruction enclosed in instructions belonging
/// to a different statement (which might as well contain ordered instructions,
/// but this is not tested here), join them.
static void
joinOrderedInstructions(EquivalenceClasses<Instruction *> &UnionFind,
                        ArrayRef<Instruction *> ModeledInsts) {
  SetVector<Instruction *> SeenLeaders;
  for (Instruction *Inst : ModeledInsts) {
    if (!isOrderedInstruction(Inst))
      continue;

    Instruction *Leader = UnionFind.getLeaderValue(Inst);
    bool Inserted = SeenLeaders.insert(Leader);
    if (Inserted)
      continue;

    // Merge statements to close holes. Say, we have already seen statements A
    // and B, in this order. Then we see an instruction of A again and we would
    // see the pattern "A B A". This function joins all statements until the
    // only seen occurrence of A.
    for (Instruction *Prev : reverse(SeenLeaders)) {
      // Items added to 'SeenLeaders' are leaders, but may have lost their
      // leadership status when merged into another statement.
      Instruction *PrevLeader = UnionFind.getLeaderValue(SeenLeaders.back());
      if (PrevLeader == Leader)
        break;
      UnionFind.unionSets(Prev, Leader);
    }
  }
}
/// If the BasicBlock has an edge from itself, ensure that the PHI WRITEs for
/// the incoming values from this block are executed after the PHI READ.
///
/// Otherwise it could overwrite the incoming value from before the BB with the
/// value for the next execution. This can happen if the PHI WRITE is added to
/// the statement with the instruction that defines the incoming value (instead
/// of the last statement of the same BB). To ensure that the PHI READ and
/// WRITE are in order, we put both into the statement. PHI WRITEs are always
/// executed after PHI READs when they are in the same statement.
///
/// TODO: This is an overpessimization. We only have to ensure that the PHI
/// WRITE is not put into a statement containing the PHI itself. That could
/// also be done by
/// - having all (strongly connected) PHIs in a single statement,
/// - unite only the PHIs in the operand tree of the PHI WRITE (because it only
///   has a chance of being lifted before a PHI by being in a statement with a
///   PHI that comes before in the basic block), or
/// - when uniting statements, ensure that no (relevant) PHIs are overtaken.
static void joinOrderedPHIs(EquivalenceClasses<Instruction *> &UnionFind,
                            ArrayRef<Instruction *> ModeledInsts) {
  for (Instruction *Inst : ModeledInsts) {
    PHINode *PHI = dyn_cast<PHINode>(Inst);
    if (!PHI)
      continue;

    int Idx = PHI->getBasicBlockIndex(PHI->getParent());
    if (Idx < 0)
      continue;

    Instruction *IncomingVal =
        dyn_cast<Instruction>(PHI->getIncomingValue(Idx));
    if (!IncomingVal)
      continue;

    UnionFind.unionSets(PHI, IncomingVal);
  }
}
void ScopBuilder::buildEqivClassBlockStmts(BasicBlock *BB) {
  Loop *L = LI.getLoopFor(BB);

  // Extracting out modeled instructions saves us from checking
  // shouldModelInst() repeatedly.
  SmallVector<Instruction *, 32> ModeledInsts;
  EquivalenceClasses<Instruction *> UnionFind;
  Instruction *MainInst = nullptr;
  for (Instruction &Inst : *BB) {
    if (!shouldModelInst(&Inst, L))
      continue;
    ModeledInsts.push_back(&Inst);
    UnionFind.insert(&Inst);

    // When a BB is split into multiple statements, the main statement is the
    // one containing the 'main' instruction. We select the first instruction
    // that is unlikely to be removed (because it has side-effects) as the main
    // one. It is used to ensure that at least one statement from the bb has
    // the same name as with -polly-stmt-granularity=bb.
    if (!MainInst && (isa<StoreInst>(Inst) ||
                      (isa<CallInst>(Inst) && !isa<IntrinsicInst>(Inst))))
      MainInst = &Inst;
  }

  joinOperandTree(UnionFind, ModeledInsts);
  joinOrderedInstructions(UnionFind, ModeledInsts);
  joinOrderedPHIs(UnionFind, ModeledInsts);

  // The list of instructions for each statement (a statement is represented
  // by its leader instruction). The order of statement instructions is
  // reversed such that the epilogue is first. This makes it easier to ensure
  // that the epilogue is the last statement.
  MapVector<Instruction *, std::vector<Instruction *>> LeaderToInstList;

  // Collect the instructions of all leaders. UnionFind's member iterators
  // unfortunately are not in any specific order.
  for (Instruction &Inst : reverse(*BB)) {
    auto LeaderIt = UnionFind.findLeader(&Inst);
    if (LeaderIt == UnionFind.member_end())
      continue;

    std::vector<Instruction *> &InstList = LeaderToInstList[*LeaderIt];
    InstList.push_back(&Inst);
  }

  // Finally build the statements.
  int Count = 0;
  long BBIdx = scop->getNextStmtIdx();
  bool MainFound = false;
  for (auto &Instructions : reverse(LeaderToInstList)) {
    std::vector<Instruction *> &InstList = Instructions.second;

    // If there is no main instruction, make the first statement the main.
    bool IsMain;
    if (MainInst)
      IsMain = std::find(InstList.begin(), InstList.end(), MainInst) !=
               InstList.end();
    else
      IsMain = (Count == 0);
    if (IsMain)
      MainFound = true;

    std::reverse(InstList.begin(), InstList.end());
    std::string Name = makeStmtName(BB, BBIdx, Count, IsMain);
    scop->addScopStmt(BB, Name, L, std::move(InstList));
    Count += 1;
  }

  // Unconditionally add an epilogue (last statement). It contains no
  // instructions, but holds the PHI write accesses for successor basic blocks,
  // if the incoming value is not defined in another statement of the same BB.
  // The epilogue will be removed if no PHIWrite is added to it.
  std::string EpilogueName = makeStmtName(BB, BBIdx, Count, !MainFound, true);
  scop->addScopStmt(BB, EpilogueName, L, {});
}
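// Illustrative split (assumed IR, not from this file): in a block
//   %v = load double, double* %A
//   %s = fadd double %v, 1.0
//   store double %s, double* %B
//   %t = load double, double* %C
//   store double %t, double* %D
// the first three instructions form one operand tree and the last two form
// another; since neither group uses the other's scalar results and their
// ordered instructions do not interleave, the block is split into two
// statements that can later be scheduled independently.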
void ScopBuilder::buildStmts(Region &SR) {
  if (scop->isNonAffineSubRegion(&SR)) {
    std::vector<Instruction *> Instructions;
    Loop *SurroundingLoop =
        getFirstNonBoxedLoopFor(SR.getEntry(), LI, scop->getBoxedLoops());
    for (Instruction &Inst : *SR.getEntry())
      if (shouldModelInst(&Inst, SurroundingLoop))
        Instructions.push_back(&Inst);
    long RIdx = scop->getNextStmtIdx();
    std::string Name = makeStmtName(&SR, RIdx);
    scop->addScopStmt(&SR, Name, SurroundingLoop, Instructions);
    return;
  }

  for (auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I)
    if (I->isSubRegion())
      buildStmts(*I->getNodeAs<Region>());
    else {
      BasicBlock *BB = I->getNodeAs<BasicBlock>();
      switch (StmtGranularity) {
      case GranularityChoice::BasicBlocks:
        buildSequentialBlockStmts(BB);
        break;
      case GranularityChoice::ScalarIndependence:
        buildEqivClassBlockStmts(BB);
        break;
      case GranularityChoice::Stores:
        buildSequentialBlockStmts(BB, true);
        break;
      }
    }
}
void ScopBuilder::buildAccessFunctions(ScopStmt *Stmt, BasicBlock &BB,
                                       Region *NonAffineSubRegion) {
  assert(
      Stmt &&
      "The exit BB is the only one that cannot be represented by a statement");
  assert(Stmt->represents(&BB));

  // We do not build access functions for error blocks, as they may contain
  // instructions we can not model.
  if (isErrorBlock(BB, scop->getRegion(), LI, DT))
    return;

  auto BuildAccessesForInst = [this, Stmt,
                               NonAffineSubRegion](Instruction *Inst) {
    PHINode *PHI = dyn_cast<PHINode>(Inst);
    if (PHI)
      buildPHIAccesses(Stmt, PHI, NonAffineSubRegion, false);

    if (auto MemInst = MemAccInst::dyn_cast(*Inst)) {
      assert(Stmt && "Cannot build access function in non-existing statement");
      buildMemoryAccess(MemInst, Stmt);
    }

    // PHI nodes have already been modeled above and terminators that are
    // not part of a non-affine subregion are fully modeled and regenerated
    // from the polyhedral domains. Hence, they do not need to be modeled as
    // explicit data dependences.
    if (!PHI)
      buildScalarDependences(Stmt, Inst);
  };

  const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
  bool IsEntryBlock = (Stmt->getEntryBlock() == &BB);
  if (IsEntryBlock) {
    for (Instruction *Inst : Stmt->getInstructions())
      BuildAccessesForInst(Inst);
    if (Stmt->isRegionStmt())
      BuildAccessesForInst(BB.getTerminator());
  } else {
    for (Instruction &Inst : BB) {
      if (isIgnoredIntrinsic(&Inst))
        continue;

      // Invariant loads already have been processed.
      if (isa<LoadInst>(Inst) && RIL.count(cast<LoadInst>(&Inst)))
        continue;

      BuildAccessesForInst(&Inst);
    }
  }
}
MemoryAccess *ScopBuilder::addMemoryAccess(
    ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType,
    Value *BaseAddress, Type *ElementType, bool Affine, Value *AccessValue,
    ArrayRef<const SCEV *> Subscripts, ArrayRef<const SCEV *> Sizes,
    MemoryKind Kind) {
  bool isKnownMustAccess = false;

  // Accesses in single-basic block statements are always executed.
  if (Stmt->isBlockStmt())
    isKnownMustAccess = true;

  if (Stmt->isRegionStmt()) {
    // Accesses that dominate the exit block of a non-affine region are always
    // executed. In non-affine regions there may exist MemoryKind::Values that
    // do not dominate the exit. MemoryKind::Values will always dominate the
    // exit and MemoryKind::PHIs only if there is at most one PHI_WRITE in the
    // non-affine region.
    if (Inst && DT.dominates(Inst->getParent(), Stmt->getRegion()->getExit()))
      isKnownMustAccess = true;
  }

  // Non-affine PHI writes do not "happen" at a particular instruction, but
  // after exiting the statement. Therefore they are guaranteed to execute and
  // overwrite the old value.
  if (Kind == MemoryKind::PHI || Kind == MemoryKind::ExitPHI)
    isKnownMustAccess = true;

  if (!isKnownMustAccess && AccType == MemoryAccess::MUST_WRITE)
    AccType = MemoryAccess::MAY_WRITE;

  auto *Access = new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType,
                                  Affine, Subscripts, Sizes, AccessValue, Kind);

  scop->addAccessFunction(Access);
  Stmt->addAccess(Access);
  return Access;
}
void ScopBuilder::addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst,
                                 MemoryAccess::AccessType AccType,
                                 Value *BaseAddress, Type *ElementType,
                                 bool IsAffine,
                                 ArrayRef<const SCEV *> Subscripts,
                                 ArrayRef<const SCEV *> Sizes,
                                 Value *AccessValue) {
  ArrayBasePointers.insert(BaseAddress);
  auto *MemAccess = addMemoryAccess(Stmt, MemAccInst, AccType, BaseAddress,
                                    ElementType, IsAffine, AccessValue,
                                    Subscripts, Sizes, MemoryKind::Array);

  if (!DetectFortranArrays)
    return;

  if (Value *FAD = findFADAllocationInvisible(MemAccInst))
    MemAccess->setFortranArrayDescriptor(FAD);
  else if (Value *FAD = findFADAllocationVisible(MemAccInst))
    MemAccess->setFortranArrayDescriptor(FAD);
}
void ScopBuilder::ensureValueWrite(Instruction *Inst) {
  // Find the statement that defines the value of Inst. That statement has to
  // write the value to make it available to those statements that read it.
  ScopStmt *Stmt = scop->getStmtFor(Inst);

  // It is possible that the value is synthesizable within a loop (such that it
  // is not part of any statement), but not after the loop (where you need the
  // number of loop round-trips to synthesize it). In LCSSA-form a PHI node
  // will avoid this. In case the IR has no such PHI, use the last statement
  // (where the value is synthesizable) to write the value.
  if (!Stmt)
    Stmt = scop->getLastStmtFor(Inst->getParent());

  // Inst not defined within this SCoP.
  if (!Stmt)
    return;

  // Do not process further if the instruction is already written.
  if (Stmt->lookupValueWriteOf(Inst))
    return;

  addMemoryAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, Inst, Inst->getType(),
                  true, Inst, ArrayRef<const SCEV *>(),
                  ArrayRef<const SCEV *>(), MemoryKind::Value);
}
void ScopBuilder::ensureValueRead(Value *V, ScopStmt *UserStmt) {
  // TODO: Make ScopStmt::ensureValueRead(Value*) offer the same functionality
  // to be able to replace this one. Currently, there is a split
  // responsibility. In a first step, the MemoryAccess is created, but without
  // the AccessRelation. In the second step by
  // ScopStmt::buildAccessRelations(), the AccessRelation is created. At least
  // for scalar accesses, there is no new information available at
  // ScopStmt::buildAccessRelations(), so we could create the AccessRelation
  // right away. This is what ScopStmt::ensureValueRead(Value*) does.

  auto *Scope = UserStmt->getSurroundingLoop();
  auto VUse = VirtualUse::create(scop.get(), UserStmt, Scope, V, false);
  switch (VUse.getKind()) {
  case VirtualUse::Constant:
  case VirtualUse::Block:
  case VirtualUse::Synthesizable:
  case VirtualUse::Hoisted:
  case VirtualUse::Intra:
    // Uses of these kinds do not need a MemoryAccess.
    break;

  case VirtualUse::ReadOnly:
    // Add MemoryAccess for invariant values only if requested.
    if (!ModelReadOnlyScalars)
      break;

    LLVM_FALLTHROUGH;
  case VirtualUse::Inter:

    // Do not create another MemoryAccess for reloading the value if one
    // already exists.
    if (UserStmt->lookupValueReadOf(V))
      break;

    addMemoryAccess(UserStmt, nullptr, MemoryAccess::READ, V, V->getType(),
                    true, V, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
                    MemoryKind::Value);

    // Inter-statement uses need to write the value in their defining
    // statement.
    if (VUse.isInter())
      ensureValueWrite(cast<Instruction>(V));
    break;
  }
}
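// Illustrative effect (description added for exposition): a value %v defined
// in one statement and read in another receives a MemoryKind::Value
// MUST_WRITE in its defining statement and a READ in each using statement,
// akin to demoting %v to a shared stack slot between the two statements.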
void ScopBuilder::ensurePHIWrite(PHINode *PHI, ScopStmt *IncomingStmt,
                                 BasicBlock *IncomingBlock,
                                 Value *IncomingValue, bool IsExitBlock) {
  // As the incoming block might turn out to be an error statement, ensure we
  // will create an exit PHI SAI object. It is needed during code generation
  // and would be created later anyway.
  if (IsExitBlock)
    scop->getOrCreateScopArrayInfo(PHI, PHI->getType(), {},
                                   MemoryKind::ExitPHI);

  // This is possible if PHI is in the SCoP's entry block. The incoming blocks
  // from outside the SCoP's region have no statement representation.
  if (!IncomingStmt)
    return;

  // Take care for the incoming value being available in the incoming block.
  // This must be done before the check for multiple PHI writes because
  // multiple exiting edges from a subregion each can be the effective written
  // value of the subregion. As such, all of them must be made available in
  // the subregion statement.
  ensureValueRead(IncomingValue, IncomingStmt);

  // Do not add more than one MemoryAccess per PHINode and ScopStmt.
  if (MemoryAccess *Acc = IncomingStmt->lookupPHIWriteOf(PHI)) {
    assert(Acc->getAccessInstruction() == PHI);
    Acc->addIncoming(IncomingBlock, IncomingValue);
    return;
  }

  MemoryAccess *Acc = addMemoryAccess(
      IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, PHI->getType(), true,
      PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
      IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI);
  assert(Acc);
  Acc->addIncoming(IncomingBlock, IncomingValue);
}
void ScopBuilder::addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI) {
  addMemoryAccess(PHIStmt, PHI, MemoryAccess::READ, PHI, PHI->getType(), true,
                  PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
                  MemoryKind::PHI);
}
void ScopBuilder::buildDomain(ScopStmt &Stmt) {
  isl::id Id = isl::id::alloc(scop->getIslCtx(), Stmt.getBaseName(), &Stmt);

  Stmt.Domain = scop->getDomainConditions(&Stmt);
  Stmt.Domain = Stmt.Domain.set_tuple_id(Id);
}
void ScopBuilder::collectSurroundingLoops(ScopStmt &Stmt) {
  isl::set Domain = Stmt.getDomain();
  BasicBlock *BB = Stmt.getEntryBlock();

  Loop *L = LI.getLoopFor(BB);

  while (L && Stmt.isRegionStmt() && Stmt.getRegion()->contains(L))
    L = L->getParentLoop();

  SmallVector<llvm::Loop *, 8> Loops;

  while (L && Stmt.getParent()->getRegion().contains(L)) {
    Loops.push_back(L);
    L = L->getParentLoop();
  }

  Stmt.NestLoops.insert(Stmt.NestLoops.begin(), Loops.rbegin(), Loops.rend());
}
/// Return the reduction type for a given binary operator.
static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp,
                                                    const Instruction *Load) {
  if (!BinOp)
    return MemoryAccess::RT_NONE;
  switch (BinOp->getOpcode()) {
  case Instruction::FAdd:
    if (!BinOp->isFast())
      return MemoryAccess::RT_NONE;
    LLVM_FALLTHROUGH;
  case Instruction::Add:
    return MemoryAccess::RT_ADD;
  case Instruction::Or:
    return MemoryAccess::RT_BOR;
  case Instruction::Xor:
    return MemoryAccess::RT_BXOR;
  case Instruction::And:
    return MemoryAccess::RT_BAND;
  case Instruction::FMul:
    if (!BinOp->isFast())
      return MemoryAccess::RT_NONE;
    LLVM_FALLTHROUGH;
  case Instruction::Mul:
    if (DisableMultiplicativeReductions)
      return MemoryAccess::RT_NONE;
    return MemoryAccess::RT_MUL;
  default:
    return MemoryAccess::RT_NONE;
  }
}
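// Example (added for exposition; assumed source): for "sum = sum + A[i]" the
// binary operator is an integer Add, so RT_ADD is returned; a floating-point
// fadd or fmul qualifies only when its fast-math flags permit reassociation
// (BinOp->isFast()), since reordering FP reductions changes rounding.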
void ScopBuilder::checkForReductions(ScopStmt &Stmt) {
  SmallVector<MemoryAccess *, 2> Loads;
  SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates;

  // First collect candidate load-store reduction chains by iterating over all
  // stores and collecting possible reduction loads.
  for (MemoryAccess *StoreMA : Stmt) {
    if (StoreMA->isRead())
      continue;

    Loads.clear();
    collectCandidateReductionLoads(StoreMA, Loads);
    for (MemoryAccess *LoadMA : Loads)
      Candidates.push_back(std::make_pair(LoadMA, StoreMA));
  }

  // Then check each possible candidate pair.
  for (const auto &CandidatePair : Candidates) {
    bool Valid = true;
    isl::map LoadAccs = CandidatePair.first->getAccessRelation();
    isl::map StoreAccs = CandidatePair.second->getAccessRelation();

    // Skip those with obviously unequal base addresses.
    if (!LoadAccs.has_equal_space(StoreAccs)) {
      continue;
    }

    // And check the remaining candidates for overlap with other memory
    // accesses.
    isl::map AllAccsRel = LoadAccs.unite(StoreAccs);
    AllAccsRel = AllAccsRel.intersect_domain(Stmt.getDomain());
    isl::set AllAccs = AllAccsRel.range();

    for (MemoryAccess *MA : Stmt) {
      if (MA == CandidatePair.first || MA == CandidatePair.second)
        continue;

      isl::map AccRel =
          MA->getAccessRelation().intersect_domain(Stmt.getDomain());
      isl::set Accs = AccRel.range();

      if (AllAccs.has_equal_space(Accs)) {
        isl::set OverlapAccs = Accs.intersect(AllAccs);
        Valid = Valid && OverlapAccs.is_empty();
      }
    }

    if (!Valid)
      continue;

    const LoadInst *Load =
        dyn_cast<const LoadInst>(CandidatePair.first->getAccessInstruction());
    MemoryAccess::ReductionType RT =
        getReductionType(dyn_cast<BinaryOperator>(Load->user_back()), Load);

    // If no overlapping access was found we mark the load and store as
    // reduction like.
    CandidatePair.first->markAsReductionLike(RT);
    CandidatePair.second->markAsReductionLike(RT);
  }
}
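// Illustrative reduction (assumed source, not from this file): in
//   for (i = 0; i < N; i++)
//     sum += A[i];
// the statement's load and store of 'sum' share the same access space, the
// add is the load's only user, and no other access in the statement overlaps,
// so both accesses are marked reduction-like with RT_ADD.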
1289
void ScopBuilder::collectCandidateReductionLoads(
1290
849
    MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) {
1291
849
  ScopStmt *Stmt = StoreMA->getStatement();
1292
849
1293
849
  auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction());
1294
849
  if (!Store)
1295
254
    return;
1296
595
1297
595
  // Skip if there is not one binary operator between the load and the store
1298
595
  auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand());
1299
595
  if (!BinOp)
1300
379
    return;
1301
216
1302
216
  // Skip if the binary operators has multiple uses
1303
216
  if (BinOp->getNumUses() != 1)
1304
23
    return;
1305
193
1306
193
  // Skip if the opcode of the binary operator is not commutative/associative
1307
193
  if (!BinOp->isCommutative() || 
!BinOp->isAssociative()188
)
1308
105
    return;
1309
88
1310
88
  // Skip if the binary operator is outside the current SCoP
1311
88
  if (BinOp->getParent() != Store->getParent())
1312
0
    return;
1313
88
1314
88
  // Skip if it is a multiplicative reduction and we disabled them
1315
88
  if (DisableMultiplicativeReductions &&
1316
88
      (BinOp->getOpcode() == Instruction::Mul ||
1317
0
       BinOp->getOpcode() == Instruction::FMul))
1318
0
    return;
1319
88
1320
88
  // Check the binary operator operands for a candidate load
1321
88
  auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0));
1322
88
  auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1));
1323
88
  if (!PossibleLoad0 && !PossibleLoad1)
1324
2
    return;
1325
86
1326
86
  // A load is only a candidate if it cannot escape (thus has only this use)
1327
86
  if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1)
1328
81
    if (PossibleLoad0->getParent() == Store->getParent())
1329
80
      Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad0));
1330
86
  if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1)
1331
28
    if (PossibleLoad1->getParent() == Store->getParent())
1332
28
      Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad1));
1333
86
}
1334
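Concretely, the chain this function accepts is the body of a classic accumulation loop: a load of the reduction location, exactly one commutative and associative binary operator consuming it, and a store back to the same location, all within one basic block. A minimal C++ loop that lowers to such a chain (function and parameter names are illustrative):

// *Sum is loaded, combined with A[i] by a single commutative/associative
// add, and stored back: exactly the load -> binop -> store pattern above.
void sumReduction(const int *A, int *Sum, int N) {
  for (int i = 0; i < N; ++i)
    *Sum = *Sum + A[i];
}

A multiplicative variant (*Prod = *Prod * A[i]) matches the same shape but is rejected when DisableMultiplicativeReductions is set.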
1335
747
void ScopBuilder::buildAccessRelations(ScopStmt &Stmt) {
1336
1.60k
  for (MemoryAccess *Access : Stmt.MemAccs) {
1337
1.60k
    Type *ElementType = Access->getElementType();
1338
1.60k
1339
1.60k
    MemoryKind Ty;
1340
1.60k
    if (Access->isPHIKind())
1341
209
      Ty = MemoryKind::PHI;
1342
1.39k
    else if (Access->isExitPHIKind())
1343
20
      Ty = MemoryKind::ExitPHI;
1344
1.37k
    else if (Access->isValueKind())
1345
222
      Ty = MemoryKind::Value;
1346
1.14k
    else
1347
1.14k
      Ty = MemoryKind::Array;
1348
1.60k
1349
1.60k
    auto *SAI = scop->getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(),
1350
1.60k
                                               ElementType, Access->Sizes, Ty);
1351
1.60k
    Access->buildAccessRelation(SAI);
1352
1.60k
    scop->addAccessData(Access);
1353
1.60k
  }
1354
747
}
1355
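The if/else chain above fixes each access's MemoryKind before its access relation is built. As a rough guide to where each kind originates, consider this sketch (the mapping is approximate and the code is illustrative, not Polly API):

int sum(const int *A, int N) {
  int S = 0;             // S's loop-carried value is modeled via MemoryKind::PHI
  for (int i = 0; i < N; ++i)
    S += A[i];           // the read of A[i] is a MemoryKind::Array access
  return S;              // S's value escaping the region is modeled via
                         // MemoryKind::Value / MemoryKind::ExitPHI accesses
}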
1356
#ifndef NDEBUG
1357
static void verifyUse(Scop *S, Use &Op, LoopInfo &LI) {
1358
  auto PhysUse = VirtualUse::create(S, Op, &LI, false);
1359
  auto VirtUse = VirtualUse::create(S, Op, &LI, true);
1360
  assert(PhysUse.getKind() == VirtUse.getKind());
1361
}
1362
1363
/// Check the consistency of every statement's MemoryAccesses.
1364
///
1365
/// The check is carried out by expecting the "physical" kind of use (derived
1366
/// from the BasicBlock the instruction resides in) to be the same as the "virtual"
1367
/// kind of use (derived from a statement's MemoryAccess).
1368
///
1369
/// The "physical" uses are taken by ensureValueRead to determine whether to
1370
/// create MemoryAccesses. When done, the kind of scalar access should be the
1371
/// same no matter which way it was derived.
1372
///
1373
/// The MemoryAccesses might be changed by later SCoP-modifying passes and hence
1374
/// can intentionally influence the kind of uses (not corresponding to the
1375
/// "physical" anymore, hence called "virtual"). The CodeGenerator therefore has
1376
/// to pick up the virtual uses. But here, before code generation, this has not
1377
/// happened yet, such that virtual and physical uses are equivalent.
1378
static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT) {
1379
  for (auto *BB : S->getRegion().blocks()) {
1380
    for (auto &Inst : *BB) {
1381
      auto *Stmt = S->getStmtFor(&Inst);
1382
      if (!Stmt)
1383
        continue;
1384
1385
      if (isIgnoredIntrinsic(&Inst))
1386
        continue;
1387
1388
      // Branch conditions are encoded in the statement domains.
1389
      if (Inst.isTerminator() && Stmt->isBlockStmt())
1390
        continue;
1391
1392
      // Verify all uses.
1393
      for (auto &Op : Inst.operands())
1394
        verifyUse(S, Op, LI);
1395
1396
      // Stores do not produce values used by other statements.
1397
      if (isa<StoreInst>(Inst))
1398
        continue;
1399
1400
      // For every value defined in the block, also check that a use of that
1401
      // value in the same statement would not be an inter-statement use. It can
1402
      // still be synthesizable or load-hoisted, but these kinds of instructions
1403
      // are not directly copied in code-generation.
1404
      auto VirtDef =
1405
          VirtualUse::create(S, Stmt, Stmt->getSurroundingLoop(), &Inst, true);
1406
      assert(VirtDef.getKind() == VirtualUse::Synthesizable ||
1407
             VirtDef.getKind() == VirtualUse::Intra ||
1408
             VirtDef.getKind() == VirtualUse::Hoisted);
1409
    }
1410
  }
1411
1412
  if (S->hasSingleExitEdge())
1413
    return;
1414
1415
  // PHINodes in the SCoP region's exit block are also uses to be checked.
1416
  if (!S->getRegion().isTopLevelRegion()) {
1417
    for (auto &Inst : *S->getRegion().getExit()) {
1418
      if (!isa<PHINode>(Inst))
1419
        break;
1420
1421
      for (auto &Op : Inst.operands())
1422
        verifyUse(S, Op, LI);
1423
    }
1424
  }
1425
}
1426
#endif
1427
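verifyUses is a pure consistency assertion: every use is classified twice, once from the IR ("physical") and once from the MemoryAccesses ("virtual"), and the two classifications must agree. The same pattern in miniature, with both derivations replaced by hypothetical stand-ins:

#include <cassert>

enum class UseKind { Intra, Inter };

// Hypothetical stand-ins for the physical/virtual derivations.
static UseKind kindFromIR(int DefStmt, int UseStmt) {
  return DefStmt == UseStmt ? UseKind::Intra : UseKind::Inter;
}
static UseKind kindFromAccesses(bool HasValueRead) {
  return HasValueRead ? UseKind::Inter : UseKind::Intra;
}

static void verify(int DefStmt, int UseStmt, bool HasValueRead) {
  // Derive the kind two independent ways and require agreement; the assert
  // vanishes in NDEBUG builds, just as verifyUses is only compiled in
  // asserts builds.
  assert(kindFromIR(DefStmt, UseStmt) == kindFromAccesses(HasValueRead));
  (void)DefStmt; (void)UseStmt; (void)HasValueRead;
}

int main() { verify(/*DefStmt=*/1, /*UseStmt=*/1, /*HasValueRead=*/false); }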
1428
/// Return the block that is the representing block for @p RN.
1429
47
static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
1430
47
  return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
1431
47
                           : RN->getNodeAs<BasicBlock>();
1432
47
}
1433
1434
void ScopBuilder::buildScop(Region &R, AssumptionCache &AC,
1435
383
                            OptimizationRemarkEmitter &ORE) {
1436
383
  scop.reset(new Scop(R, SE, LI, DT, *SD.getDetectionContext(&R), ORE));
1437
383
1438
383
  buildStmts(R);
1439
383
1440
383
  // Create all invariant load instructions first. These are categorized as
1441
383
  // 'synthesizable', therefore are not part of any ScopStmt but need to be
1442
383
  // created somewhere.
1443
383
  const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
1444
1.91k
  for (BasicBlock *BB : scop->getRegion().blocks()) {
1445
1.91k
    if (isErrorBlock(*BB, scop->getRegion(), LI, DT))
1446
12
      continue;
1447
1.90k
1448
7.31k
    for (Instruction &Inst : *BB) {
1449
7.31k
      LoadInst *Load = dyn_cast<LoadInst>(&Inst);
1450
7.31k
      if (!Load)
1451
6.70k
        continue;
1452
612
1453
612
      if (!RIL.count(Load))
1454
501
        continue;
1455
111
1456
111
      // Invariant loads require a MemoryAccess to be created in some statement.
1457
111
      // It is not important to which statement the MemoryAccess is added
1458
111
      // because it will later be removed from the ScopStmt again. We chose the
1459
111
      // first statement of the basic block the LoadInst is in.
1460
111
      ArrayRef<ScopStmt *> List = scop->getStmtListFor(BB);
1461
111
      assert(!List.empty());
1462
111
      ScopStmt *RILStmt = List.front();
1463
111
      buildMemoryAccess(Load, RILStmt);
1464
111
    }
1465
1.90k
  }
1466
383
  buildAccessFunctions();
1467
383
1468
383
  // In case the region does not have an exiting block we will later (during
1469
383
  // code generation) split the exit block. This will move potential PHI nodes
1470
383
  // from the current exit block into the new region exiting block. Hence, PHI
1471
383
  // nodes that are currently not part of the region will become part of it.
1472
383
  // To handle these PHI nodes later we will now model their operands as scalar
1473
383
  // accesses. Note that we do not model anything in the exit block if we have
1474
383
  // an exiting block in the region, as there will not be any splitting later.
1475
383
  if (!R.isTopLevelRegion() && !scop->hasSingleExitEdge()) {
1476
64
    for (Instruction &Inst : *R.getExit()) {
1477
64
      PHINode *PHI = dyn_cast<PHINode>(&Inst);
1478
64
      if (!PHI)
1479
50
        break;
1480
14
1481
14
      buildPHIAccesses(nullptr, PHI, nullptr, true);
1482
14
    }
1483
50
  }
1484
383
1485
383
  // Create memory accesses for global reads since all arrays are now known.
1486
383
  auto *AF = SE.getConstant(IntegerType::getInt64Ty(SE.getContext()), 0);
1487
383
  for (auto GlobalReadPair : GlobalReads) {
1488
4
    ScopStmt *GlobalReadStmt = GlobalReadPair.first;
1489
4
    Instruction *GlobalRead = GlobalReadPair.second;
1490
4
    for (auto *BP : ArrayBasePointers)
1491
9
      addArrayAccess(GlobalReadStmt, MemAccInst(GlobalRead), MemoryAccess::READ,
1492
9
                     BP, BP->getType(), false, {AF}, {nullptr}, GlobalRead);
1493
4
  }
1494
383
1495
383
  scop->buildInvariantEquivalenceClasses();
1496
383
1497
383
  /// A map from basic blocks to their invalid domains.
1498
383
  DenseMap<BasicBlock *, isl::set> InvalidDomainMap;
1499
383
1500
383
  if (!scop->buildDomains(&R, DT, LI, InvalidDomainMap)) {
1501
2
    LLVM_DEBUG(
1502
2
        dbgs() << "Bailing-out because buildDomains encountered problems\n");
1503
2
    return;
1504
2
  }
1505
381
1506
381
  scop->addUserAssumptions(AC, DT, LI, InvalidDomainMap);
1507
381
1508
381
  // Initialize the invalid domain.
1509
381
  for (ScopStmt &Stmt : scop->Stmts)
1510
2.91k
    if (Stmt.isBlockStmt())
1511
2.86k
      Stmt.setInvalidDomain(InvalidDomainMap[Stmt.getEntryBlock()]);
1512
47
    else
1513
47
      Stmt.setInvalidDomain(InvalidDomainMap[getRegionNodeBasicBlock(
1514
47
          Stmt.getRegion()->getNode())]);
1515
381
1516
381
  // Remove empty statements.
1517
381
  // Exit early in case there are no executable statements left in this scop.
1518
381
  scop->removeStmtNotInDomainMap();
1519
381
  scop->simplifySCoP(false);
1520
381
  if (scop->isEmpty()) {
1521
12
    LLVM_DEBUG(dbgs() << "Bailing-out because SCoP is empty\n");
1522
12
    return;
1523
12
  }
1524
369
1525
369
  // The ScopStmts now have enough information to initialize themselves.
1526
747
  for (ScopStmt &Stmt : *scop) {
1527
747
    collectSurroundingLoops(Stmt);
1528
747
1529
747
    buildDomain(Stmt);
1530
747
    buildAccessRelations(Stmt);
1531
747
1532
747
    if (DetectReductions)
1533
747
      checkForReductions(Stmt);
1534
747
  }
1535
369
1536
369
  // Check early for a feasible runtime context.
1537
369
  if (!scop->hasFeasibleRuntimeContext()) {
1538
1
    LLVM_DEBUG(dbgs() << "Bailing-out because of unfeasible context (early)\n");
1539
1
    return;
1540
1
  }
1541
368
1542
368
  // Check early for profitability. Afterwards it cannot change anymore,
1543
368
  // only the runtime context could become infeasible.
1544
368
  if (!scop->isProfitable(UnprofitableScalarAccs)) {
1545
1
    scop->invalidate(PROFITABLE, DebugLoc());
1546
1
    LLVM_DEBUG(
1547
1
        dbgs() << "Bailing-out because SCoP is not considered profitable\n");
1548
1
    return;
1549
1
  }
1550
367
1551
367
  scop->buildSchedule(LI);
1552
367
1553
367
  scop->finalizeAccesses();
1554
367
1555
367
  scop->realignParams();
1556
367
  scop->addUserContext();
1557
367
1558
367
  // Now that the context is fully constructed, and thus all our knowledge
1559
367
  // about the parameters is in it, we add all recorded assumptions to the
1560
367
  // assumed/invalid context.
1561
367
  scop->addRecordedAssumptions();
1562
367
1563
367
  scop->simplifyContexts();
1564
367
  if (!scop->buildAliasChecks(AA)) {
1565
1
    LLVM_DEBUG(dbgs() << "Bailing-out because could not build alias checks\n");
1566
1
    return;
1567
1
  }
1568
366
1569
366
  scop->hoistInvariantLoads();
1570
366
  scop->canonicalizeDynamicBasePtrs();
1571
366
  scop->verifyInvariantLoads();
1572
366
  scop->simplifySCoP(true);
1573
366
1574
366
  // Check late for a feasible runtime context because profitability did not
1575
366
  // change.
1576
366
  if (!scop->hasFeasibleRuntimeContext()) {
1577
8
    LLVM_DEBUG(dbgs() << "Bailing-out because of unfeasible context (late)\n");
1578
8
    return;
1579
8
  }
1580
366
1581
#ifndef NDEBUG
1582
  verifyUses(scop.get(), LI, DT);
1583
#endif
1584
}
1585
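buildScop is a linear pipeline with early returns: each stage either refines the scop or bails out, and the caller detects failure afterwards through hasFeasibleRuntimeContext() rather than through a return value. The control-flow shape, reduced to a sketch with invented stage flags:

#include <cstdio>
#include <memory>

struct MiniScop { bool Feasible = true; };

// Each early return mirrors one bail-out point in buildScop above.
static std::unique_ptr<MiniScop> build(bool DomainsOk, bool Profitable,
                                       bool AliasOk) {
  auto S = std::make_unique<MiniScop>();
  if (!DomainsOk)  { S->Feasible = false; return S; } // buildDomains failed
  if (!Profitable) { S->Feasible = false; return S; } // invalidate(PROFITABLE)
  if (!AliasOk)    { S->Feasible = false; return S; } // alias checks too costly
  // Scheduling, invariant-load hoisting, and verification would run here.
  return S;
}

int main() {
  auto S = build(true, true, false);
  std::printf("%s\n", S->Feasible ? "SCoP kept" : "SCoP dismissed");
  return 0;
}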
1586
ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AliasAnalysis &AA,
1587
                         const DataLayout &DL, DominatorTree &DT, LoopInfo &LI,
1588
                         ScopDetection &SD, ScalarEvolution &SE,
1589
                         OptimizationRemarkEmitter &ORE)
1590
383
    : AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE) {
1591
383
  DebugLoc Beg, End;
1592
383
  auto P = getBBPairForRegion(R);
1593
383
  getDebugLocations(P, Beg, End);
1594
383
1595
383
  std::string Msg = "SCoP begins here.";
1596
383
  ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEntry", Beg, P.first)
1597
383
           << Msg);
1598
383
1599
383
  buildScop(*R, AC, ORE);
1600
383
1601
383
  LLVM_DEBUG(dbgs() << *scop);
1602
383
1603
383
  if (!scop->hasFeasibleRuntimeContext()) {
1604
25
    InfeasibleScops++;
1605
25
    Msg = "SCoP ends here but was dismissed.";
1606
25
    LLVM_DEBUG(dbgs() << "SCoP detected but dismissed\n");
1607
25
    scop.reset();
1608
358
  } else {
1609
358
    Msg = "SCoP ends here.";
1610
358
    ++ScopFound;
1611
358
    if (scop->getMaxLoopDepth() > 0)
1612
340
      ++RichScopFound;
1613
358
  }
1614
383
1615
383
  if (R->isTopLevelRegion())
1616
0
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.first)
1617
0
             << Msg);
1618
383
  else
1619
383
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.second)
1620
383
             << Msg);
1621
383
}
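The constructor brackets scop construction with a pair of remarks: a fixed "begins" message before buildScop, and a closing message chosen by the feasibility of the resulting runtime context. That bracketing pattern in miniature, with plain stdio standing in for OptimizationRemarkEmitter:

#include <cstdio>

// Hypothetical stand-in for constructing a scop and testing feasibility.
static bool constructAndCheck(bool Feasible) { return Feasible; }

int main() {
  std::puts("SCoP begins here.");
  bool Kept = constructAndCheck(/*Feasible=*/false);
  std::puts(Kept ? "SCoP ends here." : "SCoP ends here but was dismissed.");
  return 0;
}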