Coverage Report

Created: 2019-02-15 18:59

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/polly/lib/Analysis/ScopBuilder.cpp
//===- ScopBuilder.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Create a polyhedral description for a static control flow region.
//
// The pass creates a polyhedral description of the Scops detected by the SCoP
// detection derived from their LLVM-IR code.
//
//===----------------------------------------------------------------------===//

#include "polly/ScopBuilder.h"
#include "polly/Options.h"
#include "polly/ScopDetection.h"
#include "polly/ScopDetectionDiagnostic.h"
#include "polly/ScopInfo.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "polly/Support/VirtualInstruction.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <string>
#include <tuple>
#include <vector>

using namespace llvm;
using namespace polly;

#define DEBUG_TYPE "polly-scops"

STATISTIC(ScopFound, "Number of valid Scops");
STATISTIC(RichScopFound, "Number of Scops containing a loop");
STATISTIC(InfeasibleScops,
          "Number of SCoPs with statically infeasible context.");

bool polly::ModelReadOnlyScalars;

static cl::opt<bool, true> XModelReadOnlyScalars(
    "polly-analyze-read-only-scalars",
    cl::desc("Model read-only scalar values in the scop description"),
    cl::location(ModelReadOnlyScalars), cl::Hidden, cl::ZeroOrMore,
    cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool> UnprofitableScalarAccs(
    "polly-unprofitable-scalar-accs",
    cl::desc("Count statements with scalar accesses as not optimizable"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool> DetectFortranArrays(
    "polly-detect-fortran-arrays",
    cl::desc("Detect Fortran arrays and use this for code generation"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool> DetectReductions("polly-detect-reductions",
                                      cl::desc("Detect and exploit reductions"),
                                      cl::Hidden, cl::ZeroOrMore,
                                      cl::init(true), cl::cat(PollyCategory));

// Multiplicative reductions can be disabled separately as these kinds of
// operations can overflow easily. Additive reductions and bit operations
// are in contrast pretty stable.
static cl::opt<bool> DisableMultiplicativeReductions(
    "polly-disable-multiplicative-reductions",
    cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore,
    cl::init(false), cl::cat(PollyCategory));

enum class GranularityChoice { BasicBlocks, ScalarIndependence, Stores };

static cl::opt<GranularityChoice> StmtGranularity(
    "polly-stmt-granularity",
    cl::desc(
        "Algorithm to use for splitting basic blocks into multiple statements"),
    cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb",
                          "One statement per basic block"),
               clEnumValN(GranularityChoice::ScalarIndependence, "scalar-indep",
                          "Scalar independence heuristic"),
               clEnumValN(GranularityChoice::Stores, "store",
                          "Store-level granularity")),
    cl::init(GranularityChoice::ScalarIndependence), cl::cat(PollyCategory));

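/// Model a PHI node: create the PHI read access in @p PHIStmt and the PHI
/// write accesses for the incoming values in their respective statements.
///
/// @param PHIStmt            The statement @p PHI belongs to.
/// @param PHI                The PHI node to model.
/// @param NonAffineSubRegion The non-affine subregion @p PHI is in, if any.
/// @param IsExitBlock        True if @p PHI is in the region's exit block.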
void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
                                   Region *NonAffineSubRegion,
                                   bool IsExitBlock) {
  // PHI nodes that are in the exit block of the region (i.e., if IsExitBlock
  // is true) are not modeled as ordinary PHI nodes, as they are not part of
  // the region. However, we model the operands in the predecessor blocks that
  // are part of the region as regular scalar accesses.

  // If we can synthesize a PHI we can skip it, however only if it is in
  // the region. If it is not it can only be in the exit block of the region.
  // In this case we model the operands but not the PHI itself.
  auto *Scope = LI.getLoopFor(PHI->getParent());
  if (!IsExitBlock && canSynthesize(PHI, *scop, &SE, Scope))
    return;

  // PHI nodes are modeled as if they had been demoted prior to the SCoP
  // detection. Hence, the PHI is a load of a new memory location in which the
  // incoming value was written at the end of the incoming basic block.
  bool OnlyNonAffineSubRegionOperands = true;
  for (unsigned u = 0; u < PHI->getNumIncomingValues(); u++) {
    Value *Op = PHI->getIncomingValue(u);
    BasicBlock *OpBB = PHI->getIncomingBlock(u);
    ScopStmt *OpStmt = scop->getIncomingStmtFor(PHI->getOperandUse(u));

    // Do not build PHI dependences inside a non-affine subregion, but make
    // sure that the necessary scalar values are still made available.
    if (NonAffineSubRegion && NonAffineSubRegion->contains(OpBB)) {
      auto *OpInst = dyn_cast<Instruction>(Op);
      if (!OpInst || !NonAffineSubRegion->contains(OpInst))
        ensureValueRead(Op, OpStmt);
      continue;
    }

    OnlyNonAffineSubRegionOperands = false;
    ensurePHIWrite(PHI, OpStmt, OpBB, Op, IsExitBlock);
  }

  if (!OnlyNonAffineSubRegionOperands && !IsExitBlock) {
    addPHIReadAccess(PHIStmt, PHI);
  }
}

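/// Create the scalar (value) read accesses required by the operands of
/// @p Inst in the statement @p UserStmt.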
void ScopBuilder::buildScalarDependences(ScopStmt *UserStmt,
                                         Instruction *Inst) {
  assert(!isa<PHINode>(Inst));

  // Pull-in required operands.
  for (Use &Op : Inst->operands())
    ensureValueRead(Op.get(), UserStmt);
}

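/// Create a value write access for @p Inst if its value is used outside of
/// the SCoP.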
void ScopBuilder::buildEscapingDependences(Instruction *Inst) {
  // Check for uses of this instruction outside the scop. Because we do not
  // iterate over such instructions and therefore did not "ensure" the existence
  // of a write, we must determine such use here.
  if (scop->isEscaping(Inst))
    ensureValueWrite(Inst);
}

/// Check that a value is a Fortran Array descriptor.
///
/// We check if V has the following structure:
/// %"struct.array1_real(kind=8)" = type { i8*, i<zz>, i<zz>,
///                                   [<num> x %struct.descriptor_dimension] }
///
///
/// %struct.descriptor_dimension = type { i<zz>, i<zz>, i<zz> }
///
/// 1. V's type name starts with "struct.array"
/// 2. V's type has layout as shown.
/// 3. Final member of V's type has name "struct.descriptor_dimension",
/// 4. "struct.descriptor_dimension" has layout as shown.
/// 5. Consistent use of i<zz> where <zz> is some fixed integer number.
///
/// We are interested in such types since this is the code that dragonegg
/// generates for Fortran array descriptors.
///
/// @param V the Value to be checked.
///
/// @returns True if V is a Fortran array descriptor, False otherwise.
bool isFortranArrayDescriptor(Value *V) {
  PointerType *PTy = dyn_cast<PointerType>(V->getType());

  if (!PTy)
    return false;

  Type *Ty = PTy->getElementType();
  assert(Ty && "Ty expected to be initialized");
  auto *StructArrTy = dyn_cast<StructType>(Ty);

  if (!(StructArrTy && StructArrTy->hasName()))
    return false;

  if (!StructArrTy->getName().startswith("struct.array"))
    return false;

  if (StructArrTy->getNumElements() != 4)
    return false;

  const ArrayRef<Type *> ArrMemberTys = StructArrTy->elements();

  // i8* match
  if (ArrMemberTys[0] != Type::getInt8PtrTy(V->getContext()))
    return false;

  // Get a reference to the int type and check that all the members
  // share the same int type
  Type *IntTy = ArrMemberTys[1];
  if (ArrMemberTys[2] != IntTy)
    return false;

  // type: [<num> x %struct.descriptor_dimension]
  ArrayType *DescriptorDimArrayTy = dyn_cast<ArrayType>(ArrMemberTys[3]);
  if (!DescriptorDimArrayTy)
    return false;

  // type: %struct.descriptor_dimension := type { ixx, ixx, ixx }
  StructType *DescriptorDimTy =
      dyn_cast<StructType>(DescriptorDimArrayTy->getElementType());

  if (!(DescriptorDimTy && DescriptorDimTy->hasName()))
    return false;

  if (DescriptorDimTy->getName() != "struct.descriptor_dimension")
    return false;

  if (DescriptorDimTy->getNumElements() != 3)
    return false;

  for (auto MemberTy : DescriptorDimTy->elements()) {
    if (MemberTy != IntTy)
      return false;
  }

  return true;
}

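/// For a load or store @p Inst, try to find the Fortran array descriptor the
/// accessed memory belongs to by following the malloc'd pointer through its
/// bitcast to the store into the descriptor struct.
///
/// @returns The descriptor value, or nullptr if none is found.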
Value *ScopBuilder::findFADAllocationVisible(MemAccInst Inst) {
  // match: 4.1 & 4.2 store/load
  if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst))
    return nullptr;

  // match: 4
  if (Inst.getAlignment() != 8)
    return nullptr;

  Value *Address = Inst.getPointerOperand();

  const BitCastInst *Bitcast = nullptr;
  // [match: 3]
  if (auto *Slot = dyn_cast<GetElementPtrInst>(Address)) {
    Value *TypedMem = Slot->getPointerOperand();
    // match: 2
    Bitcast = dyn_cast<BitCastInst>(TypedMem);
  } else {
    // match: 2
    Bitcast = dyn_cast<BitCastInst>(Address);
  }

  if (!Bitcast)
    return nullptr;

  auto *MallocMem = Bitcast->getOperand(0);

  // match: 1
  auto *MallocCall = dyn_cast<CallInst>(MallocMem);
  if (!MallocCall)
    return nullptr;

  Function *MallocFn = MallocCall->getCalledFunction();
  if (!(MallocFn && MallocFn->hasName() && MallocFn->getName() == "malloc"))
    return nullptr;

  // Find all uses of the malloc'd memory.
  // We are looking for a "store" into a struct with the type being the Fortran
  // descriptor type
  for (auto user : MallocMem->users()) {
    // match: 5
    auto *MallocStore = dyn_cast<StoreInst>(user);
    if (!MallocStore)
      continue;

    auto *DescriptorGEP =
        dyn_cast<GEPOperator>(MallocStore->getPointerOperand());
    if (!DescriptorGEP)
      continue;

    // match: 5
    auto DescriptorType =
        dyn_cast<StructType>(DescriptorGEP->getSourceElementType());
    if (!(DescriptorType && DescriptorType->hasName()))
      continue;

    Value *Descriptor = dyn_cast<Value>(DescriptorGEP->getPointerOperand());

    if (!Descriptor)
      continue;

    if (!isFortranArrayDescriptor(Descriptor))
      continue;

    return Descriptor;
  }

  return nullptr;
}

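/// For a load or store @p Inst, try to find the Fortran array descriptor by
/// looking through the GEP/load/bitcast chain of the accessed pointer back to
/// the descriptor struct itself.
///
/// @returns The descriptor value, or nullptr if none is found.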
Value *ScopBuilder::findFADAllocationInvisible(MemAccInst Inst) {
  // match: 3
  if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst))
    return nullptr;

  Value *Slot = Inst.getPointerOperand();

  LoadInst *MemLoad = nullptr;
  // [match: 2]
  if (auto *SlotGEP = dyn_cast<GetElementPtrInst>(Slot)) {
    // match: 1
    MemLoad = dyn_cast<LoadInst>(SlotGEP->getPointerOperand());
  } else {
    // match: 1
    MemLoad = dyn_cast<LoadInst>(Slot);
  }

  if (!MemLoad)
    return nullptr;

  auto *BitcastOperator =
      dyn_cast<BitCastOperator>(MemLoad->getPointerOperand());
  if (!BitcastOperator)
    return nullptr;

  Value *Descriptor = dyn_cast<Value>(BitcastOperator->getOperand(0));
  if (!Descriptor)
    return nullptr;

  if (!isFortranArrayDescriptor(Descriptor))
    return nullptr;

  return Descriptor;
}

3.61k
bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt) {
363
3.61k
  Value *Val = Inst.getValueOperand();
364
3.61k
  Type *ElementType = Val->getType();
365
3.61k
  Value *Address = Inst.getPointerOperand();
366
3.61k
  const SCEV *AccessFunction =
367
3.61k
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
368
3.61k
  const SCEVUnknown *BasePointer =
369
3.61k
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
370
3.61k
  enum MemoryAccess::AccessType AccType =
371
3.61k
      isa<LoadInst>(Inst) ? 
MemoryAccess::READ1.80k
:
MemoryAccess::MUST_WRITE1.81k
;
372
3.61k
373
3.61k
  if (auto *BitCast = dyn_cast<BitCastInst>(Address)) {
374
106
    auto *Src = BitCast->getOperand(0);
375
106
    auto *SrcTy = Src->getType();
376
106
    auto *DstTy = BitCast->getType();
377
106
    // Do not try to delinearize non-sized (opaque) pointers.
378
106
    if ((SrcTy->isPointerTy() && !SrcTy->getPointerElementType()->isSized()) ||
379
106
        
(105
DstTy->isPointerTy()105
&&
!DstTy->getPointerElementType()->isSized()105
)) {
380
1
      return false;
381
1
    }
382
105
    if (SrcTy->isPointerTy() && DstTy->isPointerTy() &&
383
105
        DL.getTypeAllocSize(SrcTy->getPointerElementType()) ==
384
105
            DL.getTypeAllocSize(DstTy->getPointerElementType()))
385
62
      Address = Src;
386
105
  }
387
3.61k
388
3.61k
  auto *GEP = dyn_cast<GetElementPtrInst>(Address);
389
3.61k
  if (!GEP)
390
869
    return false;
391
2.74k
392
2.74k
  std::vector<const SCEV *> Subscripts;
393
2.74k
  std::vector<int> Sizes;
394
2.74k
  std::tie(Subscripts, Sizes) = getIndexExpressionsFromGEP(GEP, SE);
395
2.74k
  auto *BasePtr = GEP->getOperand(0);
396
2.74k
397
2.74k
  if (auto *BasePtrCast = dyn_cast<BitCastInst>(BasePtr))
398
18
    BasePtr = BasePtrCast->getOperand(0);
399
2.74k
400
2.74k
  // Check for identical base pointers to ensure that we do not miss index
401
2.74k
  // offsets that have been added before this GEP is applied.
402
2.74k
  if (BasePtr != BasePointer->getValue())
403
87
    return false;
404
2.65k
405
2.65k
  std::vector<const SCEV *> SizesSCEV;
406
2.65k
407
2.65k
  const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
408
2.65k
409
2.65k
  Loop *SurroundingLoop = Stmt->getSurroundingLoop();
410
2.76k
  for (auto *Subscript : Subscripts) {
411
2.76k
    InvariantLoadsSetTy AccessILS;
412
2.76k
    if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, Subscript, SE,
413
2.76k
                      &AccessILS))
414
152
      return false;
415
2.61k
416
2.61k
    for (LoadInst *LInst : AccessILS)
417
47
      if (!ScopRIL.count(LInst))
418
3
        return false;
419
2.61k
  }
420
2.65k
421
2.65k
  
if (2.50k
Sizes.empty()2.50k
)
422
2.26k
    return false;
423
240
424
240
  SizesSCEV.push_back(nullptr);
425
240
426
240
  for (auto V : Sizes)
427
259
    SizesSCEV.push_back(SE.getSCEV(
428
259
        ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V)));
429
240
430
240
  addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
431
240
                 true, Subscripts, SizesSCEV, Val);
432
240
  return true;
433
240
}
434
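/// Try to model the memory access @p Inst as a multi-dimensional array access
/// with parametric dimension sizes, using the delinearization information
/// collected during SCoP detection.
///
/// @returns True if such an access was created, false otherwise.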
bool ScopBuilder::buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt) {
  if (!PollyDelinearize)
    return false;

  Value *Address = Inst.getPointerOperand();
  Value *Val = Inst.getValueOperand();
  Type *ElementType = Val->getType();
  unsigned ElementSize = DL.getTypeAllocSize(ElementType);
  enum MemoryAccess::AccessType AccType =
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;

  const SCEV *AccessFunction =
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
  const SCEVUnknown *BasePointer =
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));

  assert(BasePointer && "Could not find base pointer");

  auto &InsnToMemAcc = scop->getInsnToMemAccMap();
  auto AccItr = InsnToMemAcc.find(Inst);
  if (AccItr == InsnToMemAcc.end())
    return false;

  std::vector<const SCEV *> Sizes = {nullptr};

  Sizes.insert(Sizes.end(), AccItr->second.Shape->DelinearizedSizes.begin(),
               AccItr->second.Shape->DelinearizedSizes.end());

  // In case only the element size is contained in the 'Sizes' array, the
  // access does not access a real multi-dimensional array. Hence, we allow
  // the normal single-dimensional access construction to handle this.
  if (Sizes.size() == 1)
    return false;

  // Remove the element size. This information is already provided by the
  // ElementSize parameter. In case the element size of this access and the
  // element size used for delinearization differs the delinearization is
  // incorrect. Hence, we invalidate the scop.
  //
  // TODO: Handle delinearization with differing element sizes.
  auto DelinearizedSize =
      cast<SCEVConstant>(Sizes.back())->getAPInt().getSExtValue();
  Sizes.pop_back();
  if (ElementSize != DelinearizedSize)
    scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent());

  addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
                 true, AccItr->second.DelinearizedSubscripts, Sizes, Val);
  return true;
}

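/// Model the memory accesses of a memset/memcpy/memmove intrinsic.
///
/// @returns True if @p Inst is a memory intrinsic and was handled here.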
bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt) {
  auto *MemIntr = dyn_cast_or_null<MemIntrinsic>(Inst);

  if (MemIntr == nullptr)
    return false;

  auto *L = LI.getLoopFor(Inst->getParent());
  auto *LengthVal = SE.getSCEVAtScope(MemIntr->getLength(), L);
  assert(LengthVal);

  // Check if the length val is actually affine or if we overapproximate it
  InvariantLoadsSetTy AccessILS;
  const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();

  Loop *SurroundingLoop = Stmt->getSurroundingLoop();
  bool LengthIsAffine = isAffineExpr(&scop->getRegion(), SurroundingLoop,
                                     LengthVal, SE, &AccessILS);
  for (LoadInst *LInst : AccessILS)
    if (!ScopRIL.count(LInst))
      LengthIsAffine = false;
  if (!LengthIsAffine)
    LengthVal = nullptr;

  auto *DestPtrVal = MemIntr->getDest();
  assert(DestPtrVal);

  auto *DestAccFunc = SE.getSCEVAtScope(DestPtrVal, L);
  assert(DestAccFunc);
  // Ignore accesses to "NULL".
  // TODO: We could use this to optimize the region further, e.g., intersect
  //       the context with
  //          isl_set_complement(isl_set_params(getDomain()))
  //       as we know it would be undefined to execute this instruction anyway.
  if (DestAccFunc->isZero())
    return true;

  auto *DestPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(DestAccFunc));
  assert(DestPtrSCEV);
  DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV);
  addArrayAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(),
                 IntegerType::getInt8Ty(DestPtrVal->getContext()),
                 LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr},
                 Inst.getValueOperand());

  auto *MemTrans = dyn_cast<MemTransferInst>(MemIntr);
  if (!MemTrans)
    return true;

  auto *SrcPtrVal = MemTrans->getSource();
  assert(SrcPtrVal);

  auto *SrcAccFunc = SE.getSCEVAtScope(SrcPtrVal, L);
  assert(SrcAccFunc);
  // Ignore accesses to "NULL".
  // TODO: See above TODO
  if (SrcAccFunc->isZero())
    return true;

  auto *SrcPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(SrcAccFunc));
  assert(SrcPtrSCEV);
  SrcAccFunc = SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV);
  addArrayAccess(Stmt, Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(),
                 IntegerType::getInt8Ty(SrcPtrVal->getContext()),
                 LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr},
                 Inst.getValueOperand());

  return true;
}

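/// Model the memory accesses of a call instruction based on the alias
/// analysis mod/ref behavior of the called function.
///
/// @returns True if @p Inst is a call and was handled here.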
bool ScopBuilder::buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt) {
  auto *CI = dyn_cast_or_null<CallInst>(Inst);

  if (CI == nullptr)
    return false;

  if (CI->doesNotAccessMemory() || isIgnoredIntrinsic(CI) || isDebugCall(CI))
    return true;

  bool ReadOnly = false;
  auto *AF = SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0);
  auto *CalledFunction = CI->getCalledFunction();
  switch (AA.getModRefBehavior(CalledFunction)) {
  case FMRB_UnknownModRefBehavior:
    llvm_unreachable("Unknown mod ref behaviour cannot be represented.");
  case FMRB_DoesNotAccessMemory:
    return true;
  case FMRB_DoesNotReadMemory:
  case FMRB_OnlyAccessesInaccessibleMem:
  case FMRB_OnlyAccessesInaccessibleOrArgMem:
    return false;
  case FMRB_OnlyReadsMemory:
    GlobalReads.emplace_back(Stmt, CI);
    return true;
  case FMRB_OnlyReadsArgumentPointees:
    ReadOnly = true;
    LLVM_FALLTHROUGH;
  case FMRB_OnlyAccessesArgumentPointees: {
    auto AccType = ReadOnly ? MemoryAccess::READ : MemoryAccess::MAY_WRITE;
    Loop *L = LI.getLoopFor(Inst->getParent());
    for (const auto &Arg : CI->arg_operands()) {
      if (!Arg->getType()->isPointerTy())
        continue;

      auto *ArgSCEV = SE.getSCEVAtScope(Arg, L);
      if (ArgSCEV->isZero())
        continue;

      auto *ArgBasePtr = cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
      addArrayAccess(Stmt, Inst, AccType, ArgBasePtr->getValue(),
                     ArgBasePtr->getType(), false, {AF}, {nullptr}, CI);
    }
    return true;
  }
  }

  return true;
}

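/// Model the memory access @p Inst as a single-dimensional array access. This
/// is the fallback used when no multi-dimensional view could be derived.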
void ScopBuilder::buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt) {
  Value *Address = Inst.getPointerOperand();
  Value *Val = Inst.getValueOperand();
  Type *ElementType = Val->getType();
  enum MemoryAccess::AccessType AccType =
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;

  const SCEV *AccessFunction =
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
  const SCEVUnknown *BasePointer =
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));

  assert(BasePointer && "Could not find base pointer");
  AccessFunction = SE.getMinusSCEV(AccessFunction, BasePointer);

  // Check if the access depends on a loop contained in a non-affine subregion.
  bool isVariantInNonAffineLoop = false;
  SetVector<const Loop *> Loops;
  findLoops(AccessFunction, Loops);
  for (const Loop *L : Loops)
    if (Stmt->contains(L)) {
      isVariantInNonAffineLoop = true;
      break;
    }

  InvariantLoadsSetTy AccessILS;

  Loop *SurroundingLoop = Stmt->getSurroundingLoop();
  bool IsAffine = !isVariantInNonAffineLoop &&
                  isAffineExpr(&scop->getRegion(), SurroundingLoop,
                               AccessFunction, SE, &AccessILS);

  const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
  for (LoadInst *LInst : AccessILS)
    if (!ScopRIL.count(LInst))
      IsAffine = false;

  if (!IsAffine && AccType == MemoryAccess::MUST_WRITE)
    AccType = MemoryAccess::MAY_WRITE;

  addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
                 IsAffine, {AccessFunction}, {nullptr}, Val);
}

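/// Build a memory access for @p Inst by trying the specialized builders
/// (memory intrinsics, calls, fixed-size and parametric multi-dimensional
/// accesses) in order, falling back to a single-dimensional access.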
void ScopBuilder::buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt) {
  if (buildAccessMemIntrinsic(Inst, Stmt))
    return;

  if (buildAccessCallInst(Inst, Stmt))
    return;

  if (buildAccessMultiDimFixed(Inst, Stmt))
    return;

  if (buildAccessMultiDimParam(Inst, Stmt))
    return;

  buildAccessSingleDim(Inst, Stmt);
}

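/// Build the access functions for all statements in the SCoP and add value
/// writes for values that escape the region.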
void ScopBuilder::buildAccessFunctions() {
  for (auto &Stmt : *scop) {
    if (Stmt.isBlockStmt()) {
      buildAccessFunctions(&Stmt, *Stmt.getBasicBlock());
      continue;
    }

    Region *R = Stmt.getRegion();
    for (BasicBlock *BB : R->blocks())
      buildAccessFunctions(&Stmt, *BB, R);
  }

  // Build write accesses for values that are used after the SCoP.
  // The instructions defining them might be synthesizable and therefore not
  // contained in any statement, hence we iterate over the original instructions
  // to identify all escaping values.
  for (BasicBlock *BB : scop->getRegion().blocks()) {
    for (Instruction &Inst : *BB)
      buildEscapingDependences(&Inst);
  }
}

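/// Should @p Inst be modeled as part of a statement? Terminators, ignored
/// intrinsics and synthesizable instructions are not modeled.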
bool ScopBuilder::shouldModelInst(Instruction *Inst, Loop *L) {
  return !Inst->isTerminator() && !isIgnoredIntrinsic(Inst) &&
         !canSynthesize(Inst, *scop, &SE, L);
}

/// Generate a name for a statement.
///
/// @param BB     The basic block the statement will represent.
/// @param BBIdx  The index of the @p BB relative to other BBs/regions.
/// @param Count  The index of the created statement in @p BB.
/// @param IsMain Whether this is the main statement for @p BB. If true,
///               no suffix will be added.
/// @param IsLast Uses a special indicator for the last statement of a BB.
static std::string makeStmtName(BasicBlock *BB, long BBIdx, int Count,
                                bool IsMain, bool IsLast = false) {
  std::string Suffix;
  if (!IsMain) {
    if (UseInstructionNames)
      Suffix = '_';
    if (IsLast)
      Suffix += "last";
    else if (Count < 26)
      Suffix += 'a' + Count;
    else
      Suffix += std::to_string(Count);
  }
  return getIslCompatibleName("Stmt", BB, BBIdx, Suffix, UseInstructionNames);
}

/// Generate a name for a statement that represents a non-affine subregion.
///
/// @param R    The region the statement will represent.
/// @param RIdx The index of the @p R relative to other BBs/regions.
static std::string makeStmtName(Region *R, long RIdx) {
  return getIslCompatibleName("Stmt", R->getNameStr(), RIdx, "",
                              UseInstructionNames);
}

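/// Create one or more statements for basic block @p BB in textual order,
/// splitting at "polly_split_after" metadata and, if @p SplitOnStore is set,
/// after every store.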
void ScopBuilder::buildSequentialBlockStmts(BasicBlock *BB, bool SplitOnStore) {
  Loop *SurroundingLoop = LI.getLoopFor(BB);

  int Count = 0;
  long BBIdx = scop->getNextStmtIdx();
  std::vector<Instruction *> Instructions;
  for (Instruction &Inst : *BB) {
    if (shouldModelInst(&Inst, SurroundingLoop))
      Instructions.push_back(&Inst);
    if (Inst.getMetadata("polly_split_after") ||
        (SplitOnStore && isa<StoreInst>(Inst))) {
      std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
      scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
      Count++;
      Instructions.clear();
    }
  }

  std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
  scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
}

/// Is @p Inst an ordered instruction?
///
/// An unordered instruction is an instruction, such that a sequence of
/// unordered instructions can be permuted without changing semantics. Any
/// instruction for which this is not always the case is ordered.
static bool isOrderedInstruction(Instruction *Inst) {
  return Inst->mayHaveSideEffects() || Inst->mayReadOrWriteMemory();
}

/// Join instructions to the same statement if one uses the scalar result of the
/// other.
static void joinOperandTree(EquivalenceClasses<Instruction *> &UnionFind,
                            ArrayRef<Instruction *> ModeledInsts) {
  for (Instruction *Inst : ModeledInsts) {
    if (isa<PHINode>(Inst))
      continue;

    for (Use &Op : Inst->operands()) {
      Instruction *OpInst = dyn_cast<Instruction>(Op.get());
      if (!OpInst)
        continue;

      // Check if OpInst is in the BB and is a modeled instruction.
      auto OpVal = UnionFind.findValue(OpInst);
      if (OpVal == UnionFind.end())
        continue;

      UnionFind.unionSets(Inst, OpInst);
    }
  }
}

/// Ensure that the order of ordered instructions does not change.
///
/// If we encounter an ordered instruction enclosed in instructions belonging to
/// a different statement (which might as well contain ordered instructions, but
/// this is not tested here), join them.
static void
joinOrderedInstructions(EquivalenceClasses<Instruction *> &UnionFind,
                        ArrayRef<Instruction *> ModeledInsts) {
  SetVector<Instruction *> SeenLeaders;
  for (Instruction *Inst : ModeledInsts) {
    if (!isOrderedInstruction(Inst))
      continue;

    Instruction *Leader = UnionFind.getLeaderValue(Inst);
    bool Inserted = SeenLeaders.insert(Leader);
    if (Inserted)
      continue;

    // Merge statements to close holes. Say, we have already seen statements A
    // and B, in this order. Then we see an instruction of A again and we would
    // see the pattern "A B A". This function joins all statements until the
    // only seen occurrence of A.
    for (Instruction *Prev : reverse(SeenLeaders)) {
      // Items added to 'SeenLeaders' are leaders, but may have lost their
      // leadership status when merged into another statement.
      Instruction *PrevLeader = UnionFind.getLeaderValue(SeenLeaders.back());
      if (PrevLeader == Leader)
        break;
      UnionFind.unionSets(Prev, Leader);
    }
  }
}

/// If the BasicBlock has an edge from itself, ensure that the PHI WRITEs for
/// the incoming values from this block are executed after the PHI READ.
///
/// Otherwise it could overwrite the incoming value from before the BB with the
/// value for the next execution. This can happen if the PHI WRITE is added to
/// the statement with the instruction that defines the incoming value (instead
/// of the last statement of the same BB). To ensure that the PHI READ and WRITE
/// are in order, we put both into the statement. PHI WRITEs are always executed
/// after PHI READs when they are in the same statement.
///
/// TODO: This is an overpessimization. We only have to ensure that the PHI
/// WRITE is not put into a statement containing the PHI itself. That could also
/// be done by
/// - having all (strongly connected) PHIs in a single statement,
/// - unite only the PHIs in the operand tree of the PHI WRITE (because it only
///   has a chance of being lifted before a PHI by being in a statement with a
///   PHI that comes before in the basic block), or
/// - when uniting statements, ensure that no (relevant) PHIs are overtaken.
static void joinOrderedPHIs(EquivalenceClasses<Instruction *> &UnionFind,
                            ArrayRef<Instruction *> ModeledInsts) {
  for (Instruction *Inst : ModeledInsts) {
    PHINode *PHI = dyn_cast<PHINode>(Inst);
    if (!PHI)
      continue;

    int Idx = PHI->getBasicBlockIndex(PHI->getParent());
    if (Idx < 0)
      continue;

    Instruction *IncomingVal =
        dyn_cast<Instruction>(PHI->getIncomingValue(Idx));
    if (!IncomingVal)
      continue;

    UnionFind.unionSets(PHI, IncomingVal);
  }
}

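/// Create statements for basic block @p BB using the scalar-independence
/// heuristic: modeled instructions are grouped into equivalence classes such
/// that operand trees stay together and the order of ordered
/// (memory-accessing) instructions is preserved.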
void ScopBuilder::buildEqivClassBlockStmts(BasicBlock *BB) {
  Loop *L = LI.getLoopFor(BB);

  // Extracting out modeled instructions saves us from checking
  // shouldModelInst() repeatedly.
  SmallVector<Instruction *, 32> ModeledInsts;
  EquivalenceClasses<Instruction *> UnionFind;
  Instruction *MainInst = nullptr;
  for (Instruction &Inst : *BB) {
    if (!shouldModelInst(&Inst, L))
      continue;
    ModeledInsts.push_back(&Inst);
    UnionFind.insert(&Inst);

    // When a BB is split into multiple statements, the main statement is the
    // one containing the 'main' instruction. We select the first instruction
    // that is unlikely to be removed (because it has side-effects) as the main
    // one. It is used to ensure that at least one statement from the BB has the
    // same name as with -polly-stmt-granularity=bb.
    if (!MainInst && (isa<StoreInst>(Inst) ||
                      (isa<CallInst>(Inst) && !isa<IntrinsicInst>(Inst))))
      MainInst = &Inst;
  }

  joinOperandTree(UnionFind, ModeledInsts);
  joinOrderedInstructions(UnionFind, ModeledInsts);
  joinOrderedPHIs(UnionFind, ModeledInsts);

  // The list of instructions for each statement (a statement is represented by
  // its leader instruction). The order of each statement's instructions is
  // reversed such that the epilogue is first. This makes it easier to ensure
  // that the epilogue is the last statement.
  MapVector<Instruction *, std::vector<Instruction *>> LeaderToInstList;

  // Collect the instructions of all leaders. UnionFind's member iterators
  // unfortunately are not in any specific order.
  for (Instruction &Inst : reverse(*BB)) {
    auto LeaderIt = UnionFind.findLeader(&Inst);
    if (LeaderIt == UnionFind.member_end())
      continue;

    std::vector<Instruction *> &InstList = LeaderToInstList[*LeaderIt];
    InstList.push_back(&Inst);
  }

  // Finally build the statements.
  int Count = 0;
  long BBIdx = scop->getNextStmtIdx();
  bool MainFound = false;
  for (auto &Instructions : reverse(LeaderToInstList)) {
    std::vector<Instruction *> &InstList = Instructions.second;

    // If there is no main instruction, make the first statement the main.
    bool IsMain;
    if (MainInst)
      IsMain = std::find(InstList.begin(), InstList.end(), MainInst) !=
               InstList.end();
    else
      IsMain = (Count == 0);
    if (IsMain)
      MainFound = true;

    std::reverse(InstList.begin(), InstList.end());
    std::string Name = makeStmtName(BB, BBIdx, Count, IsMain);
    scop->addScopStmt(BB, Name, L, std::move(InstList));
    Count += 1;
  }

  // Unconditionally add an epilogue (last statement). It contains no
  // instructions, but holds the PHI write accesses for successor basic blocks,
  // if the incoming value is not defined in another statement of the same BB.
  // The epilogue will be removed if no PHIWrite is added to it.
  std::string EpilogueName = makeStmtName(BB, BBIdx, Count, !MainFound, true);
  scop->addScopStmt(BB, EpilogueName, L, {});
}

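/// Create statements for all blocks and non-affine subregions of region @p SR
/// according to the selected statement granularity.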
void ScopBuilder::buildStmts(Region &SR) {
  if (scop->isNonAffineSubRegion(&SR)) {
    std::vector<Instruction *> Instructions;
    Loop *SurroundingLoop =
        getFirstNonBoxedLoopFor(SR.getEntry(), LI, scop->getBoxedLoops());
    for (Instruction &Inst : *SR.getEntry())
      if (shouldModelInst(&Inst, SurroundingLoop))
        Instructions.push_back(&Inst);
    long RIdx = scop->getNextStmtIdx();
    std::string Name = makeStmtName(&SR, RIdx);
    scop->addScopStmt(&SR, Name, SurroundingLoop, Instructions);
    return;
  }

  for (auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I)
    if (I->isSubRegion())
      buildStmts(*I->getNodeAs<Region>());
    else {
      BasicBlock *BB = I->getNodeAs<BasicBlock>();
      switch (StmtGranularity) {
      case GranularityChoice::BasicBlocks:
        buildSequentialBlockStmts(BB);
        break;
      case GranularityChoice::ScalarIndependence:
        buildEqivClassBlockStmts(BB);
        break;
      case GranularityChoice::Stores:
        buildSequentialBlockStmts(BB, true);
        break;
      }
    }
}

void ScopBuilder::buildAccessFunctions(ScopStmt *Stmt, BasicBlock &BB,
                                       Region *NonAffineSubRegion) {
  assert(
      Stmt &&
      "The exit BB is the only one that cannot be represented by a statement");
  assert(Stmt->represents(&BB));

  // We do not build access functions for error blocks, as they may contain
  // instructions we can not model.
  if (isErrorBlock(BB, scop->getRegion(), LI, DT))
    return;

  auto BuildAccessesForInst = [this, Stmt,
                               NonAffineSubRegion](Instruction *Inst) {
    PHINode *PHI = dyn_cast<PHINode>(Inst);
    if (PHI)
      buildPHIAccesses(Stmt, PHI, NonAffineSubRegion, false);

    if (auto MemInst = MemAccInst::dyn_cast(*Inst)) {
      assert(Stmt && "Cannot build access function in non-existing statement");
      buildMemoryAccess(MemInst, Stmt);
    }

    // PHI nodes have already been modeled above and terminators that are
    // not part of a non-affine subregion are fully modeled and regenerated
    // from the polyhedral domains. Hence, they do not need to be modeled as
    // explicit data dependences.
    if (!PHI)
      buildScalarDependences(Stmt, Inst);
  };

  const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
  bool IsEntryBlock = (Stmt->getEntryBlock() == &BB);
  if (IsEntryBlock) {
    for (Instruction *Inst : Stmt->getInstructions())
      BuildAccessesForInst(Inst);
    if (Stmt->isRegionStmt())
      BuildAccessesForInst(BB.getTerminator());
  } else {
    for (Instruction &Inst : BB) {
      if (isIgnoredIntrinsic(&Inst))
        continue;

      // Invariant loads already have been processed.
      if (isa<LoadInst>(Inst) && RIL.count(cast<LoadInst>(&Inst)))
        continue;

      BuildAccessesForInst(&Inst);
    }
  }
}

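/// Create a new MemoryAccess of kind @p Kind, register it with the SCoP and
/// the statement @p Stmt, and return it. MUST_WRITE accesses are demoted to
/// MAY_WRITE if they are not known to always execute.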
MemoryAccess *ScopBuilder::addMemoryAccess(
    ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType,
    Value *BaseAddress, Type *ElementType, bool Affine, Value *AccessValue,
    ArrayRef<const SCEV *> Subscripts, ArrayRef<const SCEV *> Sizes,
    MemoryKind Kind) {
  bool isKnownMustAccess = false;

  // Accesses in single-basic block statements are always executed.
  if (Stmt->isBlockStmt())
    isKnownMustAccess = true;

  if (Stmt->isRegionStmt()) {
    // Accesses that dominate the exit block of a non-affine region are always
    // executed. In non-affine regions there may exist MemoryKind::Values that
    // do not dominate the exit. MemoryKind::Values will always dominate the
    // exit and MemoryKind::PHIs only if there is at most one PHI_WRITE in the
    // non-affine region.
    if (Inst && DT.dominates(Inst->getParent(), Stmt->getRegion()->getExit()))
      isKnownMustAccess = true;
  }

  // Non-affine PHI writes do not "happen" at a particular instruction, but
  // after exiting the statement. Therefore they are guaranteed to execute and
  // overwrite the old value.
  if (Kind == MemoryKind::PHI || Kind == MemoryKind::ExitPHI)
    isKnownMustAccess = true;

  if (!isKnownMustAccess && AccType == MemoryAccess::MUST_WRITE)
    AccType = MemoryAccess::MAY_WRITE;

  auto *Access = new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType,
                                  Affine, Subscripts, Sizes, AccessValue, Kind);

  scop->addAccessFunction(Access);
  Stmt->addAccess(Access);
  return Access;
}

void ScopBuilder::addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst,
                                 MemoryAccess::AccessType AccType,
                                 Value *BaseAddress, Type *ElementType,
                                 bool IsAffine,
                                 ArrayRef<const SCEV *> Subscripts,
                                 ArrayRef<const SCEV *> Sizes,
                                 Value *AccessValue) {
  ArrayBasePointers.insert(BaseAddress);
  auto *MemAccess = addMemoryAccess(Stmt, MemAccInst, AccType, BaseAddress,
                                    ElementType, IsAffine, AccessValue,
                                    Subscripts, Sizes, MemoryKind::Array);

  if (!DetectFortranArrays)
    return;

  if (Value *FAD = findFADAllocationInvisible(MemAccInst))
    MemAccess->setFortranArrayDescriptor(FAD);
  else if (Value *FAD = findFADAllocationVisible(MemAccInst))
    MemAccess->setFortranArrayDescriptor(FAD);
}

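/// Make sure the value defined by @p Inst is written by its defining
/// statement so that other statements (and code after the SCoP) can read it.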
void ScopBuilder::ensureValueWrite(Instruction *Inst) {
  // Find the statement that defines the value of Inst. That statement has to
  // write the value to make it available to those statements that read it.
  ScopStmt *Stmt = scop->getStmtFor(Inst);

  // It is possible that the value is synthesizable within a loop (such that it
  // is not part of any statement), but not after the loop (where you need the
  // number of loop round-trips to synthesize it). In LCSSA-form a PHI node will
  // avoid this. In case the IR has no such PHI, use the last statement (where
  // the value is synthesizable) to write the value.
  if (!Stmt)
    Stmt = scop->getLastStmtFor(Inst->getParent());

  // Inst not defined within this SCoP.
  if (!Stmt)
    return;

  // Do not process further if the instruction is already written.
  if (Stmt->lookupValueWriteOf(Inst))
    return;

  addMemoryAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, Inst, Inst->getType(),
                  true, Inst, ArrayRef<const SCEV *>(),
                  ArrayRef<const SCEV *>(), MemoryKind::Value);
}

void ScopBuilder::ensureValueRead(Value *V, ScopStmt *UserStmt) {
  // TODO: Make ScopStmt::ensureValueRead(Value*) offer the same functionality
  // to be able to replace this one. Currently, there is a split responsibility.
  // In a first step, the MemoryAccess is created, but without the
  // AccessRelation. In the second step by ScopStmt::buildAccessRelations(), the
  // AccessRelation is created. At least for scalar accesses, there is no new
  // information available at ScopStmt::buildAccessRelations(), so we could
  // create the AccessRelation right away. This is what
  // ScopStmt::ensureValueRead(Value*) does.

  auto *Scope = UserStmt->getSurroundingLoop();
  auto VUse = VirtualUse::create(scop.get(), UserStmt, Scope, V, false);
  switch (VUse.getKind()) {
  case VirtualUse::Constant:
  case VirtualUse::Block:
  case VirtualUse::Synthesizable:
  case VirtualUse::Hoisted:
  case VirtualUse::Intra:
    // Uses of these kinds do not need a MemoryAccess.
    break;

  case VirtualUse::ReadOnly:
    // Add MemoryAccess for invariant values only if requested.
    if (!ModelReadOnlyScalars)
      break;

    LLVM_FALLTHROUGH;
  case VirtualUse::Inter:

    // Do not create another MemoryAccess for reloading the value if one already
    // exists.
    if (UserStmt->lookupValueReadOf(V))
      break;

    addMemoryAccess(UserStmt, nullptr, MemoryAccess::READ, V, V->getType(),
                    true, V, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
                    MemoryKind::Value);

    // Inter-statement uses need to write the value in their defining statement.
    if (VUse.isInter())
      ensureValueWrite(cast<Instruction>(V));
    break;
  }
}

void ScopBuilder::ensurePHIWrite(PHINode *PHI, ScopStmt *IncomingStmt,
                                 BasicBlock *IncomingBlock,
                                 Value *IncomingValue, bool IsExitBlock) {
  // As the incoming block might turn out to be an error statement ensure we
  // will create an exit PHI SAI object. It is needed during code generation
  // and would be created later anyway.
  if (IsExitBlock)
    scop->getOrCreateScopArrayInfo(PHI, PHI->getType(), {},
                                   MemoryKind::ExitPHI);

  // This is possible if PHI is in the SCoP's entry block. The incoming blocks
  // from outside the SCoP's region have no statement representation.
  if (!IncomingStmt)
    return;

  // Take care for the incoming value being available in the incoming block.
  // This must be done before the check for multiple PHI writes because multiple
  // exiting edges from subregion each can be the effective written value of the
  // subregion. As such, all of them must be made available in the subregion
  // statement.
  ensureValueRead(IncomingValue, IncomingStmt);

  // Do not add more than one MemoryAccess per PHINode and ScopStmt.
  if (MemoryAccess *Acc = IncomingStmt->lookupPHIWriteOf(PHI)) {
    assert(Acc->getAccessInstruction() == PHI);
    Acc->addIncoming(IncomingBlock, IncomingValue);
    return;
  }

  MemoryAccess *Acc = addMemoryAccess(
      IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, PHI->getType(), true,
      PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
      IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI);
  assert(Acc);
  Acc->addIncoming(IncomingBlock, IncomingValue);
}

void ScopBuilder::addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI) {
  addMemoryAccess(PHIStmt, PHI, MemoryAccess::READ, PHI, PHI->getType(), true,
                  PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
                  MemoryKind::PHI);
}

void ScopBuilder::buildDomain(ScopStmt &Stmt) {
  isl::id Id = isl::id::alloc(scop->getIslCtx(), Stmt.getBaseName(), &Stmt);

  Stmt.Domain = scop->getDomainConditions(&Stmt);
  Stmt.Domain = Stmt.Domain.set_tuple_id(Id);
}

void ScopBuilder::collectSurroundingLoops(ScopStmt &Stmt) {
  isl::set Domain = Stmt.getDomain();
  BasicBlock *BB = Stmt.getEntryBlock();

  Loop *L = LI.getLoopFor(BB);

  while (L && Stmt.isRegionStmt() && Stmt.getRegion()->contains(L))
    L = L->getParentLoop();

  SmallVector<llvm::Loop *, 8> Loops;

  while (L && Stmt.getParent()->getRegion().contains(L)) {
    Loops.push_back(L);
    L = L->getParentLoop();
  }

  Stmt.NestLoops.insert(Stmt.NestLoops.begin(), Loops.rbegin(), Loops.rend());
}

/// Return the reduction type for a given binary operator.
static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp,
                                                    const Instruction *Load) {
  if (!BinOp)
    return MemoryAccess::RT_NONE;
  switch (BinOp->getOpcode()) {
  case Instruction::FAdd:
    if (!BinOp->isFast())
      return MemoryAccess::RT_NONE;
    LLVM_FALLTHROUGH;
  case Instruction::Add:
    return MemoryAccess::RT_ADD;
  case Instruction::Or:
    return MemoryAccess::RT_BOR;
  case Instruction::Xor:
    return MemoryAccess::RT_BXOR;
  case Instruction::And:
    return MemoryAccess::RT_BAND;
  case Instruction::FMul:
    if (!BinOp->isFast())
      return MemoryAccess::RT_NONE;
    LLVM_FALLTHROUGH;
  case Instruction::Mul:
    if (DisableMultiplicativeReductions)
      return MemoryAccess::RT_NONE;
    return MemoryAccess::RT_MUL;
  default:
    return MemoryAccess::RT_NONE;
  }
}

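/// Detect reduction-like load/store pairs in @p Stmt and mark the
/// corresponding memory accesses with their reduction type.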
1240
2.30k
void ScopBuilder::checkForReductions(ScopStmt &Stmt) {
1241
2.30k
  SmallVector<MemoryAccess *, 2> Loads;
1242
2.30k
  SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates;
1243
2.30k
1244
2.30k
  // First collect candidate load-store reduction chains by iterating over all
1245
2.30k
  // stores and collecting possible reduction loads.
1246
4.73k
  for (MemoryAccess *StoreMA : Stmt) {
1247
4.73k
    if (StoreMA->isRead())
1248
2.25k
      continue;
1249
2.47k
1250
2.47k
    Loads.clear();
1251
2.47k
    collectCandidateReductionLoads(StoreMA, Loads);
1252
2.47k
    for (MemoryAccess *LoadMA : Loads)
1253
418
      Candidates.push_back(std::make_pair(LoadMA, StoreMA));
1254
2.47k
  }
1255
2.30k
1256
2.30k
  // Then check each possible candidate pair.
1257
2.30k
  for (const auto &CandidatePair : Candidates) {
1258
418
    bool Valid = true;
1259
418
    isl::map LoadAccs = CandidatePair.first->getAccessRelation();
1260
418
    isl::map StoreAccs = CandidatePair.second->getAccessRelation();
1261
418
1262
418
    // Skip those with obviously unequal base addresses.
1263
418
    if (!LoadAccs.has_equal_space(StoreAccs)) {
1264
75
      continue;
1265
75
    }
1266
343
1267
343
    // And check if the remaining for overlap with other memory accesses.
1268
343
    isl::map AllAccsRel = LoadAccs.unite(StoreAccs);
1269
343
    AllAccsRel = AllAccsRel.intersect_domain(Stmt.getDomain());
1270
343
    isl::set AllAccs = AllAccsRel.range();
1271
343
1272
973
    for (MemoryAccess *MA : Stmt) {
1273
973
      if (MA == CandidatePair.first || 
MA == CandidatePair.second630
)
1274
686
        continue;
1275
287
1276
287
      isl::map AccRel =
1277
287
          MA->getAccessRelation().intersect_domain(Stmt.getDomain());
1278
287
      isl::set Accs = AccRel.range();
1279
287
1280
287
      if (AllAccs.has_equal_space(Accs)) {
1281
70
        isl::set OverlapAccs = Accs.intersect(AllAccs);
1282
70
        Valid = Valid && 
OverlapAccs.is_empty()51
;
1283
70
      }
1284
287
    }
1285
343
1286
343
    if (!Valid)
1287
51
      continue;
1288
292
1289
292
    const LoadInst *Load =
1290
292
        dyn_cast<const LoadInst>(CandidatePair.first->getAccessInstruction());
1291
292
    MemoryAccess::ReductionType RT =
1292
292
        getReductionType(dyn_cast<BinaryOperator>(Load->user_back()), Load);
1293
292
1294
292
    // If no overlapping access was found we mark the load and store as
1295
292
    // reduction like.
1296
292
    CandidatePair.first->markAsReductionLike(RT);
1297
292
    CandidatePair.second->markAsReductionLike(RT);
1298
292
  }
1299
2.30k
}
1300
1301
void ScopBuilder::collectCandidateReductionLoads(
1302
2.47k
    MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) {
1303
2.47k
  ScopStmt *Stmt = StoreMA->getStatement();
1304
2.47k
1305
2.47k
  auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction());
1306
2.47k
  if (!Store)
1307
772
    return;
1308
1.70k
1309
1.70k
  // Skip if there is not one binary operator between the load and the store
1310
1.70k
  auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand());
1311
1.70k
  if (!BinOp)
1312
1.04k
    return;
1313
664
1314
664
  // Skip if the binary operator has multiple uses
1315
664
  if (BinOp->getNumUses() != 1)
1316
48
    return;
1317
616
1318
616
  // Skip if the opcode of the binary operator is not commutative/associative
1319
616
  if (!BinOp->isCommutative() || !BinOp->isAssociative())
1320
242
    return;
1321
374
1322
374
  // Skip if the binary operator is outside the current SCoP
1323
374
  if (BinOp->getParent() != Store->getParent())
1324
1
    return;
1325
373
1326
373
  // Skip if it is a multiplicative reduction and those are disabled
1327
373
  if (DisableMultiplicativeReductions &&
1328
373
      (BinOp->getOpcode() == Instruction::Mul ||
1329
2
       BinOp->getOpcode() == Instruction::FMul))
1330
1
    return;
1331
372
1332
372
  // Check the binary operator operands for a candidate load
1333
372
  auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0));
1334
372
  auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1));
1335
372
  if (!PossibleLoad0 && !PossibleLoad1)
1336
9
    return;
1337
363
1338
363
  // A load is only a candidate if it cannot escape (thus has only this use)
1339
363
  if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1)
1340
346
    if (PossibleLoad0->getParent() == Store->getParent())
1341
342
      Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad0));
1342
363
  if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1)
1343
77
    if (PossibleLoad1->getParent() == Store->getParent())
1344
76
      Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad1));
1345
363
}
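
The early returns above can be illustrated with two loops that are skipped as reduction candidates (again a sketch with invented names, assuming one statement per loop body and SSA form as Polly sees it): a subtraction fails the commutativity check, and a binary operator whose result feeds two stores fails the single-use check.

// Hypothetical loops that collectCandidateReductionLoads skips.
void notCandidates(const int *A, int *Sum, int *Copy, int N) {
  for (int i = 0; i < N; i++)
    Sum[0] = Sum[0] - A[i]; // Sub is not commutative -> skipped

  for (int i = 0; i < N; i++) {
    int T = Sum[0] + A[i];  // the Add feeds two stores, i.e. it has
    Sum[0] = T;             // more than one use -> skipped
    Copy[i] = T;
  }
}
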
1346
1347
2.30k
void ScopBuilder::buildAccessRelations(ScopStmt &Stmt) {
1348
4.74k
  for (MemoryAccess *Access : Stmt.MemAccs) {
1349
4.74k
    Type *ElementType = Access->getElementType();
1350
4.74k
1351
4.74k
    MemoryKind Ty;
1352
4.74k
    if (Access->isPHIKind())
1353
568
      Ty = MemoryKind::PHI;
1354
4.17k
    else if (Access->isExitPHIKind())
1355
84
      Ty = MemoryKind::ExitPHI;
1356
4.08k
    else if (Access->isValueKind())
1357
679
      Ty = MemoryKind::Value;
1358
3.41k
    else
1359
3.41k
      Ty = MemoryKind::Array;
1360
4.74k
1361
4.74k
    auto *SAI = scop->getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(),
1362
4.74k
                                               ElementType, Access->Sizes, Ty);
1363
4.74k
    Access->buildAccessRelation(SAI);
1364
4.74k
    scop->addAccessData(Access);
1365
4.74k
  }
1366
2.30k
}
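
A rough, hedged illustration of the MemoryKind categories used above (the exact accesses depend on statement granularity and later simplification; names are invented): array subscripts yield MemoryKind::Array accesses, the loop-carried scalar s is a header PHI modeled with MemoryKind::PHI accesses, and its value escaping the region may be modeled with MemoryKind::Value, or MemoryKind::ExitPHI if it is merged by a PHI in the region's exit block.

// Hypothetical SCoP illustrating the MemoryKind categories.
int kinds(const int *A, int N) {
  int s = 0;
  for (int i = 0; i < N; i++)
    s += A[i]; // A[i]: Array access; s: PHI accesses for the header PHI
  return s;    // use of s outside the loop: Value / ExitPHI access
}
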
1367
1368
#ifndef NDEBUG
1369
static void verifyUse(Scop *S, Use &Op, LoopInfo &LI) {
1370
  auto PhysUse = VirtualUse::create(S, Op, &LI, false);
1371
  auto VirtUse = VirtualUse::create(S, Op, &LI, true);
1372
  assert(PhysUse.getKind() == VirtUse.getKind());
1373
}
1374
1375
/// Check the consistency of every statement's MemoryAccesses.
1376
///
1377
/// The check is carried out by expecting the "physical" kind of use (derived
1378
/// from the BasicBlock the instruction resides in) to be the same as the "virtual"
1379
/// kind of use (derived from a statement's MemoryAccess).
1380
///
1381
/// The "physical" uses are taken by ensureValueRead to determine whether to
1382
/// create MemoryAccesses. When done, the kind of scalar access should be the
1383
/// same no matter which way it was derived.
1384
///
1385
/// The MemoryAccesses might be changed by later SCoP-modifying passes and hence
1386
/// can intentionally influence the kind of uses (not corresponding to the
1387
/// "physical" anymore, hence called "virtual"). The CodeGenerator therefore has
1388
/// to pick up the virtual uses. At this point, however, this has not
1389
/// happened yet, such that virtual and physical uses are equivalent.
1390
static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT) {
1391
  for (auto *BB : S->getRegion().blocks()) {
1392
    for (auto &Inst : *BB) {
1393
      auto *Stmt = S->getStmtFor(&Inst);
1394
      if (!Stmt)
1395
        continue;
1396
1397
      if (isIgnoredIntrinsic(&Inst))
1398
        continue;
1399
1400
      // Branch conditions are encoded in the statement domains.
1401
      if (Inst.isTerminator() && Stmt->isBlockStmt())
1402
        continue;
1403
1404
      // Verify all uses.
1405
      for (auto &Op : Inst.operands())
1406
        verifyUse(S, Op, LI);
1407
1408
      // Stores do not produce values used by other statements.
1409
      if (isa<StoreInst>(Inst))
1410
        continue;
1411
1412
      // For every value defined in the block, also check that a use of that
1413
      // value in the same statement would not be an inter-statement use. It can
1414
      // still be synthesizable or load-hoisted, but these kind of instructions
1415
      // are not directly copied in code-generation.
1416
      auto VirtDef =
1417
          VirtualUse::create(S, Stmt, Stmt->getSurroundingLoop(), &Inst, true);
1418
      assert(VirtDef.getKind() == VirtualUse::Synthesizable ||
1419
             VirtDef.getKind() == VirtualUse::Intra ||
1420
             VirtDef.getKind() == VirtualUse::Hoisted);
1421
    }
1422
  }
1423
1424
  if (S->hasSingleExitEdge())
1425
    return;
1426
1427
  // PHINodes in the SCoP region's exit block are also uses to be checked.
1428
  if (!S->getRegion().isTopLevelRegion()) {
1429
    for (auto &Inst : *S->getRegion().getExit()) {
1430
      if (!isa<PHINode>(Inst))
1431
        break;
1432
1433
      for (auto &Op : Inst.operands())
1434
        verifyUse(S, Op, LI);
1435
    }
1436
  }
1437
}
1438
#endif
1439
1440
/// Return the block that is the representing block for @p RN.
1441
112
static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
1442
112
  return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
1443
112
                           : RN->getNodeAs<BasicBlock>();
1444
112
}
1445
1446
void ScopBuilder::buildScop(Region &R, AssumptionCache &AC,
1447
1.19k
                            OptimizationRemarkEmitter &ORE) {
1448
1.19k
  scop.reset(new Scop(R, SE, LI, DT, *SD.getDetectionContext(&R), ORE));
1449
1.19k
1450
1.19k
  buildStmts(R);
1451
1.19k
1452
1.19k
  // Create all invariant load instructions first. These are categorized as
1453
1.19k
  // 'synthesizable', therefore are not part of any ScopStmt but need to be
1454
1.19k
  // created somewhere.
1455
1.19k
  const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
1456
5.89k
  for (BasicBlock *BB : scop->getRegion().blocks()) {
1457
5.89k
    if (isErrorBlock(*BB, scop->getRegion(), LI, DT))
1458
43
      continue;
1459
5.85k
1460
21.6k
    for (Instruction &Inst : *BB) {
1461
21.6k
      LoadInst *Load = dyn_cast<LoadInst>(&Inst);
1462
21.6k
      if (!Load)
1463
19.8k
        continue;
1464
1.80k
1465
1.80k
      if (!RIL.count(Load))
1466
1.51k
        continue;
1467
288
1468
288
      // Invariant loads require a MemoryAccess to be created in some statement.
1469
288
      // It is not important to which statement the MemoryAccess is added
1470
288
      // because it will later be removed from the ScopStmt again. We choose the
1471
288
      // first statement of the basic block the LoadInst is in.
1472
288
      ArrayRef<ScopStmt *> List = scop->getStmtListFor(BB);
1473
288
      assert(!List.empty());
1474
288
      ScopStmt *RILStmt = List.front();
1475
288
      buildMemoryAccess(Load, RILStmt);
1476
288
    }
1477
5.85k
  }
1478
1.19k
  buildAccessFunctions();
1479
1.19k
1480
1.19k
  // In case the region does not have an exiting block we will later (during
1481
1.19k
  // code generation) split the exit block. This will move potential PHI nodes
1482
1.19k
  // from the current exit block into the new region exiting block. Hence, PHI
1483
1.19k
  // nodes that are at this point not part of the region will become part of it.
1484
1.19k
  // To handle these PHI nodes later we will now model their operands as scalar
1485
1.19k
  // accesses. Note that we do not model anything in the exit block if we have
1486
1.19k
  // an exiting block in the region, as there will not be any splitting later.
1487
1.19k
  if (!R.isTopLevelRegion() && !scop->hasSingleExitEdge()) {
1488
201
    for (Instruction &Inst : *R.getExit()) {
1489
201
      PHINode *PHI = dyn_cast<PHINode>(&Inst);
1490
201
      if (!PHI)
1491
159
        break;
1492
42
1493
42
      buildPHIAccesses(nullptr, PHI, nullptr, true);
1494
42
    }
1495
159
  }
1496
1.19k
1497
1.19k
  // Create memory accesses for global reads since all arrays are now known.
1498
1.19k
  auto *AF = SE.getConstant(IntegerType::getInt64Ty(SE.getContext()), 0);
1499
1.19k
  for (auto GlobalReadPair : GlobalReads) {
1500
8
    ScopStmt *GlobalReadStmt = GlobalReadPair.first;
1501
8
    Instruction *GlobalRead = GlobalReadPair.second;
1502
8
    for (auto *BP : ArrayBasePointers)
1503
16
      addArrayAccess(GlobalReadStmt, MemAccInst(GlobalRead), MemoryAccess::READ,
1504
16
                     BP, BP->getType(), false, {AF}, {nullptr}, GlobalRead);
1505
8
  }
1506
1.19k
1507
1.19k
  scop->buildInvariantEquivalenceClasses();
1508
1.19k
1509
1.19k
  /// A map from basic blocks to their invalid domains.
1510
1.19k
  DenseMap<BasicBlock *, isl::set> InvalidDomainMap;
1511
1.19k
1512
1.19k
  if (!scop->buildDomains(&R, DT, LI, InvalidDomainMap)) {
1513
8
    LLVM_DEBUG(
1514
8
        dbgs() << "Bailing-out because buildDomains encountered problems\n");
1515
8
    return;
1516
8
  }
1517
1.18k
1518
1.18k
  scop->addUserAssumptions(AC, DT, LI, InvalidDomainMap);
1519
1.18k
1520
1.18k
  // Initialize the invalid domain.
1521
1.18k
  for (ScopStmt &Stmt : scop->Stmts)
1522
9.02k
    if (Stmt.isBlockStmt())
1523
8.91k
      Stmt.setInvalidDomain(InvalidDomainMap[Stmt.getEntryBlock()]);
1524
112
    else
1525
112
      Stmt.setInvalidDomain(InvalidDomainMap[getRegionNodeBasicBlock(
1526
112
          Stmt.getRegion()->getNode())]);
1527
1.18k
1528
1.18k
  // Remove empty statements.
1529
1.18k
  // Exit early in case there are no executable statements left in this scop.
1530
1.18k
  scop->removeStmtNotInDomainMap();
1531
1.18k
  scop->simplifySCoP(false);
1532
1.18k
  if (scop->isEmpty()) {
1533
31
    LLVM_DEBUG(dbgs() << "Bailing-out because SCoP is empty\n");
1534
31
    return;
1535
31
  }
1536
1.15k
1537
1.15k
  // The ScopStmts now have enough information to initialize themselves.
1538
2.30k
  for (ScopStmt &Stmt : *scop) {
1539
2.30k
    collectSurroundingLoops(Stmt);
1540
2.30k
1541
2.30k
    buildDomain(Stmt);
1542
2.30k
    buildAccessRelations(Stmt);
1543
2.30k
1544
2.30k
    if (DetectReductions)
1545
2.30k
      checkForReductions(Stmt);
1546
2.30k
  }
1547
1.15k
1548
1.15k
  // Check early for a feasible runtime context.
1549
1.15k
  if (!scop->hasFeasibleRuntimeContext()) {
1550
3
    LLVM_DEBUG(dbgs() << "Bailing-out because of unfeasible context (early)\n");
1551
3
    return;
1552
3
  }
1553
1.14k
1554
1.14k
  // Check early for profitability. Afterwards it cannot change anymore,
1555
1.14k
  // only the runtime context could become infeasible.
1556
1.14k
  if (!scop->isProfitable(UnprofitableScalarAccs)) {
1557
2
    scop->invalidate(PROFITABLE, DebugLoc());
1558
2
    LLVM_DEBUG(
1559
2
        dbgs() << "Bailing-out because SCoP is not considered profitable\n");
1560
2
    return;
1561
2
  }
1562
1.14k
1563
1.14k
  scop->buildSchedule(LI);
1564
1.14k
1565
1.14k
  scop->finalizeAccesses();
1566
1.14k
1567
1.14k
  scop->realignParams();
1568
1.14k
  scop->addUserContext();
1569
1.14k
1570
1.14k
  // After the context was fully constructed, thus all our knowledge about
1571
1.14k
  // the parameters is in there, we add all recorded assumptions to the
1572
1.14k
  // assumed/invalid context.
1573
1.14k
  scop->addRecordedAssumptions();
1574
1.14k
1575
1.14k
  scop->simplifyContexts();
1576
1.14k
  if (!scop->buildAliasChecks(AA)) {
1577
6
    LLVM_DEBUG(dbgs() << "Bailing-out because could not build alias checks\n");
1578
6
    return;
1579
6
  }
1580
1.14k
1581
1.14k
  scop->hoistInvariantLoads();
1582
1.14k
  scop->canonicalizeDynamicBasePtrs();
1583
1.14k
  scop->verifyInvariantLoads();
1584
1.14k
  scop->simplifySCoP(true);
1585
1.14k
1586
1.14k
  // Check late for a feasible runtime context because profitability did not
1587
1.14k
  // change.
1588
1.14k
  if (!scop->hasFeasibleRuntimeContext()) {
1589
18
    LLVM_DEBUG(dbgs() << "Bailing-out because of unfeasible context (late)\n");
1590
18
    return;
1591
18
  }
1592
1.14k
1593
#ifndef NDEBUG
1594
  verifyUses(scop.get(), LI, DT);
1595
#endif
1596
}
1597
1598
ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AliasAnalysis &AA,
1599
                         const DataLayout &DL, DominatorTree &DT, LoopInfo &LI,
1600
                         ScopDetection &SD, ScalarEvolution &SE,
1601
                         OptimizationRemarkEmitter &ORE)
1602
1.19k
    : AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE) {
1603
1.19k
  DebugLoc Beg, End;
1604
1.19k
  auto P = getBBPairForRegion(R);
1605
1.19k
  getDebugLocations(P, Beg, End);
1606
1.19k
1607
1.19k
  std::string Msg = "SCoP begins here.";
1608
1.19k
  ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEntry", Beg, P.first)
1609
1.19k
           << Msg);
1610
1.19k
1611
1.19k
  buildScop(*R, AC, ORE);
1612
1.19k
1613
1.19k
  LLVM_DEBUG(dbgs() << *scop);
1614
1.19k
1615
1.19k
  if (!scop->hasFeasibleRuntimeContext()) {
1616
68
    InfeasibleScops++;
1617
68
    Msg = "SCoP ends here but was dismissed.";
1618
68
    LLVM_DEBUG(dbgs() << "SCoP detected but dismissed\n");
1619
68
    scop.reset();
1620
1.12k
  } else {
1621
1.12k
    Msg = "SCoP ends here.";
1622
1.12k
    ++ScopFound;
1623
1.12k
    if (scop->getMaxLoopDepth() > 0)
1624
1.05k
      ++RichScopFound;
1625
1.12k
  }
1626
1.19k
1627
1.19k
  if (R->isTopLevelRegion())
1628
4
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.first)
1629
4
             << Msg);
1630
1.18k
  else
1631
1.18k
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.second)
1632
1.18k
             << Msg);
1633
1.19k
}
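
For context, the constructor above is typically driven by the ScopInfo analysis; the following is a hedged sketch of that call pattern. The getScop() accessor and the standalone wrapper function are assumptions made for illustration, and the plumbing that obtains the per-function analyses is omitted.

// Hypothetical driver, mirroring how ScopInfo is expected to use ScopBuilder.
// All analyses (AC, AA, DL, DT, LI, SD, SE, ORE) are assumed to be available
// for the function containing region R.
std::unique_ptr<Scop> buildScopFor(Region *R, AssumptionCache &AC,
                                   AliasAnalysis &AA, const DataLayout &DL,
                                   DominatorTree &DT, LoopInfo &LI,
                                   ScopDetection &SD, ScalarEvolution &SE,
                                   OptimizationRemarkEmitter &ORE) {
  ScopBuilder SB(R, AC, AA, DL, DT, LI, SD, SE, ORE);
  return SB.getScop(); // empty if the SCoP was dismissed as infeasible
}
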