//===- ScopBuilder.cpp ----------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Create a polyhedral description for a static control flow region.
//
// The pass creates a polyhedral description of the Scops detected by the SCoP
// detection derived from their LLVM-IR code.
//
//===----------------------------------------------------------------------===//

#include "polly/ScopBuilder.h"
#include "polly/Options.h"
#include "polly/ScopDetection.h"
#include "polly/ScopDetectionDiagnostic.h"
#include "polly/ScopInfo.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "polly/Support/VirtualInstruction.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <string>
#include <tuple>
#include <vector>

using namespace llvm;
using namespace polly;

#define DEBUG_TYPE "polly-scops"

STATISTIC(ScopFound, "Number of valid Scops");
STATISTIC(RichScopFound, "Number of Scops containing a loop");
STATISTIC(InfeasibleScops,
          "Number of SCoPs with statically infeasible context.");

bool polly::ModelReadOnlyScalars;

static cl::opt<bool, true> XModelReadOnlyScalars(
    "polly-analyze-read-only-scalars",
    cl::desc("Model read-only scalar values in the scop description"),
    cl::location(ModelReadOnlyScalars), cl::Hidden, cl::ZeroOrMore,
    cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool> UnprofitableScalarAccs(
    "polly-unprofitable-scalar-accs",
    cl::desc("Count statements with scalar accesses as not optimizable"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool> DetectFortranArrays(
    "polly-detect-fortran-arrays",
    cl::desc("Detect Fortran arrays and use this for code generation"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool> DetectReductions("polly-detect-reductions",
                                      cl::desc("Detect and exploit reductions"),
                                      cl::Hidden, cl::ZeroOrMore,
                                      cl::init(true), cl::cat(PollyCategory));

// Multiplicative reductions can be disabled separately as these kinds of
// operations can overflow easily. Additive reductions and bit operations, in
// contrast, are fairly stable.
static cl::opt<bool> DisableMultiplicativeReductions(
    "polly-disable-multiplicative-reductions",
    cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore,
    cl::init(false), cl::cat(PollyCategory));

void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
                                   Region *NonAffineSubRegion,
                                   bool IsExitBlock) {
  // PHI nodes in the region's exit block (i.e., when IsExitBlock is true) are
  // not modeled as ordinary PHI nodes, as they are not part of the region.
  // However, we model their operands in the predecessor blocks that are part
  // of the region as regular scalar accesses.

  // If we can synthesize a PHI we can skip it, but only if it is inside the
  // region. If it is not, it can only be in the region's exit block; in this
  // case we model the operands but not the PHI itself.
  auto *Scope = LI.getLoopFor(PHI->getParent());
  if (!IsExitBlock && canSynthesize(PHI, *scop, &SE, Scope))
    return;

  // PHI nodes are modeled as if they had been demoted prior to the SCoP
  // detection. Hence, the PHI is a load of a new memory location in which the
  // incoming value was written at the end of the incoming basic block.
  bool OnlyNonAffineSubRegionOperands = true;
  for (unsigned u = 0; u < PHI->getNumIncomingValues(); u++) {
    Value *Op = PHI->getIncomingValue(u);
    BasicBlock *OpBB = PHI->getIncomingBlock(u);
    ScopStmt *OpStmt = scop->getLastStmtFor(OpBB);

    // Do not build PHI dependences inside a non-affine subregion, but make
    // sure that the necessary scalar values are still made available.
    if (NonAffineSubRegion && NonAffineSubRegion->contains(OpBB)) {
      auto *OpInst = dyn_cast<Instruction>(Op);
      if (!OpInst || !NonAffineSubRegion->contains(OpInst))
        ensureValueRead(Op, OpStmt);
      continue;
    }

    OnlyNonAffineSubRegionOperands = false;
    ensurePHIWrite(PHI, OpStmt, OpBB, Op, IsExitBlock);
  }

  if (!OnlyNonAffineSubRegionOperands && !IsExitBlock) {
    addPHIReadAccess(PHIStmt, PHI);
  }
}

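// Illustrative sketch (added for exposition; the IR names below are made up):
// conceptually, a loop-carried PHI such as
//
//   loop:
//     %sum = phi i32 [ 0, %entry ], [ %sum.next, %loop ]
//
// is treated as if it had been demoted to memory: each incoming edge performs
// a PHI write of its incoming value (0 in %entry, %sum.next in %loop), and
// the PHI itself becomes a PHI read of that virtual location. ensurePHIWrite()
// and addPHIReadAccess() create exactly these accesses.
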
void ScopBuilder::buildScalarDependences(ScopStmt *UserStmt,
                                         Instruction *Inst) {
  assert(!isa<PHINode>(Inst));

  // Pull-in required operands.
  for (Use &Op : Inst->operands())
    ensureValueRead(Op.get(), UserStmt);
}

void ScopBuilder::buildEscapingDependences(Instruction *Inst) {
  // Check for uses of this instruction outside the scop. Because we do not
  // iterate over such instructions and therefore did not "ensure" the
  // existence of a write, we must determine such uses here.
  if (scop->isEscaping(Inst))
    ensureValueWrite(Inst);
}

/// Check that a value is a Fortran Array descriptor.
///
/// We check if V has the following structure:
/// %"struct.array1_real(kind=8)" = type { i8*, i<zz>, i<zz>,
///                                   [<num> x %struct.descriptor_dimension] }
///
///
/// %struct.descriptor_dimension = type { i<zz>, i<zz>, i<zz> }
///
/// 1. V's type name starts with "struct.array"
/// 2. V's type has layout as shown.
/// 3. Final member of V's type has name "struct.descriptor_dimension",
/// 4. "struct.descriptor_dimension" has layout as shown.
/// 5. Consistent use of i<zz> where <zz> is some fixed integer number.
///
/// We are interested in such types since this is the code that dragonegg
/// generates for Fortran array descriptors.
///
/// @param V the Value to be checked.
///
/// @returns True if V is a Fortran array descriptor, False otherwise.
bool isFortranArrayDescriptor(Value *V) {
  PointerType *PTy = dyn_cast<PointerType>(V->getType());

  if (!PTy)
    return false;

  Type *Ty = PTy->getElementType();
  assert(Ty && "Ty expected to be initialized");
  auto *StructArrTy = dyn_cast<StructType>(Ty);

  if (!(StructArrTy && StructArrTy->hasName()))
    return false;

  if (!StructArrTy->getName().startswith("struct.array"))
    return false;

  if (StructArrTy->getNumElements() != 4)
    return false;

  const ArrayRef<Type *> ArrMemberTys = StructArrTy->elements();

  // i8* match
  if (ArrMemberTys[0] != Type::getInt8PtrTy(V->getContext()))
    return false;

  // Get a reference to the int type and check that all the members
  // share the same int type.
  Type *IntTy = ArrMemberTys[1];
  if (ArrMemberTys[2] != IntTy)
    return false;

  // type: [<num> x %struct.descriptor_dimension]
  ArrayType *DescriptorDimArrayTy = dyn_cast<ArrayType>(ArrMemberTys[3]);
  if (!DescriptorDimArrayTy)
    return false;

  // type: %struct.descriptor_dimension := type { ixx, ixx, ixx }
  StructType *DescriptorDimTy =
      dyn_cast<StructType>(DescriptorDimArrayTy->getElementType());

  if (!(DescriptorDimTy && DescriptorDimTy->hasName()))
    return false;

  if (DescriptorDimTy->getName() != "struct.descriptor_dimension")
    return false;

  if (DescriptorDimTy->getNumElements() != 3)
    return false;

  for (auto MemberTy : DescriptorDimTy->elements()) {
    if (MemberTy != IntTy)
      return false;
  }

  return true;
}

Value *ScopBuilder::findFADAllocationVisible(MemAccInst Inst) {
  // match: 4.1 & 4.2 store/load
  if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst))
    return nullptr;

  // match: 4
  if (Inst.getAlignment() != 8)
    return nullptr;

  Value *Address = Inst.getPointerOperand();

  const BitCastInst *Bitcast = nullptr;
  // [match: 3]
  if (auto *Slot = dyn_cast<GetElementPtrInst>(Address)) {
    Value *TypedMem = Slot->getPointerOperand();
    // match: 2
    Bitcast = dyn_cast<BitCastInst>(TypedMem);
  } else {
    // match: 2
    Bitcast = dyn_cast<BitCastInst>(Address);
  }

  if (!Bitcast)
    return nullptr;

  auto *MallocMem = Bitcast->getOperand(0);

  // match: 1
  auto *MallocCall = dyn_cast<CallInst>(MallocMem);
  if (!MallocCall)
    return nullptr;

  Function *MallocFn = MallocCall->getCalledFunction();
  if (!(MallocFn && MallocFn->hasName() && MallocFn->getName() == "malloc"))
    return nullptr;

  // Find all uses of the malloc'd memory. We are looking for a "store" into a
  // struct whose type is the Fortran descriptor type.
  for (auto user : MallocMem->users()) {
    // match: 5
    auto *MallocStore = dyn_cast<StoreInst>(user);
    if (!MallocStore)
      continue;

    auto *DescriptorGEP =
        dyn_cast<GEPOperator>(MallocStore->getPointerOperand());
    if (!DescriptorGEP)
      continue;

    // match: 5
    auto DescriptorType =
        dyn_cast<StructType>(DescriptorGEP->getSourceElementType());
    if (!(DescriptorType && DescriptorType->hasName()))
      continue;

    Value *Descriptor = dyn_cast<Value>(DescriptorGEP->getPointerOperand());

    if (!Descriptor)
      continue;

    if (!isFortranArrayDescriptor(Descriptor))
      continue;

    return Descriptor;
  }

  return nullptr;
}

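// Illustrative sketch (added for exposition; the correspondence to the
// "match: <n>" comments above is an assumption, and the IR names are made
// up). For a "visible" allocation the matched chain looks roughly like:
//
//   %mem = call i8* @malloc(i64 %size)                        ; match: 1
//   %buf = bitcast i8* %mem to double*                        ; match: 2
//   %elt = getelementptr double, double* %buf, i64 %idx       ; match: 3
//   store double %val, double* %elt, align 8                  ; match: 4
//   ...
//   store i8* %mem, i8** %descriptor.base.field               ; match: 5
//
// i.e., malloc'd memory is stored into the base-address field of a struct
// that isFortranArrayDescriptor() accepts.
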
Value *ScopBuilder::findFADAllocationInvisible(MemAccInst Inst) {
  // match: 3
  if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst))
    return nullptr;

  Value *Slot = Inst.getPointerOperand();

  LoadInst *MemLoad = nullptr;
  // [match: 2]
  if (auto *SlotGEP = dyn_cast<GetElementPtrInst>(Slot)) {
    // match: 1
    MemLoad = dyn_cast<LoadInst>(SlotGEP->getPointerOperand());
  } else {
    // match: 1
    MemLoad = dyn_cast<LoadInst>(Slot);
  }

  if (!MemLoad)
    return nullptr;

  auto *BitcastOperator =
      dyn_cast<BitCastOperator>(MemLoad->getPointerOperand());
  if (!BitcastOperator)
    return nullptr;

  Value *Descriptor = dyn_cast<Value>(BitcastOperator->getOperand(0));
  if (!Descriptor)
    return nullptr;

  if (!isFortranArrayDescriptor(Descriptor))
    return nullptr;

  return Descriptor;
}

bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt) {
  Value *Val = Inst.getValueOperand();
  Type *ElementType = Val->getType();
  Value *Address = Inst.getPointerOperand();
  const SCEV *AccessFunction =
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
  const SCEVUnknown *BasePointer =
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
  enum MemoryAccess::AccessType AccType =
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;

  if (auto *BitCast = dyn_cast<BitCastInst>(Address)) {
    auto *Src = BitCast->getOperand(0);
    auto *SrcTy = Src->getType();
    auto *DstTy = BitCast->getType();
    // Do not try to delinearize non-sized (opaque) pointers.
    if ((SrcTy->isPointerTy() && !SrcTy->getPointerElementType()->isSized()) ||
        (DstTy->isPointerTy() && !DstTy->getPointerElementType()->isSized())) {
      return false;
    }
    if (SrcTy->isPointerTy() && DstTy->isPointerTy() &&
        DL.getTypeAllocSize(SrcTy->getPointerElementType()) ==
            DL.getTypeAllocSize(DstTy->getPointerElementType()))
      Address = Src;
  }

  auto *GEP = dyn_cast<GetElementPtrInst>(Address);
  if (!GEP)
    return false;

  std::vector<const SCEV *> Subscripts;
  std::vector<int> Sizes;
  std::tie(Subscripts, Sizes) = getIndexExpressionsFromGEP(GEP, SE);
  auto *BasePtr = GEP->getOperand(0);

  if (auto *BasePtrCast = dyn_cast<BitCastInst>(BasePtr))
    BasePtr = BasePtrCast->getOperand(0);

  // Check for identical base pointers to ensure that we do not miss index
  // offsets that have been added before this GEP is applied.
  if (BasePtr != BasePointer->getValue())
    return false;

  std::vector<const SCEV *> SizesSCEV;

  const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();

  Loop *SurroundingLoop = Stmt->getSurroundingLoop();
  for (auto *Subscript : Subscripts) {
    InvariantLoadsSetTy AccessILS;
    if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, Subscript, SE,
                      &AccessILS))
      return false;

    for (LoadInst *LInst : AccessILS)
      if (!ScopRIL.count(LInst))
        return false;
  }

  if (Sizes.empty())
    return false;

  SizesSCEV.push_back(nullptr);

  for (auto V : Sizes)
    SizesSCEV.push_back(SE.getSCEV(
        ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V)));

  addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
                 true, Subscripts, SizesSCEV, Val);
  return true;
}

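// Illustrative example (added for exposition): for an access to an array with
// compile-time constant dimensions, e.g.
//
//   float A[128][64];
//   ...  A[i][j] = 0;
//
// the GEP carries one index expression per dimension, so the access can be
// modeled per dimension, roughly with subscripts i and j and the constant
// inner size 64, rather than in the flattened one-dimensional form
// A[64 * i + j].
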
bool ScopBuilder::buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt) {
  if (!PollyDelinearize)
    return false;

  Value *Address = Inst.getPointerOperand();
  Value *Val = Inst.getValueOperand();
  Type *ElementType = Val->getType();
  unsigned ElementSize = DL.getTypeAllocSize(ElementType);
  enum MemoryAccess::AccessType AccType =
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;

  const SCEV *AccessFunction =
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
  const SCEVUnknown *BasePointer =
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));

  assert(BasePointer && "Could not find base pointer");

  auto &InsnToMemAcc = scop->getInsnToMemAccMap();
  auto AccItr = InsnToMemAcc.find(Inst);
  if (AccItr == InsnToMemAcc.end())
    return false;

  std::vector<const SCEV *> Sizes = {nullptr};

  Sizes.insert(Sizes.end(), AccItr->second.Shape->DelinearizedSizes.begin(),
               AccItr->second.Shape->DelinearizedSizes.end());

  // In case only the element size is contained in the 'Sizes' array, the
  // access does not access a real multi-dimensional array. Hence, we allow
  // the normal single-dimensional access construction to handle this.
  if (Sizes.size() == 1)
    return false;

  // Remove the element size. This information is already provided by the
  // ElementSize parameter. In case the element size of this access and the
  // element size used for delinearization differ, the delinearization is
  // incorrect. Hence, we invalidate the scop.
  //
  // TODO: Handle delinearization with differing element sizes.
  auto DelinearizedSize =
      cast<SCEVConstant>(Sizes.back())->getAPInt().getSExtValue();
  Sizes.pop_back();
  if (ElementSize != DelinearizedSize)
    scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent());

  addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
                 true, AccItr->second.DelinearizedSubscripts, Sizes, Val);
  return true;
}

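// Illustrative example (added for exposition): the parametric counterpart
// covers manually linearized accesses such as
//
//   void f(long n, long m, double A[]) { ... A[i * m + j] ... }
//
// Here the delinearization computed earlier (looked up via the InsnToMemAcc
// map) recovers the subscripts i and j together with the parametric size m,
// so the access is modeled as the two-dimensional A[i][j].
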
bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt) {
  auto *MemIntr = dyn_cast_or_null<MemIntrinsic>(Inst);

  if (MemIntr == nullptr)
    return false;

  auto *L = LI.getLoopFor(Inst->getParent());
  auto *LengthVal = SE.getSCEVAtScope(MemIntr->getLength(), L);
  assert(LengthVal);

  // Check if the length value is actually affine or if we overapproximate it.
  InvariantLoadsSetTy AccessILS;
  const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();

  Loop *SurroundingLoop = Stmt->getSurroundingLoop();
  bool LengthIsAffine = isAffineExpr(&scop->getRegion(), SurroundingLoop,
                                     LengthVal, SE, &AccessILS);
  for (LoadInst *LInst : AccessILS)
    if (!ScopRIL.count(LInst))
      LengthIsAffine = false;
  if (!LengthIsAffine)
    LengthVal = nullptr;

  auto *DestPtrVal = MemIntr->getDest();
  assert(DestPtrVal);

  auto *DestAccFunc = SE.getSCEVAtScope(DestPtrVal, L);
  assert(DestAccFunc);
  // Ignore accesses to "NULL".
  // TODO: We could use this to optimize the region further, e.g., intersect
  //       the context with
  //          isl_set_complement(isl_set_params(getDomain()))
  //       as we know it would be undefined to execute this instruction anyway.
  if (DestAccFunc->isZero())
    return true;

  auto *DestPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(DestAccFunc));
  assert(DestPtrSCEV);
  DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV);
  addArrayAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(),
                 IntegerType::getInt8Ty(DestPtrVal->getContext()),
                 LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr},
                 Inst.getValueOperand());

  auto *MemTrans = dyn_cast<MemTransferInst>(MemIntr);
  if (!MemTrans)
    return true;

  auto *SrcPtrVal = MemTrans->getSource();
  assert(SrcPtrVal);

  auto *SrcAccFunc = SE.getSCEVAtScope(SrcPtrVal, L);
  assert(SrcAccFunc);
  // Ignore accesses to "NULL".
  // TODO: See above TODO.
  if (SrcAccFunc->isZero())
    return true;

  auto *SrcPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(SrcAccFunc));
  assert(SrcPtrSCEV);
  SrcAccFunc = SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV);
  addArrayAccess(Stmt, Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(),
                 IntegerType::getInt8Ty(SrcPtrVal->getContext()),
                 LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr},
                 Inst.getValueOperand());

  return true;
}

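// Illustrative example (added for exposition): a call such as
//
//   memcpy(Dst, Src, N);
//
// is modeled as a MUST_WRITE of N bytes (element type i8) to Dst's base array
// plus a READ of N bytes from Src's base array. If N is not affine, LengthVal
// is set to nullptr above and the accessed range is overapproximated.
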
bool ScopBuilder::buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt) {
  auto *CI = dyn_cast_or_null<CallInst>(Inst);

  if (CI == nullptr)
    return false;

  if (CI->doesNotAccessMemory() || isIgnoredIntrinsic(CI))
    return true;

  bool ReadOnly = false;
  auto *AF = SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0);
  auto *CalledFunction = CI->getCalledFunction();
  switch (AA.getModRefBehavior(CalledFunction)) {
  case FMRB_UnknownModRefBehavior:
    llvm_unreachable("Unknown mod ref behaviour cannot be represented.");
  case FMRB_DoesNotAccessMemory:
    return true;
  case FMRB_DoesNotReadMemory:
  case FMRB_OnlyAccessesInaccessibleMem:
  case FMRB_OnlyAccessesInaccessibleOrArgMem:
    return false;
  case FMRB_OnlyReadsMemory:
    GlobalReads.emplace_back(Stmt, CI);
    return true;
  case FMRB_OnlyReadsArgumentPointees:
    ReadOnly = true;
  // Fall through
  case FMRB_OnlyAccessesArgumentPointees: {
    auto AccType = ReadOnly ? MemoryAccess::READ : MemoryAccess::MAY_WRITE;
    Loop *L = LI.getLoopFor(Inst->getParent());
    for (const auto &Arg : CI->arg_operands()) {
      if (!Arg->getType()->isPointerTy())
        continue;

      auto *ArgSCEV = SE.getSCEVAtScope(Arg, L);
      if (ArgSCEV->isZero())
        continue;

      auto *ArgBasePtr = cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
      addArrayAccess(Stmt, Inst, AccType, ArgBasePtr->getValue(),
                     ArgBasePtr->getType(), false, {AF}, {nullptr}, CI);
    }
    return true;
  }
  }

  return true;
}

void ScopBuilder::buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt) {
  Value *Address = Inst.getPointerOperand();
  Value *Val = Inst.getValueOperand();
  Type *ElementType = Val->getType();
  enum MemoryAccess::AccessType AccType =
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;

  const SCEV *AccessFunction =
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
  const SCEVUnknown *BasePointer =
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));

  assert(BasePointer && "Could not find base pointer");
  AccessFunction = SE.getMinusSCEV(AccessFunction, BasePointer);

  // Check if the access depends on a loop contained in a non-affine subregion.
  bool isVariantInNonAffineLoop = false;
  SetVector<const Loop *> Loops;
  findLoops(AccessFunction, Loops);
  for (const Loop *L : Loops)
    if (Stmt->contains(L)) {
      isVariantInNonAffineLoop = true;
      break;
    }

  InvariantLoadsSetTy AccessILS;

  Loop *SurroundingLoop = Stmt->getSurroundingLoop();
  bool IsAffine = !isVariantInNonAffineLoop &&
                  isAffineExpr(&scop->getRegion(), SurroundingLoop,
                               AccessFunction, SE, &AccessILS);

  const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
  for (LoadInst *LInst : AccessILS)
    if (!ScopRIL.count(LInst))
      IsAffine = false;

  if (!IsAffine && AccType == MemoryAccess::MUST_WRITE)
    AccType = MemoryAccess::MAY_WRITE;

  addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
                 IsAffine, {AccessFunction}, {nullptr}, Val);
}

void ScopBuilder::buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt) {
  if (buildAccessMemIntrinsic(Inst, Stmt))
    return;

  if (buildAccessCallInst(Inst, Stmt))
    return;

  if (buildAccessMultiDimFixed(Inst, Stmt))
    return;

  if (buildAccessMultiDimParam(Inst, Stmt))
    return;

  buildAccessSingleDim(Inst, Stmt);
}

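// Note (added for exposition): the builders above form a fallback chain,
// ordered from the most specific modeling to the most general one.
// buildAccessSingleDim() cannot fail, so every modeled load or store ends up
// with at least a one-dimensional, possibly overapproximated, array access.
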
void ScopBuilder::buildAccessFunctions() {
  for (auto &Stmt : *scop) {
    if (Stmt.isBlockStmt()) {
      buildAccessFunctions(&Stmt, *Stmt.getBasicBlock());
      continue;
    }

    Region *R = Stmt.getRegion();
    for (BasicBlock *BB : R->blocks())
      buildAccessFunctions(&Stmt, *BB, R);
  }
}

bool ScopBuilder::shouldModelInst(Instruction *Inst, Loop *L) {
  return !isa<TerminatorInst>(Inst) && !isIgnoredIntrinsic(Inst) &&
         !canSynthesize(Inst, *scop, &SE, L);
}

void ScopBuilder::buildStmts(Region &SR) {
  if (scop->isNonAffineSubRegion(&SR)) {
    std::vector<Instruction *> Instructions;
    Loop *SurroundingLoop =
        getFirstNonBoxedLoopFor(SR.getEntry(), LI, scop->getBoxedLoops());
    for (Instruction &Inst : *SR.getEntry())
      if (shouldModelInst(&Inst, SurroundingLoop))
        Instructions.push_back(&Inst);
    scop->addScopStmt(&SR, SurroundingLoop, Instructions);
    return;
  }

  for (auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I)
    if (I->isSubRegion())
      buildStmts(*I->getNodeAs<Region>());
    else {
      int Count = 0;
      std::vector<Instruction *> Instructions;
      for (Instruction &Inst : *I->getNodeAs<BasicBlock>()) {
        Loop *L = LI.getLoopFor(Inst.getParent());
        if (shouldModelInst(&Inst, L))
          Instructions.push_back(&Inst);
        if (Inst.getMetadata("polly_split_after")) {
          Loop *SurroundingLoop = LI.getLoopFor(I->getNodeAs<BasicBlock>());
          scop->addScopStmt(I->getNodeAs<BasicBlock>(), SurroundingLoop,
                            Instructions, Count);
          Count++;
          Instructions.clear();
        }
      }
      Loop *SurroundingLoop = LI.getLoopFor(I->getNodeAs<BasicBlock>());
      scop->addScopStmt(I->getNodeAs<BasicBlock>(), SurroundingLoop,
                        Instructions, Count);
    }
}

704
                                       Region *NonAffineSubRegion,
705
5.89k
                                       bool IsExitBlock) {
706
5.89k
  assert(
707
5.89k
      !Stmt == IsExitBlock &&
708
5.89k
      "The exit BB is the only one that cannot be represented by a statement");
709
5.89k
  assert(IsExitBlock || Stmt->represents(&BB));
710
5.89k
711
5.89k
  // We do not build access functions for error blocks, as they may contain
712
5.89k
  // instructions we can not model.
713
5.89k
  if (
isErrorBlock(BB, scop->getRegion(), LI, DT) && 5.89k
!IsExitBlock45
)
714
45
    return;
715
5.85k
716
5.85k
  int Count = 0;
717
5.85k
  bool Split = false;
718
21.2k
  for (Instruction &Inst : BB) {
719
21.2k
    if (
Split21.2k
) {
720
18
      Split = false;
721
18
      Count++;
722
18
    }
723
21.2k
    if (Inst.getMetadata("polly_split_after"))
724
18
      Split = true;
725
21.2k
726
21.2k
    if (
Stmt && 21.2k
Stmt->isBlockStmt()21.0k
&&
Stmt != scop->getStmtListFor(&BB)[Count]19.7k
)
727
64
      continue;
728
21.1k
729
21.1k
    PHINode *PHI = dyn_cast<PHINode>(&Inst);
730
21.1k
    if (PHI)
731
1.89k
      buildPHIAccesses(Stmt, PHI, NonAffineSubRegion, IsExitBlock);
732
21.1k
733
21.1k
    // For the exit block we stop modeling after the last PHI node.
734
21.1k
    if (
!PHI && 21.1k
IsExitBlock19.2k
)
735
161
      break;
736
20.9k
737
20.9k
    
if (auto 20.9k
MemInst20.9k
= MemAccInst::dyn_cast(Inst)) {
738
3.59k
      assert(Stmt && "Cannot build access function in non-existing statement");
739
3.59k
      buildMemoryAccess(MemInst, Stmt);
740
3.59k
    }
741
20.9k
742
20.9k
    if (isIgnoredIntrinsic(&Inst))
743
29
      continue;
744
20.9k
745
20.9k
    // PHI nodes have already been modeled above and TerminatorInsts that are
746
20.9k
    // not part of a non-affine subregion are fully modeled and regenerated
747
20.9k
    // from the polyhedral domains. Hence, they do not need to be modeled as
748
20.9k
    // explicit data dependences.
749
20.9k
    
if (20.9k
!PHI && 20.9k
(!isa<TerminatorInst>(&Inst) || 19.0k
NonAffineSubRegion5.68k
))
750
13.7k
      buildScalarDependences(Stmt, &Inst);
751
20.9k
752
20.9k
    if (!IsExitBlock)
753
20.9k
      buildEscapingDependences(&Inst);
754
21.2k
  }
755
5.89k
}
756
757
MemoryAccess *ScopBuilder::addMemoryAccess(
    ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType,
    Value *BaseAddress, Type *ElementType, bool Affine, Value *AccessValue,
    ArrayRef<const SCEV *> Subscripts, ArrayRef<const SCEV *> Sizes,
    MemoryKind Kind) {
  bool isKnownMustAccess = false;

  // Accesses in single-basic block statements are always executed.
  if (Stmt->isBlockStmt())
    isKnownMustAccess = true;

  if (Stmt->isRegionStmt()) {
    // Accesses that dominate the exit block of a non-affine region are always
    // executed. In non-affine regions there may exist MemoryKind::Values that
    // do not dominate the exit. MemoryKind::Values will always dominate the
    // exit and MemoryKind::PHIs only if there is at most one PHI_WRITE in the
    // non-affine region.
    if (Inst && DT.dominates(Inst->getParent(), Stmt->getRegion()->getExit()))
      isKnownMustAccess = true;
  }

  // Non-affine PHI writes do not "happen" at a particular instruction, but
  // after exiting the statement. Therefore they are guaranteed to execute and
  // overwrite the old value.
  if (Kind == MemoryKind::PHI || Kind == MemoryKind::ExitPHI)
    isKnownMustAccess = true;

  if (!isKnownMustAccess && AccType == MemoryAccess::MUST_WRITE)
    AccType = MemoryAccess::MAY_WRITE;

  auto *Access = new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType,
                                  Affine, Subscripts, Sizes, AccessValue, Kind);

  scop->addAccessFunction(Access);
  Stmt->addAccess(Access);
  return Access;
}

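// Illustrative consequence (added for exposition) of the logic above: a store
// in a non-affine region statement whose block does not dominate the region's
// exit, e.g. a store under a data-dependent condition, is not known to execute
// on every visit of the statement, so its MUST_WRITE is demoted to MAY_WRITE.
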
void ScopBuilder::addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst,
                                 MemoryAccess::AccessType AccType,
                                 Value *BaseAddress, Type *ElementType,
                                 bool IsAffine,
                                 ArrayRef<const SCEV *> Subscripts,
                                 ArrayRef<const SCEV *> Sizes,
                                 Value *AccessValue) {
  ArrayBasePointers.insert(BaseAddress);
  auto *MemAccess = addMemoryAccess(Stmt, MemAccInst, AccType, BaseAddress,
                                    ElementType, IsAffine, AccessValue,
                                    Subscripts, Sizes, MemoryKind::Array);

  if (!DetectFortranArrays)
    return;

  if (Value *FAD = findFADAllocationInvisible(MemAccInst))
    MemAccess->setFortranArrayDescriptor(FAD);
  else if (Value *FAD = findFADAllocationVisible(MemAccInst))
    MemAccess->setFortranArrayDescriptor(FAD);
}

void ScopBuilder::ensureValueWrite(Instruction *Inst) {
  // Find the statement that defines the value of Inst. That statement has to
  // write the value to make it available to those statements that read it.
  ScopStmt *Stmt = scop->getStmtFor(Inst);

  // It is possible that the value is synthesizable within a loop (such that it
  // is not part of any statement), but not after the loop (where you need the
  // number of loop round-trips to synthesize it). In LCSSA-form a PHI node will
  // avoid this. In case the IR has no such PHI, use the last statement (where
  // the value is synthesizable) to write the value.
  if (!Stmt)
    Stmt = scop->getLastStmtFor(Inst->getParent());

  // Inst not defined within this SCoP.
  if (!Stmt)
    return;

  // Do not process further if the instruction is already written.
  if (Stmt->lookupValueWriteOf(Inst))
    return;

  addMemoryAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, Inst, Inst->getType(),
                  true, Inst, ArrayRef<const SCEV *>(),
                  ArrayRef<const SCEV *>(), MemoryKind::Value);
}

void ScopBuilder::ensureValueRead(Value *V, ScopStmt *UserStmt) {
  // TODO: Make ScopStmt::ensureValueRead(Value*) offer the same functionality
  // to be able to replace this one. Currently, there is a split
  // responsibility. In a first step, the MemoryAccess is created, but without
  // the AccessRelation. In the second step by
  // ScopStmt::buildAccessRelations(), the AccessRelation is created. At least
  // for scalar accesses, there is no new information available at
  // ScopStmt::buildAccessRelations(), so we could create the AccessRelation
  // right away. This is what ScopStmt::ensureValueRead(Value*) does.

  auto *Scope = UserStmt->getSurroundingLoop();
  auto VUse = VirtualUse::create(scop.get(), UserStmt, Scope, V, false);
  switch (VUse.getKind()) {
  case VirtualUse::Constant:
  case VirtualUse::Block:
  case VirtualUse::Synthesizable:
  case VirtualUse::Hoisted:
  case VirtualUse::Intra:
    // Uses of these kinds do not need a MemoryAccess.
    break;

  case VirtualUse::ReadOnly:
    // Add MemoryAccess for invariant values only if requested.
    if (!ModelReadOnlyScalars)
      break;

    LLVM_FALLTHROUGH;
  case VirtualUse::Inter:

    // Do not create another MemoryAccess for reloading the value if one
    // already exists.
    if (UserStmt->lookupValueReadOf(V))
      break;

    addMemoryAccess(UserStmt, nullptr, MemoryAccess::READ, V, V->getType(),
                    true, V, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
                    MemoryKind::Value);

    // Inter-statement uses need to write the value in their defining
    // statement.
    if (VUse.isInter())
      ensureValueWrite(cast<Instruction>(V));
    break;
  }
}

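// Illustrative example (added for exposition; the IR names are made up): if a
// value is defined in one statement and used in another,
//
//   Stmt1:  %val = fadd float %a, %b
//   Stmt2:  store float %val, float* %p
//
// the use in Stmt2 is a VirtualUse::Inter use. ensureValueRead() then adds a
// scalar READ of %val to Stmt2, and ensureValueWrite() adds the matching
// MUST_WRITE of %val to Stmt1.
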
void ScopBuilder::ensurePHIWrite(PHINode *PHI, ScopStmt *IncomingStmt,
                                 BasicBlock *IncomingBlock,
                                 Value *IncomingValue, bool IsExitBlock) {
  // As the incoming block might turn out to be an error statement, ensure we
  // will create an exit PHI SAI object. It is needed during code generation
  // and would be created later anyway.
  if (IsExitBlock)
    scop->getOrCreateScopArrayInfo(PHI, PHI->getType(), {},
                                   MemoryKind::ExitPHI);

  // This is possible if PHI is in the SCoP's entry block. The incoming blocks
  // from outside the SCoP's region have no statement representation.
  if (!IncomingStmt)
    return;

  // Make sure the incoming value is available in the incoming block. This
  // must be done before the check for multiple PHI writes because multiple
  // exiting edges from a subregion can each be the effective written value of
  // the subregion. As such, all of them must be made available in the
  // subregion statement.
  ensureValueRead(IncomingValue, IncomingStmt);

  // Do not add more than one MemoryAccess per PHINode and ScopStmt.
  if (MemoryAccess *Acc = IncomingStmt->lookupPHIWriteOf(PHI)) {
    assert(Acc->getAccessInstruction() == PHI);
    Acc->addIncoming(IncomingBlock, IncomingValue);
    return;
  }

  MemoryAccess *Acc = addMemoryAccess(
      IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, PHI->getType(), true,
      PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
      IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI);
  assert(Acc);
  Acc->addIncoming(IncomingBlock, IncomingValue);
}

void ScopBuilder::addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI) {
  addMemoryAccess(PHIStmt, PHI, MemoryAccess::READ, PHI, PHI->getType(), true,
                  PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
                  MemoryKind::PHI);
}

void ScopBuilder::buildDomain(ScopStmt &Stmt) {
  isl::id Id = isl::id::alloc(scop->getIslCtx(), Stmt.getBaseName(), &Stmt);

  Stmt.Domain = scop->getDomainConditions(&Stmt);
  Stmt.Domain = Stmt.Domain.set_tuple_id(Id);
}

void ScopBuilder::collectSurroundingLoops(ScopStmt &Stmt) {
  isl::set Domain = Stmt.getDomain();
  for (unsigned u = 0, e = Domain.dim(isl::dim::set); u < e; u++) {
    isl::id DimId = Domain.get_dim_id(isl::dim::set, u);
    Stmt.NestLoops.push_back(static_cast<Loop *>(DimId.get_user()));
  }
}

/// Return the reduction type for a given binary operator.
static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp,
                                                    const Instruction *Load) {
  if (!BinOp)
    return MemoryAccess::RT_NONE;
  switch (BinOp->getOpcode()) {
  case Instruction::FAdd:
    if (!BinOp->hasUnsafeAlgebra())
      return MemoryAccess::RT_NONE;
    // Fall through
  case Instruction::Add:
    return MemoryAccess::RT_ADD;
  case Instruction::Or:
    return MemoryAccess::RT_BOR;
  case Instruction::Xor:
    return MemoryAccess::RT_BXOR;
  case Instruction::And:
    return MemoryAccess::RT_BAND;
  case Instruction::FMul:
    if (!BinOp->hasUnsafeAlgebra())
      return MemoryAccess::RT_NONE;
    // Fall through
  case Instruction::Mul:
    if (DisableMultiplicativeReductions)
      return MemoryAccess::RT_NONE;
    return MemoryAccess::RT_MUL;
  default:
    return MemoryAccess::RT_NONE;
  }
}

void ScopBuilder::checkForReductions(ScopStmt &Stmt) {
  SmallVector<MemoryAccess *, 2> Loads;
  SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates;

  // First collect candidate load-store reduction chains by iterating over all
  // stores and collecting possible reduction loads.
  for (MemoryAccess *StoreMA : Stmt) {
    if (StoreMA->isRead())
      continue;

    Loads.clear();
    collectCandiateReductionLoads(StoreMA, Loads);
    for (MemoryAccess *LoadMA : Loads)
      Candidates.push_back(std::make_pair(LoadMA, StoreMA));
  }

  // Then check each possible candidate pair.
  for (const auto &CandidatePair : Candidates) {
    bool Valid = true;
    isl::map LoadAccs = CandidatePair.first->getAccessRelation();
    isl::map StoreAccs = CandidatePair.second->getAccessRelation();

    // Skip those with obviously unequal base addresses.
    if (!LoadAccs.has_equal_space(StoreAccs)) {
      continue;
    }

    // And check whether the remaining load-store pair overlaps with other
    // memory accesses.
    isl::map AllAccsRel = LoadAccs.unite(StoreAccs);
    AllAccsRel = AllAccsRel.intersect_domain(Stmt.getDomain());
    isl::set AllAccs = AllAccsRel.range();

    for (MemoryAccess *MA : Stmt) {
      if (MA == CandidatePair.first || MA == CandidatePair.second)
        continue;

      isl::map AccRel =
          MA->getAccessRelation().intersect_domain(Stmt.getDomain());
      isl::set Accs = AccRel.range();

      if (AllAccs.has_equal_space(Accs)) {
        isl::set OverlapAccs = Accs.intersect(AllAccs);
        Valid = Valid && OverlapAccs.is_empty();
      }
    }

    if (!Valid)
      continue;

    const LoadInst *Load =
        dyn_cast<const LoadInst>(CandidatePair.first->getAccessInstruction());
    MemoryAccess::ReductionType RT =
        getReductionType(dyn_cast<BinaryOperator>(Load->user_back()), Load);

    // If no overlapping access was found we mark the load and store as
    // reduction like.
    CandidatePair.first->markAsReductionLike(RT);
    CandidatePair.second->markAsReductionLike(RT);
  }
}

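// Illustrative example (added for exposition): the canonical candidate pair
// stems from a loop such as
//
//   for (long i = 0; i < N; i++)
//     Sum += A[i];
//
// The load of Sum and the store to Sum form a (LoadMA, StoreMA) pair joined
// by a single commutative and associative add. If no other access in the
// statement overlaps the location of Sum, both are marked as an RT_ADD
// reduction.
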
void ScopBuilder::collectCandiateReductionLoads(
    MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) {
  ScopStmt *Stmt = StoreMA->getStatement();

  auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction());
  if (!Store)
    return;

  // Skip if there is not exactly one binary operator between the load and the
  // store.
  auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand());
  if (!BinOp)
    return;

  // Skip if the binary operator has multiple uses.
  if (BinOp->getNumUses() != 1)
    return;

  // Skip if the opcode of the binary operator is not commutative/associative.
  if (!BinOp->isCommutative() || !BinOp->isAssociative())
    return;

  // Skip if the binary operator is outside the current SCoP.
  if (BinOp->getParent() != Store->getParent())
    return;

  // Skip if it is a multiplicative reduction and we disabled them.
  if (DisableMultiplicativeReductions &&
      (BinOp->getOpcode() == Instruction::Mul ||
       BinOp->getOpcode() == Instruction::FMul))
    return;

  // Check the binary operator operands for a candidate load.
  auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0));
  auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1));
  if (!PossibleLoad0 && !PossibleLoad1)
    return;

  // A load is only a candidate if it cannot escape (thus has only this use).
  if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1)
    if (PossibleLoad0->getParent() == Store->getParent())
      Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad0));
  if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1)
    if (PossibleLoad1->getParent() == Store->getParent())
      Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad1));
}

void ScopBuilder::buildAccessRelations(ScopStmt &Stmt) {
  for (MemoryAccess *Access : Stmt.MemAccs) {
    Type *ElementType = Access->getElementType();

    MemoryKind Ty;
    if (Access->isPHIKind())
      Ty = MemoryKind::PHI;
    else if (Access->isExitPHIKind())
      Ty = MemoryKind::ExitPHI;
    else if (Access->isValueKind())
      Ty = MemoryKind::Value;
    else
      Ty = MemoryKind::Array;

    auto *SAI = scop->getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(),
                                               ElementType, Access->Sizes, Ty);
    Access->buildAccessRelation(SAI);
    scop->addAccessData(Access);
  }
}

#ifndef NDEBUG
static void verifyUse(Scop *S, Use &Op, LoopInfo &LI) {
  auto PhysUse = VirtualUse::create(S, Op, &LI, false);
  auto VirtUse = VirtualUse::create(S, Op, &LI, true);
  assert(PhysUse.getKind() == VirtUse.getKind());
}

/// Check the consistency of every statement's MemoryAccesses.
///
/// The check is carried out by expecting the "physical" kind of use (derived
/// from the BasicBlock the instruction resides in) to be the same as the
/// "virtual" kind of use (derived from a statement's MemoryAccess).
///
/// The "physical" uses are taken by ensureValueRead to determine whether to
/// create MemoryAccesses. When done, the kind of scalar access should be the
/// same no matter which way it was derived.
///
/// The MemoryAccesses might be changed by later SCoP-modifying passes and
/// hence can intentionally influence the kind of uses (no longer
/// corresponding to the "physical" ones, hence called "virtual"). The
/// CodeGenerator therefore has to pick up the virtual uses. But here, before
/// any SCoP-modifying pass has run, virtual and physical uses are equivalent.
static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT) {
  for (auto *BB : S->getRegion().blocks()) {
    for (auto &Inst : *BB) {
      auto *Stmt = S->getStmtFor(&Inst);
      if (!Stmt)
        continue;

      if (isIgnoredIntrinsic(&Inst))
        continue;

      // Branch conditions are encoded in the statement domains.
      if (isa<TerminatorInst>(&Inst) && Stmt->isBlockStmt())
        continue;

      // Verify all uses.
      for (auto &Op : Inst.operands())
        verifyUse(S, Op, LI);

      // Stores do not produce values used by other statements.
      if (isa<StoreInst>(Inst))
        continue;

      // For every value defined in the block, also check that a use of that
      // value in the same statement would not be an inter-statement use. It
      // can still be synthesizable or load-hoisted, but these kinds of
      // instructions are not directly copied in code-generation.
      auto VirtDef =
          VirtualUse::create(S, Stmt, Stmt->getSurroundingLoop(), &Inst, true);
      assert(VirtDef.getKind() == VirtualUse::Synthesizable ||
             VirtDef.getKind() == VirtualUse::Intra ||
             VirtDef.getKind() == VirtualUse::Hoisted);
    }
  }

  if (S->hasSingleExitEdge())
    return;

  // PHINodes in the SCoP region's exit block are also uses to be checked.
  if (!S->getRegion().isTopLevelRegion()) {
    for (auto &Inst : *S->getRegion().getExit()) {
      if (!isa<PHINode>(Inst))
        break;

      for (auto &Op : Inst.operands())
        verifyUse(S, Op, LI);
    }
  }
}
#endif

/// Return the block that is the representing block for @p RN.
static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
  return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
                           : RN->getNodeAs<BasicBlock>();
}

void ScopBuilder::buildScop(Region &R, AssumptionCache &AC,
                            OptimizationRemarkEmitter &ORE) {
  scop.reset(new Scop(R, SE, LI, DT, *SD.getDetectionContext(&R), ORE));

  buildStmts(R);
  buildAccessFunctions();

  // In case the region does not have an exiting block we will later (during
  // code generation) split the exit block. This will move potential PHI nodes
  // from the current exit block into the new region exiting block. Hence, PHI
  // nodes that are at this point not part of the region will be afterwards.
  // To handle these PHI nodes later we will now model their operands as
  // scalar accesses. Note that we do not model anything in the exit block if
  // we have an exiting block in the region, as there will not be any
  // splitting later.
  if (!R.isTopLevelRegion() && !scop->hasSingleExitEdge())
    buildAccessFunctions(nullptr, *R.getExit(), nullptr,
                         /* IsExitBlock */ true);

  // Create memory accesses for global reads since all arrays are now known.
  auto *AF = SE.getConstant(IntegerType::getInt64Ty(SE.getContext()), 0);
  for (auto GlobalReadPair : GlobalReads) {
    ScopStmt *GlobalReadStmt = GlobalReadPair.first;
    Instruction *GlobalRead = GlobalReadPair.second;
    for (auto *BP : ArrayBasePointers)
      addArrayAccess(GlobalReadStmt, MemAccInst(GlobalRead),
                     MemoryAccess::READ, BP, BP->getType(), false, {AF},
                     {nullptr}, GlobalRead);
  }

  scop->buildInvariantEquivalenceClasses();

  // A map from basic blocks to their invalid domains.
  DenseMap<BasicBlock *, isl::set> InvalidDomainMap;

  if (!scop->buildDomains(&R, DT, LI, InvalidDomainMap)) {
    DEBUG(dbgs() << "Bailing-out because buildDomains encountered problems\n");
    return;
  }

  scop->addUserAssumptions(AC, DT, LI, InvalidDomainMap);

  // Initialize the invalid domain.
  for (ScopStmt &Stmt : scop->Stmts)
    if (Stmt.isBlockStmt())
      Stmt.setInvalidDomain(InvalidDomainMap[Stmt.getEntryBlock()]);
    else
      Stmt.setInvalidDomain(InvalidDomainMap[getRegionNodeBasicBlock(
          Stmt.getRegion()->getNode())]);

  // Remove empty statements.
  // Exit early in case there are no executable statements left in this scop.
  scop->removeStmtNotInDomainMap();
  scop->simplifySCoP(false);
  if (scop->isEmpty()) {
    DEBUG(dbgs() << "Bailing-out because SCoP is empty\n");
    return;
  }

  // The ScopStmts now have enough information to initialize themselves.
  for (ScopStmt &Stmt : *scop) {
    buildDomain(Stmt);
    collectSurroundingLoops(Stmt);
    buildAccessRelations(Stmt);

    if (DetectReductions)
      checkForReductions(Stmt);
  }

  // Check early for a feasible runtime context.
  if (!scop->hasFeasibleRuntimeContext()) {
    DEBUG(dbgs() << "Bailing-out because of unfeasible context (early)\n");
    return;
  }

  // Check early for profitability. Afterwards it cannot change anymore,
  // only the runtime context could become infeasible.
  if (!scop->isProfitable(UnprofitableScalarAccs)) {
    scop->invalidate(PROFITABLE, DebugLoc());
    DEBUG(dbgs() << "Bailing-out because SCoP is not considered profitable\n");
    return;
  }

  scop->buildSchedule(LI);

  scop->finalizeAccesses();

  scop->realignParams();
  scop->addUserContext();

  // After the context was fully constructed, and thus all our knowledge about
  // the parameters is in there, we add all recorded assumptions to the
  // assumed/invalid context.
  scop->addRecordedAssumptions();

  scop->simplifyContexts();
  if (!scop->buildAliasChecks(AA)) {
    DEBUG(dbgs() << "Bailing-out because could not build alias checks\n");
    return;
  }

  scop->hoistInvariantLoads();
  scop->canonicalizeDynamicBasePtrs();
  scop->verifyInvariantLoads();
  scop->simplifySCoP(true);

  // Check late for a feasible runtime context because profitability did not
  // change.
  if (!scop->hasFeasibleRuntimeContext()) {
    DEBUG(dbgs() << "Bailing-out because of unfeasible context (late)\n");
    return;
  }

#ifndef NDEBUG
  verifyUses(scop.get(), LI, DT);
#endif
}

ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AliasAnalysis &AA,
                         const DataLayout &DL, DominatorTree &DT, LoopInfo &LI,
                         ScopDetection &SD, ScalarEvolution &SE,
                         OptimizationRemarkEmitter &ORE)
    : AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE) {
  DebugLoc Beg, End;
  auto P = getBBPairForRegion(R);
  getDebugLocations(P, Beg, End);

  std::string Msg = "SCoP begins here.";
  ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEntry", Beg, P.first)
           << Msg);

  buildScop(*R, AC, ORE);

  DEBUG(dbgs() << *scop);

  if (!scop->hasFeasibleRuntimeContext()) {
    InfeasibleScops++;
    Msg = "SCoP ends here but was dismissed.";
    DEBUG(dbgs() << "SCoP detected but dismissed\n");
    scop.reset();
  } else {
    Msg = "SCoP ends here.";
    ++ScopFound;
    if (scop->getMaxLoopDepth() > 0)
      ++RichScopFound;
  }

  if (R->isTopLevelRegion())
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.first)
             << Msg);
  else
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.second)
             << Msg);
}