Coverage Report

Created: 2017-04-29 12:21

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/tools/polly/lib/Analysis/ScopInfo.cpp
Line
Count
Source
1
//===--------- ScopInfo.cpp ----------------------------------------------===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// Create a polyhedral description for a static control flow region.
11
//
12
// The pass creates a polyhedral description of the Scops detected by the Scop
13
// detection derived from their LLVM-IR code.
14
//
15
// This representation is shared among several tools in the polyhedral
16
// community, which are e.g. Cloog, Pluto, Loopo, Graphite.
17
//
18
//===----------------------------------------------------------------------===//
19
20
#include "polly/ScopInfo.h"
21
#include "polly/LinkAllPasses.h"
22
#include "polly/Options.h"
23
#include "polly/ScopBuilder.h"
24
#include "polly/Support/GICHelper.h"
25
#include "polly/Support/SCEVValidator.h"
26
#include "polly/Support/ScopHelper.h"
27
#include "llvm/ADT/DepthFirstIterator.h"
28
#include "llvm/ADT/MapVector.h"
29
#include "llvm/ADT/PostOrderIterator.h"
30
#include "llvm/ADT/STLExtras.h"
31
#include "llvm/ADT/SetVector.h"
32
#include "llvm/ADT/Statistic.h"
33
#include "llvm/ADT/StringExtras.h"
34
#include "llvm/Analysis/AliasAnalysis.h"
35
#include "llvm/Analysis/AssumptionCache.h"
36
#include "llvm/Analysis/Loads.h"
37
#include "llvm/Analysis/LoopInfo.h"
38
#include "llvm/Analysis/LoopIterator.h"
39
#include "llvm/Analysis/RegionIterator.h"
40
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
41
#include "llvm/IR/DiagnosticInfo.h"
42
#include "llvm/Support/Debug.h"
43
#include "isl/aff.h"
44
#include "isl/constraint.h"
45
#include "isl/local_space.h"
46
#include "isl/map.h"
47
#include "isl/options.h"
48
#include "isl/printer.h"
49
#include "isl/schedule.h"
50
#include "isl/schedule_node.h"
51
#include "isl/set.h"
52
#include "isl/union_map.h"
53
#include "isl/union_set.h"
54
#include "isl/val.h"
55
#include <sstream>
56
#include <string>
57
#include <vector>
58
59
using namespace llvm;
60
using namespace polly;
61
62
1.63k
#define DEBUG_TYPE "polly-scops"
63
64
STATISTIC(AssumptionsAliasing, "Number of aliasing assumptions taken.");
65
STATISTIC(AssumptionsInbounds, "Number of inbounds assumptions taken.");
66
STATISTIC(AssumptionsWrapping, "Number of wrapping assumptions taken.");
67
STATISTIC(AssumptionsUnsigned, "Number of unsigned assumptions taken.");
68
STATISTIC(AssumptionsComplexity, "Number of too complex SCoPs.");
69
STATISTIC(AssumptionsUnprofitable, "Number of unprofitable SCoPs.");
70
STATISTIC(AssumptionsErrorBlock, "Number of error block assumptions taken.");
71
STATISTIC(AssumptionsInfiniteLoop, "Number of bounded loop assumptions taken.");
72
STATISTIC(AssumptionsInvariantLoad,
73
          "Number of invariant loads assumptions taken.");
74
STATISTIC(AssumptionsDelinearization,
75
          "Number of delinearization assumptions taken.");
76
77
STATISTIC(NumLoopsInScop, "Number of loops in scops");
78
STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1");
79
STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2");
80
STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3");
81
STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4");
82
STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5");
83
STATISTIC(NumScopsDepthLarger,
84
          "Number of scops with maximal loop depth 6 and larger");
85
STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops");
86
87
// The maximal number of basic sets we allow during domain construction to
88
// be created. More complex scops will result in very high compile time and
89
// are also unlikely to result in good code
90
static int const MaxDisjunctsInDomain = 20;
91
92
// The number of disjuncts in the context after which we stop adding more
93
// disjuncts. This parameter is there to avoid exponential growth in the
94
// number of disjuncts when adding non-convex sets to the context.
95
static int const MaxDisjunctsInContext = 4;
96
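Illustration (hypothetical sets, not from the source above): a non-convex condition such as p < 0 or p > 10 corresponds to the isl set { [p] : p < 0 or p > 10 }, which already consists of two basic sets (disjuncts); intersecting it with a context that currently holds k disjuncts can therefore yield up to 2k disjuncts, which is the growth MaxDisjunctsInContext = 4 is meant to cap.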
97
static cl::opt<bool> PollyRemarksMinimal(
98
    "polly-remarks-minimal",
99
    cl::desc("Do not emit remarks about assumptions that are known"),
100
    cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::cat(PollyCategory));
101
102
// Multiplicative reductions can be disabled separately as these kind of
103
// operations can overflow easily. Additive reductions and bit operations
104
// are in contrast pretty stable.
105
static cl::opt<bool> DisableMultiplicativeReductions(
106
    "polly-disable-multiplicative-reductions",
107
    cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore,
108
    cl::init(false), cl::cat(PollyCategory));
109
110
static cl::opt<unsigned> RunTimeChecksMaxParameters(
111
    "polly-rtc-max-parameters",
112
    cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden,
113
    cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));
114
115
static cl::opt<unsigned> RunTimeChecksMaxArraysPerGroup(
116
    "polly-rtc-max-arrays-per-group",
117
    cl::desc("The maximal number of arrays to compare in each alias group."),
118
    cl::Hidden, cl::ZeroOrMore, cl::init(20), cl::cat(PollyCategory));
119
120
static cl::opt<std::string> UserContextStr(
121
    "polly-context", cl::value_desc("isl parameter set"),
122
    cl::desc("Provide additional constraints on the context parameters"),
123
    cl::init(""), cl::cat(PollyCategory));
124
125
static cl::opt<bool> DetectReductions("polly-detect-reductions",
126
                                      cl::desc("Detect and exploit reductions"),
127
                                      cl::Hidden, cl::ZeroOrMore,
128
                                      cl::init(true), cl::cat(PollyCategory));
129
130
static cl::opt<bool>
131
    IslOnErrorAbort("polly-on-isl-error-abort",
132
                    cl::desc("Abort if an isl error is encountered"),
133
                    cl::init(true), cl::cat(PollyCategory));
134
135
static cl::opt<bool> UnprofitableScalarAccs(
136
    "polly-unprofitable-scalar-accs",
137
    cl::desc("Count statements with scalar accesses as not optimizable"),
138
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));
139
140
static cl::opt<bool> PollyPreciseInbounds(
141
    "polly-precise-inbounds",
142
    cl::desc("Take more precise inbounds assumptions (do not scale well)"),
143
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));
144
145
static cl::opt<bool>
146
    PollyIgnoreInbounds("polly-ignore-inbounds",
147
                        cl::desc("Do not take inbounds assumptions at all"),
148
                        cl::Hidden, cl::init(false), cl::cat(PollyCategory));
149
150
static cl::opt<bool> PollyIgnoreParamBounds(
151
    "polly-ignore-parameter-bounds",
152
    cl::desc(
153
        "Do not add parameter bounds and do no gist simplify sets accordingly"),
154
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));
155
156
static cl::opt<bool> PollyPreciseFoldAccesses(
157
    "polly-precise-fold-accesses",
158
    cl::desc("Fold memory accesses to model more possible delinearizations "
159
             "(does not scale well)"),
160
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));
161
//===----------------------------------------------------------------------===//
162
163
// Create a sequence of two schedules. Either argument may be null and is
164
// interpreted as the empty schedule. Can also return null if both schedules are
165
// empty.
166
static __isl_give isl_schedule *
167
combineInSequence(__isl_take isl_schedule *Prev,
168
2.82k
                  __isl_take isl_schedule *Succ) {
169
2.82k
  if (!Prev)
170
2.16k
    return Succ;
171
660
  if (!Succ)
172
0
    return Prev;
173
660
174
660
  return isl_schedule_sequence(Prev, Succ);
175
660
}
176
177
static __isl_give isl_set *addRangeBoundsToSet(__isl_take isl_set *S,
178
                                               const ConstantRange &Range,
179
                                               int dim,
180
938
                                               enum isl_dim_type type) {
181
938
  isl_val *V;
182
938
  isl_ctx *Ctx = isl_set_get_ctx(S);
183
938
184
938
  // The upper and lower bound for a parameter value is derived either from
185
938
  // the data type of the parameter or from the - possibly more restrictive -
186
938
  // range metadata.
187
938
  V = isl_valFromAPInt(Ctx, Range.getSignedMin(), true);
188
938
  S = isl_set_lower_bound_val(S, type, dim, V);
189
938
  V = isl_valFromAPInt(Ctx, Range.getSignedMax(), true);
190
938
  S = isl_set_upper_bound_val(S, type, dim, V);
191
938
192
938
  if (Range.isFullSet())
193
875
    return S;
194
938
195
63
  if (isl_set_n_basic_set(S) > MaxDisjunctsInContext)
196
13
    return S;
197
63
198
63
  // In case of signed wrapping, we can refine the set of valid values by
199
63
  // excluding the part not covered by the wrapping range.
200
50
  if (Range.isSignWrappedSet()) {
201
5
    V = isl_valFromAPInt(Ctx, Range.getLower(), true);
202
5
    isl_set *SLB = isl_set_lower_bound_val(isl_set_copy(S), type, dim, V);
203
5
204
5
    V = isl_valFromAPInt(Ctx, Range.getUpper(), true);
205
5
    V = isl_val_sub_ui(V, 1);
206
5
    isl_set *SUB = isl_set_upper_bound_val(S, type, dim, V);
207
5
    S = isl_set_union(SLB, SUB);
208
5
  }
209
50
210
50
  return S;
211
63
}
212
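Illustration of addRangeBoundsToSet (hypothetical values, not from the source above): for an i8 parameter p with the sign-wrapped ConstantRange [100, -120), the signed minimum and maximum are -128 and 127, so the set is first bounded to { [p] : -128 <= p <= 127 }; the sign-wrapping refinement then replaces it with { [p] : 100 <= p <= 127 or -128 <= p <= -121 }, i.e. the copy lower-bounded by getLower() unioned with the copy upper-bounded by getUpper() - 1.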
213
1.43k
static const ScopArrayInfo *identifyBasePtrOriginSAI(Scop *S, Value *BasePtr) {
214
1.43k
  LoadInst *BasePtrLI = dyn_cast<LoadInst>(BasePtr);
215
1.43k
  if (!BasePtrLI)
216
1.31k
    return nullptr;
217
1.43k
218
123
  if (!S->contains(BasePtrLI))
219
52
    return nullptr;
220
123
221
71
  ScalarEvolution &SE = *S->getSE();
222
71
223
71
  auto *OriginBaseSCEV =
224
71
      SE.getPointerBase(SE.getSCEV(BasePtrLI->getPointerOperand()));
225
71
  if (!OriginBaseSCEV)
226
0
    return nullptr;
227
71
228
71
  auto *OriginBaseSCEVUnknown = dyn_cast<SCEVUnknown>(OriginBaseSCEV);
229
71
  if (!OriginBaseSCEVUnknown)
230
0
    return nullptr;
231
71
232
71
  return S->getScopArrayInfo(OriginBaseSCEVUnknown->getValue(),
233
71
                             MemoryKind::Array);
234
71
}
235
236
ScopArrayInfo::ScopArrayInfo(Value *BasePtr, Type *ElementType, isl_ctx *Ctx,
237
                             ArrayRef<const SCEV *> Sizes, MemoryKind Kind,
238
                             const DataLayout &DL, Scop *S,
239
                             const char *BaseName)
240
1.89k
    : BasePtr(BasePtr), ElementType(ElementType), Kind(Kind), DL(DL), S(*S) {
241
1.89k
  std::string BasePtrName =
242
24
      BaseName ? BaseName
243
1.86k
               : getIslCompatibleName("MemRef_", BasePtr,
244
1.73k
                                      Kind == MemoryKind::PHI ? "__phi" : "");
245
1.89k
  Id = isl_id_alloc(Ctx, BasePtrName.c_str(), this);
246
1.89k
247
1.89k
  updateSizes(Sizes);
248
1.89k
249
1.89k
  if (!BasePtr || Kind != MemoryKind::Array) {
250
456
    BasePtrOriginSAI = nullptr;
251
456
    return;
252
456
  }
253
1.89k
254
1.43k
  BasePtrOriginSAI = identifyBasePtrOriginSAI(S, BasePtr);
255
1.43k
  if (BasePtrOriginSAI)
256
71
    const_cast<ScopArrayInfo *>(BasePtrOriginSAI)->addDerivedSAI(this);
257
1.43k
}
258
259
3.95k
__isl_give isl_space *ScopArrayInfo::getSpace() const {
260
3.95k
  auto *Space =
261
3.95k
      isl_space_set_alloc(isl_id_get_ctx(Id), 0, getNumberOfDimensions());
262
3.95k
  Space = isl_space_set_tuple_id(Space, isl_dim_set, isl_id_copy(Id));
263
3.95k
  return Space;
264
3.95k
}
265
266
0
bool ScopArrayInfo::isReadOnly() {
267
0
  isl_union_set *WriteSet = isl_union_map_range(S.getWrites());
268
0
  isl_space *Space = getSpace();
269
0
  WriteSet = isl_union_set_intersect(
270
0
      WriteSet, isl_union_set_from_set(isl_set_universe(Space)));
271
0
272
0
  bool IsReadOnly = isl_union_set_is_empty(WriteSet);
273
0
  isl_union_set_free(WriteSet);
274
0
275
0
  return IsReadOnly;
276
0
}
277
278
4.08k
void ScopArrayInfo::updateElementType(Type *NewElementType) {
279
4.08k
  if (NewElementType == ElementType)
280
3.37k
    return;
281
4.08k
282
708
  auto OldElementSize = DL.getTypeAllocSizeInBits(ElementType);
283
708
  auto NewElementSize = DL.getTypeAllocSizeInBits(NewElementType);
284
708
285
708
  if (NewElementSize == OldElementSize || NewElementSize == 0)
286
658
    return;
287
708
288
50
  if (NewElementSize % OldElementSize == 0 && NewElementSize < OldElementSize) {
289
0
    ElementType = NewElementType;
290
50
  } else {
291
50
    auto GCD = GreatestCommonDivisor64(NewElementSize, OldElementSize);
292
50
    ElementType = IntegerType::get(ElementType->getContext(), GCD);
293
50
  }
294
50
}
295
296
bool ScopArrayInfo::updateSizes(ArrayRef<const SCEV *> NewSizes,
297
3.65k
                                bool CheckConsistency) {
298
3.65k
  int SharedDims = std::min(NewSizes.size(), DimensionSizes.size());
299
3.65k
  int ExtraDimsNew = NewSizes.size() - SharedDims;
300
3.65k
  int ExtraDimsOld = DimensionSizes.size() - SharedDims;
301
3.65k
302
3.65k
  if (CheckConsistency) {
303
5.06k
    for (int i = 0; i < SharedDims; i++) {
304
1.41k
      auto *NewSize = NewSizes[i + ExtraDimsNew];
305
1.41k
      auto *KnownSize = DimensionSizes[i + ExtraDimsOld];
306
1.41k
      if (NewSize && KnownSize && NewSize != KnownSize)
307
0
        return false;
308
1.41k
    }
309
3.65k
310
3.65k
    if (DimensionSizes.size() >= NewSizes.size())
311
2.18k
      return true;
312
3.65k
  }
313
3.65k
314
1.46k
  DimensionSizes.clear();
315
1.46k
  DimensionSizes.insert(DimensionSizes.begin(), NewSizes.begin(),
316
1.46k
                        NewSizes.end());
317
1.46k
  for (isl_pw_aff *Size : DimensionSizesPw)
318
12
    isl_pw_aff_free(Size);
319
1.46k
  DimensionSizesPw.clear();
320
1.82k
  for (const SCEV *Expr : DimensionSizes) {
321
1.82k
    if (!Expr) {
322
1.44k
      DimensionSizesPw.push_back(nullptr);
323
1.44k
      continue;
324
1.44k
    }
325
384
    isl_pw_aff *Size = S.getPwAffOnly(Expr);
326
384
    DimensionSizesPw.push_back(Size);
327
384
  }
328
1.46k
  return true;
329
3.65k
}
330
331
1.89k
ScopArrayInfo::~ScopArrayInfo() {
332
1.89k
  isl_id_free(Id);
333
1.89k
  for (isl_pw_aff *Size : DimensionSizesPw)
334
1.81k
    isl_pw_aff_free(Size);
335
1.89k
}
336
337
2.46k
std::string ScopArrayInfo::getName() const { return isl_id_get_name(Id); }
338
339
7.41k
int ScopArrayInfo::getElemSizeInBytes() const {
340
7.41k
  return DL.getTypeAllocSize(ElementType);
341
7.41k
}
342
343
3.80k
__isl_give isl_id *ScopArrayInfo::getBasePtrId() const {
344
3.80k
  return isl_id_copy(Id);
345
3.80k
}
346
347
0
void ScopArrayInfo::dump() const { print(errs()); }
348
349
1.57k
void ScopArrayInfo::print(raw_ostream &OS, bool SizeAsPwAff) const {
350
1.57k
  OS.indent(8) << *getElementType() << " " << getName();
351
1.57k
  unsigned u = 0;
352
1.57k
  if (getNumberOfDimensions() > 0 && !getDimensionSize(0)) {
353
1.19k
    OS << "[*]";
354
1.19k
    u++;
355
1.19k
  }
356
1.99k
  for (; u < getNumberOfDimensions(); u++) {
357
418
    OS << "[";
358
418
359
418
    if (SizeAsPwAff) {
360
209
      auto *Size = getDimensionSizePw(u);
361
209
      OS << " " << Size << " ";
362
209
      isl_pw_aff_free(Size);
363
209
    } else {
364
209
      OS << *getDimensionSize(u);
365
209
    }
366
418
367
418
    OS << "]";
368
418
  }
369
1.57k
370
1.57k
  OS << ";";
371
1.57k
372
1.57k
  if (BasePtrOriginSAI)
373
52
    OS << " [BasePtrOrigin: " << BasePtrOriginSAI->getName() << "]";
374
1.57k
375
1.57k
  OS << " // Element size " << getElemSizeInBytes() << "\n";
376
1.57k
}
377
378
const ScopArrayInfo *
379
0
ScopArrayInfo::getFromAccessFunction(__isl_keep isl_pw_multi_aff *PMA) {
380
0
  isl_id *Id = isl_pw_multi_aff_get_tuple_id(PMA, isl_dim_out);
381
0
  assert(Id && "Output dimension didn't have an ID");
382
0
  return getFromId(Id);
383
0
}
384
385
709
const ScopArrayInfo *ScopArrayInfo::getFromId(__isl_take isl_id *Id) {
386
709
  void *User = isl_id_get_user(Id);
387
709
  const ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(User);
388
709
  isl_id_free(Id);
389
709
  return SAI;
390
709
}
391
392
13
void MemoryAccess::wrapConstantDimensions() {
393
13
  auto *SAI = getScopArrayInfo();
394
13
  auto *ArraySpace = SAI->getSpace();
395
13
  auto *Ctx = isl_space_get_ctx(ArraySpace);
396
13
  unsigned DimsArray = SAI->getNumberOfDimensions();
397
13
398
13
  auto *DivModAff = isl_multi_aff_identity(isl_space_map_from_domain_and_range(
399
13
      isl_space_copy(ArraySpace), isl_space_copy(ArraySpace)));
400
13
  auto *LArraySpace = isl_local_space_from_space(ArraySpace);
401
13
402
13
  // Begin with last dimension, to iteratively carry into higher dimensions.
403
30
  for (int i = DimsArray - 1; i > 0; i--) {
404
17
    auto *DimSize = SAI->getDimensionSize(i);
405
17
    auto *DimSizeCst = dyn_cast<SCEVConstant>(DimSize);
406
17
407
17
    // This transformation is not applicable to dimensions with dynamic size.
408
17
    if (!DimSizeCst)
409
0
      continue;
410
17
411
17
    // This transformation is not applicable to dimensions of size zero.
412
17
    if (DimSize->isZero())
413
0
      continue;
414
17
415
17
    auto *DimSizeVal = isl_valFromAPInt(Ctx, DimSizeCst->getAPInt(), false);
416
17
    auto *Var = isl_aff_var_on_domain(isl_local_space_copy(LArraySpace),
417
17
                                      isl_dim_set, i);
418
17
    auto *PrevVar = isl_aff_var_on_domain(isl_local_space_copy(LArraySpace),
419
17
                                          isl_dim_set, i - 1);
420
17
421
17
    // Compute: index % size
422
17
    // Modulo must apply in the divide of the previous iteration, if any.
423
17
    auto *Modulo = isl_aff_copy(Var);
424
17
    Modulo = isl_aff_mod_val(Modulo, isl_val_copy(DimSizeVal));
425
17
    Modulo = isl_aff_pullback_multi_aff(Modulo, isl_multi_aff_copy(DivModAff));
426
17
427
17
    // Compute: floor(index / size)
428
17
    auto *Divide = Var;
429
17
    Divide = isl_aff_div(
430
17
        Divide,
431
17
        isl_aff_val_on_domain(isl_local_space_copy(LArraySpace), DimSizeVal));
432
17
    Divide = isl_aff_floor(Divide);
433
17
    Divide = isl_aff_add(Divide, PrevVar);
434
17
    Divide = isl_aff_pullback_multi_aff(Divide, isl_multi_aff_copy(DivModAff));
435
17
436
17
    // Apply Modulo and Divide.
437
17
    DivModAff = isl_multi_aff_set_aff(DivModAff, i, Modulo);
438
17
    DivModAff = isl_multi_aff_set_aff(DivModAff, i - 1, Divide);
439
17
  }
440
13
441
13
  // Apply all modulo/divides on the accesses.
442
13
  AccessRelation =
443
13
      isl_map_apply_range(AccessRelation, isl_map_from_multi_aff(DivModAff));
444
13
  AccessRelation = isl_map_detect_equalities(AccessRelation);
445
13
  isl_local_space_free(LArraySpace);
446
13
}
447
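Illustration of wrapConstantDimensions (hypothetical array, not from the source above): for a two-dimensional array A[*][16], the constructed DivModAff is { A[i0, i1] -> A[i0 + floor(i1/16), i1 mod 16] }, so an access whose delinearized subscripts came out as A[0][20] is rewritten to A[1][4], carrying the overflow of the innermost dimension into the next outer one.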
448
3.47k
void MemoryAccess::updateDimensionality() {
449
3.47k
  auto *SAI = getScopArrayInfo();
450
3.47k
  auto *ArraySpace = SAI->getSpace();
451
3.47k
  auto *AccessSpace = isl_space_range(isl_map_get_space(AccessRelation));
452
3.47k
  auto *Ctx = isl_space_get_ctx(AccessSpace);
453
3.47k
454
3.47k
  auto DimsArray = isl_space_dim(ArraySpace, isl_dim_set);
455
3.47k
  auto DimsAccess = isl_space_dim(AccessSpace, isl_dim_set);
456
3.47k
  auto DimsMissing = DimsArray - DimsAccess;
457
3.47k
458
3.47k
  auto *BB = getStatement()->getEntryBlock();
459
3.47k
  auto &DL = BB->getModule()->getDataLayout();
460
3.47k
  unsigned ArrayElemSize = SAI->getElemSizeInBytes();
461
3.47k
  unsigned ElemBytes = DL.getTypeAllocSize(getElementType());
462
3.47k
463
3.47k
  auto *Map = isl_map_from_domain_and_range(
464
3.47k
      isl_set_universe(AccessSpace),
465
3.47k
      isl_set_universe(isl_space_copy(ArraySpace)));
466
3.47k
467
3.48k
  for (unsigned i = 0; i < DimsMissing; i++)
468
17
    Map = isl_map_fix_si(Map, isl_dim_out, i, 0);
469
3.47k
470
6.59k
  for (unsigned i = DimsMissing; i < DimsArray; i++)
471
3.12k
    Map = isl_map_equate(Map, isl_dim_in, i - DimsMissing, isl_dim_out, i);
472
3.47k
473
3.47k
  AccessRelation = isl_map_apply_range(AccessRelation, Map);
474
3.47k
475
3.47k
  // For the non delinearized arrays, divide the access function of the last
476
3.47k
  // subscript by the size of the elements in the array.
477
3.47k
  //
478
3.47k
  // A stride one array access in C expressed as A[i] is expressed in
479
3.47k
  // LLVM-IR as something like A[i * elementsize]. This hides the fact that
480
3.47k
  // two subsequent values of 'i' index two values that are stored next to
481
3.47k
  // each other in memory. By this division we make this characteristic
482
3.47k
  // obvious again. If the base pointer was accessed with offsets not divisible
483
3.47k
  // by the accesses element size, we will have chosen a smaller ArrayElemSize
484
3.47k
  // that divides the offsets of all accesses to this base pointer.
485
3.47k
  if (DimsAccess == 1) {
486
2.33k
    isl_val *V = isl_val_int_from_si(Ctx, ArrayElemSize);
487
2.33k
    AccessRelation = isl_map_floordiv_val(AccessRelation, V);
488
2.33k
  }
489
3.47k
490
3.47k
  // We currently do this only if we added at least one dimension, which means
491
3.47k
  // some dimension's indices have not been specified, an indicator that some
492
3.47k
  // index values have been added together.
493
3.47k
  // TODO: Investigate general usefulness; Effect on unit tests is to make index
494
3.47k
  // expressions more complicated.
495
3.47k
  if (DimsMissing)
496
13
    wrapConstantDimensions();
497
3.47k
498
3.47k
  if (!isAffine())
499
50
    computeBoundsOnAccessRelation(ArrayElemSize);
500
3.47k
501
3.47k
  // Introduce multi-element accesses in case the type loaded by this memory
502
3.47k
  // access is larger than the canonical element type of the array.
503
3.47k
  //
504
3.47k
  // An access ((float *)A)[i] to an array char *A is modeled as
505
3.47k
  // {[i] -> A[o] : 4 i <= o <= 4 i + 3
506
3.47k
  if (ElemBytes > ArrayElemSize) {
507
49
    assert(ElemBytes % ArrayElemSize == 0 &&
508
49
           "Loaded element size should be multiple of canonical element size");
509
49
    auto *Map = isl_map_from_domain_and_range(
510
49
        isl_set_universe(isl_space_copy(ArraySpace)),
511
49
        isl_set_universe(isl_space_copy(ArraySpace)));
512
53
    for (unsigned i = 0; i < DimsArray - 1; i++)
513
4
      Map = isl_map_equate(Map, isl_dim_in, i, isl_dim_out, i);
514
49
515
49
    isl_constraint *C;
516
49
    isl_local_space *LS;
517
49
518
49
    LS = isl_local_space_from_space(isl_map_get_space(Map));
519
49
    int Num = ElemBytes / getScopArrayInfo()->getElemSizeInBytes();
520
49
521
49
    C = isl_constraint_alloc_inequality(isl_local_space_copy(LS));
522
49
    C = isl_constraint_set_constant_val(C, isl_val_int_from_si(Ctx, Num - 1));
523
49
    C = isl_constraint_set_coefficient_si(C, isl_dim_in, DimsArray - 1, 1);
524
49
    C = isl_constraint_set_coefficient_si(C, isl_dim_out, DimsArray - 1, -1);
525
49
    Map = isl_map_add_constraint(Map, C);
526
49
527
49
    C = isl_constraint_alloc_inequality(LS);
528
49
    C = isl_constraint_set_coefficient_si(C, isl_dim_in, DimsArray - 1, -1);
529
49
    C = isl_constraint_set_coefficient_si(C, isl_dim_out, DimsArray - 1, 1);
530
49
    C = isl_constraint_set_constant_val(C, isl_val_int_from_si(Ctx, 0));
531
49
    Map = isl_map_add_constraint(Map, C);
532
49
    AccessRelation = isl_map_apply_range(AccessRelation, Map);
533
49
  }
534
3.47k
535
3.47k
  isl_space_free(ArraySpace);
536
3.47k
}
537
538
const std::string
539
271
MemoryAccess::getReductionOperatorStr(MemoryAccess::ReductionType RT) {
540
271
  switch (RT) {
541
0
  case MemoryAccess::RT_NONE:
542
0
    llvm_unreachable("Requested a reduction operator string for a memory "
543
0
                     "access which isn't a reduction");
544
257
  case MemoryAccess::RT_ADD:
545
257
    return "+";
546
10
  case MemoryAccess::RT_MUL:
547
10
    return "*";
548
1
  case MemoryAccess::RT_BOR:
549
1
    return "|";
550
2
  case MemoryAccess::RT_BXOR:
551
2
    return "^";
552
1
  case MemoryAccess::RT_BAND:
553
1
    return "&";
554
271
  }
555
0
  llvm_unreachable("Unknown reduction type");
556
0
  return "";
557
271
}
558
559
/// Return the reduction type for a given binary operator.
560
static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp,
561
261
                                                    const Instruction *Load) {
562
261
  if (!BinOp)
563
0
    return MemoryAccess::RT_NONE;
564
261
  switch (BinOp->getOpcode()) {
565
1
  case Instruction::FAdd:
566
1
    if (!BinOp->hasUnsafeAlgebra())
567
0
      return MemoryAccess::RT_NONE;
568
1
  // Fall through
569
240
  case Instruction::Add:
570
240
    return MemoryAccess::RT_ADD;
571
1
  case Instruction::Or:
572
1
    return MemoryAccess::RT_BOR;
573
3
  case Instruction::Xor:
574
3
    return MemoryAccess::RT_BXOR;
575
1
  case Instruction::And:
576
1
    return MemoryAccess::RT_BAND;
577
0
  case Instruction::FMul:
578
0
    if (!BinOp->hasUnsafeAlgebra())
579
0
      return MemoryAccess::RT_NONE;
580
0
  // Fall through
581
16
  case Instruction::Mul:
582
16
    if (DisableMultiplicativeReductions)
583
0
      return MemoryAccess::RT_NONE;
584
16
    return MemoryAccess::RT_MUL;
585
0
  default:
586
0
    return MemoryAccess::RT_NONE;
587
261
  }
588
261
}
589
590
3.91k
MemoryAccess::~MemoryAccess() {
591
3.91k
  isl_id_free(Id);
592
3.91k
  isl_set_free(InvalidDomain);
593
3.91k
  isl_map_free(AccessRelation);
594
3.91k
  isl_map_free(NewAccessRelation);
595
3.91k
}
596
597
14.2k
const ScopArrayInfo *MemoryAccess::getOriginalScopArrayInfo() const {
598
14.2k
  isl_id *ArrayId = getArrayId();
599
14.2k
  void *User = isl_id_get_user(ArrayId);
600
14.2k
  const ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(User);
601
14.2k
  isl_id_free(ArrayId);
602
14.2k
  return SAI;
603
14.2k
}
604
605
997
const ScopArrayInfo *MemoryAccess::getLatestScopArrayInfo() const {
606
997
  isl_id *ArrayId = getLatestArrayId();
607
997
  void *User = isl_id_get_user(ArrayId);
608
997
  const ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(User);
609
997
  isl_id_free(ArrayId);
610
997
  return SAI;
611
997
}
612
613
15.1k
__isl_give isl_id *MemoryAccess::getOriginalArrayId() const {
614
15.1k
  return isl_map_get_tuple_id(AccessRelation, isl_dim_out);
615
15.1k
}
616
617
997
__isl_give isl_id *MemoryAccess::getLatestArrayId() const {
618
997
  if (!hasNewAccessRelation())
619
974
    return getOriginalArrayId();
620
23
  return isl_map_get_tuple_id(NewAccessRelation, isl_dim_out);
621
997
}
622
623
172
__isl_give isl_map *MemoryAccess::getAddressFunction() const {
624
172
  return isl_map_lexmin(getAccessRelation());
625
172
}
626
627
__isl_give isl_pw_multi_aff *MemoryAccess::applyScheduleToAccessRelation(
628
59
    __isl_take isl_union_map *USchedule) const {
629
59
  isl_map *Schedule, *ScheduledAccRel;
630
59
  isl_union_set *UDomain;
631
59
632
59
  UDomain = isl_union_set_from_set(getStatement()->getDomain());
633
59
  USchedule = isl_union_map_intersect_domain(USchedule, UDomain);
634
59
  Schedule = isl_map_from_union_map(USchedule);
635
59
  ScheduledAccRel = isl_map_apply_domain(getAddressFunction(), Schedule);
636
59
  return isl_pw_multi_aff_from_map(ScheduledAccRel);
637
59
}
638
639
12.1k
__isl_give isl_map *MemoryAccess::getOriginalAccessRelation() const {
640
12.1k
  return isl_map_copy(AccessRelation);
641
12.1k
}
642
643
1.52k
std::string MemoryAccess::getOriginalAccessRelationStr() const {
644
1.52k
  return stringFromIslObj(AccessRelation);
645
1.52k
}
646
647
3.47k
__isl_give isl_space *MemoryAccess::getOriginalAccessRelationSpace() const {
648
3.47k
  return isl_map_get_space(AccessRelation);
649
3.47k
}
650
651
77
__isl_give isl_map *MemoryAccess::getNewAccessRelation() const {
652
77
  return isl_map_copy(NewAccessRelation);
653
77
}
654
655
45
std::string MemoryAccess::getNewAccessRelationStr() const {
656
45
  return stringFromIslObj(NewAccessRelation);
657
45
}
658
659
__isl_give isl_basic_map *
660
51
MemoryAccess::createBasicAccessMap(ScopStmt *Statement) {
661
51
  isl_space *Space = isl_space_set_alloc(Statement->getIslCtx(), 0, 1);
662
51
  Space = isl_space_align_params(Space, Statement->getDomainSpace());
663
51
664
51
  return isl_basic_map_from_domain_and_range(
665
51
      isl_basic_set_universe(Statement->getDomainSpace()),
666
51
      isl_basic_set_universe(Space));
667
51
}
668
669
// Formalize no out-of-bound access assumption
670
//
671
// When delinearizing array accesses we optimistically assume that the
672
// delinearized accesses do not access out of bound locations (the subscript
673
// expression of each array evaluates for each statement instance that is
674
// executed to a value that is larger than zero and strictly smaller than the
675
// size of the corresponding dimension). The only exception is the outermost
676
// dimension for which we do not need to assume any upper bound.  At this point
677
// we formalize this assumption to ensure that at code generation time the
678
// relevant run-time checks can be generated.
679
//
680
// To find the set of constraints necessary to avoid out of bound accesses, we
681
// first build the set of data locations that are not within array bounds. We
682
// then apply the reverse access relation to obtain the set of iterations that
683
// may contain invalid accesses and reduce this set of iterations to the ones
684
// that are actually executed by intersecting them with the domain of the
685
// statement. If we now project out all loop dimensions, we obtain a set of
686
// parameters that may cause statement instances to be executed that may
687
// possibly yield out of bound memory accesses. The complement of these
688
// constraints is the set of constraints that needs to be assumed to ensure such
689
// statement instances are never executed.
690
3.47k
void MemoryAccess::assumeNoOutOfBound() {
691
3.47k
  if (PollyIgnoreInbounds)
692
0
    return;
693
3.47k
  auto *SAI = getScopArrayInfo();
694
3.47k
  isl_space *Space = isl_space_range(getOriginalAccessRelationSpace());
695
3.47k
  isl_set *Outside = isl_set_empty(isl_space_copy(Space));
696
3.94k
  for (int i = 1, Size = isl_space_dim(Space, isl_dim_set); i < Size; ++i) {
697
471
    isl_local_space *LS = isl_local_space_from_space(isl_space_copy(Space));
698
471
    isl_pw_aff *Var =
699
471
        isl_pw_aff_var_on_domain(isl_local_space_copy(LS), isl_dim_set, i);
700
471
    isl_pw_aff *Zero = isl_pw_aff_zero_on_domain(LS);
701
471
702
471
    isl_set *DimOutside;
703
471
704
471
    DimOutside = isl_pw_aff_lt_set(isl_pw_aff_copy(Var), Zero);
705
471
    isl_pw_aff *SizeE = SAI->getDimensionSizePw(i);
706
471
    SizeE = isl_pw_aff_add_dims(SizeE, isl_dim_in,
707
471
                                isl_space_dim(Space, isl_dim_set));
708
471
    SizeE = isl_pw_aff_set_tuple_id(SizeE, isl_dim_in,
709
471
                                    isl_space_get_tuple_id(Space, isl_dim_set));
710
471
711
471
    DimOutside = isl_set_union(DimOutside, isl_pw_aff_le_set(SizeE, Var));
712
471
713
471
    Outside = isl_set_union(Outside, DimOutside);
714
471
  }
715
3.47k
716
3.47k
  Outside = isl_set_apply(Outside, isl_map_reverse(getAccessRelation()));
717
3.47k
  Outside = isl_set_intersect(Outside, Statement->getDomain());
718
3.47k
  Outside = isl_set_params(Outside);
719
3.47k
720
3.47k
  // Remove divs to avoid the construction of overly complicated assumptions.
721
3.47k
  // Doing so increases the set of parameter combinations that are assumed to
722
3.47k
  // not appear. This is always safe, but may make the resulting run-time check
723
3.47k
  // bail out more often than strictly necessary.
724
3.47k
  Outside = isl_set_remove_divs(Outside);
725
3.47k
  Outside = isl_set_complement(Outside);
726
3.47k
  const auto &Loc = getAccessInstruction()
727
3.25k
                        ? getAccessInstruction()->getDebugLoc()
728
221
                        : DebugLoc();
729
3.47k
  if (!PollyPreciseInbounds)
730
3.46k
    Outside = isl_set_gist(Outside, isl_set_params(Statement->getDomain()));
731
3.47k
  Statement->getParent()->recordAssumption(INBOUNDS, Outside, Loc,
732
3.47k
                                           AS_ASSUMPTION);
733
3.47k
  isl_space_free(Space);
734
3.47k
}
735
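Illustration of assumeNoOutOfBound (hypothetical scop, not from the source above): for a delinearized access { Stmt[i] -> A[0][i + 1] } to an array A[*][N] with domain { Stmt[i] : 0 <= i <= 99 }, the invalid locations in the inner dimension are { A[o0, o1] : o1 < 0 or o1 >= N }; applying the reversed access relation and intersecting with the domain gives { Stmt[i] : 0 <= i <= 99 and i + 1 >= N }, whose parameter projection is { [N] : N <= 100 }. Its complement { [N] : N >= 101 } is recorded as the INBOUNDS assumption and can later be enforced by a run-time check.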
736
17
void MemoryAccess::buildMemIntrinsicAccessRelation() {
737
17
  assert(isMemoryIntrinsic());
738
17
  assert(Subscripts.size() == 2 && Sizes.size() == 1);
739
17
740
17
  auto *SubscriptPWA = getPwAff(Subscripts[0]);
741
17
  auto *SubscriptMap = isl_map_from_pw_aff(SubscriptPWA);
742
17
743
17
  isl_map *LengthMap;
744
17
  if (Subscripts[1] == nullptr) {
745
0
    LengthMap = isl_map_universe(isl_map_get_space(SubscriptMap));
746
17
  } else {
747
17
    auto *LengthPWA = getPwAff(Subscripts[1]);
748
17
    LengthMap = isl_map_from_pw_aff(LengthPWA);
749
17
    auto *RangeSpace = isl_space_range(isl_map_get_space(LengthMap));
750
17
    LengthMap = isl_map_apply_range(LengthMap, isl_map_lex_gt(RangeSpace));
751
17
  }
752
17
  LengthMap = isl_map_lower_bound_si(LengthMap, isl_dim_out, 0, 0);
753
17
  LengthMap = isl_map_align_params(LengthMap, isl_map_get_space(SubscriptMap));
754
17
  SubscriptMap =
755
17
      isl_map_align_params(SubscriptMap, isl_map_get_space(LengthMap));
756
17
  LengthMap = isl_map_sum(LengthMap, SubscriptMap);
757
17
  AccessRelation = isl_map_set_tuple_id(LengthMap, isl_dim_in,
758
17
                                        getStatement()->getDomainId());
759
17
}
760
761
50
void MemoryAccess::computeBoundsOnAccessRelation(unsigned ElementSize) {
762
50
  ScalarEvolution *SE = Statement->getParent()->getSE();
763
50
764
50
  auto MAI = MemAccInst(getAccessInstruction());
765
50
  if (isa<MemIntrinsic>(MAI))
766
0
    return;
767
50
768
50
  Value *Ptr = MAI.getPointerOperand();
769
50
  if (!Ptr || !SE->isSCEVable(Ptr->getType()))
770
22
    return;
771
50
772
28
  auto *PtrSCEV = SE->getSCEV(Ptr);
773
28
  if (isa<SCEVCouldNotCompute>(PtrSCEV))
774
0
    return;
775
28
776
28
  auto *BasePtrSCEV = SE->getPointerBase(PtrSCEV);
777
28
  if (BasePtrSCEV && !isa<SCEVCouldNotCompute>(BasePtrSCEV))
778
28
    PtrSCEV = SE->getMinusSCEV(PtrSCEV, BasePtrSCEV);
779
28
780
28
  const ConstantRange &Range = SE->getSignedRange(PtrSCEV);
781
28
  if (Range.isFullSet())
782
1
    return;
783
28
784
27
  if (Range.isWrappedSet())
785
16
    return;
786
27
787
11
  bool isWrapping = Range.isSignWrappedSet();
788
11
789
11
  unsigned BW = Range.getBitWidth();
790
11
  const auto One = APInt(BW, 1);
791
11
  const auto LB = isWrapping ? Range.getLower() : Range.getSignedMin();
792
11
  const auto UB = isWrapping ? (Range.getUpper() - One) : Range.getSignedMax();
793
11
794
11
  auto Min = LB.sdiv(APInt(BW, ElementSize));
795
11
  auto Max = UB.sdiv(APInt(BW, ElementSize)) + One;
796
11
797
11
  assert(Min.sle(Max) && "Minimum expected to be less or equal than max");
798
11
799
11
  isl_set *AccessRange = isl_map_range(isl_map_copy(AccessRelation));
800
11
  AccessRange =
801
11
      addRangeBoundsToSet(AccessRange, ConstantRange(Min, Max), 0, isl_dim_set);
802
11
  AccessRelation = isl_map_intersect_range(AccessRelation, AccessRange);
803
11
}
804
805
3.47k
void MemoryAccess::foldAccessRelation() {
806
3.47k
  if (Sizes.size() < 2 || isa<SCEVConstant>(Sizes[1]))
807
3.31k
    return;
808
3.47k
809
155
  int Size = Subscripts.size();
810
155
811
155
  isl_map *OldAccessRelation = isl_map_copy(AccessRelation);
812
155
813
406
  for (int i = Size - 2; i >= 0; --i) {
814
251
    isl_space *Space;
815
251
    isl_map *MapOne, *MapTwo;
816
251
    isl_pw_aff *DimSize = getPwAff(Sizes[i + 1]);
817
251
818
251
    isl_space *SpaceSize = isl_pw_aff_get_space(DimSize);
819
251
    isl_pw_aff_free(DimSize);
820
251
    isl_id *ParamId = isl_space_get_dim_id(SpaceSize, isl_dim_param, 0);
821
251
822
251
    Space = isl_map_get_space(AccessRelation);
823
251
    Space = isl_space_map_from_set(isl_space_range(Space));
824
251
    Space = isl_space_align_params(Space, SpaceSize);
825
251
826
251
    int ParamLocation = isl_space_find_dim_by_id(Space, isl_dim_param, ParamId);
827
251
    isl_id_free(ParamId);
828
251
829
251
    MapOne = isl_map_universe(isl_space_copy(Space));
830
981
    for (int j = 0; j < Size; ++j)
831
730
      MapOne = isl_map_equate(MapOne, isl_dim_in, j, isl_dim_out, j);
832
251
    MapOne = isl_map_lower_bound_si(MapOne, isl_dim_in, i + 1, 0);
833
251
834
251
    MapTwo = isl_map_universe(isl_space_copy(Space));
835
981
    for (int j = 0; j < Size; ++j)
836
730
      if (j < i || j > i + 1)
837
228
        MapTwo = isl_map_equate(MapTwo, isl_dim_in, j, isl_dim_out, j);
838
251
839
251
    isl_local_space *LS = isl_local_space_from_space(Space);
840
251
    isl_constraint *C;
841
251
    C = isl_equality_alloc(isl_local_space_copy(LS));
842
251
    C = isl_constraint_set_constant_si(C, -1);
843
251
    C = isl_constraint_set_coefficient_si(C, isl_dim_in, i, 1);
844
251
    C = isl_constraint_set_coefficient_si(C, isl_dim_out, i, -1);
845
251
    MapTwo = isl_map_add_constraint(MapTwo, C);
846
251
    C = isl_equality_alloc(LS);
847
251
    C = isl_constraint_set_coefficient_si(C, isl_dim_in, i + 1, 1);
848
251
    C = isl_constraint_set_coefficient_si(C, isl_dim_out, i + 1, -1);
849
251
    C = isl_constraint_set_coefficient_si(C, isl_dim_param, ParamLocation, 1);
850
251
    MapTwo = isl_map_add_constraint(MapTwo, C);
851
251
    MapTwo = isl_map_upper_bound_si(MapTwo, isl_dim_in, i + 1, -1);
852
251
853
251
    MapOne = isl_map_union(MapOne, MapTwo);
854
251
    AccessRelation = isl_map_apply_range(AccessRelation, MapOne);
855
251
  }
856
155
857
155
  isl_id *BaseAddrId = getScopArrayInfo()->getBasePtrId();
858
155
  auto Space = Statement->getDomainSpace();
859
155
  AccessRelation = isl_map_set_tuple_id(
860
155
      AccessRelation, isl_dim_in, isl_space_get_tuple_id(Space, isl_dim_set));
861
155
  AccessRelation =
862
155
      isl_map_set_tuple_id(AccessRelation, isl_dim_out, BaseAddrId);
863
155
  AccessRelation = isl_map_gist_domain(AccessRelation, Statement->getDomain());
864
155
865
155
  // Access dimension folding might in certain cases increase the number of
866
155
  // disjuncts in the memory access, which can possibly complicate the generated
867
155
  // run-time checks and can lead to costly compilation.
868
155
  if (!PollyPreciseFoldAccesses && isl_map_n_basic_map(AccessRelation) >
869
1
                                       isl_map_n_basic_map(OldAccessRelation)) {
870
1
    isl_map_free(AccessRelation);
871
1
    AccessRelation = OldAccessRelation;
872
154
  } else {
873
154
    isl_map_free(OldAccessRelation);
874
154
  }
875
155
876
155
  isl_space_free(Space);
877
155
}
878
879
/// Check if @p Expr is divisible by @p Size.
880
5.45k
static bool isDivisible(const SCEV *Expr, unsigned Size, ScalarEvolution &SE) {
881
5.45k
  assert(Size != 0);
882
5.45k
  if (Size == 1)
883
107
    return true;
884
5.45k
885
5.45k
  // Only one factor needs to be divisible.
886
5.34k
  if (auto *MulExpr = dyn_cast<SCEVMulExpr>(Expr)) {
887
268
    for (auto *FactorExpr : MulExpr->operands())
888
268
      
if (268
isDivisible(FactorExpr, Size, SE)268
)
889
268
        return true;
890
0
    return false;
891
268
  }
892
5.34k
893
5.34k
  // For other n-ary expressions (Add, AddRec, Max,...) all operands need
894
5.34k
  // to be divisible.
895
5.07k
  if (auto *NAryExpr = dyn_cast<SCEVNAryExpr>(Expr)) {
896
1.43k
    for (auto *OpExpr : NAryExpr->operands())
897
2.86k
      if (!isDivisible(OpExpr, Size, SE))
)
898
0
        return false;
899
1.43k
    return true;
900
1.43k
  }
901
5.07k
902
3.64k
  auto *SizeSCEV = SE.getConstant(Expr->getType(), Size);
903
3.64k
  auto *UDivSCEV = SE.getUDivExpr(Expr, SizeSCEV);
904
3.64k
  auto *MulSCEV = SE.getMulExpr(UDivSCEV, SizeSCEV);
905
3.64k
  return MulSCEV == Expr;
906
5.07k
}
907
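Illustration of isDivisible (hypothetical SCEVs, not from the source above): with Size = 4, the expression 4 * %i is accepted because a single factor of the multiplication (the constant 4) is divisible, and {0,+,4}<%loop> + 8 is accepted because every operand of the n-ary expressions survives the (Expr /u 4) * 4 == Expr round trip; 4 * %i + 2 is rejected because the operand 2 does not.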
908
3.52k
void MemoryAccess::buildAccessRelation(const ScopArrayInfo *SAI) {
909
3.52k
  assert(!AccessRelation && "AccessReltation already built");
910
3.52k
911
3.52k
  // Initialize the invalid domain which describes all iterations for which the
912
3.52k
  // access relation is not modeled correctly.
913
3.52k
  auto *StmtInvalidDomain = getStatement()->getInvalidDomain();
914
3.52k
  InvalidDomain = isl_set_empty(isl_set_get_space(StmtInvalidDomain));
915
3.52k
  isl_set_free(StmtInvalidDomain);
916
3.52k
917
3.52k
  isl_ctx *Ctx = isl_id_get_ctx(Id);
918
3.52k
  isl_id *BaseAddrId = SAI->getBasePtrId();
919
3.52k
920
3.52k
  if (getAccessInstruction() && isa<MemIntrinsic>(getAccessInstruction())) {
921
17
    buildMemIntrinsicAccessRelation();
922
17
    AccessRelation =
923
17
        isl_map_set_tuple_id(AccessRelation, isl_dim_out, BaseAddrId);
924
17
    return;
925
17
  }
926
3.52k
927
3.50k
  if (!isAffine()) {
928
51
    // We overapproximate non-affine accesses with a possible access to the
929
51
    // whole array. For read accesses it does not make a difference, if an
930
51
    // access must or may happen. However, for write accesses it is important to
931
51
    // differentiate between writes that must happen and writes that may happen.
932
51
    if (!AccessRelation)
933
51
      AccessRelation = isl_map_from_basic_map(createBasicAccessMap(Statement));
934
51
935
51
    AccessRelation =
936
51
        isl_map_set_tuple_id(AccessRelation, isl_dim_out, BaseAddrId);
937
51
    return;
938
51
  }
939
3.50k
940
3.45k
  isl_space *Space = isl_space_alloc(Ctx, 0, Statement->getNumIterators(), 0);
941
3.45k
  AccessRelation = isl_map_universe(Space);
942
3.45k
943
6.55k
  for (int i = 0, Size = Subscripts.size(); i < Size; ++i) {
944
3.09k
    isl_pw_aff *Affine = getPwAff(Subscripts[i]);
945
3.09k
    isl_map *SubscriptMap = isl_map_from_pw_aff(Affine);
946
3.09k
    AccessRelation = isl_map_flat_range_product(AccessRelation, SubscriptMap);
947
3.09k
  }
948
3.45k
949
3.45k
  Space = Statement->getDomainSpace();
950
3.45k
  AccessRelation = isl_map_set_tuple_id(
951
3.45k
      AccessRelation, isl_dim_in, isl_space_get_tuple_id(Space, isl_dim_set));
952
3.45k
  AccessRelation =
953
3.45k
      isl_map_set_tuple_id(AccessRelation, isl_dim_out, BaseAddrId);
954
3.45k
955
3.45k
  AccessRelation = isl_map_gist_domain(AccessRelation, Statement->getDomain());
956
3.45k
  isl_space_free(Space);
957
3.45k
}
958
959
MemoryAccess::MemoryAccess(ScopStmt *Stmt, Instruction *AccessInst,
960
                           AccessType AccType, Value *BaseAddress,
961
                           Type *ElementType, bool Affine,
962
                           ArrayRef<const SCEV *> Subscripts,
963
                           ArrayRef<const SCEV *> Sizes, Value *AccessValue,
964
                           MemoryKind Kind, StringRef BaseName)
965
    : Kind(Kind), AccType(AccType), RedType(RT_NONE), Statement(Stmt),
966
      InvalidDomain(nullptr), BaseAddr(BaseAddress), BaseName(BaseName),
967
      ElementType(ElementType), Sizes(Sizes.begin(), Sizes.end()),
968
      AccessInstruction(AccessInst), AccessValue(AccessValue), IsAffine(Affine),
969
      Subscripts(Subscripts.begin(), Subscripts.end()), AccessRelation(nullptr),
970
3.87k
      NewAccessRelation(nullptr) {
971
3.87k
  static const std::string TypeStrings[] = {"", "_Read", "_Write", "_MayWrite"};
972
3.87k
  const std::string Access = TypeStrings[AccType] + utostr(Stmt->size()) + "_";
973
3.87k
974
3.87k
  std::string IdName =
975
3.87k
      getIslCompatibleName(Stmt->getBaseName(), Access, BaseName);
976
3.87k
  Id = isl_id_alloc(Stmt->getParent()->getIslCtx(), IdName.c_str(), this);
977
3.87k
}
978
979
MemoryAccess::MemoryAccess(ScopStmt *Stmt, AccessType AccType,
980
                           __isl_take isl_map *AccRel)
981
    : Kind(MemoryKind::Array), AccType(AccType), RedType(RT_NONE),
982
      Statement(Stmt), InvalidDomain(nullptr), AccessInstruction(nullptr),
983
36
      IsAffine(true), AccessRelation(nullptr), NewAccessRelation(AccRel) {
984
36
  auto *ArrayInfoId = isl_map_get_tuple_id(NewAccessRelation, isl_dim_out);
985
36
  auto *SAI = ScopArrayInfo::getFromId(ArrayInfoId);
986
36
  Sizes.push_back(nullptr);
987
90
  for (unsigned i = 1; i < SAI->getNumberOfDimensions(); i++)
988
54
    Sizes.push_back(SAI->getDimensionSize(i));
989
36
  ElementType = SAI->getElementType();
990
36
  BaseAddr = SAI->getBasePtr();
991
36
  BaseName = SAI->getName();
992
36
  static const std::string TypeStrings[] = {"", "_Read", "_Write", "_MayWrite"};
993
36
  const std::string Access = TypeStrings[AccType] + utostr(Stmt->size()) + "_";
994
36
995
36
  std::string IdName =
996
36
      getIslCompatibleName(Stmt->getBaseName(), Access, BaseName);
997
36
  Id = isl_id_alloc(Stmt->getParent()->getIslCtx(), IdName.c_str(), this);
998
36
}
999
1000
3.46k
void MemoryAccess::realignParams() {
1001
3.46k
  auto *Ctx = Statement->getParent()->getContext();
1002
3.46k
  InvalidDomain = isl_set_gist_params(InvalidDomain, isl_set_copy(Ctx));
1003
3.46k
  AccessRelation = isl_map_gist_params(AccessRelation, Ctx);
1004
3.46k
}
1005
1006
0
const std::string MemoryAccess::getReductionOperatorStr() const {
1007
0
  return MemoryAccess::getReductionOperatorStr(getReductionType());
1008
0
}
1009
1010
754
__isl_give isl_id *MemoryAccess::getId() const { return isl_id_copy(Id); }
1011
1012
raw_ostream &polly::operator<<(raw_ostream &OS,
1013
1.52k
                               MemoryAccess::ReductionType RT) {
1014
1.52k
  if (RT == MemoryAccess::RT_NONE)
1015
1.28k
    OS << "NONE";
1016
1.52k
  else
1017
246
    OS << MemoryAccess::getReductionOperatorStr(RT);
1018
1.52k
  return OS;
1019
1.52k
}
1020
1021
1.52k
void MemoryAccess::print(raw_ostream &OS) const {
1022
1.52k
  switch (AccType) {
1023
776
  case READ:
1024
776
    OS.indent(12) << "ReadAccess :=\t";
1025
776
    break;
1026
719
  case MUST_WRITE:
1027
719
    OS.indent(12) << "MustWriteAccess :=\t";
1028
719
    break;
1029
31
  case MAY_WRITE:
1030
31
    OS.indent(12) << "MayWriteAccess :=\t";
1031
31
    break;
1032
1.52k
  }
1033
1.52k
  OS << "[Reduction Type: " << getReductionType() << "] ";
1034
1.52k
  OS << "[Scalar: " << isScalarKind() << "]\n";
1035
1.52k
  OS.indent(16) << getOriginalAccessRelationStr() << ";\n";
1036
1.52k
  if (hasNewAccessRelation())
1037
45
    OS.indent(11) << "new: " << getNewAccessRelationStr() << ";\n";
1038
1.52k
}
1039
1040
0
void MemoryAccess::dump() const { print(errs()); }
1041
1042
3.38k
__isl_give isl_pw_aff *MemoryAccess::getPwAff(const SCEV *E) {
1043
3.38k
  auto *Stmt = getStatement();
1044
3.38k
  PWACtx PWAC = Stmt->getParent()->getPwAff(E, Stmt->getEntryBlock());
1045
3.38k
  isl_set *StmtDom = isl_set_reset_tuple_id(getStatement()->getDomain());
1046
3.38k
  isl_set *NewInvalidDom = isl_set_intersect(StmtDom, PWAC.second);
1047
3.38k
  InvalidDomain = isl_set_union(InvalidDomain, NewInvalidDom);
1048
3.38k
  return PWAC.first;
1049
3.38k
}
1050
1051
// Create a map in the size of the provided set domain, that maps from the
1052
// one element of the provided set domain to another element of the provided
1053
// set domain.
1054
// The mapping is limited to all points that are equal in all but the last
1055
// dimension and for which the last dimension of the input is strictly smaller
1056
// than the last dimension of the output.
1057
//
1058
//   getEqualAndLarger(set[i0, i1, ..., iX]):
1059
//
1060
//   set[i0, i1, ..., iX] -> set[o0, o1, ..., oX]
1061
//     : i0 = o0, i1 = o1, ..., i(X-1) = o(X-1), iX < oX
1062
//
1063
46
static isl_map *getEqualAndLarger(__isl_take isl_space *setDomain) {
1064
46
  isl_space *Space = isl_space_map_from_set(setDomain);
1065
46
  isl_map *Map = isl_map_universe(Space);
1066
46
  unsigned lastDimension = isl_map_dim(Map, isl_dim_in) - 1;
1067
46
1068
46
  // Set all but the last dimension to be equal for the input and output
1069
46
  //
1070
46
  //   input[i0, i1, ..., iX] -> output[o0, o1, ..., oX]
1071
46
  //     : i0 = o0, i1 = o1, ..., i(X-1) = o(X-1)
1072
72
  for (unsigned i = 0; i < lastDimension; ++i)
1073
26
    Map = isl_map_equate(Map, isl_dim_in, i, isl_dim_out, i);
1074
46
1075
46
  // Set the last dimension of the input to be strictly smaller than the
1076
46
  // last dimension of the output.
1077
46
  //
1078
46
  //   input[?,?,?,...,iX] -> output[?,?,?,...,oX] : iX < oX
1079
46
  Map = isl_map_order_lt(Map, isl_dim_in, lastDimension, isl_dim_out,
1080
46
                         lastDimension);
1081
46
  return Map;
1082
46
}
1083
1084
__isl_give isl_set *
1085
46
MemoryAccess::getStride(__isl_take const isl_map *Schedule) const {
1086
46
  isl_map *S = const_cast<isl_map *>(Schedule);
1087
46
  isl_map *AccessRelation = getAccessRelation();
1088
46
  isl_space *Space = isl_space_range(isl_map_get_space(S));
1089
46
  isl_map *NextScatt = getEqualAndLarger(Space);
1090
46
1091
46
  S = isl_map_reverse(S);
1092
46
  NextScatt = isl_map_lexmin(NextScatt);
1093
46
1094
46
  NextScatt = isl_map_apply_range(NextScatt, isl_map_copy(S));
1095
46
  NextScatt = isl_map_apply_range(NextScatt, isl_map_copy(AccessRelation));
1096
46
  NextScatt = isl_map_apply_domain(NextScatt, S);
1097
46
  NextScatt = isl_map_apply_domain(NextScatt, AccessRelation);
1098
46
1099
46
  isl_set *Deltas = isl_map_deltas(NextScatt);
1100
46
  return Deltas;
1101
46
}
1102
1103
bool MemoryAccess::isStrideX(__isl_take const isl_map *Schedule,
1104
46
                             int StrideWidth) const {
1105
46
  isl_set *Stride, *StrideX;
1106
46
  bool IsStrideX;
1107
46
1108
46
  Stride = getStride(Schedule);
1109
46
  StrideX = isl_set_universe(isl_set_get_space(Stride));
1110
56
  for (unsigned i = 0; i < isl_set_dim(StrideX, isl_dim_set) - 1; i++)
1111
10
    StrideX = isl_set_fix_si(StrideX, isl_dim_set, i, 0);
1112
46
  StrideX = isl_set_fix_si(StrideX, isl_dim_set,
1113
46
                           isl_set_dim(StrideX, isl_dim_set) - 1, StrideWidth);
1114
46
  IsStrideX = isl_set_is_subset(Stride, StrideX);
1115
46
1116
46
  isl_set_free(StrideX);
1117
46
  isl_set_free(Stride);
1118
46
1119
46
  return IsStrideX;
1120
46
}
1121
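Illustration of isStrideX (hypothetical access, not from the source above): under a schedule { Stmt[i] -> [i] }, an access { Stmt[i] -> A[2i] } produces getStride deltas of { A[2] }, since consecutive schedule points touch elements 2i and 2i + 2; isStrideX(Schedule, 2) therefore holds while isStrideOne and isStrideZero do not, whereas a loop-invariant access { Stmt[i] -> A[5] } gives deltas { A[0] } and satisfies isStrideZero.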
1122
14
bool MemoryAccess::isStrideZero(__isl_take const isl_map *Schedule) const {
1123
14
  return isStrideX(Schedule, 0);
1124
14
}
1125
1126
28
bool MemoryAccess::isStrideOne(__isl_take const isl_map *Schedule) const {
1127
28
  return isStrideX(Schedule, 1);
1128
28
}
1129
1130
11
void MemoryAccess::setAccessRelation(__isl_take isl_map *NewAccess) {
1131
11
  isl_map_free(AccessRelation);
1132
11
  AccessRelation = NewAccess;
1133
11
}
1134
1135
102
void MemoryAccess::setNewAccessRelation(__isl_take isl_map *NewAccess) {
1136
102
  assert(NewAccess);
1137
102
1138
102
#ifndef NDEBUG
1139
  // Check domain space compatibility.
1140
  auto *NewSpace = isl_map_get_space(NewAccess);
1141
  auto *NewDomainSpace = isl_space_domain(isl_space_copy(NewSpace));
1142
  auto *OriginalDomainSpace = getStatement()->getDomainSpace();
1143
  assert(isl_space_has_equal_tuples(OriginalDomainSpace, NewDomainSpace));
1144
  isl_space_free(NewDomainSpace);
1145
  isl_space_free(OriginalDomainSpace);
1146
1147
  // Check whether there is an access for every statement instance.
1148
  auto *StmtDomain = getStatement()->getDomain();
1149
  StmtDomain = isl_set_intersect_params(
1150
      StmtDomain, getStatement()->getParent()->getContext());
1151
  auto *NewDomain = isl_map_domain(isl_map_copy(NewAccess));
1152
  assert(isl_set_is_subset(StmtDomain, NewDomain) &&
1153
         "Partial accesses not supported");
1154
  isl_set_free(NewDomain);
1155
  isl_set_free(StmtDomain);
1156
1157
  auto *NewAccessSpace = isl_space_range(NewSpace);
1158
  assert(isl_space_has_tuple_id(NewAccessSpace, isl_dim_set) &&
1159
         "Must specify the array that is accessed");
1160
  auto *NewArrayId = isl_space_get_tuple_id(NewAccessSpace, isl_dim_set);
1161
  auto *SAI = static_cast<ScopArrayInfo *>(isl_id_get_user(NewArrayId));
1162
  assert(SAI && "Must set a ScopArrayInfo");
1163
1164
  if (SAI->isArrayKind() && SAI->getBasePtrOriginSAI()) {
1165
    InvariantEquivClassTy *EqClass =
1166
        getStatement()->getParent()->lookupInvariantEquivClass(
1167
            SAI->getBasePtr());
1168
    assert(EqClass &&
1169
           "Access functions to indirect arrays must have an invariant and "
1170
           "hoisted base pointer");
1171
  }
1172
1173
  // Check whether access dimensions correspond to number of dimensions of the
1174
  // accesses array.
1175
  auto Dims = SAI->getNumberOfDimensions();
1176
  assert(isl_space_dim(NewAccessSpace, isl_dim_set) == Dims &&
1177
         "Access dims must match array dims");
1178
  isl_space_free(NewAccessSpace);
1179
  isl_id_free(NewArrayId);
1180
#endif
1181
102
1182
102
  isl_map_free(NewAccessRelation);
1183
102
  NewAccessRelation = NewAccess;
1184
102
}
1185
1186
//===----------------------------------------------------------------------===//
1187
1188
866
__isl_give isl_map *ScopStmt::getSchedule() const {
1189
866
  isl_set *Domain = getDomain();
1190
866
  if (isl_set_is_empty(Domain)) {
1191
3
    isl_set_free(Domain);
1192
3
    return isl_map_from_aff(
1193
3
        isl_aff_zero_on_domain(isl_local_space_from_space(getDomainSpace())));
1194
3
  }
1195
863
  auto *Schedule = getParent()->getSchedule();
1196
863
  if (!Schedule) {
1197
4
    isl_set_free(Domain);
1198
4
    return nullptr;
1199
4
  }
1200
859
  Schedule = isl_union_map_intersect_domain(
1201
859
      Schedule, isl_union_set_from_set(isl_set_copy(Domain)));
1202
859
  if (isl_union_map_is_empty(Schedule)) {
1203
0
    isl_set_free(Domain);
1204
0
    isl_union_map_free(Schedule);
1205
0
    return isl_map_from_aff(
1206
0
        isl_aff_zero_on_domain(isl_local_space_from_space(getDomainSpace())));
1207
0
  }
1208
859
  auto *M = isl_map_from_union_map(Schedule);
1209
859
  M = isl_map_coalesce(M);
1210
859
  M = isl_map_gist_domain(M, Domain);
1211
859
  M = isl_map_coalesce(M);
1212
859
  return M;
1213
859
}
1214
1215
4.23k
__isl_give isl_pw_aff *ScopStmt::getPwAff(const SCEV *E, bool NonNegative) {
1216
4.23k
  PWACtx PWAC = getParent()->getPwAff(E, getEntryBlock(), NonNegative);
1217
4.23k
  InvalidDomain = isl_set_union(InvalidDomain, PWAC.second);
1218
4.23k
  return PWAC.first;
1219
4.23k
}
1220
1221
8
void ScopStmt::restrictDomain(__isl_take isl_set *NewDomain) {
1222
8
  assert(isl_set_is_subset(NewDomain, Domain) &&
1223
8
         "New domain is not a subset of old domain!");
1224
8
  isl_set_free(Domain);
1225
8
  Domain = NewDomain;
1226
8
}
1227
1228
1.57k
void ScopStmt::buildAccessRelations() {
1229
1.57k
  Scop &S = *getParent();
1230
3.52k
  for (MemoryAccess *Access : MemAccs) {
1231
3.52k
    Type *ElementType = Access->getElementType();
1232
3.52k
1233
3.52k
    MemoryKind Ty;
1234
3.52k
    if (Access->isPHIKind())
1235
313
      Ty = MemoryKind::PHI;
1236
3.21k
    else if (Access->isExitPHIKind())
1237
49
      Ty = MemoryKind::ExitPHI;
1238
3.16k
    else if (Access->isValueKind())
1239
456
      Ty = MemoryKind::Value;
1240
3.16k
    else
1241
2.70k
      Ty = MemoryKind::Array;
1242
3.52k
1243
3.52k
    auto *SAI = S.getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(),
1244
3.52k
                                           ElementType, Access->Sizes, Ty);
1245
3.52k
    Access->buildAccessRelation(SAI);
1246
3.52k
  }
1247
1.57k
}
1248
1249
3.91k
void ScopStmt::addAccess(MemoryAccess *Access) {
1250
3.91k
  Instruction *AccessInst = Access->getAccessInstruction();
1251
3.91k
1252
3.91k
  if (Access->isArrayKind()) {
1253
2.98k
    MemoryAccessList &MAL = InstructionToAccess[AccessInst];
1254
2.98k
    MAL.emplace_front(Access);
1255
928
  } else if (Access->isValueKind() && Access->isWrite()) {
1256
251
    Instruction *AccessVal = cast<Instruction>(Access->getAccessValue());
1257
251
    assert(Parent.getStmtFor(AccessVal) == this);
1258
251
    assert(!ValueWrites.lookup(AccessVal));
1259
251
1260
251
    ValueWrites[AccessVal] = Access;
1261
677
  } else if (Access->isValueKind() && Access->isRead()) {
1262
247
    Value *AccessVal = Access->getAccessValue();
1263
247
    assert(!ValueReads.lookup(AccessVal));
1264
247
1265
247
    ValueReads[AccessVal] = Access;
1266
430
  } else if (Access->isAnyPHIKind() && Access->isWrite()) {
1267
285
    PHINode *PHI = cast<PHINode>(Access->getAccessValue());
1268
285
    assert(!PHIWrites.lookup(PHI));
1269
285
1270
285
    PHIWrites[PHI] = Access;
1271
285
  }
1272
3.91k
1273
3.91k
  MemAccs.push_back(Access);
1274
3.91k
}
1275
1276
1.56k
void ScopStmt::realignParams() {
1277
1.56k
  for (MemoryAccess *MA : *this)
1278
3.46k
    MA->realignParams();
1279
1.56k
1280
1.56k
  auto *Ctx = Parent.getContext();
1281
1.56k
  InvalidDomain = isl_set_gist_params(InvalidDomain, isl_set_copy(Ctx));
1282
1.56k
  Domain = isl_set_gist_params(Domain, Ctx);
1283
1.56k
}
1284
1285
/// Add @p BSet to the set @p User if @p BSet is bounded.
1286
static isl_stat collectBoundedParts(__isl_take isl_basic_set *BSet,
1287
1.86k
                                    void *User) {
1288
1.86k
  isl_set **BoundedParts = static_cast<isl_set **>(User);
1289
1.86k
  if (isl_basic_set_is_bounded(BSet))
1290
1.63k
    *BoundedParts = isl_set_union(*BoundedParts, isl_set_from_basic_set(BSet));
1291
1.86k
  else
1292
232
    isl_basic_set_free(BSet);
1293
1.86k
  return isl_stat_ok;
1294
1.86k
}
1295
1296
/// Return the bounded parts of @p S.
1297
1.32k
static __isl_give isl_set *collectBoundedParts(__isl_take isl_set *S) {
1298
1.32k
  isl_set *BoundedParts = isl_set_empty(isl_set_get_space(S));
1299
1.32k
  isl_set_foreach_basic_set(S, collectBoundedParts, &BoundedParts);
1300
1.32k
  isl_set_free(S);
1301
1.32k
  return BoundedParts;
1302
1.32k
}
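Note (annotation, not part of ScopInfo.cpp): a minimal standalone sketch of the foreach/is_bounded pattern used by collectBoundedParts above. The parametric set is invented and the isl headers are assumed available; the piece i <= n is unbounded in i and gets dropped, the piece 0 <= i <= 7 is kept.

#include <isl/ctx.h>
#include <isl/set.h>
#include <isl/space.h>

// Keep only bounded basic sets; same shape as collectBoundedParts().
static isl_stat keepBounded(__isl_take isl_basic_set *BSet, void *User) {
  isl_set **Bounded = static_cast<isl_set **>(User);
  if (isl_basic_set_is_bounded(BSet))
    *Bounded = isl_set_union(*Bounded, isl_set_from_basic_set(BSet));
  else
    isl_basic_set_free(BSet);
  return isl_stat_ok;
}

int main() {
  isl_ctx *Ctx = isl_ctx_alloc();
  isl_set *S = isl_set_read_from_str(
      Ctx, "[n] -> { [i] : i <= n or (0 <= i <= 7) }");

  isl_set *Bounded = isl_set_empty(isl_set_get_space(S));
  isl_set_foreach_basic_set(S, keepBounded, &Bounded);

  isl_set_dump(Bounded); // only the bounded piece 0 <= i <= 7 remains

  isl_set_free(S);
  isl_set_free(Bounded);
  isl_ctx_free(Ctx);
  return 0;
}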
1303
1304
/// Compute the (un)bounded parts of @p S wrt. to dimension @p Dim.
1305
///
1306
/// @returns A separation of @p S into first an unbounded then a bounded subset,
1307
///          both with regards to the dimension @p Dim.
1308
static std::pair<__isl_give isl_set *, __isl_give isl_set *>
1309
1.32k
partitionSetParts(__isl_take isl_set *S, unsigned Dim) {
1310
1.32k
1311
3.10k
  for (unsigned u = 0, e = isl_set_n_dim(S); u < e; u++)
1312
1.77k
    S = isl_set_lower_bound_si(S, isl_dim_set, u, 0);
1313
1.32k
1314
1.32k
  unsigned NumDimsS = isl_set_n_dim(S);
1315
1.32k
  isl_set *OnlyDimS = isl_set_copy(S);
1316
1.32k
1317
1.32k
  // Remove dimensions that are greater than Dim as they are not interesting.
1318
1.32k
  assert(NumDimsS >= Dim + 1);
1319
1.32k
  OnlyDimS =
1320
1.32k
      isl_set_project_out(OnlyDimS, isl_dim_set, Dim + 1, NumDimsS - Dim - 1);
1321
1.32k
1322
1.32k
  // Create artificial parametric upper bounds for dimensions smaller than Dim
1323
1.32k
  // as we are not interested in them.
1324
1.32k
  OnlyDimS = isl_set_insert_dims(OnlyDimS, isl_dim_param, 0, Dim);
1325
1.77k
  for (unsigned u = 0; u < Dim; u++) {
1326
454
    isl_constraint *C = isl_inequality_alloc(
1327
454
        isl_local_space_from_space(isl_set_get_space(OnlyDimS)));
1328
454
    C = isl_constraint_set_coefficient_si(C, isl_dim_param, u, 1);
1329
454
    C = isl_constraint_set_coefficient_si(C, isl_dim_set, u, -1);
1330
454
    OnlyDimS = isl_set_add_constraint(OnlyDimS, C);
1331
454
  }
1332
1.32k
1333
1.32k
  // Collect all bounded parts of OnlyDimS.
1334
1.32k
  isl_set *BoundedParts = collectBoundedParts(OnlyDimS);
1335
1.32k
1336
1.32k
  // Create the dimensions greater than Dim again.
1337
1.32k
  BoundedParts = isl_set_insert_dims(BoundedParts, isl_dim_set, Dim + 1,
1338
1.32k
                                     NumDimsS - Dim - 1);
1339
1.32k
1340
1.32k
  // Remove the artificial upper bound parameters again.
1341
1.32k
  BoundedParts = isl_set_remove_dims(BoundedParts, isl_dim_param, 0, Dim);
1342
1.32k
1343
1.32k
  isl_set *UnboundedParts = isl_set_subtract(S, isl_set_copy(BoundedParts));
1344
1.32k
  return std::make_pair(UnboundedParts, BoundedParts);
1345
1.32k
}
1346
1347
/// Set the dimension Ids from @p From in @p To.
1348
static __isl_give isl_set *setDimensionIds(__isl_keep isl_set *From,
1349
2.15k
                                           __isl_take isl_set *To) {
1350
4.79k
  for (unsigned u = 0, e = isl_set_n_dim(From); u < e; u++) {
1351
2.63k
    isl_id *DimId = isl_set_get_dim_id(From, isl_dim_set, u);
1352
2.63k
    To = isl_set_set_dim_id(To, isl_dim_set, u, DimId);
1353
2.63k
  }
1354
2.15k
  return To;
1355
2.15k
}
1356
1357
/// Create the conditions under which @p L @p Pred @p R is true.
1358
static __isl_give isl_set *buildConditionSet(ICmpInst::Predicate Pred,
1359
                                             __isl_take isl_pw_aff *L,
1360
2.13k
                                             __isl_take isl_pw_aff *R) {
1361
2.13k
  switch (Pred) {
1362
570
  case ICmpInst::ICMP_EQ:
1363
570
    return isl_pw_aff_eq_set(L, R);
1364
905
  case ICmpInst::ICMP_NE:
1365
905
    return isl_pw_aff_ne_set(L, R);
1366
475
  case ICmpInst::ICMP_SLT:
1367
475
    return isl_pw_aff_lt_set(L, R);
1368
41
  case ICmpInst::ICMP_SLE:
1369
41
    return isl_pw_aff_le_set(L, R);
1370
112
  case ICmpInst::ICMP_SGT:
1371
112
    return isl_pw_aff_gt_set(L, R);
1372
15
  case ICmpInst::ICMP_SGE:
1373
15
    return isl_pw_aff_ge_set(L, R);
1374
14
  case ICmpInst::ICMP_ULT:
1375
14
    return isl_pw_aff_lt_set(L, R);
1376
2
  case ICmpInst::ICMP_UGT:
1377
2
    return isl_pw_aff_gt_set(L, R);
1378
0
  case ICmpInst::ICMP_ULE:
1379
0
    return isl_pw_aff_le_set(L, R);
1380
4
  case ICmpInst::ICMP_UGE:
1381
4
    return isl_pw_aff_ge_set(L, R);
1382
0
  default:
1383
0
    llvm_unreachable("Non integer predicate not supported");
1384
2.13k
  }
1385
2.13k
}
1386
1387
/// Create the conditions under which @p L @p Pred @p R is true.
1388
///
1389
/// Helper function that will make sure the dimensions of the result have the
1390
/// same isl_id's as the @p Domain.
1391
static __isl_give isl_set *buildConditionSet(ICmpInst::Predicate Pred,
1392
                                             __isl_take isl_pw_aff *L,
1393
                                             __isl_take isl_pw_aff *R,
1394
2.13k
                                             __isl_keep isl_set *Domain) {
1395
2.13k
  isl_set *ConsequenceCondSet = buildConditionSet(Pred, L, R);
1396
2.13k
  return setDimensionIds(Domain, ConsequenceCondSet);
1397
2.13k
}
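Note (annotation, not part of ScopInfo.cpp): a standalone sketch of the predicate-to-set mapping performed above. The affine expressions are invented and the isl headers are assumed available; a signed ICMP_SLT comparison of an induction variable i against a parameter n becomes an isl_pw_aff_lt_set call.

#include <isl/aff.h>
#include <isl/ctx.h>
#include <isl/set.h>

int main() {
  isl_ctx *Ctx = isl_ctx_alloc();

  // LHS = i and RHS = n over a one-dimensional statement domain.
  isl_pw_aff *L = isl_pw_aff_read_from_str(Ctx, "[n] -> { [i] -> [(i)] }");
  isl_pw_aff *R = isl_pw_aff_read_from_str(Ctx, "[n] -> { [i] -> [(n)] }");

  // ICmpInst::ICMP_SLT corresponds to isl_pw_aff_lt_set.
  isl_set *Cond = isl_pw_aff_lt_set(L, R);

  isl_set_dump(Cond); // expected: [n] -> { [i] : i < n }

  isl_set_free(Cond);
  isl_ctx_free(Ctx);
  return 0;
}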
1398
1399
/// Build the conditions sets for the switch @p SI in the @p Domain.
1400
///
1401
/// This will fill @p ConditionSets with the conditions under which control
1402
/// will be moved from @p SI to its successors. Hence, @p ConditionSets will
1403
/// have as many elements as @p SI has successors.
1404
static bool
1405
buildConditionSets(ScopStmt &Stmt, SwitchInst *SI, Loop *L,
1406
                   __isl_keep isl_set *Domain,
1407
19
                   SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
1408
19
1409
19
  Value *Condition = getConditionFromTerminator(SI);
1410
19
  assert(Condition && "No condition for switch");
1411
19
1412
19
  Scop &S = *Stmt.getParent();
1413
19
  ScalarEvolution &SE = *S.getSE();
1414
19
  isl_pw_aff *LHS, *RHS;
1415
19
  LHS = Stmt.getPwAff(SE.getSCEVAtScope(Condition, L));
1416
19
1417
19
  unsigned NumSuccessors = SI->getNumSuccessors();
1418
19
  ConditionSets.resize(NumSuccessors);
1419
56
  for (auto &Case : SI->cases()) {
1420
56
    unsigned Idx = Case.getSuccessorIndex();
1421
56
    ConstantInt *CaseValue = Case.getCaseValue();
1422
56
1423
56
    RHS = Stmt.getPwAff(SE.getSCEV(CaseValue));
1424
56
    isl_set *CaseConditionSet =
1425
56
        buildConditionSet(ICmpInst::ICMP_EQ, isl_pw_aff_copy(LHS), RHS, Domain);
1426
56
    ConditionSets[Idx] = isl_set_coalesce(
1427
56
        isl_set_intersect(CaseConditionSet, isl_set_copy(Domain)));
1428
56
  }
1429
19
1430
19
  assert(ConditionSets[0] == nullptr && "Default condition set was set");
1431
19
  isl_set *ConditionSetUnion = isl_set_copy(ConditionSets[1]);
1432
56
  for (unsigned u = 2; u < NumSuccessors; u++)
1433
37
    ConditionSetUnion =
1434
37
        isl_set_union(ConditionSetUnion, isl_set_copy(ConditionSets[u]));
1435
19
  ConditionSets[0] = setDimensionIds(
1436
19
      Domain, isl_set_subtract(isl_set_copy(Domain), ConditionSetUnion));
1437
19
1438
19
  isl_pw_aff_free(LHS);
1439
19
1440
19
  return true;
1441
19
}
1442
1443
/// Build the conditions sets for the branch condition @p Condition in
1444
/// the @p Domain.
1445
///
1446
/// This will fill @p ConditionSets with the conditions under which control
1447
/// will be moved from @p TI to its successors. Hence, @p ConditionSets will
1448
/// have as many elements as @p TI has successors. If @p TI is nullptr the
1449
/// context under which @p Condition is true/false will be returned as the
1450
/// new elements of @p ConditionSets.
1451
static bool
1452
buildConditionSets(ScopStmt &Stmt, Value *Condition, TerminatorInst *TI,
1453
                   Loop *L, __isl_keep isl_set *Domain,
1454
2.30k
                   SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
1455
2.30k
1456
2.30k
  Scop &S = *Stmt.getParent();
1457
2.30k
  isl_set *ConsequenceCondSet = nullptr;
1458
2.30k
  if (auto *CCond = dyn_cast<ConstantInt>(Condition)) {
1459
187
    if (CCond->isZero())
1460
158
      ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
1461
187
    else
1462
29
      ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
1463
2.12k
  } else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
1464
39
    auto Opcode = BinOp->getOpcode();
1465
39
    assert(Opcode == Instruction::And || Opcode == Instruction::Or);
1466
39
1467
39
    bool Valid = buildConditionSets(Stmt, BinOp->getOperand(0), TI, L, Domain,
1468
39
                                    ConditionSets) &&
1469
20
                 buildConditionSets(Stmt, BinOp->getOperand(1), TI, L, Domain,
1470
20
                                    ConditionSets);
1471
39
    if (!Valid) {
1472
22
      while (!ConditionSets.empty())
1473
2
        isl_set_free(ConditionSets.pop_back_val());
1474
20
      return false;
1475
20
    }
1476
39
1477
19
    isl_set_free(ConditionSets.pop_back_val());
1478
19
    isl_set *ConsCondPart0 = ConditionSets.pop_back_val();
1479
19
    isl_set_free(ConditionSets.pop_back_val());
1480
19
    isl_set *ConsCondPart1 = ConditionSets.pop_back_val();
1481
19
1482
19
    if (Opcode == Instruction::And)
1483
8
      ConsequenceCondSet = isl_set_intersect(ConsCondPart0, ConsCondPart1);
1484
19
    else
1485
11
      ConsequenceCondSet = isl_set_union(ConsCondPart0, ConsCondPart1);
1486
2.08k
  } else {
1487
2.08k
    auto *ICond = dyn_cast<ICmpInst>(Condition);
1488
2.08k
    assert(ICond &&
1489
2.08k
           "Condition of exiting branch was neither constant nor ICmp!");
1490
2.08k
1491
2.08k
    ScalarEvolution &SE = *S.getSE();
1492
2.08k
    isl_pw_aff *LHS, *RHS;
1493
2.08k
    // For unsigned comparisons we assumed the signed bit of neither operand
1494
2.08k
    // to be set. The comparison is equal to a signed comparison under this
1495
2.08k
    // assumption.
1496
2.08k
    bool NonNeg = ICond->isUnsigned();
1497
2.08k
    LHS = Stmt.getPwAff(SE.getSCEVAtScope(ICond->getOperand(0), L), NonNeg);
1498
2.08k
    RHS = Stmt.getPwAff(SE.getSCEVAtScope(ICond->getOperand(1), L), NonNeg);
1499
2.08k
    ConsequenceCondSet =
1500
2.08k
        buildConditionSet(ICond->getPredicate(), LHS, RHS, Domain);
1501
2.08k
  }
1502
2.30k
1503
2.30k
  // If no terminator was given we are only looking for parameter constraints
1504
2.30k
  // under which @p Condition is true/false.
1505
2.28k
  if (!TI)
1506
24
    ConsequenceCondSet = isl_set_params(ConsequenceCondSet);
1507
2.28k
  assert(ConsequenceCondSet);
1508
2.28k
  ConsequenceCondSet = isl_set_coalesce(
1509
2.28k
      isl_set_intersect(ConsequenceCondSet, isl_set_copy(Domain)));
1510
2.28k
1511
2.28k
  isl_set *AlternativeCondSet = nullptr;
1512
2.28k
  bool TooComplex =
1513
2.28k
      isl_set_n_basic_set(ConsequenceCondSet) >= MaxDisjunctsInDomain;
1514
2.28k
1515
2.28k
  if (!TooComplex) {
1516
2.28k
    AlternativeCondSet = isl_set_subtract(isl_set_copy(Domain),
1517
2.28k
                                          isl_set_copy(ConsequenceCondSet));
1518
2.28k
    TooComplex =
1519
2.28k
        isl_set_n_basic_set(AlternativeCondSet) >= MaxDisjunctsInDomain;
1520
2.28k
  }
1521
2.28k
1522
2.28k
  if (TooComplex) {
1523
4
    S.invalidate(COMPLEXITY, TI ? TI->getDebugLoc() : DebugLoc());
1524
4
    isl_set_free(AlternativeCondSet);
1525
4
    isl_set_free(ConsequenceCondSet);
1526
4
    return false;
1527
4
  }
1528
2.28k
1529
2.28k
  ConditionSets.push_back(ConsequenceCondSet);
1530
2.28k
  ConditionSets.push_back(isl_set_coalesce(AlternativeCondSet));
1531
2.28k
1532
2.28k
  return true;
1533
2.28k
}
1534
1535
/// Build the conditions sets for the terminator @p TI in the @p Domain.
1536
///
1537
/// This will fill @p ConditionSets with the conditions under which control
1538
/// will be moved from @p TI to its successors. Hence, @p ConditionSets will
1539
/// have as many elements as @p TI has successors.
1540
static bool
1541
buildConditionSets(ScopStmt &Stmt, TerminatorInst *TI, Loop *L,
1542
                   __isl_keep isl_set *Domain,
1543
4.61k
                   SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
1544
4.61k
1545
4.61k
  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI))
1546
19
    return buildConditionSets(Stmt, SI, L, Domain, ConditionSets);
1547
4.61k
1548
4.59k
  assert(isa<BranchInst>(TI) && "Terminator was neither branch nor switch.");
1549
4.59k
1550
4.59k
  if (TI->getNumSuccessors() == 1) {
1551
2.36k
    ConditionSets.push_back(isl_set_copy(Domain));
1552
2.36k
    return true;
1553
2.36k
  }
1554
4.59k
1555
2.23k
  Value *Condition = getConditionFromTerminator(TI);
1556
2.23k
  assert(Condition && "No condition for Terminator");
1557
2.23k
1558
2.23k
  return buildConditionSets(Stmt, Condition, TI, L, Domain, ConditionSets);
1559
4.59k
}
1560
1561
1.57k
void ScopStmt::buildDomain() {
1562
1.57k
  isl_id *Id = isl_id_alloc(getIslCtx(), getBaseName(), this);
1563
1.57k
1564
1.57k
  Domain = getParent()->getDomainConditions(this);
1565
1.57k
  Domain = isl_set_set_tuple_id(Domain, Id);
1566
1.57k
}
1567
1568
1.57k
void ScopStmt::collectSurroundingLoops() {
1569
3.33k
  for (unsigned u = 0, e = isl_set_n_dim(Domain); u < e; u++) {
1570
1.75k
    isl_id *DimId = isl_set_get_dim_id(Domain, isl_dim_set, u);
1571
1.75k
    NestLoops.push_back(static_cast<Loop *>(isl_id_get_user(DimId)));
1572
1.75k
    isl_id_free(DimId);
1573
1.75k
  }
1574
1.57k
}
1575
1576
ScopStmt::ScopStmt(Scop &parent, Region &R, Loop *SurroundingLoop)
1577
    : Parent(parent), InvalidDomain(nullptr), Domain(nullptr), BB(nullptr),
1578
97
      R(&R), Build(nullptr), SurroundingLoop(SurroundingLoop) {
1579
97
1580
97
  BaseName = getIslCompatibleName("Stmt_", R.getNameStr(), "");
1581
97
}
1582
1583
ScopStmt::ScopStmt(Scop &parent, BasicBlock &bb, Loop *SurroundingLoop)
1584
    : Parent(parent), InvalidDomain(nullptr), Domain(nullptr), BB(&bb),
1585
4.33k
      R(nullptr), Build(nullptr), SurroundingLoop(SurroundingLoop) {
1586
4.33k
1587
4.33k
  BaseName = getIslCompatibleName("Stmt_", &bb, "");
1588
4.33k
}
1589
1590
ScopStmt::ScopStmt(Scop &parent, __isl_take isl_map *SourceRel,
1591
                   __isl_take isl_map *TargetRel, __isl_take isl_set *NewDomain)
1592
    : Parent(parent), InvalidDomain(nullptr), Domain(NewDomain), BB(nullptr),
1593
18
      R(nullptr), Build(nullptr) {
1594
18
  BaseName = getIslCompatibleName("CopyStmt_", "",
1595
18
                                  std::to_string(parent.getCopyStmtsNum()));
1596
18
  auto *Id = isl_id_alloc(getIslCtx(), getBaseName(), this);
1597
18
  Domain = isl_set_set_tuple_id(Domain, isl_id_copy(Id));
1598
18
  TargetRel = isl_map_set_tuple_id(TargetRel, isl_dim_in, Id);
1599
18
  auto *Access =
1600
18
      new MemoryAccess(this, MemoryAccess::AccessType::MUST_WRITE, TargetRel);
1601
18
  parent.addAccessFunction(Access);
1602
18
  addAccess(Access);
1603
18
  SourceRel = isl_map_set_tuple_id(SourceRel, isl_dim_in, isl_id_copy(Id));
1604
18
  Access = new MemoryAccess(this, MemoryAccess::AccessType::READ, SourceRel);
1605
18
  parent.addAccessFunction(Access);
1606
18
  addAccess(Access);
1607
18
}
1608
1609
1.57k
void ScopStmt::init(LoopInfo &LI) {
1610
1.57k
  assert(!Domain && "init must be called only once");
1611
1.57k
1612
1.57k
  buildDomain();
1613
1.57k
  collectSurroundingLoops();
1614
1.57k
  buildAccessRelations();
1615
1.57k
1616
1.57k
  if (DetectReductions)
1617
1.57k
    checkForReductions();
1618
1.57k
}
1619
1620
/// Collect loads which might form a reduction chain with @p StoreMA.
1621
///
1622
/// Check if the stored value for @p StoreMA is a binary operator with one or
1623
/// two loads as operands. If the binary operand is commutative & associative,
1624
/// used only once (by @p StoreMA) and its load operands are also used only
1625
/// once, we have found a possible reduction chain. It starts at an operand
1626
/// load and includes the binary operator and @p StoreMA.
1627
///
1628
/// Note: We allow only one use to ensure the load and binary operator cannot
1629
///       escape this block or into any other store except @p StoreMA.
1630
void ScopStmt::collectCandiateReductionLoads(
1631
1.75k
    MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) {
1632
1.75k
  auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction());
1633
1.75k
  if (!Store)
1634
476
    return;
1635
1.75k
1636
1.75k
  // Skip if there is not one binary operator between the load and the store
1637
1.27k
  auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand());
1638
1.27k
  if (!BinOp)
1639
730
    return;
1640
1.27k
1641
1.27k
  // Skip if the binary operators has multiple uses
1642
549
  if (BinOp->getNumUses() != 1)
1643
22
    return;
1644
549
1645
549
  // Skip if the opcode of the binary operator is not commutative/associative
1646
527
  if (!BinOp->isCommutative() || !BinOp->isAssociative())
1647
184
    return;
1648
527
1649
527
  // Skip if the binary operator is outside the current SCoP
1650
343
  if (BinOp->getParent() != Store->getParent())
1651
1
    return;
1652
343
1653
343
  // Skip if it is a multiplicative reduction and we disabled them
1654
342
  
if (342
DisableMultiplicativeReductions &&342
1655
2
      (BinOp->getOpcode() == Instruction::Mul ||
1656
1
       BinOp->getOpcode() == Instruction::FMul))
1657
1
    return;
1658
342
1659
342
  // Check the binary operator operands for a candidate load
1660
341
  auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0));
1661
341
  auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1));
1662
341
  if (!PossibleLoad0 && !PossibleLoad1)
1663
7
    return;
1664
341
1665
341
  // A load is only a candidate if it cannot escape (thus has only this use)
1666
334
  if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1)
1667
321
    if (PossibleLoad0->getParent() == Store->getParent())
1668
317
      Loads.push_back(&getArrayAccessFor(PossibleLoad0));
1669
334
  if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1)
1670
75
    if (PossibleLoad1->getParent() == Store->getParent())
1671
74
      Loads.push_back(&getArrayAccessFor(PossibleLoad1));
1672
334
}
1673
1674
/// Check for reductions in this ScopStmt.
1675
///
1676
/// Iterate over all store memory accesses and check for valid binary reduction
1677
/// like chains. For all candidates we check if they have the same base address
1678
/// and there are no other accesses which overlap with them. The base address
1679
/// check rules out impossible reductions candidates early. The overlap check,
1680
/// together with the "only one user" check in collectCandiateReductionLoads,
1681
/// guarantees that none of the intermediate results will escape during
1682
/// execution of the loop nest. We basically check here that no other memory
1683
/// access can access the same memory as the potential reduction.
1684
1.57k
void ScopStmt::checkForReductions() {
1685
1.57k
  SmallVector<MemoryAccess *, 2> Loads;
1686
1.57k
  SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates;
1687
1.57k
1688
1.57k
  // First collect candidate load-store reduction chains by iterating over all
1689
1.57k
  // stores and collecting possible reduction loads.
1690
3.52k
  for (MemoryAccess *StoreMA : MemAccs) {
1691
3.52k
    if (StoreMA->isRead())
1692
1.76k
      continue;
1693
3.52k
1694
1.75k
    Loads.clear();
1695
1.75k
    collectCandiateReductionLoads(StoreMA, Loads);
1696
1.75k
    for (MemoryAccess *LoadMA : Loads)
1697
391
      Candidates.push_back(std::make_pair(LoadMA, StoreMA));
1698
1.75k
  }
1699
1.57k
1700
1.57k
  // Then check each possible candidate pair.
1701
391
  for (const auto &CandidatePair : Candidates) {
1702
391
    bool Valid = true;
1703
391
    isl_map *LoadAccs = CandidatePair.first->getAccessRelation();
1704
391
    isl_map *StoreAccs = CandidatePair.second->getAccessRelation();
1705
391
1706
391
    // Skip those with obviously unequal base addresses.
1707
391
    if (!isl_map_has_equal_space(LoadAccs, StoreAccs)) {
1708
75
      isl_map_free(LoadAccs);
1709
75
      isl_map_free(StoreAccs);
1710
75
      continue;
1711
75
    }
1712
391
1713
391
    // And check if the remaining for overlap with other memory accesses.
1714
316
    isl_map *AllAccsRel = isl_map_union(LoadAccs, StoreAccs);
1715
316
    AllAccsRel = isl_map_intersect_domain(AllAccsRel, getDomain());
1716
316
    isl_set *AllAccs = isl_map_range(AllAccsRel);
1717
316
1718
1.02k
    for (MemoryAccess *MA : MemAccs) {
1719
1.02k
      if (MA == CandidatePair.first || MA == CandidatePair.second)
1720
632
        continue;
1721
1.02k
1722
390
      isl_map *AccRel =
1723
390
          isl_map_intersect_domain(MA->getAccessRelation(), getDomain());
1724
390
      isl_set *Accs = isl_map_range(AccRel);
1725
390
1726
390
      if (isl_set_has_equal_space(AllAccs, Accs)) {
1727
88
        isl_set *OverlapAccs = isl_set_intersect(Accs, isl_set_copy(AllAccs));
1728
59
        Valid = Valid && isl_set_is_empty(OverlapAccs);
1729
88
        isl_set_free(OverlapAccs);
1730
302
      } else {
1731
302
        isl_set_free(Accs);
1732
302
      }
1733
390
    }
1734
316
1735
316
    isl_set_free(AllAccs);
1736
316
    if (!Valid)
1737
55
      continue;
1738
316
1739
261
    const LoadInst *Load =
1740
261
        dyn_cast<const LoadInst>(CandidatePair.first->getAccessInstruction());
1741
261
    MemoryAccess::ReductionType RT =
1742
261
        getReductionType(dyn_cast<BinaryOperator>(Load->user_back()), Load);
1743
261
1744
261
    // If no overlapping access was found we mark the load and store as
1745
261
    // reduction like.
1746
261
    CandidatePair.first->markAsReductionLike(RT);
1747
261
    CandidatePair.second->markAsReductionLike(RT);
1748
261
  }
1749
1.57k
}
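Note (annotation, not part of ScopInfo.cpp): for reference, a plain C++ loop nest that exhibits exactly the access shape the two routines above search for: one load of A[i], one commutative and associative binary operator with a single use, and one store back to A[i], while the B accesses never overlap A. The function is a made-up example.

// Hypothetical input: A[i] += B[i][j] forms a load -> add -> store chain
// that checkForReductions() would mark as reduction-like.
void sum_rows(long A[128], const long B[128][64]) {
  for (int i = 0; i < 128; i++)
    for (int j = 0; j < 64; j++)
      A[i] = A[i] + B[i][j];
}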
1750
1751
630
std::string ScopStmt::getDomainStr() const { return stringFromIslObj(Domain); }
1752
1753
630
std::string ScopStmt::getScheduleStr() const {
1754
630
  auto *S = getSchedule();
1755
630
  if (!S)
1756
4
    return "";
1757
626
  auto Str = stringFromIslObj(S);
1758
626
  isl_map_free(S);
1759
626
  return Str;
1760
630
}
1761
1762
9.31k
void ScopStmt::setInvalidDomain(__isl_take isl_set *ID) {
1763
9.31k
  isl_set_free(InvalidDomain);
1764
9.31k
  InvalidDomain = ID;
1765
9.31k
}
1766
1767
21.2k
BasicBlock *ScopStmt::getEntryBlock() const {
1768
21.2k
  if (isBlockStmt())
1769
19.6k
    return getBasicBlock();
1770
1.51k
  return getRegion()->getEntry();
1771
21.2k
}
1772
1773
4.03k
unsigned ScopStmt::getNumIterators() const { return NestLoops.size(); }
1774
1775
6.15k
const char *ScopStmt::getBaseName() const { return BaseName.c_str(); }
1776
1777
463
Loop *ScopStmt::getLoopForDimension(unsigned Dimension) const {
1778
463
  return NestLoops[Dimension];
1779
463
}
1780
1781
1.64k
isl_ctx *ScopStmt::getIslCtx() const { return Parent.getIslCtx(); }
1782
1783
43.2k
__isl_give isl_set *ScopStmt::getDomain() const { return isl_set_copy(Domain); }
1784
1785
3.81k
__isl_give isl_space *ScopStmt::getDomainSpace() const {
1786
3.81k
  return isl_set_get_space(Domain);
1787
3.81k
}
1788
1789
68
__isl_give isl_id *ScopStmt::getDomainId() const {
1790
68
  return isl_set_get_tuple_id(Domain);
1791
68
}
1792
1793
4.44k
ScopStmt::~ScopStmt() {
1794
4.44k
  isl_set_free(Domain);
1795
4.44k
  isl_set_free(InvalidDomain);
1796
4.44k
}
1797
1798
630
void ScopStmt::print(raw_ostream &OS) const {
1799
630
  OS << "\t" << getBaseName() << "\n";
1800
630
  OS.indent(12) << "Domain :=\n";
1801
630
1802
630
  if (Domain) {
1803
630
    OS.indent(16) << getDomainStr() << ";\n";
1804
630
  } else
1805
0
    OS.indent(16) << "n/a\n";
1806
630
1807
630
  OS.indent(12) << "Schedule :=\n";
1808
630
1809
630
  if (Domain) {
1810
630
    OS.indent(16) << getScheduleStr() << ";\n";
1811
630
  } else
1812
0
    OS.indent(16) << "n/a\n";
1813
630
1814
630
  for (MemoryAccess *Access : MemAccs)
1815
1.34k
    Access->print(OS);
1816
630
}
1817
1818
0
void ScopStmt::dump() const { print(dbgs()); }
1819
1820
320
void ScopStmt::removeMemoryAccess(MemoryAccess *MA) {
1821
320
  // Remove the memory accesses from this statement together with all scalar
1822
320
  // accesses that were caused by it. MemoryKind::Value READs have no access
1823
320
  // instruction, hence would not be removed by this function. However, it is
1824
320
  // only used for invariant LoadInst accesses, its arguments are always affine,
1825
320
  // hence synthesizable, and therefore there are no MemoryKind::Value READ
1826
320
  // accesses to be removed.
1827
1.97k
  auto Predicate = [&](MemoryAccess *Acc) {
1828
1.97k
    return Acc->getAccessInstruction() == MA->getAccessInstruction();
1829
1.97k
  };
1830
320
  MemAccs.erase(std::remove_if(MemAccs.begin(), MemAccs.end(), Predicate),
1831
320
                MemAccs.end());
1832
320
  InstructionToAccess.erase(MA->getAccessInstruction());
1833
320
}
1834
1835
1
void ScopStmt::removeSingleMemoryAccess(MemoryAccess *MA) {
1836
1
  auto MAIt = std::find(MemAccs.begin(), MemAccs.end(), MA);
1837
1
  assert(MAIt != MemAccs.end());
1838
1
  MemAccs.erase(MAIt);
1839
1
1840
1
  auto It = InstructionToAccess.find(MA->getAccessInstruction());
1841
1
  if (It != InstructionToAccess.end()) {
1842
1
    It->second.remove(MA);
1843
1
    if (It->second.empty())
1844
1
      InstructionToAccess.erase(MA->getAccessInstruction());
1845
1
  }
1846
1
}
1847
1848
//===----------------------------------------------------------------------===//
1849
/// Scop class implement
1850
1851
41
void Scop::setContext(__isl_take isl_set *NewContext) {
1852
41
  NewContext = isl_set_align_params(NewContext, isl_set_get_space(Context));
1853
41
  isl_set_free(Context);
1854
41
  Context = NewContext;
1855
41
}
1856
1857
/// Remap parameter values but keep AddRecs valid wrt. invariant loads.
1858
struct SCEVSensitiveParameterRewriter
1859
    : public SCEVRewriteVisitor<SCEVSensitiveParameterRewriter> {
1860
  ValueToValueMap &VMap;
1861
1862
public:
1863
  SCEVSensitiveParameterRewriter(ValueToValueMap &VMap, ScalarEvolution &SE)
1864
13.4k
      : SCEVRewriteVisitor(SE), VMap(VMap) {}
1865
1866
  static const SCEV *rewrite(const SCEV *E, ScalarEvolution &SE,
1867
13.4k
                             ValueToValueMap &VMap) {
1868
13.4k
    SCEVSensitiveParameterRewriter SSPR(VMap, SE);
1869
13.4k
    return SSPR.visit(E);
1870
13.4k
  }
1871
1872
3.77k
  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *E) {
1873
3.77k
    auto *Start = visit(E->getStart());
1874
3.77k
    auto *AddRec = SE.getAddRecExpr(SE.getConstant(E->getType(), 0),
1875
3.77k
                                    visit(E->getStepRecurrence(SE)),
1876
3.77k
                                    E->getLoop(), SCEV::FlagAnyWrap);
1877
3.77k
    return SE.getAddExpr(Start, AddRec);
1878
3.77k
  }
1879
1880
6.19k
  const SCEV *visitUnknown(const SCEVUnknown *E) {
1881
6.19k
    if (auto *NewValue = VMap.lookup(E->getValue()))
1882
41
      return SE.getUnknown(NewValue);
1883
6.15k
    return E;
1884
6.19k
  }
1885
};
1886
1887
13.4k
const SCEV *Scop::getRepresentingInvariantLoadSCEV(const SCEV *S) {
1888
13.4k
  return SCEVSensitiveParameterRewriter::rewrite(S, *SE, InvEquivClassVMap);
1889
13.4k
}
1890
1891
983
void Scop::createParameterId(const SCEV *Parameter) {
1892
983
  assert(Parameters.count(Parameter));
1893
983
  assert(!ParameterIds.count(Parameter));
1894
983
1895
983
  std::string ParameterName = "p_" + std::to_string(getNumParams() - 1);
1896
983
1897
983
  if (const SCEVUnknown *ValueParameter = dyn_cast<SCEVUnknown>(Parameter)) {
1898
917
    Value *Val = ValueParameter->getValue();
1899
917
1900
917
    // If this parameter references a specific Value and this value has a name
1901
917
    // we use this name as it is likely to be unique and more useful than just
1902
917
    // a number.
1903
917
    if (Val->hasName())
1904
871
      ParameterName = Val->getName();
1905
46
    else if (LoadInst *LI = dyn_cast<LoadInst>(Val)) {
1906
46
      auto *LoadOrigin = LI->getPointerOperand()->stripInBoundsOffsets();
1907
46
      if (LoadOrigin->hasName()) {
1908
39
        ParameterName += "_loaded_from_";
1909
39
        ParameterName +=
1910
39
            LI->getPointerOperand()->stripInBoundsOffsets()->getName();
1911
39
      }
1912
46
    }
1913
917
  }
1914
983
1915
983
  ParameterName = getIslCompatibleName("", ParameterName, "");
1916
983
1917
983
  auto *Id = isl_id_alloc(getIslCtx(), ParameterName.c_str(),
1918
983
                          const_cast<void *>((const void *)Parameter));
1919
983
  ParameterIds[Parameter] = Id;
1920
983
}
1921
1922
9.70k
void Scop::addParams(const ParameterSetTy &NewParameters) {
1923
2.40k
  for (const SCEV *Parameter : NewParameters) {
1924
2.40k
    // Normalize the SCEV to get the representing element for an invariant load.
1925
2.40k
    Parameter = extractConstantFactor(Parameter, *SE).second;
1926
2.40k
    Parameter = getRepresentingInvariantLoadSCEV(Parameter);
1927
2.40k
1928
2.40k
    if (Parameters.insert(Parameter))
1929
983
      createParameterId(Parameter);
1930
2.40k
  }
1931
9.70k
}
1932
1933
10.9k
__isl_give isl_id *Scop::getIdForParam(const SCEV *Parameter) {
1934
10.9k
  // Normalize the SCEV to get the representing element for an invariant load.
1935
10.9k
  Parameter = getRepresentingInvariantLoadSCEV(Parameter);
1936
10.9k
  return isl_id_copy(ParameterIds.lookup(Parameter));
1937
10.9k
}
1938
1939
__isl_give isl_set *
1940
2.74k
Scop::addNonEmptyDomainConstraints(__isl_take isl_set *C) const {
1941
2.74k
  isl_set *DomainContext = isl_union_set_params(getDomains());
1942
2.74k
  return isl_set_intersect_params(C, DomainContext);
1943
2.74k
}
1944
1945
12
bool Scop::isDominatedBy(const DominatorTree &DT, BasicBlock *BB) const {
1946
12
  return DT.dominates(BB, getEntry());
1947
12
}
1948
1949
void Scop::addUserAssumptions(AssumptionCache &AC, DominatorTree &DT,
1950
934
                              LoopInfo &LI) {
1951
934
  auto &F = getFunction();
1952
19
  for (auto &Assumption : AC.assumptions()) {
1953
19
    auto *CI = dyn_cast_or_null<CallInst>(Assumption);
1954
19
    if (!CI || CI->getNumArgOperands() != 1)
1955
0
      continue;
1956
19
1957
19
    bool InScop = contains(CI);
1958
19
    if (!InScop && !isDominatedBy(DT, CI->getParent()))
1959
0
      continue;
1960
19
1961
19
    auto *L = LI.getLoopFor(CI->getParent());
1962
19
    auto *Val = CI->getArgOperand(0);
1963
19
    ParameterSetTy DetectedParams;
1964
19
    if (!isAffineConstraint(Val, &R, L, *SE, DetectedParams)) {
1965
0
      emitOptimizationRemarkAnalysis(F.getContext(), DEBUG_TYPE, F,
1966
0
                                     CI->getDebugLoc(),
1967
0
                                     "Non-affine user assumption ignored.");
1968
0
      continue;
1969
0
    }
1970
19
1971
19
    // Collect all newly introduced parameters.
1972
19
    ParameterSetTy NewParams;
1973
21
    for (auto *Param : DetectedParams) {
1974
21
      Param = extractConstantFactor(Param, *SE).second;
1975
21
      Param = getRepresentingInvariantLoadSCEV(Param);
1976
21
      if (Parameters.count(Param))
1977
17
        continue;
1978
4
      NewParams.insert(Param);
1979
4
    }
1980
19
1981
19
    SmallVector<isl_set *, 2> ConditionSets;
1982
12
    auto *TI = InScop ? CI->getParent()->getTerminator() : nullptr;
1983
12
    auto &Stmt = InScop ? *getStmtFor(CI->getParent()) : *Stmts.begin();
1984
12
    auto *Dom = InScop ? getDomainConditions(&Stmt) : isl_set_copy(Context);
1985
19
    bool Valid = buildConditionSets(Stmt, Val, TI, L, Dom, ConditionSets);
1986
19
    isl_set_free(Dom);
1987
19
1988
19
    if (!Valid)
1989
0
      continue;
1990
19
1991
19
    isl_set *AssumptionCtx = nullptr;
1992
19
    if (InScop) {
1993
7
      AssumptionCtx = isl_set_complement(isl_set_params(ConditionSets[1]));
1994
7
      isl_set_free(ConditionSets[0]);
1995
12
    } else {
1996
12
      AssumptionCtx = isl_set_complement(ConditionSets[1]);
1997
12
      AssumptionCtx = isl_set_intersect(AssumptionCtx, ConditionSets[0]);
1998
12
    }
1999
19
2000
19
    // Project out newly introduced parameters as they are not otherwise useful.
2001
19
    if (!NewParams.empty()) {
2002
10
      for (unsigned u = 0; u < isl_set_n_param(AssumptionCtx); u++) {
2003
6
        auto *Id = isl_set_get_dim_id(AssumptionCtx, isl_dim_param, u);
2004
6
        auto *Param = static_cast<const SCEV *>(isl_id_get_user(Id));
2005
6
        isl_id_free(Id);
2006
6
2007
6
        if (!NewParams.count(Param))
2008
2
          continue;
2009
6
2010
4
        AssumptionCtx =
2011
4
            isl_set_project_out(AssumptionCtx, isl_dim_param, u--, 1);
2012
4
      }
2013
4
    }
2014
19
2015
19
    emitOptimizationRemarkAnalysis(
2016
19
        F.getContext(), DEBUG_TYPE, F, CI->getDebugLoc(),
2017
19
        "Use user assumption: " + stringFromIslObj(AssumptionCtx));
2018
19
    Context = isl_set_intersect(Context, AssumptionCtx);
2019
19
  }
2020
934
}
2021
2022
901
void Scop::addUserContext() {
2023
901
  if (UserContextStr.empty())
2024
898
    return;
2025
901
2026
3
  isl_set *UserContext =
2027
3
      isl_set_read_from_str(getIslCtx(), UserContextStr.c_str());
2028
3
  isl_space *Space = getParamSpace();
2029
3
  if (isl_space_dim(Space, isl_dim_param) !=
2030
2
      isl_set_dim(UserContext, isl_dim_param)) {
2031
2
    auto SpaceStr = isl_space_to_str(Space);
2032
2
    errs() << "Error: the context provided in -polly-context has not the same "
2033
2
           << "number of dimensions than the computed context. Due to this "
2034
2
           << "mismatch, the -polly-context option is ignored. Please provide "
2035
2
           << "the context in the parameter space: " << SpaceStr << ".\n";
2036
2
    free(SpaceStr);
2037
2
    isl_set_free(UserContext);
2038
2
    isl_space_free(Space);
2039
2
    return;
2040
2
  }
2041
3
2042
2
  for (unsigned i = 0; i < isl_space_dim(Space, isl_dim_param); i++) {
2043
1
    auto *NameContext = isl_set_get_dim_name(Context, isl_dim_param, i);
2044
1
    auto *NameUserContext = isl_set_get_dim_name(UserContext, isl_dim_param, i);
2045
1
2046
1
    if (strcmp(NameContext, NameUserContext) != 0) {
2047
0
      auto SpaceStr = isl_space_to_str(Space);
2048
0
      errs() << "Error: the name of dimension " << i
2049
0
             << " provided in -polly-context "
2050
0
             << "is '" << NameUserContext << "', but the name in the computed "
2051
0
             << "context is '" << NameContext
2052
0
             << "'. Due to this name mismatch, "
2053
0
             << "the -polly-context option is ignored. Please provide "
2054
0
             << "the context in the parameter space: " << SpaceStr << ".\n";
2055
0
      free(SpaceStr);
2056
0
      isl_set_free(UserContext);
2057
0
      isl_space_free(Space);
2058
0
      return;
2059
0
    }
2060
1
2061
1
    UserContext =
2062
1
        isl_set_set_dim_id(UserContext, isl_dim_param, i,
2063
1
                           isl_space_get_dim_id(Space, isl_dim_param, i));
2064
1
  }
2065
1
2066
1
  Context = isl_set_intersect(Context, UserContext);
2067
1
  isl_space_free(Space);
2068
1
}
2069
2070
940
void Scop::buildInvariantEquivalenceClasses() {
2071
940
  DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses;
2072
940
2073
940
  const InvariantLoadsSetTy &RIL = getRequiredInvariantLoads();
2074
248
  for (LoadInst *LInst : RIL) {
2075
248
    const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
2076
248
2077
248
    Type *Ty = LInst->getType();
2078
248
    LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)];
2079
248
    if (ClassRep) {
2080
21
      InvEquivClassVMap[LInst] = ClassRep;
2081
21
      continue;
2082
21
    }
2083
248
2084
227
    ClassRep = LInst;
2085
227
    InvariantEquivClasses.emplace_back(
2086
227
        InvariantEquivClassTy{PointerSCEV, MemoryAccessList(), nullptr, Ty});
2087
227
  }
2088
940
}
2089
2090
940
void Scop::buildContext() {
2091
940
  isl_space *Space = isl_space_params_alloc(getIslCtx(), 0);
2092
940
  Context = isl_set_universe(isl_space_copy(Space));
2093
940
  InvalidContext = isl_set_empty(isl_space_copy(Space));
2094
940
  AssumedContext = isl_set_universe(Space);
2095
940
}
2096
2097
900
void Scop::addParameterBounds() {
2098
900
  unsigned PDim = 0;
2099
927
  for (auto *Parameter : Parameters) {
2100
927
    ConstantRange SRange = SE->getSignedRange(Parameter);
2101
927
    Context = addRangeBoundsToSet(Context, SRange, PDim++, isl_dim_param);
2102
927
  }
2103
900
}
2104
2105
901
void Scop::realignParams() {
2106
901
  if (PollyIgnoreParamBounds)
2107
1
    return;
2108
901
2109
901
  // Add all parameters into a common model.
2110
900
  isl_space *Space = isl_space_params_alloc(getIslCtx(), ParameterIds.size());
2111
900
2112
900
  unsigned PDim = 0;
2113
927
  for (const auto *Parameter : Parameters) {
2114
927
    isl_id *id = getIdForParam(Parameter);
2115
927
    Space = isl_space_set_dim_id(Space, isl_dim_param, PDim++, id);
2116
927
  }
2117
900
2118
900
  // Align the parameters of all data structures to the model.
2119
900
  Context = isl_set_align_params(Context, Space);
2120
900
2121
900
  // As all parameters are known add bounds to them.
2122
900
  addParameterBounds();
2123
900
2124
900
  for (ScopStmt &Stmt : *this)
2125
1.56k
    Stmt.realignParams();
2126
900
2127
900
  // Simplify the schedule according to the context too.
2128
900
  Schedule = isl_schedule_gist_domain_params(Schedule, getContext());
2129
900
}
2130
2131
static __isl_give isl_set *
2132
simplifyAssumptionContext(__isl_take isl_set *AssumptionContext,
2133
901
                          const Scop &S) {
2134
901
  // If we have modeled all blocks in the SCoP that have side effects we can
2135
901
  // simplify the context with the constraints that are needed for anything to
2136
901
  // be executed at all. However, if we have error blocks in the SCoP we already
2137
901
  // assumed some parameter combinations cannot occur and removed them from the
2138
901
  // domains, thus we cannot use the remaining domain to simplify the
2139
901
  // assumptions.
2140
901
  if (!S.hasErrorBlock()) {
2141
881
    isl_set *DomainParameters = isl_union_set_params(S.getDomains());
2142
881
    AssumptionContext =
2143
881
        isl_set_gist_params(AssumptionContext, DomainParameters);
2144
881
  }
2145
901
2146
901
  AssumptionContext = isl_set_gist_params(AssumptionContext, S.getContext());
2147
901
  return AssumptionContext;
2148
901
}
2149
2150
901
void Scop::simplifyContexts() {
2151
901
  // The parameter constraints of the iteration domains give us a set of
2152
901
  // constraints that need to hold for all cases where at least a single
2153
901
  // statement iteration is executed in the whole scop. We now simplify the
2154
901
  // assumed context under the assumption that such constraints hold and at
2155
901
  // least a single statement iteration is executed. For cases where no
2156
901
  // statement instances are executed, the assumptions we have taken about
2157
901
  // the executed code do not matter and can be changed.
2158
901
  //
2159
901
  // WARNING: This only holds if the assumptions we have taken do not reduce
2160
901
  //          the set of statement instances that are executed. Otherwise we
2161
901
  //          may run into a case where the iteration domains suggest that
2162
901
  //          for a certain set of parameter constraints no code is executed,
2163
901
  //          but in the original program some computation would have been
2164
901
  //          performed. In such a case, modifying the run-time conditions and
2165
901
  //          possibly influencing the run-time check may cause certain scops
2166
901
  //          to not be executed.
2167
901
  //
2168
901
  // Example:
2169
901
  //
2170
901
  //   When delinearizing the following code:
2171
901
  //
2172
901
  //     for (long i = 0; i < 100; i++)
2173
901
  //       for (long j = 0; j < m; j++)
2174
901
  //         A[i+p][j] = 1.0;
2175
901
  //
2176
901
  //   we assume that the condition m <= 0 or (m >= 1 and p >= 0) holds as
2177
901
  //   otherwise we would access out of bound data. Now, knowing that code is
2178
901
  //   only executed for the case m >= 0, it is sufficient to assume p >= 0.
2179
901
  AssumedContext = simplifyAssumptionContext(AssumedContext, *this);
2180
901
  InvalidContext = isl_set_align_params(InvalidContext, getParamSpace());
2181
901
}
2182
2183
/// Add the minimal/maximal access in @p Set to @p User.
2184
400
static isl_stat buildMinMaxAccess(__isl_take isl_set *Set, void *User) {
2185
400
  Scop::MinMaxVectorTy *MinMaxAccesses = (Scop::MinMaxVectorTy *)User;
2186
400
  isl_pw_multi_aff *MinPMA, *MaxPMA;
2187
400
  isl_pw_aff *LastDimAff;
2188
400
  isl_aff *OneAff;
2189
400
  unsigned Pos;
2190
400
2191
400
  Set = isl_set_remove_divs(Set);
2192
400
2193
400
  if (isl_set_n_basic_set(Set) >= MaxDisjunctsInDomain) {
2194
0
    isl_set_free(Set);
2195
0
    return isl_stat_error;
2196
0
  }
2197
400
2198
400
  // Restrict the number of parameters involved in the access as the lexmin/
2199
400
  // lexmax computation will take too long if this number is high.
2200
400
  //
2201
400
  // Experiments with a simple test case using an i7 4800MQ:
2202
400
  //
2203
400
  //  #Parameters involved | Time (in sec)
2204
400
  //            6          |     0.01
2205
400
  //            7          |     0.04
2206
400
  //            8          |     0.12
2207
400
  //            9          |     0.40
2208
400
  //           10          |     1.54
2209
400
  //           11          |     6.78
2210
400
  //           12          |    30.38
2211
400
  //
2212
400
  if (isl_set_n_param(Set) > RunTimeChecksMaxParameters) {
2213
8
    unsigned InvolvedParams = 0;
2214
92
    for (unsigned u = 0, e = isl_set_n_param(Set); u < e; u++)
2215
84
      if (isl_set_involves_dims(Set, isl_dim_param, u, 1))
2216
42
        InvolvedParams++;
2217
8
2218
8
    if (InvolvedParams > RunTimeChecksMaxParameters) {
2219
2
      isl_set_free(Set);
2220
2
      return isl_stat_error;
2221
2
    }
2222
8
  }
2223
400
2224
398
  MinPMA = isl_set_lexmin_pw_multi_aff(isl_set_copy(Set));
2225
398
  MaxPMA = isl_set_lexmax_pw_multi_aff(isl_set_copy(Set));
2226
398
2227
398
  MinPMA = isl_pw_multi_aff_coalesce(MinPMA);
2228
398
  MaxPMA = isl_pw_multi_aff_coalesce(MaxPMA);
2229
398
2230
398
  // Adjust the last dimension of the maximal access by one as we want to
2231
398
  // enclose the accessed memory region by MinPMA and MaxPMA. The pointer
2232
398
  // we test during code generation might now point after the end of the
2233
398
  // allocated array but we will never dereference it anyway.
2234
398
  assert(isl_pw_multi_aff_dim(MaxPMA, isl_dim_out) &&
2235
398
         "Assumed at least one output dimension");
2236
398
  Pos = isl_pw_multi_aff_dim(MaxPMA, isl_dim_out) - 1;
2237
398
  LastDimAff = isl_pw_multi_aff_get_pw_aff(MaxPMA, Pos);
2238
398
  OneAff = isl_aff_zero_on_domain(
2239
398
      isl_local_space_from_space(isl_pw_aff_get_domain_space(LastDimAff)));
2240
398
  OneAff = isl_aff_add_constant_si(OneAff, 1);
2241
398
  LastDimAff = isl_pw_aff_add(LastDimAff, isl_pw_aff_from_aff(OneAff));
2242
398
  MaxPMA = isl_pw_multi_aff_set_pw_aff(MaxPMA, Pos, LastDimAff);
2243
398
2244
398
  MinMaxAccesses->push_back(std::make_pair(MinPMA, MaxPMA));
2245
398
2246
398
  isl_set_free(Set);
2247
398
  return isl_stat_ok;
2248
400
}
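Note (annotation, not part of ScopInfo.cpp): a standalone sketch of the lexmin/lexmax step above, on an invented access set and assuming the isl headers are available. buildMinMaxAccess additionally widens the last output dimension of the maximum by one element, which is omitted here.

#include <isl/aff.h>
#include <isl/ctx.h>
#include <isl/map.h>
#include <isl/set.h>

int main() {
  isl_ctx *Ctx = isl_ctx_alloc();
  isl_set *Accessed =
      isl_set_read_from_str(Ctx, "[n] -> { A[i] : 0 <= i < n }");

  // Smallest and largest accessed element, as piecewise multi-affine
  // expressions in the parameters.
  isl_pw_multi_aff *Min = isl_set_lexmin_pw_multi_aff(isl_set_copy(Accessed));
  isl_pw_multi_aff *Max = isl_set_lexmax_pw_multi_aff(Accessed);

  isl_pw_multi_aff_dump(Min); // A[0]     (for n >= 1)
  isl_pw_multi_aff_dump(Max); // A[n - 1] (for n >= 1)

  isl_pw_multi_aff_free(Min);
  isl_pw_multi_aff_free(Max);
  isl_ctx_free(Ctx);
  return 0;
}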
2249
2250
1.39k
static __isl_give isl_set *getAccessDomain(MemoryAccess *MA) {
2251
1.39k
  isl_set *Domain = MA->getStatement()->getDomain();
2252
1.39k
  Domain = isl_set_project_out(Domain, isl_dim_set, 0, isl_set_n_dim(Domain));
2253
1.39k
  return isl_set_reset_tuple_id(Domain);
2254
1.39k
}
2255
2256
/// Wrapper function to calculate minimal/maximal accesses to each array.
2257
static bool calculateMinMaxAccess(Scop::AliasGroupTy AliasGroup, Scop &S,
2258
307
                                  Scop::MinMaxVectorTy &MinMaxAccesses) {
2259
307
2260
307
  MinMaxAccesses.reserve(AliasGroup.size());
2261
307
2262
307
  isl_union_set *Domains = S.getDomains();
2263
307
  isl_union_map *Accesses = isl_union_map_empty(S.getParamSpace());
2264
307
2265
307
  for (MemoryAccess *MA : AliasGroup)
2266
645
    Accesses = isl_union_map_add_map(Accesses, MA->getAccessRelation());
2267
307
2268
307
  Accesses = isl_union_map_intersect_domain(Accesses, Domains);
2269
307
  isl_union_set *Locations = isl_union_map_range(Accesses);
2270
307
  Locations = isl_union_set_coalesce(Locations);
2271
307
  Locations = isl_union_set_detect_equalities(Locations);
2272
307
  bool Valid = (0 == isl_union_set_foreach_set(Locations, buildMinMaxAccess,
2273
307
                                               &MinMaxAccesses));
2274
307
  isl_union_set_free(Locations);
2275
307
  return Valid;
2276
307
}
2277
2278
/// Helper to treat non-affine regions and basic blocks the same.
2279
///
2280
///{
2281
2282
/// Return the block that is the representing block for @p RN.
2283
13.0k
static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
2284
231
  return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
2285
12.8k
                           : RN->getNodeAs<BasicBlock>();
2286
13.0k
}
2287
2288
/// Return the @p idx'th block that is executed after @p RN.
2289
static inline BasicBlock *
2290
6.73k
getRegionNodeSuccessor(RegionNode *RN, TerminatorInst *TI, unsigned idx) {
2291
6.73k
  if (RN->isSubRegion()) {
2292
86
    assert(idx == 0);
2293
86
    return RN->getNodeAs<Region>()->getExit();
2294
86
  }
2295
6.64k
  return TI->getSuccessor(idx);
2296
6.73k
}
2297
2298
/// Return the smallest loop surrounding @p RN.
2299
14.5k
static inline Loop *getRegionNodeLoop(RegionNode *RN, LoopInfo &LI) {
2300
14.5k
  if (!RN->isSubRegion()) {
2301
13.4k
    BasicBlock *BB = RN->getNodeAs<BasicBlock>();
2302
13.4k
    Loop *L = LI.getLoopFor(BB);
2303
13.4k
2304
13.4k
    // Unreachable statements are not considered to belong to a LLVM loop, as
2305
13.4k
    // they are not part of an actual loop in the control flow graph.
2306
13.4k
    // Nevertheless, we handle certain unreachable statements that are common
2307
13.4k
    // when modeling run-time bounds checks as being part of the loop to be
2308
13.4k
    // able to model them and to later eliminate the run-time bounds checks.
2309
13.4k
    //
2310
13.4k
    // Specifically, for basic blocks that terminate in an unreachable and
2311
13.4k
    // where the immeditate predecessor is part of a loop, we assume these
2312
13.4k
    // basic blocks belong to the loop the predecessor belongs to. This
2313
13.4k
    // allows us to model the following code.
2314
13.4k
    //
2315
13.4k
    // for (i = 0; i < N; i++) {
2316
13.4k
    //   if (i > 1024)
2317
13.4k
    //     abort();            <- this abort might be translated to an
2318
13.4k
    //                            unreachable
2319
13.4k
    //
2320
13.4k
    //   A[i] = ...
2321
13.4k
    // }
2322
13.4k
    if (!L && isa<UnreachableInst>(BB->getTerminator()) && BB->getPrevNode())
2323
0
      L = LI.getLoopFor(BB->getPrevNode());
2324
13.4k
    return L;
2325
13.4k
  }
2326
14.5k
2327
1.10k
  Region *NonAffineSubRegion = RN->getNodeAs<Region>();
2328
1.10k
  Loop *L = LI.getLoopFor(NonAffineSubRegion->getEntry());
2329
1.79k
  while (
L && 1.79k
NonAffineSubRegion->contains(L)1.35k
)
2330
686
    L = L->getParentLoop();
2331
1.10k
  return L;
2332
14.5k
}
2333
2334
/// Get the number of blocks in @p L.
2335
///
2336
/// The number of blocks in a loop are the number of basic blocks actually
2337
/// belonging to the loop, as well as all single basic blocks that the loop
2338
/// exits to and which terminate in an unreachable instruction. We do not
2339
/// allow such basic blocks in the exit of a scop, hence they belong to the
2340
/// scop and represent run-time conditions which we want to model and
2341
/// subsequently speculate away.
2342
///
2343
/// @see getRegionNodeLoop for additional details.
2344
4.10k
long getNumBlocksInLoop(Loop *L) {
2345
4.10k
  long NumBlocks = L->getNumBlocks();
2346
4.10k
  SmallVector<llvm::BasicBlock *, 4> ExitBlocks;
2347
4.10k
  L->getExitBlocks(ExitBlocks);
2348
4.10k
2349
4.16k
  for (auto ExitBlock : ExitBlocks) {
2350
4.16k
    if (isa<UnreachableInst>(ExitBlock->getTerminator()))
2351
27
      NumBlocks++;
2352
4.16k
  }
2353
4.10k
  return NumBlocks;
2354
4.10k
}
2355
2356
4.25k
static inline unsigned getNumBlocksInRegionNode(RegionNode *RN) {
2357
4.25k
  if (!RN->isSubRegion())
2358
4.17k
    return 1;
2359
4.25k
2360
82
  Region *R = RN->getNodeAs<Region>();
2361
82
  return std::distance(R->block_begin(), R->block_end());
2362
4.25k
}
2363
2364
static bool containsErrorBlock(RegionNode *RN, const Region &R, LoopInfo &LI,
2365
8.75k
                               const DominatorTree &DT) {
2366
8.75k
  if (!RN->isSubRegion())
2367
8.58k
    return isErrorBlock(*RN->getNodeAs<BasicBlock>(), R, LI, DT);
2368
170
  for (BasicBlock *BB : RN->getNodeAs<Region>()->blocks())
2369
485
    if (isErrorBlock(*BB, R, LI, DT))
2370
18
      return true;
2371
152
  return false;
2372
170
}
2373
2374
///}
2375
2376
static inline __isl_give isl_set *addDomainDimId(__isl_take isl_set *Domain,
2377
1.90k
                                                 unsigned Dim, Loop *L) {
2378
1.90k
  Domain = isl_set_lower_bound_si(Domain, isl_dim_set, Dim, -1);
2379
1.90k
  isl_id *DimId =
2380
1.90k
      isl_id_alloc(isl_set_get_ctx(Domain), nullptr, static_cast<void *>(L));
2381
1.90k
  return isl_set_set_dim_id(Domain, isl_dim_set, Dim, DimId);
2382
1.90k
}
2383
2384
1.58k
__isl_give isl_set *Scop::getDomainConditions(const ScopStmt *Stmt) const {
2385
1.58k
  return getDomainConditions(Stmt->getEntryBlock());
2386
1.58k
}
2387
2388
15.3k
__isl_give isl_set *Scop::getDomainConditions(BasicBlock *BB) const {
2389
15.3k
  auto DIt = DomainMap.find(BB);
2390
15.3k
  if (DIt != DomainMap.end())
2391
15.3k
    return isl_set_copy(DIt->getSecond());
2392
15.3k
2393
16
  auto &RI = *R.getRegionInfo();
2394
16
  auto *BBR = RI.getRegionFor(BB);
2395
32
  while (BBR->getEntry() == BB)
2396
16
    BBR = BBR->getParent();
2397
16
  return getDomainConditions(BBR->getEntry());
2398
15.3k
}
2399
2400
940
bool Scop::buildDomains(Region *R, DominatorTree &DT, LoopInfo &LI) {
2401
940
2402
940
  bool IsOnlyNonAffineRegion = isNonAffineSubRegion(R);
2403
940
  auto *EntryBB = R->getEntry();
2404
924
  auto *L = IsOnlyNonAffineRegion ? 
nullptr16
:
LI.getLoopFor(EntryBB)924
;
2405
940
  int LD = getRelativeLoopDepth(L);
2406
940
  auto *S = isl_set_universe(isl_space_set_alloc(getIslCtx(), 0, LD + 1));
2407
940
2408
1.73k
  while (LD-- >= 0) {
2409
791
    S = addDomainDimId(S, LD + 1, L);
2410
791
    L = L->getParentLoop();
2411
791
  }
2412
940
2413
940
  // Initialize the invalid domain.
2414
940
  auto *EntryStmt = getStmtFor(EntryBB);
2415
940
  EntryStmt->setInvalidDomain(isl_set_empty(isl_set_get_space(S)));
2416
940
2417
940
  DomainMap[EntryBB] = S;
2418
940
2419
940
  if (IsOnlyNonAffineRegion)
2420
16
    return !containsErrorBlock(R->getNode(), *R, LI, DT);
2421
940
2422
924
  
if (924
!buildDomainsWithBranchConstraints(R, DT, LI)924
)
2423
4
    return false;
2424
924
2425
920
  if (!propagateDomainConstraints(R, DT, LI))
2426
0
    return false;
2427
920
2428
920
  // Error blocks and blocks dominated by them have been assumed to never be
2429
920
  // executed. Representing them in the Scop does not add any value. In fact,
2430
920
  // it is likely to cause issues during construction of the ScopStmts. The
2431
920
  // contents of error blocks have not been verified to be expressible and
2432
920
  // will cause problems when building up a ScopStmt for them.
2433
920
  // Furthermore, basic blocks dominated by error blocks may reference
2434
920
  // instructions in the error block which, if the error block is not modeled,
2435
920
  // can themselves not be constructed properly. To this end we will replace
2436
920
  // the domains of error blocks and those only reachable via error blocks
2437
920
  // with an empty set. Additionally, we will record for each block under which
2438
920
  // parameter combination it would be reached via an error block in its
2439
920
  // InvalidDomain. This information is needed during load hoisting.
2440
920
  if (!propagateInvalidStmtDomains(R, DT, LI))
2441
0
    return false;
2442
920
2443
920
  return true;
2444
920
}
2445
2446
/// Adjust the dimensions of @p Dom that was constructed for @p OldL
2447
///        to be compatible to domains constructed for loop @p NewL.
2448
///
2449
/// This function assumes @p NewL and @p OldL are equal or there is a CFG
2450
/// edge from @p OldL to @p NewL.
2451
static __isl_give isl_set *adjustDomainDimensions(Scop &S,
2452
                                                  __isl_take isl_set *Dom,
2453
7.51k
                                                  Loop *OldL, Loop *NewL) {
2454
7.51k
2455
7.51k
  // If the loops are the same there is nothing to do.
2456
7.51k
  if (NewL == OldL)
2457
5.08k
    return Dom;
2458
7.51k
2459
2.43k
  int OldDepth = S.getRelativeLoopDepth(OldL);
2460
2.43k
  int NewDepth = S.getRelativeLoopDepth(NewL);
2461
2.43k
  // If both loops are non-affine loops there is nothing to do.
2462
2.43k
  if (OldDepth == -1 && NewDepth == -1)
2463
0
    return Dom;
2464
2.43k
2465
2.43k
  // Distinguish three cases:
2466
2.43k
  //   1) The depth is the same but the loops are not.
2467
2.43k
  //      => One loop was left one was entered.
2468
2.43k
  //   2) The depth increased from OldL to NewL.
2469
2.43k
  //      => One loop was entered, none was left.
2470
2.43k
  //   3) The depth decreased from OldL to NewL.
2471
2.43k
  //      => Loops were left were difference of the depths defines how many.
2472
2.43k
  if (OldDepth == NewDepth) {
2473
12
    assert(OldL->getParentLoop() == NewL->getParentLoop());
2474
12
    Dom = isl_set_project_out(Dom, isl_dim_set, NewDepth, 1);
2475
12
    Dom = isl_set_add_dims(Dom, isl_dim_set, 1);
2476
12
    Dom = addDomainDimId(Dom, NewDepth, NewL);
2477
2.42k
  } else if (OldDepth < NewDepth) {
2478
1.09k
    assert(OldDepth + 1 == NewDepth);
2479
1.09k
    auto &R = S.getRegion();
2480
1.09k
    (void)R;
2481
1.09k
    assert(NewL->getParentLoop() == OldL ||
2482
1.09k
           ((!OldL || !R.contains(OldL)) && R.contains(NewL)));
2483
1.09k
    Dom = isl_set_add_dims(Dom, isl_dim_set, 1);
2484
1.09k
    Dom = addDomainDimId(Dom, NewDepth, NewL);
2485
1.32k
  } else {
2486
1.32k
    assert(OldDepth > NewDepth);
2487
1.32k
    int Diff = OldDepth - NewDepth;
2488
1.32k
    int NumDim = isl_set_n_dim(Dom);
2489
1.32k
    assert(NumDim >= Diff);
2490
1.32k
    Dom = isl_set_project_out(Dom, isl_dim_set, NumDim - Diff, Diff);
2491
1.32k
  }
2492
2.43k
2493
2.43k
  return Dom;
2494
2.43k
}
2495
2496
bool Scop::propagateInvalidStmtDomains(Region *R, DominatorTree &DT,
2497
1.81k
                                       LoopInfo &LI) {
2498
1.81k
  ReversePostOrderTraversal<Region *> RTraversal(R);
2499
5.24k
  for (auto *RN : RTraversal) {
2500
5.24k
2501
5.24k
    // Recurse for affine subregions but go on for basic blocks and non-affine
2502
5.24k
    // subregions.
2503
5.24k
    if (RN->isSubRegion()) {
2504
968
      Region *SubRegion = RN->getNodeAs<Region>();
2505
968
      if (!isNonAffineSubRegion(SubRegion)) {
2506
891
        propagateInvalidStmtDomains(SubRegion, DT, LI);
2507
891
        continue;
2508
891
      }
2509
968
    }
2510
5.24k
2511
4.35k
    bool ContainsErrorBlock = containsErrorBlock(RN, getRegion(), LI, DT);
2512
4.35k
    BasicBlock *BB = getRegionNodeBasicBlock(RN);
2513
4.35k
    ScopStmt *Stmt = getStmtFor(BB);
2514
4.35k
    isl_set *&Domain = DomainMap[BB];
2515
4.35k
    assert(Domain && "Cannot propagate a nullptr");
2516
4.35k
2517
4.35k
    auto *InvalidDomain = Stmt->getInvalidDomain();
2518
4.35k
    bool IsInvalidBlock =
2519
4.31k
        ContainsErrorBlock || isl_set_is_subset(Domain, InvalidDomain);
2520
4.35k
2521
4.35k
    if (!IsInvalidBlock) {
2522
4.20k
      InvalidDomain = isl_set_intersect(InvalidDomain, isl_set_copy(Domain));
2523
152
    } else {
2524
152
      isl_set_free(InvalidDomain);
2525
152
      InvalidDomain = Domain;
2526
152
      isl_set *DomPar = isl_set_params(isl_set_copy(Domain));
2527
152
      recordAssumption(ERRORBLOCK, DomPar, BB->getTerminator()->getDebugLoc(),
2528
152
                       AS_RESTRICTION);
2529
152
      Domain = nullptr;
2530
152
    }
2531
4.35k
2532
4.35k
    if (isl_set_is_empty(InvalidDomain)) {
2533
3.71k
      Stmt->setInvalidDomain(InvalidDomain);
2534
3.71k
      continue;
2535
3.71k
    }
2536
4.35k
2537
636
    auto *BBLoop = getRegionNodeLoop(RN, LI);
2538
636
    auto *TI = BB->getTerminator();
2539
627
    unsigned NumSuccs = RN->isSubRegion() ? 1 : TI->getNumSuccessors();
2540
1.54k
    for (unsigned u = 0; u < NumSuccs; u++) {
2541
906
      auto *SuccBB = getRegionNodeSuccessor(RN, TI, u);
2542
906
      auto *SuccStmt = getStmtFor(SuccBB);
2543
906
2544
906
      // Skip successors outside the SCoP.
2545
906
      if (!SuccStmt)
2546
151
        continue;
2547
906
2548
906
      // Skip backedges.
2549
755
      
      if (DT.dominates(SuccBB, BB))
2550
220
        continue;
2551
755
2552
535
      auto *SuccBBLoop = SuccStmt->getSurroundingLoop();
2553
535
      auto *AdjustedInvalidDomain = adjustDomainDimensions(
2554
535
          *this, isl_set_copy(InvalidDomain), BBLoop, SuccBBLoop);
2555
535
      auto *SuccInvalidDomain = SuccStmt->getInvalidDomain();
2556
535
      SuccInvalidDomain =
2557
535
          isl_set_union(SuccInvalidDomain, AdjustedInvalidDomain);
2558
535
      SuccInvalidDomain = isl_set_coalesce(SuccInvalidDomain);
2559
535
      unsigned NumConjucts = isl_set_n_basic_set(SuccInvalidDomain);
2560
535
      SuccStmt->setInvalidDomain(SuccInvalidDomain);
2561
535
2562
535
      // Check if the maximal number of domain disjunctions was reached.
2563
535
      // In case this happens we will bail.
2564
535
      if (NumConjucts < MaxDisjunctsInDomain)
2565
535
        continue;
2566
535
2567
0
      isl_set_free(InvalidDomain);
2568
0
      invalidate(COMPLEXITY, TI->getDebugLoc());
2569
0
      return false;
2570
535
    }
2571
636
2572
636
    Stmt->setInvalidDomain(InvalidDomain);
2573
636
  }
2574
1.81k
2575
1.81k
  return true;
2576
1.81k
}
2577
2578
void Scop::propagateDomainConstraintsToRegionExit(
2579
    BasicBlock *BB, Loop *BBLoop,
2580
4.38k
    SmallPtrSetImpl<BasicBlock *> &FinishedExitBlocks, LoopInfo &LI) {
2581
4.38k
2582
4.38k
  // Check if the block @p BB is the entry of a region. If so, we propagate its
2583
4.38k
  // domain to the exit block of the region. Otherwise we are done.
2584
4.38k
  auto *RI = R.getRegionInfo();
2585
4.38k
  auto *BBReg = RI ? RI->getRegionFor(BB) : nullptr;
2586
4.38k
  auto *ExitBB = BBReg ? BBReg->getExit() : nullptr;
2587
4.38k
  if (!BBReg || BBReg->getEntry() != BB || !contains(ExitBB))
2588
3.54k
    return;
2589
4.38k
2590
4.38k
  // Do not propagate the domain if there is a loop backedge inside the region
2591
4.38k
  // that would prevent the exit block from being executed.
2592
834
  auto *L = BBLoop;
2593
1.34k
  while (L && contains(L)) {
2594
881
    SmallVector<BasicBlock *, 4> LatchBBs;
2595
881
    BBLoop->getLoopLatches(LatchBBs);
2596
881
    for (auto *LatchBB : LatchBBs)
2597
881
      
      if (BB != LatchBB && BBReg->contains(LatchBB))
2598
367
        return;
2599
514
    L = L->getParentLoop();
2600
514
  }
2601
834
2602
467
  auto *Domain = DomainMap[BB];
2603
467
  assert(Domain && "Cannot propagate a nullptr");
2604
467
2605
467
  auto *ExitStmt = getStmtFor(ExitBB);
2606
467
  auto *ExitBBLoop = ExitStmt->getSurroundingLoop();
2607
467
2608
467
  // Since the dimensions of @p BB and @p ExitBB might be different we have to
2609
467
  // adjust the domain before we can propagate it.
2610
467
  auto *AdjustedDomain =
2611
467
      adjustDomainDimensions(*this, isl_set_copy(Domain), BBLoop, ExitBBLoop);
2612
467
  auto *&ExitDomain = DomainMap[ExitBB];
2613
467
2614
467
  // If the exit domain is not yet created we set it otherwise we "add" the
2615
467
  // current domain.
2616
467
  ExitDomain =
2617
448
      ExitDomain ? isl_set_union(AdjustedDomain, ExitDomain) : AdjustedDomain;
2618
467
2619
467
  // Initialize the invalid domain.
2620
467
  ExitStmt->setInvalidDomain(isl_set_empty(isl_set_get_space(ExitDomain)));
2621
467
2622
467
  FinishedExitBlocks.insert(ExitBB);
2623
467
}
2624
2625
bool Scop::buildDomainsWithBranchConstraints(Region *R, DominatorTree &DT,
2626
1.82k
                                             LoopInfo &LI) {
2627
1.82k
  // To create the domain for each block in R we iterate over all blocks and
2628
1.82k
  // subregions in R and propagate the conditions under which the current region
2629
1.82k
  // element is executed. To this end we iterate in reverse post order over R as
2630
1.82k
  // it ensures that we first visit all predecessors of a region node (either a
2631
1.82k
  // basic block or a subregion) before we visit the region node itself.
2632
1.82k
  // Initially, only the domain for the SCoP region entry block is set and from
2633
1.82k
  // there we propagate the current domain to all successors, however we add the
2634
1.82k
  // condition that the successor is actually executed next.
2635
1.82k
  // As we are only interested in non-loop carried constraints here we can
2636
1.82k
  // simply skip loop back edges.
2637
1.82k
2638
1.82k
  SmallPtrSet<BasicBlock *, 8> FinishedExitBlocks;
2639
1.82k
  ReversePostOrderTraversal<Region *> RTraversal(R);
2640
5.28k
  for (auto *RN : RTraversal) {
2641
5.28k
2642
5.28k
    // Recurse for affine subregions but go on for basic blocks and non-affine
2643
5.28k
    // subregions.
2644
5.28k
    if (RN->isSubRegion()) {
2645
978
      Region *SubRegion = RN->getNodeAs<Region>();
2646
978
      if (!isNonAffineSubRegion(SubRegion)) {
2647
901
        if (!buildDomainsWithBranchConstraints(SubRegion, DT, LI))
2648
3
          return false;
2649
898
        continue;
2650
901
      }
2651
978
    }
2652
5.28k
2653
4.38k
    
    if (containsErrorBlock(RN, getRegion(), LI, DT))
2654
36
      HasErrorBlock = true;
2655
4.38k
2656
4.38k
    BasicBlock *BB = getRegionNodeBasicBlock(RN);
2657
4.38k
    TerminatorInst *TI = BB->getTerminator();
2658
4.38k
2659
4.38k
    if (isa<UnreachableInst>(TI))
2660
0
      continue;
2661
4.38k
2662
4.38k
    isl_set *Domain = DomainMap.lookup(BB);
2663
4.38k
    if (!Domain)
2664
0
      continue;
2665
4.38k
    MaxLoopDepth = std::max(MaxLoopDepth, isl_set_n_dim(Domain));
2666
4.38k
2667
4.38k
    auto *BBLoop = getRegionNodeLoop(RN, LI);
2668
4.38k
    // Propagate the domain from BB directly to blocks that have a superset
2669
4.38k
    // domain, at the moment only region exit nodes of regions that start in BB.
2670
4.38k
    propagateDomainConstraintsToRegionExit(BB, BBLoop, FinishedExitBlocks, LI);
2671
4.38k
2672
4.38k
    // If all successors of BB have been set a domain through the propagation
2673
4.38k
    // above we do not need to build condition sets but can just skip this
2674
4.38k
    // block. However, it is important to note that this is a local property
2675
4.38k
    // with regards to the region @p R. To this end FinishedExitBlocks is a
2676
4.38k
    // local variable.
2677
4.51k
    auto IsFinishedRegionExit = [&FinishedExitBlocks](BasicBlock *SuccBB) {
2678
4.51k
      return FinishedExitBlocks.count(SuccBB);
2679
4.51k
    };
2680
4.38k
    if (std::all_of(succ_begin(BB), succ_end(BB), IsFinishedRegionExit))
2681
260
      continue;
2682
4.38k
2683
4.38k
    // Build the condition sets for the successor nodes of the current region
2684
4.38k
    // node. If it is a non-affine subregion we will always execute the single
2685
4.38k
    // exit node, hence the single entry node domain is the condition set. For
2686
4.38k
    // basic blocks we use the helper function buildConditionSets.
2687
4.12k
    SmallVector<isl_set *, 8> ConditionSets;
2688
4.12k
    if (RN->isSubRegion())
2689
77
      ConditionSets.push_back(isl_set_copy(Domain));
2690
4.04k
    else if (!buildConditionSets(*getStmtFor(BB), TI, BBLoop, Domain,
                                 ConditionSets))
2692
4
      return false;
2693
4.12k
2694
4.12k
    // Now iterate over the successors and set their initial domain based on
2695
4.12k
    // their condition set. We skip back edges here and have to be careful when
2696
4.12k
    // we leave a loop not to keep constraints over a dimension that doesn't
2697
4.12k
    // exist anymore.
2698
4.11k
    assert(RN->isSubRegion() || TI->getNumSuccessors() == ConditionSets.size());
2699
9.94k
    for (unsigned u = 0, e = ConditionSets.size(); u < e; u++) {
2700
5.82k
      isl_set *CondSet = ConditionSets[u];
2701
5.82k
      BasicBlock *SuccBB = getRegionNodeSuccessor(RN, TI, u);
2702
5.82k
2703
5.82k
      auto *SuccStmt = getStmtFor(SuccBB);
2704
5.82k
      // Skip blocks outside the region.
2705
5.82k
      if (!SuccStmt) {
2706
1.05k
        isl_set_free(CondSet);
2707
1.05k
        continue;
2708
1.05k
      }
2709
5.82k
2710
5.82k
      // If we propagate the domain of some block to "SuccBB" we do not have to
2711
5.82k
      // adjust the domain.
2712
4.77k
      
      if (FinishedExitBlocks.count(SuccBB)) {
2713
412
        isl_set_free(CondSet);
2714
412
        continue;
2715
412
      }
2716
4.77k
2717
4.77k
      // Skip back edges.
2718
4.36k
      
      if (DT.dominates(SuccBB, BB)) {
2719
1.32k
        isl_set_free(CondSet);
2720
1.32k
        continue;
2721
1.32k
      }
2722
4.36k
2723
3.03k
      auto *SuccBBLoop = SuccStmt->getSurroundingLoop();
2724
3.03k
      CondSet = adjustDomainDimensions(*this, CondSet, BBLoop, SuccBBLoop);
2725
3.03k
2726
3.03k
      // Set the domain for the successor or merge it with an existing domain in
2727
3.03k
      // case there are multiple paths (without loop back edges) to the
2728
3.03k
      // successor block.
2729
3.03k
      isl_set *&SuccDomain = DomainMap[SuccBB];
2730
3.03k
2731
3.03k
      if (SuccDomain) {
2732
21
        SuccDomain = isl_set_coalesce(isl_set_union(SuccDomain, CondSet));
2733
3.01k
      } else {
2734
3.01k
        // Initialize the invalid domain.
2735
3.01k
        SuccStmt->setInvalidDomain(isl_set_empty(isl_set_get_space(CondSet)));
2736
3.01k
        SuccDomain = CondSet;
2737
3.01k
      }
2738
3.03k
2739
3.03k
      // Check if the maximal number of domain disjunctions was reached.
2740
3.03k
      // In case this happens we will clean up and bail.
2741
3.03k
      if (isl_set_n_basic_set(SuccDomain) < MaxDisjunctsInDomain)
2742
3.03k
        continue;
2743
3.03k
2744
0
      invalidate(COMPLEXITY, DebugLoc());
2745
0
      while (++u < ConditionSets.size())
2746
0
        isl_set_free(ConditionSets[u]);
2747
0
      return false;
2748
3.03k
    }
2749
4.11k
  }
2750
1.82k
2751
1.81k
  return true;
2752
1.82k
}
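// --- Illustrative sketch (not part of ScopInfo.cpp) -------------------------
// buildDomainsWithBranchConstraints above walks the region in reverse post
// order and pushes each block's execution condition to its successors. The
// sketch below (standard C++, symbolic conditions as strings, hypothetical
// names) shows the same push-style propagation on an acyclic CFG.
#include <cstdio>
#include <string>
#include <vector>

int main() {
  struct Edge { int To; std::string Cond; };
  // CFG: 0 -(p)-> 1, 0 -(!p)-> 2, 1 -> 3, 2 -> 3; 0..3 is a reverse post order.
  std::vector<std::vector<Edge>> Succs = {
      {{1, "p"}, {2, "!p"}}, {{3, ""}}, {{3, ""}}, {}};

  std::vector<std::string> Domain(Succs.size());
  Domain[0] = "true"; // The region entry is always executed.
  for (unsigned B = 0; B < Succs.size(); ++B)
    for (const Edge &E : Succs[B]) {
      std::string In = E.Cond.empty()
                           ? Domain[B]
                           : "(" + Domain[B] + " and " + E.Cond + ")";
      Domain[E.To] = Domain[E.To].empty() ? In : Domain[E.To] + " or " + In;
    }

  for (unsigned B = 0; B < Succs.size(); ++B)
    std::printf("domain(BB%u) = %s\n", B, Domain[B].c_str());
  return 0;
}
// --- end of sketch -----------------------------------------------------------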
2753
2754
__isl_give isl_set *
2755
Scop::getPredecessorDomainConstraints(BasicBlock *BB,
2756
                                      __isl_keep isl_set *Domain,
2757
4.35k
                                      DominatorTree &DT, LoopInfo &LI) {
2758
4.35k
  // If @p BB is the ScopEntry we are done
2759
4.35k
  if (R.getEntry() == BB)
2760
920
    return isl_set_universe(isl_set_get_space(Domain));
2761
4.35k
2762
4.35k
  // The region info of this function.
2763
3.43k
  auto &RI = *R.getRegionInfo();
2764
3.43k
2765
3.43k
  auto *BBLoop = getStmtFor(BB)->getSurroundingLoop();
2766
3.43k
2767
3.43k
  // A domain to collect all predecessor domains, thus all conditions under
2768
3.43k
  // which the block is executed. To this end we start with the empty domain.
2769
3.43k
  isl_set *PredDom = isl_set_empty(isl_set_get_space(Domain));
2770
3.43k
2771
3.43k
  // Set of regions of which the entry block domain has been propagated to BB.
2772
3.43k
  // all predecessors inside any of the regions can be skipped.
2773
3.43k
  SmallSet<Region *, 8> PropagatedRegions;
2774
3.43k
2775
4.29k
  for (auto *PredBB : predecessors(BB)) {
2776
4.29k
    // Skip backedges.
2777
4.29k
    if (DT.dominates(BB, PredBB))
2778
546
      continue;
2779
4.29k
2780
4.29k
    // If the predecessor is in a region we used for propagation we can skip it.
2781
3.74k
    
    auto PredBBInRegion = [PredBB](Region *PR) { return PR->contains(PredBB); };
2782
3.74k
    if (std::any_of(PropagatedRegions.begin(), PropagatedRegions.end(),
2783
268
                    PredBBInRegion)) {
2784
268
      continue;
2785
268
    }
2786
3.74k
2787
3.74k
    // Check if there is a valid region we can use for propagation, thus look
2788
3.74k
    // for a region that contains the predecessor and has @p BB as exit block.
2789
3.47k
    auto *PredR = RI.getRegionFor(PredBB);
2790
3.47k
    while (PredR->getExit() != BB && !PredR->contains(BB))
2791
0
      PredR->getParent();
2792
3.47k
2793
3.47k
    // If a valid region for propagation was found use the entry of that region
2794
3.47k
    // for propagation, otherwise the PredBB directly.
2795
3.47k
    if (PredR->getExit() == BB) {
2796
865
      PredBB = PredR->getEntry();
2797
865
      PropagatedRegions.insert(PredR);
2798
865
    }
2799
3.47k
2800
3.47k
    auto *PredBBDom = getDomainConditions(PredBB);
2801
3.47k
    auto *PredBBLoop = getStmtFor(PredBB)->getSurroundingLoop();
2802
3.47k
    PredBBDom = adjustDomainDimensions(*this, PredBBDom, PredBBLoop, BBLoop);
2803
3.47k
2804
3.47k
    PredDom = isl_set_union(PredDom, PredBBDom);
2805
3.47k
  }
2806
3.43k
2807
3.43k
  return PredDom;
2808
4.35k
}
2809
2810
bool Scop::propagateDomainConstraints(Region *R, DominatorTree &DT,
2811
1.81k
                                      LoopInfo &LI) {
2812
1.81k
  // Iterate over the region R and propagate the domain constrains from the
2813
1.81k
  // predecessors to the current node. In contrast to the
2814
1.81k
  // buildDomainsWithBranchConstraints function, this one will pull the domain
2815
1.81k
  // information from the predecessors instead of pushing it to the successors.
2816
1.81k
  // Additionally, we assume the domains to be already present in the domain
2817
1.81k
  // map here. However, we iterate again in reverse post order so we know all
2818
1.81k
  // predecessors have been visited before a block or non-affine subregion is
2819
1.81k
  // visited.
2820
1.81k
2821
1.81k
  ReversePostOrderTraversal<Region *> RTraversal(R);
2822
5.24k
  for (auto *RN : RTraversal) {
2823
5.24k
2824
5.24k
    // Recurse for affine subregions but go on for basic blocks and non-affine
2825
5.24k
    // subregions.
2826
5.24k
    if (RN->isSubRegion()) {
2827
968
      Region *SubRegion = RN->getNodeAs<Region>();
2828
968
      if (!isNonAffineSubRegion(SubRegion)) {
2829
891
        if (!propagateDomainConstraints(SubRegion, DT, LI))
2830
0
          return false;
2831
891
        continue;
2832
891
      }
2833
968
    }
2834
5.24k
2835
4.35k
    BasicBlock *BB = getRegionNodeBasicBlock(RN);
2836
4.35k
    isl_set *&Domain = DomainMap[BB];
2837
4.35k
    assert(Domain);
2838
4.35k
2839
4.35k
    // Under the union of all predecessor conditions we can reach this block.
2840
4.35k
    auto *PredDom = getPredecessorDomainConstraints(BB, Domain, DT, LI);
2841
4.35k
    Domain = isl_set_coalesce(isl_set_intersect(Domain, PredDom));
2842
4.35k
    Domain = isl_set_align_params(Domain, getParamSpace());
2843
4.35k
2844
4.35k
    Loop *BBLoop = getRegionNodeLoop(RN, LI);
2845
4.35k
    if (BBLoop && BBLoop->getHeader() == BB && contains(BBLoop))
2846
1.32k
      
      if (!addLoopBoundsToHeaderDomain(BBLoop, LI))
2847
0
        return false;
2848
4.35k
  }
2849
1.81k
2850
1.81k
  return true;
2851
1.81k
}
2852
2853
/// Create a map to map from a given iteration to a subsequent iteration.
2854
///
2855
/// This map maps from SetSpace -> SetSpace where the dimensions @p Dim
2856
/// is incremented by one and all other dimensions are equal, e.g.,
2857
///             [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3]
2858
///
2859
/// if @p Dim is 2 and @p SetSpace has 4 dimensions.
2860
static __isl_give isl_map *
2861
1.32k
createNextIterationMap(__isl_take isl_space *SetSpace, unsigned Dim) {
2862
1.32k
  auto *MapSpace = isl_space_map_from_set(SetSpace);
2863
1.32k
  auto *NextIterationMap = isl_map_universe(isl_space_copy(MapSpace));
2864
3.10k
  for (unsigned u = 0; u < isl_map_dim(NextIterationMap, isl_dim_in); u++)
2865
1.77k
    
    if (u != Dim)
2866
454
      NextIterationMap =
2867
454
          isl_map_equate(NextIterationMap, isl_dim_in, u, isl_dim_out, u);
2868
1.32k
  auto *C = isl_constraint_alloc_equality(isl_local_space_from_space(MapSpace));
2869
1.32k
  C = isl_constraint_set_constant_si(C, 1);
2870
1.32k
  C = isl_constraint_set_coefficient_si(C, isl_dim_in, Dim, 1);
2871
1.32k
  C = isl_constraint_set_coefficient_si(C, isl_dim_out, Dim, -1);
2872
1.32k
  NextIterationMap = isl_map_add_constraint(NextIterationMap, C);
2873
1.32k
  return NextIterationMap;
2874
1.32k
}
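// --- Illustrative sketch (not part of ScopInfo.cpp) -------------------------
// createNextIterationMap above builds the relation that increments dimension
// Dim and keeps all other dimensions equal. A plain-vector stand-in
// (hypothetical names) for the same mapping:
#include <cassert>
#include <vector>

static std::vector<long> nextIteration(std::vector<long> Iter, unsigned Dim) {
  Iter[Dim] += 1; // [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3] for Dim == 2
  return Iter;
}

int main() {
  std::vector<long> Next = nextIteration({0, 4, 7, 1}, 2);
  assert((Next == std::vector<long>{0, 4, 8, 1}));
  return 0;
}
// --- end of sketch -----------------------------------------------------------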
2875
2876
1.32k
bool Scop::addLoopBoundsToHeaderDomain(Loop *L, LoopInfo &LI) {
2877
1.32k
  int LoopDepth = getRelativeLoopDepth(L);
2878
1.32k
  assert(LoopDepth >= 0 && "Loop in region should have at least depth one");
2879
1.32k
2880
1.32k
  BasicBlock *HeaderBB = L->getHeader();
2881
1.32k
  assert(DomainMap.count(HeaderBB));
2882
1.32k
  isl_set *&HeaderBBDom = DomainMap[HeaderBB];
2883
1.32k
2884
1.32k
  isl_map *NextIterationMap =
2885
1.32k
      createNextIterationMap(isl_set_get_space(HeaderBBDom), LoopDepth);
2886
1.32k
2887
1.32k
  isl_set *UnionBackedgeCondition =
2888
1.32k
      isl_set_empty(isl_set_get_space(HeaderBBDom));
2889
1.32k
2890
1.32k
  SmallVector<llvm::BasicBlock *, 4> LatchBlocks;
2891
1.32k
  L->getLoopLatches(LatchBlocks);
2892
1.32k
2893
1.32k
  for (BasicBlock *LatchBB : LatchBlocks) {
2894
1.32k
2895
1.32k
    // If the latch is only reachable via error statements we skip it.
2896
1.32k
    isl_set *LatchBBDom = DomainMap.lookup(LatchBB);
2897
1.32k
    if (!LatchBBDom)
2898
0
      continue;
2899
1.32k
2900
1.32k
    isl_set *BackedgeCondition = nullptr;
2901
1.32k
2902
1.32k
    TerminatorInst *TI = LatchBB->getTerminator();
2903
1.32k
    BranchInst *BI = dyn_cast<BranchInst>(TI);
2904
1.32k
    assert(BI && "Only branch instructions allowed in loop latches");
2905
1.32k
2906
1.32k
    if (BI->isUnconditional())
2907
753
      BackedgeCondition = isl_set_copy(LatchBBDom);
2908
573
    else {
2909
573
      SmallVector<isl_set *, 8> ConditionSets;
2910
573
      int idx = BI->getSuccessor(0) != HeaderBB;
2911
573
      if (!buildConditionSets(*getStmtFor(LatchBB), TI, L, LatchBBDom,
2912
0
                              ConditionSets)) {
2913
0
        isl_map_free(NextIterationMap);
2914
0
        isl_set_free(UnionBackedgeCondition);
2915
0
        return false;
2916
0
      }
2917
573
2918
573
      // Free the non back edge condition set as we do not need it.
2919
573
      isl_set_free(ConditionSets[1 - idx]);
2920
573
2921
573
      BackedgeCondition = ConditionSets[idx];
2922
573
    }
2923
1.32k
2924
1.32k
    int LatchLoopDepth = getRelativeLoopDepth(LI.getLoopFor(LatchBB));
2925
1.32k
    assert(LatchLoopDepth >= LoopDepth);
2926
1.32k
    BackedgeCondition =
2927
1.32k
        isl_set_project_out(BackedgeCondition, isl_dim_set, LoopDepth + 1,
2928
1.32k
                            LatchLoopDepth - LoopDepth);
2929
1.32k
    UnionBackedgeCondition =
2930
1.32k
        isl_set_union(UnionBackedgeCondition, BackedgeCondition);
2931
1.32k
  }
2932
1.32k
2933
1.32k
  isl_map *ForwardMap = isl_map_lex_le(isl_set_get_space(HeaderBBDom));
2934
1.77k
  for (int i = 0; i < LoopDepth; i++)
2935
454
    ForwardMap = isl_map_equate(ForwardMap, isl_dim_in, i, isl_dim_out, i);
2936
1.32k
2937
1.32k
  isl_set *UnionBackedgeConditionComplement =
2938
1.32k
      isl_set_complement(UnionBackedgeCondition);
2939
1.32k
  UnionBackedgeConditionComplement = isl_set_lower_bound_si(
2940
1.32k
      UnionBackedgeConditionComplement, isl_dim_set, LoopDepth, 0);
2941
1.32k
  UnionBackedgeConditionComplement =
2942
1.32k
      isl_set_apply(UnionBackedgeConditionComplement, ForwardMap);
2943
1.32k
  HeaderBBDom = isl_set_subtract(HeaderBBDom, UnionBackedgeConditionComplement);
2944
1.32k
  HeaderBBDom = isl_set_apply(HeaderBBDom, NextIterationMap);
2945
1.32k
2946
1.32k
  auto Parts = partitionSetParts(HeaderBBDom, LoopDepth);
2947
1.32k
  HeaderBBDom = Parts.second;
2948
1.32k
2949
1.32k
  // Check if there is a <nsw> tagged AddRec for this loop and if so do not add
2950
1.32k
  // the bounded assumptions to the context as they are already implied by the
2951
1.32k
  // <nsw> tag.
2952
1.32k
  if (Affinator.hasNSWAddRecForLoop(L)) {
2953
976
    isl_set_free(Parts.first);
2954
976
    return true;
2955
976
  }
2956
1.32k
2957
349
  isl_set *UnboundedCtx = isl_set_params(Parts.first);
2958
349
  recordAssumption(INFINITELOOP, UnboundedCtx,
2959
349
                   HeaderBB->getTerminator()->getDebugLoc(), AS_RESTRICTION);
2960
349
  return true;
2961
1.32k
}
2962
2963
1.15k
MemoryAccess *Scop::lookupBasePtrAccess(MemoryAccess *MA) {
2964
1.15k
  Value *PointerBase = MA->getOriginalBaseAddr();
2965
1.15k
2966
1.15k
  auto *PointerBaseInst = dyn_cast<Instruction>(PointerBase);
2967
1.15k
  if (!PointerBaseInst)
2968
955
    return nullptr;
2969
1.15k
2970
202
  auto *BasePtrStmt = getStmtFor(PointerBaseInst);
2971
202
  if (!BasePtrStmt)
2972
66
    return nullptr;
2973
202
2974
136
  return BasePtrStmt->getArrayAccessOrNULLFor(PointerBaseInst);
2975
202
}
2976
2977
bool Scop::hasNonHoistableBasePtrInScop(MemoryAccess *MA,
2978
512
                                        __isl_keep isl_union_map *Writes) {
2979
512
  if (auto *BasePtrMA = lookupBasePtrAccess(MA)) {
2980
73
    auto *NHCtx = getNonHoistableCtx(BasePtrMA, Writes);
2981
73
    bool Hoistable = NHCtx != nullptr;
2982
73
    isl_set_free(NHCtx);
2983
73
    return !Hoistable;
2984
73
  }
2985
512
2986
439
  Value *BaseAddr = MA->getOriginalBaseAddr();
2987
439
  if (auto *BasePtrInst = dyn_cast<Instruction>(BaseAddr))
2988
58
    
if (58
!isa<LoadInst>(BasePtrInst)58
)
2989
37
      return contains(BasePtrInst);
2990
439
2991
402
  return false;
2992
439
}
2993
2994
901
bool Scop::buildAliasChecks(AliasAnalysis &AA) {
2995
901
  if (!PollyUseRuntimeAliasChecks)
2996
20
    return true;
2997
901
2998
881
  
  if (buildAliasGroups(AA)) {
2999
878
    // Aliasing assumptions do not go through addAssumption but we still want to
3000
878
    // collect statistics so we do it here explicitly.
3001
878
    if (MinMaxAliasGroups.size())
3002
145
      AssumptionsAliasing++;
3003
878
    return true;
3004
878
  }
3005
881
3006
881
  // If a problem occurs while building the alias groups we need to delete
3007
881
  // this SCoP and pretend it wasn't valid in the first place. To this end
3008
881
  // we make the assumed context infeasible.
3009
3
  invalidate(ALIASING, DebugLoc());
3010
3
3011
3
  DEBUG(dbgs() << "\n\nNOTE: Run time checks for " << getNameStr()
3012
3
               << " could not be created as the number of parameters involved "
3013
3
                  "is too high. The SCoP will be "
3014
3
                  "dismissed.\nUse:\n\t--polly-rtc-max-parameters=X\nto adjust "
3015
3
                  "the maximal number of parameters but be advised that the "
3016
3
                  "compile time might increase exponentially.\n\n");
3017
3
  return false;
3018
881
}
3019
3020
std::tuple<Scop::AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>>
3021
881
Scop::buildAliasGroupsForAccesses(AliasAnalysis &AA) {
3022
881
  AliasSetTracker AST(AA);
3023
881
3024
881
  DenseMap<Value *, MemoryAccess *> PtrToAcc;
3025
881
  DenseSet<const ScopArrayInfo *> HasWriteAccess;
3026
1.52k
  for (ScopStmt &Stmt : *this) {
3027
1.52k
3028
1.52k
    isl_set *StmtDomain = Stmt.getDomain();
3029
1.52k
    bool StmtDomainEmpty = isl_set_is_empty(StmtDomain);
3030
1.52k
    isl_set_free(StmtDomain);
3031
1.52k
3032
1.52k
    // Statements with an empty domain will never be executed.
3033
1.52k
    if (StmtDomainEmpty)
3034
3
      continue;
3035
1.52k
3036
3.32k
    
    for (MemoryAccess *MA : Stmt) {
3037
3.32k
      if (MA->isScalarKind())
3038
795
        continue;
3039
2.53k
      
      if (!MA->isRead())
3040
1.23k
        HasWriteAccess.insert(MA->getScopArrayInfo());
3041
2.53k
      MemAccInst Acc(MA->getAccessInstruction());
3042
2.53k
      if (MA->isRead() && isa<MemTransferInst>(Acc))
3043
6
        PtrToAcc[cast<MemTransferInst>(Acc)->getRawSource()] = MA;
3044
2.53k
      else
3045
2.52k
        PtrToAcc[Acc.getPointerOperand()] = MA;
3046
2.53k
      AST.add(Acc);
3047
2.53k
    }
3048
1.52k
  }
3049
881
3050
881
  AliasGroupVectorTy AliasGroups;
3051
1.17k
  for (AliasSet &AS : AST) {
3052
1.17k
    if (AS.isMustAlias() || AS.isForwardingAliasSet())
3053
859
      continue;
3054
314
    AliasGroupTy AG;
3055
314
    for (auto &PR : AS)
3056
1.08k
      AG.push_back(PtrToAcc[PR.getValue()]);
3057
314
    if (AG.size() < 2)
3058
14
      continue;
3059
300
    AliasGroups.push_back(std::move(AG));
3060
300
  }
3061
881
3062
881
  return std::make_tuple(AliasGroups, HasWriteAccess);
3063
881
}
3064
3065
881
void Scop::splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups) {
3066
1.18k
  for (unsigned u = 0; u < AliasGroups.size(); u++) {
3067
308
    AliasGroupTy NewAG;
3068
308
    AliasGroupTy &AG = AliasGroups[u];
3069
308
    AliasGroupTy::iterator AGI = AG.begin();
3070
308
    isl_set *AGDomain = getAccessDomain(*AGI);
3071
1.39k
    while (AGI != AG.end()) {
3072
1.09k
      MemoryAccess *MA = *AGI;
3073
1.09k
      isl_set *MADomain = getAccessDomain(MA);
3074
1.09k
      if (isl_set_is_disjoint(AGDomain, MADomain)) {
3075
26
        NewAG.push_back(MA);
3076
26
        AGI = AG.erase(AGI);
3077
26
        isl_set_free(MADomain);
3078
1.06k
      } else {
3079
1.06k
        AGDomain = isl_set_union(AGDomain, MADomain);
3080
1.06k
        AGI++;
3081
1.06k
      }
3082
1.09k
    }
3083
308
    if (NewAG.size() > 1)
3084
8
      AliasGroups.push_back(std::move(NewAG));
3085
308
    isl_set_free(AGDomain);
3086
308
  }
3087
881
}
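// --- Illustrative sketch (not part of ScopInfo.cpp) -------------------------
// splitAliasGroupsByDomain above moves accesses whose domains are disjoint
// from the accumulated group domain into a new group. The sketch below models
// a domain as a closed integer interval (all names hypothetical).
#include <algorithm>
#include <cstdio>
#include <vector>

struct Access { long Lo, Hi; }; // stand-in for an access and its domain

static bool disjoint(Access A, Access B) { return A.Hi < B.Lo || B.Hi < A.Lo; }

int main() {
  std::vector<Access> Group = {{0, 9}, {5, 14}, {100, 109}};
  std::vector<Access> NewGroup;
  Access GroupDom = Group.front();
  for (auto It = Group.begin(); It != Group.end();) {
    if (disjoint(GroupDom, *It)) {
      NewGroup.push_back(*It); // never executed together: no check needed
      It = Group.erase(It);
    } else {
      GroupDom = {std::min(GroupDom.Lo, It->Lo), std::max(GroupDom.Hi, It->Hi)};
      ++It;
    }
  }
  std::printf("kept %zu, split off %zu\n", Group.size(), NewGroup.size()); // 2, 1
  return 0;
}
// --- end of sketch -----------------------------------------------------------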
3088
3089
881
bool Scop::buildAliasGroups(AliasAnalysis &AA) {
3090
881
  // To create sound alias checks we perform the following steps:
3091
881
  //   o) We partition each group into read only and non read only accesses.
3092
881
  //   o) For each group with more than one base pointer we then compute minimal
3093
881
  //      and maximal accesses to each array of a group in read only and non
3094
881
  //      read only partitions separately.
3095
881
  AliasGroupVectorTy AliasGroups;
3096
881
  DenseSet<const ScopArrayInfo *> HasWriteAccess;
3097
881
3098
881
  std::tie(AliasGroups, HasWriteAccess) = buildAliasGroupsForAccesses(AA);
3099
881
3100
881
  splitAliasGroupsByDomain(AliasGroups);
3101
881
3102
308
  for (AliasGroupTy &AG : AliasGroups) {
3103
308
    bool Valid = buildAliasGroup(AG, HasWriteAccess);
3104
308
    if (!Valid)
3105
3
      return false;
3106
308
  }
3107
881
3108
878
  return true;
3109
881
}
3110
3111
bool Scop::buildAliasGroup(Scop::AliasGroupTy &AliasGroup,
3112
308
                           DenseSet<const ScopArrayInfo *> HasWriteAccess) {
3113
308
  AliasGroupTy ReadOnlyAccesses;
3114
308
  AliasGroupTy ReadWriteAccesses;
3115
308
  SmallPtrSet<const ScopArrayInfo *, 4> ReadWriteArrays;
3116
308
  SmallPtrSet<const ScopArrayInfo *, 4> ReadOnlyArrays;
3117
308
3118
308
  auto &F = getFunction();
3119
308
3120
308
  if (AliasGroup.size() < 2)
3121
4
    return true;
3122
308
3123
1.06k
  
  for (MemoryAccess *Access : AliasGroup) {
3124
1.06k
    emitOptimizationRemarkAnalysis(
3125
1.06k
        F.getContext(), DEBUG_TYPE, F,
3126
1.06k
        Access->getAccessInstruction()->getDebugLoc(),
3127
1.06k
        "Possibly aliasing pointer, use restrict keyword.");
3128
1.06k
3129
1.06k
    const ScopArrayInfo *Array = Access->getScopArrayInfo();
3130
1.06k
    if (HasWriteAccess.count(Array)) {
3131
669
      ReadWriteArrays.insert(Array);
3132
669
      ReadWriteAccesses.push_back(Access);
3133
391
    } else {
3134
391
      ReadOnlyArrays.insert(Array);
3135
391
      ReadOnlyAccesses.push_back(Access);
3136
391
    }
3137
1.06k
  }
3138
304
3139
304
  // If there are no read-only pointers, and less than two read-write pointers,
3140
304
  // no alias check is needed.
3141
304
  if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= 1)
3142
142
    return true;
3143
304
3144
304
  // If there is no read-write pointer, no alias check is needed.
3145
162
  
  if (ReadWriteArrays.empty())
3146
8
    return true;
3147
162
3148
162
  // For non-affine accesses, no alias check can be generated as we cannot
3149
162
  // compute a sufficiently tight lower and upper bound: bail out.
3150
645
  
  for (MemoryAccess *MA : AliasGroup) {
3151
645
    if (!MA->isAffine()) {
3152
0
      invalidate(ALIASING, MA->getAccessInstruction()->getDebugLoc());
3153
0
      return false;
3154
0
    }
3155
645
  }
3156
154
3157
154
  // Ensure that for all memory accesses for which we generate alias checks,
3158
154
  // their base pointers are available.
3159
645
  
  for (MemoryAccess *MA : AliasGroup) {
3160
645
    if (MemoryAccess *BasePtrMA = lookupBasePtrAccess(MA))
3161
55
      addRequiredInvariantLoad(
3162
55
          cast<LoadInst>(BasePtrMA->getAccessInstruction()));
3163
645
  }
3164
154
3165
154
  MinMaxAliasGroups.emplace_back();
3166
154
  MinMaxVectorPairTy &pair = MinMaxAliasGroups.back();
3167
154
  MinMaxVectorTy &MinMaxAccessesReadWrite = pair.first;
3168
154
  MinMaxVectorTy &MinMaxAccessesReadOnly = pair.second;
3169
154
3170
154
  bool Valid;
3171
154
3172
154
  Valid =
3173
154
      calculateMinMaxAccess(ReadWriteAccesses, *this, MinMaxAccessesReadWrite);
3174
154
3175
154
  if (!Valid)
3176
0
    return false;
3177
154
3178
154
  // Bail out if the number of values we need to compare is too large.
3179
154
  // This is important as the number of comparisons grows quadratically with
3180
154
  // the number of values we need to compare.
3181
154
  
  if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() >
      RunTimeChecksMaxArraysPerGroup)
3183
1
    return false;
3184
154
3185
153
  Valid =
3186
153
      calculateMinMaxAccess(ReadOnlyAccesses, *this, MinMaxAccessesReadOnly);
3187
153
3188
153
  if (!Valid)
3189
2
    return false;
3190
153
3191
151
  return true;
3192
153
}
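// --- Illustrative sketch (not part of ScopInfo.cpp) -------------------------
// buildAliasGroup above partitions a group into read-only and read-write
// accesses and then applies two early exits. The predicate below condenses
// those two rules (counts only, hypothetical names).
#include <cassert>

static bool needsAliasCheck(unsigned NumReadWriteArrays,
                            unsigned NumReadOnlyAccesses) {
  if (NumReadOnlyAccesses == 0 && NumReadWriteArrays <= 1)
    return false; // at most one written array and nothing read-only
  if (NumReadWriteArrays == 0)
    return false; // reads alone can never conflict
  return true;
}

int main() {
  assert(!needsAliasCheck(1, 0)); // single written array
  assert(!needsAliasCheck(0, 3)); // read-only group
  assert(needsAliasCheck(2, 1));  // run-time check required
  return 0;
}
// --- end of sketch -----------------------------------------------------------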
3193
3194
/// Get the smallest loop that contains @p S but is not in @p S.
3195
2.66k
static Loop *getLoopSurroundingScop(Scop &S, LoopInfo &LI) {
3196
2.66k
  // Start with the smallest loop containing the entry and expand that
3197
2.66k
  // loop until it contains all blocks in the region. If there is a loop
3198
2.66k
  // containing all blocks in the region check if it is itself contained
3199
2.66k
  // and if so take the parent loop as it will be the smallest containing
3200
2.66k
  // the region but not contained by it.
3201
2.66k
  Loop *L = LI.getLoopFor(S.getEntry());
3202
3.29k
  while (L) {
3203
2.35k
    bool AllContained = true;
3204
2.35k
    for (auto *BB : S.blocks())
3205
20.7k
      AllContained &= L->contains(BB);
3206
2.35k
    if (AllContained)
3207
1.71k
      break;
3208
635
    L = L->getParentLoop();
3209
635
  }
3210
2.66k
3211
1.71k
  return L ? (S.contains(L) ? L->getParentLoop() : L) : nullptr;
3212
2.66k
}
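// --- Illustrative sketch (not part of ScopInfo.cpp) -------------------------
// getLoopSurroundingScop above starts at the loop of the entry block and
// climbs to parent loops until every block of the region is contained. The
// standalone sketch below (hypothetical Loop type) shows only that climbing
// step; the final "is the found loop itself contained" adjustment is omitted.
#include <cassert>
#include <set>
#include <vector>

struct Loop {
  Loop *Parent;
  std::set<int> Blocks;
  bool contains(int BB) const { return Blocks.count(BB) != 0; }
};

static Loop *surroundingLoop(Loop *L, const std::vector<int> &RegionBlocks) {
  while (L) {
    bool AllContained = true;
    for (int BB : RegionBlocks)
      AllContained &= L->contains(BB);
    if (AllContained)
      break;
    L = L->Parent;
  }
  return L;
}

int main() {
  Loop Outer{nullptr, {1, 2, 3, 4}};
  Loop Inner{&Outer, {2, 3}};
  assert(surroundingLoop(&Inner, {2, 3, 4}) == &Outer); // climbs one level
  return 0;
}
// --- end of sketch -----------------------------------------------------------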
3213
3214
Scop::Scop(Region &R, ScalarEvolution &ScalarEvolution, LoopInfo &LI,
3215
           ScopDetection::DetectionContext &DC)
3216
    : SE(&ScalarEvolution), R(R), IsOptimized(false),
3217
      HasSingleExitEdge(R.getExitingBlock()), HasErrorBlock(false),
3218
      MaxLoopDepth(0), CopyStmtsNum(0), DC(DC),
3219
      IslCtx(isl_ctx_alloc(), isl_ctx_free), Context(nullptr),
3220
      Affinator(this, LI), AssumedContext(nullptr), InvalidContext(nullptr),
3221
940
      Schedule(nullptr) {
3222
940
  if (IslOnErrorAbort)
3223
940
    
    isl_options_set_on_error(getIslCtx(), ISL_ON_ERROR_ABORT);
3224
940
  buildContext();
3225
940
}
3226
3227
901
void Scop::foldSizeConstantsToRight() {
3228
901
  isl_union_set *Accessed = isl_union_map_range(getAccesses());
3229
901
3230
1.85k
  for (auto Array : arrays()) {
3231
1.85k
    if (Array->getNumberOfDimensions() <= 1)
3232
1.61k
      continue;
3233
1.85k
3234
233
    isl_space *Space = Array->getSpace();
3235
233
3236
233
    Space = isl_space_align_params(Space, isl_union_set_get_space(Accessed));
3237
233
3238
233
    if (!isl_union_set_contains(Accessed, Space)) {
3239
0
      isl_space_free(Space);
3240
0
      continue;
3241
0
    }
3242
233
3243
233
    isl_set *Elements = isl_union_set_extract_set(Accessed, Space);
3244
233
3245
233
    isl_map *Transform =
3246
233
        isl_map_universe(isl_space_map_from_set(Array->getSpace()));
3247
233
3248
233
    std::vector<int> Int;
3249
233
3250
233
    int Dims = isl_set_dim(Elements, isl_dim_set);
3251
783
    for (int i = 0; i < Dims; i++) {
3252
550
      isl_set *DimOnly =
3253
550
          isl_set_project_out(isl_set_copy(Elements), isl_dim_set, 0, i);
3254
550
      DimOnly = isl_set_project_out(DimOnly, isl_dim_set, 1, Dims - i - 1);
3255
550
      DimOnly = isl_set_lower_bound_si(DimOnly, isl_dim_set, 0, 0);
3256
550
3257
550
      isl_basic_set *DimHull = isl_set_affine_hull(DimOnly);
3258
550
3259
550
      if (i == Dims - 1) {
3260
233
        Int.push_back(1);
3261
233
        Transform = isl_map_equate(Transform, isl_dim_in, i, isl_dim_out, i);
3262
233
        isl_basic_set_free(DimHull);
3263
233
        continue;
3264
233
      }
3265
550
3266
317
      
      if (isl_basic_set_dim(DimHull, isl_dim_div) == 1) {
3267
3
        isl_aff *Diff = isl_basic_set_get_div(DimHull, 0);
3268
3
        isl_val *Val = isl_aff_get_denominator_val(Diff);
3269
3
        isl_aff_free(Diff);
3270
3
3271
3
        int ValInt = 1;
3272
3
3273
3
        if (isl_val_is_int(Val))
3274
3
          ValInt = isl_val_get_num_si(Val);
3275
3
        isl_val_free(Val);
3276
3
3277
3
        Int.push_back(ValInt);
3278
3
3279
3
        isl_constraint *C = isl_constraint_alloc_equality(
3280
3
            isl_local_space_from_space(isl_map_get_space(Transform)));
3281
3
        C = isl_constraint_set_coefficient_si(C, isl_dim_out, i, ValInt);
3282
3
        C = isl_constraint_set_coefficient_si(C, isl_dim_in, i, -1);
3283
3
        Transform = isl_map_add_constraint(Transform, C);
3284
3
        isl_basic_set_free(DimHull);
3285
3
        continue;
3286
3
      }
3287
317
3288
314
      isl_basic_set *ZeroSet = isl_basic_set_copy(DimHull);
3289
314
      ZeroSet = isl_basic_set_fix_si(ZeroSet, isl_dim_set, 0, 0);
3290
314
3291
314
      int ValInt = 1;
3292
314
      if (isl_basic_set_is_equal(ZeroSet, DimHull)) {
3293
11
        ValInt = 0;
3294
11
      }
3295
314
3296
314
      Int.push_back(ValInt);
3297
314
      Transform = isl_map_equate(Transform, isl_dim_in, i, isl_dim_out, i);
3298
314
      isl_basic_set_free(DimHull);
3299
314
      isl_basic_set_free(ZeroSet);
3300
314
    }
3301
233
3302
233
    isl_set *MappedElements = isl_map_domain(isl_map_copy(Transform));
3303
233
3304
233
    if (!isl_set_is_subset(Elements, MappedElements)) {
3305
0
      isl_set_free(Elements);
3306
0
      isl_set_free(MappedElements);
3307
0
      isl_map_free(Transform);
3308
0
      continue;
3309
0
    }
3310
233
3311
233
    isl_set_free(MappedElements);
3312
233
3313
233
    bool CanFold = true;
3314
233
3315
233
    if (Int[0] <= 1)
3316
230
      CanFold = false;
3317
233
3318
233
    unsigned NumDims = Array->getNumberOfDimensions();
3319
317
    for (unsigned i = 1; i < NumDims - 1; i++)
3320
84
      
      if (Int[0] != Int[i] && Int[i])
3321
1
        CanFold = false;
3322
233
3323
233
    if (!CanFold) {
3324
230
      isl_set_free(Elements);
3325
230
      isl_map_free(Transform);
3326
230
      continue;
3327
230
    }
3328
233
3329
3
    for (auto &Access : AccessFunctions)
3330
11
      
      if (Access->getScopArrayInfo() == Array)
3331
11
        Access->setAccessRelation(isl_map_apply_range(
3332
11
            Access->getAccessRelation(), isl_map_copy(Transform)));
3333
3
3334
3
    isl_map_free(Transform);
3335
3
3336
3
    std::vector<const SCEV *> Sizes;
3337
9
    for (unsigned i = 0; i < NumDims; i++) {
3338
6
      auto Size = Array->getDimensionSize(i);
3339
6
3340
6
      if (i == NumDims - 1)
3341
3
        Size = SE->getMulExpr(Size, SE->getConstant(Size->getType(), Int[0]));
3342
6
      Sizes.push_back(Size);
3343
6
    }
3344
3
3345
3
    Array->updateSizes(Sizes, false /* CheckConsistency */);
3346
3
3347
3
    isl_set_free(Elements);
3348
3
  }
3349
901
  isl_union_set_free(Accessed);
3350
901
  return;
3351
901
}
3352
3353
901
void Scop::finalizeAccesses() {
3354
901
  updateAccessDimensionality();
3355
901
  foldSizeConstantsToRight();
3356
901
  foldAccessRelations();
3357
901
  assumeNoOutOfBounds();
3358
901
}
3359
3360
void Scop::init(AliasAnalysis &AA, AssumptionCache &AC, DominatorTree &DT,
3361
940
                LoopInfo &LI) {
3362
940
  buildInvariantEquivalenceClasses();
3363
940
3364
940
  if (!buildDomains(&R, DT, LI))
3365
6
    return;
3366
940
3367
934
  addUserAssumptions(AC, DT, LI);
3368
934
3369
934
  // Remove empty statements.
3370
934
  // Exit early in case there are no executable statements left in this scop.
3371
934
  simplifySCoP(false);
3372
934
  if (Stmts.empty())
3373
28
    return;
3374
934
3375
934
  // The ScopStmts now have enough information to initialize themselves.
3376
906
  for (ScopStmt &Stmt : Stmts)
3377
1.57k
    Stmt.init(LI);
3378
906
3379
906
  // Check early for a feasible runtime context.
3380
906
  if (!hasFeasibleRuntimeContext())
3381
3
    return;
3382
906
3383
906
  // Check early for profitability. Afterwards it cannot change anymore,
3384
906
  // only the runtime context could become infeasible.
3385
903
  
  if (!isProfitable(UnprofitableScalarAccs)) {
3386
2
    invalidate(PROFITABLE, DebugLoc());
3387
2
    return;
3388
2
  }
3389
903
3390
901
  buildSchedule(LI);
3391
901
3392
901
  finalizeAccesses();
3393
901
3394
901
  realignParams();
3395
901
  addUserContext();
3396
901
3397
901
  // After the context was fully constructed, thus all our knowledge about
3398
901
  // the parameters is in there, we add all recorded assumptions to the
3399
901
  // assumed/invalid context.
3400
901
  addRecordedAssumptions();
3401
901
3402
901
  simplifyContexts();
3403
901
  if (!buildAliasChecks(AA))
3404
3
    return;
3405
901
3406
898
  hoistInvariantLoads();
3407
898
  verifyInvariantLoads();
3408
898
  simplifySCoP(true);
3409
898
3410
898
  // Check late for a feasible runtime context because profitability did not
3411
898
  // change.
3412
898
  if (!hasFeasibleRuntimeContext())
3413
15
    return;
3414
898
}
3415
3416
940
Scop::~Scop() {
3417
940
  isl_set_free(Context);
3418
940
  isl_set_free(AssumedContext);
3419
940
  isl_set_free(InvalidContext);
3420
940
  isl_schedule_free(Schedule);
3421
940
3422
940
  for (auto &It : ParameterIds)
3423
983
    isl_id_free(It.second);
3424
940
3425
940
  for (auto It : DomainMap)
3426
4.40k
    isl_set_free(It.second);
3427
940
3428
940
  for (auto &AS : RecordedAssumptions)
3429
189
    isl_set_free(AS.Set);
3430
940
3431
940
  // Free the alias groups
3432
154
  for (MinMaxVectorPairTy &MinMaxAccessPair : MinMaxAliasGroups) {
3433
188
    for (MinMaxAccessTy &MMA : MinMaxAccessPair.first) {
3434
188
      isl_pw_multi_aff_free(MMA.first);
3435
188
      isl_pw_multi_aff_free(MMA.second);
3436
188
    }
3437
210
    for (MinMaxAccessTy &MMA : MinMaxAccessPair.second) {
3438
210
      isl_pw_multi_aff_free(MMA.first);
3439
210
      isl_pw_multi_aff_free(MMA.second);
3440
210
    }
3441
154
  }
3442
940
3443
940
  for (const auto &IAClass : InvariantEquivClasses)
3444
353
    isl_set_free(IAClass.ExecutionContext);
3445
940
3446
940
  // Explicitly release all Scop objects and the underlying isl objects before
3447
940
  // we release the isl context.
3448
940
  Stmts.clear();
3449
940
  ScopArrayInfoSet.clear();
3450
940
  ScopArrayInfoMap.clear();
3451
940
  ScopArrayNameMap.clear();
3452
940
  AccessFunctions.clear();
3453
940
}
3454
3455
901
void Scop::updateAccessDimensionality() {
3456
901
  // Check all array accesses for each base pointer and find a (virtual) element
3457
901
  // size for the base pointer that divides all access functions.
3458
901
  for (ScopStmt &Stmt : *this)
3459
3.47k
    
    for (MemoryAccess *Access : Stmt) {
3460
3.47k
      if (!Access->isArrayKind())
3461
799
        continue;
3462
2.67k
      ScopArrayInfo *Array =
3463
2.67k
          const_cast<ScopArrayInfo *>(Access->getScopArrayInfo());
3464
2.67k
3465
2.67k
      if (Array->getNumberOfDimensions() != 1)
3466
352
        continue;
3467
2.32k
      unsigned DivisibleSize = Array->getElemSizeInBytes();
3468
2.32k
      const SCEV *Subscript = Access->getSubscript(0);
3469
2.32k
      while (!isDivisible(Subscript, DivisibleSize, *SE))
3470
2
        DivisibleSize /= 2;
3471
2.32k
      auto *Ty = IntegerType::get(SE->getContext(), DivisibleSize * 8);
3472
2.32k
      Array->updateElementType(Ty);
3473
2.32k
    }
3474
901
3475
901
  for (auto &Stmt : *this)
3476
1.56k
    for (auto &Access : Stmt)
3477
3.47k
      Access->updateDimensionality();
3478
901
}
3479
3480
901
void Scop::foldAccessRelations() {
3481
901
  for (auto &Stmt : *this)
3482
1.56k
    for (auto &Access : Stmt)
3483
3.47k
      Access->foldAccessRelation();
3484
901
}
3485
3486
901
void Scop::assumeNoOutOfBounds() {
3487
901
  for (auto &Stmt : *this)
3488
1.56k
    for (auto &Access : Stmt)
3489
3.47k
      Access->assumeNoOutOfBound();
3490
901
}
3491
3492
1.83k
void Scop::simplifySCoP(bool AfterHoisting) {
3493
7.76k
  for (auto StmtIt = Stmts.begin(), StmtEnd = Stmts.end(); StmtIt != StmtEnd;) {
3494
5.92k
    ScopStmt &Stmt = *StmtIt;
3495
5.92k
3496
5.92k
    bool RemoveStmt = Stmt.isEmpty();
3497
5.92k
    if (!RemoveStmt)
3498
3.13k
      RemoveStmt = !DomainMap[Stmt.getEntryBlock()];
3499
5.92k
3500
5.92k
    // Remove read only statements only after invariant loop hoisting.
3501
5.92k
    if (!RemoveStmt && AfterHoisting) {
3502
1.49k
      bool OnlyRead = true;
3503
2.58k
      for (MemoryAccess *MA : Stmt) {
3504
2.58k
        if (MA->isRead())
3505
1.11k
          continue;
3506
2.58k
3507
1.47k
        OnlyRead = false;
3508
1.47k
        break;
3509
2.58k
      }
3510
1.49k
3511
1.49k
      RemoveStmt = OnlyRead;
3512
1.49k
    }
3513
5.92k
3514
5.92k
    if (!RemoveStmt) {
3515
3.04k
      StmtIt++;
3516
3.04k
      continue;
3517
3.04k
    }
3518
5.92k
3519
5.92k
    // Remove the statement because it is unnecessary.
3520
2.88k
    
    if (Stmt.isRegionStmt())
3521
14
      for (BasicBlock *BB : Stmt.getRegion()->blocks())
3522
37
        StmtMap.erase(BB);
3523
2.88k
    else
3524
2.86k
      StmtMap.erase(Stmt.getBasicBlock());
3525
2.88k
3526
2.88k
    StmtIt = Stmts.erase(StmtIt);
3527
2.88k
  }
3528
1.83k
}
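// --- Illustrative sketch (not part of ScopInfo.cpp) -------------------------
// simplifySCoP above drops statements that are empty, that lost their domain,
// or that only read once invariant load hoisting has run. The same pruning
// rule on a simplified statement type (all names hypothetical):
#include <cassert>
#include <vector>

struct Stmt { bool Empty, HasDomain, HasWrite; };

static void simplify(std::vector<Stmt> &Stmts, bool AfterHoisting) {
  for (auto It = Stmts.begin(); It != Stmts.end();) {
    bool Remove =
        It->Empty || !It->HasDomain || (AfterHoisting && !It->HasWrite);
    It = Remove ? Stmts.erase(It) : It + 1;
  }
}

int main() {
  std::vector<Stmt> S = {{false, true, true},   // kept
                         {true, true, true},    // empty -> removed
                         {false, true, false}}; // read-only -> removed late
  simplify(S, /*AfterHoisting=*/true);
  assert(S.size() == 1);
  return 0;
}
// --- end of sketch -----------------------------------------------------------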
3529
3530
270
InvariantEquivClassTy *Scop::lookupInvariantEquivClass(Value *Val) {
3531
270
  LoadInst *LInst = dyn_cast<LoadInst>(Val);
3532
270
  if (!LInst)
3533
216
    return nullptr;
3534
270
3535
54
  
  if (Value *Rep = InvEquivClassVMap.lookup(LInst))
3536
4
    LInst = cast<LoadInst>(Rep);
3537
54
3538
54
  Type *Ty = LInst->getType();
3539
54
  const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
3540
139
  for (auto &IAClass : InvariantEquivClasses) {
3541
139
    if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
3542
96
      continue;
3543
139
3544
43
    auto &MAs = IAClass.InvariantAccesses;
3545
43
    for (auto *MA : MAs)
3546
49
      
      if (MA->getAccessInstruction() == Val)
3547
41
        return &IAClass;
3548
43
  }
3549
54
3550
13
  return nullptr;
3551
54
}
3552
3553
/// Check if @p MA can always be hoisted without execution context.
3554
static bool canAlwaysBeHoisted(MemoryAccess *MA, bool StmtInvalidCtxIsEmpty,
3555
                               bool MAInvalidCtxIsEmpty,
3556
319
                               bool NonHoistableCtxIsEmpty) {
3557
319
  LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
3558
319
  const DataLayout &DL = LInst->getParent()->getModule()->getDataLayout();
3559
319
  // TODO: We can provide more information for better but more expensive
3560
319
  //       results.
3561
319
  if (!isDereferenceableAndAlignedPointer(LInst->getPointerOperand(),
3562
319
                                          LInst->getAlignment(), DL))
3563
265
    return false;
3564
319
3565
319
  // If the location might be overwritten we do not hoist it unconditionally.
3566
319
  //
3567
319
  // TODO: This is probably too conservative.
3568
54
  
  if (!NonHoistableCtxIsEmpty)
3569
1
    return false;
3570
54
3571
54
  // If a dereferenceable load is in a statement that is modeled precisely we can
3572
54
  // hoist it.
3573
53
  
  if (StmtInvalidCtxIsEmpty && MAInvalidCtxIsEmpty)
3574
48
    return true;
3575
53
3576
53
  // Even if the statement is not modeled precisely we can hoist the load if it
3577
53
  // does not involve any parameters that might have been specialized by the
3578
53
  // statement domain.
3579
10
  
  for (unsigned u = 0, e = MA->getNumSubscripts(); u < e; u++)
3580
5
    
    if (!isa<SCEVConstant>(MA->getSubscript(u)))
3581
0
      return false;
3582
5
  return true;
3583
5
}
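// --- Illustrative sketch (not part of ScopInfo.cpp) -------------------------
// canAlwaysBeHoisted above combines a few checks. The predicate below restates
// them with the individual checks reduced to booleans (hypothetical names).
#include <cassert>

static bool canAlwaysHoist(bool DereferenceableAndAligned,
                           bool NonHoistableCtxEmpty, bool StmtCtxPrecise,
                           bool AccessCtxPrecise, bool OnlyConstantSubscripts) {
  if (!DereferenceableAndAligned)
    return false; // speculating the load is not known to be safe
  if (!NonHoistableCtxEmpty)
    return false; // the location might be overwritten inside the SCoP
  if (StmtCtxPrecise && AccessCtxPrecise)
    return true;  // the statement is modeled precisely
  return OnlyConstantSubscripts; // no domain-specialized parameters involved
}

int main() {
  assert(canAlwaysHoist(true, true, true, true, false));
  assert(!canAlwaysHoist(false, true, true, true, true));
  return 0;
}
// --- end of sketch -----------------------------------------------------------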
3584
3585
307
void Scop::addInvariantLoads(ScopStmt &Stmt, InvariantAccessesTy &InvMAs) {
3586
307
3587
307
  if (InvMAs.empty())
3588
102
    return;
3589
307
3590
205
  auto *StmtInvalidCtx = Stmt.getInvalidContext();
3591
205
  bool StmtInvalidCtxIsEmpty = isl_set_is_empty(StmtInvalidCtx);
3592
205
3593
205
  // Get the context under which the statement is executed but remove the error
3594
205
  // context under which this statement is reached.
3595
205
  isl_set *DomainCtx = isl_set_params(Stmt.getDomain());
3596
205
  DomainCtx = isl_set_subtract(DomainCtx, StmtInvalidCtx);
3597
205
3598
205
  if (isl_set_n_basic_set(DomainCtx) >= MaxDisjunctsInDomain) {
3599
1
    auto *AccInst = InvMAs.front().MA->getAccessInstruction();
3600
1
    invalidate(COMPLEXITY, AccInst->getDebugLoc());
3601
1
    isl_set_free(DomainCtx);
3602
1
    for (auto &InvMA : InvMAs)
3603
1
      isl_set_free(InvMA.NonHoistableCtx);
3604
1
    return;
3605
1
  }
3606
205
3607
205
  // Project out all parameters that relate to loads in the statement. Otherwise
3608
205
  // we could have cyclic dependences on the constraints under which the
3609
205
  // hoisted loads are executed and we could not determine an order in which to
3610
205
  // pre-load them. This happens because not only lower bounds are part of the
3611
205
  // domain but also upper bounds.
3612
319
  
  for (auto &InvMA : InvMAs) {
3613
319
    auto *MA = InvMA.MA;
3614
319
    Instruction *AccInst = MA->getAccessInstruction();
3615
319
    if (SE->isSCEVable(AccInst->getType())) {
3616
297
      SetVector<Value *> Values;
3617
540
      for (const SCEV *Parameter : Parameters) {
3618
540
        Values.clear();
3619
540
        findValues(Parameter, *SE, Values);
3620
540
        if (!Values.count(AccInst))
3621
441
          continue;
3622
540
3623
99
        
        if (isl_id *ParamId = getIdForParam(Parameter)) {
3624
99
          int Dim = isl_set_find_dim_by_id(DomainCtx, isl_dim_param, ParamId);
3625
99
          if (Dim >= 0)
3626
98
            DomainCtx = isl_set_eliminate(DomainCtx, isl_dim_param, Dim, 1);
3627
99
          isl_id_free(ParamId);
3628
99
        }
3629
99
      }
3630
297
    }
3631
319
  }
3632
204
3633
319
  for (auto &InvMA : InvMAs) {
3634
319
    auto *MA = InvMA.MA;
3635
319
    auto *NHCtx = InvMA.NonHoistableCtx;
3636
319
3637
319
    // Check for another invariant access that accesses the same location as
3638
319
    // MA and if found consolidate them. Otherwise create a new equivalence
3639
319
    // class at the end of InvariantEquivClasses.
3640
319
    LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
3641
319
    Type *Ty = LInst->getType();
3642
319
    const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
3643
319
3644
319
    auto *MAInvalidCtx = MA->getInvalidContext();
3645
319
    bool NonHoistableCtxIsEmpty = isl_set_is_empty(NHCtx);
3646
319
    bool MAInvalidCtxIsEmpty = isl_set_is_empty(MAInvalidCtx);
3647
319
3648
319
    isl_set *MACtx;
3649
319
    // Check if we know that this pointer can be speculatively accessed.
3650
319
    if (canAlwaysBeHoisted(MA, StmtInvalidCtxIsEmpty, MAInvalidCtxIsEmpty,
3651
53
                           NonHoistableCtxIsEmpty)) {
3652
53
      MACtx = isl_set_universe(isl_set_get_space(DomainCtx));
3653
53
      isl_set_free(MAInvalidCtx);
3654
53
      isl_set_free(NHCtx);
3655
266
    } else {
3656
266
      MACtx = isl_set_copy(DomainCtx);
3657
266
      MACtx = isl_set_subtract(MACtx, isl_set_union(MAInvalidCtx, NHCtx));
3658
266
      MACtx = isl_set_gist_params(MACtx, getContext());
3659
266
    }
3660
319
3661
319
    bool Consolidated = false;
3662
880
    for (auto &IAClass : InvariantEquivClasses) {
3663
880
      if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
3664
683
        continue;
3665
880
3666
880
      // If the pointer and the type is equal check if the access function wrt.
3667
880
      // to the domain is equal too. It can happen that the domain fixes
3668
880
      // parameter values and these can be different for distinct part of the
3669
880
      // SCoP. If this happens we cannot consolidate the loads but need to
3670
880
      // create a new invariant load equivalence class.
3671
197
      auto &MAs = IAClass.InvariantAccesses;
3672
197
      if (!MAs.empty()) {
3673
29
        auto *LastMA = MAs.front();
3674
29
3675
29
        auto *AR = isl_map_range(MA->getAccessRelation());
3676
29
        auto *LastAR = isl_map_range(LastMA->getAccessRelation());
3677
29
        bool SameAR = isl_set_is_equal(AR, LastAR);
3678
29
        isl_set_free(AR);
3679
29
        isl_set_free(LastAR);
3680
29
3681
29
        if (!SameAR)
3682
4
          continue;
3683
29
      }
3684
197
3685
197
      // Add MA to the list of accesses that are in this class.
3686
193
      MAs.push_front(MA);
3687
193
3688
193
      Consolidated = true;
3689
193
3690
193
      // Unify the execution context of the class and this statement.
3691
193
      isl_set *&IAClassDomainCtx = IAClass.ExecutionContext;
3692
193
      if (IAClassDomainCtx)
3693
25
        IAClassDomainCtx =
3694
25
            isl_set_coalesce(isl_set_union(IAClassDomainCtx, MACtx));
3695
193
      else
3696
168
        IAClassDomainCtx = MACtx;
3697
193
      break;
3698
197
    }
3699
319
3700
319
    if (Consolidated)
3701
193
      continue;
3702
319
3703
319
    // If we did not consolidate MA, thus did not find an equivalence class
3704
319
    // for it, we create a new one.
3705
126
    InvariantEquivClasses.emplace_back(
3706
126
        InvariantEquivClassTy{PointerSCEV, MemoryAccessList{MA}, MACtx, Ty});
3707
126
  }
3708
204
3709
204
  isl_set_free(DomainCtx);
3710
204
}
3711
3712
__isl_give isl_set *Scop::getNonHoistableCtx(MemoryAccess *Access,
3713
944
                                             __isl_keep isl_union_map *Writes) {
3714
944
  // TODO: Loads that are not loop carried, hence are in a statement with
3715
944
  //       zero iterators, are by construction invariant, though we
3716
944
  //       currently "hoist" them anyway. This is necessary because we allow
3717
944
  //       them to be treated as parameters (e.g., in conditions) and our code
3718
944
  //       generation would otherwise use the old value.
3719
944
3720
944
  auto &Stmt = *Access->getStatement();
3721
944
  BasicBlock *BB = Stmt.getEntryBlock();
3722
944
3723
944
  if (Access->isScalarKind() || Access->isWrite() || !Access->isAffine() ||
      Access->isMemoryIntrinsic())
3725
432
    return nullptr;
3726
944
3727
944
  // Skip accesses that have an invariant base pointer which is defined but
3728
944
  // not loaded inside the SCoP. This can happen, e.g., if a readnone call
3729
944
  // returns a pointer that is used as a base address. However, as we want
3730
944
  // to hoist indirect pointers, we allow the base pointer to be defined in
3731
944
  // the region if it is also a memory access. Each ScopArrayInfo object
3732
944
  // that has a base pointer origin has a base pointer that is loaded and
3733
944
  // that it is invariant, thus it will be hoisted too. However, if there is
3734
944
  // no base pointer origin we check that the base pointer is defined
3735
944
  // outside the region.
3736
512
  auto *LI = cast<LoadInst>(Access->getAccessInstruction());
3737
512
  if (hasNonHoistableBasePtrInScop(Access, Writes))
3738
0
    return nullptr;
3739
512
3740
512
  isl_map *AccessRelation = Access->getAccessRelation();
3741
512
  assert(!isl_map_is_empty(AccessRelation));
3742
512
3743
512
  if (isl_map_involves_dims(AccessRelation, isl_dim_in, 0,
3744
71
                            Stmt.getNumIterators())) {
3745
71
    isl_map_free(AccessRelation);
3746
71
    return nullptr;
3747
71
  }
3748
512
3749
441
  AccessRelation = isl_map_intersect_domain(AccessRelation, Stmt.getDomain());
3750
441
  isl_set *SafeToLoad;
3751
441
3752
441
  auto &DL = getFunction().getParent()->getDataLayout();
3753
441
  if (isSafeToLoadUnconditionally(LI->getPointerOperand(), LI->getAlignment(),
3754
90
                                  DL)) {
3755
90
    SafeToLoad =
3756
90
        isl_set_universe(isl_space_range(isl_map_get_space(AccessRelation)));
3757
90
    isl_map_free(AccessRelation);
3758
351
  } else if (BB != LI->getParent()) {
3759
1
    // Skip accesses in non-affine subregions as they might not be executed
3760
1
    // under the same condition as the entry of the non-affine subregion.
3761
1
    isl_map_free(AccessRelation);
3762
1
    return nullptr;
3763
350
  } else {
3764
350
    SafeToLoad = isl_map_range(AccessRelation);
3765
350
  }
3766
441
3767
440
  isl_union_map *Written = isl_union_map_intersect_range(
3768
440
      isl_union_map_copy(Writes), isl_union_set_from_set(SafeToLoad));
3769
440
  auto *WrittenCtx = isl_union_map_params(Written);
3770
440
  bool IsWritten = !isl_set_is_empty(WrittenCtx);
3771
440
3772
440
  if (!IsWritten)
3773
379
    return WrittenCtx;
3774
440
3775
61
  WrittenCtx = isl_set_remove_divs(WrittenCtx);
3776
61
  bool TooComplex = isl_set_n_basic_set(WrittenCtx) >= MaxDisjunctsInDomain;
3777
61
  if (TooComplex || !isRequiredInvariantLoad(LI)) {
3778
47
    isl_set_free(WrittenCtx);
3779
47
    return nullptr;
3780
47
  }
3781
61
3782
14
  addAssumption(INVARIANTLOAD, isl_set_copy(WrittenCtx), LI->getDebugLoc(),
3783
14
                AS_RESTRICTION);
3784
14
  return WrittenCtx;
3785
61
}
3786
3787
898
void Scop::verifyInvariantLoads() {
3788
898
  auto &RIL = getRequiredInvariantLoads();
3789
190
  for (LoadInst *LI : RIL) {
3790
190
    assert(LI && contains(LI));
3791
190
    ScopStmt *Stmt = getStmtFor(LI);
3792
190
    if (Stmt && Stmt->getArrayAccessOrNULLFor(LI)) {
3793
2
      invalidate(INVARIANTLOAD, LI->getDebugLoc());
3794
2
      return;
3795
2
    }
3796
190
  }
3797
898
}
3798
3799
898
void Scop::hoistInvariantLoads() {
3800
898
  if (!PollyInvariantLoadHoisting)
3801
751
    return;
3802
898
3803
147
  isl_union_map *Writes = getWrites();
3804
307
  for (ScopStmt &Stmt : *this) {
3805
307
    InvariantAccessesTy InvariantAccesses;
3806
307
3807
307
    for (MemoryAccess *Access : Stmt)
3808
871
      if (auto *NHCtx = getNonHoistableCtx(Access, Writes))
3809
320
        InvariantAccesses.push_back({Access, NHCtx});
3810
307
3811
307
    // Transfer the memory access from the statement to the SCoP.
3812
307
    for (auto InvMA : InvariantAccesses)
3813
320
      Stmt.removeMemoryAccess(InvMA.MA);
3814
307
    addInvariantLoads(Stmt, InvariantAccesses);
3815
307
  }
3816
147
  isl_union_map_free(Writes);
3817
147
}
3818
3819
const ScopArrayInfo *
3820
Scop::getOrCreateScopArrayInfo(Value *BasePtr, Type *ElementType,
3821
                               ArrayRef<const SCEV *> Sizes, MemoryKind Kind,
3822
3.65k
                               const char *BaseName) {
3823
3.65k
  assert((BasePtr || BaseName) &&
3824
3.65k
         "BasePtr and BaseName can not be nullptr at the same time.");
3825
3.65k
  assert(!(BasePtr && BaseName) && "BaseName is redundant.");
3826
3.62k
  auto &SAI = BasePtr ? ScopArrayInfoMap[std::make_pair(BasePtr, Kind)]
3827
24
                      : ScopArrayNameMap[BaseName];
3828
3.65k
  if (!SAI) {
3829
1.89k
    auto &DL = getFunction().getParent()->getDataLayout();
3830
1.89k
    SAI.reset(new ScopArrayInfo(BasePtr, ElementType, getIslCtx(), Sizes, Kind,
3831
1.89k
                                DL, this, BaseName));
3832
1.89k
    ScopArrayInfoSet.insert(SAI.get());
3833
1.76k
  } else {
3834
1.76k
    SAI->updateElementType(ElementType);
3835
1.76k
    // In case of mismatching array sizes, we bail out by setting the run-time
3836
1.76k
    // context to false.
3837
1.76k
    if (!SAI->updateSizes(Sizes))
3838
0
      invalidate(DELINEARIZATION, DebugLoc());
3839
1.76k
  }
3840
3.65k
  return SAI.get();
3841
3.65k
}
3842
3843
const ScopArrayInfo *
3844
Scop::createScopArrayInfo(Type *ElementType, const std::string &BaseName,
3845
24
                          const std::vector<unsigned> &Sizes) {
3846
24
  auto *DimSizeType = Type::getInt64Ty(getSE()->getContext());
3847
24
  std::vector<const SCEV *> SCEVSizes;
3848
24
3849
24
  for (auto size : Sizes)
3850
62
    if (size)
3851
62
      SCEVSizes.push_back(getSE()->getConstant(DimSizeType, size, false));
3852
62
    else
3853
0
      SCEVSizes.push_back(nullptr);
3854
24
3855
24
  auto *SAI = getOrCreateScopArrayInfo(nullptr, ElementType, SCEVSizes,
3856
24
                                       MemoryKind::Array, BaseName.c_str());
3857
24
  return SAI;
3858
24
}
3859
3860
71
const ScopArrayInfo *Scop::getScopArrayInfo(Value *BasePtr, MemoryKind Kind) {
3861
71
  auto *SAI = ScopArrayInfoMap[std::make_pair(BasePtr, Kind)].get();
3862
71
  assert(SAI && "No ScopArrayInfo available for this base pointer");
3863
71
  return SAI;
3864
71
}
3865
3866
0
std::string Scop::getContextStr() const { return stringFromIslObj(Context); }
3867
3868
0
std::string Scop::getAssumedContextStr() const {
3869
0
  assert(AssumedContext && "Assumed context not yet built");
3870
0
  return stringFromIslObj(AssumedContext);
3871
0
}
3872
3873
0
std::string Scop::getInvalidContextStr() const {
3874
0
  return stringFromIslObj(InvalidContext);
3875
0
}
3876
3877
610
std::string Scop::getNameStr() const {
3878
610
  std::string ExitName, EntryName;
3879
610
  raw_string_ostream ExitStr(ExitName);
3880
610
  raw_string_ostream EntryStr(EntryName);
3881
610
3882
610
  R.getEntry()->printAsOperand(EntryStr, false);
3883
610
  EntryStr.str();
3884
610
3885
610
  if (R.getExit()) {
3886
610
    R.getExit()->printAsOperand(ExitStr, false);
3887
610
    ExitStr.str();
3888
610
  } else
3889
0
    ExitName = "FunctionExit";
3890
610
3891
610
  return EntryName + "---" + ExitName;
3892
610
}
3893
3894
14.7k
__isl_give isl_set *Scop::getContext() const { return isl_set_copy(Context); }
3895
7.36k
__isl_give isl_space *Scop::getParamSpace() const {
3896
7.36k
  return isl_set_get_space(Context);
3897
7.36k
}
3898
3899
3.65k
__isl_give isl_set *Scop::getAssumedContext() const {
3900
3.65k
  assert(AssumedContext && "Assumed context not yet built");
3901
3.65k
  return isl_set_copy(AssumedContext);
3902
3.65k
}
3903
3904
903
bool Scop::isProfitable(bool ScalarsAreUnprofitable) const {
3905
903
  if (PollyProcessUnprofitable)
3906
900
    return true;
3907
903
3908
3
  if (isEmpty())
3909
0
    return false;
3910
3
3911
3
  unsigned OptimizableStmtsOrLoops = 0;
3912
14
  for (auto &Stmt : *this) {
3913
14
    if (Stmt.getNumIterators() == 0)
3914
0
      continue;
3915
14
3916
14
    bool ContainsArrayAccs = false;
3917
14
    bool ContainsScalarAccs = false;
3918
33
    for (auto *MA : Stmt) {
3919
33
      if (MA->isRead())
3920
15
        continue;
3921
18
      ContainsArrayAccs |= MA->isLatestArrayKind();
3922
18
      ContainsScalarAccs |= MA->isLatestScalarKind();
3923
18
    }
3924
14
3925
14
    if (!ScalarsAreUnprofitable || (ContainsArrayAccs && !ContainsScalarAccs))
3926
6
      OptimizableStmtsOrLoops += Stmt.getNumIterators();
3927
14
  }
3928
3
3929
3
  return OptimizableStmtsOrLoops > 1;
3930
3
}
3931
3932
2.74k
bool Scop::hasFeasibleRuntimeContext() const {
3933
2.74k
  auto *PositiveContext = getAssumedContext();
3934
2.74k
  auto *NegativeContext = getInvalidContext();
3935
2.74k
  PositiveContext = addNonEmptyDomainConstraints(PositiveContext);
3936
2.74k
  bool IsFeasible = !(isl_set_is_empty(PositiveContext) ||
3937
2.68k
                      isl_set_is_subset(PositiveContext, NegativeContext));
3938
2.74k
  isl_set_free(PositiveContext);
3939
2.74k
  if (!IsFeasible) {
3940
75
    isl_set_free(NegativeContext);
3941
75
    return false;
3942
75
  }
3943
2.74k
3944
2.66k
  auto *DomainContext = isl_union_set_params(getDomains());
3945
2.66k
  IsFeasible = !isl_set_is_subset(DomainContext, NegativeContext);
3946
2.66k
  IsFeasible &= !isl_set_is_subset(Context, NegativeContext);
3947
2.66k
  isl_set_free(NegativeContext);
3948
2.66k
  isl_set_free(DomainContext);
3949
2.66k
3950
2.66k
  return IsFeasible;
3951
2.74k
}
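The feasibility check above only needs an emptiness and a subset test on parameter sets. A minimal C++ sketch with two hypothetical contexts (plain isl C API, not Polly code):

#include <isl/ctx.h>
#include <isl/set.h>
#include <stdio.h>

int main() {
  isl_ctx *Ctx = isl_ctx_alloc();

  // Hypothetical assumed (positive) and invalid (negative) contexts.
  isl_set *Positive = isl_set_read_from_str(Ctx, "[n] -> { : n >= 1 }");
  isl_set *Negative = isl_set_read_from_str(Ctx, "[n] -> { : n >= 100 }");

  // Feasible iff some parameter values satisfy the assumed context without
  // falling into the invalid context.
  int Feasible = !(isl_set_is_empty(Positive) ||
                   isl_set_is_subset(Positive, Negative));
  printf("feasible: %d\n", Feasible); // 1 for these contexts (e.g. n = 1)

  isl_set_free(Positive);
  isl_set_free(Negative);
  isl_ctx_free(Ctx);
  return 0;
}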
3952
3953
555
static std::string toString(AssumptionKind Kind) {
3954
555
  switch (Kind) {
3955
3
  case ALIASING:
3956
3
    return "No-aliasing";
3957
101
  case INBOUNDS:
3958
101
    return "Inbounds";
3959
307
  case WRAPPING:
3960
307
    return "No-overflows";
3961
40
  case UNSIGNED:
3962
40
    return "Signed-unsigned";
3963
6
  case COMPLEXITY:
3964
6
    return "Low complexity";
3965
2
  case PROFITABLE:
3966
2
    return "Profitable";
3967
27
  case ERRORBLOCK:
3968
27
    return "No-error";
3969
58
  case INFINITELOOP:
3970
58
    return "Finite loop";
3971
9
  case INVARIANTLOAD:
3972
9
    return "Invariant load";
3973
2
  case DELINEARIZATION:
3974
2
    return "Delinearization";
3975
555
  }
3976
0
  llvm_unreachable("Unknown AssumptionKind!");
3977
0
}
3978
3979
6.55k
bool Scop::isEffectiveAssumption(__isl_keep isl_set *Set, AssumptionSign Sign) {
3980
6.55k
  if (Sign == AS_ASSUMPTION) {
3981
3.48k
    if (isl_set_is_subset(Context, Set))
3982
3.33k
      return false;
3983
3.48k
3984
156
    if (isl_set_is_subset(AssumedContext, Set))
3985
40
      return false;
3986
3.07k
  } else {
3987
3.07k
    if (isl_set_is_disjoint(Set, Context))
3988
2.19k
      return false;
3989
3.07k
3990
878
    if (isl_set_is_subset(Set, InvalidContext))
3991
439
      return false;
3992
878
  }
3993
555
  return true;
3994
6.55k
}
3995
3996
bool Scop::trackAssumption(AssumptionKind Kind, __isl_keep isl_set *Set,
3997
6.55k
                           DebugLoc Loc, AssumptionSign Sign) {
3998
6.55k
  if (PollyRemarksMinimal && !isEffectiveAssumption(Set, Sign))
3999
6.00k
    return false;
4000
6.55k
4001
6.55k
  // Never emit trivial assumptions as they only clutter the output.
4002
555
  if (!PollyRemarksMinimal) {
4003
0
    isl_set *Univ = nullptr;
4004
0
    if (Sign == AS_ASSUMPTION)
4005
0
      Univ = isl_set_universe(isl_set_get_space(Set));
4006
0
4007
0
    bool IsTrivial = (Sign == AS_RESTRICTION && isl_set_is_empty(Set)) ||
4008
0
                     (Sign == AS_ASSUMPTION && isl_set_is_equal(Univ, Set));
4009
0
    isl_set_free(Univ);
4010
0
4011
0
    if (IsTrivial)
4012
0
      return false;
4013
0
  }
4014
555
4015
555
  switch (Kind) {
4016
3
  case ALIASING:
4017
3
    AssumptionsAliasing++;
4018
3
    break;
4019
101
  case INBOUNDS:
4020
101
    AssumptionsInbounds++;
4021
101
    break;
4022
307
  case WRAPPING:
4023
307
    AssumptionsWrapping++;
4024
307
    break;
4025
40
  case UNSIGNED:
4026
40
    AssumptionsUnsigned++;
4027
40
    break;
4028
6
  case COMPLEXITY:
4029
6
    AssumptionsComplexity++;
4030
6
    break;
4031
2
  case PROFITABLE:
4032
2
    AssumptionsUnprofitable++;
4033
2
    break;
4034
27
  case ERRORBLOCK:
4035
27
    AssumptionsErrorBlock++;
4036
27
    break;
4037
58
  case INFINITELOOP:
4038
58
    AssumptionsInfiniteLoop++;
4039
58
    break;
4040
9
  case INVARIANTLOAD:
4041
9
    AssumptionsInvariantLoad++;
4042
9
    break;
4043
2
  case DELINEARIZATION:
4044
2
    AssumptionsDelinearization++;
4045
2
    break;
4046
555
  }
4047
555
4048
555
  auto &F = getFunction();
4049
439
  auto Suffix = Sign == AS_ASSUMPTION ? " assumption:\t" : " restriction:\t";
4050
555
  std::string Msg = toString(Kind) + Suffix + stringFromIslObj(Set);
4051
555
  emitOptimizationRemarkAnalysis(F.getContext(), DEBUG_TYPE, F, Loc, Msg);
4052
555
  return true;
4053
555
}
4054
4055
void Scop::addAssumption(AssumptionKind Kind, __isl_take isl_set *Set,
4056
6.55k
                         DebugLoc Loc, AssumptionSign Sign) {
4057
6.55k
  // Simplify the assumptions/restrictions first.
4058
6.55k
  Set = isl_set_gist_params(Set, getContext());
4059
6.55k
4060
6.55k
  if (!trackAssumption(Kind, Set, Loc, Sign)) {
4061
6.00k
    isl_set_free(Set);
4062
6.00k
    return;
4063
6.00k
  }
4064
6.55k
4065
555
  if (Sign == AS_ASSUMPTION) {
4066
116
    AssumedContext = isl_set_intersect(AssumedContext, Set);
4067
116
    AssumedContext = isl_set_coalesce(AssumedContext);
4068
439
  } else {
4069
439
    InvalidContext = isl_set_union(InvalidContext, Set);
4070
439
    InvalidContext = isl_set_coalesce(InvalidContext);
4071
439
  }
4072
555
}
4073
4074
void Scop::recordAssumption(AssumptionKind Kind, __isl_take isl_set *Set,
4075
6.72k
                            DebugLoc Loc, AssumptionSign Sign, BasicBlock *BB) {
4076
6.72k
  assert((isl_set_is_params(Set) || BB) &&
4077
6.72k
         "Assumptions without a basic block must be parameter sets");
4078
6.72k
  RecordedAssumptions.push_back({Kind, Sign, Set, Loc, BB});
4079
6.72k
}
4080
4081
901
void Scop::addRecordedAssumptions() {
4082
7.43k
  while (!RecordedAssumptions.empty()) {
4083
6.53k
    const Assumption &AS = RecordedAssumptions.pop_back_val();
4084
6.53k
4085
6.53k
    if (!AS.BB) {
4086
3.91k
      addAssumption(AS.Kind, AS.Set, AS.Loc, AS.Sign);
4087
3.91k
      continue;
4088
3.91k
    }
4089
6.53k
4090
6.53k
    // If the domain was deleted the assumptions are void.
4091
2.61k
    isl_set *Dom = getDomainConditions(AS.BB);
4092
2.61k
    if (!Dom) {
4093
7
      isl_set_free(AS.Set);
4094
7
      continue;
4095
7
    }
4096
2.61k
4097
2.61k
    // If a basic block was given use its domain to simplify the assumption.
4098
2.61k
    // In case of restrictions we know they only have to hold on the domain,
4099
2.61k
    // thus we can intersect them with the domain of the block. However, for
4100
2.61k
    // assumptions the domain has to imply them, thus:
4101
2.61k
    //                     _              _____
4102
2.61k
    //   Dom => S   <==>   A v B   <==>   A - B
4103
2.61k
    //
4104
2.61k
    // To avoid the complement we will register A - B as a restriction not an
4105
2.61k
    // assumption.
4106
2.61k
    isl_set *S = AS.Set;
4107
2.61k
    if (AS.Sign == AS_RESTRICTION)
4108
2.61k
      S = isl_set_params(isl_set_intersect(S, Dom));
4109
2.61k
    else /* (AS.Sign == AS_ASSUMPTION) */
4110
0
      S = isl_set_params(isl_set_subtract(Dom, S));
4111
2.61k
4112
2.61k
    addAssumption(AS.Kind, S, AS.Loc, AS_RESTRICTION);
4113
2.61k
  }
4114
901
}
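The rewrite described in the comment above can be reproduced in isolation. The sketch below (hypothetical sets, plain isl C API, not Polly code) shows both cases: a restriction only has to hold on the block's domain and is intersected with it, while an assumption S is registered as the restriction params(Dom - S) so that no complement has to be computed:

#include <isl/ctx.h>
#include <isl/set.h>

int main() {
  isl_ctx *Ctx = isl_ctx_alloc();

  // Hypothetical block domain and a per-block set S.
  isl_set *Dom = isl_set_read_from_str(Ctx, "[n] -> { Stmt[i] : 0 <= i < n }");
  isl_set *S = isl_set_read_from_str(Ctx, "[n] -> { Stmt[i] : i > 127 }");

  // Restriction: intersect with the domain and keep the parameters only.
  // Here this yields [n] -> { : n >= 129 }.
  isl_set *Restr =
      isl_set_params(isl_set_intersect(isl_set_copy(S), isl_set_copy(Dom)));
  isl_set_dump(Restr);

  // Assumption: Dom must imply S, so the problematic parameter values are
  // those for which some point of Dom lies outside S; registering
  // params(Dom - S) as a restriction avoids taking a complement.
  isl_set *AsRestr = isl_set_params(isl_set_subtract(Dom, S));
  isl_set_dump(AsRestr);

  isl_set_free(Restr);
  isl_set_free(AsRestr);
  isl_ctx_free(Ctx);
  return 0;
}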
4115
4116
15
void Scop::invalidate(AssumptionKind Kind, DebugLoc Loc) {
4117
15
  addAssumption(Kind, isl_set_empty(getParamSpace()), Loc, AS_ASSUMPTION);
4118
15
}
4119
4120
2.85k
__isl_give isl_set *Scop::getInvalidContext() const {
4121
2.85k
  return isl_set_copy(InvalidContext);
4122
2.85k
}
4123
4124
380
void Scop::printContext(raw_ostream &OS) const {
4125
380
  OS << "Context:\n";
4126
380
  OS.indent(4) << Context << "\n";
4127
380
4128
380
  OS.indent(4) << "Assumed Context:\n";
4129
380
  OS.indent(4) << AssumedContext << "\n";
4130
380
4131
380
  OS.indent(4) << "Invalid Context:\n";
4132
380
  OS.indent(4) << InvalidContext << "\n";
4133
380
4134
380
  unsigned Dim = 0;
4135
380
  for (const SCEV *Parameter : Parameters)
4136
583
    OS.indent(4) << "p" << Dim++ << ": " << *Parameter << "\n";
4137
380
}
4138
4139
380
void Scop::printAliasAssumptions(raw_ostream &OS) const {
4140
380
  int noOfGroups = 0;
4141
51
  for (const MinMaxVectorPairTy &Pair : MinMaxAliasGroups) {
4142
51
    if (Pair.second.size() == 0)
4143
7
      noOfGroups += 1;
4144
51
    else
4145
44
      noOfGroups += Pair.second.size();
4146
51
  }
4147
380
4148
380
  OS.indent(4) << "Alias Groups (" << noOfGroups << "):\n";
4149
380
  if (MinMaxAliasGroups.empty()) {
4150
333
    OS.indent(8) << "n/a\n";
4151
333
    return;
4152
333
  }
4153
380
4154
51
  for (const MinMaxVectorPairTy &Pair : MinMaxAliasGroups) {
4155
51
4156
51
    // If the group has no read only accesses print the write accesses.
4157
51
    if (Pair.second.empty()) {
4158
7
      OS.indent(8) << "[[";
4159
18
      for (const MinMaxAccessTy &MMANonReadOnly : Pair.first) {
4160
18
        OS << " <" << MMANonReadOnly.first << ", " << MMANonReadOnly.second
4161
18
           << ">";
4162
18
      }
4163
7
      OS << " ]]\n";
4164
7
    }
4165
51
4166
71
    for (const MinMaxAccessTy &MMAReadOnly : Pair.second) {
4167
71
      OS.indent(8) << "[[";
4168
71
      OS << " <" << MMAReadOnly.first << ", " << MMAReadOnly.second << ">";
4169
80
      for (const MinMaxAccessTy &MMANonReadOnly : Pair.first) {
4170
80
        OS << " <" << MMANonReadOnly.first << ", " << MMANonReadOnly.second
4171
80
           << ">";
4172
80
      }
4173
71
      OS << " ]]\n";
4174
71
    }
4175
51
  }
4176
47
}
4177
4178
380
void Scop::printStatements(raw_ostream &OS) const {
4179
380
  OS << "Statements {\n";
4180
380
4181
380
  for (const ScopStmt &Stmt : *this)
4182
630
    OS.indent(4) << Stmt;
4183
380
4184
380
  OS.indent(4) << "}\n";
4185
380
}
4186
4187
380
void Scop::printArrayInfo(raw_ostream &OS) const {
4188
380
  OS << "Arrays {\n";
4189
380
4190
380
  for (auto &Array : arrays())
4191
787
    Array->print(OS);
4192
380
4193
380
  OS.indent(4) << "}\n";
4194
380
4195
380
  OS.indent(4) << "Arrays (Bounds as pw_affs) {\n";
4196
380
4197
380
  for (auto &Array : arrays())
4198
787
    Array->print(OS, /* SizeAsPwAff */ true);
4199
380
4200
380
  OS.indent(4) << "}\n";
4201
380
}
4202
4203
380
void Scop::print(raw_ostream &OS) const {
4204
380
  OS.indent(4) << "Function: " << getFunction().getName() << "\n";
4205
380
  OS.indent(4) << "Region: " << getNameStr() << "\n";
4206
380
  OS.indent(4) << "Max Loop Depth:  " << getMaxLoopDepth() << "\n";
4207
380
  OS.indent(4) << "Invariant Accesses: {\n";
4208
141
  for (const auto &IAClass : InvariantEquivClasses) {
4209
141
    const auto &MAs = IAClass.InvariantAccesses;
4210
141
    if (MAs.empty()) {
4211
1
      OS.indent(12) << "Class Pointer: " << *IAClass.IdentifyingPointer << "\n";
4212
140
    } else {
4213
140
      MAs.front()->print(OS);
4214
140
      OS.indent(12) << "Execution Context: " << IAClass.ExecutionContext
4215
140
                    << "\n";
4216
140
    }
4217
141
  }
4218
380
  OS.indent(4) << "}\n";
4219
380
  printContext(OS.indent(4));
4220
380
  printArrayInfo(OS.indent(4));
4221
380
  printAliasAssumptions(OS);
4222
380
  printStatements(OS.indent(4));
4223
380
}
4224
4225
1
void Scop::dump() const { print(dbgs()); }
4226
4227
23.5k
isl_ctx *Scop::getIslCtx() const { return IslCtx.get(); }
4228
4229
__isl_give PWACtx Scop::getPwAff(const SCEV *E, BasicBlock *BB,
4230
8.00k
                                 bool NonNegative) {
4231
8.00k
  // First try to use the SCEVAffinator to generate a piecewise defined
4232
8.00k
  // affine function from @p E in the context of @p BB. If that task becomes too
4233
8.00k
  // complex, the affinator might return a nullptr. In such a case we invalidate
4234
8.00k
  // the SCoP and return a dummy value. This way we do not need to add error
4235
8.00k
  // handling code to all users of this function.
4236
8.00k
  auto PWAC = Affinator.getPwAff(E, BB);
4237
8.00k
  if (PWAC.first) {
4238
8.00k
    // TODO: We could use a heuristic and either use:
4239
8.00k
    //         SCEVAffinator::takeNonNegativeAssumption
4240
8.00k
    //       or
4241
8.00k
    //         SCEVAffinator::interpretAsUnsigned
4242
8.00k
    //       to deal with unsigned or "NonNegative" SCEVs.
4243
8.00k
    if (NonNegative)
4244
40
      Affinator.takeNonNegativeAssumption(PWAC);
4245
8.00k
    return PWAC;
4246
8.00k
  }
4247
8.00k
4248
1
  auto DL = BB ? BB->getTerminator()->getDebugLoc() : DebugLoc();
4249
1
  invalidate(COMPLEXITY, DL);
4250
1
  return Affinator.getPwAff(SE->getZero(E->getType()), BB);
4251
8.00k
}
4252
4253
9.09k
__isl_give isl_union_set *Scop::getDomains() const {
4254
9.09k
  isl_space *EmptySpace = isl_space_params_alloc(getIslCtx(), 0);
4255
9.09k
  isl_union_set *Domain = isl_union_set_empty(EmptySpace);
4256
9.09k
4257
9.09k
  for (const ScopStmt &Stmt : *this)
4258
17.0k
    Domain = isl_union_set_add_set(Domain, Stmt.getDomain());
4259
9.09k
4260
9.09k
  return Domain;
4261
9.09k
}
4262
4263
384
__isl_give isl_pw_aff *Scop::getPwAffOnly(const SCEV *E, BasicBlock *BB) {
4264
384
  PWACtx PWAC = getPwAff(E, BB);
4265
384
  isl_set_free(PWAC.second);
4266
384
  return PWAC.first;
4267
384
}
4268
4269
__isl_give isl_union_map *
4270
1.06k
Scop::getAccessesOfType(std::function<bool(MemoryAccess &)> Predicate) {
4271
1.06k
  isl_union_map *Accesses = isl_union_map_empty(getParamSpace());
4272
1.06k
4273
1.90k
  for (ScopStmt &Stmt : *this) {
4274
4.38k
    for (MemoryAccess *MA : Stmt) {
4275
4.38k
      if (!Predicate(*MA))
4276
568
        continue;
4277
4.38k
4278
3.82k
      isl_set *Domain = Stmt.getDomain();
4279
3.82k
      isl_map *AccessDomain = MA->getAccessRelation();
4280
3.82k
      AccessDomain = isl_map_intersect_domain(AccessDomain, Domain);
4281
3.82k
      Accesses = isl_union_map_add_map(Accesses, AccessDomain);
4282
3.82k
    }
4283
1.90k
  }
4284
1.06k
  return isl_union_map_coalesce(Accesses);
4285
1.06k
}
4286
4287
7
__isl_give isl_union_map *Scop::getMustWrites() {
4288
23
  return getAccessesOfType([](MemoryAccess &MA) { return MA.isMustWrite(); });
4289
7
}
4290
4291
7
__isl_give isl_union_map *Scop::getMayWrites() {
4292
23
  return getAccessesOfType([](MemoryAccess &MA) { return MA.isMayWrite(); });
4293
7
}
4294
4295
147
__isl_give isl_union_map *Scop::getWrites() {
4296
871
  return getAccessesOfType([](MemoryAccess &MA) { return MA.isWrite(); });
4297
147
}
4298
4299
0
__isl_give isl_union_map *Scop::getReads() {
4300
0
  return getAccessesOfType([](MemoryAccess &MA) { return MA.isRead(); });
4301
0
}
4302
4303
901
__isl_give isl_union_map *Scop::getAccesses() {
4304
3.47k
  return getAccessesOfType([](MemoryAccess &MA) { return true; });
4305
901
}
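A small usage sketch for the getters above (hypothetical client code, not part of Polly): every MemoryAccess is either a read or a write, so the union of getReads() and getWrites() is expected to agree with getAccesses().

#include <cassert>
#include <isl/union_map.h>
#include "polly/ScopInfo.h"

// Illustrative helper; the equality checked below is an expectation that
// follows from the predicates used by getAccessesOfType, not an API
// guarantee.
static void checkAccessCover(polly::Scop &S) {
  isl_union_map *Reads = S.getReads();
  isl_union_map *Writes = S.getWrites();
  isl_union_map *All = S.getAccesses();

  // isl_union_map_union consumes both of its operands.
  isl_union_map *Combined = isl_union_map_union(Reads, Writes);
  assert(isl_union_map_is_equal(Combined, All) == isl_bool_true);

  isl_union_map_free(Combined);
  isl_union_map_free(All);
}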
4306
4307
// Check whether @p Node is an extension node.
4308
//
4309
// @return true if @p Node is an extension node.
4310
11.1k
isl_bool isNotExtNode(__isl_keep isl_schedule_node *Node, void *User) {
4311
11.1k
  if (isl_schedule_node_get_type(Node) == isl_schedule_node_extension)
4312
13
    return isl_bool_error;
4313
11.1k
  else
4314
11.0k
    return isl_bool_true;
4315
11.1k
}
4316
4317
1.06k
bool Scop::containsExtensionNode(__isl_keep isl_schedule *Schedule) {
4318
1.06k
  return isl_schedule_foreach_schedule_node_top_down(Schedule, isNotExtNode,
4319
1.06k
                                                     nullptr) == isl_stat_error;
4320
1.06k
}
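The same top-down walk can drive other schedule-tree queries. Below is a minimal C++ sketch (hypothetical domain, plain isl C API, not Polly code) that merely counts the nodes it visits; returning isl_bool_error from the callback, as isNotExtNode does for extension nodes, aborts the walk and makes the call return isl_stat_error.

#include <isl/ctx.h>
#include <isl/schedule.h>
#include <isl/schedule_node.h>
#include <isl/union_set.h>
#include <stdio.h>

// Callback invoked for every schedule node; keeps descending.
static isl_bool countNode(isl_schedule_node *Node, void *User) {
  (void)Node;
  ++*static_cast<int *>(User);
  return isl_bool_true;
}

int main() {
  isl_ctx *Ctx = isl_ctx_alloc();
  isl_union_set *Domain =
      isl_union_set_read_from_str(Ctx, "{ S[i] : 0 <= i < 8 }");
  isl_schedule *Schedule = isl_schedule_from_domain(Domain);

  int Nodes = 0;
  isl_stat Status =
      isl_schedule_foreach_schedule_node_top_down(Schedule, countNode, &Nodes);
  printf("visited %d nodes, status %d\n", Nodes, (int)Status);

  isl_schedule_free(Schedule);
  isl_ctx_free(Ctx);
  return 0;
}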
4321
4322
921
__isl_give isl_union_map *Scop::getSchedule() const {
4323
921
  auto *Tree = getScheduleTree();
4324
921
  if (containsExtensionNode(Tree)) {
4325
4
    isl_schedule_free(Tree);
4326
4
    return nullptr;
4327
4
  }
4328
917
  auto *S = isl_schedule_get_map(Tree);
4329
917
  isl_schedule_free(Tree);
4330
917
  return S;
4331
921
}
4332
4333
2.38k
__isl_give isl_schedule *Scop::getScheduleTree() const {
4334
2.38k
  return isl_schedule_intersect_domain(isl_schedule_copy(Schedule),
4335
2.38k
                                       getDomains());
4336
2.38k
}
4337
4338
45
void Scop::setSchedule(__isl_take isl_union_map *NewSchedule) {
4339
45
  auto *S = isl_schedule_from_domain(getDomains());
4340
45
  S = isl_schedule_insert_partial_schedule(
4341
45
      S, isl_multi_union_pw_aff_from_union_map(NewSchedule));
4342
45
  isl_schedule_free(Schedule);
4343
45
  Schedule = S;
4344
45
}
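The two isl calls used above are all that is needed to rebuild a schedule tree from a flat union-map schedule. A standalone C++ sketch on a hypothetical one-statement domain (not Polly code), mirroring the calls made by Scop::setSchedule:

#include <isl/aff.h>
#include <isl/ctx.h>
#include <isl/schedule.h>
#include <isl/union_map.h>
#include <isl/union_set.h>

int main() {
  isl_ctx *Ctx = isl_ctx_alloc();

  // Hypothetical statement domain and a flat schedule for it.
  isl_union_set *Domain =
      isl_union_set_read_from_str(Ctx, "{ S[i] : 0 <= i < 16 }");
  isl_union_map *NewSchedule =
      isl_union_map_read_from_str(Ctx, "{ S[i] -> [i] }");

  // Start from a domain-only schedule and insert the partial schedule.
  isl_schedule *S = isl_schedule_from_domain(Domain);
  S = isl_schedule_insert_partial_schedule(
      S, isl_multi_union_pw_aff_from_union_map(NewSchedule));
  isl_schedule_dump(S);

  isl_schedule_free(S);
  isl_ctx_free(Ctx);
  return 0;
}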
4345
4346
30
void Scop::setScheduleTree(__isl_take isl_schedule *NewSchedule) {
4347
30
  isl_schedule_free(Schedule);
4348
30
  Schedule = NewSchedule;
4349
30
}
4350
4351
7
bool Scop::restrictDomains(__isl_take isl_union_set *Domain) {
4352
7
  bool Changed = false;
4353
18
  for (ScopStmt &Stmt : *this) {
4354
18
    isl_union_set *StmtDomain = isl_union_set_from_set(Stmt.getDomain());
4355
18
    isl_union_set *NewStmtDomain = isl_union_set_intersect(
4356
18
        isl_union_set_copy(StmtDomain), isl_union_set_copy(Domain));
4357
18
4358
18
    if (isl_union_set_is_subset(StmtDomain, NewStmtDomain)) {
4359
10
      isl_union_set_free(StmtDomain);
4360
10
      isl_union_set_free(NewStmtDomain);
4361
10
      continue;
4362
10
    }
4363
18
4364
8
    Changed = true;
4365
8
4366
8
    isl_union_set_free(StmtDomain);
4367
8
    NewStmtDomain = isl_union_set_coalesce(NewStmtDomain);
4368
8
4369
8
    if (isl_union_set_is_empty(NewStmtDomain)) {
4370
7
      Stmt.restrictDomain(isl_set_empty(Stmt.getDomainSpace()));
4371
7
      isl_union_set_free(NewStmtDomain);
4372
7
    } else
4373
1
      Stmt.restrictDomain(isl_set_from_union_set(NewStmtDomain));
4374
8
  }
4375
7
  isl_union_set_free(Domain);
4376
7
  return Changed;
4377
7
}
4378
4379
3.97k
ScalarEvolution *Scop::getSE() const { return SE; }
4380
4381
struct MapToDimensionDataTy {
4382
  int N;
4383
  isl_union_pw_multi_aff *Res;
4384
};
4385
4386
// Create a function that maps the elements of 'Set' to its N-th dimension and
4387
// add it to User->Res.
4388
//
4389
// @param Set        The input set.
4390
// @param User->N    The dimension to map to.
4391
// @param User->Res  The isl_union_pw_multi_aff to which to add the result.
4392
//
4393
// @returns   isl_stat_ok if no error occurred, otherwise isl_stat_error.
4394
1.74k
static isl_stat mapToDimension_AddSet(__isl_take isl_set *Set, void *User) {
4395
1.74k
  struct MapToDimensionDataTy *Data = (struct MapToDimensionDataTy *)User;
4396
1.74k
  int Dim;
4397
1.74k
  isl_space *Space;
4398
1.74k
  isl_pw_multi_aff *PMA;
4399
1.74k
4400
1.74k
  Dim = isl_set_dim(Set, isl_dim_set);
4401
1.74k
  Space = isl_set_get_space(Set);
4402
1.74k
  PMA = isl_pw_multi_aff_project_out_map(Space, isl_dim_set, Data->N,
4403
1.74k
                                         Dim - Data->N);
4404
1.74k
  if (Data->N > 1)
4405
440
    PMA = isl_pw_multi_aff_drop_dims(PMA, isl_dim_out, 0, Data->N - 1);
4406
1.74k
  Data->Res = isl_union_pw_multi_aff_add_pw_multi_aff(Data->Res, PMA);
4407
1.74k
4408
1.74k
  isl_set_free(Set);
4409
1.74k
4410
1.74k
  return isl_stat_ok;
4411
1.74k
}
4412
4413
// Create an isl_multi_union_aff that defines an identity mapping from the
4414
// elements of USet to their N-th dimension.
4415
//
4416
// # Example:
4417
//
4418
//            Domain: { A[i,j]; B[i,j,k] }
4419
//                 N: 1
4420
//
4421
// Resulting Mapping: { {A[i,j] -> [(j)]; B[i,j,k] -> [(j)] }
4422
//
4423
// @param USet   A union set describing the elements for which to generate a
4424
//               mapping.
4425
// @param N      The dimension to map to.
4426
// @returns      A mapping from USet to its N-th dimension.
4427
static __isl_give isl_multi_union_pw_aff *
4428
1.26k
mapToDimension(__isl_take isl_union_set *USet, int N) {
4429
1.26k
  assert(N >= 0);
4430
1.26k
  assert(USet);
4431
1.26k
  assert(!isl_union_set_is_empty(USet));
4432
1.26k
4433
1.26k
  struct MapToDimensionDataTy Data;
4434
1.26k
4435
1.26k
  auto *Space = isl_union_set_get_space(USet);
4436
1.26k
  auto *PwAff = isl_union_pw_multi_aff_empty(Space);
4437
1.26k
4438
1.26k
  Data = {N, PwAff};
4439
1.26k
4440
1.26k
  auto Res = isl_union_set_foreach_set(USet, &mapToDimension_AddSet, &Data);
4441
1.26k
  (void)Res;
4442
1.26k
4443
1.26k
  assert(Res == isl_stat_ok);
4444
1.26k
4445
1.26k
  isl_union_set_free(USet);
4446
1.26k
  return isl_multi_union_pw_aff_from_union_pw_multi_aff(Data.Res);
4447
1.26k
}
4448
4449
4.33k
void Scop::addScopStmt(BasicBlock *BB, Loop *SurroundingLoop) {
4450
4.33k
  assert(BB && "Unexpected nullptr!");
4451
4.33k
  Stmts.emplace_back(*this, *BB, SurroundingLoop);
4452
4.33k
  auto *Stmt = &Stmts.back();
4453
4.33k
  StmtMap[BB] = Stmt;
4454
4.33k
}
4455
4456
97
void Scop::addScopStmt(Region *R, Loop *SurroundingLoop) {
4457
97
  assert(R && "Unexpected nullptr!");
4458
97
  Stmts.emplace_back(*this, *R, SurroundingLoop);
4459
97
  auto *Stmt = &Stmts.back();
4460
97
  for (BasicBlock *BB : R->blocks())
4461
277
    StmtMap[BB] = Stmt;
4462
97
}
4463
4464
ScopStmt *Scop::addScopStmt(__isl_take isl_map *SourceRel,
4465
                            __isl_take isl_map *TargetRel,
4466
18
                            __isl_take isl_set *Domain) {
4467
18
#ifndef NDEBUG
4468
  isl_set *SourceDomain = isl_map_domain(isl_map_copy(SourceRel));
4469
  isl_set *TargetDomain = isl_map_domain(isl_map_copy(TargetRel));
4470
  assert(isl_set_is_subset(Domain, TargetDomain) &&
4471
         "Target access not defined for complete statement domain");
4472
  assert(isl_set_is_subset(Domain, SourceDomain) &&
4473
         "Source access not defined for complete statement domain");
4474
  isl_set_free(SourceDomain);
4475
  isl_set_free(TargetDomain);
4476
#endif
4477
18
  Stmts.emplace_back(*this, SourceRel, TargetRel, Domain);
4478
18
  CopyStmtsNum++;
4479
18
  return &(Stmts.back());
4480
18
}
4481
4482
901
void Scop::buildSchedule(LoopInfo &LI) {
4483
901
  Loop *L = getLoopSurroundingScop(*this, LI);
4484
901
  LoopStackTy LoopStack({LoopStackElementTy(L, nullptr, 0)});
4485
901
  buildSchedule(getRegion().getNode(), LoopStack, LI);
4486
901
  assert(LoopStack.size() == 1 && LoopStack.back().L == L);
4487
901
  Schedule = LoopStack[0].Schedule;
4488
901
}
4489
4490
/// To generate a schedule for the elements in a Region we traverse the Region
4491
/// in reverse-post-order and add the contained RegionNodes in traversal order
4492
/// to the schedule of the loop that is currently at the top of the LoopStack.
4493
/// For loop-free codes, this results in a correct sequential ordering.
4494
///
4495
/// Example:
4496
///           bb1(0)
4497
///         /     \.
4498
///      bb2(1)   bb3(2)
4499
///         \    /  \.
4500
///          bb4(3)  bb5(4)
4501
///             \   /
4502
///              bb6(5)
4503
///
4504
/// Including loops requires additional processing. Whenever a loop header is
4505
/// encountered, the corresponding loop is added to the @p LoopStack. Starting
4506
/// from an empty schedule, we first process all RegionNodes that are within
4507
/// this loop and complete the sequential schedule at this loop-level before
4508
/// processing any other nodes. To implement this
4509
/// loop-nodes-first-processing, the reverse post-order traversal is
4510
/// insufficient. Hence, we additionally check if the traversal yields
4511
/// sub-regions or blocks that are outside the last loop on the @p LoopStack.
4512
/// These region-nodes are then queued and only traversed after all the nodes
4513
/// within the current loop have been processed.
4514
1.76k
void Scop::buildSchedule(Region *R, LoopStackTy &LoopStack, LoopInfo &LI) {
4515
1.76k
  Loop *OuterScopLoop = getLoopSurroundingScop(*this, LI);
4516
1.76k
4517
1.76k
  ReversePostOrderTraversal<Region *> RTraversal(R);
4518
1.76k
  std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end());
4519
1.76k
  std::deque<RegionNode *> DelayList;
4520
1.76k
  bool LastRNWaiting = false;
4521
1.76k
4522
1.76k
  // Iterate over the region @p R in reverse post-order but queue
4523
1.76k
  // sub-regions/blocks iff they are not part of the last encountered but not
4524
1.76k
  // completely traversed loop. The variable LastRNWaiting is a flag to indicate
4525
1.76k
  // that we queued the last sub-region/block from the reverse post-order
4526
1.76k
  // iterator. If it is set we have to explore the next sub-region/block from
4527
1.76k
  // the iterator (if any) to guarantee progress. If it is not set we first try
4528
1.76k
  // the next queued sub-region/blocks.
4529
6.89k
  while (!WorkList.empty() || !DelayList.empty()) {
4530
5.13k
    RegionNode *RN;
4531
5.13k
4532
5.13k
    if ((LastRNWaiting && !WorkList.empty()) || DelayList.size() == 0) {
4533
5.11k
      RN = WorkList.front();
4534
5.11k
      WorkList.pop_front();
4535
5.11k
      LastRNWaiting = false;
4536
26
    } else {
4537
26
      RN = DelayList.front();
4538
26
      DelayList.pop_front();
4539
26
    }
4540
5.13k
4541
5.13k
    Loop *L = getRegionNodeLoop(RN, LI);
4542
5.13k
    if (!contains(L))
4543
1.03k
      L = OuterScopLoop;
4544
5.13k
4545
5.13k
    Loop *LastLoop = LoopStack.back().L;
4546
5.13k
    if (LastLoop != L) {
4547
1.31k
      if (LastLoop && !LastLoop->contains(L)) {
4548
26
        LastRNWaiting = true;
4549
26
        DelayList.push_back(RN);
4550
26
        continue;
4551
26
      }
4552
1.29k
      LoopStack.push_back({L, nullptr, 0});
4553
1.29k
    }
4554
5.11k
    buildSchedule(RN, LoopStack, LI);
4555
5.11k
  }
4556
1.76k
4557
1.76k
  return;
4558
1.76k
}
4559
4560
6.01k
void Scop::buildSchedule(RegionNode *RN, LoopStackTy &LoopStack, LoopInfo &LI) {
4561
6.01k
4562
6.01k
  if (RN->isSubRegion()) {
4563
1.84k
    auto *LocalRegion = RN->getNodeAs<Region>();
4564
1.84k
    if (!isNonAffineSubRegion(LocalRegion)) {
4565
1.76k
      buildSchedule(LocalRegion, LoopStack, LI);
4566
1.76k
      return;
4567
1.76k
    }
4568
1.84k
  }
4569
6.01k
4570
4.25k
  auto &LoopData = LoopStack.back();
4571
4.25k
  LoopData.NumBlocksProcessed += getNumBlocksInRegionNode(RN);
4572
4.25k
4573
4.25k
  if (auto *Stmt = getStmtFor(RN)) {
4574
1.56k
    auto *UDomain = isl_union_set_from_set(Stmt->getDomain());
4575
1.56k
    auto *StmtSchedule = isl_schedule_from_domain(UDomain);
4576
1.56k
    LoopData.Schedule = combineInSequence(LoopData.Schedule, StmtSchedule);
4577
1.56k
  }
4578
4.25k
4579
4.25k
  // Check if we just processed the last node in this loop. If we did, finalize
4580
4.25k
  // the loop by:
4581
4.25k
  //
4582
4.25k
  //   - adding new schedule dimensions
4583
4.25k
  //   - folding the resulting schedule into the parent loop schedule
4584
4.25k
  //   - dropping the loop schedule from the LoopStack.
4585
4.25k
  //
4586
4.25k
  // Then continue to check surrounding loops, which might also have been
4587
4.25k
  // completed by this node.
4588
5.54k
  while (LoopData.L &&
4589
4.10k
         LoopData.NumBlocksProcessed == getNumBlocksInLoop(LoopData.L)) {
4590
1.29k
    auto *Schedule = LoopData.Schedule;
4591
1.29k
    auto NumBlocksProcessed = LoopData.NumBlocksProcessed;
4592
1.29k
4593
1.29k
    LoopStack.pop_back();
4594
1.29k
    auto &NextLoopData = LoopStack.back();
4595
1.29k
4596
1.29k
    if (
Schedule1.29k
)
{1.26k
4597
1.26k
      auto *Domain = isl_schedule_get_domain(Schedule);
4598
1.26k
      auto *MUPA = mapToDimension(Domain, LoopStack.size());
4599
1.26k
      Schedule = isl_schedule_insert_partial_schedule(Schedule, MUPA);
4600
1.26k
      NextLoopData.Schedule =
4601
1.26k
          combineInSequence(NextLoopData.Schedule, Schedule);
4602
1.26k
    }
4603
1.29k
4604
1.29k
    NextLoopData.NumBlocksProcessed += NumBlocksProcessed;
4605
1.29k
    LoopData = NextLoopData;
4606
1.29k
  }
4607
4.25k
}
4608
4609
45.3k
ScopStmt *Scop::getStmtFor(BasicBlock *BB) const {
4610
45.3k
  auto StmtMapIt = StmtMap.find(BB);
4611
45.3k
  if (StmtMapIt == StmtMap.end())
4612
4.29k
    return nullptr;
4613
41.0k
  return StmtMapIt->second;
4614
45.3k
}
4615
4616
4.25k
ScopStmt *Scop::getStmtFor(RegionNode *RN) const {
4617
4.25k
  if (RN->isSubRegion())
4618
82
    return getStmtFor(RN->getNodeAs<Region>());
4619
4.17k
  return getStmtFor(RN->getNodeAs<BasicBlock>());
4620
4.25k
}
4621
4622
82
ScopStmt *Scop::getStmtFor(Region *R) const {
4623
82
  ScopStmt *Stmt = getStmtFor(R->getEntry());
4624
82
  assert(!Stmt || Stmt->getRegion() == R);
4625
82
  return Stmt;
4626
82
}
4627
4628
10.9k
int Scop::getRelativeLoopDepth(const Loop *L) const {
4629
10.9k
  Loop *OuterLoop =
4630
10.0k
      L ? R.outermostLoopInRegion(const_cast<Loop *>(L)) : nullptr;
4631
10.9k
  if (!OuterLoop)
4632
1.06k
    return -1;
4633
9.92k
  return L->getLoopDepth() - OuterLoop->getLoopDepth();
4634
10.9k
}
4635
4636
111
ScopArrayInfo *Scop::getArrayInfoByName(const std::string BaseName) {
4637
214
  for (auto &SAI : arrays()) {
4638
214
    if (SAI->getName() == BaseName)
4639
111
      return SAI;
4640
214
  }
4641
0
  return nullptr;
4642
111
}
4643
4644
//===----------------------------------------------------------------------===//
4645
877
void ScopInfoRegionPass::getAnalysisUsage(AnalysisUsage &AU) const {
4646
877
  AU.addRequired<LoopInfoWrapperPass>();
4647
877
  AU.addRequired<RegionInfoPass>();
4648
877
  AU.addRequired<DominatorTreeWrapperPass>();
4649
877
  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
4650
877
  AU.addRequiredTransitive<ScopDetection>();
4651
877
  AU.addRequired<AAResultsWrapperPass>();
4652
877
  AU.addRequired<AssumptionCacheTracker>();
4653
877
  AU.setPreservesAll();
4654
877
}
4655
4656
837
void updateLoopCountStatistic(ScopDetection::LoopStats Stats) {
4657
837
  NumLoopsInScop += Stats.NumLoops;
4658
837
  MaxNumLoopsInScop =
4659
837
      std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops);
4660
837
4661
837
  if (Stats.MaxDepth == 1)
4662
552
    NumScopsDepthOne++;
4663
285
  else if (Stats.MaxDepth == 2)
4664
159
    NumScopsDepthTwo++;
4665
126
  else if (Stats.MaxDepth == 3)
4666
58
    NumScopsDepthThree++;
4667
68
  else if (Stats.MaxDepth == 4)
4668
2
    NumScopsDepthFour++;
4669
66
  else if (Stats.MaxDepth == 5)
4670
0
    NumScopsDepthFive++;
4671
66
  else
4672
66
    NumScopsDepthLarger++;
4673
837
}
4674
4675
3.24k
bool ScopInfoRegionPass::runOnRegion(Region *R, RGPassManager &RGM) {
4676
3.24k
  auto &SD = getAnalysis<ScopDetection>();
4677
3.24k
4678
3.24k
  if (!SD.isMaxRegionInScop(*R))
4679
2.34k
    return false;
4680
3.24k
4681
891
  Function *F = R->getEntry()->getParent();
4682
891
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
4683
891
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
4684
891
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
4685
891
  auto const &DL = F->getParent()->getDataLayout();
4686
891
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
4687
891
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(*F);
4688
891
4689
891
  ScopBuilder SB(R, AC, AA, DL, DT, LI, SD, SE);
4690
891
  S = SB.getScop(); // take ownership of scop object
4691
891
4692
891
  if (S) {
4693
837
    ScopDetection::LoopStats Stats =
4694
837
        ScopDetection::countBeneficialLoops(&S->getRegion(), SE, LI, 0);
4695
837
    updateLoopCountStatistic(Stats);
4696
837
  }
4697
891
4698
891
  return false;
4699
3.24k
}
4700
4701
1.23k
void ScopInfoRegionPass::print(raw_ostream &OS, const Module *) const {
4702
1.23k
  if (S)
4703
339
    S->print(OS);
4704
1.23k
  else
4705
891
    OS << "Invalid Scop!\n";
4706
1.23k
}
4707
4708
char ScopInfoRegionPass::ID = 0;
4709
4710
0
Pass *polly::createScopInfoRegionPassPass() { return new ScopInfoRegionPass(); }
4711
4712
39.7k
INITIALIZE_PASS_BEGIN(ScopInfoRegionPass, "polly-scops",
4713
39.7k
                      "Polly - Create polyhedral description of Scops", false,
4714
39.7k
                      false);
4715
39.7k
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
4716
39.7k
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker);
4717
39.7k
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
4718
39.7k
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
4719
39.7k
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
4720
39.7k
INITIALIZE_PASS_DEPENDENCY(ScopDetection);
4721
39.7k
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
4722
39.7k
INITIALIZE_PASS_END(ScopInfoRegionPass, "polly-scops",
4723
                    "Polly - Create polyhedral description of Scops", false,
4724
                    false)
4725
4726
//===----------------------------------------------------------------------===//
4727
44
void ScopInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
4728
44
  AU.addRequired<LoopInfoWrapperPass>();
4729
44
  AU.addRequired<RegionInfoPass>();
4730
44
  AU.addRequired<DominatorTreeWrapperPass>();
4731
44
  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
4732
44
  AU.addRequiredTransitive<ScopDetection>();
4733
44
  AU.addRequired<AAResultsWrapperPass>();
4734
44
  AU.addRequired<AssumptionCacheTracker>();
4735
44
  AU.setPreservesAll();
4736
44
}
4737
4738
49
bool ScopInfoWrapperPass::runOnFunction(Function &F) {
4739
49
  auto &SD = getAnalysis<ScopDetection>();
4740
49
4741
49
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
4742
49
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
4743
49
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
4744
49
  auto const &DL = F.getParent()->getDataLayout();
4745
49
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
4746
49
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
4747
49
4748
49
  /// Create polyhedral description of scops for all the valid regions of a
4749
49
  /// function.
4750
49
  for (auto &It : SD) {
4751
49
    Region *R = const_cast<Region *>(It);
4752
49
    if (!SD.isMaxRegionInScop(*R))
4753
0
      continue;
4754
49
4755
49
    ScopBuilder SB(R, AC, AA, DL, DT, LI, SD, SE);
4756
49
    std::unique_ptr<Scop> S = SB.getScop();
4757
49
    if (!S)
4758
3
      continue;
4759
46
    bool Inserted =
4760
46
        RegionToScopMap.insert(std::make_pair(R, std::move(S))).second;
4761
46
    assert(Inserted && "Building Scop for the same region twice!");
4762
46
    (void)Inserted;
4763
46
  }
4764
49
  return false;
4765
49
}
4766
4767
23
void ScopInfoWrapperPass::print(raw_ostream &OS, const Module *) const {
4768
21
  for (auto &It : RegionToScopMap) {
4769
21
    if (It.second)
4770
21
      It.second->print(OS);
4771
21
    else
4772
0
      OS << "Invalid Scop!\n";
4773
21
  }
4774
23
}
4775
4776
char ScopInfoWrapperPass::ID = 0;
4777
4778
0
Pass *polly::createScopInfoWrapperPassPass() {
4779
0
  return new ScopInfoWrapperPass();
4780
0
}
4781
4782
39.7k
INITIALIZE_PASS_BEGIN(
4783
39.7k
    ScopInfoWrapperPass, "polly-function-scops",
4784
39.7k
    "Polly - Create polyhedral description of all Scops of a function", false,
4785
39.7k
    false);
4786
39.7k
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
4787
39.7k
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker);
4788
39.7k
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
4789
39.7k
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
4790
39.7k
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
4791
39.7k
INITIALIZE_PASS_DEPENDENCY(ScopDetection);
4792
39.7k
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
4793
39.7k
INITIALIZE_PASS_END(
4794
    ScopInfoWrapperPass, "polly-function-scops",
4795
    "Polly - Create polyhedral description of all Scops of a function", false,
4796
    false)