Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/polly/lib/Analysis/ScopBuilder.cpp
Line | Count | Source
1
//===- ScopBuilder.cpp ----------------------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// Create a polyhedral description for a static control flow region.
10
//
11
// The pass creates a polyhedral description of the SCoPs detected by the SCoP
12
// detection, derived from their LLVM-IR code.
13
//
14
//===----------------------------------------------------------------------===//
15
16
#include "polly/ScopBuilder.h"
17
#include "polly/Options.h"
18
#include "polly/ScopDetection.h"
19
#include "polly/ScopInfo.h"
20
#include "polly/Support/GICHelper.h"
21
#include "polly/Support/ISLTools.h"
22
#include "polly/Support/SCEVValidator.h"
23
#include "polly/Support/ScopHelper.h"
24
#include "polly/Support/VirtualInstruction.h"
25
#include "llvm/ADT/ArrayRef.h"
26
#include "llvm/ADT/EquivalenceClasses.h"
27
#include "llvm/ADT/PostOrderIterator.h"
28
#include "llvm/ADT/Statistic.h"
29
#include "llvm/Analysis/AliasAnalysis.h"
30
#include "llvm/Analysis/Loads.h"
31
#include "llvm/Analysis/LoopInfo.h"
32
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
33
#include "llvm/Analysis/RegionInfo.h"
34
#include "llvm/Analysis/RegionIterator.h"
35
#include "llvm/Analysis/ScalarEvolution.h"
36
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
37
#include "llvm/IR/BasicBlock.h"
38
#include "llvm/IR/DataLayout.h"
39
#include "llvm/IR/DebugLoc.h"
40
#include "llvm/IR/DerivedTypes.h"
41
#include "llvm/IR/Dominators.h"
42
#include "llvm/IR/Function.h"
43
#include "llvm/IR/InstrTypes.h"
44
#include "llvm/IR/Instruction.h"
45
#include "llvm/IR/Instructions.h"
46
#include "llvm/IR/Type.h"
47
#include "llvm/IR/Use.h"
48
#include "llvm/IR/Value.h"
49
#include "llvm/Support/CommandLine.h"
50
#include "llvm/Support/Compiler.h"
51
#include "llvm/Support/Debug.h"
52
#include "llvm/Support/ErrorHandling.h"
53
#include "llvm/Support/raw_ostream.h"
54
#include <cassert>
55
56
using namespace llvm;
57
using namespace polly;
58
59
3.76k
#define DEBUG_TYPE "polly-scops"
60
61
STATISTIC(ScopFound, "Number of valid Scops");
62
STATISTIC(RichScopFound, "Number of Scops containing a loop");
63
STATISTIC(InfeasibleScops,
64
          "Number of SCoPs with statically infeasible context.");
65
66
bool polly::ModelReadOnlyScalars;
67
68
// The maximal number of dimensions we allow during invariant load construction.
69
// More complex access ranges will result in very high compile time and are also
70
// unlikely to result in good code. This value is very high and should only
71
// trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006).
72
static int const MaxDimensionsInAccessRange = 9;
73
74
static cl::opt<bool, true> XModelReadOnlyScalars(
75
    "polly-analyze-read-only-scalars",
76
    cl::desc("Model read-only scalar values in the scop description"),
77
    cl::location(ModelReadOnlyScalars), cl::Hidden, cl::ZeroOrMore,
78
    cl::init(true), cl::cat(PollyCategory));
79
80
static cl::opt<int>
81
    OptComputeOut("polly-analysis-computeout",
82
                  cl::desc("Bound the scop analysis by a maximal amount of "
83
                           "computational steps (0 means no bound)"),
84
                  cl::Hidden, cl::init(800000), cl::ZeroOrMore,
85
                  cl::cat(PollyCategory));
86
87
static cl::opt<bool> PollyAllowDereferenceOfAllFunctionParams(
88
    "polly-allow-dereference-of-all-function-parameters",
89
    cl::desc(
90
        "Treat all parameters to functions that are pointers as dereferenceable."
91
        " This is useful for invariant load hoisting, since we can generate"
92
        " fewer runtime checks. This is only valid if all pointers passed to"
93
        " functions are always initialized, so that Polly can choose to hoist"
94
        " their loads."),
95
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));
96
97
static cl::opt<unsigned> RunTimeChecksMaxArraysPerGroup(
98
    "polly-rtc-max-arrays-per-group",
99
    cl::desc("The maximal number of arrays to compare in each alias group."),
100
    cl::Hidden, cl::ZeroOrMore, cl::init(20), cl::cat(PollyCategory));
101
102
static cl::opt<int> RunTimeChecksMaxAccessDisjuncts(
103
    "polly-rtc-max-array-disjuncts",
104
    cl::desc("The maximal number of disjuncts allowed in memory accesses to "
105
             "build RTCs."),
106
    cl::Hidden, cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));
107
108
static cl::opt<unsigned> RunTimeChecksMaxParameters(
109
    "polly-rtc-max-parameters",
110
    cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden,
111
    cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));
112
113
static cl::opt<bool> UnprofitableScalarAccs(
114
    "polly-unprofitable-scalar-accs",
115
    cl::desc("Count statements with scalar accesses as not optimizable"),
116
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));
117
118
static cl::opt<std::string> UserContextStr(
119
    "polly-context", cl::value_desc("isl parameter set"),
120
    cl::desc("Provide additional constraints on the context parameters"),
121
    cl::init(""), cl::cat(PollyCategory));
122
123
static cl::opt<bool> DetectFortranArrays(
124
    "polly-detect-fortran-arrays",
125
    cl::desc("Detect Fortran arrays and use this for code generation"),
126
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));
127
128
static cl::opt<bool> DetectReductions("polly-detect-reductions",
129
                                      cl::desc("Detect and exploit reductions"),
130
                                      cl::Hidden, cl::ZeroOrMore,
131
                                      cl::init(true), cl::cat(PollyCategory));
132
133
// Multiplicative reductions can be disabled separately as these kinds of
134
// operations can overflow easily. Additive reductions and bit operations
135
// are in contrast pretty stable.
136
static cl::opt<bool> DisableMultiplicativeReductions(
137
    "polly-disable-multiplicative-reductions",
138
    cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore,
139
    cl::init(false), cl::cat(PollyCategory));
140
141
enum class GranularityChoice { BasicBlocks, ScalarIndependence, Stores };
142
143
static cl::opt<GranularityChoice> StmtGranularity(
144
    "polly-stmt-granularity",
145
    cl::desc(
146
        "Algorithm to use for splitting basic blocks into multiple statements"),
147
    cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb",
148
                          "One statement per basic block"),
149
               clEnumValN(GranularityChoice::ScalarIndependence, "scalar-indep",
150
                          "Scalar independence heuristic"),
151
               clEnumValN(GranularityChoice::Stores, "store",
152
                          "Store-level granularity")),
153
    cl::init(GranularityChoice::ScalarIndependence), cl::cat(PollyCategory));
154
155
1.20k
void ScopBuilder::buildInvariantEquivalenceClasses() {
156
1.20k
  DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses;
157
1.20k
158
1.20k
  const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
159
1.20k
  for (LoadInst *LInst : RIL) {
160
290
    const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());
161
290
162
290
    Type *Ty = LInst->getType();
163
290
    LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)];
164
290
    if (ClassRep) {
165
36
      scop->addInvariantLoadMapping(LInst, ClassRep);
166
36
      continue;
167
36
    }
168
254
169
254
    ClassRep = LInst;
170
254
    scop->addInvariantEquivClass(
171
254
        InvariantEquivClassTy{PointerSCEV, MemoryAccessList(), nullptr, Ty});
172
254
  }
173
1.20k
}
174
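
The function above canonicalizes required invariant loads: loads with the same pointer SCEV and the same type fall into one class, and the first such load becomes the class representative. A minimal standalone sketch of the same pattern, with Polly's types replaced by plain ones (illustrative only, not part of ScopBuilder.cpp):

  #include <map>
  #include <string>
  #include <utility>
  #include <vector>

  struct Load { std::string PointerExpr; std::string Type; };

  // Returns, for every load, the index of its class representative.
  std::vector<int> classify(const std::vector<Load> &Loads) {
    std::map<std::pair<std::string, std::string>, int> EquivClasses;
    std::vector<int> Rep(Loads.size());
    for (int I = 0, E = Loads.size(); I != E; ++I) {
      auto Key = std::make_pair(Loads[I].PointerExpr, Loads[I].Type);
      auto Res = EquivClasses.try_emplace(Key, I);
      Rep[I] = Res.first->second; // first load with this (pointer, type) wins
    }
    return Rep;
  }
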
175
void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
176
                                   Region *NonAffineSubRegion,
177
294
                                   bool IsExitBlock) {
178
294
  // PHI nodes that are in the exit block of the region, hence if IsExitBlock is
179
294
  // true, are not modeled as ordinary PHI nodes as they are not part of the
180
294
  // region. However, we model the operands in the predecessor blocks that are
181
294
  // part of the region as regular scalar accesses.
182
294
183
294
  // If we can synthesize a PHI we can skip it, however only if it is in
184
294
  // the region. If it is not it can only be in the exit block of the region.
185
294
  // In this case we model the operands but not the PHI itself.
186
294
  auto *Scope = LI.getLoopFor(PHI->getParent());
187
294
  if (!IsExitBlock && canSynthesize(PHI, *scop, &SE, Scope))
188
7
    return;
189
287
190
287
  // PHI nodes are modeled as if they had been demoted prior to the SCoP
191
287
  // detection. Hence, the PHI is a load of a new memory location in which the
192
287
  // incoming value was written at the end of the incoming basic block.
193
287
  bool OnlyNonAffineSubRegionOperands = true;
194
892
  for (unsigned u = 0; u < PHI->getNumIncomingValues(); u++) {
195
605
    Value *Op = PHI->getIncomingValue(u);
196
605
    BasicBlock *OpBB = PHI->getIncomingBlock(u);
197
605
    ScopStmt *OpStmt = scop->getIncomingStmtFor(PHI->getOperandUse(u));
198
605
199
605
    // Do not build PHI dependences inside a non-affine subregion, but make
200
605
    // sure that the necessary scalar values are still made available.
201
605
    if (NonAffineSubRegion && NonAffineSubRegion->contains(OpBB)) {
202
20
      auto *OpInst = dyn_cast<Instruction>(Op);
203
20
      if (!OpInst || !NonAffineSubRegion->contains(OpInst))
204
12
        ensureValueRead(Op, OpStmt);
205
20
      continue;
206
20
    }
207
585
208
585
    OnlyNonAffineSubRegionOperands = false;
209
585
    ensurePHIWrite(PHI, OpStmt, OpBB, Op, IsExitBlock);
210
585
  }
211
287
212
287
  if (!OnlyNonAffineSubRegionOperands && !IsExitBlock) {
213
238
    addPHIReadAccess(PHIStmt, PHI);
214
238
  }
215
287
}
216
217
void ScopBuilder::buildScalarDependences(ScopStmt *UserStmt,
218
7.88k
                                         Instruction *Inst) {
219
7.88k
  assert(!isa<PHINode>(Inst));
220
7.88k
221
7.88k
  // Pull-in required operands.
222
7.88k
  for (Use &Op : Inst->operands())
223
13.9k
    ensureValueRead(Op.get(), UserStmt);
224
7.88k
}
225
226
// Create a sequence of two schedules. Either argument may be null and is
227
// interpreted as the empty schedule. Can also return null if both schedules are
228
// empty.
229
3.94k
static isl::schedule combineInSequence(isl::schedule Prev, isl::schedule Succ) {
230
3.94k
  if (!Prev)
231
2.79k
    return Succ;
232
1.15k
  if (!Succ)
233
0
    return Prev;
234
1.15k
235
1.15k
  return Prev.sequence(Succ);
236
1.15k
}
237
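
combineInSequence treats a null schedule as the empty schedule, which makes it usable as a fold over statement and loop schedules. The same null-tolerant combine, sketched with std::optional standing in for isl::schedule (an assumed simplification, not the real type):

  #include <optional>
  #include <string>

  static std::optional<std::string>
  combineInSequenceSketch(std::optional<std::string> Prev,
                          std::optional<std::string> Succ) {
    if (!Prev)
      return Succ;
    if (!Succ)
      return Prev;
    return *Prev + "; " + *Succ; // stands in for Prev.sequence(Succ)
  }
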
238
// Create an isl_multi_union_aff that defines an identity mapping from the
239
// elements of USet to their N-th dimension.
240
//
241
// # Example:
242
//
243
//            Domain: { A[i,j]; B[i,j,k] }
244
//                 N: 1
245
//
246
// Resulting Mapping: { A[i,j] -> [(j)]; B[i,j,k] -> [(j)] }
247
//
248
// @param USet   A union set describing the elements for which to generate a
249
//               mapping.
250
// @param N      The dimension to map to.
251
// @returns      A mapping from USet to its N-th dimension.
252
1.63k
static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, int N) {
253
1.63k
  assert(N >= 0);
254
1.63k
  assert(USet);
255
1.63k
  assert(!USet.is_empty());
256
1.63k
257
1.63k
  auto Result = isl::union_pw_multi_aff::empty(USet.get_space());
258
1.63k
259
2.57k
  for (isl::set S : USet.get_set_list()) {
260
2.57k
    int Dim = S.dim(isl::dim::set);
261
2.57k
    auto PMA = isl::pw_multi_aff::project_out_map(S.get_space(), isl::dim::set,
262
2.57k
                                                  N, Dim - N);
263
2.57k
    if (N > 1)
264
620
      PMA = PMA.drop_dims(isl::dim::out, 0, N - 1);
265
2.57k
266
2.57k
    Result = Result.add_pw_multi_aff(PMA);
267
2.57k
  }
268
1.63k
269
1.63k
  return isl::multi_union_pw_aff(isl::union_pw_multi_aff(Result));
270
1.63k
}
271
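
The mapping computed for the documented example can also be written down as an explicit relation with the isl C++ bindings (illustrative sketch; mapToDimension itself returns the same function as an isl::multi_union_pw_aff, and the header name is the one Polly uses):

  #include "isl/isl-noexceptions.h"

  isl::union_map expectedMapToDim1(isl::ctx Ctx) {
    // Every element of { A[i,j]; B[i,j,k] } maps to its dimension 1, i.e. j.
    return isl::union_map(Ctx, "{ A[i,j] -> [j]; B[i,j,k] -> [j] }");
  }
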
272
1.16k
void ScopBuilder::buildSchedule() {
273
1.16k
  Loop *L = getLoopSurroundingScop(*scop, LI);
274
1.16k
  LoopStackTy LoopStack({LoopStackElementTy(L, nullptr, 0)});
275
1.16k
  buildSchedule(scop->getRegion().getNode(), LoopStack);
276
1.16k
  assert(LoopStack.size() == 1 && LoopStack.back().L == L);
277
1.16k
  scop->setScheduleTree(LoopStack[0].Schedule);
278
1.16k
}
279
280
/// To generate a schedule for the elements in a Region we traverse the Region
281
/// in reverse-post-order and add the contained RegionNodes in traversal order
282
/// to the schedule of the loop that is currently at the top of the LoopStack.
283
/// For loop-free codes, this results in a correct sequential ordering.
284
///
285
/// Example:
286
///           bb1(0)
287
///         /     \.
288
///      bb2(1)   bb3(2)
289
///         \    /  \.
290
///          bb4(3)  bb5(4)
291
///             \   /
292
///              bb6(5)
293
///
294
/// Including loops requires additional processing. Whenever a loop header is
295
/// encountered, the corresponding loop is added to the @p LoopStack. Starting
296
/// from an empty schedule, we first process all RegionNodes that are within
297
/// this loop and complete the sequential schedule at this loop-level before
298
/// processing any other nodes. To implement this
299
/// loop-nodes-first-processing, the reverse post-order traversal is
300
/// insufficient. Hence, we additionally check if the traversal yields
301
/// sub-regions or blocks that are outside the last loop on the @p LoopStack.
302
/// These region-nodes are then queued and only traversed after all nodes
303
/// within the current loop have been processed.
304
2.38k
void ScopBuilder::buildSchedule(Region *R, LoopStackTy &LoopStack) {
305
2.38k
  Loop *OuterScopLoop = getLoopSurroundingScop(*scop, LI);
306
2.38k
307
2.38k
  ReversePostOrderTraversal<Region *> RTraversal(R);
308
2.38k
  std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end());
309
2.38k
  std::deque<RegionNode *> DelayList;
310
2.38k
  bool LastRNWaiting = false;
311
2.38k
312
2.38k
  // Iterate over the region @p R in reverse post-order but queue
313
2.38k
  // sub-regions/blocks iff they are not part of the last encountered but not
314
2.38k
  // completely traversed loop. The variable LastRNWaiting is a flag to indicate
315
2.38k
  // that we queued the last sub-region/block from the reverse post-order
316
2.38k
  // iterator. If it is set we have to explore the next sub-region/block from
317
2.38k
  // the iterator (if any) to guarantee progress. If it is not set we first try
318
2.38k
  // the next queued sub-region/blocks.
319
9.16k
  while (!WorkList.empty() || !DelayList.empty()) {
320
6.78k
    RegionNode *RN;
321
6.78k
322
6.78k
    if ((LastRNWaiting && !WorkList.empty()) || DelayList.empty()) {
323
6.78k
      RN = WorkList.front();
324
6.78k
      WorkList.pop_front();
325
6.78k
      LastRNWaiting = false;
326
6.78k
    } else {
327
0
      RN = DelayList.front();
328
0
      DelayList.pop_front();
329
0
    }
330
6.78k
331
6.78k
    Loop *L = getRegionNodeLoop(RN, LI);
332
6.78k
    if (!scop->contains(L))
333
1.48k
      L = OuterScopLoop;
334
6.78k
335
6.78k
    Loop *LastLoop = LoopStack.back().L;
336
6.78k
    if (LastLoop != L) {
337
1.66k
      if (LastLoop && !LastLoop->contains(L)) {
338
0
        LastRNWaiting = true;
339
0
        DelayList.push_back(RN);
340
0
        continue;
341
0
      }
342
1.66k
      LoopStack.push_back({L, nullptr, 0});
343
1.66k
    }
344
6.78k
    buildSchedule(RN, LoopStack);
345
6.78k
  }
346
2.38k
}
347
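
The delay-queue mechanics used above can be isolated into a small generic skeleton (types reduced to a work list of opaque nodes; it assumes, as the traversal order guarantees here, that InCurrentLoop eventually holds for every delayed node):

  #include <deque>

  template <typename Node, typename InLoopFn, typename VisitFn>
  void visitWithDelay(std::deque<Node> WorkList, InLoopFn InCurrentLoop,
                      VisitFn Visit) {
    std::deque<Node> DelayList;
    bool LastRNWaiting = false;
    while (!WorkList.empty() || !DelayList.empty()) {
      Node N;
      if ((LastRNWaiting && !WorkList.empty()) || DelayList.empty()) {
        N = WorkList.front();
        WorkList.pop_front();
        LastRNWaiting = false;
      } else {
        N = DelayList.front();
        DelayList.pop_front();
      }
      if (!InCurrentLoop(N)) { // outside the open loop: park the node
        LastRNWaiting = true;
        DelayList.push_back(N);
        continue;
      }
      Visit(N); // may close loops, changing what "current loop" means
    }
  }
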
348
7.94k
void ScopBuilder::buildSchedule(RegionNode *RN, LoopStackTy &LoopStack) {
349
7.94k
  if (RN->isSubRegion()) {
350
2.49k
    auto *LocalRegion = RN->getNodeAs<Region>();
351
2.49k
    if (!scop->isNonAffineSubRegion(LocalRegion)) {
352
2.38k
      buildSchedule(LocalRegion, LoopStack);
353
2.38k
      return;
354
2.38k
    }
355
5.55k
  }
356
5.55k
357
5.55k
  assert(LoopStack.rbegin() != LoopStack.rend());
358
5.55k
  auto LoopData = LoopStack.rbegin();
359
5.55k
  LoopData->NumBlocksProcessed += getNumBlocksInRegionNode(RN);
360
5.55k
361
5.55k
  for (auto *Stmt : scop->getStmtListFor(RN)) {
362
2.31k
    isl::union_set UDomain{Stmt->getDomain()};
363
2.31k
    auto StmtSchedule = isl::schedule::from_domain(UDomain);
364
2.31k
    LoopData->Schedule = combineInSequence(LoopData->Schedule, StmtSchedule);
365
2.31k
  }
366
5.55k
367
5.55k
  // Check if we just processed the last node in this loop. If we did, finalize
368
5.55k
  // the loop by:
369
5.55k
  //
370
5.55k
  //   - adding new schedule dimensions
371
5.55k
  //   - folding the resulting schedule into the parent loop schedule
372
5.55k
  //   - dropping the loop schedule from the LoopStack.
373
5.55k
  //
374
5.55k
  // Then continue to check surrounding loops, which might also have been
375
5.55k
  // completed by this node.
376
5.55k
  size_t Dimension = LoopStack.size();
377
7.21k
  while (LoopData->L &&
378
7.21k
         LoopData->NumBlocksProcessed == getNumBlocksInLoop(LoopData->L)) {
379
1.66k
    isl::schedule Schedule = LoopData->Schedule;
380
1.66k
    auto NumBlocksProcessed = LoopData->NumBlocksProcessed;
381
1.66k
382
1.66k
    assert(std::next(LoopData) != LoopStack.rend());
383
1.66k
    ++LoopData;
384
1.66k
    --Dimension;
385
1.66k
386
1.66k
    if (Schedule) {
387
1.63k
      isl::union_set Domain = Schedule.get_domain();
388
1.63k
      isl::multi_union_pw_aff MUPA = mapToDimension(Domain, Dimension);
389
1.63k
      Schedule = Schedule.insert_partial_schedule(MUPA);
390
1.63k
      LoopData->Schedule = combineInSequence(LoopData->Schedule, Schedule);
391
1.63k
    }
392
1.66k
393
1.66k
    LoopData->NumBlocksProcessed += NumBlocksProcessed;
394
1.66k
  }
395
5.55k
  // Now pop all loops processed above from the LoopStack.
396
5.55k
  LoopStack.erase(LoopStack.begin() + Dimension, LoopStack.end());
397
5.55k
}
398
399
21.9k
void ScopBuilder::buildEscapingDependences(Instruction *Inst) {
400
21.9k
  // Check for uses of this instruction outside the scop. Because we do not
401
21.9k
  // iterate over such instructions and therefore did not "ensure" the existence
402
21.9k
  // of a write, we must determine such uses here.
403
21.9k
  if (scop->isEscaping(Inst))
404
93
    ensureValueWrite(Inst);
405
21.9k
}
406
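
A value "escapes" when it has a user outside the SCoP; such values need a scalar write so that the value can be handed out of the generated region. A reduced sketch of that check (an assumed helper, not Polly's actual implementation, which additionally handles synthesizable values and exit PHIs):

  #include "llvm/Analysis/RegionInfo.h"
  #include "llvm/IR/Instruction.h"

  static bool escapesRegion(llvm::Instruction *Inst, const llvm::Region &R) {
    for (llvm::User *U : Inst->users()) {
      auto *UI = llvm::dyn_cast<llvm::Instruction>(U);
      if (UI && !R.contains(UI))
        return true; // used after the region: model a value write
    }
    return false;
  }
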
407
/// Check that a value is a Fortran Array descriptor.
408
///
409
/// We check if V has the following structure:
410
/// %"struct.array1_real(kind=8)" = type { i8*, i<zz>, i<zz>,
411
///                                   [<num> x %struct.descriptor_dimension] }
412
///
413
///
414
/// %struct.descriptor_dimension = type { i<zz>, i<zz>, i<zz> }
415
///
416
/// 1. V's type name starts with "struct.array"
417
/// 2. V's type has layout as shown.
418
/// 3. Final member of V's type has name "struct.descriptor_dimension",
419
/// 4. "struct.descriptor_dimension" has layout as shown.
420
/// 5. Consistent use of i<zz> where <zz> is some fixed integer number.
421
///
422
/// We are interested in such types since this is the code that dragonegg
423
/// generates for Fortran array descriptors.
424
///
425
/// @param V the Value to be checked.
426
///
427
/// @returns True if V is a Fortran array descriptor, False otherwise.
428
10
bool isFortranArrayDescriptor(Value *V) {
429
10
  PointerType *PTy = dyn_cast<PointerType>(V->getType());
430
10
431
10
  if (!PTy)
432
0
    return false;
433
10
434
10
  Type *Ty = PTy->getElementType();
435
10
  assert(Ty && "Ty expected to be initialized");
436
10
  auto *StructArrTy = dyn_cast<StructType>(Ty);
437
10
438
10
  if (!(StructArrTy && StructArrTy->hasName()))
439
0
    return false;
440
10
441
10
  if (!StructArrTy->getName().startswith("struct.array"))
442
0
    return false;
443
10
444
10
  if (StructArrTy->getNumElements() != 4)
445
0
    return false;
446
10
447
10
  const ArrayRef<Type *> ArrMemberTys = StructArrTy->elements();
448
10
449
10
  // i8* match
450
10
  if (ArrMemberTys[0] != Type::getInt8PtrTy(V->getContext()))
451
0
    return false;
452
10
453
10
  // Get a reference to the int type and check that all the members
454
10
  // share the same int type
455
10
  Type *IntTy = ArrMemberTys[1];
456
10
  if (ArrMemberTys[2] != IntTy)
457
0
    return false;
458
10
459
10
  // type: [<num> x %struct.descriptor_dimension]
460
10
  ArrayType *DescriptorDimArrayTy = dyn_cast<ArrayType>(ArrMemberTys[3]);
461
10
  if (!DescriptorDimArrayTy)
462
0
    return false;
463
10
464
10
  // type: %struct.descriptor_dimension := type { ixx, ixx, ixx }
465
10
  StructType *DescriptorDimTy =
466
10
      dyn_cast<StructType>(DescriptorDimArrayTy->getElementType());
467
10
468
10
  if (!(DescriptorDimTy && DescriptorDimTy->hasName()))
469
0
    return false;
470
10
471
10
  if (DescriptorDimTy->getName() != "struct.descriptor_dimension")
472
0
    return false;
473
10
474
10
  if (DescriptorDimTy->getNumElements() != 3)
475
0
    return false;
476
10
477
30
  
  for (auto MemberTy : DescriptorDimTy->elements()) {
478
30
    if (MemberTy != IntTy)
479
0
      return false;
480
30
  }
481
10
482
10
  return true;
483
10
}
484
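
The checks above accept, for example, the following layout. A hypothetical construction of a matching descriptor with the LLVM C++ API, to make the expected shape concrete (illustration only; the dimension count 2 is arbitrary):

  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/LLVMContext.h"

  llvm::StructType *makeFortranDescriptor(llvm::LLVMContext &C) {
    auto *IntTy = llvm::Type::getInt64Ty(C); // the consistent i<zz> type
    auto *DimTy = llvm::StructType::create(C, {IntTy, IntTy, IntTy},
                                           "struct.descriptor_dimension");
    return llvm::StructType::create(
        C,
        {llvm::Type::getInt8PtrTy(C), IntTy, IntTy,
         llvm::ArrayType::get(DimTy, 2)},
        "struct.array2_real(kind=8)");
  }
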
485
5
Value *ScopBuilder::findFADAllocationVisible(MemAccInst Inst) {
486
5
  // match: 4.1 & 4.2 store/load
487
5
  if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst))
488
0
    return nullptr;
489
5
490
5
  // match: 4
491
5
  if (Inst.getAlignment() != 8)
492
1
    return nullptr;
493
4
494
4
  Value *Address = Inst.getPointerOperand();
495
4
496
4
  const BitCastInst *Bitcast = nullptr;
497
4
  // [match: 3]
498
4
  if (auto *Slot = dyn_cast<GetElementPtrInst>(Address)) {
499
3
    Value *TypedMem = Slot->getPointerOperand();
500
3
    // match: 2
501
3
    Bitcast = dyn_cast<BitCastInst>(TypedMem);
502
3
  } else {
503
1
    // match: 2
504
1
    Bitcast = dyn_cast<BitCastInst>(Address);
505
1
  }
506
4
507
4
  if (!Bitcast)
508
1
    return nullptr;
509
3
510
3
  auto *MallocMem = Bitcast->getOperand(0);
511
3
512
3
  // match: 1
513
3
  auto *MallocCall = dyn_cast<CallInst>(MallocMem);
514
3
  if (!MallocCall)
515
1
    return nullptr;
516
2
517
2
  Function *MallocFn = MallocCall->getCalledFunction();
518
2
  if (!(MallocFn && MallocFn->hasName() && MallocFn->getName() == "malloc"))
519
0
    return nullptr;
520
2
521
2
  // Find all uses of the malloc'd memory.
522
2
  // We are looking for a "store" into a struct with the type being the Fortran
523
2
  // descriptor type
524
4
  
  for (auto user : MallocMem->users()) {
525
4
    /// match: 5
526
4
    auto *MallocStore = dyn_cast<StoreInst>(user);
527
4
    if (!MallocStore)
528
2
      continue;
529
2
530
2
    auto *DescriptorGEP =
531
2
        dyn_cast<GEPOperator>(MallocStore->getPointerOperand());
532
2
    if (!DescriptorGEP)
533
0
      continue;
534
2
535
2
    // match: 5
536
2
    auto DescriptorType =
537
2
        dyn_cast<StructType>(DescriptorGEP->getSourceElementType());
538
2
    if (!(DescriptorType && DescriptorType->hasName()))
539
0
      continue;
540
2
541
2
    Value *Descriptor = dyn_cast<Value>(DescriptorGEP->getPointerOperand());
542
2
543
2
    if (!Descriptor)
544
0
      continue;
545
2
546
2
    if (!isFortranArrayDescriptor(Descriptor))
547
0
      continue;
548
2
549
2
    return Descriptor;
550
2
  }
551
2
552
2
  
  return nullptr;
553
2
}
554
555
13
Value *ScopBuilder::findFADAllocationInvisible(MemAccInst Inst) {
556
13
  // match: 3
557
13
  if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst))
558
0
    return nullptr;
559
13
560
13
  Value *Slot = Inst.getPointerOperand();
561
13
562
13
  LoadInst *MemLoad = nullptr;
563
13
  // [match: 2]
564
13
  if (auto *SlotGEP = dyn_cast<GetElementPtrInst>(Slot)) {
565
11
    // match: 1
566
11
    MemLoad = dyn_cast<LoadInst>(SlotGEP->getPointerOperand());
567
11
  } else {
568
2
    // match: 1
569
2
    MemLoad = dyn_cast<LoadInst>(Slot);
570
2
  }
571
13
572
13
  if (!MemLoad)
573
5
    return nullptr;
574
8
575
8
  auto *BitcastOperator =
576
8
      dyn_cast<BitCastOperator>(MemLoad->getPointerOperand());
577
8
  if (!BitcastOperator)
578
0
    return nullptr;
579
8
580
8
  Value *Descriptor = dyn_cast<Value>(BitcastOperator->getOperand(0));
581
8
  if (!Descriptor)
582
0
    return nullptr;
583
8
584
8
  if (!isFortranArrayDescriptor(Descriptor))
585
0
    return nullptr;
586
8
587
8
  return Descriptor;
588
8
}
589
590
1.16k
void ScopBuilder::addRecordedAssumptions() {
591
7.99k
  for (auto &AS : llvm::reverse(scop->recorded_assumptions())) {
592
7.99k
593
7.99k
    if (!AS.BB) {
594
5.11k
      scop->addAssumption(AS.Kind, AS.Set, AS.Loc, AS.Sign,
595
5.11k
                          nullptr /* BasicBlock */);
596
5.11k
      continue;
597
5.11k
    }
598
2.87k
599
2.87k
    // If the domain was deleted the assumptions are void.
600
2.87k
    isl_set *Dom = scop->getDomainConditions(AS.BB).release();
601
2.87k
    if (!Dom)
602
0
      continue;
603
2.87k
604
2.87k
    // If a basic block was given use its domain to simplify the assumption.
605
2.87k
    // In case of restrictions we know they only have to hold on the domain,
606
2.87k
    // thus we can intersect them with the domain of the block. However, for
607
2.87k
    // assumptions the domain has to imply them, thus:
608
2.87k
    //   Dom => S   <==>   !A v B   <==>   !(A - B)      (with A = Dom, B = S)
609
2.87k
    //
610
2.87k
    //
611
2.87k
    // To avoid the complement we will register A - B as a restriction not an
612
2.87k
    // assumption.
613
2.87k
    isl_set *S = AS.Set.copy();
614
2.87k
    if (AS.Sign == AS_RESTRICTION)
615
2.87k
      S = isl_set_params(isl_set_intersect(S, Dom));
616
0
    else /* (AS.Sign == AS_ASSUMPTION) */
617
0
      S = isl_set_params(isl_set_subtract(Dom, S));
618
2.87k
619
2.87k
    scop->addAssumption(AS.Kind, isl::manage(S), AS.Loc, AS_RESTRICTION, AS.BB);
620
2.87k
  }
621
1.16k
  scop->clearRecordedAssumptions();
622
1.16k
}
623
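
The subtraction trick can be seen on a concrete pair of sets; the values are hypothetical and use the isl C++ bindings:

  isl::set violations(isl::ctx Ctx) {
    isl::set Dom(Ctx, "{ [i] : 0 <= i < 10 }");
    isl::set S(Ctx, "{ [i] : i < 8 }");
    // "Dom => S" fails exactly on Dom - S = { [i] : 8 <= i < 10 }; recording
    // that set as a restriction avoids the complement in the formula above.
    return Dom.subtract(S);
  }
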
624
3.63k
bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt) {
625
3.63k
  Value *Val = Inst.getValueOperand();
626
3.63k
  Type *ElementType = Val->getType();
627
3.63k
  Value *Address = Inst.getPointerOperand();
628
3.63k
  const SCEV *AccessFunction =
629
3.63k
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
630
3.63k
  const SCEVUnknown *BasePointer =
631
3.63k
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
632
3.63k
  enum MemoryAccess::AccessType AccType =
633
3.63k
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
634
3.63k
635
3.63k
  if (auto *BitCast = dyn_cast<BitCastInst>(Address)) {
636
107
    auto *Src = BitCast->getOperand(0);
637
107
    auto *SrcTy = Src->getType();
638
107
    auto *DstTy = BitCast->getType();
639
107
    // Do not try to delinearize non-sized (opaque) pointers.
640
107
    if ((SrcTy->isPointerTy() && !SrcTy->getPointerElementType()->isSized()) ||
641
107
        
        (DstTy->isPointerTy() && !DstTy->getPointerElementType()->isSized())) {
642
1
      return false;
643
1
    }
644
106
    if (SrcTy->isPointerTy() && DstTy->isPointerTy() &&
645
106
        DL.getTypeAllocSize(SrcTy->getPointerElementType()) ==
646
106
            DL.getTypeAllocSize(DstTy->getPointerElementType()))
647
63
      Address = Src;
648
106
  }
649
3.63k
650
3.63k
  auto *GEP = dyn_cast<GetElementPtrInst>(Address);
651
3.63k
  if (!GEP)
652
871
    return false;
653
2.76k
654
2.76k
  std::vector<const SCEV *> Subscripts;
655
2.76k
  std::vector<int> Sizes;
656
2.76k
  std::tie(Subscripts, Sizes) = getIndexExpressionsFromGEP(GEP, SE);
657
2.76k
  auto *BasePtr = GEP->getOperand(0);
658
2.76k
659
2.76k
  if (auto *BasePtrCast = dyn_cast<BitCastInst>(BasePtr))
660
18
    BasePtr = BasePtrCast->getOperand(0);
661
2.76k
662
2.76k
  // Check for identical base pointers to ensure that we do not miss index
663
2.76k
  // offsets that have been added before this GEP is applied.
664
2.76k
  if (BasePtr != BasePointer->getValue())
665
90
    return false;
666
2.67k
667
2.67k
  std::vector<const SCEV *> SizesSCEV;
668
2.67k
669
2.67k
  const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
670
2.67k
671
2.67k
  Loop *SurroundingLoop = Stmt->getSurroundingLoop();
672
2.78k
  for (auto *Subscript : Subscripts) {
673
2.78k
    InvariantLoadsSetTy AccessILS;
674
2.78k
    if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, Subscript, SE,
675
2.78k
                      &AccessILS))
676
152
      return false;
677
2.63k
678
2.63k
    for (LoadInst *LInst : AccessILS)
679
47
      if (!ScopRIL.count(LInst))
680
3
        return false;
681
2.63k
  }
682
2.67k
683
2.67k
  
  if (Sizes.empty())
684
2.27k
    return false;
685
246
686
246
  SizesSCEV.push_back(nullptr);
687
246
688
246
  for (auto V : Sizes)
689
265
    SizesSCEV.push_back(SE.getSCEV(
690
265
        ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V)));
691
246
692
246
  addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
693
246
                 true, Subscripts, SizesSCEV, Val);
694
246
  return true;
695
246
}
696
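
What this function recovers is easiest to see on a fixed-size source array (assumed input; the identifiers below do not appear in the report):

  float A[128][64];

  void storeElem(long i, long j, float v) {
    // The GEP for A[i][j] carries both index expressions, so
    // getIndexExpressionsFromGEP yields Subscripts = {i, j} and
    // Sizes = {64}; the leading nullptr pushed into SizesSCEV marks the
    // outermost, unbounded dimension.
    A[i][j] = v;
  }
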
697
3.39k
bool ScopBuilder::buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt) {
698
3.39k
  if (!PollyDelinearize)
699
6
    return false;
700
3.38k
701
3.38k
  Value *Address = Inst.getPointerOperand();
702
3.38k
  Value *Val = Inst.getValueOperand();
703
3.38k
  Type *ElementType = Val->getType();
704
3.38k
  unsigned ElementSize = DL.getTypeAllocSize(ElementType);
705
3.38k
  enum MemoryAccess::AccessType AccType =
706
3.38k
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
707
3.38k
708
3.38k
  const SCEV *AccessFunction =
709
3.38k
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
710
3.38k
  const SCEVUnknown *BasePointer =
711
3.38k
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
712
3.38k
713
3.38k
  assert(BasePointer && "Could not find base pointer");
714
3.38k
715
3.38k
  auto &InsnToMemAcc = scop->getInsnToMemAccMap();
716
3.38k
  auto AccItr = InsnToMemAcc.find(Inst);
717
3.38k
  if (AccItr == InsnToMemAcc.end())
718
3.19k
    return false;
719
187
720
187
  std::vector<const SCEV *> Sizes = {nullptr};
721
187
722
187
  Sizes.insert(Sizes.end(), AccItr->second.Shape->DelinearizedSizes.begin(),
723
187
               AccItr->second.Shape->DelinearizedSizes.end());
724
187
725
187
  // In case only the element size is contained in the 'Sizes' array, the
726
187
  // access does not access a real multi-dimensional array. Hence, we allow
727
187
  // the normal single-dimensional access construction to handle this.
728
187
  if (Sizes.size() == 1)
729
1
    return false;
730
186
731
186
  // Remove the element size. This information is already provided by the
732
186
  // ElementSize parameter. In case the element size of this access and the
733
186
  // element size used for delinearization differs the delinearization is
734
186
  // incorrect. Hence, we invalidate the scop.
735
186
  //
736
186
  // TODO: Handle delinearization with differing element sizes.
737
186
  auto DelinearizedSize =
738
186
      cast<SCEVConstant>(Sizes.back())->getAPInt().getSExtValue();
739
186
  Sizes.pop_back();
740
186
  if (ElementSize != DelinearizedSize)
741
2
    scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent());
742
186
743
186
  addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
744
186
                 true, AccItr->second.DelinearizedSubscripts, Sizes, Val);
745
186
  return true;
746
186
}
747
748
3.68k
bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt) {
749
3.68k
  auto *MemIntr = dyn_cast_or_null<MemIntrinsic>(Inst);
750
3.68k
751
3.68k
  if (MemIntr == nullptr)
752
3.66k
    return false;
753
19
754
19
  auto *L = LI.getLoopFor(Inst->getParent());
755
19
  auto *LengthVal = SE.getSCEVAtScope(MemIntr->getLength(), L);
756
19
  assert(LengthVal);
757
19
758
19
  // Check if the length val is actually affine or if we overapproximate it
759
19
  InvariantLoadsSetTy AccessILS;
760
19
  const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
761
19
762
19
  Loop *SurroundingLoop = Stmt->getSurroundingLoop();
763
19
  bool LengthIsAffine = isAffineExpr(&scop->getRegion(), SurroundingLoop,
764
19
                                     LengthVal, SE, &AccessILS);
765
19
  for (LoadInst *LInst : AccessILS)
766
0
    if (!ScopRIL.count(LInst))
767
0
      LengthIsAffine = false;
768
19
  if (!LengthIsAffine)
769
2
    LengthVal = nullptr;
770
19
771
19
  auto *DestPtrVal = MemIntr->getDest();
772
19
  assert(DestPtrVal);
773
19
774
19
  auto *DestAccFunc = SE.getSCEVAtScope(DestPtrVal, L);
775
19
  assert(DestAccFunc);
776
19
  // Ignore accesses to "NULL".
777
19
  // TODO: We could use this to optimize the region further, e.g., intersect
778
19
  //       the context with
779
19
  //          isl_set_complement(isl_set_params(getDomain()))
780
19
  //       as we know it would be undefined to execute this instruction anyway.
781
19
  if (DestAccFunc->isZero())
782
2
    return true;
783
17
784
17
  auto *DestPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(DestAccFunc));
785
17
  assert(DestPtrSCEV);
786
17
  DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV);
787
17
  addArrayAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(),
788
17
                 IntegerType::getInt8Ty(DestPtrVal->getContext()),
789
17
                 LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr},
790
17
                 Inst.getValueOperand());
791
17
792
17
  auto *MemTrans = dyn_cast<MemTransferInst>(MemIntr);
793
17
  if (!MemTrans)
794
10
    return true;
795
7
796
7
  auto *SrcPtrVal = MemTrans->getSource();
797
7
  assert(SrcPtrVal);
798
7
799
7
  auto *SrcAccFunc = SE.getSCEVAtScope(SrcPtrVal, L);
800
7
  assert(SrcAccFunc);
801
7
  // Ignore accesses to "NULL".
802
7
  // TODO: See above TODO
803
7
  if (SrcAccFunc->isZero())
804
0
    return true;
805
7
806
7
  auto *SrcPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(SrcAccFunc));
807
7
  assert(SrcPtrSCEV);
808
7
  SrcAccFunc = SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV);
809
7
  addArrayAccess(Stmt, Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(),
810
7
                 IntegerType::getInt8Ty(SrcPtrVal->getContext()),
811
7
                 LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr},
812
7
                 Inst.getValueOperand());
813
7
814
7
  return true;
815
7
}
816
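
For example (assumed input), a memcpy of N doubles becomes two i8-element accesses whose extra "subscript" is the length:

  #include <cstring>

  void copyRow(double *Dst, const double *Src, long N) {
    // Modeled as a MUST_WRITE of Dst[0 .. 8N) and a READ of Src[0 .. 8N);
    // if the length expression is not affine, it is dropped and the access
    // range is overapproximated.
    std::memcpy(Dst, Src, N * sizeof(double));
  }
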
817
3.66k
bool ScopBuilder::buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt) {
818
3.66k
  auto *CI = dyn_cast_or_null<CallInst>(Inst);
819
3.66k
820
3.66k
  if (CI == nullptr)
821
3.63k
    return false;
822
28
823
28
  if (CI->doesNotAccessMemory() || isIgnoredIntrinsic(CI) || isDebugCall(CI))
824
14
    return true;
825
14
826
14
  bool ReadOnly = false;
827
14
  auto *AF = SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0);
828
14
  auto *CalledFunction = CI->getCalledFunction();
829
14
  switch (AA.getModRefBehavior(CalledFunction)) {
830
14
  case FMRB_UnknownModRefBehavior:
831
0
    llvm_unreachable("Unknown mod ref behaviour cannot be represented.");
832
14
  case FMRB_DoesNotAccessMemory:
833
0
    return true;
834
14
  case FMRB_DoesNotReadMemory:
835
0
  case FMRB_OnlyAccessesInaccessibleMem:
836
0
  case FMRB_OnlyAccessesInaccessibleOrArgMem:
837
0
    return false;
838
8
  case FMRB_OnlyReadsMemory:
839
8
    GlobalReads.emplace_back(Stmt, CI);
840
8
    return true;
841
4
  case FMRB_OnlyReadsArgumentPointees:
842
4
    ReadOnly = true;
843
4
    LLVM_FALLTHROUGH;
844
6
  case FMRB_OnlyAccessesArgumentPointees: {
845
6
    auto AccType = ReadOnly ? MemoryAccess::READ : MemoryAccess::MAY_WRITE;
846
6
    Loop *L = LI.getLoopFor(Inst->getParent());
847
16
    for (const auto &Arg : CI->arg_operands()) {
848
16
      if (!Arg->getType()->isPointerTy())
849
6
        continue;
850
10
851
10
      auto *ArgSCEV = SE.getSCEVAtScope(Arg, L);
852
10
      if (ArgSCEV->isZero())
853
4
        continue;
854
6
855
6
      auto *ArgBasePtr = cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
856
6
      addArrayAccess(Stmt, Inst, AccType, ArgBasePtr->getValue(),
857
6
                     ArgBasePtr->getType(), false, {AF}, {nullptr}, CI);
858
6
    }
859
6
    return true;
860
0
  }
861
0
  }
862
0
863
0
  return true;
864
0
}
865
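
As an illustration (assumed input, not from the report), the mod/ref cases above map to familiar source-level attributes:

  __attribute__((pure)) double vecSum(const double *V, long N);

  double caller(double *A, long N) {
    // 'pure' lowers to 'readonly': the FMRB_OnlyReadsMemory case records the
    // call in GlobalReads. A 'readonly argmemonly' callee would instead take
    // the FMRB_OnlyReadsArgumentPointees path, adding one READ access per
    // pointer argument; a 'readnone' call needs no accesses at all.
    return vecSum(A, N);
  }
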
866
3.20k
void ScopBuilder::buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt) {
867
3.20k
  Value *Address = Inst.getPointerOperand();
868
3.20k
  Value *Val = Inst.getValueOperand();
869
3.20k
  Type *ElementType = Val->getType();
870
3.20k
  enum MemoryAccess::AccessType AccType =
871
3.20k
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
872
3.20k
873
3.20k
  const SCEV *AccessFunction =
874
3.20k
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
875
3.20k
  const SCEVUnknown *BasePointer =
876
3.20k
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
877
3.20k
878
3.20k
  assert(BasePointer && "Could not find base pointer");
879
3.20k
  AccessFunction = SE.getMinusSCEV(AccessFunction, BasePointer);
880
3.20k
881
3.20k
  // Check if the access depends on a loop contained in a non-affine subregion.
882
3.20k
  bool isVariantInNonAffineLoop = false;
883
3.20k
  SetVector<const Loop *> Loops;
884
3.20k
  findLoops(AccessFunction, Loops);
885
3.20k
  for (const Loop *L : Loops)
886
1.80k
    if (Stmt->contains(L)) {
887
8
      isVariantInNonAffineLoop = true;
888
8
      break;
889
8
    }
890
3.20k
891
3.20k
  InvariantLoadsSetTy AccessILS;
892
3.20k
893
3.20k
  Loop *SurroundingLoop = Stmt->getSurroundingLoop();
894
3.20k
  bool IsAffine = !isVariantInNonAffineLoop &&
895
3.20k
                  isAffineExpr(&scop->getRegion(), SurroundingLoop,
896
3.19k
                               AccessFunction, SE, &AccessILS);
897
3.20k
898
3.20k
  const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
899
3.20k
  for (LoadInst *LInst : AccessILS)
900
51
    if (!ScopRIL.count(LInst))
901
7
      IsAffine = false;
902
3.20k
903
3.20k
  if (!IsAffine && AccType == MemoryAccess::MUST_WRITE)
904
25
    AccType = MemoryAccess::MAY_WRITE;
905
3.20k
906
3.20k
  addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
907
3.20k
                 IsAffine, {AccessFunction}, {nullptr}, Val);
908
3.20k
}
909
910
3.68k
void ScopBuilder::buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt) {
911
3.68k
  if (buildAccessMemIntrinsic(Inst, Stmt))
912
19
    return;
913
3.66k
914
3.66k
  if (buildAccessCallInst(Inst, Stmt))
915
28
    return;
916
3.63k
917
3.63k
  if (buildAccessMultiDimFixed(Inst, Stmt))
918
246
    return;
919
3.39k
920
3.39k
  if (buildAccessMultiDimParam(Inst, Stmt))
921
186
    return;
922
3.20k
923
3.20k
  buildAccessSingleDim(Inst, Stmt);
924
3.20k
}
925
926
1.20k
void ScopBuilder::buildAccessFunctions() {
927
9.27k
  for (auto &Stmt : *scop) {
928
9.27k
    if (Stmt.isBlockStmt()) {
929
9.15k
      buildAccessFunctions(&Stmt, *Stmt.getBasicBlock());
930
9.15k
      continue;
931
9.15k
    }
932
122
933
122
    Region *R = Stmt.getRegion();
934
122
    for (BasicBlock *BB : R->blocks())
935
336
      buildAccessFunctions(&Stmt, *BB, R);
936
122
  }
937
1.20k
938
1.20k
  // Build write accesses for values that are used after the SCoP.
939
1.20k
  // The instructions defining them might be synthesizable and therefore not
940
1.20k
  // contained in any statement, hence we iterate over the original instructions
941
1.20k
  // to identify all escaping values.
942
5.96k
  for (BasicBlock *BB : scop->getRegion().blocks()) {
943
5.96k
    for (Instruction &Inst : *BB)
944
21.9k
      buildEscapingDependences(&Inst);
945
5.96k
  }
946
1.20k
}
947
948
21.4k
bool ScopBuilder::shouldModelInst(Instruction *Inst, Loop *L) {
949
21.4k
  return !Inst->isTerminator() && !isIgnoredIntrinsic(Inst) &&
950
21.4k
         
         !canSynthesize(Inst, *scop, &SE, L);
951
21.4k
}
952
953
/// Generate a name for a statement.
954
///
955
/// @param BB     The basic block the statement will represent.
956
/// @param BBIdx  The index of the @p BB relative to other BBs/regions.
957
/// @param Count  The index of the created statement in @p BB.
958
/// @param IsMain Whether this is the main statement for @p BB. If true,
959
///               no suffix will be added.
960
/// @param IsLast Uses a special indicator for the last statement of a BB.
961
static std::string makeStmtName(BasicBlock *BB, long BBIdx, int Count,
962
9.15k
                                bool IsMain, bool IsLast = false) {
963
9.15k
  std::string Suffix;
964
9.15k
  if (!IsMain) {
965
3.52k
    if (UseInstructionNames)
966
3.51k
      Suffix = '_';
967
3.52k
    if (IsLast)
968
2.84k
      Suffix += "last";
969
677
    else if (Count < 26)
970
673
      Suffix += 'a' + Count;
971
4
    else
972
4
      Suffix += std::to_string(Count);
973
3.52k
  }
974
9.15k
  return getIslCompatibleName("Stmt", BB, BBIdx, Suffix, UseInstructionNames);
975
9.15k
}
976
977
/// Generate a name for a statement that represents a non-affine subregion.
978
///
979
/// @param R    The region the statement will represent.
980
/// @param RIdx The index of the @p R relative to other BBs/regions.
981
122
static std::string makeStmtName(Region *R, long RIdx) {
982
122
  return getIslCompatibleName("Stmt", R->getNameStr(), RIdx, "",
983
122
                              UseInstructionNames);
984
122
}
985
986
697
void ScopBuilder::buildSequentialBlockStmts(BasicBlock *BB, bool SplitOnStore) {
987
697
  Loop *SurroundingLoop = LI.getLoopFor(BB);
988
697
989
697
  int Count = 0;
990
697
  long BBIdx = scop->getNextStmtIdx();
991
697
  std::vector<Instruction *> Instructions;
992
2.61k
  for (Instruction &Inst : *BB) {
993
2.61k
    if (shouldModelInst(&Inst, SurroundingLoop))
994
1.00k
      Instructions.push_back(&Inst);
995
2.61k
    if (Inst.getMetadata("polly_split_after") ||
996
2.61k
        
        (SplitOnStore && isa<StoreInst>(Inst))) {
997
5
      std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
998
5
      scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
999
5
      Count++;
1000
5
      Instructions.clear();
1001
5
    }
1002
2.61k
  }
1003
697
1004
697
  std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
1005
697
  scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
1006
697
}
1007
1008
/// Is @p Inst an ordered instruction?
1009
///
1010
/// An unordered instruction is an instruction, such that a sequence of
1011
/// unordered instructions can be permuted without changing semantics. Any
1012
/// instruction for which this is not always the case is ordered.
1013
6.08k
static bool isOrderedInstruction(Instruction *Inst) {
1014
6.08k
  return Inst->mayHaveSideEffects() || Inst->mayReadOrWriteMemory();
1015
6.08k
}
1016
1017
/// Join instructions to the same statement if one uses the scalar result of the
1018
/// other.
1019
static void joinOperandTree(EquivalenceClasses<Instruction *> &UnionFind,
1020
4.93k
                            ArrayRef<Instruction *> ModeledInsts) {
1021
6.08k
  for (Instruction *Inst : ModeledInsts) {
1022
6.08k
    if (isa<PHINode>(Inst))
1023
135
      continue;
1024
5.95k
1025
10.4k
    
    for (Use &Op : Inst->operands()) {
1026
10.4k
      Instruction *OpInst = dyn_cast<Instruction>(Op.get());
1027
10.4k
      if (!OpInst)
1028
3.23k
        continue;
1029
7.21k
1030
7.21k
      // Check if OpInst is in the BB and is a modeled instruction.
1031
7.21k
      auto OpVal = UnionFind.findValue(OpInst);
1032
7.21k
      if (OpVal == UnionFind.end())
1033
4.65k
        continue;
1034
2.56k
1035
2.56k
      UnionFind.unionSets(Inst, OpInst);
1036
2.56k
    }
1037
5.95k
  }
1038
4.93k
}
1039
1040
/// Ensure that the order of ordered instructions does not change.
1041
///
1042
/// If we encounter an ordered instruction enclosed in instructions belonging to
1043
/// a different statement (which might as well contain ordered instructions, but
1044
/// this is not tested here), join them.
1045
static void
1046
joinOrderedInstructions(EquivalenceClasses<Instruction *> &UnionFind,
1047
4.93k
                        ArrayRef<Instruction *> ModeledInsts) {
1048
4.93k
  SetVector<Instruction *> SeenLeaders;
1049
6.08k
  for (Instruction *Inst : ModeledInsts) {
1050
6.08k
    if (!isOrderedInstruction(Inst))
1051
3.32k
      continue;
1052
2.76k
1053
2.76k
    Instruction *Leader = UnionFind.getLeaderValue(Inst);
1054
2.76k
    bool Inserted = SeenLeaders.insert(Leader);
1055
2.76k
    if (Inserted)
1056
1.63k
      continue;
1057
1.13k
1058
1.13k
    // Merge statements to close holes. Say, we have already seen statements A
1059
1.13k
    // and B, in this order. Then we see an instruction of A again and we would
1060
1.13k
    // see the pattern "A B A". This function joins all statements until the
1061
1.13k
    // only seen occurrence of A.
1062
1.16k
    
    for (Instruction *Prev : reverse(SeenLeaders)) {
1063
1.16k
      // Items added to 'SeenLeaders' are leaders, but may have lost their
1064
1.16k
      // leadership status when merged into another statement.
1065
1.16k
      Instruction *PrevLeader = UnionFind.getLeaderValue(SeenLeaders.back());
1066
1.16k
      if (PrevLeader == Leader)
1067
1.12k
        break;
1068
32
      UnionFind.unionSets(Prev, Leader);
1069
32
    }
1070
1.13k
  }
1071
4.93k
}
1072
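
The "A B A" merge can be replayed in isolation with llvm::EquivalenceClasses (standalone sketch; integers stand in for instructions):

  #include "llvm/ADT/EquivalenceClasses.h"
  #include <cassert>

  void abaExample() {
    llvm::EquivalenceClasses<int> UnionFind;
    for (int I : {0, 1, 2})
      UnionFind.insert(I);
    UnionFind.unionSets(0, 2); // 2 uses a result of 0: same statement A
    // Ordered visit order is 0 (A), 1 (B), 2 (A): the second sighting of
    // A's leader closes the hole by joining B into A.
    UnionFind.unionSets(1, 2);
    assert(UnionFind.getLeaderValue(0) == UnionFind.getLeaderValue(1));
  }
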
1073
/// If the BasicBlock has an edge from itself, ensure that the PHI WRITEs for
1074
/// the incoming values from this block are executed after the PHI READ.
1075
///
1076
/// Otherwise it could overwrite the incoming value from before the BB with the
1077
/// value for the next execution. This can happen if the PHI WRITE is added to
1078
/// the statement with the instruction that defines the incoming value (instead
1079
/// of the last statement of the same BB). To ensure that the PHI READ and WRITE
1080
/// are in order, we put both into the statement. PHI WRITEs are always executed
1081
/// after PHI READs when they are in the same statement.
1082
///
1083
/// TODO: This is an overpessimization. We only have to ensure that the PHI
1084
/// WRITE is not put into a statement containing the PHI itself. That could also
1085
/// be done by
1086
/// - having all (strongly connected) PHIs in a single statement,
1087
/// - unite only the PHIs in the operand tree of the PHI WRITE (because it only
1088
///   has a chance of being lifted before a PHI by being in a statement with a
1089
///   PHI that comes before in the basic block), or
1090
/// - when uniting statements, ensure that no (relevant) PHIs are overtaken.
1091
static void joinOrderedPHIs(EquivalenceClasses<Instruction *> &UnionFind,
1092
4.93k
                            ArrayRef<Instruction *> ModeledInsts) {
1093
6.08k
  for (Instruction *Inst : ModeledInsts) {
1094
6.08k
    PHINode *PHI = dyn_cast<PHINode>(Inst);
1095
6.08k
    if (!PHI)
1096
5.95k
      continue;
1097
135
1098
135
    int Idx = PHI->getBasicBlockIndex(PHI->getParent());
1099
135
    if (Idx < 0)
1100
111
      continue;
1101
24
1102
24
    Instruction *IncomingVal =
1103
24
        dyn_cast<Instruction>(PHI->getIncomingValue(Idx));
1104
24
    if (!IncomingVal)
1105
6
      continue;
1106
18
1107
18
    UnionFind.unionSets(PHI, IncomingVal);
1108
18
  }
1109
4.93k
}
1110
1111
4.93k
void ScopBuilder::buildEqivClassBlockStmts(BasicBlock *BB) {
1112
4.93k
  Loop *L = LI.getLoopFor(BB);
1113
4.93k
1114
4.93k
  // Extracting out modeled instructions saves us from checking
1115
4.93k
  // shouldModelInst() repeatedly.
1116
4.93k
  SmallVector<Instruction *, 32> ModeledInsts;
1117
4.93k
  EquivalenceClasses<Instruction *> UnionFind;
1118
4.93k
  Instruction *MainInst = nullptr;
1119
18.0k
  for (Instruction &Inst : *BB) {
1120
18.0k
    if (!shouldModelInst(&Inst, L))
1121
11.9k
      continue;
1122
6.08k
    ModeledInsts.push_back(&Inst);
1123
6.08k
    UnionFind.insert(&Inst);
1124
6.08k
1125
6.08k
    // When a BB is split into multiple statements, the main statement is the
1126
6.08k
    // one containing the 'main' instruction. We select the first instruction
1127
6.08k
    // that is unlikely to be removed (because it has side-effects) as the main
1128
6.08k
    // one. It is used to ensure that at least one statement from the bb has the
1129
6.08k
    // same name as with -polly-stmt-granularity=bb.
1130
6.08k
    if (!MainInst && (isa<StoreInst>(Inst) ||
1131
5.17k
                      
                      (isa<CallInst>(Inst) && !isa<IntrinsicInst>(Inst))))
1132
1.35k
      MainInst = &Inst;
1133
6.08k
  }
1134
4.93k
1135
4.93k
  joinOperandTree(UnionFind, ModeledInsts);
1136
4.93k
  joinOrderedInstructions(UnionFind, ModeledInsts);
1137
4.93k
  joinOrderedPHIs(UnionFind, ModeledInsts);
1138
4.93k
1139
4.93k
  // The list of instructions per statement (each statement is represented by
1140
4.93k
  // its leader instruction). The order of the statements' instructions is
1141
4.93k
  // reversed such that the epilogue comes first. This makes it easier to
1142
4.93k
  // ensure that the epilogue is the last statement.
1143
4.93k
  MapVector<Instruction *, std::vector<Instruction *>> LeaderToInstList;
1144
4.93k
1145
4.93k
  // Collect the instructions of all leaders. UnionFind's member iterator
1146
4.93k
  // unfortunately is not in any specific order.
1147
18.0k
  for (Instruction &Inst : reverse(*BB)) {
1148
18.0k
    auto LeaderIt = UnionFind.findLeader(&Inst);
1149
18.0k
    if (LeaderIt == UnionFind.member_end())
1150
11.9k
      continue;
1151
6.08k
1152
6.08k
    std::vector<Instruction *> &InstList = LeaderToInstList[*LeaderIt];
1153
6.08k
    InstList.push_back(&Inst);
1154
6.08k
  }
1155
4.93k
1156
4.93k
  // Finally build the statements.
1157
4.93k
  int Count = 0;
1158
4.93k
  long BBIdx = scop->getNextStmtIdx();
1159
4.93k
  bool MainFound = false;
1160
4.93k
  for (auto &Instructions : reverse(LeaderToInstList)) {
1161
3.52k
    std::vector<Instruction *> &InstList = Instructions.second;
1162
3.52k
1163
3.52k
    // If there is no main instruction, make the first statement the main.
1164
3.52k
    bool IsMain;
1165
3.52k
    if (MainInst)
1166
1.92k
      IsMain = std::find(InstList.begin(), InstList.end(), MainInst) !=
1167
1.92k
               InstList.end();
1168
1.59k
    else
1169
1.59k
      IsMain = (Count == 0);
1170
3.52k
    if (IsMain)
1171
2.84k
      MainFound = true;
1172
3.52k
1173
3.52k
    std::reverse(InstList.begin(), InstList.end());
1174
3.52k
    std::string Name = makeStmtName(BB, BBIdx, Count, IsMain);
1175
3.52k
    scop->addScopStmt(BB, Name, L, std::move(InstList));
1176
3.52k
    Count += 1;
1177
3.52k
  }
1178
4.93k
1179
4.93k
  // Unconditionally add an epilogue (last statement). It contains no
1180
4.93k
  // instructions, but holds the PHI write accesses for successor basic blocks,
1181
4.93k
  // if the incoming value is not defined in another statement of the same BB.
1182
4.93k
  // The epilogue will be removed if no PHIWrite is added to it.
1183
4.93k
  std::string EpilogueName = makeStmtName(BB, BBIdx, Count, !MainFound, true);
1184
4.93k
  scop->addScopStmt(BB, EpilogueName, L, {});
1185
4.93k
}
1186
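
The effect of the scalar-independence granularity on a simple block (assumed input; names illustrative):

  void kernel(long i, float *A, float *C, float *D) {
    float X = A[i];  // joined with the next line via the scalar X
    C[i] = X + 1.0f; // -> one statement together with the load
    D[i] = 42.0f;    // shares no scalars: may become its own statement
  }
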
1187
2.58k
void ScopBuilder::buildStmts(Region &SR) {
1188
2.58k
  if (scop->isNonAffineSubRegion(&SR)) {
1189
122
    std::vector<Instruction *> Instructions;
1190
122
    Loop *SurroundingLoop =
1191
122
        getFirstNonBoxedLoopFor(SR.getEntry(), LI, scop->getBoxedLoops());
1192
122
    for (Instruction &Inst : *SR.getEntry())
1193
725
      if (shouldModelInst(&Inst, SurroundingLoop))
1194
448
        Instructions.push_back(&Inst);
1195
122
    long RIdx = scop->getNextStmtIdx();
1196
122
    std::string Name = makeStmtName(&SR, RIdx);
1197
122
    scop->addScopStmt(&SR, Name, SurroundingLoop, Instructions);
1198
122
    return;
1199
122
  }
1200
2.46k
1201
9.47k
  
  for (auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I)
1202
7.00k
    if (I->isSubRegion())
1203
1.38k
      buildStmts(*I->getNodeAs<Region>());
1204
5.62k
    else {
1205
5.62k
      BasicBlock *BB = I->getNodeAs<BasicBlock>();
1206
5.62k
      switch (StmtGranularity) {
1207
5.62k
      case GranularityChoice::BasicBlocks:
1208
693
        buildSequentialBlockStmts(BB);
1209
693
        break;
1210
5.62k
      case GranularityChoice::ScalarIndependence:
1211
4.93k
        buildEqivClassBlockStmts(BB);
1212
4.93k
        break;
1213
5.62k
      case GranularityChoice::Stores:
1214
4
        buildSequentialBlockStmts(BB, true);
1215
4
        break;
1216
5.62k
      }
1217
5.62k
    }
1218
2.46k
}
1219
1220
void ScopBuilder::buildAccessFunctions(ScopStmt *Stmt, BasicBlock &BB,
1221
9.48k
                                       Region *NonAffineSubRegion) {
1222
9.48k
  assert(
1223
9.48k
      Stmt &&
1224
9.48k
      "The exit BB is the only one that cannot be represented by a statement");
1225
9.48k
  assert(Stmt->represents(&BB));
1226
9.48k
1227
9.48k
  // We do not build access functions for error blocks, as they may contain
1228
9.48k
  // instructions we can not model.
1229
9.48k
  if (isErrorBlock(BB, scop->getRegion(), LI, DT))
1230
78
    return;
1231
9.41k
1232
9.41k
  auto BuildAccessesForInst = [this, Stmt,
1233
9.41k
                               NonAffineSubRegion](Instruction *Inst) {
1234
8.13k
    PHINode *PHI = dyn_cast<PHINode>(Inst);
1235
8.13k
    if (PHI)
1236
252
      buildPHIAccesses(Stmt, PHI, NonAffineSubRegion, false);
1237
8.13k
1238
8.13k
    if (auto MemInst = MemAccInst::dyn_cast(*Inst)) {
1239
3.39k
      assert(Stmt && "Cannot build access function in non-existing statement");
1240
3.39k
      buildMemoryAccess(MemInst, Stmt);
1241
3.39k
    }
1242
8.13k
1243
8.13k
    // PHI nodes have already been modeled above and terminators that are
1244
8.13k
    // not part of a non-affine subregion are fully modeled and regenerated
1245
8.13k
    // from the polyhedral domains. Hence, they do not need to be modeled as
1246
8.13k
    // explicit data dependences.
1247
8.13k
    if (!PHI)
1248
7.88k
      buildScalarDependences(Stmt, Inst);
1249
8.13k
  };
1250
9.41k
1251
9.41k
  const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
1252
9.41k
  bool IsEntryBlock = (Stmt->getEntryBlock() == &BB);
1253
9.41k
  if (IsEntryBlock) {
1254
9.20k
    for (Instruction *Inst : Stmt->getInstructions())
1255
7.48k
      BuildAccessesForInst(Inst);
1256
9.20k
    if (Stmt->isRegionStmt())
1257
120
      BuildAccessesForInst(BB.getTerminator());
1258
9.20k
  } else {
1259
529
    for (Instruction &Inst : BB) {
1260
529
      if (isIgnoredIntrinsic(&Inst))
1261
0
        continue;
1262
529
1263
529
      // Invariant loads already have been processed.
1264
529
      if (isa<LoadInst>(Inst) && 
RIL.count(cast<LoadInst>(&Inst))49
)
1265
2
        continue;
1266
527
1267
527
      BuildAccessesForInst(&Inst);
1268
527
    }
1269
206
  }
1270
9.41k
}
1271
1272
MemoryAccess *ScopBuilder::addMemoryAccess(
1273
    ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType,
1274
    Value *BaseAddress, Type *ElementType, bool Affine, Value *AccessValue,
1275
    ArrayRef<const SCEV *> Subscripts, ArrayRef<const SCEV *> Sizes,
1276
5.08k
    MemoryKind Kind) {
1277
5.08k
  bool isKnownMustAccess = false;
1278
5.08k
1279
5.08k
  // Accesses in single-basic block statements are always executed.
1280
5.08k
  if (Stmt->isBlockStmt())
1281
4.67k
    isKnownMustAccess = true;
1282
5.08k
1283
5.08k
  if (Stmt->isRegionStmt()) {
1284
416
    // Accesses that dominate the exit block of a non-affine region are always
1285
416
    // executed. In non-affine regions there may exist MemoryKind::Values that
1286
416
    // do not dominate the exit. MemoryKind::Values will always dominate the
1287
416
    // exit and MemoryKind::PHIs only if there is at most one PHI_WRITE in the
1288
416
    // non-affine region.
1289
416
    if (Inst && DT.dominates(Inst->getParent(), Stmt->getRegion()->getExit()))
1290
236
      isKnownMustAccess = true;
1291
416
  }
1292
5.08k
1293
5.08k
  // Non-affine PHI writes do not "happen" at a particular instruction, but
1294
5.08k
  // after exiting the statement. Therefore they are guaranteed to execute and
1295
5.08k
  // overwrite the old value.
1296
5.08k
  if (Kind == MemoryKind::PHI || Kind == MemoryKind::ExitPHI)
1297
699
    isKnownMustAccess = true;
1298
5.08k
1299
5.08k
  if (!isKnownMustAccess && AccType == MemoryAccess::MUST_WRITE)
1300
82
    AccType = MemoryAccess::MAY_WRITE;
1301
5.08k
1302
5.08k
  auto *Access = new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType,
1303
5.08k
                                  Affine, Subscripts, Sizes, AccessValue, Kind);
1304
5.08k
1305
5.08k
  scop->addAccessFunction(Access);
1306
5.08k
  Stmt->addAccess(Access);
1307
5.08k
  return Access;
1308
5.08k
}
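// [Editor's sketch] The must/may decision above, reduced to a standalone
// helper. WriteKind and classifyWrite are invented names for illustration,
// not Polly API: a write stays a must-write only when the statement
// guarantees its execution (single-block statements, accesses dominating
// the region exit, PHI/ExitPHI writes); otherwise it is demoted.
enum class WriteKind { MustWrite, MayWrite };

static WriteKind classifyWrite(bool IsKnownMustAccess) {
  return IsKnownMustAccess ? WriteKind::MustWrite : WriteKind::MayWrite;
}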
1309
1310
void ScopBuilder::addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst,
1311
                                 MemoryAccess::AccessType AccType,
1312
                                 Value *BaseAddress, Type *ElementType,
1313
                                 bool IsAffine,
1314
                                 ArrayRef<const SCEV *> Subscripts,
1315
                                 ArrayRef<const SCEV *> Sizes,
1316
3.68k
                                 Value *AccessValue) {
1317
3.68k
  ArrayBasePointers.insert(BaseAddress);
1318
3.68k
  auto *MemAccess = addMemoryAccess(Stmt, MemAccInst, AccType, BaseAddress,
1319
3.68k
                                    ElementType, IsAffine, AccessValue,
1320
3.68k
                                    Subscripts, Sizes, MemoryKind::Array);
1321
3.68k
1322
3.68k
  if (!DetectFortranArrays)
1323
3.67k
    return;
1324
13
1325
13
  if (Value *FAD = findFADAllocationInvisible(MemAccInst))
1326
8
    MemAccess->setFortranArrayDescriptor(FAD);
1327
5
  else if (Value *FAD = findFADAllocationVisible(MemAccInst))
1328
2
    MemAccess->setFortranArrayDescriptor(FAD);
1329
13
}
1330
1331
/// Check if @p Expr is divisible by @p Size.
1332
6.74k
static bool isDivisible(const SCEV *Expr, unsigned Size, ScalarEvolution &SE) {
1333
6.74k
  assert(Size != 0);
1334
6.74k
  if (Size == 1)
1335
141
    return true;
1336
6.60k
1337
6.60k
  // Only one factor needs to be divisible.
1338
6.60k
  if (auto *MulExpr = dyn_cast<SCEVMulExpr>(Expr)) {
1339
394
    for (auto *FactorExpr : MulExpr->operands())
1340
394
      if (isDivisible(FactorExpr, Size, SE))
1341
394
        return true;
1342
394
    return false;
1343
6.20k
  }
1344
6.20k
1345
6.20k
  // For other n-ary expressions (Add, AddRec, Max,...) all operands need
1346
6.20k
  // to be divisible.
1347
6.20k
  if (auto *NAryExpr = dyn_cast<SCEVNAryExpr>(Expr)) {
1348
1.68k
    for (auto *OpExpr : NAryExpr->operands())
1349
3.38k
      if (!isDivisible(OpExpr, Size, SE))
1350
0
        return false;
1351
1.68k
    return true;
1352
4.51k
  }
1353
4.51k
1354
4.51k
  auto *SizeSCEV = SE.getConstant(Expr->getType(), Size);
1355
4.51k
  auto *UDivSCEV = SE.getUDivExpr(Expr, SizeSCEV);
1356
4.51k
  auto *MulSCEV = SE.getMulExpr(UDivSCEV, SizeSCEV);
1357
4.51k
  return MulSCEV == Expr;
1358
4.51k
}
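// [Editor's sketch] The divisibility recursion above on a toy expression
// tree instead of SCEV (ToyExpr and toyIsDivisible are invented for the
// example): one divisible factor suffices for a product, whereas every
// term of a sum must be divisible. E.g., 4*i + 8 is divisible by 4, but
// 4*i + 2 is not.
struct ToyExpr {
  enum { Const, Add, Mul } Kind;
  long Value;           // used when Kind == Const
  const ToyExpr *L, *R; // used otherwise
};

static bool toyIsDivisible(const ToyExpr &E, long Size) {
  switch (E.Kind) {
  case ToyExpr::Const:
    return E.Value % Size == 0;
  case ToyExpr::Mul: // only one factor needs to be divisible
    return toyIsDivisible(*E.L, Size) || toyIsDivisible(*E.R, Size);
  case ToyExpr::Add: // all operands need to be divisible
    return toyIsDivisible(*E.L, Size) && toyIsDivisible(*E.R, Size);
  }
  return false;
}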
1359
1360
1.16k
void ScopBuilder::foldSizeConstantsToRight() {
1361
1.16k
  isl::union_set Accessed = scop->getAccesses().range();
1362
1.16k
1363
2.47k
  for (auto Array : scop->arrays()) {
1364
2.47k
    if (Array->getNumberOfDimensions() <= 1)
1365
2.18k
      continue;
1366
283
1367
283
    isl::space Space = Array->getSpace();
1368
283
    Space = Space.align_params(Accessed.get_space());
1369
283
1370
283
    if (!Accessed.contains(Space))
1371
0
      continue;
1372
283
1373
283
    isl::set Elements = Accessed.extract_set(Space);
1374
283
    isl::map Transform = isl::map::universe(Array->getSpace().map_from_set());
1375
283
1376
283
    std::vector<int> Int;
1377
283
    int Dims = Elements.dim(isl::dim::set);
1378
933
    for (int i = 0; i < Dims; i++) {
1379
650
      isl::set DimOnly = isl::set(Elements).project_out(isl::dim::set, 0, i);
1380
650
      DimOnly = DimOnly.project_out(isl::dim::set, 1, Dims - i - 1);
1381
650
      DimOnly = DimOnly.lower_bound_si(isl::dim::set, 0, 0);
1382
650
1383
650
      isl::basic_set DimHull = DimOnly.affine_hull();
1384
650
1385
650
      if (i == Dims - 1) {
1386
283
        Int.push_back(1);
1387
283
        Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i);
1388
283
        continue;
1389
283
      }
1390
367
1391
367
      if (DimHull.dim(isl::dim::div) == 1) {
1392
5
        isl::aff Diff = DimHull.get_div(0);
1393
5
        isl::val Val = Diff.get_denominator_val();
1394
5
1395
5
        int ValInt = 1;
1396
5
        if (Val.is_int()) {
1397
5
          auto ValAPInt = APIntFromVal(Val);
1398
5
          if (ValAPInt.isSignedIntN(32))
1399
4
            ValInt = ValAPInt.getSExtValue();
1400
5
        } else {
1401
0
        }
1402
5
1403
5
        Int.push_back(ValInt);
1404
5
        isl::constraint C = isl::constraint::alloc_equality(
1405
5
            isl::local_space(Transform.get_space()));
1406
5
        C = C.set_coefficient_si(isl::dim::out, i, ValInt);
1407
5
        C = C.set_coefficient_si(isl::dim::in, i, -1);
1408
5
        Transform = Transform.add_constraint(C);
1409
5
        continue;
1410
5
      }
1411
362
1412
362
      isl::basic_set ZeroSet = isl::basic_set(DimHull);
1413
362
      ZeroSet = ZeroSet.fix_si(isl::dim::set, 0, 0);
1414
362
1415
362
      int ValInt = 1;
1416
362
      if (ZeroSet.is_equal(DimHull)) {
1417
11
        ValInt = 0;
1418
11
      }
1419
362
1420
362
      Int.push_back(ValInt);
1421
362
      Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i);
1422
362
    }
1423
283
1424
283
    isl::set MappedElements = isl::map(Transform).domain();
1425
283
    if (!Elements.is_subset(MappedElements))
1426
0
      continue;
1427
283
1428
283
    bool CanFold = true;
1429
283
    if (Int[0] <= 1)
1430
279
      CanFold = false;
1431
283
1432
283
    unsigned NumDims = Array->getNumberOfDimensions();
1433
367
    for (unsigned i = 1; i < NumDims - 1; i++)
1434
84
      if (Int[0] != Int[i] && Int[i])
1435
1
        CanFold = false;
1436
283
1437
283
    if (!CanFold)
1438
279
      continue;
1439
4
1440
4
    for (auto &Access : scop->access_functions())
1441
60
      if (Access->getScopArrayInfo() == Array)
1442
15
        Access->setAccessRelation(
1443
15
            Access->getAccessRelation().apply_range(Transform));
1444
4
1445
4
    std::vector<const SCEV *> Sizes;
1446
12
    for (unsigned i = 0; i < NumDims; i++) {
1447
8
      auto Size = Array->getDimensionSize(i);
1448
8
1449
8
      if (i == NumDims - 1)
1450
4
        Size = SE.getMulExpr(Size, SE.getConstant(Size->getType(), Int[0]));
1451
8
      Sizes.push_back(Size);
1452
8
    }
1453
4
1454
4
    Array->updateSizes(Sizes, false /* CheckConsistency */);
1455
4
  }
1456
1.16k
}
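// [Editor's note] A worked example for the folding above, with assumed
// shapes: if a two-dimensional array A[N][M] is only ever accessed as
// A[3*i][j], the affine hull of dimension 0 has stride 3. The computed
// transform maps [3*i][j] back to [i][j] and the innermost dimension size
// is multiplied by the stride, so later passes see the virtual, densely
// accessed shape A[N][3*M].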
1457
1458
1.16k
void ScopBuilder::markFortranArrays() {
1459
2.31k
  for (ScopStmt &Stmt : *scop) {
1460
4.72k
    for (MemoryAccess *MemAcc : Stmt) {
1461
4.72k
      Value *FAD = MemAcc->getFortranArrayDescriptor();
1462
4.72k
      if (!FAD)
1463
4.71k
        continue;
1464
10
1465
10
      // TODO: const_cast-ing to edit
1466
10
      ScopArrayInfo *SAI =
1467
10
          const_cast<ScopArrayInfo *>(MemAcc->getLatestScopArrayInfo());
1468
10
      assert(SAI && "memory access into a Fortran array does not "
1469
10
                    "have an associated ScopArrayInfo");
1470
10
      SAI->applyAndSetFAD(FAD);
1471
10
    }
1472
2.31k
  }
1473
1.16k
}
1474
1475
1.16k
void ScopBuilder::finalizeAccesses() {
1476
1.16k
  updateAccessDimensionality();
1477
1.16k
  foldSizeConstantsToRight();
1478
1.16k
  foldAccessRelations();
1479
1.16k
  assumeNoOutOfBounds();
1480
1.16k
  markFortranArrays();
1481
1.16k
}
1482
1483
1.16k
void ScopBuilder::updateAccessDimensionality() {
1484
1.16k
  // Check all array accesses for each base pointer and find a (virtual) element
1485
1.16k
  // size for the base pointer that divides all access functions.
1486
1.16k
  for (ScopStmt &Stmt : *scop)
1487
4.72k
    for (MemoryAccess *Access : Stmt) {
1488
4.72k
      if (!Access->isArrayKind())
1489
1.32k
        continue;
1490
3.39k
      ScopArrayInfo *Array =
1491
3.39k
          const_cast<ScopArrayInfo *>(Access->getScopArrayInfo());
1492
3.39k
1493
3.39k
      if (Array->getNumberOfDimensions() != 1)
1494
440
        continue;
1495
2.95k
      unsigned DivisibleSize = Array->getElemSizeInBytes();
1496
2.95k
      const SCEV *Subscript = Access->getSubscript(0);
1497
2.96k
      while (!isDivisible(Subscript, DivisibleSize, SE))
1498
2
        DivisibleSize /= 2;
1499
2.95k
      auto *Ty = IntegerType::get(SE.getContext(), DivisibleSize * 8);
1500
2.95k
      Array->updateElementType(Ty);
1501
2.95k
    }
1502
1.16k
1503
1.16k
  for (auto &Stmt : *scop)
1504
2.31k
    for (auto &Access : Stmt)
1505
4.72k
      Access->updateDimensionality();
1506
1.16k
}
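// [Editor's sketch] The element-size refinement above in isolation, with a
// plain integer standing in for the SCEV subscript (findDivisibleSize is an
// invented name): keep halving the assumed element size until it divides
// the subscript, e.g. a byte offset of 6 into an i32 array yields 2.
static unsigned findDivisibleSize(long Subscript, unsigned ElemSize) {
  while (ElemSize > 1 && Subscript % ElemSize != 0)
    ElemSize /= 2;
  return ElemSize;
}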
1507
1508
1.16k
void ScopBuilder::foldAccessRelations() {
1509
1.16k
  for (auto &Stmt : *scop)
1510
2.31k
    for (auto &Access : Stmt)
1511
4.72k
      Access->foldAccessRelation();
1512
1.16k
}
1513
1514
1.16k
void ScopBuilder::assumeNoOutOfBounds() {
1515
1.16k
  for (auto &Stmt : *scop)
1516
2.31k
    for (auto &Access : Stmt)
1517
4.72k
      Access->assumeNoOutOfBound();
1518
1.16k
}
1519
1520
409
void ScopBuilder::ensureValueWrite(Instruction *Inst) {
1521
409
  // Find the statement that defines the value of Inst. That statement has to
1522
409
  // write the value to make it available to those statements that read it.
1523
409
  ScopStmt *Stmt = scop->getStmtFor(Inst);
1524
409
1525
409
  // It is possible that the value is synthesizable within a loop (such that it
1526
409
  // is not part of any statement), but not after the loop (where you need the
1527
409
  // number of loop round-trips to synthesize it). In LCSSA-form a PHI node will
1528
409
  // avoid this. In case the IR has no such PHI, use the last statement (where
1529
409
  // the value is synthesizable) to write the value.
1530
409
  if (!Stmt)
1531
42
    Stmt = scop->getLastStmtFor(Inst->getParent());
1532
409
1533
409
  // Inst not defined within this SCoP.
1534
409
  if (!Stmt)
1535
0
    return;
1536
409
1537
409
  // Do not process further if the instruction is already written.
1538
409
  if (Stmt->lookupValueWriteOf(Inst))
1539
66
    return;
1540
343
1541
343
  addMemoryAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, Inst, Inst->getType(),
1542
343
                  true, Inst, ArrayRef<const SCEV *>(),
1543
343
                  ArrayRef<const SCEV *>(), MemoryKind::Value);
1544
343
}
1545
1546
14.4k
void ScopBuilder::ensureValueRead(Value *V, ScopStmt *UserStmt) {
1547
14.4k
  // TODO: Make ScopStmt::ensureValueRead(Value*) offer the same functionality
1548
14.4k
  // to be able to replace this one. Currently, there is a split responsibility.
1549
14.4k
  // In a first step, the MemoryAccess is created, but without the
1550
14.4k
  // AccessRelation. In the second step by ScopStmt::buildAccessRelations(), the
1551
14.4k
  // AccessRelation is created. At least for scalar accesses, there is no new
1552
14.4k
  // information available at ScopStmt::buildAccessRelations(), so we could
1553
14.4k
  // create the AccessRelation right away. This is what
1554
14.4k
  // ScopStmt::ensureValueRead(Value*) does.
1555
14.4k
1556
14.4k
  auto *Scope = UserStmt->getSurroundingLoop();
1557
14.4k
  auto VUse = VirtualUse::create(scop.get(), UserStmt, Scope, V, false);
1558
14.4k
  switch (VUse.getKind()) {
1559
14.4k
  case VirtualUse::Constant:
1560
14.0k
  case VirtualUse::Block:
1561
14.0k
  case VirtualUse::Synthesizable:
1562
14.0k
  case VirtualUse::Hoisted:
1563
14.0k
  case VirtualUse::Intra:
1564
14.0k
    // Uses of these kinds do not need a MemoryAccess.
1565
14.0k
    break;
1566
14.0k
1567
14.0k
  case VirtualUse::ReadOnly:
1568
65
    // Add MemoryAccess for invariant values only if requested.
1569
65
    if (!ModelReadOnlyScalars)
1570
4
      break;
1571
61
1572
61
    LLVM_FALLTHROUGH;
1573
394
  case VirtualUse::Inter:
1574
394
1575
394
    // Do not create another MemoryAccess for reloading the value if one already
1576
394
    // exists.
1577
394
    if (UserStmt->lookupValueReadOf(V))
1578
30
      break;
1579
364
1580
364
    addMemoryAccess(UserStmt, nullptr, MemoryAccess::READ, V, V->getType(),
1581
364
                    true, V, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
1582
364
                    MemoryKind::Value);
1583
364
1584
364
    // Inter-statement uses need to write the value in their defining statement.
1585
364
    if (VUse.isInter())
1586
316
      ensureValueWrite(cast<Instruction>(V));
1587
364
    break;
1588
14.4k
  }
1589
14.4k
}
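// [Editor's note] Intuition for the switch above: constants, basic-block
// operands, values recomputable from loop counters (Synthesizable), hoisted
// loads, and values defined in the same statement (Intra) need no
// MemoryAccess. Only values flowing in from another statement (Inter), or
// read-only scalars from outside the SCoP when ModelReadOnlyScalars is set,
// get a scalar READ here plus a matching value write in the defining
// statement.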
1590
1591
void ScopBuilder::ensurePHIWrite(PHINode *PHI, ScopStmt *IncomingStmt,
1592
                                 BasicBlock *IncomingBlock,
1593
585
                                 Value *IncomingValue, bool IsExitBlock) {
1594
585
  // As the incoming block might turn out to be an error statement ensure we
1595
585
  // will create an exit PHI SAI object. It is needed during code generation
1596
585
  // and would be created later anyway.
1597
585
  if (IsExitBlock)
1598
136
    scop->getOrCreateScopArrayInfo(PHI, PHI->getType(), {},
1599
136
                                   MemoryKind::ExitPHI);
1600
585
1601
585
  // This is possible if PHI is in the SCoP's entry block. The incoming blocks
1602
585
  // from outside the SCoP's region have no statement representation.
1603
585
  if (!IncomingStmt)
1604
71
    return;
1605
514
1606
514
  // Make sure the incoming value is available in the incoming block.
1607
514
  // This must be done before the check for multiple PHI writes because multiple
1608
514
  // exiting edges from a subregion can each be the effective written value of the
1609
514
  // subregion. As such, all of them must be made available in the subregion
1610
514
  // statement.
1611
514
  ensureValueRead(IncomingValue, IncomingStmt);
1612
514
1613
514
  // Do not add more than one MemoryAccess per PHINode and ScopStmt.
1614
514
  if (MemoryAccess *Acc = IncomingStmt->lookupPHIWriteOf(PHI)) {
1615
53
    assert(Acc->getAccessInstruction() == PHI);
1616
53
    Acc->addIncoming(IncomingBlock, IncomingValue);
1617
53
    return;
1618
53
  }
1619
461
1620
461
  MemoryAccess *Acc = addMemoryAccess(
1621
461
      IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, PHI->getType(), true,
1622
461
      PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
1623
461
      IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI);
1624
461
  assert(Acc);
1625
461
  Acc->addIncoming(IncomingBlock, IncomingValue);
1626
461
}
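// [Editor's note] Example for the PHI modeling above: for a loop-carried
//   %phi = phi i64 [ 0, %preheader ], [ %next, %latch ]
// each in-SCoP incoming edge reaches ensurePHIWrite from its incoming
// statement. The first call per statement creates the PHI_WRITE access;
// subsequent calls only append further (block, value) pairs through
// addIncoming, so one MemoryAccess per incoming statement aggregates all
// of that statement's edges.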
1627
1628
238
void ScopBuilder::addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI) {
1629
238
  addMemoryAccess(PHIStmt, PHI, MemoryAccess::READ, PHI, PHI->getType(), true,
1630
238
                  PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
1631
238
                  MemoryKind::PHI);
1632
238
}
1633
1634
2.32k
void ScopBuilder::buildDomain(ScopStmt &Stmt) {
1635
2.32k
  isl::id Id = isl::id::alloc(scop->getIslCtx(), Stmt.getBaseName(), &Stmt);
1636
2.32k
1637
2.32k
  Stmt.Domain = scop->getDomainConditions(&Stmt);
1638
2.32k
  Stmt.Domain = Stmt.Domain.set_tuple_id(Id);
1639
2.32k
}
1640
1641
2.32k
void ScopBuilder::collectSurroundingLoops(ScopStmt &Stmt) {
1642
2.32k
  isl::set Domain = Stmt.getDomain();
1643
2.32k
  BasicBlock *BB = Stmt.getEntryBlock();
1644
2.32k
1645
2.32k
  Loop *L = LI.getLoopFor(BB);
1646
2.32k
1647
2.33k
  while (L && Stmt.isRegionStmt() && Stmt.getRegion()->contains(L))
1648
12
    L = L->getParentLoop();
1649
2.32k
1650
2.32k
  SmallVector<llvm::Loop *, 8> Loops;
1651
2.32k
1652
4.91k
  while (L && Stmt.getParent()->getRegion().contains(L)) {
1653
2.59k
    Loops.push_back(L);
1654
2.59k
    L = L->getParentLoop();
1655
2.59k
  }
1656
2.32k
1657
2.32k
  Stmt.NestLoops.insert(Stmt.NestLoops.begin(), Loops.rbegin(), Loops.rend());
1658
2.32k
}
1659
1660
/// Return the reduction type for a given binary operator.
1661
static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp,
1662
292
                                                    const Instruction *Load) {
1663
292
  if (!BinOp)
1664
0
    return MemoryAccess::RT_NONE;
1665
292
  switch (BinOp->getOpcode()) {
1666
292
  case Instruction::FAdd:
1667
1
    if (!BinOp->isFast())
1668
0
      return MemoryAccess::RT_NONE;
1669
1
    LLVM_FALLTHROUGH;
1670
269
  case Instruction::Add:
1671
269
    return MemoryAccess::RT_ADD;
1672
1
  case Instruction::Or:
1673
1
    return MemoryAccess::RT_BOR;
1674
3
  case Instruction::Xor:
1675
3
    return MemoryAccess::RT_BXOR;
1676
1
  case Instruction::And:
1677
1
    return MemoryAccess::RT_BAND;
1678
1
  case Instruction::FMul:
1679
0
    if (!BinOp->isFast())
1680
0
      return MemoryAccess::RT_NONE;
1681
0
    LLVM_FALLTHROUGH;
1682
18
  case Instruction::Mul:
1683
18
    if (DisableMultiplicativeReductions)
1684
0
      return MemoryAccess::RT_NONE;
1685
18
    return MemoryAccess::RT_MUL;
1686
18
  default:
1687
0
    return MemoryAccess::RT_NONE;
1688
292
  }
1689
292
}
1690
1691
2.32k
void ScopBuilder::checkForReductions(ScopStmt &Stmt) {
1692
2.32k
  SmallVector<MemoryAccess *, 2> Loads;
1693
2.32k
  SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates;
1694
2.32k
1695
2.32k
  // First collect candidate load-store reduction chains by iterating over all
1696
2.32k
  // stores and collecting possible reduction loads.
1697
4.77k
  for (MemoryAccess *StoreMA : Stmt) {
1698
4.77k
    if (StoreMA->isRead())
1699
2.27k
      continue;
1700
2.49k
1701
2.49k
    Loads.clear();
1702
2.49k
    collectCandidateReductionLoads(StoreMA, Loads);
1703
2.49k
    for (MemoryAccess *LoadMA : Loads)
1704
418
      Candidates.push_back(std::make_pair(LoadMA, StoreMA));
1705
2.49k
  }
1706
2.32k
1707
2.32k
  // Then check each possible candidate pair.
1708
2.32k
  for (const auto &CandidatePair : Candidates) {
1709
418
    bool Valid = true;
1710
418
    isl::map LoadAccs = CandidatePair.first->getAccessRelation();
1711
418
    isl::map StoreAccs = CandidatePair.second->getAccessRelation();
1712
418
1713
418
    // Skip those with obviously unequal base addresses.
1714
418
    if (!LoadAccs.has_equal_space(StoreAccs)) {
1715
75
      continue;
1716
75
    }
1717
343
1718
343
    // Then check the remaining accesses for overlap with other memory accesses.
1719
343
    isl::map AllAccsRel = LoadAccs.unite(StoreAccs);
1720
343
    AllAccsRel = AllAccsRel.intersect_domain(Stmt.getDomain());
1721
343
    isl::set AllAccs = AllAccsRel.range();
1722
343
1723
973
    for (MemoryAccess *MA : Stmt) {
1724
973
      if (MA == CandidatePair.first || MA == CandidatePair.second)
1725
686
        continue;
1726
287
1727
287
      isl::map AccRel =
1728
287
          MA->getAccessRelation().intersect_domain(Stmt.getDomain());
1729
287
      isl::set Accs = AccRel.range();
1730
287
1731
287
      if (AllAccs.has_equal_space(Accs)) {
1732
70
        isl::set OverlapAccs = Accs.intersect(AllAccs);
1733
70
        Valid = Valid && OverlapAccs.is_empty();
1734
70
      }
1735
287
    }
1736
343
1737
343
    if (!Valid)
1738
51
      continue;
1739
292
1740
292
    const LoadInst *Load =
1741
292
        dyn_cast<const LoadInst>(CandidatePair.first->getAccessInstruction());
1742
292
    MemoryAccess::ReductionType RT =
1743
292
        getReductionType(dyn_cast<BinaryOperator>(Load->user_back()), Load);
1744
292
1745
292
    // If no overlapping access was found we mark the load and store as
1746
292
    // reduction like.
1747
292
    CandidatePair.first->markAsReductionLike(RT);
1748
292
    CandidatePair.second->markAsReductionLike(RT);
1749
292
  }
1750
2.32k
}
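// [Editor's note] A chain the detection above accepts: in
//   for (i = 0; i < N; i++) Stmt: sum += A[i];
// the load of 'sum' and the store to 'sum' form a candidate pair, the
// single binary operator is an integer add, and no other access in Stmt
// overlaps the location of 'sum', so both accesses are marked RT_ADD.
// The overlap test is what rejects cases like A[i] = A[i] + A[i+1], where
// another access in the statement touches the same array.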
1751
1752
1.15k
void ScopBuilder::verifyInvariantLoads() {
1753
1.15k
  auto &RIL = scop->getRequiredInvariantLoads();
1754
1.15k
  for (LoadInst *LI : RIL) {
1755
230
    assert(LI && scop->contains(LI));
1756
230
    // If there exists a statement in the scop which has a memory access for
1757
230
    // @p LI, then mark this scop as infeasible for optimization.
1758
230
    for (ScopStmt &Stmt : *scop)
1759
887
      if (Stmt.getArrayAccessOrNULLFor(LI)) {
1760
3
        scop->invalidate(INVARIANTLOAD, LI->getDebugLoc(), LI->getParent());
1761
3
        return;
1762
3
      }
1763
230
  }
1764
1.15k
}
1765
1766
1.15k
void ScopBuilder::hoistInvariantLoads() {
1767
1.15k
  if (!PollyInvariantLoadHoisting)
1768
988
    return;
1769
166
1770
166
  isl::union_map Writes = scop->getWrites();
1771
369
  for (ScopStmt &Stmt : *scop) {
1772
369
    InvariantAccessesTy InvariantAccesses;
1773
369
1774
369
    for (MemoryAccess *Access : Stmt)
1775
903
      if (isl::set NHCtx = getNonHoistableCtx(Access, Writes))
1776
362
        InvariantAccesses.push_back({Access, NHCtx});
1777
369
1778
369
    // Transfer the memory accesses from the statement to the SCoP.
1779
369
    for (auto InvMA : InvariantAccesses)
1780
362
      Stmt.removeMemoryAccess(InvMA.MA);
1781
369
    addInvariantLoads(Stmt, InvariantAccesses);
1782
369
  }
1783
166
}
1784
1785
/// Check if an access range is too complex.
1786
///
1787
/// An access range is too complex if it contains either many disjuncts or
1788
/// very complex expressions. As a simple heuristic, we assume a set to
1789
/// be too complex if the sum of existentially quantified dimensions and
1790
/// set dimensions is larger than a threshold. This reliably detects both
1791
/// sets with many disjuncts as well as sets with many divisions as they
1792
/// arise in h264.
1793
///
1794
/// @param AccessRange The range to check for complexity.
1795
///
1796
/// @returns True if the access range is too complex.
1797
411
static bool isAccessRangeTooComplex(isl::set AccessRange) {
1798
411
  int NumTotalDims = 0;
1799
411
1800
457
  for (isl::basic_set BSet : AccessRange.get_basic_set_list()) {
1801
457
    NumTotalDims += BSet.dim(isl::dim::div);
1802
457
    NumTotalDims += BSet.dim(isl::dim::set);
1803
457
  }
1804
411
1805
411
  if (NumTotalDims > MaxDimensionsInAccessRange)
1806
1
    return true;
1807
410
1808
410
  return false;
1809
410
}
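// [Editor's sketch] The same measure over a toy stand-in for the isl types
// (ToyBasicSet and toyTooComplex are invented names): the sum of
// existentially quantified (div) and set dimensions across all disjuncts
// is compared against the threshold, so both many disjuncts and many
// divisions trip it.
struct ToyBasicSet { int DivDims, SetDims; };

static bool toyTooComplex(const ToyBasicSet *Disjuncts, int N, int MaxDims) {
  int NumTotalDims = 0;
  for (int i = 0; i < N; ++i)
    NumTotalDims += Disjuncts[i].DivDims + Disjuncts[i].SetDims;
  return NumTotalDims > MaxDims;
}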
1810
1811
bool ScopBuilder::hasNonHoistableBasePtrInScop(MemoryAccess *MA,
1812
486
                                               isl::union_map Writes) {
1813
486
  if (auto *BasePtrMA = scop->lookupBasePtrAccess(MA)) {
1814
0
    return getNonHoistableCtx(BasePtrMA, Writes).is_null();
1815
0
  }
1816
486
1817
486
  Value *BaseAddr = MA->getOriginalBaseAddr();
1818
486
  if (auto *BasePtrInst = dyn_cast<Instruction>(BaseAddr))
1819
115
    if (!isa<LoadInst>(BasePtrInst))
1820
36
      return scop->contains(BasePtrInst);
1821
450
1822
450
  return false;
1823
450
}
1824
1825
1.16k
void ScopBuilder::addUserContext() {
1826
1.16k
  if (UserContextStr.empty())
1827
1.15k
    return;
1828
3
1829
3
  isl::set UserContext = isl::set(scop->getIslCtx(), UserContextStr.c_str());
1830
3
  isl::space Space = scop->getParamSpace();
1831
3
  if (Space.dim(isl::dim::param) != UserContext.dim(isl::dim::param)) {
1832
2
    std::string SpaceStr = Space.to_str();
1833
2
    errs() << "Error: the context provided in -polly-context has not the same "
1834
2
           << "number of dimensions than the computed context. Due to this "
1835
2
           << "mismatch, the -polly-context option is ignored. Please provide "
1836
2
           << "the context in the parameter space: " << SpaceStr << ".\n";
1837
2
    return;
1838
2
  }
1839
1
1840
2
  for (unsigned i = 0; i < Space.dim(isl::dim::param); i++) {
1841
1
    std::string NameContext =
1842
1
        scop->getContext().get_dim_name(isl::dim::param, i);
1843
1
    std::string NameUserContext = UserContext.get_dim_name(isl::dim::param, i);
1844
1
1845
1
    if (NameContext != NameUserContext) {
1846
0
      std::string SpaceStr = Space.to_str();
1847
0
      errs() << "Error: the name of dimension " << i
1848
0
             << " provided in -polly-context "
1849
0
             << "is '" << NameUserContext << "', but the name in the computed "
1850
0
             << "context is '" << NameContext
1851
0
             << "'. Due to this name mismatch, "
1852
0
             << "the -polly-context option is ignored. Please provide "
1853
0
             << "the context in the parameter space: " << SpaceStr << ".\n";
1854
0
      return;
1855
0
    }
1856
1
1857
1
    UserContext = UserContext.set_dim_id(isl::dim::param, i,
1858
1
                                         Space.get_dim_id(isl::dim::param, i));
1859
1
  }
1860
1
  isl::set newContext = scop->getContext().intersect(UserContext);
1861
1
  scop->setContext(newContext);
1862
1
}
1863
1864
isl::set ScopBuilder::getNonHoistableCtx(MemoryAccess *Access,
1865
903
                                         isl::union_map Writes) {
1866
903
  // TODO: Loads that are not loop carried, hence are in a statement with
1867
903
  //       zero iterators, are by construction invariant, though we
1868
903
  //       currently "hoist" them anyway. This is necessary because we allow
1869
903
  //       them to be treated as parameters (e.g., in conditions) and our code
1870
903
  //       generation would otherwise use the old value.
1871
903
1872
903
  auto &Stmt = *Access->getStatement();
1873
903
  BasicBlock *BB = Stmt.getEntryBlock();
1874
903
1875
903
  if (Access->isScalarKind() || Access->isWrite() || !Access->isAffine() ||
1876
903
      Access->isMemoryIntrinsic())
1877
417
    return nullptr;
1878
486
1879
486
  // Skip accesses that have an invariant base pointer which is defined but
1880
486
  // not loaded inside the SCoP. This can happen, e.g., if a readnone call
1881
486
  // returns a pointer that is used as a base address. However, as we want
1882
486
  // to hoist indirect pointers, we allow the base pointer to be defined in
1883
486
  // the region if it is also a memory access. Each ScopArrayInfo object
1884
486
  // that has a base pointer origin has a base pointer that is loaded and
1885
486
  // that is invariant, thus it will be hoisted too. However, if there is
1886
486
  // no base pointer origin we check that the base pointer is defined
1887
486
  // outside the region.
1888
486
  auto *LI = cast<LoadInst>(Access->getAccessInstruction());
1889
486
  if (hasNonHoistableBasePtrInScop(Access, Writes))
1890
0
    return nullptr;
1891
486
1892
486
  isl::map AccessRelation = Access->getAccessRelation();
1893
486
  assert(!AccessRelation.is_empty());
1894
486
1895
486
  if (AccessRelation.involves_dims(isl::dim::in, 0, Stmt.getNumIterators()))
1896
74
    return nullptr;
1897
412
1898
412
  AccessRelation = AccessRelation.intersect_domain(Stmt.getDomain());
1899
412
  isl::set SafeToLoad;
1900
412
1901
412
  auto &DL = scop->getFunction().getParent()->getDataLayout();
1902
412
  if (isSafeToLoadUnconditionally(LI->getPointerOperand(), LI->getType(),
1903
412
                                  LI->getAlignment(), DL)) {
1904
79
    SafeToLoad = isl::set::universe(AccessRelation.get_space().range());
1905
333
  } else if (BB != LI->getParent()) {
1906
1
    // Skip accesses in non-affine subregions as they might not be executed
1907
1
    // under the same condition as the entry of the non-affine subregion.
1908
1
    return nullptr;
1909
332
  } else {
1910
332
    SafeToLoad = AccessRelation.range();
1911
332
  }
1912
412
1913
412
  if (isAccessRangeTooComplex(AccessRelation.range()))
1914
1
    return nullptr;
1915
410
1916
410
  isl::union_map Written = Writes.intersect_range(SafeToLoad);
1917
410
  isl::set WrittenCtx = Written.params();
1918
410
  bool IsWritten = !WrittenCtx.is_empty();
1919
410
1920
410
  if (!IsWritten)
1921
348
    return WrittenCtx;
1922
62
1923
62
  WrittenCtx = WrittenCtx.remove_divs();
1924
62
  bool TooComplex = WrittenCtx.n_basic_set() >= MaxDisjunctsInDomain;
1925
62
  if (TooComplex || !isRequiredInvariantLoad(LI))
1926
48
    return nullptr;
1927
14
1928
14
  scop->addAssumption(INVARIANTLOAD, WrittenCtx, LI->getDebugLoc(),
1929
14
                      AS_RESTRICTION, LI->getParent());
1930
14
  return WrittenCtx;
1931
14
}
1932
1933
6
static bool isAParameter(llvm::Value *maybeParam, const Function &F) {
1934
6
  for (const llvm::Argument &Arg : F.args())
1935
10
    if (&Arg == maybeParam)
1936
6
      return true;
1937
6
1938
6
  return false;
1939
6
}
1940
1941
bool ScopBuilder::canAlwaysBeHoisted(MemoryAccess *MA,
1942
                                     bool StmtInvalidCtxIsEmpty,
1943
                                     bool MAInvalidCtxIsEmpty,
1944
361
                                     bool NonHoistableCtxIsEmpty) {
1945
361
  LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
1946
361
  const DataLayout &DL = LInst->getParent()->getModule()->getDataLayout();
1947
361
  if (PollyAllowDereferenceOfAllFunctionParams &&
1948
361
      isAParameter(LInst->getPointerOperand(), scop->getFunction()))
1949
6
    return true;
1950
355
1951
355
  // TODO: We can provide more information for better but more expensive
1952
355
  //       results.
1953
355
  if (!isDereferenceableAndAlignedPointer(LInst->getPointerOperand(),
1954
355
                                          LInst->getType(),
1955
355
                                          LInst->getAlignment(), DL))
1956
299
    return false;
1957
56
1958
56
  // If the location might be overwritten we do not hoist it unconditionally.
1959
56
  //
1960
56
  // TODO: This is probably too conservative.
1961
56
  if (!NonHoistableCtxIsEmpty)
1962
1
    return false;
1963
55
1964
55
  // If a dereferenceable load is in a statement that is modeled precisely we
1965
55
  // can hoist it.
1966
55
  if (StmtInvalidCtxIsEmpty && MAInvalidCtxIsEmpty)
1967
50
    return true;
1968
5
1969
5
  // Even if the statement is not modeled precisely we can hoist the load if it
1970
5
  // does not involve any parameters that might have been specialized by the
1971
5
  // statement domain.
1972
  for (unsigned u = 0, e = MA->getNumSubscripts(); u < e; u++)
)
1973
5
    if (!isa<SCEVConstant>(MA->getSubscript(u)))
1974
0
      return false;
1975
5
  return true;
1976
5
}
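// [Editor's note] Example for the speculation test above: a load from a
// function-argument pointer (when PollyAllowDereferenceOfAllFunctionParams
// is set), or from a pointer LLVM proves dereferenceable and sufficiently
// aligned, can be preloaded unconditionally, provided its location is never
// overwritten in the SCoP and either the statement is modeled precisely or
// all of its subscripts are constant.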
1977
1978
void ScopBuilder::addInvariantLoads(ScopStmt &Stmt,
1979
369
                                    InvariantAccessesTy &InvMAs) {
1980
369
  if (InvMAs.empty())
1981
122
    return;
1982
247
1983
247
  isl::set StmtInvalidCtx = Stmt.getInvalidContext();
1984
247
  bool StmtInvalidCtxIsEmpty = StmtInvalidCtx.is_empty();
1985
247
1986
247
  // Get the context under which the statement is executed but remove the error
1987
247
  // context under which this statement is reached.
1988
247
  isl::set DomainCtx = Stmt.getDomain().params();
1989
247
  DomainCtx = DomainCtx.subtract(StmtInvalidCtx);
1990
247
1991
247
  if (DomainCtx.n_basic_set() >= MaxDisjunctsInDomain) {
1992
1
    auto *AccInst = InvMAs.front().MA->getAccessInstruction();
1993
1
    scop->invalidate(COMPLEXITY, AccInst->getDebugLoc(), AccInst->getParent());
1994
1
    return;
1995
1
  }
1996
246
1997
246
  // Project out all parameters that relate to loads in the statement. Otherwise
1998
246
  // we could have cyclic dependences on the constraints under which the
1999
246
  // hoisted loads are executed and we could not determine an order in which to
2000
246
  // pre-load them. This happens because not only lower bounds are part of the
2001
246
  // domain but also upper bounds.
2002
361
  for (auto &InvMA : InvMAs) {
2003
361
    auto *MA = InvMA.MA;
2004
361
    Instruction *AccInst = MA->getAccessInstruction();
2005
361
    if (SE.isSCEVable(AccInst->getType())) {
2006
337
      SetVector<Value *> Values;
2007
586
      for (const SCEV *Parameter : scop->parameters()) {
2008
586
        Values.clear();
2009
586
        findValues(Parameter, SE, Values);
2010
586
        if (!Values.count(AccInst))
2011
474
          continue;
2012
112
2013
112
        if (isl::id ParamId = scop->getIdForParam(Parameter)) {
2014
112
          int Dim = DomainCtx.find_dim_by_id(isl::dim::param, ParamId);
2015
112
          if (Dim >= 0)
2016
111
            DomainCtx = DomainCtx.eliminate(isl::dim::param, Dim, 1);
2017
112
        }
2018
112
      }
2019
337
    }
2020
361
  }
2021
246
2022
361
  for (auto &InvMA : InvMAs) {
2023
361
    auto *MA = InvMA.MA;
2024
361
    isl::set NHCtx = InvMA.NonHoistableCtx;
2025
361
2026
361
    // Check for another invariant access that accesses the same location as
2027
361
    // MA and if found consolidate them. Otherwise create a new equivalence
2028
361
    // class at the end of InvariantEquivClasses.
2029
361
    LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
2030
361
    Type *Ty = LInst->getType();
2031
361
    const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());
2032
361
2033
361
    isl::set MAInvalidCtx = MA->getInvalidContext();
2034
361
    bool NonHoistableCtxIsEmpty = NHCtx.is_empty();
2035
361
    bool MAInvalidCtxIsEmpty = MAInvalidCtx.is_empty();
2036
361
2037
361
    isl::set MACtx;
2038
361
    // Check if we know that this pointer can be speculatively accessed.
2039
361
    if (canAlwaysBeHoisted(MA, StmtInvalidCtxIsEmpty, MAInvalidCtxIsEmpty,
2040
361
                           NonHoistableCtxIsEmpty)) {
2041
61
      MACtx = isl::set::universe(DomainCtx.get_space());
2042
300
    } else {
2043
300
      MACtx = DomainCtx;
2044
300
      MACtx = MACtx.subtract(MAInvalidCtx.unite(NHCtx));
2045
300
      MACtx = MACtx.gist_params(scop->getContext());
2046
300
    }
2047
361
2048
361
    bool Consolidated = false;
2049
927
    for (auto &IAClass : scop->invariantEquivClasses()) {
2050
927
      if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
2051
690
        continue;
2052
237
2053
237
      // If the pointer and the type are equal, check whether the access function wrt.
2054
237
      // to the domain is equal too. It can happen that the domain fixes
2055
237
      // parameter values and these can be different for distinct parts of the
2056
237
      // SCoP. If this happens we cannot consolidate the loads but need to
2057
237
      // create a new invariant load equivalence class.
2058
237
      auto &MAs = IAClass.InvariantAccesses;
2059
237
      if (!MAs.empty()) {
2060
45
        auto *LastMA = MAs.front();
2061
45
2062
45
        isl::set AR = MA->getAccessRelation().range();
2063
45
        isl::set LastAR = LastMA->getAccessRelation().range();
2064
45
        bool SameAR = AR.is_equal(LastAR);
2065
45
2066
45
        if (!SameAR)
2067
4
          continue;
2068
233
      }
2069
233
2070
233
      // Add MA to the list of accesses that are in this class.
2071
233
      MAs.push_front(MA);
2072
233
2073
233
      Consolidated = true;
2074
233
2075
233
      // Unify the execution context of the class and this statement.
2076
233
      isl::set IAClassDomainCtx = IAClass.ExecutionContext;
2077
233
      if (IAClassDomainCtx)
2078
41
        IAClassDomainCtx = IAClassDomainCtx.unite(MACtx).coalesce();
2079
192
      else
2080
192
        IAClassDomainCtx = MACtx;
2081
233
      IAClass.ExecutionContext = IAClassDomainCtx;
2082
233
      break;
2083
233
    }
2084
361
2085
361
    if (Consolidated)
2086
233
      continue;
2087
128
2088
128
    MACtx = MACtx.coalesce();
2089
128
2090
128
    // If we did not consolidate MA, i.e., did not find an equivalence class
2091
128
    // for it, we create a new one.
2092
128
    scop->addInvariantEquivClass(
2093
128
        InvariantEquivClassTy{PointerSCEV, MemoryAccessList{MA}, MACtx, Ty});
2094
128
  }
2095
246
}
2096
2097
void ScopBuilder::collectCandidateReductionLoads(
2098
2.49k
    MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) {
2099
2.49k
  ScopStmt *Stmt = StoreMA->getStatement();
2100
2.49k
2101
2.49k
  auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction());
2102
2.49k
  if (!Store)
2103
778
    return;
2104
1.72k
2105
1.72k
  // Skip if there is not exactly one binary operator between the load and the store
2106
1.72k
  auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand());
2107
1.72k
  if (!BinOp)
2108
1.05k
    return;
2109
666
2110
666
  // Skip if the binary operator has multiple uses
2111
666
  if (BinOp->getNumUses() != 1)
2112
48
    return;
2113
618
2114
618
  // Skip if the opcode of the binary operator is not commutative/associative
2115
618
  if (!BinOp->isCommutative() || !BinOp->isAssociative())
2116
244
    return;
2117
374
2118
374
  // Skip if the binary operator is outside the current SCoP
2119
374
  if (BinOp->getParent() != Store->getParent())
2120
1
    return;
2121
373
2122
373
  // Skip if it is a multiplicative reduction and we disabled them
2123
373
  if (DisableMultiplicativeReductions &&
2124
373
      (BinOp->getOpcode() == Instruction::Mul ||
2125
2
       BinOp->getOpcode() == Instruction::FMul))
2126
1
    return;
2127
372
2128
372
  // Check the binary operator operands for a candidate load
2129
372
  auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0));
2130
372
  auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1));
2131
372
  if (!PossibleLoad0 && !PossibleLoad1)
2132
9
    return;
2133
363
2134
363
  // A load is only a candidate if it cannot escape (thus has only this use)
2135
363
  if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1)
2136
346
    if (PossibleLoad0->getParent() == Store->getParent())
2137
342
      Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad0));
2138
363
  if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1)
2139
77
    if (PossibleLoad1->getParent() == Store->getParent())
2140
76
      Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad1));
2141
363
}
2142
2143
/// Find the canonical scop array info object for a set of invariant-load-
2144
/// hoisted loads. The canonical array is the one that corresponds to the
2145
/// first load in the list of accesses which is used as base pointer of a
2146
/// scop array.
2147
static const ScopArrayInfo *findCanonicalArray(Scop &S,
2148
327
                                               MemoryAccessList &Accesses) {
2149
345
  for (MemoryAccess *Access : Accesses) {
2150
345
    const ScopArrayInfo *CanonicalArray = S.getScopArrayInfoOrNull(
2151
345
        Access->getAccessInstruction(), MemoryKind::Array);
2152
345
    if (CanonicalArray)
2153
75
      return CanonicalArray;
2154
345
  }
2155
327
  return nullptr;
2156
327
}
2157
2158
/// Check if @p Array serves as base array in an invariant load.
2159
11
static bool isUsedForIndirectHoistedLoad(Scop &S, const ScopArrayInfo *Array) {
2160
11
  for (InvariantEquivClassTy &EqClass2 : S.getInvariantAccesses())
2161
23
    for (MemoryAccess *Access2 : EqClass2.InvariantAccesses)
2162
38
      if (Access2->getScopArrayInfo() == Array)
2163
6
        return true;
2164
11
  return false;
2165
11
}
2166
2167
/// Replace the base pointer arrays in all memory accesses referencing @p Old,
2168
/// with a reference to @p New.
2169
static void replaceBasePtrArrays(Scop &S, const ScopArrayInfo *Old,
2170
5
                                 const ScopArrayInfo *New) {
2171
5
  for (ScopStmt &Stmt : S)
2172
17
    for (MemoryAccess *Access : Stmt) {
2173
17
      if (Access->getLatestScopArrayInfo() != Old)
2174
11
        continue;
2175
6
2176
6
      isl::id Id = New->getBasePtrId();
2177
6
      isl::map Map = Access->getAccessRelation();
2178
6
      Map = Map.set_tuple_id(isl::dim::out, Id);
2179
6
      Access->setAccessRelation(Map);
2180
6
    }
2181
5
}
2182
2183
1.15k
void ScopBuilder::canonicalizeDynamicBasePtrs() {
2184
1.15k
  for (InvariantEquivClassTy &EqClass : scop->InvariantEquivClasses) {
2185
327
    MemoryAccessList &BasePtrAccesses = EqClass.InvariantAccesses;
2186
327
2187
327
    const ScopArrayInfo *CanonicalBasePtrSAI =
2188
327
        findCanonicalArray(*scop, BasePtrAccesses);
2189
327
2190
327
    if (!CanonicalBasePtrSAI)
2191
252
      continue;
2192
75
2193
92
    for (MemoryAccess *BasePtrAccess : BasePtrAccesses) {
2194
92
      const ScopArrayInfo *BasePtrSAI = scop->getScopArrayInfoOrNull(
2195
92
          BasePtrAccess->getAccessInstruction(), MemoryKind::Array);
2196
92
      if (!BasePtrSAI || BasePtrSAI == CanonicalBasePtrSAI ||
2197
92
          !BasePtrSAI->isCompatibleWith(CanonicalBasePtrSAI))
2198
81
        continue;
2199
11
2200
11
      // We currently do not canonicalize arrays where some accesses are
2202
11
      // hoisted as invariant loads. If we did, we would need to update the access
2202
11
      // function of the invariant loads as well. However, as this is not a
2203
11
      // very common situation, we leave this for now to avoid further
2204
11
      // complexity increases.
2205
11
      if (isUsedForIndirectHoistedLoad(*scop, BasePtrSAI))
2206
6
        continue;
2207
5
2208
5
      replaceBasePtrArrays(*scop, BasePtrSAI, CanonicalBasePtrSAI);
2209
5
    }
2210
75
  }
2211
1.15k
}
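// [Editor's note] Effect of the canonicalization above, on a hypothetical
// example: when two compatible ScopArrayInfo objects describe the same
// indirectly loaded base pointer, every access relation whose output tuple
// names the non-canonical array is retargeted (via set_tuple_id) to the
// canonical one, so later passes see a single array instead of two aliased
// descriptions.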
2212
2213
2.32k
void ScopBuilder::buildAccessRelations(ScopStmt &Stmt) {
2214
4.77k
  for (MemoryAccess *Access : Stmt.MemAccs) {
2215
4.77k
    Type *ElementType = Access->getElementType();
2216
4.77k
2217
4.77k
    MemoryKind Ty;
2218
4.77k
    if (Access->isPHIKind())
2219
576
      Ty = MemoryKind::PHI;
2220
4.19k
    else if (Access->isExitPHIKind())
2221
84
      Ty = MemoryKind::ExitPHI;
2222
4.11k
    else if (Access->isValueKind())
2223
682
      Ty = MemoryKind::Value;
2224
3.43k
    else
2225
3.43k
      Ty = MemoryKind::Array;
2226
4.77k
2227
4.77k
    auto *SAI = scop->getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(),
2228
4.77k
                                               ElementType, Access->Sizes, Ty);
2229
4.77k
    Access->buildAccessRelation(SAI);
2230
4.77k
    scop->addAccessData(Access);
2231
4.77k
  }
2232
2.32k
}
2233
2234
/// Add the minimal/maximal access in @p Set to @p MinMaxAccesses.
2235
///
2236
/// @return True if more accesses should be added, false if we reached the
2237
///         maximal number of run-time checks to be generated.
2238
static bool buildMinMaxAccess(isl::set Set,
2239
550
                              Scop::MinMaxVectorTy &MinMaxAccesses, Scop &S) {
2240
550
  isl::pw_multi_aff MinPMA, MaxPMA;
2241
550
  isl::pw_aff LastDimAff;
2242
550
  isl::aff OneAff;
2243
550
  unsigned Pos;
2244
550
2245
550
  Set = Set.remove_divs();
2246
550
  polly::simplify(Set);
2247
550
2248
550
  if (Set.n_basic_set() > RunTimeChecksMaxAccessDisjuncts)
2249
4
    Set = Set.simple_hull();
2250
550
2251
550
  // Restrict the number of parameters involved in the access as the lexmin/
2252
550
  // lexmax computation will take too long if this number is high.
2253
550
  //
2254
550
  // Experiments with a simple test case using an i7 4800MQ:
2255
550
  //
2256
550
  //  #Parameters involved | Time (in sec)
2257
550
  //            6          |     0.01
2258
550
  //            7          |     0.04
2259
550
  //            8          |     0.12
2260
550
  //            9          |     0.40
2261
550
  //           10          |     1.54
2262
550
  //           11          |     6.78
2263
550
  //           12          |    30.38
2264
550
  //
2265
550
  if (isl_set_n_param(Set.get()) > RunTimeChecksMaxParameters) {
2266
7
    unsigned InvolvedParams = 0;
2267
81
    for (unsigned u = 0, e = isl_set_n_param(Set.get()); u < e; u++)
2268
74
      if (Set.involves_dims(isl::dim::param, u, 1))
2269
23
        InvolvedParams++;
2270
7
2271
7
    if (InvolvedParams > RunTimeChecksMaxParameters)
2272
1
      return false;
2273
549
  }
2274
549
2275
549
  MinPMA = Set.lexmin_pw_multi_aff();
2276
549
  MaxPMA = Set.lexmax_pw_multi_aff();
2277
549
2278
549
  MinPMA = MinPMA.coalesce();
2279
549
  MaxPMA = MaxPMA.coalesce();
2280
549
2281
549
  // Adjust the last dimension of the maximal access by one as we want to
2282
549
  // enclose the accessed memory region by MinPMA and MaxPMA. The pointer
2283
549
  // we test during code generation might now point after the end of the
2284
549
  // allocated array but we will never dereference it anyway.
2285
549
  assert((!MaxPMA || MaxPMA.dim(isl::dim::out)) &&
2286
549
         "Assumed at least one output dimension");
2287
549
2288
549
  Pos = MaxPMA.dim(isl::dim::out) - 1;
2289
549
  LastDimAff = MaxPMA.get_pw_aff(Pos);
2290
549
  OneAff = isl::aff(isl::local_space(LastDimAff.get_domain_space()));
2291
549
  OneAff = OneAff.add_constant_si(1);
2292
549
  LastDimAff = LastDimAff.add(OneAff);
2293
549
  MaxPMA = MaxPMA.set_pw_aff(Pos, LastDimAff);
2294
549
2295
549
  if (!MinPMA || !MaxPMA)
2296
3
    return false;
2297
546
2298
546
  MinMaxAccesses.push_back(std::make_pair(MinPMA, MaxPMA));
2299
546
2300
546
  return true;
2301
546
}
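// [Editor's note] Example of the bounds computed above: for an access A[i]
// with domain 0 <= i < N, lexmin/lexmax give the pair (&A[0], &A[N-1]), and
// bumping the last output dimension by one turns it into the half-open
// range [&A[0], &A[N]) that the generated run-time alias checks compare;
// the one-past-the-end pointer is compared but never dereferenced.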
2302
2303
/// Wrapper function to calculate minimal/maximal accesses to each array.
2304
bool ScopBuilder::calculateMinMaxAccess(AliasGroupTy AliasGroup,
2305
429
                                        Scop::MinMaxVectorTy &MinMaxAccesses) {
2306
429
  MinMaxAccesses.reserve(AliasGroup.size());
2307
429
2308
429
  isl::union_set Domains = scop->getDomains();
2309
429
  isl::union_map Accesses = isl::union_map::empty(scop->getParamSpace());
2310
429
2311
429
  for (MemoryAccess *MA : AliasGroup)
2312
848
    Accesses = Accesses.add_map(MA->getAccessRelation());
2313
429
2314
429
  Accesses = Accesses.intersect_domain(Domains);
2315
429
  isl::union_set Locations = Accesses.range();
2316
429
2317
429
  bool LimitReached = false;
2318
550
  for (isl::set Set : Locations.get_set_list()) {
2319
550
    LimitReached |= !buildMinMaxAccess(Set, MinMaxAccesses, *scop);
2320
550
    if (LimitReached)
2321
4
      break;
2322
550
  }
2323
429
2324
429
  return !LimitReached;
2325
429
}
2326
2327
1.76k
static isl::set getAccessDomain(MemoryAccess *MA) {
2328
1.76k
  isl::set Domain = MA->getStatement()->getDomain();
2329
1.76k
  Domain = Domain.project_out(isl::dim::set, 0, Domain.n_dim());
2330
1.76k
  return Domain.reset_tuple_id();
2331
1.76k
}
2332
2333
1.16k
bool ScopBuilder::buildAliasChecks() {
2334
1.16k
  if (!PollyUseRuntimeAliasChecks)
2335
21
    return true;
2336
1.13k
2337
1.13k
  if (buildAliasGroups()) {
2338
1.13k
    // Aliasing assumptions do not go through addAssumption but we still want to
2339
1.13k
    // collect statistics so we do it here explicitly.
2340
1.13k
    if (scop->getAliasGroups().size())
2341
204
      Scop::incrementNumberOfAliasingAssumptions(1);
2342
1.13k
    return true;
2343
1.13k
  }
2344
6
2345
6
  // If a problem occurs while building the alias groups we need to delete
2346
6
  // this SCoP and pretend it wasn't valid in the first place. To this end
2347
6
  // we make the assumed context infeasible.
2348
6
  scop->invalidate(ALIASING, DebugLoc());
2349
6
2350
6
  LLVM_DEBUG(
2351
6
      dbgs() << "\n\nNOTE: Run time checks for " << scop->getNameStr()
2352
6
             << " could not be created as the number of parameters involved "
2353
6
                "is too high. The SCoP will be "
2354
6
                "dismissed.\nUse:\n\t--polly-rtc-max-parameters=X\nto adjust "
2355
6
                "the maximal number of parameters but be advised that the "
2356
6
                "compile time might increase exponentially.\n\n");
2357
6
  return false;
2358
6
}
2359
2360
std::tuple<ScopBuilder::AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>>
2361
1.13k
ScopBuilder::buildAliasGroupsForAccesses() {
2362
1.13k
  AliasSetTracker AST(AA);
2363
1.13k
2364
1.13k
  DenseMap<Value *, MemoryAccess *> PtrToAcc;
2365
1.13k
  DenseSet<const ScopArrayInfo *> HasWriteAccess;
2366
2.25k
  for (ScopStmt &Stmt : *scop) {
2367
2.25k
2368
2.25k
    isl::set StmtDomain = Stmt.getDomain();
2369
2.25k
    bool StmtDomainEmpty = StmtDomain.is_empty();
2370
2.25k
2371
2.25k
    // Statements with an empty domain will never be executed.
2372
2.25k
    if (StmtDomainEmpty)
2373
4
      continue;
2374
2.25k
2375
4.57k
    for (MemoryAccess *MA : Stmt) {
2376
4.57k
      if (MA->isScalarKind())
2377
1.32k
        continue;
2378
3.25k
      if (!MA->isRead())
2379
1.68k
        HasWriteAccess.insert(MA->getScopArrayInfo());
2380
3.25k
      MemAccInst Acc(MA->getAccessInstruction());
2381
3.25k
      if (MA->isRead() && isa<MemTransferInst>(Acc))
2382
7
        PtrToAcc[cast<MemTransferInst>(Acc)->getRawSource()] = MA;
2383
3.25k
      else
2384
3.25k
        PtrToAcc[Acc.getPointerOperand()] = MA;
2385
3.25k
      AST.add(Acc);
2386
3.25k
    }
2387
2.25k
  }
2388
1.13k
2389
1.13k
  AliasGroupVectorTy AliasGroups;
2390
1.51k
  for (AliasSet &AS : AST) {
2391
1.51k
    if (AS.isMustAlias() || AS.isForwardingAliasSet())
2392
1.14k
      continue;
2393
375
    AliasGroupTy AG;
2394
375
    for (auto &PR : AS)
2395
1.38k
      AG.push_back(PtrToAcc[PR.getValue()]);
2396
375
    if (AG.size() < 2)
2397
8
      continue;
2398
367
    AliasGroups.push_back(std::move(AG));
2399
367
  }
2400
1.13k
2401
1.13k
  return std::make_tuple(AliasGroups, HasWriteAccess);
2402
1.13k
}
2403
2404
1.13k
bool ScopBuilder::buildAliasGroups() {
2405
1.13k
  // To create sound alias checks we perform the following steps:
2406
1.13k
  //   o) We partition each group into read only and non read only accesses.
2407
1.13k
  //   o) For each group with more than one base pointer we then compute minimal
2408
1.13k
  //      and maximal accesses to each array of a group in read only and non
2409
1.13k
  //      read only partitions separately.
2410
1.13k
  AliasGroupVectorTy AliasGroups;
2411
1.13k
  DenseSet<const ScopArrayInfo *> HasWriteAccess;
2412
1.13k
2413
1.13k
  std::tie(AliasGroups, HasWriteAccess) = buildAliasGroupsForAccesses();
2414
1.13k
2415
1.13k
  splitAliasGroupsByDomain(AliasGroups);
2416
1.13k
2417
1.13k
  for (AliasGroupTy &AG : AliasGroups) {
2418
374
    if (!scop->hasFeasibleRuntimeContext())
2419
1
      return false;
2420
373
2421
373
    {
2422
373
      IslMaxOperationsGuard MaxOpGuard(scop->getIslCtx().get(), OptComputeOut);
2423
373
      bool Valid = buildAliasGroup(AG, HasWriteAccess);
2424
373
      if (!Valid)
2425
5
        return false;
2426
368
    }
2427
368
    if (isl_ctx_last_error(scop->getIslCtx().get()) == isl_error_quota) {
2428
0
      scop->invalidate(COMPLEXITY, DebugLoc());
2429
0
      return false;
2430
0
    }
2431
368
  }
2432
1.13k
2433
1.13k
  return true;
2434
1.13k
}
2435
2436
bool ScopBuilder::buildAliasGroup(
2437
373
    AliasGroupTy &AliasGroup, DenseSet<const ScopArrayInfo *> HasWriteAccess) {
2438
373
  AliasGroupTy ReadOnlyAccesses;
2439
373
  AliasGroupTy ReadWriteAccesses;
2440
373
  SmallPtrSet<const ScopArrayInfo *, 4> ReadWriteArrays;
2441
373
  SmallPtrSet<const ScopArrayInfo *, 4> ReadOnlyArrays;
2442
373
2443
373
  if (AliasGroup.size() < 2)
2444
4
    return true;
2445
369
2446
1.35k
  for (MemoryAccess *Access : AliasGroup) {
2447
1.35k
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "PossibleAlias",
2448
1.35k
                                        Access->getAccessInstruction())
2449
1.35k
             << "Possibly aliasing pointer, use restrict keyword.");
2450
1.35k
    const ScopArrayInfo *Array = Access->getScopArrayInfo();
2451
1.35k
    if (HasWriteAccess.count(Array)) {
2452
890
      ReadWriteArrays.insert(Array);
2453
890
      ReadWriteAccesses.push_back(Access);
2454
890
    } else {
2455
469
      ReadOnlyArrays.insert(Array);
2456
469
      ReadOnlyAccesses.push_back(Access);
2457
469
    }
2458
1.35k
  }
2459
369
2460
369
  // If there are no read-only pointers, and fewer than two read-write pointers,
2461
369
  // no alias check is needed.
2462
369
  if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= 1)
2463
144
    return true;
2464
225
2465
225
  // If there is no read-write pointer, no alias check is needed.
2466
225
  if (ReadWriteArrays.empty())
2467
10
    return true;
2468
215
2469
215
  // For non-affine accesses, no alias check can be generated as we cannot
2470
215
  // compute a sufficiently tight lower and upper bound: bail out.
2471
848
  for (MemoryAccess *MA : AliasGroup) {
2472
848
    if (!MA->isAffine()) {
2473
0
      scop->invalidate(ALIASING, MA->getAccessInstruction()->getDebugLoc(),
2474
0
                       MA->getAccessInstruction()->getParent());
2475
0
      return false;
2476
0
    }
2477
848
  }
2478
215
2479
215
  // Ensure that for all memory accesses for which we generate alias checks,
2480
215
  // their base pointers are available.
2481
848
  for (MemoryAccess *MA : AliasGroup) {
2482
848
    if (MemoryAccess *BasePtrMA = scop->lookupBasePtrAccess(MA))
2483
0
      scop->addRequiredInvariantLoad(
2484
0
          cast<LoadInst>(BasePtrMA->getAccessInstruction()));
2485
848
  }
2486
215
2487
215
  //  scop->getAliasGroups().emplace_back();
2488
215
  //  Scop::MinMaxVectorPairTy &pair = scop->getAliasGroups().back();
2489
215
  Scop::MinMaxVectorTy MinMaxAccessesReadWrite;
2490
215
  Scop::MinMaxVectorTy MinMaxAccessesReadOnly;
2491
215
2492
215
  bool Valid;
2493
215
2494
215
  Valid = calculateMinMaxAccess(ReadWriteAccesses, MinMaxAccessesReadWrite);
2495
215
2496
215
  if (!Valid)
2497
0
    return false;
2498
215
2499
215
  // Bail out if the number of values we need to compare is too large.
2500
215
  // This is important as the number of comparisons grows quadratically with
2501
215
  // the number of values we need to compare.
2502
215
  if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() >
2503
215
      RunTimeChecksMaxArraysPerGroup)
2504
1
    return false;
2505
214
2506
214
  Valid = calculateMinMaxAccess(ReadOnlyAccesses, MinMaxAccessesReadOnly);
2507
214
2508
214
  scop->addAliasGroup(MinMaxAccessesReadWrite, MinMaxAccessesReadOnly);
2509
214
  if (!Valid)
2510
4
    return false;
2511
210
2512
210
  return true;
2513
210
}
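// [Editor's note] For the partitioning above, on a hypothetical group
// {A (written), B (written), C (read only)}: min/max ranges are computed
// separately for the read-write and read-only sides, and code generation
// only has to compare read-write arrays against each other and against the
// read-only ones; two read-only arrays never need a check between them.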
2514
2515
1.13k
void ScopBuilder::splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups) {
2516
1.51k
  for (unsigned u = 0; u < AliasGroups.size(); u++) {
2517
375
    AliasGroupTy NewAG;
2518
375
    AliasGroupTy &AG = AliasGroups[u];
2519
375
    AliasGroupTy::iterator AGI = AG.begin();
2520
375
    isl::set AGDomain = getAccessDomain(*AGI);
2521
1.76k
    while (AGI != AG.end()) {
2522
1.39k
      MemoryAccess *MA = *AGI;
2523
1.39k
      isl::set MADomain = getAccessDomain(MA);
2524
1.39k
      if (AGDomain.is_disjoint(MADomain)) {
2525
26
        NewAG.push_back(MA);
2526
26
        AGI = AG.erase(AGI);
2527
1.36k
      } else {
2528
1.36k
        AGDomain = AGDomain.unite(MADomain);
2529
1.36k
        AGI++;
2530
1.36k
      }
2531
1.39k
    }
2532
375
    if (NewAG.size() > 1)
2533
8
      AliasGroups.push_back(std::move(NewAG));
2534
375
  }
2535
1.13k
}
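// [Editor's note] Example for the splitting above: if one access in a group
// executes only under N > 0 and another only under N <= 0, their access
// domains are disjoint, the two can never run together, and the second is
// moved into a new group, avoiding a pointless run-time check between them.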
2536
2537
#ifndef NDEBUG
2538
static void verifyUse(Scop *S, Use &Op, LoopInfo &LI) {
2539
  auto PhysUse = VirtualUse::create(S, Op, &LI, false);
2540
  auto VirtUse = VirtualUse::create(S, Op, &LI, true);
2541
  assert(PhysUse.getKind() == VirtUse.getKind());
2542
}
2543
2544
/// Check the consistency of every statement's MemoryAccesses.
2545
///
2546
/// The check is carried out by expecting the "physical" kind of use (derived
2547
/// from the BasicBlock the instruction resides in) to be the same as the "virtual"
2548
/// kind of use (derived from a statement's MemoryAccess).
2549
///
2550
/// The "physical" uses are taken by ensureValueRead to determine whether to
2551
/// create MemoryAccesses. When done, the kind of scalar access should be the
2552
/// same no matter which way it was derived.
2553
///
2554
/// The MemoryAccesses might be changed by later SCoP-modifying passes and hence
2555
/// can intentionally influence the kind of uses (not corresponding to the
2556
/// "physical" anymore, hence called "virtual"). The CodeGenerator therefore has
2557
/// to pick up the virtual uses. But here in the code generator, this has not
2558
/// happened yet, such that virtual and physical uses are equivalent.
2559
static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT) {
2560
  for (auto *BB : S->getRegion().blocks()) {
2561
    for (auto &Inst : *BB) {
2562
      auto *Stmt = S->getStmtFor(&Inst);
2563
      if (!Stmt)
2564
        continue;
2565
2566
      if (isIgnoredIntrinsic(&Inst))
2567
        continue;
2568
2569
      // Branch conditions are encoded in the statement domains.
2570
      if (Inst.isTerminator() && Stmt->isBlockStmt())
2571
        continue;
2572
2573
      // Verify all uses.
2574
      for (auto &Op : Inst.operands())
2575
        verifyUse(S, Op, LI);
2576
2577
      // Stores do not produce values used by other statements.
2578
      if (isa<StoreInst>(Inst))
2579
        continue;
2580
2581
      // For every value defined in the block, also check that a use of that
2582
      // value in the same statement would not be an inter-statement use. It can
2583
      // still be synthesizable or load-hoisted, but these kind of instructions
2584
      // are not directly copied in code-generation.
2585
      auto VirtDef =
2586
          VirtualUse::create(S, Stmt, Stmt->getSurroundingLoop(), &Inst, true);
2587
      assert(VirtDef.getKind() == VirtualUse::Synthesizable ||
2588
             VirtDef.getKind() == VirtualUse::Intra ||
2589
             VirtDef.getKind() == VirtualUse::Hoisted);
2590
    }
2591
  }
2592
2593
  if (S->hasSingleExitEdge())
2594
    return;
2595
2596
  // PHINodes in the SCoP region's exit block are also uses to be checked.
2597
  if (!S->getRegion().isTopLevelRegion()) {
2598
    for (auto &Inst : *S->getRegion().getExit()) {
2599
      if (!isa<PHINode>(Inst))
2600
        break;
2601
2602
      for (auto &Op : Inst.operands())
2603
        verifyUse(S, Op, LI);
2604
    }
2605
  }
2606
}
2607
#endif
2608
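The invariant asserted by verifyUse can be illustrated with a toy model: classify a use once from IR positions ("physical") and once from an access model that later passes may edit ("virtual"); both must coincide while the model is untouched. The following is a hypothetical, self-contained sketch, not Polly code:

#include <cassert>
#include <set>

enum class UseKind { Intra, Inter, Hoisted };

// Set of definitions whose loads a (hypothetical) hoisting pass moved out.
struct Model { std::set<int> HoistedDefs; };

// "Physical" kind: derived from IR positions only; hoisting is invisible here.
static UseKind physicalKind(int DefStmt, int UseStmt) {
  return DefStmt == UseStmt ? UseKind::Intra : UseKind::Inter;
}

// "Virtual" kind: consults the access model, which later passes may edit.
static UseKind virtualKind(const Model &M, int DefStmt, int UseStmt) {
  if (M.HoistedDefs.count(DefStmt))
    return UseKind::Hoisted;
  return physicalKind(DefStmt, UseStmt);
}

int main() {
  Model M; // freshly built SCoP: no pass has modified the accesses yet
  assert(physicalKind(1, 2) == virtualKind(M, 1, 2)); // verifyUse's invariant
  M.HoistedDefs.insert(1);                          // a later pass edits the model...
  assert(virtualKind(M, 1, 2) == UseKind::Hoisted); // ...and the kinds may diverge
  return 0;
}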
2609
/// Return the block that represents @p RN.
2610
115
static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
2611
115
  return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
2612
115
                           : RN->getNodeAs<BasicBlock>();   [region count: 0]
2613
115
}
2614
2615
1.20k
void ScopBuilder::buildScop(Region &R, AssumptionCache &AC) {
2616
1.20k
  scop.reset(new Scop(R, SE, LI, DT, *SD.getDetectionContext(&R), ORE));
2617
1.20k
2618
1.20k
  buildStmts(R);
2619
1.20k
2620
1.20k
  // Create all invariant load instructions first. These are categorized as
2621
1.20k
  // 'synthesizable' and therefore are not part of any ScopStmt, but they still
2622
1.20k
  // need to be created somewhere.
2623
1.20k
  const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
2624
5.96k
  for (BasicBlock *BB : scop->getRegion().blocks()) {
2625
5.96k
    if (isErrorBlock(*BB, scop->getRegion(), LI, DT))
2626
43
      continue;
2627
5.92k
2628
21.8k
    for (Instruction &Inst : *BB) {   [region count: 5.92k]
2629
21.8k
      LoadInst *Load = dyn_cast<LoadInst>(&Inst);
2630
21.8k
      if (!Load)
2631
20.0k
        continue;
2632
1.81k
2633
1.81k
      if (!RIL.count(Load))
2634
1.52k
        continue;
2635
288
2636
288
      // Invariant loads require a MemoryAccess to be created in some statement.
2637
288
      // It is not important to which statement the MemoryAccess is added
2638
288
      // because it will later be removed from the ScopStmt again. We choose the
2639
288
      // first statement of the basic block the LoadInst is in.
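      // Illustration (hypothetical IR, not from this file): for
      //   %val = load i64, i64* %ptr   ; required invariant load
      // a READ access is attached to the first ScopStmt of %val's block and
      // is detached again when the load is hoisted later.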
2640
288
      ArrayRef<ScopStmt *> List = scop->getStmtListFor(BB);
2641
288
      assert(!List.empty());
2642
288
      ScopStmt *RILStmt = List.front();
2643
288
      buildMemoryAccess(Load, RILStmt);
2644
288
    }
2645
5.92k
  }
2646
1.20k
  buildAccessFunctions();
2647
1.20k
2648
1.20k
  // In case the region does not have an exiting block we will later (during
2649
1.20k
  // code generation) split the exit block. This will move potential PHI nodes
2650
1.20k
  // from the current exit block into the new region exiting block. Hence, PHI
2651
1.20k
  // nodes that are currently not part of the region will become part of it.
2652
1.20k
  // To handle these PHI nodes later, we now model their operands as scalar
2653
1.20k
  // accesses. Note that we do not model anything in the exit block if we have
2654
1.20k
  // an exiting block in the region, as there will not be any splitting later.
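  // Illustration (hypothetical IR, not from this file): with an exit block
  //   exit:
  //     %phi = phi i32 [ %a, %bb1 ], [ %b, %bb2 ]
  // splitting during code generation pulls %phi into the region, so %a and
  // %b are modeled as scalar accesses already now.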
2655
1.20k
  if (!R.isTopLevelRegion() && !scop->hasSingleExitEdge()) {   [region count: 1.20k]
2656
203
    for (Instruction &Inst : *R.getExit()) {
2657
203
      PHINode *PHI = dyn_cast<PHINode>(&Inst);
2658
203
      if (!PHI)
2659
161
        break;
2660
42
2661
42
      buildPHIAccesses(nullptr, PHI, nullptr, true);
2662
42
    }
2663
161
  }
2664
1.20k
2665
1.20k
  // Create memory accesses for global reads since all arrays are now known.
2666
1.20k
  auto *AF = SE.getConstant(IntegerType::getInt64Ty(SE.getContext()), 0);
2667
1.20k
  for (auto GlobalReadPair : GlobalReads) {
2668
8
    ScopStmt *GlobalReadStmt = GlobalReadPair.first;
2669
8
    Instruction *GlobalRead = GlobalReadPair.second;
2670
8
    for (auto *BP : ArrayBasePointers)
2671
16
      addArrayAccess(GlobalReadStmt, MemAccInst(GlobalRead), MemoryAccess::READ,
2672
16
                     BP, BP->getType(), false, {AF}, {nullptr}, GlobalRead);
2673
8
  }
2674
1.20k
2675
1.20k
  buildInvariantEquivalenceClasses();
2676
1.20k
2677
1.20k
  // A map from basic blocks to their invalid domains.
2678
1.20k
  DenseMap<BasicBlock *, isl::set> InvalidDomainMap;
2679
1.20k
2680
1.20k
  if (!scop->buildDomains(&R, DT, LI, InvalidDomainMap)) {
2681
8
    LLVM_DEBUG(
2682
8
        dbgs() << "Bailing-out because buildDomains encountered problems\n");
2683
8
    return;
2684
8
  }
2685
1.19k
2686
1.19k
  scop->addUserAssumptions(AC, DT, LI, InvalidDomainMap);
2687
1.19k
2688
1.19k
  // Initialize the invalid domain.
2689
1.19k
  for (ScopStmt &Stmt : scop->Stmts)
2690
9.12k
    if (Stmt.isBlockStmt())
2691
9.01k
      Stmt.setInvalidDomain(InvalidDomainMap[Stmt.getEntryBlock()]);
2692
115
    else
2693
115
      Stmt.setInvalidDomain(InvalidDomainMap[getRegionNodeBasicBlock(
2694
115
          Stmt.getRegion()->getNode())]);
2695
1.19k
2696
1.19k
  // Remove empty statements.
2697
1.19k
  // Exit early in case there are no executable statements left in this scop.
2698
1.19k
  scop->removeStmtNotInDomainMap();
2699
1.19k
  scop->simplifySCoP(false);
2700
1.19k
  if (scop->isEmpty()) {
2701
31
    LLVM_DEBUG(dbgs() << "Bailing-out because SCoP is empty\n");
2702
31
    return;
2703
31
  }
2704
1.16k
2705
1.16k
  // The ScopStmts now have enough information to initialize themselves.
2706
2.32k
  for (ScopStmt &Stmt : *scop) {   [region count: 1.16k]
2707
2.32k
    collectSurroundingLoops(Stmt);
2708
2.32k
2709
2.32k
    buildDomain(Stmt);
2710
2.32k
    buildAccessRelations(Stmt);
2711
2.32k
2712
2.32k
    if (DetectReductions)
2713
2.32k
      checkForReductions(Stmt);
2714
2.32k
  }
2715
1.16k
2716
1.16k
  // Check early for a feasible runtime context.
2717
1.16k
  if (!scop->hasFeasibleRuntimeContext()) {
2718
3
    LLVM_DEBUG(dbgs() << "Bailing-out because of unfeasible context (early)\n");
2719
3
    return;
2720
3
  }
2721
1.16k
2722
1.16k
  // Check early for profitability. Afterwards, profitability cannot change;
2723
1.16k
  // only the runtime context could still become infeasible.
2724
1.16k
  if (!scop->isProfitable(UnprofitableScalarAccs)) {
2725
2
    scop->invalidate(PROFITABLE, DebugLoc());
2726
2
    LLVM_DEBUG(
2727
2
        dbgs() << "Bailing-out because SCoP is not considered profitable\n");
2728
2
    return;
2729
2
  }
2730
1.16k
2731
1.16k
  buildSchedule();
2732
1.16k
2733
1.16k
  finalizeAccesses();
2734
1.16k
2735
1.16k
  scop->realignParams();
2736
1.16k
  addUserContext();
2737
1.16k
2738
1.16k
  // Now that the context is fully constructed, and hence all our knowledge about
2739
1.16k
  // the parameters is in there, we add all recorded assumptions to the
2740
1.16k
  // assumed/invalid context.
2741
1.16k
  addRecordedAssumptions();
2742
1.16k
2743
1.16k
  scop->simplifyContexts();
2744
1.16k
  if (!buildAliasChecks()) {
2745
6
    LLVM_DEBUG(dbgs() << "Bailing-out because could not build alias checks\n");
2746
6
    return;
2747
6
  }
2748
1.15k
2749
1.15k
  hoistInvariantLoads();
2750
1.15k
  canonicalizeDynamicBasePtrs();
2751
1.15k
  verifyInvariantLoads();
2752
1.15k
  scop->simplifySCoP(true);
2753
1.15k
2754
1.15k
  // Check again, late, for a feasible runtime context; profitability has not
2755
1.15k
  // changed since the early check.
2756
1.15k
  if (!scop->hasFeasibleRuntimeContext()) {
2757
18
    LLVM_DEBUG(dbgs() << "Bailing-out because of unfeasible context (late)\n");
2758
18
    return;
2759
18
  }
2760
1.15k
2761
#ifndef NDEBUG
2762
  verifyUses(scop.get(), LI, DT);
2763
#endif
2764
}
2765
2766
ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AliasAnalysis &AA,
2767
                         const DataLayout &DL, DominatorTree &DT, LoopInfo &LI,
2768
                         ScopDetection &SD, ScalarEvolution &SE,
2769
                         OptimizationRemarkEmitter &ORE)
2770
1.20k
    : AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE), ORE(ORE) {
2771
1.20k
  DebugLoc Beg, End;
2772
1.20k
  auto P = getBBPairForRegion(R);
2773
1.20k
  getDebugLocations(P, Beg, End);
2774
1.20k
2775
1.20k
  std::string Msg = "SCoP begins here.";
2776
1.20k
  ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEntry", Beg, P.first)
2777
1.20k
           << Msg);
2778
1.20k
2779
1.20k
  buildScop(*R, AC);
2780
1.20k
2781
1.20k
  LLVM_DEBUG(dbgs() << *scop);
2782
1.20k
2783
1.20k
  if (!scop->hasFeasibleRuntimeContext()) {
2784
68
    InfeasibleScops++;
2785
68
    Msg = "SCoP ends here but was dismissed.";
2786
68
    LLVM_DEBUG(dbgs() << "SCoP detected but dismissed\n");
2787
68
    scop.reset();
2788
1.13k
  } else {
2789
1.13k
    Msg = "SCoP ends here.";
2790
1.13k
    ++ScopFound;
2791
1.13k
    if (scop->getMaxLoopDepth() > 0)
2792
1.06k
      ++RichScopFound;
2793
1.13k
  }
2794
1.20k
2795
1.20k
  if (R->isTopLevelRegion())
2796
4
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.first)
2797
4
             << Msg);
2798
1.20k
  else
2799
1.20k
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.second)
2800
1.20k
             << Msg);
2801
1.20k
}