Coverage Report

Created: 2017-04-29 12:21

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/tools/polly/lib/CodeGen/IslNodeBuilder.cpp
Line
Count
Source (jump to first uncovered line)
1
//===------ IslNodeBuilder.cpp - Translate an isl AST into a LLVM-IR AST---===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// This file contains the IslNodeBuilder, a class to translate an isl AST into
11
// a LLVM-IR AST.
12
//
13
//===----------------------------------------------------------------------===//
14
15
#include "polly/CodeGen/IslNodeBuilder.h"
16
#include "polly/CodeGen/BlockGenerators.h"
17
#include "polly/CodeGen/CodeGeneration.h"
18
#include "polly/CodeGen/IslAst.h"
19
#include "polly/CodeGen/IslExprBuilder.h"
20
#include "polly/CodeGen/LoopGenerators.h"
21
#include "polly/CodeGen/RuntimeDebugBuilder.h"
22
#include "polly/CodeGen/Utils.h"
23
#include "polly/Config/config.h"
24
#include "polly/DependenceInfo.h"
25
#include "polly/LinkAllPasses.h"
26
#include "polly/Options.h"
27
#include "polly/ScopInfo.h"
28
#include "polly/Support/GICHelper.h"
29
#include "polly/Support/SCEVValidator.h"
30
#include "polly/Support/ScopHelper.h"
31
#include "llvm/ADT/PostOrderIterator.h"
32
#include "llvm/ADT/SmallPtrSet.h"
33
#include "llvm/Analysis/LoopInfo.h"
34
#include "llvm/IR/DataLayout.h"
35
#include "llvm/IR/Module.h"
36
#include "llvm/IR/Verifier.h"
37
#include "llvm/Support/CommandLine.h"
38
#include "llvm/Support/Debug.h"
39
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
40
#include "isl/aff.h"
41
#include "isl/ast.h"
42
#include "isl/ast_build.h"
43
#include "isl/list.h"
44
#include "isl/map.h"
45
#include "isl/set.h"
46
#include "isl/union_map.h"
47
#include "isl/union_set.h"
48
49
using namespace polly;
50
using namespace llvm;
51
52
#define DEBUG_TYPE "polly-codegen"
53
54
STATISTIC(VersionedScops, "Number of SCoPs that required versioning.");
55
56
// The maximal number of dimensions we allow during invariant load construction.
57
// More complex access ranges will result in very high compile time and are also
58
// unlikely to result in good code. This value is very high and should only
59
// trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006).
60
static int const MaxDimensionsInAccessRange = 9;
61
62
static cl::opt<bool> PollyGenerateRTCPrint(
63
    "polly-codegen-emit-rtc-print",
64
    cl::desc("Emit code that prints the runtime check result dynamically."),
65
    cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
66
67
// If this option is set we always use the isl AST generator to regenerate
68
// memory accesses. Without this option set we regenerate expressions using the
69
// original SCEV expressions and only generate new expressions in case the
70
// access relation has been changed and consequently must be regenerated.
71
static cl::opt<bool> PollyGenerateExpressions(
72
    "polly-codegen-generate-expressions",
73
    cl::desc("Generate AST expressions for unmodified and modified accesses"),
74
    cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
75
76
static cl::opt<int> PollyTargetFirstLevelCacheLineSize(
77
    "polly-target-first-level-cache-line-size",
78
    cl::desc("The size of the first level cache line size specified in bytes."),
79
    cl::Hidden, cl::init(64), cl::ZeroOrMore, cl::cat(PollyCategory));
80
81
__isl_give isl_ast_expr *
82
IslNodeBuilder::getUpperBound(__isl_keep isl_ast_node *For,
83
260
                              ICmpInst::Predicate &Predicate) {
84
260
  isl_id *UBID, *IteratorID;
85
260
  isl_ast_expr *Cond, *Iterator, *UB, *Arg0;
86
260
  isl_ast_op_type Type;
87
260
88
260
  Cond = isl_ast_node_for_get_cond(For);
89
260
  Iterator = isl_ast_node_for_get_iterator(For);
90
260
  isl_ast_expr_get_type(Cond);
91
260
  assert(isl_ast_expr_get_type(Cond) == isl_ast_expr_op &&
92
260
         "conditional expression is not an atomic upper bound");
93
260
94
260
  Type = isl_ast_expr_get_op_type(Cond);
95
260
96
260
  switch (Type) {
97
194
  case isl_ast_op_le:
98
194
    Predicate = ICmpInst::ICMP_SLE;
99
194
    break;
100
66
  case isl_ast_op_lt:
101
66
    Predicate = ICmpInst::ICMP_SLT;
102
66
    break;
103
0
  default:
104
0
    llvm_unreachable("Unexpected comparision type in loop conditon");
105
260
  }
106
260
107
260
  Arg0 = isl_ast_expr_get_op_arg(Cond, 0);
108
260
109
260
  assert(isl_ast_expr_get_type(Arg0) == isl_ast_expr_id &&
110
260
         "conditional expression is not an atomic upper bound");
111
260
112
260
  UBID = isl_ast_expr_get_id(Arg0);
113
260
114
260
  assert(isl_ast_expr_get_type(Iterator) == isl_ast_expr_id &&
115
260
         "Could not get the iterator");
116
260
117
260
  IteratorID = isl_ast_expr_get_id(Iterator);
118
260
119
260
  assert(UBID == IteratorID &&
120
260
         "conditional expression is not an atomic upper bound");
121
260
122
260
  UB = isl_ast_expr_get_op_arg(Cond, 1);
123
260
124
260
  isl_ast_expr_free(Cond);
125
260
  isl_ast_expr_free(Iterator);
126
260
  isl_ast_expr_free(Arg0);
127
260
  isl_id_free(IteratorID);
128
260
  isl_id_free(UBID);
129
260
130
260
  return UB;
131
260
}
132
133
/// Return true if a return value of Predicate is true for the value represented
134
/// by passed isl_ast_expr_int.
135
static bool checkIslAstExprInt(__isl_take isl_ast_expr *Expr,
136
42
                               isl_bool (*Predicate)(__isl_keep isl_val *)) {
137
42
  if (
isl_ast_expr_get_type(Expr) != isl_ast_expr_int42
)
{0
138
0
    isl_ast_expr_free(Expr);
139
0
    return false;
140
0
  }
141
42
  auto ExprVal = isl_ast_expr_get_val(Expr);
142
42
  isl_ast_expr_free(Expr);
143
42
  if (
Predicate(ExprVal) != true42
)
{0
144
0
    isl_val_free(ExprVal);
145
0
    return false;
146
0
  }
147
42
  isl_val_free(ExprVal);
148
42
  return true;
149
42
}
150
151
22
int IslNodeBuilder::getNumberOfIterations(__isl_keep isl_ast_node *For) {
152
22
  assert(isl_ast_node_get_type(For) == isl_ast_node_for);
153
22
  auto Body = isl_ast_node_for_get_body(For);
154
22
155
22
  // First, check if we can actually handle this code.
156
22
  switch (isl_ast_node_get_type(Body)) {
157
20
  case isl_ast_node_user:
158
20
    break;
159
2
  case isl_ast_node_block: {
160
2
    isl_ast_node_list *List = isl_ast_node_block_get_children(Body);
161
4
    for (int i = 0; 
i < isl_ast_node_list_n_ast_node(List)4
;
++i2
)
{3
162
3
      isl_ast_node *Node = isl_ast_node_list_get_ast_node(List, i);
163
3
      int Type = isl_ast_node_get_type(Node);
164
3
      isl_ast_node_free(Node);
165
3
      if (
Type != isl_ast_node_user3
)
{1
166
1
        isl_ast_node_list_free(List);
167
1
        isl_ast_node_free(Body);
168
1
        return -1;
169
1
      }
170
3
    }
171
1
    isl_ast_node_list_free(List);
172
1
    break;
173
2
  }
174
0
  default:
175
0
    isl_ast_node_free(Body);
176
0
    return -1;
177
22
  }
178
21
  isl_ast_node_free(Body);
179
21
180
21
  auto Init = isl_ast_node_for_get_init(For);
181
21
  if (!checkIslAstExprInt(Init, isl_val_is_zero))
182
0
    return -1;
183
21
  auto Inc = isl_ast_node_for_get_inc(For);
184
21
  if (!checkIslAstExprInt(Inc, isl_val_is_one))
185
0
    return -1;
186
21
  CmpInst::Predicate Predicate;
187
21
  auto UB = getUpperBound(For, Predicate);
188
21
  if (
isl_ast_expr_get_type(UB) != isl_ast_expr_int21
)
{2
189
2
    isl_ast_expr_free(UB);
190
2
    return -1;
191
2
  }
192
19
  auto UpVal = isl_ast_expr_get_val(UB);
193
19
  isl_ast_expr_free(UB);
194
19
  int NumberIterations = isl_val_get_num_si(UpVal);
195
19
  isl_val_free(UpVal);
196
19
  if (NumberIterations < 0)
197
0
    return -1;
198
19
  
if (19
Predicate == CmpInst::ICMP_SLT19
)
199
0
    return NumberIterations;
200
19
  else
201
19
    return NumberIterations + 1;
202
19
}
203
204
/// Extract the values and SCEVs needed to generate code for a block.
205
static int findReferencesInBlock(struct SubtreeReferences &References,
206
26
                                 const ScopStmt *Stmt, const BasicBlock *BB) {
207
26
  for (const Instruction &Inst : *BB)
208
328
    
for (Value *SrcVal : Inst.operands()) 173
{328
209
328
      auto *Scope = References.LI.getLoopFor(BB);
210
328
      if (
canSynthesize(SrcVal, References.S, &References.SE, Scope)328
)
{232
211
232
        References.SCEVs.insert(References.SE.getSCEVAtScope(SrcVal, Scope));
212
232
        continue;
213
96
      } else 
if (Value *96
NewVal96
= References.GlobalMap.lookup(SrcVal))
214
9
        References.Values.insert(NewVal);
215
328
    }
216
26
  return 0;
217
26
}
218
219
isl_stat addReferencesFromStmt(const ScopStmt *Stmt, void *UserPtr,
220
26
                               bool CreateScalarRefs) {
221
26
  auto &References = *static_cast<struct SubtreeReferences *>(UserPtr);
222
26
223
26
  if (Stmt->isBlockStmt())
224
26
    findReferencesInBlock(References, Stmt, Stmt->getBasicBlock());
225
0
  else {
226
0
    assert(Stmt->isRegionStmt() &&
227
0
           "Stmt was neither block nor region statement");
228
0
    for (const BasicBlock *BB : Stmt->getRegion()->blocks())
229
0
      findReferencesInBlock(References, Stmt, BB);
230
0
  }
231
26
232
40
  for (auto &Access : *Stmt) {
233
40
    if (
Access->isArrayKind()40
)
{38
234
38
      auto *BasePtr = Access->getScopArrayInfo()->getBasePtr();
235
38
      if (Instruction *OpInst = dyn_cast<Instruction>(BasePtr))
236
5
        
if (5
Stmt->getParent()->contains(OpInst)5
)
237
4
          continue;
238
38
239
34
      References.Values.insert(BasePtr);
240
34
      continue;
241
38
    }
242
40
243
2
    
if (2
CreateScalarRefs2
)
244
2
      References.Values.insert(References.BlockGen.getOrCreateAlloca(*Access));
245
2
  }
246
26
247
26
  return isl_stat_ok;
248
26
}
249
250
/// Extract the out-of-scop values and SCEVs referenced from a set describing
251
/// a ScopStmt.
252
///
253
/// This includes the SCEVUnknowns referenced by the SCEVs used in the
254
/// statement and the base pointers of the memory accesses. For scalar
255
/// statements we force the generation of alloca memory locations and list
256
/// these locations in the set of out-of-scop values as well.
257
///
258
/// @param Set     A set which references the ScopStmt we are interested in.
259
/// @param UserPtr A void pointer that can be casted to a SubtreeReferences
260
///                structure.
261
static isl_stat addReferencesFromStmtSet(__isl_take isl_set *Set,
262
26
                                         void *UserPtr) {
263
26
  isl_id *Id = isl_set_get_tuple_id(Set);
264
26
  auto *Stmt = static_cast<const ScopStmt *>(isl_id_get_user(Id));
265
26
  isl_id_free(Id);
266
26
  isl_set_free(Set);
267
26
  return addReferencesFromStmt(Stmt, UserPtr);
268
26
}
269
270
/// Extract the out-of-scop values and SCEVs referenced from a union set
271
/// referencing multiple ScopStmts.
272
///
273
/// This includes the SCEVUnknowns referenced by the SCEVs used in the
274
/// statement and the base pointers of the memory accesses. For scalar
275
/// statements we force the generation of alloca memory locations and list
276
/// these locations in the set of out-of-scop values as well.
277
///
278
/// @param USet       A union set referencing the ScopStmts we are interested
279
///                   in.
280
/// @param References The SubtreeReferences data structure through which
281
///                   results are returned and further information is
282
///                   provided.
283
static void
284
addReferencesFromStmtUnionSet(isl_union_set *USet,
285
26
                              struct SubtreeReferences &References) {
286
26
  isl_union_set_foreach_set(USet, addReferencesFromStmtSet, &References);
287
26
  isl_union_set_free(USet);
288
26
}
289
290
__isl_give isl_union_map *
291
45
IslNodeBuilder::getScheduleForAstNode(__isl_keep isl_ast_node *For) {
292
45
  return IslAstInfo::getSchedule(For);
293
45
}
294
295
void IslNodeBuilder::getReferencesInSubtree(__isl_keep isl_ast_node *For,
296
                                            SetVector<Value *> &Values,
297
26
                                            SetVector<const Loop *> &Loops) {
298
26
299
26
  SetVector<const SCEV *> SCEVs;
300
26
  struct SubtreeReferences References = {
301
26
      LI, SE, S, ValueMap, Values, SCEVs, getBlockGenerator()};
302
26
303
26
  for (const auto &I : IDToValue)
304
19
    Values.insert(I.second);
305
26
306
26
  for (const auto &I : OutsideLoopIterations)
307
2
    Values.insert(cast<SCEVUnknown>(I.second)->getValue());
308
26
309
26
  isl_union_set *Schedule = isl_union_map_domain(getScheduleForAstNode(For));
310
26
  addReferencesFromStmtUnionSet(Schedule, References);
311
26
312
172
  for (const SCEV *Expr : SCEVs) {
313
172
    findValues(Expr, SE, Values);
314
172
    findLoops(Expr, Loops);
315
172
  }
316
26
317
64
  Values.remove_if([](const Value *V) { return isa<GlobalValue>(V); });
318
26
319
26
  /// Remove loops that contain the scop or that are part of the scop, as they
320
26
  /// are considered local. This leaves only loops that are before the scop, but
321
26
  /// do not contain the scop itself.
322
38
  Loops.remove_if([this](const Loop *L) {
323
3
    return S.contains(L) || L->contains(S.getEntry());
324
38
  });
325
26
}
326
327
26
void IslNodeBuilder::updateValues(ValueMapT &NewValues) {
328
26
  SmallPtrSet<Value *, 5> Inserted;
329
26
330
19
  for (const auto &I : IDToValue) {
331
19
    IDToValue[I.first] = NewValues[I.second];
332
19
    Inserted.insert(I.second);
333
19
  }
334
26
335
58
  for (const auto &I : NewValues) {
336
58
    if (Inserted.count(I.first))
337
0
      continue;
338
58
339
58
    ValueMap[I.first] = I.second;
340
58
  }
341
26
}
342
343
void IslNodeBuilder::createUserVector(__isl_take isl_ast_node *User,
344
                                      std::vector<Value *> &IVS,
345
                                      __isl_take isl_id *IteratorID,
346
20
                                      __isl_take isl_union_map *Schedule) {
347
20
  isl_ast_expr *Expr = isl_ast_node_user_get_expr(User);
348
20
  isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0);
349
20
  isl_id *Id = isl_ast_expr_get_id(StmtExpr);
350
20
  isl_ast_expr_free(StmtExpr);
351
20
  ScopStmt *Stmt = (ScopStmt *)isl_id_get_user(Id);
352
20
  std::vector<LoopToScevMapT> VLTS(IVS.size());
353
20
354
20
  isl_union_set *Domain = isl_union_set_from_set(Stmt->getDomain());
355
20
  Schedule = isl_union_map_intersect_domain(Schedule, Domain);
356
20
  isl_map *S = isl_map_from_union_map(Schedule);
357
20
358
20
  auto *NewAccesses = createNewAccesses(Stmt, User);
359
20
  createSubstitutionsVector(Expr, Stmt, VLTS, IVS, IteratorID);
360
20
  VectorBlockGenerator::generate(BlockGen, *Stmt, VLTS, S, NewAccesses);
361
20
  isl_id_to_ast_expr_free(NewAccesses);
362
20
  isl_map_free(S);
363
20
  isl_id_free(Id);
364
20
  isl_ast_node_free(User);
365
20
}
366
367
15
void IslNodeBuilder::createMark(__isl_take isl_ast_node *Node) {
368
15
  auto *Id = isl_ast_node_mark_get_id(Node);
369
15
  auto Child = isl_ast_node_mark_get_node(Node);
370
15
  isl_ast_node_free(Node);
371
15
  // If a child node of a 'SIMD mark' is a loop that has a single iteration,
372
15
  // it will be optimized away and we should skip it.
373
15
  if (!strcmp(isl_id_get_name(Id), "SIMD") &&
374
2
      
isl_ast_node_get_type(Child) == isl_ast_node_for2
)
{2
375
2
    bool Vector = PollyVectorizerChoice == VECTORIZER_POLLY;
376
2
    int VectorWidth = getNumberOfIterations(Child);
377
2
    if (
Vector && 2
1 < VectorWidth2
&&
VectorWidth <= 162
)
378
2
      createForVector(Child, VectorWidth);
379
2
    else
380
0
      createForSequential(Child, true);
381
2
    isl_id_free(Id);
382
2
    return;
383
2
  }
384
13
  
if (13
!strcmp(isl_id_get_name(Id), "Inter iteration alias-free")13
)
{1
385
1
    auto *BasePtr = static_cast<Value *>(isl_id_get_user(Id));
386
1
    Annotator.addInterIterationAliasFreeBasePtr(BasePtr);
387
1
  }
388
13
  create(Child);
389
13
  isl_id_free(Id);
390
13
}
391
392
void IslNodeBuilder::createForVector(__isl_take isl_ast_node *For,
393
19
                                     int VectorWidth) {
394
19
  isl_ast_node *Body = isl_ast_node_for_get_body(For);
395
19
  isl_ast_expr *Init = isl_ast_node_for_get_init(For);
396
19
  isl_ast_expr *Inc = isl_ast_node_for_get_inc(For);
397
19
  isl_ast_expr *Iterator = isl_ast_node_for_get_iterator(For);
398
19
  isl_id *IteratorID = isl_ast_expr_get_id(Iterator);
399
19
400
19
  Value *ValueLB = ExprBuilder.create(Init);
401
19
  Value *ValueInc = ExprBuilder.create(Inc);
402
19
403
19
  Type *MaxType = ExprBuilder.getType(Iterator);
404
19
  MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType());
405
19
  MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType());
406
19
407
19
  if (MaxType != ValueLB->getType())
408
0
    ValueLB = Builder.CreateSExt(ValueLB, MaxType);
409
19
  if (MaxType != ValueInc->getType())
410
0
    ValueInc = Builder.CreateSExt(ValueInc, MaxType);
411
19
412
19
  std::vector<Value *> IVS(VectorWidth);
413
19
  IVS[0] = ValueLB;
414
19
415
119
  for (int i = 1; 
i < VectorWidth119
;
i++100
)
416
100
    IVS[i] = Builder.CreateAdd(IVS[i - 1], ValueInc, "p_vector_iv");
417
19
418
19
  isl_union_map *Schedule = getScheduleForAstNode(For);
419
19
  assert(Schedule && "For statement annotation does not contain its schedule");
420
19
421
19
  IDToValue[IteratorID] = ValueLB;
422
19
423
19
  switch (isl_ast_node_get_type(Body)) {
424
18
  case isl_ast_node_user:
425
18
    createUserVector(Body, IVS, isl_id_copy(IteratorID),
426
18
                     isl_union_map_copy(Schedule));
427
18
    break;
428
1
  case isl_ast_node_block: {
429
1
    isl_ast_node_list *List = isl_ast_node_block_get_children(Body);
430
1
431
3
    for (int i = 0; 
i < isl_ast_node_list_n_ast_node(List)3
;
++i2
)
432
2
      createUserVector(isl_ast_node_list_get_ast_node(List, i), IVS,
433
2
                       isl_id_copy(IteratorID), isl_union_map_copy(Schedule));
434
1
435
1
    isl_ast_node_free(Body);
436
1
    isl_ast_node_list_free(List);
437
1
    break;
438
18
  }
439
0
  default:
440
0
    isl_ast_node_dump(Body);
441
0
    llvm_unreachable("Unhandled isl_ast_node in vectorizer");
442
19
  }
443
19
444
19
  IDToValue.erase(IDToValue.find(IteratorID));
445
19
  isl_id_free(IteratorID);
446
19
  isl_union_map_free(Schedule);
447
19
448
19
  isl_ast_node_free(For);
449
19
  isl_ast_expr_free(Iterator);
450
19
}
451
452
void IslNodeBuilder::createForSequential(__isl_take isl_ast_node *For,
453
213
                                         bool KnownParallel) {
454
213
  isl_ast_node *Body;
455
213
  isl_ast_expr *Init, *Inc, *Iterator, *UB;
456
213
  isl_id *IteratorID;
457
213
  Value *ValueLB, *ValueUB, *ValueInc;
458
213
  Type *MaxType;
459
213
  BasicBlock *ExitBlock;
460
213
  Value *IV;
461
213
  CmpInst::Predicate Predicate;
462
213
  bool Parallel;
463
213
464
213
  Parallel = KnownParallel || (IslAstInfo::isParallel(For) &&
465
16
                               !IslAstInfo::isReductionParallel(For));
466
213
467
213
  Body = isl_ast_node_for_get_body(For);
468
213
469
213
  // isl_ast_node_for_is_degenerate(For)
470
213
  //
471
213
  // TODO: For degenerated loops we could generate a plain assignment.
472
213
  //       However, for now we just reuse the logic for normal loops, which will
473
213
  //       create a loop with a single iteration.
474
213
475
213
  Init = isl_ast_node_for_get_init(For);
476
213
  Inc = isl_ast_node_for_get_inc(For);
477
213
  Iterator = isl_ast_node_for_get_iterator(For);
478
213
  IteratorID = isl_ast_expr_get_id(Iterator);
479
213
  UB = getUpperBound(For, Predicate);
480
213
481
213
  ValueLB = ExprBuilder.create(Init);
482
213
  ValueUB = ExprBuilder.create(UB);
483
213
  ValueInc = ExprBuilder.create(Inc);
484
213
485
213
  MaxType = ExprBuilder.getType(Iterator);
486
213
  MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType());
487
213
  MaxType = ExprBuilder.getWidestType(MaxType, ValueUB->getType());
488
213
  MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType());
489
213
490
213
  if (MaxType != ValueLB->getType())
491
0
    ValueLB = Builder.CreateSExt(ValueLB, MaxType);
492
213
  if (MaxType != ValueUB->getType())
493
25
    ValueUB = Builder.CreateSExt(ValueUB, MaxType);
494
213
  if (MaxType != ValueInc->getType())
495
0
    ValueInc = Builder.CreateSExt(ValueInc, MaxType);
496
213
497
213
  // If we can show that LB <Predicate> UB holds at least once, we can
498
213
  // omit the GuardBB in front of the loop.
499
213
  bool UseGuardBB =
500
213
      !SE.isKnownPredicate(Predicate, SE.getSCEV(ValueLB), SE.getSCEV(ValueUB));
501
213
  IV = createLoop(ValueLB, ValueUB, ValueInc, Builder, LI, DT, ExitBlock,
502
213
                  Predicate, &Annotator, Parallel, UseGuardBB);
503
213
  IDToValue[IteratorID] = IV;
504
213
505
213
  create(Body);
506
213
507
213
  Annotator.popLoop(Parallel);
508
213
509
213
  IDToValue.erase(IDToValue.find(IteratorID));
510
213
511
213
  Builder.SetInsertPoint(&ExitBlock->front());
512
213
513
213
  isl_ast_node_free(For);
514
213
  isl_ast_expr_free(Iterator);
515
213
  isl_id_free(IteratorID);
516
213
}
517
518
/// Remove the BBs contained in a (sub)function from the dominator tree.
519
///
520
/// This function removes the basic blocks that are part of a subfunction from
521
/// the dominator tree. Specifically, when generating code it may happen that at
522
/// some point the code generation continues in a new sub-function (e.g., when
523
/// generating OpenMP code). The basic blocks that are created in this
524
/// sub-function are then still part of the dominator tree of the original
525
/// function, such that the dominator tree reaches over function boundaries.
526
/// This is not only incorrect, but also causes crashes. This function now
527
/// removes from the dominator tree all basic blocks that are dominated (and
528
/// consequently reachable) from the entry block of this (sub)function.
529
///
530
/// FIXME: A LLVM (function or region) pass should not touch anything outside of
531
/// the function/region it runs on. Hence, the pure need for this function shows
532
/// that we do not comply to this rule. At the moment, this does not cause any
533
/// issues, but we should be aware that such issues may appear. Unfortunately
534
/// the current LLVM pass infrastructure does not allow to make Polly a module
535
/// or call-graph pass to solve this issue, as such a pass would not have access
536
/// to the per-function analyses passes needed by Polly. A future pass manager
537
/// infrastructure is supposed to enable such kind of access possibly allowing
538
/// us to create a cleaner solution here.
539
///
540
/// FIXME: Instead of adding the dominance information and then dropping it
541
/// later on, we should try to just not add it in the first place. This requires
542
/// some careful testing to make sure this does not break in interaction with
543
/// the SCEVBuilder and SplitBlock which may rely on the dominator tree or
544
/// which may try to update it.
545
///
546
/// @param F The function which contains the BBs to removed.
547
/// @param DT The dominator tree from which to remove the BBs.
548
26
static void removeSubFuncFromDomTree(Function *F, DominatorTree &DT) {
549
26
  DomTreeNode *N = DT.getNode(&F->getEntryBlock());
550
26
  std::vector<BasicBlock *> Nodes;
551
26
552
26
  // We can only remove an element from the dominator tree, if all its children
553
26
  // have been removed. To ensure this we obtain the list of nodes to remove
554
26
  // using a post-order tree traversal.
555
269
  for (po_iterator<DomTreeNode *> I = po_begin(N), E = po_end(N); 
I != E269
;
++I243
)
556
243
    Nodes.push_back(I->getBlock());
557
26
558
26
  for (BasicBlock *BB : Nodes)
559
243
    DT.eraseNode(BB);
560
26
}
561
562
26
void IslNodeBuilder::createForParallel(__isl_take isl_ast_node *For) {
563
26
  isl_ast_node *Body;
564
26
  isl_ast_expr *Init, *Inc, *Iterator, *UB;
565
26
  isl_id *IteratorID;
566
26
  Value *ValueLB, *ValueUB, *ValueInc;
567
26
  Type *MaxType;
568
26
  Value *IV;
569
26
  CmpInst::Predicate Predicate;
570
26
571
26
  // The preamble of parallel code interacts different than normal code with
572
26
  // e.g., scalar initialization. Therefore, we ensure the parallel code is
573
26
  // separated from the last basic block.
574
26
  BasicBlock *ParBB = SplitBlock(Builder.GetInsertBlock(),
575
26
                                 &*Builder.GetInsertPoint(), &DT, &LI);
576
26
  ParBB->setName("polly.parallel.for");
577
26
  Builder.SetInsertPoint(&ParBB->front());
578
26
579
26
  Body = isl_ast_node_for_get_body(For);
580
26
  Init = isl_ast_node_for_get_init(For);
581
26
  Inc = isl_ast_node_for_get_inc(For);
582
26
  Iterator = isl_ast_node_for_get_iterator(For);
583
26
  IteratorID = isl_ast_expr_get_id(Iterator);
584
26
  UB = getUpperBound(For, Predicate);
585
26
586
26
  ValueLB = ExprBuilder.create(Init);
587
26
  ValueUB = ExprBuilder.create(UB);
588
26
  ValueInc = ExprBuilder.create(Inc);
589
26
590
26
  // OpenMP always uses SLE. In case the isl generated AST uses a SLT
591
26
  // expression, we need to adjust the loop blound by one.
592
26
  if (Predicate == CmpInst::ICMP_SLT)
593
8
    ValueUB = Builder.CreateAdd(
594
8
        ValueUB, Builder.CreateSExt(Builder.getTrue(), ValueUB->getType()));
595
26
596
26
  MaxType = ExprBuilder.getType(Iterator);
597
26
  MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType());
598
26
  MaxType = ExprBuilder.getWidestType(MaxType, ValueUB->getType());
599
26
  MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType());
600
26
601
26
  if (MaxType != ValueLB->getType())
602
0
    ValueLB = Builder.CreateSExt(ValueLB, MaxType);
603
26
  if (MaxType != ValueUB->getType())
604
3
    ValueUB = Builder.CreateSExt(ValueUB, MaxType);
605
26
  if (MaxType != ValueInc->getType())
606
0
    ValueInc = Builder.CreateSExt(ValueInc, MaxType);
607
26
608
26
  BasicBlock::iterator LoopBody;
609
26
610
26
  SetVector<Value *> SubtreeValues;
611
26
  SetVector<const Loop *> Loops;
612
26
613
26
  getReferencesInSubtree(For, SubtreeValues, Loops);
614
26
615
26
  // Create for all loops we depend on values that contain the current loop
616
26
  // iteration. These values are necessary to generate code for SCEVs that
617
26
  // depend on such loops. As a result we need to pass them to the subfunction.
618
1
  for (const Loop *L : Loops) {
619
1
    const SCEV *OuterLIV = SE.getAddRecExpr(SE.getUnknown(Builder.getInt64(0)),
620
1
                                            SE.getUnknown(Builder.getInt64(1)),
621
1
                                            L, SCEV::FlagAnyWrap);
622
1
    Value *V = generateSCEV(OuterLIV);
623
1
    OutsideLoopIterations[L] = SE.getUnknown(V);
624
1
    SubtreeValues.insert(V);
625
1
  }
626
26
627
26
  ValueMapT NewValues;
628
26
  ParallelLoopGenerator ParallelLoopGen(Builder, LI, DT, DL);
629
26
630
26
  IV = ParallelLoopGen.createParallelLoop(ValueLB, ValueUB, ValueInc,
631
26
                                          SubtreeValues, NewValues, &LoopBody);
632
26
  BasicBlock::iterator AfterLoop = Builder.GetInsertPoint();
633
26
  Builder.SetInsertPoint(&*LoopBody);
634
26
635
26
  // Remember the parallel subfunction
636
26
  ParallelSubfunctions.push_back(LoopBody->getFunction());
637
26
638
26
  // Save the current values.
639
26
  auto ValueMapCopy = ValueMap;
640
26
  IslExprBuilder::IDToValueTy IDToValueCopy = IDToValue;
641
26
642
26
  updateValues(NewValues);
643
26
  IDToValue[IteratorID] = IV;
644
26
645
26
  ValueMapT NewValuesReverse;
646
26
647
26
  for (auto P : NewValues)
648
58
    NewValuesReverse[P.second] = P.first;
649
26
650
26
  Annotator.addAlternativeAliasBases(NewValuesReverse);
651
26
652
26
  create(Body);
653
26
654
26
  Annotator.resetAlternativeAliasBases();
655
26
  // Restore the original values.
656
26
  ValueMap = ValueMapCopy;
657
26
  IDToValue = IDToValueCopy;
658
26
659
26
  Builder.SetInsertPoint(&*AfterLoop);
660
26
  removeSubFuncFromDomTree((*LoopBody).getParent()->getParent(), DT);
661
26
662
26
  for (const Loop *L : Loops)
663
1
    OutsideLoopIterations.erase(L);
664
26
665
26
  isl_ast_node_free(For);
666
26
  isl_ast_expr_free(Iterator);
667
26
  isl_id_free(IteratorID);
668
26
}
669
670
256
void IslNodeBuilder::createFor(__isl_take isl_ast_node *For) {
671
256
  bool Vector = PollyVectorizerChoice == VECTORIZER_POLLY;
672
256
673
256
  if (
Vector && 256
IslAstInfo::isInnermostParallel(For)26
&&
674
20
      
!IslAstInfo::isReductionParallel(For)20
)
{20
675
20
    int VectorWidth = getNumberOfIterations(For);
676
20
    if (
1 < VectorWidth && 20
VectorWidth <= 1617
)
{17
677
17
      createForVector(For, VectorWidth);
678
17
      return;
679
17
    }
680
20
  }
681
256
682
239
  
if (239
IslAstInfo::isExecutedInParallel(For)239
)
{26
683
26
    createForParallel(For);
684
26
    return;
685
26
  }
686
213
  createForSequential(For, false);
687
213
}
688
689
54
void IslNodeBuilder::createIf(__isl_take isl_ast_node *If) {
690
54
  isl_ast_expr *Cond = isl_ast_node_if_get_cond(If);
691
54
692
54
  Function *F = Builder.GetInsertBlock()->getParent();
693
54
  LLVMContext &Context = F->getContext();
694
54
695
54
  BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(),
696
54
                                  &*Builder.GetInsertPoint(), &DT, &LI);
697
54
  CondBB->setName("polly.cond");
698
54
  BasicBlock *MergeBB = SplitBlock(CondBB, &CondBB->front(), &DT, &LI);
699
54
  MergeBB->setName("polly.merge");
700
54
  BasicBlock *ThenBB = BasicBlock::Create(Context, "polly.then", F);
701
54
  BasicBlock *ElseBB = BasicBlock::Create(Context, "polly.else", F);
702
54
703
54
  DT.addNewBlock(ThenBB, CondBB);
704
54
  DT.addNewBlock(ElseBB, CondBB);
705
54
  DT.changeImmediateDominator(MergeBB, CondBB);
706
54
707
54
  Loop *L = LI.getLoopFor(CondBB);
708
54
  if (
L54
)
{26
709
26
    L->addBasicBlockToLoop(ThenBB, LI);
710
26
    L->addBasicBlockToLoop(ElseBB, LI);
711
26
  }
712
54
713
54
  CondBB->getTerminator()->eraseFromParent();
714
54
715
54
  Builder.SetInsertPoint(CondBB);
716
54
  Value *Predicate = ExprBuilder.create(Cond);
717
54
  Builder.CreateCondBr(Predicate, ThenBB, ElseBB);
718
54
  Builder.SetInsertPoint(ThenBB);
719
54
  Builder.CreateBr(MergeBB);
720
54
  Builder.SetInsertPoint(ElseBB);
721
54
  Builder.CreateBr(MergeBB);
722
54
  Builder.SetInsertPoint(&ThenBB->front());
723
54
724
54
  create(isl_ast_node_if_get_then(If));
725
54
726
54
  Builder.SetInsertPoint(&ElseBB->front());
727
54
728
54
  if (isl_ast_node_if_has_else(If))
729
9
    create(isl_ast_node_if_get_else(If));
730
54
731
54
  Builder.SetInsertPoint(&MergeBB->front());
732
54
733
54
  isl_ast_node_free(If);
734
54
}
735
736
__isl_give isl_id_to_ast_expr *
737
IslNodeBuilder::createNewAccesses(ScopStmt *Stmt,
738
369
                                  __isl_keep isl_ast_node *Node) {
739
369
  isl_id_to_ast_expr *NewAccesses =
740
369
      isl_id_to_ast_expr_alloc(Stmt->getParent()->getIslCtx(), 0);
741
369
742
369
  auto *Build = IslAstInfo::getBuild(Node);
743
369
  assert(Build && "Could not obtain isl_ast_build from user node");
744
369
  Stmt->setAstBuild(Build);
745
369
746
752
  for (auto *MA : *Stmt) {
747
752
    if (
!MA->hasNewAccessRelation()752
)
{695
748
695
      if (
PollyGenerateExpressions695
)
{2
749
2
        if (!MA->isAffine())
750
0
          continue;
751
2
        
if (2
MA->getLatestScopArrayInfo()->getBasePtrOriginSAI()2
)
752
0
          continue;
753
2
754
2
        auto *BasePtr =
755
2
            dyn_cast<Instruction>(MA->getLatestScopArrayInfo()->getBasePtr());
756
2
        if (
BasePtr && 2
Stmt->getParent()->getRegion().contains(BasePtr)0
)
757
0
          continue;
758
693
      } else {
759
693
        continue;
760
693
      }
761
695
    }
762
59
    assert(MA->isAffine() &&
763
59
           "Only affine memory accesses can be code generated");
764
59
765
59
    auto Schedule = isl_ast_build_get_schedule(Build);
766
59
767
59
#ifndef NDEBUG
768
    auto Dom = Stmt->getDomain();
769
    auto SchedDom = isl_set_from_union_set(
770
        isl_union_map_domain(isl_union_map_copy(Schedule)));
771
    auto AccDom = isl_map_domain(MA->getAccessRelation());
772
    Dom = isl_set_intersect_params(Dom, Stmt->getParent()->getContext());
773
    SchedDom =
774
        isl_set_intersect_params(SchedDom, Stmt->getParent()->getContext());
775
    assert(isl_set_is_subset(SchedDom, AccDom) &&
776
           "Access relation not defined on full schedule domain");
777
    assert(isl_set_is_subset(Dom, AccDom) &&
778
           "Access relation not defined on full domain");
779
    isl_set_free(AccDom);
780
    isl_set_free(SchedDom);
781
    isl_set_free(Dom);
782
#endif
783
59
784
59
    auto PWAccRel = MA->applyScheduleToAccessRelation(Schedule);
785
59
786
59
    auto AccessExpr = isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel);
787
59
    NewAccesses = isl_id_to_ast_expr_set(NewAccesses, MA->getId(), AccessExpr);
788
59
  }
789
369
790
369
  return NewAccesses;
791
369
}
792
793
void IslNodeBuilder::createSubstitutions(__isl_take isl_ast_expr *Expr,
794
470
                                         ScopStmt *Stmt, LoopToScevMapT &LTS) {
795
470
  assert(isl_ast_expr_get_type(Expr) == isl_ast_expr_op &&
796
470
         "Expression of type 'op' expected");
797
470
  assert(isl_ast_expr_get_op_type(Expr) == isl_ast_op_call &&
798
470
         "Opertation of type 'call' expected");
799
933
  for (int i = 0; 
i < isl_ast_expr_get_op_n_arg(Expr) - 1933
;
++i463
)
{463
800
463
    isl_ast_expr *SubExpr;
801
463
    Value *V;
802
463
803
463
    SubExpr = isl_ast_expr_get_op_arg(Expr, i + 1);
804
463
    V = ExprBuilder.create(SubExpr);
805
463
    ScalarEvolution *SE = Stmt->getParent()->getSE();
806
463
    LTS[Stmt->getLoopForDimension(i)] = SE->getUnknown(V);
807
463
  }
808
470
809
470
  isl_ast_expr_free(Expr);
810
470
}
811
812
void IslNodeBuilder::createSubstitutionsVector(
813
    __isl_take isl_ast_expr *Expr, ScopStmt *Stmt,
814
    std::vector<LoopToScevMapT> &VLTS, std::vector<Value *> &IVS,
815
20
    __isl_take isl_id *IteratorID) {
816
20
  int i = 0;
817
20
818
20
  Value *OldValue = IDToValue[IteratorID];
819
123
  for (Value *IV : IVS) {
820
123
    IDToValue[IteratorID] = IV;
821
123
    createSubstitutions(isl_ast_expr_copy(Expr), Stmt, VLTS[i]);
822
123
    i++;
823
123
  }
824
20
825
20
  IDToValue[IteratorID] = OldValue;
826
20
  isl_id_free(IteratorID);
827
20
  isl_ast_expr_free(Expr);
828
20
}
829
830
void IslNodeBuilder::generateCopyStmt(
831
2
    ScopStmt *Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses) {
832
2
  assert(Stmt->size() == 2);
833
2
  auto ReadAccess = Stmt->begin();
834
2
  auto WriteAccess = ReadAccess++;
835
2
  assert((*ReadAccess)->isRead() && (*WriteAccess)->isMustWrite());
836
2
  assert((*ReadAccess)->getElementType() == (*WriteAccess)->getElementType() &&
837
2
         "Accesses use the same data type");
838
2
  assert((*ReadAccess)->isArrayKind() && (*WriteAccess)->isArrayKind());
839
2
  auto *AccessExpr =
840
2
      isl_id_to_ast_expr_get(NewAccesses, (*ReadAccess)->getId());
841
2
  auto *LoadValue = ExprBuilder.create(AccessExpr);
842
2
  AccessExpr = isl_id_to_ast_expr_get(NewAccesses, (*WriteAccess)->getId());
843
2
  auto *StoreAddr = ExprBuilder.createAccessAddress(AccessExpr);
844
2
  Builder.CreateStore(LoadValue, StoreAddr);
845
2
}
846
847
349
void IslNodeBuilder::createUser(__isl_take isl_ast_node *User) {
848
349
  LoopToScevMapT LTS;
849
349
  isl_id *Id;
850
349
  ScopStmt *Stmt;
851
349
852
349
  isl_ast_expr *Expr = isl_ast_node_user_get_expr(User);
853
349
  isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0);
854
349
  Id = isl_ast_expr_get_id(StmtExpr);
855
349
  isl_ast_expr_free(StmtExpr);
856
349
857
349
  LTS.insert(OutsideLoopIterations.begin(), OutsideLoopIterations.end());
858
349
859
349
  Stmt = (ScopStmt *)isl_id_get_user(Id);
860
349
  auto *NewAccesses = createNewAccesses(Stmt, User);
861
349
  if (
Stmt->isCopyStmt()349
)
{2
862
2
    generateCopyStmt(Stmt, NewAccesses);
863
2
    isl_ast_expr_free(Expr);
864
347
  } else {
865
347
    createSubstitutions(Expr, Stmt, LTS);
866
347
867
347
    if (Stmt->isBlockStmt())
868
318
      BlockGen.copyStmt(*Stmt, LTS, NewAccesses);
869
347
    else
870
29
      RegionGen.copyStmt(*Stmt, LTS, NewAccesses);
871
347
  }
872
349
873
349
  isl_id_to_ast_expr_free(NewAccesses);
874
349
  isl_ast_node_free(User);
875
349
  isl_id_free(Id);
876
349
}
877
878
89
void IslNodeBuilder::createBlock(__isl_take isl_ast_node *Block) {
879
89
  isl_ast_node_list *List = isl_ast_node_block_get_children(Block);
880
89
881
280
  for (int i = 0; 
i < isl_ast_node_list_n_ast_node(List)280
;
++i191
)
882
191
    create(isl_ast_node_list_get_ast_node(List, i));
883
89
884
89
  isl_ast_node_free(Block);
885
89
  isl_ast_node_list_free(List);
886
89
}
887
888
763
void IslNodeBuilder::create(__isl_take isl_ast_node *Node) {
889
763
  switch (isl_ast_node_get_type(Node)) {
890
0
  case isl_ast_node_error:
891
0
    llvm_unreachable("code generation error");
892
15
  case isl_ast_node_mark:
893
15
    createMark(Node);
894
15
    return;
895
256
  case isl_ast_node_for:
896
256
    createFor(Node);
897
256
    return;
898
54
  case isl_ast_node_if:
899
54
    createIf(Node);
900
54
    return;
901
349
  case isl_ast_node_user:
902
349
    createUser(Node);
903
349
    return;
904
89
  case isl_ast_node_block:
905
89
    createBlock(Node);
906
89
    return;
907
763
  }
908
763
909
0
  
llvm_unreachable0
("Unknown isl_ast_node type");0
910
0
}
911
912
225
bool IslNodeBuilder::materializeValue(isl_id *Id) {
913
225
  // If the Id is already mapped, skip it.
914
225
  if (
!IDToValue.count(Id)225
)
{148
915
148
    auto *ParamSCEV = (const SCEV *)isl_id_get_user(Id);
916
148
    Value *V = nullptr;
917
148
918
148
    // Parameters could refere to invariant loads that need to be
919
148
    // preloaded before we can generate code for the parameter. Thus,
920
148
    // check if any value refered to in ParamSCEV is an invariant load
921
148
    // and if so make sure its equivalence class is preloaded.
922
148
    SetVector<Value *> Values;
923
148
    findValues(ParamSCEV, SE, Values);
924
154
    for (auto *Val : Values) {
925
154
926
154
      // Check if the value is an instruction in a dead block within the SCoP
927
154
      // and if so do not code generate it.
928
154
      if (auto *
Inst154
= dyn_cast<Instruction>(Val))
{24
929
24
        if (
S.contains(Inst)24
)
{13
930
13
          bool IsDead = true;
931
13
932
13
          // Check for "undef" loads first, then if there is a statement for
933
13
          // the parent of Inst and lastly if the parent of Inst has an empty
934
13
          // domain. In the first and last case the instruction is dead but if
935
13
          // there is a statement or the domain is not empty Inst is not dead.
936
13
          auto MemInst = MemAccInst::dyn_cast(Inst);
937
12
          auto Address = MemInst ? 
MemInst.getPointerOperand()12
:
nullptr1
;
938
13
          if (
Address && 13
SE.getUnknown(UndefValue::get(Address->getType())) ==12
939
0
                             SE.getPointerBase(SE.getSCEV(Address))) {
940
13
          } else 
if (13
S.getStmtFor(Inst)13
)
{8
941
8
            IsDead = false;
942
5
          } else {
943
5
            auto *Domain = S.getDomainConditions(Inst->getParent());
944
5
            IsDead = isl_set_is_empty(Domain);
945
5
            isl_set_free(Domain);
946
5
          }
947
13
948
13
          if (
IsDead13
)
{2
949
2
            V = UndefValue::get(ParamSCEV->getType());
950
2
            break;
951
2
          }
952
13
        }
953
24
      }
954
154
955
152
      
if (auto *152
IAClass152
= S.lookupInvariantEquivClass(Val))
{10
956
10
957
10
        // Check if this invariant access class is empty, hence if we never
958
10
        // actually added a loads instruction to it. In that case it has no
959
10
        // (meaningful) users and we should not try to code generate it.
960
10
        if (IAClass->InvariantAccesses.empty())
961
0
          V = UndefValue::get(ParamSCEV->getType());
962
10
963
10
        if (
!preloadInvariantEquivClass(*IAClass)10
)
{3
964
3
          isl_id_free(Id);
965
3
          return false;
966
3
        }
967
10
      }
968
152
    }
969
148
970
145
    
V = V ? 145
V2
:
generateSCEV(ParamSCEV)143
;
971
145
    IDToValue[Id] = V;
972
145
  }
973
225
974
222
  isl_id_free(Id);
975
222
  return true;
976
225
}
977
978
139
bool IslNodeBuilder::materializeParameters(isl_set *Set) {
979
359
  for (unsigned i = 0, e = isl_set_dim(Set, isl_dim_param); 
i < e359
;
++i220
)
{223
980
223
    if (!isl_set_involves_dims(Set, isl_dim_param, i, 1))
981
159
      continue;
982
64
    isl_id *Id = isl_set_get_dim_id(Set, isl_dim_param, i);
983
64
    if (!materializeValue(Id))
984
3
      return false;
985
64
  }
986
136
  return true;
987
139
}
988
989
257
bool IslNodeBuilder::materializeParameters() {
990
161
  for (const SCEV *Param : S.parameters()) {
991
161
    isl_id *Id = S.getIdForParam(Param);
992
161
    if (!materializeValue(Id))
993
0
      return false;
994
161
  }
995
257
  return true;
996
257
}
997
998
/// Add the number of dimensions in @p BS to @p U.
999
144
static isl_stat countTotalDims(__isl_take isl_basic_set *BS, void *U) {
1000
144
  unsigned *NumTotalDim = static_cast<unsigned *>(U);
1001
144
  *NumTotalDim += isl_basic_set_total_dim(BS);
1002
144
  isl_basic_set_free(BS);
1003
144
  return isl_stat_ok;
1004
144
}
1005
1006
Value *IslNodeBuilder::preloadUnconditionally(isl_set *AccessRange,
1007
                                              isl_ast_build *Build,
1008
110
                                              Instruction *AccInst) {
1009
110
1010
110
  // TODO: This check could be performed in the ScopInfo already.
1011
110
  unsigned NumTotalDim = 0;
1012
110
  isl_set_foreach_basic_set(AccessRange, countTotalDims, &NumTotalDim);
1013
110
  if (
NumTotalDim > MaxDimensionsInAccessRange110
)
{1
1014
1
    isl_set_free(AccessRange);
1015
1
    return nullptr;
1016
1
  }
1017
110
1018
109
  isl_pw_multi_aff *PWAccRel = isl_pw_multi_aff_from_set(AccessRange);
1019
109
  isl_ast_expr *Access =
1020
109
      isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel);
1021
109
  auto *Address = isl_ast_expr_address_of(Access);
1022
109
  auto *AddressValue = ExprBuilder.create(Address);
1023
109
  Value *PreloadVal;
1024
109
1025
109
  // Correct the type as the SAI might have a different type than the user
1026
109
  // expects, especially if the base pointer is a struct.
1027
109
  Type *Ty = AccInst->getType();
1028
109
1029
109
  auto *Ptr = AddressValue;
1030
109
  auto Name = Ptr->getName();
1031
109
  auto AS = Ptr->getType()->getPointerAddressSpace();
1032
109
  Ptr = Builder.CreatePointerCast(Ptr, Ty->getPointerTo(AS), Name + ".cast");
1033
109
  PreloadVal = Builder.CreateLoad(Ptr, Name + ".load");
1034
109
  if (LoadInst *PreloadInst = dyn_cast<LoadInst>(PreloadVal))
1035
109
    PreloadInst->setAlignment(dyn_cast<LoadInst>(AccInst)->getAlignment());
1036
109
1037
109
  // TODO: This is only a hot fix for SCoP sequences that use the same load
1038
109
  //       instruction contained and hoisted by one of the SCoPs.
1039
109
  if (SE.isSCEVable(Ty))
1040
94
    SE.forgetValue(AccInst);
1041
109
1042
109
  return PreloadVal;
1043
110
}
1044
1045
Value *IslNodeBuilder::preloadInvariantLoad(const MemoryAccess &MA,
1046
113
                                            isl_set *Domain) {
1047
113
1048
113
  isl_set *AccessRange = isl_map_range(MA.getAddressFunction());
1049
113
  AccessRange = isl_set_gist_params(AccessRange, S.getContext());
1050
113
1051
113
  if (
!materializeParameters(AccessRange)113
)
{0
1052
0
    isl_set_free(AccessRange);
1053
0
    isl_set_free(Domain);
1054
0
    return nullptr;
1055
0
  }
1056
113
1057
113
  auto *Build = isl_ast_build_from_context(isl_set_universe(S.getParamSpace()));
1058
113
  isl_set *Universe = isl_set_universe(isl_set_get_space(Domain));
1059
113
  bool AlwaysExecuted = isl_set_is_equal(Domain, Universe);
1060
113
  isl_set_free(Universe);
1061
113
1062
113
  Instruction *AccInst = MA.getAccessInstruction();
1063
113
  Type *AccInstTy = AccInst->getType();
1064
113
1065
113
  Value *PreloadVal = nullptr;
1066
113
  if (
AlwaysExecuted113
)
{87
1067
87
    PreloadVal = preloadUnconditionally(AccessRange, Build, AccInst);
1068
87
    isl_ast_build_free(Build);
1069
87
    isl_set_free(Domain);
1070
87
    return PreloadVal;
1071
87
  }
1072
113
1073
26
  
if (26
!materializeParameters(Domain)26
)
{3
1074
3
    isl_ast_build_free(Build);
1075
3
    isl_set_free(AccessRange);
1076
3
    isl_set_free(Domain);
1077
3
    return nullptr;
1078
3
  }
1079
26
1080
23
  isl_ast_expr *DomainCond = isl_ast_build_expr_from_set(Build, Domain);
1081
23
  Domain = nullptr;
1082
23
1083
23
  ExprBuilder.setTrackOverflow(true);
1084
23
  Value *Cond = ExprBuilder.create(DomainCond);
1085
23
  Value *OverflowHappened = Builder.CreateNot(ExprBuilder.getOverflowState(),
1086
23
                                              "polly.preload.cond.overflown");
1087
23
  Cond = Builder.CreateAnd(Cond, OverflowHappened, "polly.preload.cond.result");
1088
23
  ExprBuilder.setTrackOverflow(false);
1089
23
1090
23
  if (!Cond->getType()->isIntegerTy(1))
1091
0
    Cond = Builder.CreateIsNotNull(Cond);
1092
23
1093
23
  BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(),
1094
23
                                  &*Builder.GetInsertPoint(), &DT, &LI);
1095
23
  CondBB->setName("polly.preload.cond");
1096
23
1097
23
  BasicBlock *MergeBB = SplitBlock(CondBB, &CondBB->front(), &DT, &LI);
1098
23
  MergeBB->setName("polly.preload.merge");
1099
23
1100
23
  Function *F = Builder.GetInsertBlock()->getParent();
1101
23
  LLVMContext &Context = F->getContext();
1102
23
  BasicBlock *ExecBB = BasicBlock::Create(Context, "polly.preload.exec", F);
1103
23
1104
23
  DT.addNewBlock(ExecBB, CondBB);
1105
23
  if (Loop *L = LI.getLoopFor(CondBB))
1106
9
    L->addBasicBlockToLoop(ExecBB, LI);
1107
23
1108
23
  auto *CondBBTerminator = CondBB->getTerminator();
1109
23
  Builder.SetInsertPoint(CondBBTerminator);
1110
23
  Builder.CreateCondBr(Cond, ExecBB, MergeBB);
1111
23
  CondBBTerminator->eraseFromParent();
1112
23
1113
23
  Builder.SetInsertPoint(ExecBB);
1114
23
  Builder.CreateBr(MergeBB);
1115
23
1116
23
  Builder.SetInsertPoint(ExecBB->getTerminator());
1117
23
  Value *PreAccInst = preloadUnconditionally(AccessRange, Build, AccInst);
1118
23
  Builder.SetInsertPoint(MergeBB->getTerminator());
1119
23
  auto *MergePHI = Builder.CreatePHI(
1120
23
      AccInstTy, 2, "polly.preload." + AccInst->getName() + ".merge");
1121
23
  PreloadVal = MergePHI;
1122
23
1123
23
  if (
!PreAccInst23
)
{0
1124
0
    PreloadVal = nullptr;
1125
0
    PreAccInst = UndefValue::get(AccInstTy);
1126
0
  }
1127
23
1128
23
  MergePHI->addIncoming(PreAccInst, ExecBB);
1129
23
  MergePHI->addIncoming(Constant::getNullValue(AccInstTy), CondBB);
1130
23
1131
23
  isl_ast_build_free(Build);
1132
23
  return PreloadVal;
1133
26
}
1134
1135
bool IslNodeBuilder::preloadInvariantEquivClass(
1136
157
    InvariantEquivClassTy &IAClass) {
1137
157
  // For an equivalence class of invariant loads we pre-load the representing
1138
157
  // element with the unified execution context. However, we have to map all
1139
157
  // elements of the class to the one preloaded load as they are referenced
1140
157
  // during the code generation and therefor need to be mapped.
1141
157
  const MemoryAccessList &MAs = IAClass.InvariantAccesses;
1142
157
  if (MAs.empty())
1143
2
    return true;
1144
157
1145
155
  MemoryAccess *MA = MAs.front();
1146
155
  assert(MA->isArrayKind() && MA->isRead());
1147
155
1148
155
  // If the access function was already mapped, the preload of this equivalence
1149
155
  // class was triggered earlier already and doesn't need to be done again.
1150
155
  if (ValueMap.count(MA->getAccessInstruction()))
1151
35
    return true;
1152
155
1153
155
  // Check for recursion which can be caused by additional constraints, e.g.,
1154
155
  // non-finite loop constraints. In such a case we have to bail out and insert
1155
155
  // a "false" runtime check that will cause the original code to be executed.
1156
120
  auto PtrId = std::make_pair(IAClass.IdentifyingPointer, IAClass.AccessType);
1157
120
  if (!PreloadedPtrs.insert(PtrId).second)
1158
4
    return false;
1159
120
1160
120
  // The execution context of the IAClass.
1161
116
  isl_set *&ExecutionCtx = IAClass.ExecutionContext;
1162
116
1163
116
  // If the base pointer of this class is dependent on another one we have to
1164
116
  // make sure it was preloaded already.
1165
116
  auto *SAI = MA->getScopArrayInfo();
1166
116
  if (auto *
BaseIAClass116
= S.lookupInvariantEquivClass(SAI->getBasePtr()))
{29
1167
29
    if (!preloadInvariantEquivClass(*BaseIAClass))
1168
1
      return false;
1169
29
1170
29
    // After we preloaded the BaseIAClass we adjusted the BaseExecutionCtx and
1171
29
    // we need to refine the ExecutionCtx.
1172
28
    isl_set *BaseExecutionCtx = isl_set_copy(BaseIAClass->ExecutionContext);
1173
28
    ExecutionCtx = isl_set_intersect(ExecutionCtx, BaseExecutionCtx);
1174
28
  }
1175
116
1176
116
  // If the size of a dimension is dependent on another class, make sure it is
1177
116
  // preloaded.
1178
115
  
for (unsigned i = 1, e = SAI->getNumberOfDimensions(); 115
i < e115
;
++i0
)
{2
1179
2
    const SCEV *Dim = SAI->getDimensionSize(i);
1180
2
    SetVector<Value *> Values;
1181
2
    findValues(Dim, SE, Values);
1182
2
    for (auto *Val : Values) {
1183
2
      if (auto *
BaseIAClass2
= S.lookupInvariantEquivClass(Val))
{2
1184
2
        if (!preloadInvariantEquivClass(*BaseIAClass))
1185
2
          return false;
1186
2
1187
2
        // After we preloaded the BaseIAClass we adjusted the BaseExecutionCtx
1188
2
        // and we need to refine the ExecutionCtx.
1189
0
        isl_set *BaseExecutionCtx = isl_set_copy(BaseIAClass->ExecutionContext);
1190
0
        ExecutionCtx = isl_set_intersect(ExecutionCtx, BaseExecutionCtx);
1191
0
      }
1192
2
    }
1193
2
  }
1194
115
1195
113
  Instruction *AccInst = MA->getAccessInstruction();
1196
113
  Type *AccInstTy = AccInst->getType();
1197
113
1198
113
  Value *PreloadVal = preloadInvariantLoad(*MA, isl_set_copy(ExecutionCtx));
1199
113
  if (!PreloadVal)
1200
4
    return false;
1201
113
1202
118
  
for (const MemoryAccess *MA : MAs) 109
{118
1203
118
    Instruction *MAAccInst = MA->getAccessInstruction();
1204
118
    assert(PreloadVal->getType() == MAAccInst->getType());
1205
118
    ValueMap[MAAccInst] = PreloadVal;
1206
118
  }
1207
109
1208
109
  if (
SE.isSCEVable(AccInstTy)109
)
{94
1209
94
    isl_id *ParamId = S.getIdForParam(SE.getSCEV(AccInst));
1210
94
    if (ParamId)
1211
20
      IDToValue[ParamId] = PreloadVal;
1212
94
    isl_id_free(ParamId);
1213
94
  }
1214
109
1215
109
  BasicBlock *EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock();
1216
109
  auto *Alloca = new AllocaInst(AccInstTy, DL.getAllocaAddrSpace(),
1217
109
                                AccInst->getName() + ".preload.s2a");
1218
109
  Alloca->insertBefore(&*EntryBB->getFirstInsertionPt());
1219
109
  Builder.CreateStore(PreloadVal, Alloca);
1220
109
  ValueMapT PreloadedPointer;
1221
109
  PreloadedPointer[PreloadVal] = AccInst;
1222
109
  Annotator.addAlternativeAliasBases(PreloadedPointer);
1223
109
1224
51
  for (auto *DerivedSAI : SAI->getDerivedSAIs()) {
1225
51
    Value *BasePtr = DerivedSAI->getBasePtr();
1226
51
1227
61
    for (const MemoryAccess *MA : MAs) {
1228
61
      // As the derived SAI information is quite coarse, any load from the
1229
61
      // current SAI could be the base pointer of the derived SAI, however we
1230
61
      // should only change the base pointer of the derived SAI if we actually
1231
61
      // preloaded it.
1232
61
      if (
BasePtr == MA->getBaseAddr()61
)
{0
1233
0
        assert(BasePtr->getType() == PreloadVal->getType());
1234
0
        DerivedSAI->setBasePtr(PreloadVal);
1235
0
      }
1236
61
1237
61
      // For scalar derived SAIs we remap the alloca used for the derived value.
1238
61
      if (BasePtr == MA->getAccessInstruction())
1239
39
        ScalarMap[DerivedSAI] = Alloca;
1240
61
    }
1241
51
  }
1242
109
1243
118
  for (const MemoryAccess *MA : MAs) {
1244
118
1245
118
    Instruction *MAAccInst = MA->getAccessInstruction();
1246
118
    // Use the escape system to get the correct value to users outside the SCoP.
1247
118
    BlockGenerator::EscapeUserVectorTy EscapeUsers;
1248
118
    for (auto *U : MAAccInst->users())
1249
124
      
if (Instruction *124
UI124
= dyn_cast<Instruction>(U))
1250
124
        
if (124
!S.contains(UI)124
)
1251
7
          EscapeUsers.push_back(UI);
1252
118
1253
118
    if (EscapeUsers.empty())
1254
112
      continue;
1255
118
1256
6
    EscapeMap[MA->getAccessInstruction()] =
1257
6
        std::make_pair(Alloca, std::move(EscapeUsers));
1258
6
  }
1259
109
1260
109
  return true;
1261
113
}
1262
1263
257
void IslNodeBuilder::allocateNewArrays() {
1264
547
  for (auto &SAI : S.arrays()) {
1265
547
    if (SAI->getBasePtr())
1266
542
      continue;
1267
547
1268
5
    assert(SAI->getNumberOfDimensions() > 0 && SAI->getDimensionSize(0) &&
1269
5
           "The size of the outermost dimension is used to declare newly "
1270
5
           "created arrays that require memory allocation.");
1271
5
1272
5
    Type *NewArrayType = nullptr;
1273
15
    for (int i = SAI->getNumberOfDimensions() - 1; 
i >= 015
;
i--10
)
{10
1274
10
      auto *DimSize = SAI->getDimensionSize(i);
1275
10
      unsigned UnsignedDimSize = static_cast<const SCEVConstant *>(DimSize)
1276
10
                                     ->getAPInt()
1277
10
                                     .getLimitedValue();
1278
10
1279
10
      if (!NewArrayType)
1280
5
        NewArrayType = SAI->getElementType();
1281
10
1282
10
      NewArrayType = ArrayType::get(NewArrayType, UnsignedDimSize);
1283
10
    }
1284
5
1285
5
    auto InstIt =
1286
5
        Builder.GetInsertBlock()->getParent()->getEntryBlock().getTerminator();
1287
5
    auto *CreatedArray = new AllocaInst(NewArrayType, DL.getAllocaAddrSpace(),
1288
5
                                        SAI->getName(), &*InstIt);
1289
5
    CreatedArray->setAlignment(PollyTargetFirstLevelCacheLineSize);
1290
5
    SAI->setBasePtr(CreatedArray);
1291
5
  }
1292
257
}
1293
1294
262
bool IslNodeBuilder::preloadInvariantLoads() {
1295
262
1296
262
  auto &InvariantEquivClasses = S.getInvariantAccesses();
1297
262
  if (InvariantEquivClasses.empty())
1298
199
    return true;
1299
262
1300
63
  BasicBlock *PreLoadBB = SplitBlock(Builder.GetInsertBlock(),
1301
63
                                     &*Builder.GetInsertPoint(), &DT, &LI);
1302
63
  PreLoadBB->setName("polly.preload.begin");
1303
63
  Builder.SetInsertPoint(&PreLoadBB->front());
1304
63
1305
63
  for (auto &IAClass : InvariantEquivClasses)
1306
116
    
if (116
!preloadInvariantEquivClass(IAClass)116
)
1307
5
      return false;
1308
63
1309
58
  return true;
1310
63
}
1311
1312
257
void IslNodeBuilder::addParameters(__isl_take isl_set *Context) {
1313
257
  // Materialize values for the parameters of the SCoP.
1314
257
  materializeParameters();
1315
257
1316
257
  // Generate values for the current loop iteration for all surrounding loops.
1317
257
  //
1318
257
  // We may also reference loops outside of the scop which do not contain the
1319
257
  // scop itself, but as the number of such scops may be arbitrarily large we do
1320
257
  // not generate code for them here, but only at the point of code generation
1321
257
  // where these values are needed.
1322
257
  Loop *L = LI.getLoopFor(S.getEntry());
1323
257
1324
451
  while (
L != nullptr && 451
S.contains(L)218
)
1325
194
    L = L->getParentLoop();
1326
257
1327
282
  while (
L != nullptr282
)
{25
1328
25
    const SCEV *OuterLIV = SE.getAddRecExpr(SE.getUnknown(Builder.getInt64(0)),
1329
25
                                            SE.getUnknown(Builder.getInt64(1)),
1330
25
                                            L, SCEV::FlagAnyWrap);
1331
25
    Value *V = generateSCEV(OuterLIV);
1332
25
    OutsideLoopIterations[L] = SE.getUnknown(V);
1333
25
    L = L->getParentLoop();
1334
25
  }
1335
257
1336
257
  isl_set_free(Context);
1337
257
}
1338
1339
169
Value *IslNodeBuilder::generateSCEV(const SCEV *Expr) {
1340
169
  /// We pass the insert location of our Builder, as Polly ensures during IR
1341
169
  /// generation that there is always a valid CFG into which instructions are
1342
169
  /// inserted. As a result, the insertpoint is known to be always followed by a
1343
169
  /// terminator instruction. This means the insert point may be specified by a
1344
169
  /// terminator instruction, but it can never point to an ->end() iterator
1345
169
  /// which does not have a corresponding instruction. Hence, dereferencing
1346
169
  /// the insertpoint to obtain an instruction is known to be save.
1347
169
  ///
1348
169
  /// We also do not need to update the Builder here, as new instructions are
1349
169
  /// always inserted _before_ the given InsertLocation. As a result, the
1350
169
  /// insert location remains valid.
1351
169
  assert(Builder.GetInsertBlock()->end() != Builder.GetInsertPoint() &&
1352
169
         "Insert location points after last valid instruction");
1353
169
  Instruction *InsertLocation = &*Builder.GetInsertPoint();
1354
169
  return expandCodeFor(S, SE, DL, "polly", Expr, Expr->getType(),
1355
169
                       InsertLocation, &ValueMap,
1356
169
                       StartBlock->getSinglePredecessor());
1357
169
}
1358
1359
/// The AST expression we generate to perform the run-time check assumes
1360
/// computations on integer types of infinite size. As we only use 64-bit
1361
/// arithmetic we check for overflows, in case of which we set the result
1362
/// of this run-time check to false to be cosnservatively correct,
1363
257
Value *IslNodeBuilder::createRTC(isl_ast_expr *Condition) {
1364
257
  auto ExprBuilder = getExprBuilder();
1365
257
  ExprBuilder.setTrackOverflow(true);
1366
257
  Value *RTC = ExprBuilder.create(Condition);
1367
257
  if (!RTC->getType()->isIntegerTy(1))
1368
156
    RTC = Builder.CreateIsNotNull(RTC);
1369
257
  Value *OverflowHappened =
1370
257
      Builder.CreateNot(ExprBuilder.getOverflowState(), "polly.rtc.overflown");
1371
257
1372
257
  if (
PollyGenerateRTCPrint257
)
{0
1373
0
    auto *F = Builder.GetInsertBlock()->getParent();
1374
0
    RuntimeDebugBuilder::createCPUPrinter(
1375
0
        Builder,
1376
0
        "F: " + F->getName().str() + " R: " + S.getRegion().getNameStr() +
1377
0
            " __RTC: ",
1378
0
        RTC, " Overflow: ", OverflowHappened, "\n");
1379
0
  }
1380
257
1381
257
  RTC = Builder.CreateAnd(RTC, OverflowHappened, "polly.rtc.result");
1382
257
  ExprBuilder.setTrackOverflow(false);
1383
257
1384
257
  if (!isa<ConstantInt>(RTC))
1385
100
    VersionedScops++;
1386
257
1387
257
  return RTC;
1388
257
}