Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/tools/polly/lib/CodeGen/IslNodeBuilder.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- IslNodeBuilder.cpp - Translate an isl AST into a LLVM-IR AST -------===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// This file contains the IslNodeBuilder, a class to translate an isl AST into
11
// a LLVM-IR AST.
12
//
13
//===----------------------------------------------------------------------===//
14
15
#include "polly/CodeGen/IslNodeBuilder.h"
16
#include "polly/CodeGen/BlockGenerators.h"
17
#include "polly/CodeGen/CodeGeneration.h"
18
#include "polly/CodeGen/IslAst.h"
19
#include "polly/CodeGen/IslExprBuilder.h"
20
#include "polly/CodeGen/LoopGenerators.h"
21
#include "polly/CodeGen/RuntimeDebugBuilder.h"
22
#include "polly/Config/config.h"
23
#include "polly/Options.h"
24
#include "polly/ScopInfo.h"
25
#include "polly/Support/GICHelper.h"
26
#include "polly/Support/SCEVValidator.h"
27
#include "polly/Support/ScopHelper.h"
28
#include "llvm/ADT/APInt.h"
29
#include "llvm/ADT/PostOrderIterator.h"
30
#include "llvm/ADT/SetVector.h"
31
#include "llvm/ADT/SmallPtrSet.h"
32
#include "llvm/ADT/Statistic.h"
33
#include "llvm/Analysis/LoopInfo.h"
34
#include "llvm/Analysis/RegionInfo.h"
35
#include "llvm/Analysis/ScalarEvolution.h"
36
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
37
#include "llvm/IR/BasicBlock.h"
38
#include "llvm/IR/Constant.h"
39
#include "llvm/IR/Constants.h"
40
#include "llvm/IR/DataLayout.h"
41
#include "llvm/IR/DerivedTypes.h"
42
#include "llvm/IR/Dominators.h"
43
#include "llvm/IR/Function.h"
44
#include "llvm/IR/InstrTypes.h"
45
#include "llvm/IR/Instruction.h"
46
#include "llvm/IR/Instructions.h"
47
#include "llvm/IR/Type.h"
48
#include "llvm/IR/Value.h"
49
#include "llvm/Support/Casting.h"
50
#include "llvm/Support/CommandLine.h"
51
#include "llvm/Support/ErrorHandling.h"
52
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
53
#include "isl/aff.h"
54
#include "isl/aff_type.h"
55
#include "isl/ast.h"
56
#include "isl/ast_build.h"
57
#include "isl/isl-noexceptions.h"
58
#include "isl/map.h"
59
#include "isl/set.h"
60
#include "isl/union_map.h"
61
#include "isl/union_set.h"
62
#include "isl/val.h"
63
#include <algorithm>
64
#include <cassert>
65
#include <cstdint>
66
#include <cstring>
67
#include <string>
68
#include <utility>
69
#include <vector>
70
71
using namespace llvm;
72
using namespace polly;
73
74
#define DEBUG_TYPE "polly-codegen"
75
76
STATISTIC(VersionedScops, "Number of SCoPs that required versioning.");
77
78
STATISTIC(SequentialLoops, "Number of generated sequential for-loops");
79
STATISTIC(ParallelLoops, "Number of generated parallel for-loops");
80
STATISTIC(VectorLoops, "Number of generated vector for-loops");
81
STATISTIC(IfConditions, "Number of generated if-conditions");
82
83
static cl::opt<bool> PollyGenerateRTCPrint(
84
    "polly-codegen-emit-rtc-print",
85
    cl::desc("Emit code that prints the runtime check result dynamically."),
86
    cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
87
88
// If this option is set we always use the isl AST generator to regenerate
89
// memory accesses. Without this option set we regenerate expressions using the
90
// original SCEV expressions and only generate new expressions in case the
91
// access relation has been changed and consequently must be regenerated.
92
static cl::opt<bool> PollyGenerateExpressions(
93
    "polly-codegen-generate-expressions",
94
    cl::desc("Generate AST expressions for unmodified and modified accesses"),
95
    cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
96
97
static cl::opt<int> PollyTargetFirstLevelCacheLineSize(
98
    "polly-target-first-level-cache-line-size",
99
    cl::desc("The size of the first level cache line size specified in bytes."),
100
    cl::Hidden, cl::init(64), cl::ZeroOrMore, cl::cat(PollyCategory));
101
102
__isl_give isl_ast_expr *
103
IslNodeBuilder::getUpperBound(__isl_keep isl_ast_node *For,
104
296
                              ICmpInst::Predicate &Predicate) {
105
296
  isl_id *UBID, *IteratorID;
106
296
  isl_ast_expr *Cond, *Iterator, *UB, *Arg0;
107
296
  isl_ast_op_type Type;
108
296
109
296
  Cond = isl_ast_node_for_get_cond(For);
110
296
  Iterator = isl_ast_node_for_get_iterator(For);
111
296
  assert(isl_ast_expr_get_type(Cond) == isl_ast_expr_op &&
112
296
         "conditional expression is not an atomic upper bound");
113
296
114
296
  Type = isl_ast_expr_get_op_type(Cond);
115
296
116
296
  switch (Type) {
117
216
  case isl_ast_op_le:
118
216
    Predicate = ICmpInst::ICMP_SLE;
119
216
    break;
120
80
  case isl_ast_op_lt:
121
80
    Predicate = ICmpInst::ICMP_SLT;
122
80
    break;
123
0
  default:
124
0
    llvm_unreachable("Unexpected comparison type in loop condition");
125
296
  }
126
296
127
296
  Arg0 = isl_ast_expr_get_op_arg(Cond, 0);
128
296
129
296
  assert(isl_ast_expr_get_type(Arg0) == isl_ast_expr_id &&
130
296
         "conditional expression is not an atomic upper bound");
131
296
132
296
  UBID = isl_ast_expr_get_id(Arg0);
133
296
134
296
  assert(isl_ast_expr_get_type(Iterator) == isl_ast_expr_id &&
135
296
         "Could not get the iterator");
136
296
137
296
  IteratorID = isl_ast_expr_get_id(Iterator);
138
296
139
296
  assert(UBID == IteratorID &&
140
296
         "conditional expression is not an atomic upper bound");
141
296
142
296
  UB = isl_ast_expr_get_op_arg(Cond, 1);
143
296
144
296
  isl_ast_expr_free(Cond);
145
296
  isl_ast_expr_free(Iterator);
146
296
  isl_ast_expr_free(Arg0);
147
296
  isl_id_free(IteratorID);
148
296
  isl_id_free(UBID);
149
296
150
296
  return UB;
151
296
}
152
153
/// Return true if a return value of Predicate is true for the value represented
154
/// by passed isl_ast_expr_int.
155
static bool checkIslAstExprInt(__isl_take isl_ast_expr *Expr,
156
42
                               isl_bool (*Predicate)(__isl_keep isl_val *)) {
157
42
  if (
isl_ast_expr_get_type(Expr) != isl_ast_expr_int42
) {
158
0
    isl_ast_expr_free(Expr);
159
0
    return false;
160
0
  }
161
42
  auto ExprVal = isl_ast_expr_get_val(Expr);
162
42
  isl_ast_expr_free(Expr);
163
42
  if (
Predicate(ExprVal) != isl_bool_true42
) {
164
0
    isl_val_free(ExprVal);
165
0
    return false;
166
0
  }
167
42
  isl_val_free(ExprVal);
168
42
  return true;
169
42
}
170
171
22
int IslNodeBuilder::getNumberOfIterations(__isl_keep isl_ast_node *For) {
172
22
  assert(isl_ast_node_get_type(For) == isl_ast_node_for);
173
22
  auto Body = isl_ast_node_for_get_body(For);
174
22
175
22
  // First, check if we can actually handle this code.
176
22
  switch (isl_ast_node_get_type(Body)) {
177
20
  case isl_ast_node_user:
178
20
    break;
179
2
  case isl_ast_node_block: {
180
2
    isl_ast_node_list *List = isl_ast_node_block_get_children(Body);
181
4
    for (int i = 0; 
i < isl_ast_node_list_n_ast_node(List)4
;
++i2
) {
182
3
      isl_ast_node *Node = isl_ast_node_list_get_ast_node(List, i);
183
3
      int Type = isl_ast_node_get_type(Node);
184
3
      isl_ast_node_free(Node);
185
3
      if (
Type != isl_ast_node_user3
) {
186
1
        isl_ast_node_list_free(List);
187
1
        isl_ast_node_free(Body);
188
1
        return -1;
189
1
      }
190
3
    }
191
1
    isl_ast_node_list_free(List);
192
1
    break;
193
2
  }
194
0
  default:
195
0
    isl_ast_node_free(Body);
196
0
    return -1;
197
21
  }
198
21
  isl_ast_node_free(Body);
199
21
200
21
  auto Init = isl_ast_node_for_get_init(For);
201
21
  if (!checkIslAstExprInt(Init, isl_val_is_zero))
202
0
    return -1;
203
21
  auto Inc = isl_ast_node_for_get_inc(For);
204
21
  if (!checkIslAstExprInt(Inc, isl_val_is_one))
205
0
    return -1;
206
21
  CmpInst::Predicate Predicate;
207
21
  auto UB = getUpperBound(For, Predicate);
208
21
  if (
isl_ast_expr_get_type(UB) != isl_ast_expr_int21
) {
209
2
    isl_ast_expr_free(UB);
210
2
    return -1;
211
2
  }
212
19
  auto UpVal = isl_ast_expr_get_val(UB);
213
19
  isl_ast_expr_free(UB);
214
19
  int NumberIterations = isl_val_get_num_si(UpVal);
215
19
  isl_val_free(UpVal);
216
19
  if (NumberIterations < 0)
217
0
    return -1;
218
19
  
if (19
Predicate == CmpInst::ICMP_SLT19
)
219
0
    return NumberIterations;
220
19
  else
221
19
    return NumberIterations + 1;
222
0
}
223
224
/// Extract the values and SCEVs needed to generate code for a block.
225
static int findReferencesInBlock(struct SubtreeReferences &References,
226
28
                                 const ScopStmt *Stmt, BasicBlock *BB) {
227
184
  for (Instruction &Inst : *BB) {
228
184
    // Include invariant loads
229
184
    if (isa<LoadInst>(Inst))
230
20
      
if (Value *20
InvariantLoad20
= References.GlobalMap.lookup(&Inst))
231
7
        References.Values.insert(InvariantLoad);
232
184
233
350
    for (Value *SrcVal : Inst.operands()) {
234
350
      auto *Scope = References.LI.getLoopFor(BB);
235
350
      if (
canSynthesize(SrcVal, References.S, &References.SE, Scope)350
) {
236
254
        References.SCEVs.insert(References.SE.getSCEVAtScope(SrcVal, Scope));
237
254
        continue;
238
96
      } else 
if (Value *96
NewVal96
= References.GlobalMap.lookup(SrcVal))
239
6
        References.Values.insert(NewVal);
240
350
    }
241
184
  }
242
28
  return 0;
243
28
}
244
245
isl_stat addReferencesFromStmt(const ScopStmt *Stmt, void *UserPtr,
246
28
                               bool CreateScalarRefs) {
247
28
  auto &References = *static_cast<struct SubtreeReferences *>(UserPtr);
248
28
249
28
  if (Stmt->isBlockStmt())
250
28
    findReferencesInBlock(References, Stmt, Stmt->getBasicBlock());
251
0
  else {
252
0
    assert(Stmt->isRegionStmt() &&
253
0
           "Stmt was neither block nor region statement");
254
0
    for (BasicBlock *BB : Stmt->getRegion()->blocks())
255
0
      findReferencesInBlock(References, Stmt, BB);
256
0
  }
257
28
258
45
  for (auto &Access : *Stmt) {
259
45
    if (
References.ParamSpace45
) {
260
0
      isl::space ParamSpace = Access->getLatestAccessRelation().get_space();
261
0
      (*References.ParamSpace) =
262
0
          References.ParamSpace->align_params(ParamSpace);
263
0
    }
264
45
265
45
    if (
Access->isLatestArrayKind()45
) {
266
43
      auto *BasePtr = Access->getScopArrayInfo()->getBasePtr();
267
43
      if (Instruction *OpInst = dyn_cast<Instruction>(BasePtr))
268
10
        
if (10
Stmt->getParent()->contains(OpInst)10
)
269
7
          continue;
270
36
271
36
      References.Values.insert(BasePtr);
272
36
      continue;
273
36
    }
274
2
275
2
    
if (2
CreateScalarRefs2
)
276
2
      References.Values.insert(References.BlockGen.getOrCreateAlloca(*Access));
277
45
  }
278
28
279
28
  return isl_stat_ok;
280
28
}
281
282
/// Extract the out-of-scop values and SCEVs referenced from a set describing
283
/// a ScopStmt.
284
///
285
/// This includes the SCEVUnknowns referenced by the SCEVs used in the
286
/// statement and the base pointers of the memory accesses. For scalar
287
/// statements we force the generation of alloca memory locations and list
288
/// these locations in the set of out-of-scop values as well.
289
///
290
/// @param Set     A set which references the ScopStmt we are interested in.
291
/// @param UserPtr A void pointer that can be casted to a SubtreeReferences
292
///                structure.
293
static isl_stat addReferencesFromStmtSet(__isl_take isl_set *Set,
294
28
                                         void *UserPtr) {
295
28
  isl_id *Id = isl_set_get_tuple_id(Set);
296
28
  auto *Stmt = static_cast<const ScopStmt *>(isl_id_get_user(Id));
297
28
  isl_id_free(Id);
298
28
  isl_set_free(Set);
299
28
  return addReferencesFromStmt(Stmt, UserPtr);
300
28
}
301
302
/// Extract the out-of-scop values and SCEVs referenced from a union set
303
/// referencing multiple ScopStmts.
304
///
305
/// This includes the SCEVUnknowns referenced by the SCEVs used in the
306
/// statement and the base pointers of the memory accesses. For scalar
307
/// statements we force the generation of alloca memory locations and list
308
/// these locations in the set of out-of-scop values as well.
309
///
310
/// @param USet       A union set referencing the ScopStmts we are interested
311
///                   in.
312
/// @param References The SubtreeReferences data structure through which
313
///                   results are returned and further information is
314
///                   provided.
315
static void
316
addReferencesFromStmtUnionSet(isl_union_set *USet,
317
27
                              struct SubtreeReferences &References) {
318
27
  isl_union_set_foreach_set(USet, addReferencesFromStmtSet, &References);
319
27
  isl_union_set_free(USet);
320
27
}
321
322
__isl_give isl_union_map *
323
46
IslNodeBuilder::getScheduleForAstNode(__isl_keep isl_ast_node *For) {
324
46
  return IslAstInfo::getSchedule(For);
325
46
}
326
327
void IslNodeBuilder::getReferencesInSubtree(__isl_keep isl_ast_node *For,
328
                                            SetVector<Value *> &Values,
329
27
                                            SetVector<const Loop *> &Loops) {
330
27
  SetVector<const SCEV *> SCEVs;
331
27
  struct SubtreeReferences References = {
332
27
      LI, SE, S, ValueMap, Values, SCEVs, getBlockGenerator(), nullptr};
333
27
334
27
  for (const auto &I : IDToValue)
335
19
    Values.insert(I.second);
336
27
337
27
  // NOTE: this is populated in IslNodeBuilder::addParameters
338
27
  for (const auto &I : OutsideLoopIterations)
339
2
    Values.insert(cast<SCEVUnknown>(I.second)->getValue());
340
27
341
27
  isl_union_set *Schedule = isl_union_map_domain(getScheduleForAstNode(For));
342
27
  addReferencesFromStmtUnionSet(Schedule, References);
343
27
344
189
  for (const SCEV *Expr : SCEVs) {
345
189
    findValues(Expr, SE, Values);
346
189
    findLoops(Expr, Loops);
347
189
  }
348
27
349
69
  Values.remove_if([](const Value *V) { return isa<GlobalValue>(V); });
350
27
351
27
  /// Note: Code generation of induction variables of loops outside Scops
352
27
  ///
353
27
  /// Remove loops that contain the scop or that are part of the scop, as they
354
27
  /// are considered local. This leaves only loops that are before the scop, but
355
27
  /// do not contain the scop itself.
356
27
  /// We ignore loops perfectly contained in the Scop because these are already
357
27
  /// generated at `IslNodeBuilder::addParameters`. These `Loops` are loops
358
27
  /// whose induction variables are referred to by the Scop, but the Scop is not
359
27
  /// fully contained in these Loops. Since there can be many of these,
360
27
  /// we choose to codegen these on-demand.
361
27
  /// @see IslNodeBuilder::materializeNonScopLoopInductionVariable.
362
40
  Loops.remove_if([this](const Loop *L) {
363
3
    return S.contains(L) || L->contains(S.getEntry());
364
40
  });
365
27
366
27
  // Contains Values that may need to be replaced with other values
367
27
  // due to replacements from the ValueMap. We should make sure
368
27
  // that we return correctly remapped values.
369
27
  // NOTE: this code path is tested by:
370
27
  //     1.  test/Isl/CodeGen/OpenMP/single_loop_with_loop_invariant_baseptr.ll
371
27
  //     2.  test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll
372
27
  SetVector<Value *> ReplacedValues;
373
62
  for (Value *V : Values) {
374
62
    ReplacedValues.insert(getLatestValue(V));
375
62
  }
376
27
  Values = ReplacedValues;
377
27
}
378
379
27
void IslNodeBuilder::updateValues(ValueMapT &NewValues) {
380
27
  SmallPtrSet<Value *, 5> Inserted;
381
27
382
19
  for (const auto &I : IDToValue) {
383
19
    IDToValue[I.first] = NewValues[I.second];
384
19
    Inserted.insert(I.second);
385
19
  }
386
27
387
60
  for (const auto &I : NewValues) {
388
60
    if (Inserted.count(I.first))
389
0
      continue;
390
60
391
60
    ValueMap[I.first] = I.second;
392
60
  }
393
27
}
394
395
62
Value *IslNodeBuilder::getLatestValue(Value *Original) const {
396
62
  auto It = ValueMap.find(Original);
397
62
  if (It == ValueMap.end())
398
59
    return Original;
399
3
  return It->second;
400
3
}
401
402
void IslNodeBuilder::createUserVector(__isl_take isl_ast_node *User,
403
                                      std::vector<Value *> &IVS,
404
                                      __isl_take isl_id *IteratorID,
405
20
                                      __isl_take isl_union_map *Schedule) {
406
20
  isl_ast_expr *Expr = isl_ast_node_user_get_expr(User);
407
20
  isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0);
408
20
  isl_id *Id = isl_ast_expr_get_id(StmtExpr);
409
20
  isl_ast_expr_free(StmtExpr);
410
20
  ScopStmt *Stmt = (ScopStmt *)isl_id_get_user(Id);
411
20
  std::vector<LoopToScevMapT> VLTS(IVS.size());
412
20
413
20
  isl_union_set *Domain = isl_union_set_from_set(Stmt->getDomain().release());
414
20
  Schedule = isl_union_map_intersect_domain(Schedule, Domain);
415
20
  isl_map *S = isl_map_from_union_map(Schedule);
416
20
417
20
  auto *NewAccesses = createNewAccesses(Stmt, User);
418
20
  createSubstitutionsVector(Expr, Stmt, VLTS, IVS, IteratorID);
419
20
  VectorBlockGenerator::generate(BlockGen, *Stmt, VLTS, S, NewAccesses);
420
20
  isl_id_to_ast_expr_free(NewAccesses);
421
20
  isl_map_free(S);
422
20
  isl_id_free(Id);
423
20
  isl_ast_node_free(User);
424
20
}
425
426
22
void IslNodeBuilder::createMark(__isl_take isl_ast_node *Node) {
427
22
  auto *Id = isl_ast_node_mark_get_id(Node);
428
22
  auto Child = isl_ast_node_mark_get_node(Node);
429
22
  isl_ast_node_free(Node);
430
22
  // If a child node of a 'SIMD mark' is a loop that has a single iteration,
431
22
  // it will be optimized away and we should skip it.
432
22
  if (strcmp(isl_id_get_name(Id), "SIMD") == 0 &&
433
22
      
isl_ast_node_get_type(Child) == isl_ast_node_for2
) {
434
2
    bool Vector = PollyVectorizerChoice == VECTORIZER_POLLY;
435
2
    int VectorWidth = getNumberOfIterations(Child);
436
2
    if (
Vector && 2
1 < VectorWidth2
&&
VectorWidth <= 162
)
437
2
      createForVector(Child, VectorWidth);
438
2
    else
439
0
      createForSequential(Child, true);
440
2
    isl_id_free(Id);
441
2
    return;
442
2
  }
443
20
  
if (20
strcmp(isl_id_get_name(Id), "Inter iteration alias-free") == 020
) {
444
2
    auto *BasePtr = static_cast<Value *>(isl_id_get_user(Id));
445
2
    Annotator.addInterIterationAliasFreeBasePtr(BasePtr);
446
2
  }
447
22
  create(Child);
448
22
  isl_id_free(Id);
449
22
}
450
451
void IslNodeBuilder::createForVector(__isl_take isl_ast_node *For,
452
19
                                     int VectorWidth) {
453
19
  isl_ast_node *Body = isl_ast_node_for_get_body(For);
454
19
  isl_ast_expr *Init = isl_ast_node_for_get_init(For);
455
19
  isl_ast_expr *Inc = isl_ast_node_for_get_inc(For);
456
19
  isl_ast_expr *Iterator = isl_ast_node_for_get_iterator(For);
457
19
  isl_id *IteratorID = isl_ast_expr_get_id(Iterator);
458
19
459
19
  Value *ValueLB = ExprBuilder.create(Init);
460
19
  Value *ValueInc = ExprBuilder.create(Inc);
461
19
462
19
  Type *MaxType = ExprBuilder.getType(Iterator);
463
19
  MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType());
464
19
  MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType());
465
19
466
19
  if (MaxType != ValueLB->getType())
467
0
    ValueLB = Builder.CreateSExt(ValueLB, MaxType);
468
19
  if (MaxType != ValueInc->getType())
469
0
    ValueInc = Builder.CreateSExt(ValueInc, MaxType);
470
19
471
19
  std::vector<Value *> IVS(VectorWidth);
472
19
  IVS[0] = ValueLB;
473
19
474
119
  for (int i = 1; 
i < VectorWidth119
;
i++100
)
475
100
    IVS[i] = Builder.CreateAdd(IVS[i - 1], ValueInc, "p_vector_iv");
476
19
477
19
  isl_union_map *Schedule = getScheduleForAstNode(For);
478
19
  assert(Schedule && "For statement annotation does not contain its schedule");
479
19
480
19
  IDToValue[IteratorID] = ValueLB;
481
19
482
19
  switch (isl_ast_node_get_type(Body)) {
483
18
  case isl_ast_node_user:
484
18
    createUserVector(Body, IVS, isl_id_copy(IteratorID),
485
18
                     isl_union_map_copy(Schedule));
486
18
    break;
487
1
  case isl_ast_node_block: {
488
1
    isl_ast_node_list *List = isl_ast_node_block_get_children(Body);
489
1
490
3
    for (int i = 0; 
i < isl_ast_node_list_n_ast_node(List)3
;
++i2
)
491
2
      createUserVector(isl_ast_node_list_get_ast_node(List, i), IVS,
492
2
                       isl_id_copy(IteratorID), isl_union_map_copy(Schedule));
493
1
494
1
    isl_ast_node_free(Body);
495
1
    isl_ast_node_list_free(List);
496
1
    break;
497
19
  }
498
0
  default:
499
0
    isl_ast_node_dump(Body);
500
0
    llvm_unreachable("Unhandled isl_ast_node in vectorizer");
501
19
  }
502
19
503
19
  IDToValue.erase(IDToValue.find(IteratorID));
504
19
  isl_id_free(IteratorID);
505
19
  isl_union_map_free(Schedule);
506
19
507
19
  isl_ast_node_free(For);
508
19
  isl_ast_expr_free(Iterator);
509
19
510
19
  VectorLoops++;
511
19
}
512
513
/// Restore the initial ordering of dimensions of the band node
514
///
515
/// In case the band node represents all the dimensions of the iteration
516
/// domain, recreate the band node to restore the initial ordering of the
517
/// dimensions.
518
///
519
/// @param Node The band node to be modified.
520
/// @return The modified schedule node.
521
248
static bool IsLoopVectorizerDisabled(isl::ast_node Node) {
522
248
  assert(isl_ast_node_get_type(Node.keep()) == isl_ast_node_for);
523
248
  auto Body = Node.for_get_body();
524
248
  if (isl_ast_node_get_type(Body.keep()) != isl_ast_node_mark)
525
241
    return false;
526
7
  auto Id = Body.mark_get_id();
527
7
  if (strcmp(Id.get_name().c_str(), "Loop Vectorizer Disabled") == 0)
528
2
    return true;
529
5
  return false;
530
5
}
531
532
void IslNodeBuilder::createForSequential(__isl_take isl_ast_node *For,
533
248
                                         bool KnownParallel) {
534
248
  isl_ast_node *Body;
535
248
  isl_ast_expr *Init, *Inc, *Iterator, *UB;
536
248
  isl_id *IteratorID;
537
248
  Value *ValueLB, *ValueUB, *ValueInc;
538
248
  Type *MaxType;
539
248
  BasicBlock *ExitBlock;
540
248
  Value *IV;
541
248
  CmpInst::Predicate Predicate;
542
248
  bool Parallel;
543
248
544
248
  Parallel = KnownParallel || (IslAstInfo::isParallel(For) &&
545
16
                               !IslAstInfo::isReductionParallel(For));
546
248
547
248
  bool LoopVectorizerDisabled =
548
248
      IsLoopVectorizerDisabled(isl::manage(isl_ast_node_copy(For)));
549
248
550
248
  Body = isl_ast_node_for_get_body(For);
551
248
552
248
  // isl_ast_node_for_is_degenerate(For)
553
248
  //
554
248
  // TODO: For degenerated loops we could generate a plain assignment.
555
248
  //       However, for now we just reuse the logic for normal loops, which will
556
248
  //       create a loop with a single iteration.
557
248
558
248
  Init = isl_ast_node_for_get_init(For);
559
248
  Inc = isl_ast_node_for_get_inc(For);
560
248
  Iterator = isl_ast_node_for_get_iterator(For);
561
248
  IteratorID = isl_ast_expr_get_id(Iterator);
562
248
  UB = getUpperBound(For, Predicate);
563
248
564
248
  ValueLB = ExprBuilder.create(Init);
565
248
  ValueUB = ExprBuilder.create(UB);
566
248
  ValueInc = ExprBuilder.create(Inc);
567
248
568
248
  MaxType = ExprBuilder.getType(Iterator);
569
248
  MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType());
570
248
  MaxType = ExprBuilder.getWidestType(MaxType, ValueUB->getType());
571
248
  MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType());
572
248
573
248
  if (MaxType != ValueLB->getType())
574
0
    ValueLB = Builder.CreateSExt(ValueLB, MaxType);
575
248
  if (MaxType != ValueUB->getType())
576
35
    ValueUB = Builder.CreateSExt(ValueUB, MaxType);
577
248
  if (MaxType != ValueInc->getType())
578
0
    ValueInc = Builder.CreateSExt(ValueInc, MaxType);
579
248
580
248
  // If we can show that LB <Predicate> UB holds at least once, we can
581
248
  // omit the GuardBB in front of the loop.
582
248
  bool UseGuardBB =
583
248
      !SE.isKnownPredicate(Predicate, SE.getSCEV(ValueLB), SE.getSCEV(ValueUB));
584
248
  IV = createLoop(ValueLB, ValueUB, ValueInc, Builder, LI, DT, ExitBlock,
585
248
                  Predicate, &Annotator, Parallel, UseGuardBB,
586
248
                  LoopVectorizerDisabled);
587
248
  IDToValue[IteratorID] = IV;
588
248
589
248
  create(Body);
590
248
591
248
  Annotator.popLoop(Parallel);
592
248
593
248
  IDToValue.erase(IDToValue.find(IteratorID));
594
248
595
248
  Builder.SetInsertPoint(&ExitBlock->front());
596
248
597
248
  isl_ast_node_free(For);
598
248
  isl_ast_expr_free(Iterator);
599
248
  isl_id_free(IteratorID);
600
248
601
248
  SequentialLoops++;
602
248
}
603
604
/// Remove the BBs contained in a (sub)function from the dominator tree.
605
///
606
/// This function removes the basic blocks that are part of a subfunction from
607
/// the dominator tree. Specifically, when generating code it may happen that at
608
/// some point the code generation continues in a new sub-function (e.g., when
609
/// generating OpenMP code). The basic blocks that are created in this
610
/// sub-function are then still part of the dominator tree of the original
611
/// function, such that the dominator tree reaches over function boundaries.
612
/// This is not only incorrect, but also causes crashes. This function now
613
/// removes from the dominator tree all basic blocks that are dominated (and
614
/// consequently reachable) from the entry block of this (sub)function.
615
///
616
/// FIXME: A LLVM (function or region) pass should not touch anything outside of
617
/// the function/region it runs on. Hence, the pure need for this function shows
618
/// that we do not comply to this rule. At the moment, this does not cause any
619
/// issues, but we should be aware that such issues may appear. Unfortunately
620
/// the current LLVM pass infrastructure does not allow to make Polly a module
621
/// or call-graph pass to solve this issue, as such a pass would not have access
622
/// to the per-function analyses passes needed by Polly. A future pass manager
623
/// infrastructure is supposed to enable such kind of access possibly allowing
624
/// us to create a cleaner solution here.
625
///
626
/// FIXME: Instead of adding the dominance information and then dropping it
627
/// later on, we should try to just not add it in the first place. This requires
628
/// some careful testing to make sure this does not break in interaction with
629
/// the SCEVBuilder and SplitBlock which may rely on the dominator tree or
630
/// which may try to update it.
631
///
632
/// @param F The function which contains the BBs to removed.
633
/// @param DT The dominator tree from which to remove the BBs.
634
27
static void removeSubFuncFromDomTree(Function *F, DominatorTree &DT) {
635
27
  DomTreeNode *N = DT.getNode(&F->getEntryBlock());
636
27
  std::vector<BasicBlock *> Nodes;
637
27
638
27
  // We can only remove an element from the dominator tree, if all its children
639
27
  // have been removed. To ensure this we obtain the list of nodes to remove
640
27
  // using a post-order tree traversal.
641
284
  for (po_iterator<DomTreeNode *> I = po_begin(N), E = po_end(N); 
I != E284
;
++I257
)
642
257
    Nodes.push_back(I->getBlock());
643
27
644
27
  for (BasicBlock *BB : Nodes)
645
257
    DT.eraseNode(BB);
646
27
}
647
648
27
void IslNodeBuilder::createForParallel(__isl_take isl_ast_node *For) {
649
27
  isl_ast_node *Body;
650
27
  isl_ast_expr *Init, *Inc, *Iterator, *UB;
651
27
  isl_id *IteratorID;
652
27
  Value *ValueLB, *ValueUB, *ValueInc;
653
27
  Type *MaxType;
654
27
  Value *IV;
655
27
  CmpInst::Predicate Predicate;
656
27
657
27
  // The preamble of parallel code interacts different than normal code with
658
27
  // e.g., scalar initialization. Therefore, we ensure the parallel code is
659
27
  // separated from the last basic block.
660
27
  BasicBlock *ParBB = SplitBlock(Builder.GetInsertBlock(),
661
27
                                 &*Builder.GetInsertPoint(), &DT, &LI);
662
27
  ParBB->setName("polly.parallel.for");
663
27
  Builder.SetInsertPoint(&ParBB->front());
664
27
665
27
  Body = isl_ast_node_for_get_body(For);
666
27
  Init = isl_ast_node_for_get_init(For);
667
27
  Inc = isl_ast_node_for_get_inc(For);
668
27
  Iterator = isl_ast_node_for_get_iterator(For);
669
27
  IteratorID = isl_ast_expr_get_id(Iterator);
670
27
  UB = getUpperBound(For, Predicate);
671
27
672
27
  ValueLB = ExprBuilder.create(Init);
673
27
  ValueUB = ExprBuilder.create(UB);
674
27
  ValueInc = ExprBuilder.create(Inc);
675
27
676
27
  // OpenMP always uses SLE. In case the isl generated AST uses a SLT
677
27
  // expression, we need to adjust the loop bound by one.
678
27
  if (Predicate == CmpInst::ICMP_SLT)
679
8
    ValueUB = Builder.CreateAdd(
680
8
        ValueUB, Builder.CreateSExt(Builder.getTrue(), ValueUB->getType()));
681
27
682
27
  MaxType = ExprBuilder.getType(Iterator);
683
27
  MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType());
684
27
  MaxType = ExprBuilder.getWidestType(MaxType, ValueUB->getType());
685
27
  MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType());
686
27
687
27
  if (MaxType != ValueLB->getType())
688
0
    ValueLB = Builder.CreateSExt(ValueLB, MaxType);
689
27
  if (MaxType != ValueUB->getType())
690
3
    ValueUB = Builder.CreateSExt(ValueUB, MaxType);
691
27
  if (MaxType != ValueInc->getType())
692
0
    ValueInc = Builder.CreateSExt(ValueInc, MaxType);
693
27
694
27
  BasicBlock::iterator LoopBody;
695
27
696
27
  SetVector<Value *> SubtreeValues;
697
27
  SetVector<const Loop *> Loops;
698
27
699
27
  getReferencesInSubtree(For, SubtreeValues, Loops);
700
27
701
27
  // Create for all loops we depend on values that contain the current loop
702
27
  // iteration. These values are necessary to generate code for SCEVs that
703
27
  // depend on such loops. As a result we need to pass them to the subfunction.
704
27
  // See [Code generation of induction variables of loops outside Scops]
705
1
  for (const Loop *L : Loops) {
706
1
    Value *LoopInductionVar = materializeNonScopLoopInductionVariable(L);
707
1
    SubtreeValues.insert(LoopInductionVar);
708
1
  }
709
27
710
27
  ValueMapT NewValues;
711
27
  ParallelLoopGenerator ParallelLoopGen(Builder, LI, DT, DL);
712
27
713
27
  IV = ParallelLoopGen.createParallelLoop(ValueLB, ValueUB, ValueInc,
714
27
                                          SubtreeValues, NewValues, &LoopBody);
715
27
  BasicBlock::iterator AfterLoop = Builder.GetInsertPoint();
716
27
  Builder.SetInsertPoint(&*LoopBody);
717
27
718
27
  // Remember the parallel subfunction
719
27
  ParallelSubfunctions.push_back(LoopBody->getFunction());
720
27
721
27
  // Save the current values.
722
27
  auto ValueMapCopy = ValueMap;
723
27
  IslExprBuilder::IDToValueTy IDToValueCopy = IDToValue;
724
27
725
27
  updateValues(NewValues);
726
27
  IDToValue[IteratorID] = IV;
727
27
728
27
  ValueMapT NewValuesReverse;
729
27
730
27
  for (auto P : NewValues)
731
60
    NewValuesReverse[P.second] = P.first;
732
27
733
27
  Annotator.addAlternativeAliasBases(NewValuesReverse);
734
27
735
27
  create(Body);
736
27
737
27
  Annotator.resetAlternativeAliasBases();
738
27
  // Restore the original values.
739
27
  ValueMap = ValueMapCopy;
740
27
  IDToValue = IDToValueCopy;
741
27
742
27
  Builder.SetInsertPoint(&*AfterLoop);
743
27
  removeSubFuncFromDomTree((*LoopBody).getParent()->getParent(), DT);
744
27
745
27
  for (const Loop *L : Loops)
746
1
    OutsideLoopIterations.erase(L);
747
27
748
27
  isl_ast_node_free(For);
749
27
  isl_ast_expr_free(Iterator);
750
27
  isl_id_free(IteratorID);
751
27
752
27
  ParallelLoops++;
753
27
}
754
755
/// Return whether any of @p Node's statements contain partial accesses.
756
///
757
/// Partial accesses are not supported by Polly's vector code generator.
758
17
static bool hasPartialAccesses(__isl_take isl_ast_node *Node) {
759
17
  return isl_ast_node_foreach_descendant_top_down(
760
17
             Node,
761
36
             [](isl_ast_node *Node, void *User) -> isl_bool {
762
36
               if (isl_ast_node_get_type(Node) != isl_ast_node_user)
763
18
                 return isl_bool_true;
764
18
765
18
               isl::ast_expr Expr = give(isl_ast_node_user_get_expr(Node));
766
18
               isl::ast_expr StmtExpr =
767
18
                   give(isl_ast_expr_get_op_arg(Expr.keep(), 0));
768
18
               isl::id Id = give(isl_ast_expr_get_id(StmtExpr.keep()));
769
18
770
18
               ScopStmt *Stmt =
771
18
                   static_cast<ScopStmt *>(isl_id_get_user(Id.keep()));
772
18
               isl::set StmtDom = Stmt->getDomain();
773
27
               for (auto *MA : *Stmt) {
774
27
                 if (MA->isLatestPartialAccess())
775
0
                   return isl_bool_error;
776
18
               }
777
18
               return isl_bool_true;
778
18
             },
779
17
             nullptr) == isl_stat_error;
780
17
}
781
782
292
void IslNodeBuilder::createFor(__isl_take isl_ast_node *For) {
783
292
  bool Vector = PollyVectorizerChoice == VECTORIZER_POLLY;
784
292
785
292
  if (
Vector && 292
IslAstInfo::isInnermostParallel(For)27
&&
786
292
      
!IslAstInfo::isReductionParallel(For)20
) {
787
20
    int VectorWidth = getNumberOfIterations(For);
788
20
    if (
1 < VectorWidth && 20
VectorWidth <= 1617
&&
!hasPartialAccesses(For)17
) {
789
17
      createForVector(For, VectorWidth);
790
17
      return;
791
17
    }
792
275
  }
793
275
794
275
  
if (275
IslAstInfo::isExecutedInParallel(For)275
) {
795
27
    createForParallel(For);
796
27
    return;
797
27
  }
798
248
  createForSequential(For, false);
799
248
}
800
801
68
void IslNodeBuilder::createIf(__isl_take isl_ast_node *If) {
802
68
  isl_ast_expr *Cond = isl_ast_node_if_get_cond(If);
803
68
804
68
  Function *F = Builder.GetInsertBlock()->getParent();
805
68
  LLVMContext &Context = F->getContext();
806
68
807
68
  BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(),
808
68
                                  &*Builder.GetInsertPoint(), &DT, &LI);
809
68
  CondBB->setName("polly.cond");
810
68
  BasicBlock *MergeBB = SplitBlock(CondBB, &CondBB->front(), &DT, &LI);
811
68
  MergeBB->setName("polly.merge");
812
68
  BasicBlock *ThenBB = BasicBlock::Create(Context, "polly.then", F);
813
68
  BasicBlock *ElseBB = BasicBlock::Create(Context, "polly.else", F);
814
68
815
68
  DT.addNewBlock(ThenBB, CondBB);
816
68
  DT.addNewBlock(ElseBB, CondBB);
817
68
  DT.changeImmediateDominator(MergeBB, CondBB);
818
68
819
68
  Loop *L = LI.getLoopFor(CondBB);
820
68
  if (
L68
) {
821
31
    L->addBasicBlockToLoop(ThenBB, LI);
822
31
    L->addBasicBlockToLoop(ElseBB, LI);
823
31
  }
824
68
825
68
  CondBB->getTerminator()->eraseFromParent();
826
68
827
68
  Builder.SetInsertPoint(CondBB);
828
68
  Value *Predicate = ExprBuilder.create(Cond);
829
68
  Builder.CreateCondBr(Predicate, ThenBB, ElseBB);
830
68
  Builder.SetInsertPoint(ThenBB);
831
68
  Builder.CreateBr(MergeBB);
832
68
  Builder.SetInsertPoint(ElseBB);
833
68
  Builder.CreateBr(MergeBB);
834
68
  Builder.SetInsertPoint(&ThenBB->front());
835
68
836
68
  create(isl_ast_node_if_get_then(If));
837
68
838
68
  Builder.SetInsertPoint(&ElseBB->front());
839
68
840
68
  if (isl_ast_node_if_has_else(If))
841
10
    create(isl_ast_node_if_get_else(If));
842
68
843
68
  Builder.SetInsertPoint(&MergeBB->front());
844
68
845
68
  isl_ast_node_free(If);
846
68
847
68
  IfConditions++;
848
68
}
849
850
__isl_give isl_id_to_ast_expr *
851
IslNodeBuilder::createNewAccesses(ScopStmt *Stmt,
852
452
                                  __isl_keep isl_ast_node *Node) {
853
452
  isl_id_to_ast_expr *NewAccesses =
854
452
      isl_id_to_ast_expr_alloc(Stmt->getParent()->getIslCtx(), 0);
855
452
856
452
  auto *Build = IslAstInfo::getBuild(Node);
857
452
  assert(Build && "Could not obtain isl_ast_build from user node");
858
452
  Stmt->setAstBuild(isl::manage(isl_ast_build_copy(Build)));
859
452
860
994
  for (auto *MA : *Stmt) {
861
994
    if (
!MA->hasNewAccessRelation()994
) {
862
779
      if (
PollyGenerateExpressions779
) {
863
2
        if (!MA->isAffine())
864
0
          continue;
865
2
        
if (2
MA->getLatestScopArrayInfo()->getBasePtrOriginSAI()2
)
866
0
          continue;
867
2
868
2
        auto *BasePtr =
869
2
            dyn_cast<Instruction>(MA->getLatestScopArrayInfo()->getBasePtr());
870
2
        if (
BasePtr && 2
Stmt->getParent()->getRegion().contains(BasePtr)0
)
871
0
          continue;
872
777
      } else {
873
777
        continue;
874
777
      }
875
217
    }
876
994
    assert(MA->isAffine() &&
877
217
           "Only affine memory accesses can be code generated");
878
217
879
217
    auto Schedule = isl_ast_build_get_schedule(Build);
880
217
881
#ifndef NDEBUG
882
    if (MA->isRead()) {
883
      auto Dom = Stmt->getDomain().release();
884
      auto SchedDom = isl_set_from_union_set(
885
          isl_union_map_domain(isl_union_map_copy(Schedule)));
886
      auto AccDom = isl_map_domain(MA->getAccessRelation().release());
887
      Dom = isl_set_intersect_params(Dom,
888
                                     Stmt->getParent()->getContext().release());
889
      SchedDom = isl_set_intersect_params(
890
          SchedDom, Stmt->getParent()->getContext().release());
891
      assert(isl_set_is_subset(SchedDom, AccDom) &&
892
             "Access relation not defined on full schedule domain");
893
      assert(isl_set_is_subset(Dom, AccDom) &&
894
             "Access relation not defined on full domain");
895
      isl_set_free(AccDom);
896
      isl_set_free(SchedDom);
897
      isl_set_free(Dom);
898
    }
899
#endif
900
901
217
    auto PWAccRel =
902
217
        MA->applyScheduleToAccessRelation(isl::manage(Schedule)).release();
903
217
904
217
    // isl cannot generate an index expression for access-nothing accesses.
905
217
    isl::set AccDomain =
906
217
        give(isl_pw_multi_aff_domain(isl_pw_multi_aff_copy(PWAccRel)));
907
217
    if (
isl_set_is_empty(AccDomain.keep()) == isl_bool_true217
) {
908
2
      isl_pw_multi_aff_free(PWAccRel);
909
2
      continue;
910
2
    }
911
215
912
215
    auto AccessExpr = isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel);
913
215
    NewAccesses =
914
215
        isl_id_to_ast_expr_set(NewAccesses, MA->getId().release(), AccessExpr);
915
215
  }
916
452
917
452
  return NewAccesses;
918
452
}
919
920
void IslNodeBuilder::createSubstitutions(__isl_take isl_ast_expr *Expr,
921
551
                                         ScopStmt *Stmt, LoopToScevMapT &LTS) {
922
551
  assert(isl_ast_expr_get_type(Expr) == isl_ast_expr_op &&
923
551
         "Expression of type 'op' expected");
924
551
  assert(isl_ast_expr_get_op_type(Expr) == isl_ast_op_call &&
925
551
         "Operation of type 'call' expected");
926
1.16k
  for (int i = 0; 
i < isl_ast_expr_get_op_n_arg(Expr) - 11.16k
;
++i609
) {
927
609
    isl_ast_expr *SubExpr;
928
609
    Value *V;
929
609
930
609
    SubExpr = isl_ast_expr_get_op_arg(Expr, i + 1);
931
609
    V = ExprBuilder.create(SubExpr);
932
609
    ScalarEvolution *SE = Stmt->getParent()->getSE();
933
609
    LTS[Stmt->getLoopForDimension(i)] = SE->getUnknown(V);
934
609
  }
935
551
936
551
  isl_ast_expr_free(Expr);
937
551
}
938
939
void IslNodeBuilder::createSubstitutionsVector(
940
    __isl_take isl_ast_expr *Expr, ScopStmt *Stmt,
941
    std::vector<LoopToScevMapT> &VLTS, std::vector<Value *> &IVS,
942
20
    __isl_take isl_id *IteratorID) {
943
20
  int i = 0;
944
20
945
20
  Value *OldValue = IDToValue[IteratorID];
946
123
  for (Value *IV : IVS) {
947
123
    IDToValue[IteratorID] = IV;
948
123
    createSubstitutions(isl_ast_expr_copy(Expr), Stmt, VLTS[i]);
949
123
    i++;
950
123
  }
951
20
952
20
  IDToValue[IteratorID] = OldValue;
953
20
  isl_id_free(IteratorID);
954
20
  isl_ast_expr_free(Expr);
955
20
}
956
957
void IslNodeBuilder::generateCopyStmt(
958
4
    ScopStmt *Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses) {
959
4
  assert(Stmt->size() == 2);
960
4
  auto ReadAccess = Stmt->begin();
961
4
  auto WriteAccess = ReadAccess++;
962
4
  assert((*ReadAccess)->isRead() && (*WriteAccess)->isMustWrite());
963
4
  assert((*ReadAccess)->getElementType() == (*WriteAccess)->getElementType() &&
964
4
         "Accesses use the same data type");
965
4
  assert((*ReadAccess)->isArrayKind() && (*WriteAccess)->isArrayKind());
966
4
  auto *AccessExpr =
967
4
      isl_id_to_ast_expr_get(NewAccesses, (*ReadAccess)->getId().release());
968
4
  auto *LoadValue = ExprBuilder.create(AccessExpr);
969
4
  AccessExpr =
970
4
      isl_id_to_ast_expr_get(NewAccesses, (*WriteAccess)->getId().release());
971
4
  auto *StoreAddr = ExprBuilder.createAccessAddress(AccessExpr);
972
4
  Builder.CreateStore(LoadValue, StoreAddr);
973
4
}
974
975
23
Value *IslNodeBuilder::materializeNonScopLoopInductionVariable(const Loop *L) {
976
23
  assert(OutsideLoopIterations.find(L) == OutsideLoopIterations.end() &&
977
23
         "trying to materialize loop induction variable twice");
978
23
  const SCEV *OuterLIV = SE.getAddRecExpr(SE.getUnknown(Builder.getInt64(0)),
979
23
                                          SE.getUnknown(Builder.getInt64(1)), L,
980
23
                                          SCEV::FlagAnyWrap);
981
23
  Value *V = generateSCEV(OuterLIV);
982
23
  OutsideLoopIterations[L] = SE.getUnknown(V);
983
23
  return V;
984
23
}
985
986
432
void IslNodeBuilder::createUser(__isl_take isl_ast_node *User) {
987
432
  LoopToScevMapT LTS;
988
432
  isl_id *Id;
989
432
  ScopStmt *Stmt;
990
432
991
432
  isl_ast_expr *Expr = isl_ast_node_user_get_expr(User);
992
432
  isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0);
993
432
  Id = isl_ast_expr_get_id(StmtExpr);
994
432
  isl_ast_expr_free(StmtExpr);
995
432
996
432
  LTS.insert(OutsideLoopIterations.begin(), OutsideLoopIterations.end());
997
432
998
432
  Stmt = (ScopStmt *)isl_id_get_user(Id);
999
432
  auto *NewAccesses = createNewAccesses(Stmt, User);
1000
432
  if (
Stmt->isCopyStmt()432
) {
1001
4
    generateCopyStmt(Stmt, NewAccesses);
1002
4
    isl_ast_expr_free(Expr);
1003
432
  } else {
1004
428
    createSubstitutions(Expr, Stmt, LTS);
1005
428
1006
428
    if (Stmt->isBlockStmt())
1007
393
      BlockGen.copyStmt(*Stmt, LTS, NewAccesses);
1008
428
    else
1009
35
      RegionGen.copyStmt(*Stmt, LTS, NewAccesses);
1010
428
  }
1011
432
1012
432
  isl_id_to_ast_expr_free(NewAccesses);
1013
432
  isl_ast_node_free(User);
1014
432
  isl_id_free(Id);
1015
432
}
1016
1017
115
void IslNodeBuilder::createBlock(__isl_take isl_ast_node *Block) {
1018
115
  isl_ast_node_list *List = isl_ast_node_block_get_children(Block);
1019
115
1020
387
  for (int i = 0; 
i < isl_ast_node_list_n_ast_node(List)387
;
++i272
)
1021
272
    create(isl_ast_node_list_get_ast_node(List, i));
1022
115
1023
115
  isl_ast_node_free(Block);
1024
115
  isl_ast_node_list_free(List);
1025
115
}
1026
1027
929
void IslNodeBuilder::create(__isl_take isl_ast_node *Node) {
1028
929
  switch (isl_ast_node_get_type(Node)) {
1029
0
  case isl_ast_node_error:
1030
0
    llvm_unreachable("code generation error");
1031
22
  case isl_ast_node_mark:
1032
22
    createMark(Node);
1033
22
    return;
1034
292
  case isl_ast_node_for:
1035
292
    createFor(Node);
1036
292
    return;
1037
68
  case isl_ast_node_if:
1038
68
    createIf(Node);
1039
68
    return;
1040
432
  case isl_ast_node_user:
1041
432
    createUser(Node);
1042
432
    return;
1043
115
  case isl_ast_node_block:
1044
115
    createBlock(Node);
1045
115
    return;
1046
0
  }
1047
0
1048
0
  
llvm_unreachable0
("Unknown isl_ast_node type");
1049
0
}
1050
1051
240
bool IslNodeBuilder::materializeValue(isl_id *Id) {
1052
240
  // If the Id is already mapped, skip it.
1053
240
  if (
!IDToValue.count(Id)240
) {
1054
167
    auto *ParamSCEV = (const SCEV *)isl_id_get_user(Id);
1055
167
    Value *V = nullptr;
1056
167
1057
167
    // Parameters could refer to invariant loads that need to be
1058
167
    // preloaded before we can generate code for the parameter. Thus,
1059
167
    // check if any value referred to in ParamSCEV is an invariant load
1060
167
    // and if so make sure its equivalence class is preloaded.
1061
167
    SetVector<Value *> Values;
1062
167
    findValues(ParamSCEV, SE, Values);
1063
173
    for (auto *Val : Values) {
1064
173
      // Check if the value is an instruction in a dead block within the SCoP
1065
173
      // and if so do not code generate it.
1066
173
      if (auto *
Inst173
= dyn_cast<Instruction>(Val)) {
1067
34
        if (
S.contains(Inst)34
) {
1068
15
          bool IsDead = true;
1069
15
1070
15
          // Check for "undef" loads first, then if there is a statement for
1071
15
          // the parent of Inst and lastly if the parent of Inst has an empty
1072
15
          // domain. In the first and last case the instruction is dead but if
1073
15
          // there is a statement or the domain is not empty Inst is not dead.
1074
15
          auto MemInst = MemAccInst::dyn_cast(Inst);
1075
15
          auto Address = MemInst ? 
MemInst.getPointerOperand()14
:
nullptr1
;
1076
15
          if (
Address && 15
SE.getUnknown(UndefValue::get(Address->getType())) ==
1077
15
                             SE.getPointerBase(SE.getSCEV(Address))) {
1078
15
          } else 
if (15
S.getStmtFor(Inst)15
) {
1079
1
            IsDead = false;
1080
15
          } else {
1081
14
            auto *Domain = S.getDomainConditions(Inst->getParent()).release();
1082
14
            IsDead = isl_set_is_empty(Domain);
1083
14
            isl_set_free(Domain);
1084
14
          }
1085
15
1086
15
          if (
IsDead15
) {
1087
2
            V = UndefValue::get(ParamSCEV->getType());
1088
2
            break;
1089
2
          }
1090
171
        }
1091
34
      }
1092
171
1093
171
      
if (auto *171
IAClass171
= S.lookupInvariantEquivClass(Val)) {
1094
11
        // Check if this invariant access class is empty, hence if we never
1095
11
        // actually added a loads instruction to it. In that case it has no
1096
11
        // (meaningful) users and we should not try to code generate it.
1097
11
        if (IAClass->InvariantAccesses.empty())
1098
0
          V = UndefValue::get(ParamSCEV->getType());
1099
11
1100
11
        if (
!preloadInvariantEquivClass(*IAClass)11
) {
1101
3
          isl_id_free(Id);
1102
3
          return false;
1103
3
        }
1104
164
      }
1105
173
    }
1106
164
1107
164
    
V = V ? 164
V2
:
generateSCEV(ParamSCEV)162
;
1108
167
    IDToValue[Id] = V;
1109
167
  }
1110
240
1111
237
  isl_id_free(Id);
1112
237
  return true;
1113
240
}
1114
1115
132
bool IslNodeBuilder::materializeParameters(isl_set *Set) {
1116
341
  for (unsigned i = 0, e = isl_set_dim(Set, isl_dim_param); 
i < e341
;
++i209
) {
1117
212
    if (!isl_set_involves_dims(Set, isl_dim_param, i, 1))
1118
155
      continue;
1119
57
    isl_id *Id = isl_set_get_dim_id(Set, isl_dim_param, i);
1120
57
    if (!materializeValue(Id))
1121
3
      return false;
1122
212
  }
1123
129
  return true;
1124
132
}
1125
1126
284
bool IslNodeBuilder::materializeParameters() {
1127
183
  for (const SCEV *Param : S.parameters()) {
1128
183
    isl_id *Id = S.getIdForParam(Param).release();
1129
183
    if (!materializeValue(Id))
1130
0
      return false;
1131
284
  }
1132
284
  return true;
1133
284
}
1134
1135
/// Generate the computation of the size of the outermost dimension from the
1136
/// Fortran array descriptor (in this case, `@g_arr`). The final `%size`
1137
/// contains the size of the array.
1138
///
1139
/// %arrty = type { i8*, i64, i64, [3 x %desc.dimensionty] }
1140
/// %desc.dimensionty = type { i64, i64, i64 }
1141
/// @g_arr = global %arrty zeroinitializer, align 32
1142
/// ...
1143
/// %0 = load i64, i64* getelementptr inbounds
1144
///                       (%arrty, %arrty* @g_arr, i64 0, i32 3, i64 0, i32 2)
1145
/// %1 = load i64, i64* getelementptr inbounds
1146
///                      (%arrty, %arrty* @g_arr, i64 0, i32 3, i64 0, i32 1)
1147
/// %2 = sub nsw i64 %0, %1
1148
/// %size = add nsw i64 %2, 1
1149
static Value *buildFADOutermostDimensionLoad(Value *GlobalDescriptor,
1150
                                             PollyIRBuilder &Builder,
1151
2
                                             std::string ArrayName) {
1152
2
  assert(GlobalDescriptor && "invalid global descriptor given");
1153
2
1154
2
  Value *endIdx[4] = {Builder.getInt64(0), Builder.getInt32(3),
1155
2
                      Builder.getInt64(0), Builder.getInt32(2)};
1156
2
  Value *endPtr = Builder.CreateInBoundsGEP(GlobalDescriptor, endIdx,
1157
2
                                            ArrayName + "_end_ptr");
1158
2
  Value *end = Builder.CreateLoad(endPtr, ArrayName + "_end");
1159
2
1160
2
  Value *beginIdx[4] = {Builder.getInt64(0), Builder.getInt32(3),
1161
2
                        Builder.getInt64(0), Builder.getInt32(1)};
1162
2
  Value *beginPtr = Builder.CreateInBoundsGEP(GlobalDescriptor, beginIdx,
1163
2
                                              ArrayName + "_begin_ptr");
1164
2
  Value *begin = Builder.CreateLoad(beginPtr, ArrayName + "_begin");
1165
2
1166
2
  Value *size =
1167
2
      Builder.CreateNSWSub(end, begin, ArrayName + "_end_begin_delta");
1168
2
  Type *endType = dyn_cast<IntegerType>(end->getType());
1169
2
  assert(endType && "expected type of end to be integral");
1170
2
1171
2
  size = Builder.CreateNSWAdd(end,
1172
2
                              ConstantInt::get(endType, 1, /* signed = */ true),
1173
2
                              ArrayName + "_size");
1174
2
1175
2
  return size;
1176
2
}
1177
1178
284
bool IslNodeBuilder::materializeFortranArrayOutermostDimension() {
1179
607
  for (ScopArrayInfo *Array : S.arrays()) {
1180
607
    if (Array->getNumberOfDimensions() == 0)
1181
142
      continue;
1182
465
1183
465
    Value *FAD = Array->getFortranArrayDescriptor();
1184
465
    if (!FAD)
1185
463
      continue;
1186
2
1187
2
    isl_pw_aff *ParametricPwAff = Array->getDimensionSizePw(0).release();
1188
2
    assert(ParametricPwAff && "parametric pw_aff corresponding "
1189
2
                              "to outermost dimension does not "
1190
2
                              "exist");
1191
2
1192
2
    isl_id *Id = isl_pw_aff_get_dim_id(ParametricPwAff, isl_dim_param, 0);
1193
2
    isl_pw_aff_free(ParametricPwAff);
1194
2
1195
2
    assert(Id && "pw_aff is not parametric");
1196
2
1197
2
    if (
IDToValue.count(Id)2
) {
1198
0
      isl_id_free(Id);
1199
0
      continue;
1200
0
    }
1201
2
1202
2
    Value *FinalValue =
1203
2
        buildFADOutermostDimensionLoad(FAD, Builder, Array->getName());
1204
2
    assert(FinalValue && "unable to build Fortran array "
1205
2
                         "descriptor load of outermost dimension");
1206
2
    IDToValue[Id] = FinalValue;
1207
2
    isl_id_free(Id);
1208
2
  }
1209
284
  return true;
1210
284
}
1211
1212
Value *IslNodeBuilder::preloadUnconditionally(isl_set *AccessRange,
1213
                                              isl_ast_build *Build,
1214
105
                                              Instruction *AccInst) {
1215
105
  isl_pw_multi_aff *PWAccRel = isl_pw_multi_aff_from_set(AccessRange);
1216
105
  isl_ast_expr *Access =
1217
105
      isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel);
1218
105
  auto *Address = isl_ast_expr_address_of(Access);
1219
105
  auto *AddressValue = ExprBuilder.create(Address);
1220
105
  Value *PreloadVal;
1221
105
1222
105
  // Correct the type as the SAI might have a different type than the user
1223
105
  // expects, especially if the base pointer is a struct.
1224
105
  Type *Ty = AccInst->getType();
1225
105
1226
105
  auto *Ptr = AddressValue;
1227
105
  auto Name = Ptr->getName();
1228
105
  auto AS = Ptr->getType()->getPointerAddressSpace();
1229
105
  Ptr = Builder.CreatePointerCast(Ptr, Ty->getPointerTo(AS), Name + ".cast");
1230
105
  PreloadVal = Builder.CreateLoad(Ptr, Name + ".load");
1231
105
  if (LoadInst *PreloadInst = dyn_cast<LoadInst>(PreloadVal))
1232
105
    PreloadInst->setAlignment(dyn_cast<LoadInst>(AccInst)->getAlignment());
1233
105
1234
105
  // TODO: This is only a hot fix for SCoP sequences that use the same load
1235
105
  //       instruction contained and hoisted by one of the SCoPs.
1236
105
  if (SE.isSCEVable(Ty))
1237
90
    SE.forgetValue(AccInst);
1238
105
1239
105
  return PreloadVal;
1240
105
}
1241
1242
Value *IslNodeBuilder::preloadInvariantLoad(const MemoryAccess &MA,
1243
108
                                            isl_set *Domain) {
1244
108
  isl_set *AccessRange = isl_map_range(MA.getAddressFunction().release());
1245
108
  AccessRange = isl_set_gist_params(AccessRange, S.getContext().release());
1246
108
1247
108
  if (
!materializeParameters(AccessRange)108
) {
1248
0
    isl_set_free(AccessRange);
1249
0
    isl_set_free(Domain);
1250
0
    return nullptr;
1251
0
  }
1252
108
1253
108
  auto *Build =
1254
108
      isl_ast_build_from_context(isl_set_universe(S.getParamSpace().release()));
1255
108
  isl_set *Universe = isl_set_universe(isl_set_get_space(Domain));
1256
108
  bool AlwaysExecuted = isl_set_is_equal(Domain, Universe);
1257
108
  isl_set_free(Universe);
1258
108
1259
108
  Instruction *AccInst = MA.getAccessInstruction();
1260
108
  Type *AccInstTy = AccInst->getType();
1261
108
1262
108
  Value *PreloadVal = nullptr;
1263
108
  if (
AlwaysExecuted108
) {
1264
84
    PreloadVal = preloadUnconditionally(AccessRange, Build, AccInst);
1265
84
    isl_ast_build_free(Build);
1266
84
    isl_set_free(Domain);
1267
84
    return PreloadVal;
1268
84
  }
1269
24
1270
24
  
if (24
!materializeParameters(Domain)24
) {
1271
3
    isl_ast_build_free(Build);
1272
3
    isl_set_free(AccessRange);
1273
3
    isl_set_free(Domain);
1274
3
    return nullptr;
1275
3
  }
1276
21
1277
21
  isl_ast_expr *DomainCond = isl_ast_build_expr_from_set(Build, Domain);
1278
21
  Domain = nullptr;
1279
21
1280
21
  ExprBuilder.setTrackOverflow(true);
1281
21
  Value *Cond = ExprBuilder.create(DomainCond);
1282
21
  Value *OverflowHappened = Builder.CreateNot(ExprBuilder.getOverflowState(),
1283
21
                                              "polly.preload.cond.overflown");
1284
21
  Cond = Builder.CreateAnd(Cond, OverflowHappened, "polly.preload.cond.result");
1285
21
  ExprBuilder.setTrackOverflow(false);
1286
21
1287
21
  if (!Cond->getType()->isIntegerTy(1))
1288
0
    Cond = Builder.CreateIsNotNull(Cond);
1289
21
1290
21
  BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(),
1291
21
                                  &*Builder.GetInsertPoint(), &DT, &LI);
1292
21
  CondBB->setName("polly.preload.cond");
1293
21
1294
21
  BasicBlock *MergeBB = SplitBlock(CondBB, &CondBB->front(), &DT, &LI);
1295
21
  MergeBB->setName("polly.preload.merge");
1296
21
1297
21
  Function *F = Builder.GetInsertBlock()->getParent();
1298
21
  LLVMContext &Context = F->getContext();
1299
21
  BasicBlock *ExecBB = BasicBlock::Create(Context, "polly.preload.exec", F);
1300
21
1301
21
  DT.addNewBlock(ExecBB, CondBB);
1302
21
  if (Loop *L = LI.getLoopFor(CondBB))
1303
6
    L->addBasicBlockToLoop(ExecBB, LI);
1304
21
1305
21
  auto *CondBBTerminator = CondBB->getTerminator();
1306
21
  Builder.SetInsertPoint(CondBBTerminator);
1307
21
  Builder.CreateCondBr(Cond, ExecBB, MergeBB);
1308
21
  CondBBTerminator->eraseFromParent();
1309
21
1310
21
  Builder.SetInsertPoint(ExecBB);
1311
21
  Builder.CreateBr(MergeBB);
1312
21
1313
21
  Builder.SetInsertPoint(ExecBB->getTerminator());
1314
21
  Value *PreAccInst = preloadUnconditionally(AccessRange, Build, AccInst);
1315
21
  Builder.SetInsertPoint(MergeBB->getTerminator());
1316
21
  auto *MergePHI = Builder.CreatePHI(
1317
21
      AccInstTy, 2, "polly.preload." + AccInst->getName() + ".merge");
1318
21
  PreloadVal = MergePHI;
1319
21
1320
21
  if (
!PreAccInst21
) {
1321
0
    PreloadVal = nullptr;
1322
0
    PreAccInst = UndefValue::get(AccInstTy);
1323
0
  }
1324
108
1325
108
  MergePHI->addIncoming(PreAccInst, ExecBB);
1326
108
  MergePHI->addIncoming(Constant::getNullValue(AccInstTy), CondBB);
1327
108
1328
108
  isl_ast_build_free(Build);
1329
108
  return PreloadVal;
1330
108
}
1331
1332
bool IslNodeBuilder::preloadInvariantEquivClass(
1333
146
    InvariantEquivClassTy &IAClass) {
1334
146
  // For an equivalence class of invariant loads we pre-load the representing
1335
146
  // element with the unified execution context. However, we have to map all
1336
146
  // elements of the class to the one preloaded load as they are referenced
1337
146
  // during the code generation and therefor need to be mapped.
1338
146
  const MemoryAccessList &MAs = IAClass.InvariantAccesses;
1339
146
  if (MAs.empty())
1340
2
    return true;
1341
144
1342
144
  MemoryAccess *MA = MAs.front();
1343
144
  assert(MA->isArrayKind() && MA->isRead());
1344
144
1345
144
  // If the access function was already mapped, the preload of this equivalence
1346
144
  // class was triggered earlier already and doesn't need to be done again.
1347
144
  if (ValueMap.count(MA->getAccessInstruction()))
1348
29
    return true;
1349
115
1350
115
  // Check for recursion which can be caused by additional constraints, e.g.,
1351
115
  // non-finite loop constraints. In such a case we have to bail out and insert
1352
115
  // a "false" runtime check that will cause the original code to be executed.
1353
115
  auto PtrId = std::make_pair(IAClass.IdentifyingPointer, IAClass.AccessType);
1354
115
  if (!PreloadedPtrs.insert(PtrId).second)
1355
4
    return false;
1356
111
1357
111
  // The execution context of the IAClass.
1358
111
  isl_set *&ExecutionCtx = IAClass.ExecutionContext;
1359
111
1360
111
  // If the base pointer of this class is dependent on another one we have to
1361
111
  // make sure it was preloaded already.
1362
111
  auto *SAI = MA->getScopArrayInfo();
1363
111
  if (auto *
BaseIAClass111
= S.lookupInvariantEquivClass(SAI->getBasePtr())) {
1364
22
    if (!preloadInvariantEquivClass(*BaseIAClass))
1365
1
      return false;
1366
21
1367
21
    // After we preloaded the BaseIAClass we adjusted the BaseExecutionCtx and
1368
21
    // we need to refine the ExecutionCtx.
1369
21
    isl_set *BaseExecutionCtx = isl_set_copy(BaseIAClass->ExecutionContext);
1370
21
    ExecutionCtx = isl_set_intersect(ExecutionCtx, BaseExecutionCtx);
1371
21
  }
1372
111
1373
111
  // If the size of a dimension is dependent on another class, make sure it is
1374
111
  // preloaded.
1375
110
  
for (unsigned i = 1, e = SAI->getNumberOfDimensions(); 110
i < e110
;
++i0
) {
1376
2
    const SCEV *Dim = SAI->getDimensionSize(i);
1377
2
    SetVector<Value *> Values;
1378
2
    findValues(Dim, SE, Values);
1379
2
    for (auto *Val : Values) {
1380
2
      if (auto *
BaseIAClass2
= S.lookupInvariantEquivClass(Val)) {
1381
2
        if (!preloadInvariantEquivClass(*BaseIAClass))
1382
2
          return false;
1383
0
1384
0
        // After we preloaded the BaseIAClass we adjusted the BaseExecutionCtx
1385
0
        // and we need to refine the ExecutionCtx.
1386
0
        isl_set *BaseExecutionCtx = isl_set_copy(BaseIAClass->ExecutionContext);
1387
0
        ExecutionCtx = isl_set_intersect(ExecutionCtx, BaseExecutionCtx);
1388
0
      }
1389
2
    }
1390
2
  }
1391
110
1392
108
  Instruction *AccInst = MA->getAccessInstruction();
1393
108
  Type *AccInstTy = AccInst->getType();
1394
108
1395
108
  Value *PreloadVal = preloadInvariantLoad(*MA, isl_set_copy(ExecutionCtx));
1396
108
  if (!PreloadVal)
1397
3
    return false;
1398
105
1399
105
  
for (const MemoryAccess *MA : MAs) 105
{
1400
115
    Instruction *MAAccInst = MA->getAccessInstruction();
1401
115
    assert(PreloadVal->getType() == MAAccInst->getType());
1402
115
    ValueMap[MAAccInst] = PreloadVal;
1403
115
  }
1404
105
1405
105
  if (
SE.isSCEVable(AccInstTy)105
) {
1406
90
    isl_id *ParamId = S.getIdForParam(SE.getSCEV(AccInst)).release();
1407
90
    if (ParamId)
1408
22
      IDToValue[ParamId] = PreloadVal;
1409
90
    isl_id_free(ParamId);
1410
90
  }
1411
105
1412
105
  BasicBlock *EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock();
1413
105
  auto *Alloca = new AllocaInst(AccInstTy, DL.getAllocaAddrSpace(),
1414
105
                                AccInst->getName() + ".preload.s2a");
1415
105
  Alloca->insertBefore(&*EntryBB->getFirstInsertionPt());
1416
105
  Builder.CreateStore(PreloadVal, Alloca);
1417
105
  ValueMapT PreloadedPointer;
1418
105
  PreloadedPointer[PreloadVal] = AccInst;
1419
105
  Annotator.addAlternativeAliasBases(PreloadedPointer);
1420
105
1421
46
  for (auto *DerivedSAI : SAI->getDerivedSAIs()) {
1422
46
    Value *BasePtr = DerivedSAI->getBasePtr();
1423
46
1424
52
    for (const MemoryAccess *MA : MAs) {
1425
52
      // As the derived SAI information is quite coarse, any load from the
1426
52
      // current SAI could be the base pointer of the derived SAI, however we
1427
52
      // should only change the base pointer of the derived SAI if we actually
1428
52
      // preloaded it.
1429
52
      if (
BasePtr == MA->getOriginalBaseAddr()52
) {
1430
0
        assert(BasePtr->getType() == PreloadVal->getType());
1431
0
        DerivedSAI->setBasePtr(PreloadVal);
1432
0
      }
1433
52
1434
52
      // For scalar derived SAIs we remap the alloca used for the derived value.
1435
52
      if (BasePtr == MA->getAccessInstruction())
1436
34
        ScalarMap[DerivedSAI] = Alloca;
1437
52
    }
1438
46
  }
1439
105
1440
115
  for (const MemoryAccess *MA : MAs) {
1441
115
    Instruction *MAAccInst = MA->getAccessInstruction();
1442
115
    // Use the escape system to get the correct value to users outside the SCoP.
1443
115
    BlockGenerator::EscapeUserVectorTy EscapeUsers;
1444
115
    for (auto *U : MAAccInst->users())
1445
120
      
if (Instruction *120
UI120
= dyn_cast<Instruction>(U))
1446
120
        
if (120
!S.contains(UI)120
)
1447
7
          EscapeUsers.push_back(UI);
1448
115
1449
115
    if (EscapeUsers.empty())
1450
109
      continue;
1451
6
1452
6
    EscapeMap[MA->getAccessInstruction()] =
1453
6
        std::make_pair(Alloca, std::move(EscapeUsers));
1454
6
  }
1455
146
1456
146
  return true;
1457
146
}
1458
1459
288
void IslNodeBuilder::allocateNewArrays(BBPair StartExitBlocks) {
1460
620
  for (auto &SAI : S.arrays()) {
1461
620
    if (SAI->getBasePtr())
1462
610
      continue;
1463
10
1464
620
    assert(SAI->getNumberOfDimensions() > 0 && SAI->getDimensionSize(0) &&
1465
10
           "The size of the outermost dimension is used to declare newly "
1466
10
           "created arrays that require memory allocation.");
1467
10
1468
10
    Type *NewArrayType = nullptr;
1469
10
1470
10
    // Get the size of the array = size(dim_1)*...*size(dim_n)
1471
10
    uint64_t ArraySizeInt = 1;
1472
30
    for (int i = SAI->getNumberOfDimensions() - 1; 
i >= 030
;
i--20
) {
1473
20
      auto *DimSize = SAI->getDimensionSize(i);
1474
20
      unsigned UnsignedDimSize = static_cast<const SCEVConstant *>(DimSize)
1475
20
                                     ->getAPInt()
1476
20
                                     .getLimitedValue();
1477
20
1478
20
      if (!NewArrayType)
1479
10
        NewArrayType = SAI->getElementType();
1480
20
1481
20
      NewArrayType = ArrayType::get(NewArrayType, UnsignedDimSize);
1482
20
      ArraySizeInt *= UnsignedDimSize;
1483
20
    }
1484
10
1485
10
    if (
SAI->isOnHeap()10
) {
1486
3
      LLVMContext &Ctx = NewArrayType->getContext();
1487
3
1488
3
      // Get the IntPtrTy from the Datalayout
1489
3
      auto IntPtrTy = DL.getIntPtrType(Ctx);
1490
3
1491
3
      // Get the size of the element type in bits
1492
3
      unsigned Size = SAI->getElemSizeInBytes();
1493
3
1494
3
      // Insert the malloc call at polly.start
1495
3
      auto InstIt = std::get<0>(StartExitBlocks)->getTerminator();
1496
3
      auto *CreatedArray = CallInst::CreateMalloc(
1497
3
          &*InstIt, IntPtrTy, SAI->getElementType(),
1498
3
          ConstantInt::get(Type::getInt64Ty(Ctx), Size),
1499
3
          ConstantInt::get(Type::getInt64Ty(Ctx), ArraySizeInt), nullptr,
1500
3
          SAI->getName());
1501
3
1502
3
      SAI->setBasePtr(CreatedArray);
1503
3
1504
3
      // Insert the free call at polly.exiting
1505
3
      CallInst::CreateFree(CreatedArray,
1506
3
                           std::get<1>(StartExitBlocks)->getTerminator());
1507
10
    } else {
1508
7
      auto InstIt = Builder.GetInsertBlock()
1509
7
                        ->getParent()
1510
7
                        ->getEntryBlock()
1511
7
                        .getTerminator();
1512
7
1513
7
      auto *CreatedArray = new AllocaInst(NewArrayType, DL.getAllocaAddrSpace(),
1514
7
                                          SAI->getName(), &*InstIt);
1515
7
      CreatedArray->setAlignment(PollyTargetFirstLevelCacheLineSize);
1516
7
      SAI->setBasePtr(CreatedArray);
1517
7
    }
1518
620
  }
1519
288
}
1520
1521
288
bool IslNodeBuilder::preloadInvariantLoads() {
1522
288
  auto &InvariantEquivClasses = S.getInvariantAccesses();
1523
288
  if (InvariantEquivClasses.empty())
1524
224
    return true;
1525
64
1526
64
  BasicBlock *PreLoadBB = SplitBlock(Builder.GetInsertBlock(),
1527
64
                                     &*Builder.GetInsertPoint(), &DT, &LI);
1528
64
  PreLoadBB->setName("polly.preload.begin");
1529
64
  Builder.SetInsertPoint(&PreLoadBB->front());
1530
64
1531
64
  for (auto &IAClass : InvariantEquivClasses)
1532
111
    
if (111
!preloadInvariantEquivClass(IAClass)111
)
1533
4
      return false;
1534
60
1535
60
  return true;
1536
60
}
1537
1538
284
void IslNodeBuilder::addParameters(__isl_take isl_set *Context) {
1539
284
  // Materialize values for the parameters of the SCoP.
1540
284
  materializeParameters();
1541
284
1542
284
  // materialize the outermost dimension parameters for a Fortran array.
1543
284
  // NOTE: materializeParameters() does not work since it looks through
1544
284
  // the SCEVs. We don't have a corresponding SCEV for the array size
1545
284
  // parameter
1546
284
  materializeFortranArrayOutermostDimension();
1547
284
1548
284
  // Generate values for the current loop iteration for all surrounding loops.
1549
284
  //
1550
284
  // We may also reference loops outside of the scop which do not contain the
1551
284
  // scop itself, but as the number of such scops may be arbitrarily large we do
1552
284
  // not generate code for them here, but only at the point of code generation
1553
284
  // where these values are needed.
1554
284
  Loop *L = LI.getLoopFor(S.getEntry());
1555
284
1556
499
  while (
L != nullptr && 499
S.contains(L)237
)
1557
215
    L = L->getParentLoop();
1558
284
1559
306
  while (
L != nullptr306
) {
1560
22
    materializeNonScopLoopInductionVariable(L);
1561
22
    L = L->getParentLoop();
1562
22
  }
1563
284
1564
284
  isl_set_free(Context);
1565
284
}
1566
1567
185
Value *IslNodeBuilder::generateSCEV(const SCEV *Expr) {
1568
185
  /// We pass the insert location of our Builder, as Polly ensures during IR
1569
185
  /// generation that there is always a valid CFG into which instructions are
1570
185
  /// inserted. As a result, the insertpoint is known to be always followed by a
1571
185
  /// terminator instruction. This means the insert point may be specified by a
1572
185
  /// terminator instruction, but it can never point to an ->end() iterator
1573
185
  /// which does not have a corresponding instruction. Hence, dereferencing
1574
185
  /// the insertpoint to obtain an instruction is known to be save.
1575
185
  ///
1576
185
  /// We also do not need to update the Builder here, as new instructions are
1577
185
  /// always inserted _before_ the given InsertLocation. As a result, the
1578
185
  /// insert location remains valid.
1579
185
  assert(Builder.GetInsertBlock()->end() != Builder.GetInsertPoint() &&
1580
185
         "Insert location points after last valid instruction");
1581
185
  Instruction *InsertLocation = &*Builder.GetInsertPoint();
1582
185
  return expandCodeFor(S, SE, DL, "polly", Expr, Expr->getType(),
1583
185
                       InsertLocation, &ValueMap,
1584
185
                       StartBlock->getSinglePredecessor());
1585
185
}
1586
1587
/// The AST expression we generate to perform the run-time check assumes
1588
/// computations on integer types of infinite size. As we only use 64-bit
1589
/// arithmetic we check for overflows, in case of which we set the result
1590
/// of this run-time check to false to be conservatively correct,
1591
284
Value *IslNodeBuilder::createRTC(isl_ast_expr *Condition) {
1592
284
  auto ExprBuilder = getExprBuilder();
1593
284
1594
284
  // In case the AST expression has integers larger than 64 bit, bail out. The
1595
284
  // resulting LLVM-IR will contain operations on types that use more than 64
1596
284
  // bits. These are -- in case wrapping intrinsics are used -- translated to
1597
284
  // runtime library calls that are not available on all systems (e.g., Android)
1598
284
  // and consequently will result in linker errors.
1599
284
  if (
ExprBuilder.hasLargeInts(isl::manage(isl_ast_expr_copy(Condition)))284
) {
1600
8
    isl_ast_expr_free(Condition);
1601
8
    return Builder.getFalse();
1602
8
  }
1603
276
1604
276
  ExprBuilder.setTrackOverflow(true);
1605
276
  Value *RTC = ExprBuilder.create(Condition);
1606
276
  if (!RTC->getType()->isIntegerTy(1))
1607
169
    RTC = Builder.CreateIsNotNull(RTC);
1608
276
  Value *OverflowHappened =
1609
276
      Builder.CreateNot(ExprBuilder.getOverflowState(), "polly.rtc.overflown");
1610
276
1611
276
  if (
PollyGenerateRTCPrint276
) {
1612
0
    auto *F = Builder.GetInsertBlock()->getParent();
1613
0
    RuntimeDebugBuilder::createCPUPrinter(
1614
0
        Builder,
1615
0
        "F: " + F->getName().str() + " R: " + S.getRegion().getNameStr() +
1616
0
            "RTC: ",
1617
0
        RTC, " Overflow: ", OverflowHappened,
1618
0
        "\n"
1619
0
        "  (0 failed, -1 succeeded)\n"
1620
0
        "  (if one or both are 0 falling back to original code, if both are -1 "
1621
0
        "executing Polly code)\n");
1622
0
  }
1623
276
1624
276
  RTC = Builder.CreateAnd(RTC, OverflowHappened, "polly.rtc.result");
1625
276
  ExprBuilder.setTrackOverflow(false);
1626
276
1627
276
  if (!isa<ConstantInt>(RTC))
1628
106
    VersionedScops++;
1629
284
1630
284
  return RTC;
1631
284
}