Coverage Report

Created: 2019-02-23 12:57

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/polly/lib/CodeGen/IslNodeBuilder.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- IslNodeBuilder.cpp - Translate an isl AST into a LLVM-IR AST -------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file contains the IslNodeBuilder, a class to translate an isl AST into
10
// a LLVM-IR AST.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "polly/CodeGen/IslNodeBuilder.h"
15
#include "polly/CodeGen/BlockGenerators.h"
16
#include "polly/CodeGen/CodeGeneration.h"
17
#include "polly/CodeGen/IslAst.h"
18
#include "polly/CodeGen/IslExprBuilder.h"
19
#include "polly/CodeGen/LoopGenerators.h"
20
#include "polly/CodeGen/RuntimeDebugBuilder.h"
21
#include "polly/Config/config.h"
22
#include "polly/Options.h"
23
#include "polly/ScopInfo.h"
24
#include "polly/Support/GICHelper.h"
25
#include "polly/Support/ISLTools.h"
26
#include "polly/Support/SCEVValidator.h"
27
#include "polly/Support/ScopHelper.h"
28
#include "llvm/ADT/APInt.h"
29
#include "llvm/ADT/PostOrderIterator.h"
30
#include "llvm/ADT/SetVector.h"
31
#include "llvm/ADT/SmallPtrSet.h"
32
#include "llvm/ADT/Statistic.h"
33
#include "llvm/Analysis/LoopInfo.h"
34
#include "llvm/Analysis/RegionInfo.h"
35
#include "llvm/Analysis/ScalarEvolution.h"
36
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
37
#include "llvm/IR/BasicBlock.h"
38
#include "llvm/IR/Constant.h"
39
#include "llvm/IR/Constants.h"
40
#include "llvm/IR/DataLayout.h"
41
#include "llvm/IR/DerivedTypes.h"
42
#include "llvm/IR/Dominators.h"
43
#include "llvm/IR/Function.h"
44
#include "llvm/IR/InstrTypes.h"
45
#include "llvm/IR/Instruction.h"
46
#include "llvm/IR/Instructions.h"
47
#include "llvm/IR/Type.h"
48
#include "llvm/IR/Value.h"
49
#include "llvm/Support/Casting.h"
50
#include "llvm/Support/CommandLine.h"
51
#include "llvm/Support/ErrorHandling.h"
52
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
53
#include "isl/aff.h"
54
#include "isl/aff_type.h"
55
#include "isl/ast.h"
56
#include "isl/ast_build.h"
57
#include "isl/isl-noexceptions.h"
58
#include "isl/map.h"
59
#include "isl/set.h"
60
#include "isl/union_map.h"
61
#include "isl/union_set.h"
62
#include "isl/val.h"
63
#include <algorithm>
64
#include <cassert>
65
#include <cstdint>
66
#include <cstring>
67
#include <string>
68
#include <utility>
69
#include <vector>
70
71
using namespace llvm;
72
using namespace polly;
73
74
#define DEBUG_TYPE "polly-codegen"
75
76
STATISTIC(VersionedScops, "Number of SCoPs that required versioning.");
77
78
STATISTIC(SequentialLoops, "Number of generated sequential for-loops");
79
STATISTIC(ParallelLoops, "Number of generated parallel for-loops");
80
STATISTIC(VectorLoops, "Number of generated vector for-loops");
81
STATISTIC(IfConditions, "Number of generated if-conditions");
82
83
static cl::opt<bool> PollyGenerateRTCPrint(
84
    "polly-codegen-emit-rtc-print",
85
    cl::desc("Emit code that prints the runtime check result dynamically."),
86
    cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
87
88
// If this option is set we always use the isl AST generator to regenerate
89
// memory accesses. Without this option set we regenerate expressions using the
90
// original SCEV expressions and only generate new expressions in case the
91
// access relation has been changed and consequently must be regenerated.
92
static cl::opt<bool> PollyGenerateExpressions(
93
    "polly-codegen-generate-expressions",
94
    cl::desc("Generate AST expressions for unmodified and modified accesses"),
95
    cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
96
97
static cl::opt<int> PollyTargetFirstLevelCacheLineSize(
98
    "polly-target-first-level-cache-line-size",
99
    cl::desc("The size of the first level cache line size specified in bytes."),
100
    cl::Hidden, cl::init(64), cl::ZeroOrMore, cl::cat(PollyCategory));
101
102
isl::ast_expr IslNodeBuilder::getUpperBound(isl::ast_node For,
103
329
                                            ICmpInst::Predicate &Predicate) {
104
329
  isl::ast_expr Cond = For.for_get_cond();
105
329
  isl::ast_expr Iterator = For.for_get_iterator();
106
329
  assert(isl_ast_expr_get_type(Cond.get()) == isl_ast_expr_op &&
107
329
         "conditional expression is not an atomic upper bound");
108
329
109
329
  isl_ast_op_type OpType = isl_ast_expr_get_op_type(Cond.get());
110
329
111
329
  switch (OpType) {
112
329
  case isl_ast_op_le:
113
241
    Predicate = ICmpInst::ICMP_SLE;
114
241
    break;
115
329
  case isl_ast_op_lt:
116
88
    Predicate = ICmpInst::ICMP_SLT;
117
88
    break;
118
329
  default:
119
0
    llvm_unreachable("Unexpected comparison type in loop condition");
120
329
  }
121
329
122
329
  isl::ast_expr Arg0 = Cond.get_op_arg(0);
123
329
124
329
  assert(isl_ast_expr_get_type(Arg0.get()) == isl_ast_expr_id &&
125
329
         "conditional expression is not an atomic upper bound");
126
329
127
329
  isl::id UBID = Arg0.get_id();
128
329
129
329
  assert(isl_ast_expr_get_type(Iterator.get()) == isl_ast_expr_id &&
130
329
         "Could not get the iterator");
131
329
132
329
  isl::id IteratorID = Iterator.get_id();
133
329
134
329
  assert(UBID.get() == IteratorID.get() &&
135
329
         "conditional expression is not an atomic upper bound");
136
329
137
329
  return Cond.get_op_arg(1);
138
329
}
139
140
/// Return true if a return value of Predicate is true for the value represented
141
/// by passed isl_ast_expr_int.
142
static bool checkIslAstExprInt(__isl_take isl_ast_expr *Expr,
143
42
                               isl_bool (*Predicate)(__isl_keep isl_val *)) {
144
42
  if (isl_ast_expr_get_type(Expr) != isl_ast_expr_int) {
145
0
    isl_ast_expr_free(Expr);
146
0
    return false;
147
0
  }
148
42
  auto ExprVal = isl_ast_expr_get_val(Expr);
149
42
  isl_ast_expr_free(Expr);
150
42
  if (Predicate(ExprVal) != isl_bool_true) {
151
0
    isl_val_free(ExprVal);
152
0
    return false;
153
0
  }
154
42
  isl_val_free(ExprVal);
155
42
  return true;
156
42
}
157
158
22
int IslNodeBuilder::getNumberOfIterations(isl::ast_node For) {
159
22
  assert(isl_ast_node_get_type(For.get()) == isl_ast_node_for);
160
22
  isl::ast_node Body = For.for_get_body();
161
22
162
22
  // First, check if we can actually handle this code.
163
22
  switch (isl_ast_node_get_type(Body.get())) {
164
22
  case isl_ast_node_user:
165
20
    break;
166
22
  case isl_ast_node_block: {
167
2
    isl::ast_node_list List = Body.block_get_children();
168
3
    for (isl::ast_node Node : List) {
169
3
      isl_ast_node_type NodeType = isl_ast_node_get_type(Node.get());
170
3
      if (NodeType != isl_ast_node_user)
171
1
        return -1;
172
3
    }
173
2
    
break1
;
174
2
  }
175
2
  default:
176
0
    return -1;
177
21
  }
178
21
179
21
  isl::ast_expr Init = For.for_get_init();
180
21
  if (!checkIslAstExprInt(Init.release(), isl_val_is_zero))
181
0
    return -1;
182
21
  isl::ast_expr Inc = For.for_get_inc();
183
21
  if (!checkIslAstExprInt(Inc.release(), isl_val_is_one))
184
0
    return -1;
185
21
  CmpInst::Predicate Predicate;
186
21
  isl::ast_expr UB = getUpperBound(For, Predicate);
187
21
  if (isl_ast_expr_get_type(UB.get()) != isl_ast_expr_int)
188
2
    return -1;
189
19
  isl::val UpVal = UB.get_val();
190
19
  int NumberIterations = UpVal.get_num_si();
191
19
  if (NumberIterations < 0)
192
0
    return -1;
193
19
  if (Predicate == CmpInst::ICMP_SLT)
194
0
    return NumberIterations;
195
19
  else
196
19
    return NumberIterations + 1;
197
19
}
198
199
/// Extract the values and SCEVs needed to generate code for a block.
200
static int findReferencesInBlock(struct SubtreeReferences &References,
201
31
                                 const ScopStmt *Stmt, BasicBlock *BB) {
202
208
  for (Instruction &Inst : *BB) {
203
208
    // Include invariant loads
204
208
    if (isa<LoadInst>(Inst))
205
23
      if (Value *InvariantLoad = References.GlobalMap.lookup(&Inst))
206
7
        References.Values.insert(InvariantLoad);
207
208
208
405
    for (Value *SrcVal : Inst.operands()) {
209
405
      auto *Scope = References.LI.getLoopFor(BB);
210
405
      if (canSynthesize(SrcVal, References.S, &References.SE, Scope)) {
211
296
        References.SCEVs.insert(References.SE.getSCEVAtScope(SrcVal, Scope));
212
296
        continue;
213
296
      } else 
if (Value *109
NewVal109
= References.GlobalMap.lookup(SrcVal))
214
6
        References.Values.insert(NewVal);
215
405
    }
216
208
  }
217
31
  return 0;
218
31
}
219
220
void addReferencesFromStmt(const ScopStmt *Stmt, void *UserPtr,
221
31
                           bool CreateScalarRefs) {
222
31
  auto &References = *static_cast<struct SubtreeReferences *>(UserPtr);
223
31
224
31
  if (Stmt->isBlockStmt())
225
31
    findReferencesInBlock(References, Stmt, Stmt->getBasicBlock());
226
0
  else {
227
0
    assert(Stmt->isRegionStmt() &&
228
0
           "Stmt was neither block nor region statement");
229
0
    for (BasicBlock *BB : Stmt->getRegion()->blocks())
230
0
      findReferencesInBlock(References, Stmt, BB);
231
0
  }
232
31
233
47
  for (auto &Access : *Stmt) {
234
47
    if (References.ParamSpace) {
235
0
      isl::space ParamSpace = Access->getLatestAccessRelation().get_space();
236
0
      (*References.ParamSpace) =
237
0
          References.ParamSpace->align_params(ParamSpace);
238
0
    }
239
47
240
47
    if (Access->isLatestArrayKind()) {
241
45
      auto *BasePtr = Access->getLatestScopArrayInfo()->getBasePtr();
242
45
      if (Instruction *OpInst = dyn_cast<Instruction>(BasePtr))
243
11
        if (Stmt->getParent()->contains(OpInst))
244
4
          continue;
245
41
246
41
      References.Values.insert(BasePtr);
247
41
      continue;
248
41
    }
249
2
250
2
    if (CreateScalarRefs)
251
2
      References.Values.insert(References.BlockGen.getOrCreateAlloca(*Access));
252
2
  }
253
31
}
254
255
/// Extract the out-of-scop values and SCEVs referenced from a set describing
256
/// a ScopStmt.
257
///
258
/// This includes the SCEVUnknowns referenced by the SCEVs used in the
259
/// statement and the base pointers of the memory accesses. For scalar
260
/// statements we force the generation of alloca memory locations and list
261
/// these locations in the set of out-of-scop values as well.
262
///
263
/// @param Set     A set which references the ScopStmt we are interested in.
264
/// @param UserPtr A void pointer that can be casted to a SubtreeReferences
265
///                structure.
266
static void addReferencesFromStmtSet(isl::set Set,
267
31
                                     struct SubtreeReferences *UserPtr) {
268
31
  isl::id Id = Set.get_tuple_id();
269
31
  auto *Stmt = static_cast<const ScopStmt *>(Id.get_user());
270
31
  return addReferencesFromStmt(Stmt, UserPtr);
271
31
}
272
273
/// Extract the out-of-scop values and SCEVs referenced from a union set
274
/// referencing multiple ScopStmts.
275
///
276
/// This includes the SCEVUnknowns referenced by the SCEVs used in the
277
/// statement and the base pointers of the memory accesses. For scalar
278
/// statements we force the generation of alloca memory locations and list
279
/// these locations in the set of out-of-scop values as well.
280
///
281
/// @param USet       A union set referencing the ScopStmts we are interested
282
///                   in.
283
/// @param References The SubtreeReferences data structure through which
284
///                   results are returned and further information is
285
///                   provided.
286
static void
287
addReferencesFromStmtUnionSet(isl::union_set USet,
288
30
                              struct SubtreeReferences &References) {
289
30
290
30
  for (isl::set Set : USet.get_set_list())
291
31
    addReferencesFromStmtSet(Set, &References);
292
30
}
293
294
__isl_give isl_union_map *
295
49
IslNodeBuilder::getScheduleForAstNode(__isl_keep isl_ast_node *For) {
296
49
  return IslAstInfo::getSchedule(For);
297
49
}
298
299
void IslNodeBuilder::getReferencesInSubtree(__isl_keep isl_ast_node *For,
300
                                            SetVector<Value *> &Values,
301
30
                                            SetVector<const Loop *> &Loops) {
302
30
  SetVector<const SCEV *> SCEVs;
303
30
  struct SubtreeReferences References = {
304
30
      LI, SE, S, ValueMap, Values, SCEVs, getBlockGenerator(), nullptr};
305
30
306
30
  for (const auto &I : IDToValue)
307
29
    Values.insert(I.second);
308
30
309
30
  // NOTE: this is populated in IslNodeBuilder::addParameters
310
30
  for (const auto &I : OutsideLoopIterations)
311
4
    Values.insert(cast<SCEVUnknown>(I.second)->getValue());
312
30
313
30
  isl::union_set Schedule =
314
30
      isl::manage(isl_union_map_domain(getScheduleForAstNode(For)));
315
30
  addReferencesFromStmtUnionSet(Schedule, References);
316
30
317
221
  for (const SCEV *Expr : SCEVs) {
318
221
    findValues(Expr, SE, Values);
319
221
    findLoops(Expr, Loops);
320
221
  }
321
30
322
87
  Values.remove_if([](const Value *V) { return isa<GlobalValue>(V); });
323
30
324
30
  /// Note: Code generation of induction variables of loops outside Scops
325
30
  ///
326
30
  /// Remove loops that contain the scop or that are part of the scop, as they
327
30
  /// are considered local. This leaves only loops that are before the scop, but
328
30
  /// do not contain the scop itself.
329
30
  /// We ignore loops perfectly contained in the Scop because these are already
330
30
  /// generated at `IslNodeBuilder::addParameters`. These `Loops` are loops
331
30
  /// whose induction variables are referred to by the Scop, but the Scop is not
332
30
  /// fully contained in these Loops. Since there can be many of these,
333
30
  /// we choose to codegen these on-demand.
334
30
  /// @see IslNodeBuilder::materializeNonScopLoopInductionVariable.
335
50
  Loops.remove_if([this](const Loop *L) {
336
50
    return S.contains(L) || 
L->contains(S.getEntry())5
;
337
50
  });
338
30
339
30
  // Contains Values that may need to be replaced with other values
340
30
  // due to replacements from the ValueMap. We should make sure
341
30
  // that we return correctly remapped values.
342
30
  // NOTE: this code path is tested by:
343
30
  //     1.  test/Isl/CodeGen/OpenMP/single_loop_with_loop_invariant_baseptr.ll
344
30
  //     2.  test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll
345
30
  SetVector<Value *> ReplacedValues;
346
76
  for (Value *V : Values) {
347
76
    ReplacedValues.insert(getLatestValue(V));
348
76
  }
349
30
  Values = ReplacedValues;
350
30
}
351
352
30
void IslNodeBuilder::updateValues(ValueMapT &NewValues) {
353
30
  SmallPtrSet<Value *, 5> Inserted;
354
30
355
30
  for (const auto &I : IDToValue) {
356
29
    IDToValue[I.first] = NewValues[I.second];
357
29
    Inserted.insert(I.second);
358
29
  }
359
30
360
74
  for (const auto &I : NewValues) {
361
74
    if (Inserted.count(I.first))
362
0
      continue;
363
74
364
74
    ValueMap[I.first] = I.second;
365
74
  }
366
30
}
367
368
76
Value *IslNodeBuilder::getLatestValue(Value *Original) const {
369
76
  auto It = ValueMap.find(Original);
370
76
  if (It == ValueMap.end())
371
73
    return Original;
372
3
  return It->second;
373
3
}
374
375
void IslNodeBuilder::createUserVector(__isl_take isl_ast_node *User,
376
                                      std::vector<Value *> &IVS,
377
                                      __isl_take isl_id *IteratorID,
378
20
                                      __isl_take isl_union_map *Schedule) {
379
20
  isl_ast_expr *Expr = isl_ast_node_user_get_expr(User);
380
20
  isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0);
381
20
  isl_id *Id = isl_ast_expr_get_id(StmtExpr);
382
20
  isl_ast_expr_free(StmtExpr);
383
20
  ScopStmt *Stmt = (ScopStmt *)isl_id_get_user(Id);
384
20
  std::vector<LoopToScevMapT> VLTS(IVS.size());
385
20
386
20
  isl_union_set *Domain = isl_union_set_from_set(Stmt->getDomain().release());
387
20
  Schedule = isl_union_map_intersect_domain(Schedule, Domain);
388
20
  isl_map *S = isl_map_from_union_map(Schedule);
389
20
390
20
  auto *NewAccesses = createNewAccesses(Stmt, User);
391
20
  createSubstitutionsVector(Expr, Stmt, VLTS, IVS, IteratorID);
392
20
  VectorBlockGenerator::generate(BlockGen, *Stmt, VLTS, S, NewAccesses);
393
20
  isl_id_to_ast_expr_free(NewAccesses);
394
20
  isl_map_free(S);
395
20
  isl_id_free(Id);
396
20
  isl_ast_node_free(User);
397
20
}
398
399
43
void IslNodeBuilder::createMark(__isl_take isl_ast_node *Node) {
400
43
  auto *Id = isl_ast_node_mark_get_id(Node);
401
43
  auto Child = isl_ast_node_mark_get_node(Node);
402
43
  isl_ast_node_free(Node);
403
43
  // If a child node of a 'SIMD mark' is a loop that has a single iteration,
404
43
  // it will be optimized away and we should skip it.
405
43
  if (strcmp(isl_id_get_name(Id), "SIMD") == 0 &&
406
43
      
isl_ast_node_get_type(Child) == isl_ast_node_for14
) {
407
2
    bool Vector = PollyVectorizerChoice == VECTORIZER_POLLY;
408
2
    int VectorWidth = getNumberOfIterations(isl::manage_copy(Child));
409
2
    if (Vector && 1 < VectorWidth && VectorWidth <= 16)
410
2
      createForVector(Child, VectorWidth);
411
0
    else
412
0
      createForSequential(isl::manage(Child), true);
413
2
    isl_id_free(Id);
414
2
    return;
415
2
  }
416
41
  if (strcmp(isl_id_get_name(Id), "Inter iteration alias-free") == 0) {
417
2
    auto *BasePtr = static_cast<Value *>(isl_id_get_user(Id));
418
2
    Annotator.addInterIterationAliasFreeBasePtr(BasePtr);
419
2
  }
420
41
  create(Child);
421
41
  isl_id_free(Id);
422
41
}
423
424
void IslNodeBuilder::createForVector(__isl_take isl_ast_node *For,
425
19
                                     int VectorWidth) {
426
19
  isl_ast_node *Body = isl_ast_node_for_get_body(For);
427
19
  isl_ast_expr *Init = isl_ast_node_for_get_init(For);
428
19
  isl_ast_expr *Inc = isl_ast_node_for_get_inc(For);
429
19
  isl_ast_expr *Iterator = isl_ast_node_for_get_iterator(For);
430
19
  isl_id *IteratorID = isl_ast_expr_get_id(Iterator);
431
19
432
19
  Value *ValueLB = ExprBuilder.create(Init);
433
19
  Value *ValueInc = ExprBuilder.create(Inc);
434
19
435
19
  Type *MaxType = ExprBuilder.getType(Iterator);
436
19
  MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType());
437
19
  MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType());
438
19
439
19
  if (MaxType != ValueLB->getType())
440
0
    ValueLB = Builder.CreateSExt(ValueLB, MaxType);
441
19
  if (MaxType != ValueInc->getType())
442
0
    ValueInc = Builder.CreateSExt(ValueInc, MaxType);
443
19
444
19
  std::vector<Value *> IVS(VectorWidth);
445
19
  IVS[0] = ValueLB;
446
19
447
119
  for (int i = 1; i < VectorWidth; 
i++100
)
448
100
    IVS[i] = Builder.CreateAdd(IVS[i - 1], ValueInc, "p_vector_iv");
449
19
450
19
  isl_union_map *Schedule = getScheduleForAstNode(For);
451
19
  assert(Schedule && "For statement annotation does not contain its schedule");
452
19
453
19
  IDToValue[IteratorID] = ValueLB;
454
19
455
19
  switch (isl_ast_node_get_type(Body)) {
456
19
  case isl_ast_node_user:
457
18
    createUserVector(Body, IVS, isl_id_copy(IteratorID),
458
18
                     isl_union_map_copy(Schedule));
459
18
    break;
460
19
  case isl_ast_node_block: {
461
1
    isl_ast_node_list *List = isl_ast_node_block_get_children(Body);
462
1
463
3
    for (int i = 0; i < isl_ast_node_list_n_ast_node(List); 
++i2
)
464
2
      createUserVector(isl_ast_node_list_get_ast_node(List, i), IVS,
465
2
                       isl_id_copy(IteratorID), isl_union_map_copy(Schedule));
466
1
467
1
    isl_ast_node_free(Body);
468
1
    isl_ast_node_list_free(List);
469
1
    break;
470
19
  }
471
19
  default:
472
0
    isl_ast_node_dump(Body);
473
0
    llvm_unreachable("Unhandled isl_ast_node in vectorizer");
474
19
  }
475
19
476
19
  IDToValue.erase(IDToValue.find(IteratorID));
477
19
  isl_id_free(IteratorID);
478
19
  isl_union_map_free(Schedule);
479
19
480
19
  isl_ast_node_free(For);
481
19
  isl_ast_expr_free(Iterator);
482
19
483
19
  VectorLoops++;
484
19
}
485
486
/// Restore the initial ordering of dimensions of the band node
487
///
488
/// In case the band node represents all the dimensions of the iteration
489
/// domain, recreate the band node to restore the initial ordering of the
490
/// dimensions.
491
///
492
/// @param Node The band node to be modified.
493
/// @return The modified schedule node.
494
278
static bool IsLoopVectorizerDisabled(isl::ast_node Node) {
495
278
  assert(isl_ast_node_get_type(Node.get()) == isl_ast_node_for);
496
278
  auto Body = Node.for_get_body();
497
278
  if (isl_ast_node_get_type(Body.get()) != isl_ast_node_mark)
498
259
    return false;
499
19
  auto Id = Body.mark_get_id();
500
19
  if (strcmp(Id.get_name().c_str(), "Loop Vectorizer Disabled") == 0)
501
2
    return true;
502
17
  return false;
503
17
}
504
505
278
void IslNodeBuilder::createForSequential(isl::ast_node For, bool MarkParallel) {
506
278
  Value *ValueLB, *ValueUB, *ValueInc;
507
278
  Type *MaxType;
508
278
  BasicBlock *ExitBlock;
509
278
  Value *IV;
510
278
  CmpInst::Predicate Predicate;
511
278
512
278
  bool LoopVectorizerDisabled = IsLoopVectorizerDisabled(For);
513
278
514
278
  isl::ast_node Body = For.for_get_body();
515
278
516
278
  // isl_ast_node_for_is_degenerate(For)
517
278
  //
518
278
  // TODO: For degenerated loops we could generate a plain assignment.
519
278
  //       However, for now we just reuse the logic for normal loops, which will
520
278
  //       create a loop with a single iteration.
521
278
522
278
  isl::ast_expr Init = For.for_get_init();
523
278
  isl::ast_expr Inc = For.for_get_inc();
524
278
  isl::ast_expr Iterator = For.for_get_iterator();
525
278
  isl::id IteratorID = Iterator.get_id();
526
278
  isl::ast_expr UB = getUpperBound(For, Predicate);
527
278
528
278
  ValueLB = ExprBuilder.create(Init.release());
529
278
  ValueUB = ExprBuilder.create(UB.release());
530
278
  ValueInc = ExprBuilder.create(Inc.release());
531
278
532
278
  MaxType = ExprBuilder.getType(Iterator.get());
533
278
  MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType());
534
278
  MaxType = ExprBuilder.getWidestType(MaxType, ValueUB->getType());
535
278
  MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType());
536
278
537
278
  if (MaxType != ValueLB->getType())
538
0
    ValueLB = Builder.CreateSExt(ValueLB, MaxType);
539
278
  if (MaxType != ValueUB->getType())
540
37
    ValueUB = Builder.CreateSExt(ValueUB, MaxType);
541
278
  if (MaxType != ValueInc->getType())
542
0
    ValueInc = Builder.CreateSExt(ValueInc, MaxType);
543
278
544
278
  // If we can show that LB <Predicate> UB holds at least once, we can
545
278
  // omit the GuardBB in front of the loop.
546
278
  bool UseGuardBB =
547
278
      !SE.isKnownPredicate(Predicate, SE.getSCEV(ValueLB), SE.getSCEV(ValueUB));
548
278
  IV = createLoop(ValueLB, ValueUB, ValueInc, Builder, LI, DT, ExitBlock,
549
278
                  Predicate, &Annotator, MarkParallel, UseGuardBB,
550
278
                  LoopVectorizerDisabled);
551
278
  IDToValue[IteratorID.get()] = IV;
552
278
553
278
  create(Body.release());
554
278
555
278
  Annotator.popLoop(MarkParallel);
556
278
557
278
  IDToValue.erase(IDToValue.find(IteratorID.get()));
558
278
559
278
  Builder.SetInsertPoint(&ExitBlock->front());
560
278
561
278
  SequentialLoops++;
562
278
}
563
564
/// Remove the BBs contained in a (sub)function from the dominator tree.
565
///
566
/// This function removes the basic blocks that are part of a subfunction from
567
/// the dominator tree. Specifically, when generating code it may happen that at
568
/// some point the code generation continues in a new sub-function (e.g., when
569
/// generating OpenMP code). The basic blocks that are created in this
570
/// sub-function are then still part of the dominator tree of the original
571
/// function, such that the dominator tree reaches over function boundaries.
572
/// This is not only incorrect, but also causes crashes. This function now
573
/// removes from the dominator tree all basic blocks that are dominated (and
574
/// consequently reachable) from the entry block of this (sub)function.
575
///
576
/// FIXME: A LLVM (function or region) pass should not touch anything outside of
577
/// the function/region it runs on. Hence, the pure need for this function shows
578
/// that we do not comply to this rule. At the moment, this does not cause any
579
/// issues, but we should be aware that such issues may appear. Unfortunately
580
/// the current LLVM pass infrastructure does not allow to make Polly a module
581
/// or call-graph pass to solve this issue, as such a pass would not have access
582
/// to the per-function analyses passes needed by Polly. A future pass manager
583
/// infrastructure is supposed to enable such kind of access possibly allowing
584
/// us to create a cleaner solution here.
585
///
586
/// FIXME: Instead of adding the dominance information and then dropping it
587
/// later on, we should try to just not add it in the first place. This requires
588
/// some careful testing to make sure this does not break in interaction with
589
/// the SCEVBuilder and SplitBlock which may rely on the dominator tree or
590
/// which may try to update it.
591
///
592
/// @param F The function which contains the BBs to removed.
593
/// @param DT The dominator tree from which to remove the BBs.
594
30
static void removeSubFuncFromDomTree(Function *F, DominatorTree &DT) {
595
30
  DomTreeNode *N = DT.getNode(&F->getEntryBlock());
596
30
  std::vector<BasicBlock *> Nodes;
597
30
598
30
  // We can only remove an element from the dominator tree, if all its children
599
30
  // have been removed. To ensure this we obtain the list of nodes to remove
600
30
  // using a post-order tree traversal.
601
486
  for (po_iterator<DomTreeNode *> I = po_begin(N), E = po_end(N); I != E; 
++I456
)
602
456
    Nodes.push_back(I->getBlock());
603
30
604
30
  for (BasicBlock *BB : Nodes)
605
456
    DT.eraseNode(BB);
606
30
}
607
608
30
void IslNodeBuilder::createForParallel(__isl_take isl_ast_node *For) {
609
30
  isl_ast_node *Body;
610
30
  isl_ast_expr *Init, *Inc, *Iterator, *UB;
611
30
  isl_id *IteratorID;
612
30
  Value *ValueLB, *ValueUB, *ValueInc;
613
30
  Type *MaxType;
614
30
  Value *IV;
615
30
  CmpInst::Predicate Predicate;
616
30
617
30
  // The preamble of parallel code interacts different than normal code with
618
30
  // e.g., scalar initialization. Therefore, we ensure the parallel code is
619
30
  // separated from the last basic block.
620
30
  BasicBlock *ParBB = SplitBlock(Builder.GetInsertBlock(),
621
30
                                 &*Builder.GetInsertPoint(), &DT, &LI);
622
30
  ParBB->setName("polly.parallel.for");
623
30
  Builder.SetInsertPoint(&ParBB->front());
624
30
625
30
  Body = isl_ast_node_for_get_body(For);
626
30
  Init = isl_ast_node_for_get_init(For);
627
30
  Inc = isl_ast_node_for_get_inc(For);
628
30
  Iterator = isl_ast_node_for_get_iterator(For);
629
30
  IteratorID = isl_ast_expr_get_id(Iterator);
630
30
  UB = getUpperBound(isl::manage_copy(For), Predicate).release();
631
30
632
30
  ValueLB = ExprBuilder.create(Init);
633
30
  ValueUB = ExprBuilder.create(UB);
634
30
  ValueInc = ExprBuilder.create(Inc);
635
30
636
30
  // OpenMP always uses SLE. In case the isl generated AST uses a SLT
637
30
  // expression, we need to adjust the loop bound by one.
638
30
  if (Predicate == CmpInst::ICMP_SLT)
639
8
    ValueUB = Builder.CreateAdd(
640
8
        ValueUB, Builder.CreateSExt(Builder.getTrue(), ValueUB->getType()));
641
30
642
30
  MaxType = ExprBuilder.getType(Iterator);
643
30
  MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType());
644
30
  MaxType = ExprBuilder.getWidestType(MaxType, ValueUB->getType());
645
30
  MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType());
646
30
647
30
  if (MaxType != ValueLB->getType())
648
0
    ValueLB = Builder.CreateSExt(ValueLB, MaxType);
649
30
  if (MaxType != ValueUB->getType())
650
4
    ValueUB = Builder.CreateSExt(ValueUB, MaxType);
651
30
  if (MaxType != ValueInc->getType())
652
0
    ValueInc = Builder.CreateSExt(ValueInc, MaxType);
653
30
654
30
  BasicBlock::iterator LoopBody;
655
30
656
30
  SetVector<Value *> SubtreeValues;
657
30
  SetVector<const Loop *> Loops;
658
30
659
30
  getReferencesInSubtree(For, SubtreeValues, Loops);
660
30
661
30
  // Create for all loops we depend on values that contain the current loop
662
30
  // iteration. These values are necessary to generate code for SCEVs that
663
30
  // depend on such loops. As a result we need to pass them to the subfunction.
664
30
  // See [Code generation of induction variables of loops outside Scops]
665
30
  for (const Loop *L : Loops) {
666
1
    Value *LoopInductionVar = materializeNonScopLoopInductionVariable(L);
667
1
    SubtreeValues.insert(LoopInductionVar);
668
1
  }
669
30
670
30
  ValueMapT NewValues;
671
30
  ParallelLoopGenerator ParallelLoopGen(Builder, LI, DT, DL);
672
30
673
30
  IV = ParallelLoopGen.createParallelLoop(ValueLB, ValueUB, ValueInc,
674
30
                                          SubtreeValues, NewValues, &LoopBody);
675
30
  BasicBlock::iterator AfterLoop = Builder.GetInsertPoint();
676
30
  Builder.SetInsertPoint(&*LoopBody);
677
30
678
30
  // Remember the parallel subfunction
679
30
  ParallelSubfunctions.push_back(LoopBody->getFunction());
680
30
681
30
  // Save the current values.
682
30
  auto ValueMapCopy = ValueMap;
683
30
  IslExprBuilder::IDToValueTy IDToValueCopy = IDToValue;
684
30
685
30
  updateValues(NewValues);
686
30
  IDToValue[IteratorID] = IV;
687
30
688
30
  ValueMapT NewValuesReverse;
689
30
690
30
  for (auto P : NewValues)
691
74
    NewValuesReverse[P.second] = P.first;
692
30
693
30
  Annotator.addAlternativeAliasBases(NewValuesReverse);
694
30
695
30
  create(Body);
696
30
697
30
  Annotator.resetAlternativeAliasBases();
698
30
  // Restore the original values.
699
30
  ValueMap = ValueMapCopy;
700
30
  IDToValue = IDToValueCopy;
701
30
702
30
  Builder.SetInsertPoint(&*AfterLoop);
703
30
  removeSubFuncFromDomTree((*LoopBody).getParent()->getParent(), DT);
704
30
705
30
  for (const Loop *L : Loops)
706
1
    OutsideLoopIterations.erase(L);
707
30
708
30
  isl_ast_node_free(For);
709
30
  isl_ast_expr_free(Iterator);
710
30
  isl_id_free(IteratorID);
711
30
712
30
  ParallelLoops++;
713
30
}
714
715
/// Return whether any of @p Node's statements contain partial accesses.
716
///
717
/// Partial accesses are not supported by Polly's vector code generator.
718
17
static bool hasPartialAccesses(__isl_take isl_ast_node *Node) {
719
17
  return isl_ast_node_foreach_descendant_top_down(
720
17
             Node,
721
36
             [](isl_ast_node *Node, void *User) -> isl_bool {
722
36
               if (isl_ast_node_get_type(Node) != isl_ast_node_user)
723
18
                 return isl_bool_true;
724
18
725
18
               isl::ast_expr Expr =
726
18
                   isl::manage(isl_ast_node_user_get_expr(Node));
727
18
               isl::ast_expr StmtExpr = Expr.get_op_arg(0);
728
18
               isl::id Id = StmtExpr.get_id();
729
18
730
18
               ScopStmt *Stmt =
731
18
                   static_cast<ScopStmt *>(isl_id_get_user(Id.get()));
732
18
               isl::set StmtDom = Stmt->getDomain();
733
27
               for (auto *MA : *Stmt) {
734
27
                 if (MA->isLatestPartialAccess())
735
0
                   return isl_bool_error;
736
27
               }
737
18
               return isl_bool_true;
738
18
             },
739
17
             nullptr) == isl_stat_error;
740
17
}
741
742
325
void IslNodeBuilder::createFor(__isl_take isl_ast_node *For) {
743
325
  bool Vector = PollyVectorizerChoice == VECTORIZER_POLLY;
744
325
745
325
  if (Vector && 
IslAstInfo::isInnermostParallel(For)27
&&
746
325
      
!IslAstInfo::isReductionParallel(For)20
) {
747
20
    int VectorWidth = getNumberOfIterations(isl::manage_copy(For));
748
20
    if (1 < VectorWidth && 
VectorWidth <= 1617
&&
!hasPartialAccesses(For)17
) {
749
17
      createForVector(For, VectorWidth);
750
17
      return;
751
17
    }
752
308
  }
753
308
754
308
  if (IslAstInfo::isExecutedInParallel(For)) {
755
30
    createForParallel(For);
756
30
    return;
757
30
  }
758
278
  bool Parallel =
759
278
      (IslAstInfo::isParallel(For) && 
!IslAstInfo::isReductionParallel(For)26
);
760
278
  createForSequential(isl::manage(For), Parallel);
761
278
}
762
763
86
void IslNodeBuilder::createIf(__isl_take isl_ast_node *If) {
764
86
  isl_ast_expr *Cond = isl_ast_node_if_get_cond(If);
765
86
766
86
  Function *F = Builder.GetInsertBlock()->getParent();
767
86
  LLVMContext &Context = F->getContext();
768
86
769
86
  BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(),
770
86
                                  &*Builder.GetInsertPoint(), &DT, &LI);
771
86
  CondBB->setName("polly.cond");
772
86
  BasicBlock *MergeBB = SplitBlock(CondBB, &CondBB->front(), &DT, &LI);
773
86
  MergeBB->setName("polly.merge");
774
86
  BasicBlock *ThenBB = BasicBlock::Create(Context, "polly.then", F);
775
86
  BasicBlock *ElseBB = BasicBlock::Create(Context, "polly.else", F);
776
86
777
86
  DT.addNewBlock(ThenBB, CondBB);
778
86
  DT.addNewBlock(ElseBB, CondBB);
779
86
  DT.changeImmediateDominator(MergeBB, CondBB);
780
86
781
86
  Loop *L = LI.getLoopFor(CondBB);
782
86
  if (L) {
783
52
    L->addBasicBlockToLoop(ThenBB, LI);
784
52
    L->addBasicBlockToLoop(ElseBB, LI);
785
52
  }
786
86
787
86
  CondBB->getTerminator()->eraseFromParent();
788
86
789
86
  Builder.SetInsertPoint(CondBB);
790
86
  Value *Predicate = ExprBuilder.create(Cond);
791
86
  Builder.CreateCondBr(Predicate, ThenBB, ElseBB);
792
86
  Builder.SetInsertPoint(ThenBB);
793
86
  Builder.CreateBr(MergeBB);
794
86
  Builder.SetInsertPoint(ElseBB);
795
86
  Builder.CreateBr(MergeBB);
796
86
  Builder.SetInsertPoint(&ThenBB->front());
797
86
798
86
  create(isl_ast_node_if_get_then(If));
799
86
800
86
  Builder.SetInsertPoint(&ElseBB->front());
801
86
802
86
  if (isl_ast_node_if_has_else(If))
803
11
    create(isl_ast_node_if_get_else(If));
804
86
805
86
  Builder.SetInsertPoint(&MergeBB->front());
806
86
807
86
  isl_ast_node_free(If);
808
86
809
86
  IfConditions++;
810
86
}
811
812
__isl_give isl_id_to_ast_expr *
813
IslNodeBuilder::createNewAccesses(ScopStmt *Stmt,
814
521
                                  __isl_keep isl_ast_node *Node) {
815
521
  isl_id_to_ast_expr *NewAccesses =
816
521
      isl_id_to_ast_expr_alloc(Stmt->getParent()->getIslCtx().get(), 0);
817
521
818
521
  auto *Build = IslAstInfo::getBuild(Node);
819
521
  assert(Build && "Could not obtain isl_ast_build from user node");
820
521
  Stmt->setAstBuild(isl::manage_copy(Build));
821
521
822
1.01k
  for (auto *MA : *Stmt) {
823
1.01k
    if (!MA->hasNewAccessRelation()) {
824
795
      if (PollyGenerateExpressions) {
825
2
        if (!MA->isAffine())
826
0
          continue;
827
2
        if (MA->getLatestScopArrayInfo()->getBasePtrOriginSAI())
828
0
          continue;
829
2
830
2
        auto *BasePtr =
831
2
            dyn_cast<Instruction>(MA->getLatestScopArrayInfo()->getBasePtr());
832
2
        if (BasePtr && 
Stmt->getParent()->getRegion().contains(BasePtr)0
)
833
0
          continue;
834
793
      } else {
835
793
        continue;
836
793
      }
837
226
    }
838
226
    assert(MA->isAffine() &&
839
226
           "Only affine memory accesses can be code generated");
840
226
841
226
    auto Schedule = isl_ast_build_get_schedule(Build);
842
226
843
#ifndef NDEBUG
844
    if (MA->isRead()) {
845
      auto Dom = Stmt->getDomain().release();
846
      auto SchedDom = isl_set_from_union_set(
847
          isl_union_map_domain(isl_union_map_copy(Schedule)));
848
      auto AccDom = isl_map_domain(MA->getAccessRelation().release());
849
      Dom = isl_set_intersect_params(Dom,
850
                                     Stmt->getParent()->getContext().release());
851
      SchedDom = isl_set_intersect_params(
852
          SchedDom, Stmt->getParent()->getContext().release());
853
      assert(isl_set_is_subset(SchedDom, AccDom) &&
854
             "Access relation not defined on full schedule domain");
855
      assert(isl_set_is_subset(Dom, AccDom) &&
856
             "Access relation not defined on full domain");
857
      isl_set_free(AccDom);
858
      isl_set_free(SchedDom);
859
      isl_set_free(Dom);
860
    }
861
#endif
862
863
226
    auto PWAccRel =
864
226
        MA->applyScheduleToAccessRelation(isl::manage(Schedule)).release();
865
226
866
226
    // isl cannot generate an index expression for access-nothing accesses.
867
226
    isl::set AccDomain =
868
226
        isl::manage(isl_pw_multi_aff_domain(isl_pw_multi_aff_copy(PWAccRel)));
869
226
    isl::set Context = S.getContext();
870
226
    AccDomain = AccDomain.intersect_params(Context);
871
226
    if (AccDomain.is_empty()) {
872
3
      isl_pw_multi_aff_free(PWAccRel);
873
3
      continue;
874
3
    }
875
223
876
223
    auto AccessExpr = isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel);
877
223
    NewAccesses =
878
223
        isl_id_to_ast_expr_set(NewAccesses, MA->getId().release(), AccessExpr);
879
223
  }
880
521
881
521
  return NewAccesses;
882
521
}
883
884
void IslNodeBuilder::createSubstitutions(__isl_take isl_ast_expr *Expr,
885
620
                                         ScopStmt *Stmt, LoopToScevMapT &LTS) {
886
620
  assert(isl_ast_expr_get_type(Expr) == isl_ast_expr_op &&
887
620
         "Expression of type 'op' expected");
888
620
  assert(isl_ast_expr_get_op_type(Expr) == isl_ast_op_call &&
889
620
         "Operation of type 'call' expected");
890
1.33k
  for (int i = 0; i < isl_ast_expr_get_op_n_arg(Expr) - 1; 
++i713
) {
891
713
    isl_ast_expr *SubExpr;
892
713
    Value *V;
893
713
894
713
    SubExpr = isl_ast_expr_get_op_arg(Expr, i + 1);
895
713
    V = ExprBuilder.create(SubExpr);
896
713
    ScalarEvolution *SE = Stmt->getParent()->getSE();
897
713
    LTS[Stmt->getLoopForDimension(i)] = SE->getUnknown(V);
898
713
  }
899
620
900
620
  isl_ast_expr_free(Expr);
901
620
}
902
903
void IslNodeBuilder::createSubstitutionsVector(
904
    __isl_take isl_ast_expr *Expr, ScopStmt *Stmt,
905
    std::vector<LoopToScevMapT> &VLTS, std::vector<Value *> &IVS,
906
20
    __isl_take isl_id *IteratorID) {
907
20
  int i = 0;
908
20
909
20
  Value *OldValue = IDToValue[IteratorID];
910
123
  for (Value *IV : IVS) {
911
123
    IDToValue[IteratorID] = IV;
912
123
    createSubstitutions(isl_ast_expr_copy(Expr), Stmt, VLTS[i]);
913
123
    i++;
914
123
  }
915
20
916
20
  IDToValue[IteratorID] = OldValue;
917
20
  isl_id_free(IteratorID);
918
20
  isl_ast_expr_free(Expr);
919
20
}
920
921
void IslNodeBuilder::generateCopyStmt(
922
4
    ScopStmt *Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses) {
923
4
  assert(Stmt->size() == 2);
924
4
  auto ReadAccess = Stmt->begin();
925
4
  auto WriteAccess = ReadAccess++;
926
4
  assert((*ReadAccess)->isRead() && (*WriteAccess)->isMustWrite());
927
4
  assert((*ReadAccess)->getElementType() == (*WriteAccess)->getElementType() &&
928
4
         "Accesses use the same data type");
929
4
  assert((*ReadAccess)->isArrayKind() && (*WriteAccess)->isArrayKind());
930
4
  auto *AccessExpr =
931
4
      isl_id_to_ast_expr_get(NewAccesses, (*ReadAccess)->getId().release());
932
4
  auto *LoadValue = ExprBuilder.create(AccessExpr);
933
4
  AccessExpr =
934
4
      isl_id_to_ast_expr_get(NewAccesses, (*WriteAccess)->getId().release());
935
4
  auto *StoreAddr = ExprBuilder.createAccessAddress(AccessExpr);
936
4
  Builder.CreateStore(LoadValue, StoreAddr);
937
4
}
938
939
26
Value *IslNodeBuilder::materializeNonScopLoopInductionVariable(const Loop *L) {
940
26
  assert(OutsideLoopIterations.find(L) == OutsideLoopIterations.end() &&
941
26
         "trying to materialize loop induction variable twice");
942
26
  const SCEV *OuterLIV = SE.getAddRecExpr(SE.getUnknown(Builder.getInt64(0)),
943
26
                                          SE.getUnknown(Builder.getInt64(1)), L,
944
26
                                          SCEV::FlagAnyWrap);
945
26
  Value *V = generateSCEV(OuterLIV);
946
26
  OutsideLoopIterations[L] = SE.getUnknown(V);
947
26
  return V;
948
26
}
949
950
501
void IslNodeBuilder::createUser(__isl_take isl_ast_node *User) {
951
501
  LoopToScevMapT LTS;
952
501
  isl_id *Id;
953
501
  ScopStmt *Stmt;
954
501
955
501
  isl_ast_expr *Expr = isl_ast_node_user_get_expr(User);
956
501
  isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0);
957
501
  Id = isl_ast_expr_get_id(StmtExpr);
958
501
  isl_ast_expr_free(StmtExpr);
959
501
960
501
  LTS.insert(OutsideLoopIterations.begin(), OutsideLoopIterations.end());
961
501
962
501
  Stmt = (ScopStmt *)isl_id_get_user(Id);
963
501
  auto *NewAccesses = createNewAccesses(Stmt, User);
964
501
  if (Stmt->isCopyStmt()) {
965
4
    generateCopyStmt(Stmt, NewAccesses);
966
4
    isl_ast_expr_free(Expr);
967
497
  } else {
968
497
    createSubstitutions(Expr, Stmt, LTS);
969
497
970
497
    if (Stmt->isBlockStmt())
971
462
      BlockGen.copyStmt(*Stmt, LTS, NewAccesses);
972
35
    else
973
35
      RegionGen.copyStmt(*Stmt, LTS, NewAccesses);
974
497
  }
975
501
976
501
  isl_id_to_ast_expr_free(NewAccesses);
977
501
  isl_ast_node_free(User);
978
501
  isl_id_free(Id);
979
501
}
980
981
148
void IslNodeBuilder::createBlock(__isl_take isl_ast_node *Block) {
982
148
  isl_ast_node_list *List = isl_ast_node_block_get_children(Block);
983
148
984
516
  for (int i = 0; i < isl_ast_node_list_n_ast_node(List); 
++i368
)
985
368
    create(isl_ast_node_list_get_ast_node(List, i));
986
148
987
148
  isl_ast_node_free(Block);
988
148
  isl_ast_node_list_free(List);
989
148
}
990
991
1.10k
void IslNodeBuilder::create(__isl_take isl_ast_node *Node) {
992
1.10k
  switch (isl_ast_node_get_type(Node)) {
993
1.10k
  case isl_ast_node_error:
994
0
    llvm_unreachable("code generation error");
995
1.10k
  case isl_ast_node_mark:
996
43
    createMark(Node);
997
43
    return;
998
1.10k
  case isl_ast_node_for:
999
325
    createFor(Node);
1000
325
    return;
1001
1.10k
  case isl_ast_node_if:
1002
86
    createIf(Node);
1003
86
    return;
1004
1.10k
  case isl_ast_node_user:
1005
501
    createUser(Node);
1006
501
    return;
1007
1.10k
  case isl_ast_node_block:
1008
148
    createBlock(Node);
1009
148
    return;
1010
0
  }
1011
0
1012
0
  llvm_unreachable("Unknown isl_ast_node type");
1013
0
}
1014
1015
248
bool IslNodeBuilder::materializeValue(isl_id *Id) {
1016
248
  // If the Id is already mapped, skip it.
1017
248
  if (!IDToValue.count(Id)) {
1018
176
    auto *ParamSCEV = (const SCEV *)isl_id_get_user(Id);
1019
176
    Value *V = nullptr;
1020
176
1021
176
    // Parameters could refer to invariant loads that need to be
1022
176
    // preloaded before we can generate code for the parameter. Thus,
1023
176
    // check if any value referred to in ParamSCEV is an invariant load
1024
176
    // and if so make sure its equivalence class is preloaded.
1025
176
    SetVector<Value *> Values;
1026
176
    findValues(ParamSCEV, SE, Values);
1027
180
    for (auto *Val : Values) {
1028
180
      // Check if the value is an instruction in a dead block within the SCoP
1029
180
      // and if so do not code generate it.
1030
180
      if (auto *Inst = dyn_cast<Instruction>(Val)) {
1031
40
        if (S.contains(Inst)) {
1032
16
          bool IsDead = true;
1033
16
1034
16
          // Check for "undef" loads first, then if there is a statement for
1035
16
          // the parent of Inst and lastly if the parent of Inst has an empty
1036
16
          // domain. In the first and last case the instruction is dead but if
1037
16
          // there is a statement or the domain is not empty Inst is not dead.
1038
16
          auto MemInst = MemAccInst::dyn_cast(Inst);
1039
16
          auto Address = MemInst ? 
MemInst.getPointerOperand()15
:
nullptr1
;
1040
16
          if (Address && SE.getUnknown(UndefValue::get(Address->getType())) ==
1041
14
                             SE.getPointerBase(SE.getSCEV(Address))) {
1042
16
          } else if (S.getStmtFor(Inst)) {
1043
0
            IsDead = false;
1044
16
          } else {
1045
16
            auto *Domain = S.getDomainConditions(Inst->getParent()).release();
1046
16
            IsDead = isl_set_is_empty(Domain);
1047
16
            isl_set_free(Domain);
1048
16
          }
1049
16
1050
16
          if (IsDead) {
1051
2
            V = UndefValue::get(ParamSCEV->getType());
1052
2
            break;
1053
2
          }
1054
178
        }
1055
40
      }
1056
178
1057
178
      if (auto *IAClass = S.lookupInvariantEquivClass(Val)) {
1058
12
        // Check if this invariant access class is empty, hence if we never
1059
12
        // actually added a loads instruction to it. In that case it has no
1060
12
        // (meaningful) users and we should not try to code generate it.
1061
12
        if (IAClass->InvariantAccesses.empty())
1062
0
          V = UndefValue::get(ParamSCEV->getType());
1063
12
1064
12
        if (!preloadInvariantEquivClass(*IAClass)) {
1065
3
          isl_id_free(Id);
1066
3
          return false;
1067
3
        }
1068
12
      }
1069
178
    }
1070
176
1071
176
    
V = V 173
?
V2
:
generateSCEV(ParamSCEV)171
;
1072
173
    IDToValue[Id] = V;
1073
173
  }
1074
248
1075
248
  isl_id_free(Id);
1076
245
  return true;
1077
248
}
1078
1079
132
bool IslNodeBuilder::materializeParameters(isl_set *Set) {
1080
341
  for (unsigned i = 0, e = isl_set_dim(Set, isl_dim_param); i < e; 
++i209
) {
1081
212
    if (!isl_set_involves_dims(Set, isl_dim_param, i, 1))
1082
155
      continue;
1083
57
    isl_id *Id = isl_set_get_dim_id(Set, isl_dim_param, i);
1084
57
    if (!materializeValue(Id))
1085
3
      return false;
1086
57
  }
1087
132
  
return true129
;
1088
132
}
1089
1090
289
bool IslNodeBuilder::materializeParameters() {
1091
289
  for (const SCEV *Param : S.parameters()) {
1092
191
    isl_id *Id = S.getIdForParam(Param).release();
1093
191
    if (!materializeValue(Id))
1094
0
      return false;
1095
191
  }
1096
289
  return true;
1097
289
}
1098
1099
/// Generate the computation of the size of the outermost dimension from the
1100
/// Fortran array descriptor (in this case, `@g_arr`). The final `%size`
1101
/// contains the size of the array.
1102
///
1103
/// %arrty = type { i8*, i64, i64, [3 x %desc.dimensionty] }
1104
/// %desc.dimensionty = type { i64, i64, i64 }
1105
/// @g_arr = global %arrty zeroinitializer, align 32
1106
/// ...
1107
/// %0 = load i64, i64* getelementptr inbounds
1108
///                       (%arrty, %arrty* @g_arr, i64 0, i32 3, i64 0, i32 2)
1109
/// %1 = load i64, i64* getelementptr inbounds
1110
///                      (%arrty, %arrty* @g_arr, i64 0, i32 3, i64 0, i32 1)
1111
/// %2 = sub nsw i64 %0, %1
1112
/// %size = add nsw i64 %2, 1
1113
static Value *buildFADOutermostDimensionLoad(Value *GlobalDescriptor,
1114
                                             PollyIRBuilder &Builder,
1115
2
                                             std::string ArrayName) {
1116
2
  assert(GlobalDescriptor && "invalid global descriptor given");
1117
2
1118
2
  Value *endIdx[4] = {Builder.getInt64(0), Builder.getInt32(3),
1119
2
                      Builder.getInt64(0), Builder.getInt32(2)};
1120
2
  Value *endPtr = Builder.CreateInBoundsGEP(GlobalDescriptor, endIdx,
1121
2
                                            ArrayName + "_end_ptr");
1122
2
  Value *end = Builder.CreateLoad(endPtr, ArrayName + "_end");
1123
2
1124
2
  Value *beginIdx[4] = {Builder.getInt64(0), Builder.getInt32(3),
1125
2
                        Builder.getInt64(0), Builder.getInt32(1)};
1126
2
  Value *beginPtr = Builder.CreateInBoundsGEP(GlobalDescriptor, beginIdx,
1127
2
                                              ArrayName + "_begin_ptr");
1128
2
  Value *begin = Builder.CreateLoad(beginPtr, ArrayName + "_begin");
1129
2
1130
2
  Value *size =
1131
2
      Builder.CreateNSWSub(end, begin, ArrayName + "_end_begin_delta");
1132
2
  Type *endType = dyn_cast<IntegerType>(end->getType());
1133
2
  assert(endType && "expected type of end to be integral");
1134
2
1135
2
  size = Builder.CreateNSWAdd(end,
1136
2
                              ConstantInt::get(endType, 1, /* signed = */ true),
1137
2
                              ArrayName + "_size");
1138
2
1139
2
  return size;
1140
2
}
1141
1142
289
bool IslNodeBuilder::materializeFortranArrayOutermostDimension() {
1143
622
  for (ScopArrayInfo *Array : S.arrays()) {
1144
622
    if (Array->getNumberOfDimensions() == 0)
1145
147
      continue;
1146
475
1147
475
    Value *FAD = Array->getFortranArrayDescriptor();
1148
475
    if (!FAD)
1149
473
      continue;
1150
2
1151
2
    isl_pw_aff *ParametricPwAff = Array->getDimensionSizePw(0).release();
1152
2
    assert(ParametricPwAff && "parametric pw_aff corresponding "
1153
2
                              "to outermost dimension does not "
1154
2
                              "exist");
1155
2
1156
2
    isl_id *Id = isl_pw_aff_get_dim_id(ParametricPwAff, isl_dim_param, 0);
1157
2
    isl_pw_aff_free(ParametricPwAff);
1158
2
1159
2
    assert(Id && "pw_aff is not parametric");
1160
2
1161
2
    if (IDToValue.count(Id)) {
1162
0
      isl_id_free(Id);
1163
0
      continue;
1164
0
    }
1165
2
1166
2
    Value *FinalValue =
1167
2
        buildFADOutermostDimensionLoad(FAD, Builder, Array->getName());
1168
2
    assert(FinalValue && "unable to build Fortran array "
1169
2
                         "descriptor load of outermost dimension");
1170
2
    IDToValue[Id] = FinalValue;
1171
2
    isl_id_free(Id);
1172
2
  }
1173
289
  return true;
1174
289
}
1175
1176
Value *IslNodeBuilder::preloadUnconditionally(isl_set *AccessRange,
1177
                                              isl_ast_build *Build,
1178
105
                                              Instruction *AccInst) {
1179
105
  isl_pw_multi_aff *PWAccRel = isl_pw_multi_aff_from_set(AccessRange);
1180
105
  isl_ast_expr *Access =
1181
105
      isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel);
1182
105
  auto *Address = isl_ast_expr_address_of(Access);
1183
105
  auto *AddressValue = ExprBuilder.create(Address);
1184
105
  Value *PreloadVal;
1185
105
1186
105
  // Correct the type as the SAI might have a different type than the user
1187
105
  // expects, especially if the base pointer is a struct.
1188
105
  Type *Ty = AccInst->getType();
1189
105
1190
105
  auto *Ptr = AddressValue;
1191
105
  auto Name = Ptr->getName();
1192
105
  auto AS = Ptr->getType()->getPointerAddressSpace();
1193
105
  Ptr = Builder.CreatePointerCast(Ptr, Ty->getPointerTo(AS), Name + ".cast");
1194
105
  PreloadVal = Builder.CreateLoad(Ptr, Name + ".load");
1195
105
  if (LoadInst *PreloadInst = dyn_cast<LoadInst>(PreloadVal))
1196
105
    PreloadInst->setAlignment(dyn_cast<LoadInst>(AccInst)->getAlignment());
1197
105
1198
105
  // TODO: This is only a hot fix for SCoP sequences that use the same load
1199
105
  //       instruction contained and hoisted by one of the SCoPs.
1200
105
  if (SE.isSCEVable(Ty))
1201
90
    SE.forgetValue(AccInst);
1202
105
1203
105
  return PreloadVal;
1204
105
}
1205
1206
Value *IslNodeBuilder::preloadInvariantLoad(const MemoryAccess &MA,
1207
108
                                            isl_set *Domain) {
1208
108
  isl_set *AccessRange = isl_map_range(MA.getAddressFunction().release());
1209
108
  AccessRange = isl_set_gist_params(AccessRange, S.getContext().release());
1210
108
1211
108
  if (!materializeParameters(AccessRange)) {
1212
0
    isl_set_free(AccessRange);
1213
0
    isl_set_free(Domain);
1214
0
    return nullptr;
1215
0
  }
1216
108
1217
108
  auto *Build =
1218
108
      isl_ast_build_from_context(isl_set_universe(S.getParamSpace().release()));
1219
108
  isl_set *Universe = isl_set_universe(isl_set_get_space(Domain));
1220
108
  bool AlwaysExecuted = isl_set_is_equal(Domain, Universe);
1221
108
  isl_set_free(Universe);
1222
108
1223
108
  Instruction *AccInst = MA.getAccessInstruction();
1224
108
  Type *AccInstTy = AccInst->getType();
1225
108
1226
108
  Value *PreloadVal = nullptr;
1227
108
  if (AlwaysExecuted) {
1228
84
    PreloadVal = preloadUnconditionally(AccessRange, Build, AccInst);
1229
84
    isl_ast_build_free(Build);
1230
84
    isl_set_free(Domain);
1231
84
    return PreloadVal;
1232
84
  }
1233
24
1234
24
  if (!materializeParameters(Domain)) {
1235
3
    isl_ast_build_free(Build);
1236
3
    isl_set_free(AccessRange);
1237
3
    isl_set_free(Domain);
1238
3
    return nullptr;
1239
3
  }
1240
21
1241
21
  isl_ast_expr *DomainCond = isl_ast_build_expr_from_set(Build, Domain);
1242
21
  Domain = nullptr;
1243
21
1244
21
  ExprBuilder.setTrackOverflow(true);
1245
21
  Value *Cond = ExprBuilder.create(DomainCond);
1246
21
  Value *OverflowHappened = Builder.CreateNot(ExprBuilder.getOverflowState(),
1247
21
                                              "polly.preload.cond.overflown");
1248
21
  Cond = Builder.CreateAnd(Cond, OverflowHappened, "polly.preload.cond.result");
1249
21
  ExprBuilder.setTrackOverflow(false);
1250
21
1251
21
  if (!Cond->getType()->isIntegerTy(1))
1252
0
    Cond = Builder.CreateIsNotNull(Cond);
1253
21
1254
21
  BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(),
1255
21
                                  &*Builder.GetInsertPoint(), &DT, &LI);
1256
21
  CondBB->setName("polly.preload.cond");
1257
21
1258
21
  BasicBlock *MergeBB = SplitBlock(CondBB, &CondBB->front(), &DT, &LI);
1259
21
  MergeBB->setName("polly.preload.merge");
1260
21
1261
21
  Function *F = Builder.GetInsertBlock()->getParent();
1262
21
  LLVMContext &Context = F->getContext();
1263
21
  BasicBlock *ExecBB = BasicBlock::Create(Context, "polly.preload.exec", F);
1264
21
1265
21
  DT.addNewBlock(ExecBB, CondBB);
1266
21
  if (Loop *L = LI.getLoopFor(CondBB))
1267
6
    L->addBasicBlockToLoop(ExecBB, LI);
1268
21
1269
21
  auto *CondBBTerminator = CondBB->getTerminator();
1270
21
  Builder.SetInsertPoint(CondBBTerminator);
1271
21
  Builder.CreateCondBr(Cond, ExecBB, MergeBB);
1272
21
  CondBBTerminator->eraseFromParent();
1273
21
1274
21
  Builder.SetInsertPoint(ExecBB);
1275
21
  Builder.CreateBr(MergeBB);
1276
21
1277
21
  Builder.SetInsertPoint(ExecBB->getTerminator());
1278
21
  Value *PreAccInst = preloadUnconditionally(AccessRange, Build, AccInst);
1279
21
  Builder.SetInsertPoint(MergeBB->getTerminator());
1280
21
  auto *MergePHI = Builder.CreatePHI(
1281
21
      AccInstTy, 2, "polly.preload." + AccInst->getName() + ".merge");
1282
21
  PreloadVal = MergePHI;
1283
21
1284
21
  if (!PreAccInst) {
1285
0
    PreloadVal = nullptr;
1286
0
    PreAccInst = UndefValue::get(AccInstTy);
1287
0
  }
1288
21
1289
21
  MergePHI->addIncoming(PreAccInst, ExecBB);
1290
21
  MergePHI->addIncoming(Constant::getNullValue(AccInstTy), CondBB);
1291
21
1292
21
  isl_ast_build_free(Build);
1293
21
  return PreloadVal;
1294
21
}
1295
1296
bool IslNodeBuilder::preloadInvariantEquivClass(
1297
147
    InvariantEquivClassTy &IAClass) {
1298
147
  // For an equivalence class of invariant loads we pre-load the representing
1299
147
  // element with the unified execution context. However, we have to map all
1300
147
  // elements of the class to the one preloaded load as they are referenced
1301
147
  // during the code generation and therefor need to be mapped.
1302
147
  const MemoryAccessList &MAs = IAClass.InvariantAccesses;
1303
147
  if (MAs.empty())
1304
2
    return true;
1305
145
1306
145
  MemoryAccess *MA = MAs.front();
1307
145
  assert(MA->isArrayKind() && MA->isRead());
1308
145
1309
145
  // If the access function was already mapped, the preload of this equivalence
1310
145
  // class was triggered earlier already and doesn't need to be done again.
1311
145
  if (ValueMap.count(MA->getAccessInstruction()))
1312
30
    return true;
1313
115
1314
115
  // Check for recursion which can be caused by additional constraints, e.g.,
1315
115
  // non-finite loop constraints. In such a case we have to bail out and insert
1316
115
  // a "false" runtime check that will cause the original code to be executed.
1317
115
  auto PtrId = std::make_pair(IAClass.IdentifyingPointer, IAClass.AccessType);
1318
115
  if (!PreloadedPtrs.insert(PtrId).second)
1319
4
    return false;
1320
111
1321
111
  // The execution context of the IAClass.
1322
111
  isl::set &ExecutionCtx = IAClass.ExecutionContext;
1323
111
1324
111
  // If the base pointer of this class is dependent on another one we have to
1325
111
  // make sure it was preloaded already.
1326
111
  auto *SAI = MA->getScopArrayInfo();
1327
111
  if (auto *BaseIAClass = S.lookupInvariantEquivClass(SAI->getBasePtr())) {
1328
22
    if (!preloadInvariantEquivClass(*BaseIAClass))
1329
1
      return false;
1330
21
1331
21
    // After we preloaded the BaseIAClass we adjusted the BaseExecutionCtx and
1332
21
    // we need to refine the ExecutionCtx.
1333
21
    isl::set BaseExecutionCtx = BaseIAClass->ExecutionContext;
1334
21
    ExecutionCtx = ExecutionCtx.intersect(BaseExecutionCtx);
1335
21
  }
1336
111
1337
111
  // If the size of a dimension is dependent on another class, make sure it is
1338
111
  // preloaded.
1339
111
  
for (unsigned i = 1, e = SAI->getNumberOfDimensions(); 110
i < e110
;
++i0
) {
1340
2
    const SCEV *Dim = SAI->getDimensionSize(i);
1341
2
    SetVector<Value *> Values;
1342
2
    findValues(Dim, SE, Values);
1343
2
    for (auto *Val : Values) {
1344
2
      if (auto *BaseIAClass = S.lookupInvariantEquivClass(Val)) {
1345
2
        if (!preloadInvariantEquivClass(*BaseIAClass))
1346
2
          return false;
1347
0
1348
0
        // After we preloaded the BaseIAClass we adjusted the BaseExecutionCtx
1349
0
        // and we need to refine the ExecutionCtx.
1350
0
        isl::set BaseExecutionCtx = BaseIAClass->ExecutionContext;
1351
0
        ExecutionCtx = ExecutionCtx.intersect(BaseExecutionCtx);
1352
0
      }
1353
2
    }
1354
2
  }
1355
110
1356
110
  Instruction *AccInst = MA->getAccessInstruction();
1357
108
  Type *AccInstTy = AccInst->getType();
1358
108
1359
108
  Value *PreloadVal = preloadInvariantLoad(*MA, ExecutionCtx.copy());
1360
108
  if (!PreloadVal)
1361
3
    return false;
1362
105
1363
115
  
for (const MemoryAccess *MA : MAs)105
{
1364
115
    Instruction *MAAccInst = MA->getAccessInstruction();
1365
115
    assert(PreloadVal->getType() == MAAccInst->getType());
1366
115
    ValueMap[MAAccInst] = PreloadVal;
1367
115
  }
1368
105
1369
105
  if (SE.isSCEVable(AccInstTy)) {
1370
90
    isl_id *ParamId = S.getIdForParam(SE.getSCEV(AccInst)).release();
1371
90
    if (ParamId)
1372
21
      IDToValue[ParamId] = PreloadVal;
1373
90
    isl_id_free(ParamId);
1374
90
  }
1375
105
1376
105
  BasicBlock *EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock();
1377
105
  auto *Alloca = new AllocaInst(AccInstTy, DL.getAllocaAddrSpace(),
1378
105
                                AccInst->getName() + ".preload.s2a");
1379
105
  Alloca->insertBefore(&*EntryBB->getFirstInsertionPt());
1380
105
  Builder.CreateStore(PreloadVal, Alloca);
1381
105
  ValueMapT PreloadedPointer;
1382
105
  PreloadedPointer[PreloadVal] = AccInst;
1383
105
  Annotator.addAlternativeAliasBases(PreloadedPointer);
1384
105
1385
105
  for (auto *DerivedSAI : SAI->getDerivedSAIs()) {
1386
46
    Value *BasePtr = DerivedSAI->getBasePtr();
1387
46
1388
52
    for (const MemoryAccess *MA : MAs) {
1389
52
      // As the derived SAI information is quite coarse, any load from the
1390
52
      // current SAI could be the base pointer of the derived SAI, however we
1391
52
      // should only change the base pointer of the derived SAI if we actually
1392
52
      // preloaded it.
1393
52
      if (BasePtr == MA->getOriginalBaseAddr()) {
1394
0
        assert(BasePtr->getType() == PreloadVal->getType());
1395
0
        DerivedSAI->setBasePtr(PreloadVal);
1396
0
      }
1397
52
1398
52
      // For scalar derived SAIs we remap the alloca used for the derived value.
1399
52
      if (BasePtr == MA->getAccessInstruction())
1400
34
        ScalarMap[DerivedSAI] = Alloca;
1401
52
    }
1402
46
  }
1403
105
1404
115
  for (const MemoryAccess *MA : MAs) {
1405
115
    Instruction *MAAccInst = MA->getAccessInstruction();
1406
115
    // Use the escape system to get the correct value to users outside the SCoP.
1407
115
    BlockGenerator::EscapeUserVectorTy EscapeUsers;
1408
115
    for (auto *U : MAAccInst->users())
1409
120
      if (Instruction *UI = dyn_cast<Instruction>(U))
1410
120
        if (!S.contains(UI))
1411
7
          EscapeUsers.push_back(UI);
1412
115
1413
115
    if (EscapeUsers.empty())
1414
109
      continue;
1415
6
1416
6
    EscapeMap[MA->getAccessInstruction()] =
1417
6
        std::make_pair(Alloca, std::move(EscapeUsers));
1418
6
  }
1419
105
1420
105
  return true;
1421
105
}
1422
1423
293
void IslNodeBuilder::allocateNewArrays(BBPair StartExitBlocks) {
1424
635
  for (auto &SAI : S.arrays()) {
1425
635
    if (SAI->getBasePtr())
1426
625
      continue;
1427
10
1428
10
    assert(SAI->getNumberOfDimensions() > 0 && SAI->getDimensionSize(0) &&
1429
10
           "The size of the outermost dimension is used to declare newly "
1430
10
           "created arrays that require memory allocation.");
1431
10
1432
10
    Type *NewArrayType = nullptr;
1433
10
1434
10
    // Get the size of the array = size(dim_1)*...*size(dim_n)
1435
10
    uint64_t ArraySizeInt = 1;
1436
30
    for (int i = SAI->getNumberOfDimensions() - 1; i >= 0; 
i--20
) {
1437
20
      auto *DimSize = SAI->getDimensionSize(i);
1438
20
      unsigned UnsignedDimSize = static_cast<const SCEVConstant *>(DimSize)
1439
20
                                     ->getAPInt()
1440
20
                                     .getLimitedValue();
1441
20
1442
20
      if (!NewArrayType)
1443
10
        NewArrayType = SAI->getElementType();
1444
20
1445
20
      NewArrayType = ArrayType::get(NewArrayType, UnsignedDimSize);
1446
20
      ArraySizeInt *= UnsignedDimSize;
1447
20
    }
1448
10
1449
10
    if (SAI->isOnHeap()) {
1450
3
      LLVMContext &Ctx = NewArrayType->getContext();
1451
3
1452
3
      // Get the IntPtrTy from the Datalayout
1453
3
      auto IntPtrTy = DL.getIntPtrType(Ctx);
1454
3
1455
3
      // Get the size of the element type in bits
1456
3
      unsigned Size = SAI->getElemSizeInBytes();
1457
3
1458
3
      // Insert the malloc call at polly.start
1459
3
      auto InstIt = std::get<0>(StartExitBlocks)->getTerminator();
1460
3
      auto *CreatedArray = CallInst::CreateMalloc(
1461
3
          &*InstIt, IntPtrTy, SAI->getElementType(),
1462
3
          ConstantInt::get(Type::getInt64Ty(Ctx), Size),
1463
3
          ConstantInt::get(Type::getInt64Ty(Ctx), ArraySizeInt), nullptr,
1464
3
          SAI->getName());
1465
3
1466
3
      SAI->setBasePtr(CreatedArray);
1467
3
1468
3
      // Insert the free call at polly.exiting
1469
3
      CallInst::CreateFree(CreatedArray,
1470
3
                           std::get<1>(StartExitBlocks)->getTerminator());
1471
7
    } else {
1472
7
      auto InstIt = Builder.GetInsertBlock()
1473
7
                        ->getParent()
1474
7
                        ->getEntryBlock()
1475
7
                        .getTerminator();
1476
7
1477
7
      auto *CreatedArray = new AllocaInst(NewArrayType, DL.getAllocaAddrSpace(),
1478
7
                                          SAI->getName(), &*InstIt);
1479
7
      CreatedArray->setAlignment(PollyTargetFirstLevelCacheLineSize);
1480
7
      SAI->setBasePtr(CreatedArray);
1481
7
    }
1482
10
  }
1483
293
}
1484
1485
293
bool IslNodeBuilder::preloadInvariantLoads() {
1486
293
  auto &InvariantEquivClasses = S.getInvariantAccesses();
1487
293
  if (InvariantEquivClasses.empty())
1488
229
    return true;
1489
64
1490
64
  BasicBlock *PreLoadBB = SplitBlock(Builder.GetInsertBlock(),
1491
64
                                     &*Builder.GetInsertPoint(), &DT, &LI);
1492
64
  PreLoadBB->setName("polly.preload.begin");
1493
64
  Builder.SetInsertPoint(&PreLoadBB->front());
1494
64
1495
64
  for (auto &IAClass : InvariantEquivClasses)
1496
111
    if (!preloadInvariantEquivClass(IAClass))
1497
4
      return false;
1498
64
1499
64
  
return true60
;
1500
64
}
1501
1502
289
void IslNodeBuilder::addParameters(__isl_take isl_set *Context) {
1503
289
  // Materialize values for the parameters of the SCoP.
1504
289
  materializeParameters();
1505
289
1506
289
  // materialize the outermost dimension parameters for a Fortran array.
1507
289
  // NOTE: materializeParameters() does not work since it looks through
1508
289
  // the SCEVs. We don't have a corresponding SCEV for the array size
1509
289
  // parameter
1510
289
  materializeFortranArrayOutermostDimension();
1511
289
1512
289
  // Generate values for the current loop iteration for all surrounding loops.
1513
289
  //
1514
289
  // We may also reference loops outside of the scop which do not contain the
1515
289
  // scop itself, but as the number of such scops may be arbitrarily large we do
1516
289
  // not generate code for them here, but only at the point of code generation
1517
289
  // where these values are needed.
1518
289
  Loop *L = LI.getLoopFor(S.getEntry());
1519
289
1520
509
  while (L != nullptr && 
S.contains(L)245
)
1521
220
    L = L->getParentLoop();
1522
289
1523
314
  while (L != nullptr) {
1524
25
    materializeNonScopLoopInductionVariable(L);
1525
25
    L = L->getParentLoop();
1526
25
  }
1527
289
1528
289
  isl_set_free(Context);
1529
289
}
1530
1531
197
Value *IslNodeBuilder::generateSCEV(const SCEV *Expr) {
1532
197
  /// We pass the insert location of our Builder, as Polly ensures during IR
1533
197
  /// generation that there is always a valid CFG into which instructions are
1534
197
  /// inserted. As a result, the insertpoint is known to be always followed by a
1535
197
  /// terminator instruction. This means the insert point may be specified by a
1536
197
  /// terminator instruction, but it can never point to an ->end() iterator
1537
197
  /// which does not have a corresponding instruction. Hence, dereferencing
1538
197
  /// the insertpoint to obtain an instruction is known to be save.
1539
197
  ///
1540
197
  /// We also do not need to update the Builder here, as new instructions are
1541
197
  /// always inserted _before_ the given InsertLocation. As a result, the
1542
197
  /// insert location remains valid.
1543
197
  assert(Builder.GetInsertBlock()->end() != Builder.GetInsertPoint() &&
1544
197
         "Insert location points after last valid instruction");
1545
197
  Instruction *InsertLocation = &*Builder.GetInsertPoint();
1546
197
  return expandCodeFor(S, SE, DL, "polly", Expr, Expr->getType(),
1547
197
                       InsertLocation, &ValueMap,
1548
197
                       StartBlock->getSinglePredecessor());
1549
197
}
1550
1551
/// The AST expression we generate to perform the run-time check assumes
1552
/// computations on integer types of infinite size. As we only use 64-bit
1553
/// arithmetic we check for overflows, in case of which we set the result
1554
/// of this run-time check to false to be conservatively correct,
1555
289
Value *IslNodeBuilder::createRTC(isl_ast_expr *Condition) {
1556
289
  auto ExprBuilder = getExprBuilder();
1557
289
1558
289
  // In case the AST expression has integers larger than 64 bit, bail out. The
1559
289
  // resulting LLVM-IR will contain operations on types that use more than 64
1560
289
  // bits. These are -- in case wrapping intrinsics are used -- translated to
1561
289
  // runtime library calls that are not available on all systems (e.g., Android)
1562
289
  // and consequently will result in linker errors.
1563
289
  if (ExprBuilder.hasLargeInts(isl::manage_copy(Condition))) {
1564
7
    isl_ast_expr_free(Condition);
1565
7
    return Builder.getFalse();
1566
7
  }
1567
282
1568
282
  ExprBuilder.setTrackOverflow(true);
1569
282
  Value *RTC = ExprBuilder.create(Condition);
1570
282
  if (!RTC->getType()->isIntegerTy(1))
1571
173
    RTC = Builder.CreateIsNotNull(RTC);
1572
282
  Value *OverflowHappened =
1573
282
      Builder.CreateNot(ExprBuilder.getOverflowState(), "polly.rtc.overflown");
1574
282
1575
282
  if (PollyGenerateRTCPrint) {
1576
0
    auto *F = Builder.GetInsertBlock()->getParent();
1577
0
    RuntimeDebugBuilder::createCPUPrinter(
1578
0
        Builder,
1579
0
        "F: " + F->getName().str() + " R: " + S.getRegion().getNameStr() +
1580
0
            "RTC: ",
1581
0
        RTC, " Overflow: ", OverflowHappened,
1582
0
        "\n"
1583
0
        "  (0 failed, -1 succeeded)\n"
1584
0
        "  (if one or both are 0 falling back to original code, if both are -1 "
1585
0
        "executing Polly code)\n");
1586
0
  }
1587
282
1588
282
  RTC = Builder.CreateAnd(RTC, OverflowHappened, "polly.rtc.result");
1589
282
  ExprBuilder.setTrackOverflow(false);
1590
282
1591
282
  if (!isa<ConstantInt>(RTC))
1592
108
    VersionedScops++;
1593
282
1594
282
  return RTC;
1595
282
}