Coverage Report

Created: 2019-04-25 15:07

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/polly/lib/CodeGen/IslNodeBuilder.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- IslNodeBuilder.cpp - Translate an isl AST into a LLVM-IR AST -------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file contains the IslNodeBuilder, a class to translate an isl AST into
10
// a LLVM-IR AST.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "polly/CodeGen/IslNodeBuilder.h"
15
#include "polly/CodeGen/BlockGenerators.h"
16
#include "polly/CodeGen/CodeGeneration.h"
17
#include "polly/CodeGen/IslAst.h"
18
#include "polly/CodeGen/IslExprBuilder.h"
19
#include "polly/CodeGen/LoopGeneratorsGOMP.h"
20
#include "polly/CodeGen/LoopGeneratorsKMP.h"
21
#include "polly/CodeGen/RuntimeDebugBuilder.h"
22
#include "polly/Options.h"
23
#include "polly/ScopInfo.h"
24
#include "polly/Support/ISLTools.h"
25
#include "polly/Support/SCEVValidator.h"
26
#include "polly/Support/ScopHelper.h"
27
#include "llvm/ADT/APInt.h"
28
#include "llvm/ADT/PostOrderIterator.h"
29
#include "llvm/ADT/SetVector.h"
30
#include "llvm/ADT/SmallPtrSet.h"
31
#include "llvm/ADT/Statistic.h"
32
#include "llvm/Analysis/LoopInfo.h"
33
#include "llvm/Analysis/RegionInfo.h"
34
#include "llvm/Analysis/ScalarEvolution.h"
35
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
36
#include "llvm/IR/BasicBlock.h"
37
#include "llvm/IR/Constant.h"
38
#include "llvm/IR/Constants.h"
39
#include "llvm/IR/DataLayout.h"
40
#include "llvm/IR/DerivedTypes.h"
41
#include "llvm/IR/Dominators.h"
42
#include "llvm/IR/Function.h"
43
#include "llvm/IR/InstrTypes.h"
44
#include "llvm/IR/Instruction.h"
45
#include "llvm/IR/Instructions.h"
46
#include "llvm/IR/Type.h"
47
#include "llvm/IR/Value.h"
48
#include "llvm/Support/Casting.h"
49
#include "llvm/Support/CommandLine.h"
50
#include "llvm/Support/ErrorHandling.h"
51
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
52
#include "isl/aff.h"
53
#include "isl/aff_type.h"
54
#include "isl/ast.h"
55
#include "isl/ast_build.h"
56
#include "isl/isl-noexceptions.h"
57
#include "isl/map.h"
58
#include "isl/set.h"
59
#include "isl/union_map.h"
60
#include "isl/union_set.h"
61
#include "isl/val.h"
62
#include <algorithm>
63
#include <cassert>
64
#include <cstdint>
65
#include <cstring>
66
#include <string>
67
#include <utility>
68
#include <vector>
69
70
using namespace llvm;
71
using namespace polly;
72
73
#define DEBUG_TYPE "polly-codegen"
74
75
STATISTIC(VersionedScops, "Number of SCoPs that required versioning.");
76
77
STATISTIC(SequentialLoops, "Number of generated sequential for-loops");
78
STATISTIC(ParallelLoops, "Number of generated parallel for-loops");
79
STATISTIC(VectorLoops, "Number of generated vector for-loops");
80
STATISTIC(IfConditions, "Number of generated if-conditions");
81
82
/// OpenMP backend options
83
enum class OpenMPBackend { GNU, LLVM };
84
85
static cl::opt<bool> PollyGenerateRTCPrint(
86
    "polly-codegen-emit-rtc-print",
87
    cl::desc("Emit code that prints the runtime check result dynamically."),
88
    cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
89
90
// If this option is set we always use the isl AST generator to regenerate
91
// memory accesses. Without this option set we regenerate expressions using the
92
// original SCEV expressions and only generate new expressions in case the
93
// access relation has been changed and consequently must be regenerated.
94
static cl::opt<bool> PollyGenerateExpressions(
95
    "polly-codegen-generate-expressions",
96
    cl::desc("Generate AST expressions for unmodified and modified accesses"),
97
    cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
98
99
static cl::opt<int> PollyTargetFirstLevelCacheLineSize(
100
    "polly-target-first-level-cache-line-size",
101
    cl::desc("The size of the first level cache line size specified in bytes."),
102
    cl::Hidden, cl::init(64), cl::ZeroOrMore, cl::cat(PollyCategory));
103
104
static cl::opt<OpenMPBackend> PollyOmpBackend(
105
    "polly-omp-backend", cl::desc("Choose the OpenMP library to use:"),
106
    cl::values(clEnumValN(OpenMPBackend::GNU, "GNU", "GNU OpenMP"),
107
               clEnumValN(OpenMPBackend::LLVM, "LLVM", "LLVM OpenMP")),
108
    cl::Hidden, cl::init(OpenMPBackend::GNU), cl::cat(PollyCategory));
109
110
isl::ast_expr IslNodeBuilder::getUpperBound(isl::ast_node For,
111
344
                                            ICmpInst::Predicate &Predicate) {
112
344
  isl::ast_expr Cond = For.for_get_cond();
113
344
  isl::ast_expr Iterator = For.for_get_iterator();
114
344
  assert(isl_ast_expr_get_type(Cond.get()) == isl_ast_expr_op &&
115
344
         "conditional expression is not an atomic upper bound");
116
344
117
344
  isl_ast_op_type OpType = isl_ast_expr_get_op_type(Cond.get());
118
344
119
344
  switch (OpType) {
120
344
  case isl_ast_op_le:
121
256
    Predicate = ICmpInst::ICMP_SLE;
122
256
    break;
123
344
  case isl_ast_op_lt:
124
88
    Predicate = ICmpInst::ICMP_SLT;
125
88
    break;
126
344
  default:
127
0
    llvm_unreachable("Unexpected comparison type in loop condition");
128
344
  }
129
344
130
344
  isl::ast_expr Arg0 = Cond.get_op_arg(0);
131
344
132
344
  assert(isl_ast_expr_get_type(Arg0.get()) == isl_ast_expr_id &&
133
344
         "conditional expression is not an atomic upper bound");
134
344
135
344
  isl::id UBID = Arg0.get_id();
136
344
137
344
  assert(isl_ast_expr_get_type(Iterator.get()) == isl_ast_expr_id &&
138
344
         "Could not get the iterator");
139
344
140
344
  isl::id IteratorID = Iterator.get_id();
141
344
142
344
  assert(UBID.get() == IteratorID.get() &&
143
344
         "conditional expression is not an atomic upper bound");
144
344
145
344
  return Cond.get_op_arg(1);
146
344
}
147
148
/// Return true if a return value of Predicate is true for the value represented
149
/// by passed isl_ast_expr_int.
150
static bool checkIslAstExprInt(__isl_take isl_ast_expr *Expr,
151
42
                               isl_bool (*Predicate)(__isl_keep isl_val *)) {
152
42
  if (isl_ast_expr_get_type(Expr) != isl_ast_expr_int) {
153
0
    isl_ast_expr_free(Expr);
154
0
    return false;
155
0
  }
156
42
  auto ExprVal = isl_ast_expr_get_val(Expr);
157
42
  isl_ast_expr_free(Expr);
158
42
  if (Predicate(ExprVal) != isl_bool_true) {
159
0
    isl_val_free(ExprVal);
160
0
    return false;
161
0
  }
162
42
  isl_val_free(ExprVal);
163
42
  return true;
164
42
}
165
166
22
int IslNodeBuilder::getNumberOfIterations(isl::ast_node For) {
167
22
  assert(isl_ast_node_get_type(For.get()) == isl_ast_node_for);
168
22
  isl::ast_node Body = For.for_get_body();
169
22
170
22
  // First, check if we can actually handle this code.
171
22
  switch (isl_ast_node_get_type(Body.get())) {
172
22
  case isl_ast_node_user:
173
20
    break;
174
22
  case isl_ast_node_block: {
175
2
    isl::ast_node_list List = Body.block_get_children();
176
3
    for (isl::ast_node Node : List) {
177
3
      isl_ast_node_type NodeType = isl_ast_node_get_type(Node.get());
178
3
      if (NodeType != isl_ast_node_user)
179
1
        return -1;
180
3
    }
181
2
    
break1
;
182
2
  }
183
2
  default:
184
0
    return -1;
185
21
  }
186
21
187
21
  isl::ast_expr Init = For.for_get_init();
188
21
  if (!checkIslAstExprInt(Init.release(), isl_val_is_zero))
189
0
    return -1;
190
21
  isl::ast_expr Inc = For.for_get_inc();
191
21
  if (!checkIslAstExprInt(Inc.release(), isl_val_is_one))
192
0
    return -1;
193
21
  CmpInst::Predicate Predicate;
194
21
  isl::ast_expr UB = getUpperBound(For, Predicate);
195
21
  if (isl_ast_expr_get_type(UB.get()) != isl_ast_expr_int)
196
2
    return -1;
197
19
  isl::val UpVal = UB.get_val();
198
19
  int NumberIterations = UpVal.get_num_si();
199
19
  if (NumberIterations < 0)
200
0
    return -1;
201
19
  if (Predicate == CmpInst::ICMP_SLT)
202
0
    return NumberIterations;
203
19
  else
204
19
    return NumberIterations + 1;
205
19
}
206
207
/// Extract the values and SCEVs needed to generate code for a block.
208
static int findReferencesInBlock(struct SubtreeReferences &References,
209
42
                                 const ScopStmt *Stmt, BasicBlock *BB) {
210
251
  for (Instruction &Inst : *BB) {
211
251
    // Include invariant loads
212
251
    if (isa<LoadInst>(Inst))
213
25
      if (Value *InvariantLoad = References.GlobalMap.lookup(&Inst))
214
7
        References.Values.insert(InvariantLoad);
215
251
216
486
    for (Value *SrcVal : Inst.operands()) {
217
486
      auto *Scope = References.LI.getLoopFor(BB);
218
486
      if (canSynthesize(SrcVal, References.S, &References.SE, Scope)) {
219
356
        References.SCEVs.insert(References.SE.getSCEVAtScope(SrcVal, Scope));
220
356
        continue;
221
356
      } else 
if (Value *130
NewVal130
= References.GlobalMap.lookup(SrcVal))
222
6
        References.Values.insert(NewVal);
223
486
    }
224
251
  }
225
42
  return 0;
226
42
}
227
228
void addReferencesFromStmt(const ScopStmt *Stmt, void *UserPtr,
229
42
                           bool CreateScalarRefs) {
230
42
  auto &References = *static_cast<struct SubtreeReferences *>(UserPtr);
231
42
232
42
  if (Stmt->isBlockStmt())
233
42
    findReferencesInBlock(References, Stmt, Stmt->getBasicBlock());
234
0
  else {
235
0
    assert(Stmt->isRegionStmt() &&
236
0
           "Stmt was neither block nor region statement");
237
0
    for (BasicBlock *BB : Stmt->getRegion()->blocks())
238
0
      findReferencesInBlock(References, Stmt, BB);
239
0
  }
240
42
241
60
  for (auto &Access : *Stmt) {
242
60
    if (References.ParamSpace) {
243
0
      isl::space ParamSpace = Access->getLatestAccessRelation().get_space();
244
0
      (*References.ParamSpace) =
245
0
          References.ParamSpace->align_params(ParamSpace);
246
0
    }
247
60
248
60
    if (Access->isLatestArrayKind()) {
249
56
      auto *BasePtr = Access->getLatestScopArrayInfo()->getBasePtr();
250
56
      if (Instruction *OpInst = dyn_cast<Instruction>(BasePtr))
251
11
        if (Stmt->getParent()->contains(OpInst))
252
4
          continue;
253
52
254
52
      References.Values.insert(BasePtr);
255
52
      continue;
256
52
    }
257
4
258
4
    if (CreateScalarRefs)
259
4
      References.Values.insert(References.BlockGen.getOrCreateAlloca(*Access));
260
4
  }
261
42
}
262
263
/// Extract the out-of-scop values and SCEVs referenced from a set describing
264
/// a ScopStmt.
265
///
266
/// This includes the SCEVUnknowns referenced by the SCEVs used in the
267
/// statement and the base pointers of the memory accesses. For scalar
268
/// statements we force the generation of alloca memory locations and list
269
/// these locations in the set of out-of-scop values as well.
270
///
271
/// @param Set     A set which references the ScopStmt we are interested in.
272
/// @param UserPtr A void pointer that can be casted to a SubtreeReferences
273
///                structure.
274
static void addReferencesFromStmtSet(isl::set Set,
275
42
                                     struct SubtreeReferences *UserPtr) {
276
42
  isl::id Id = Set.get_tuple_id();
277
42
  auto *Stmt = static_cast<const ScopStmt *>(Id.get_user());
278
42
  return addReferencesFromStmt(Stmt, UserPtr);
279
42
}
280
281
/// Extract the out-of-scop values and SCEVs referenced from a union set
282
/// referencing multiple ScopStmts.
283
///
284
/// This includes the SCEVUnknowns referenced by the SCEVs used in the
285
/// statement and the base pointers of the memory accesses. For scalar
286
/// statements we force the generation of alloca memory locations and list
287
/// these locations in the set of out-of-scop values as well.
288
///
289
/// @param USet       A union set referencing the ScopStmts we are interested
290
///                   in.
291
/// @param References The SubtreeReferences data structure through which
292
///                   results are returned and further information is
293
///                   provided.
294
static void
295
addReferencesFromStmtUnionSet(isl::union_set USet,
296
41
                              struct SubtreeReferences &References) {
297
41
298
41
  for (isl::set Set : USet.get_set_list())
299
42
    addReferencesFromStmtSet(Set, &References);
300
41
}
301
302
__isl_give isl_union_map *
303
60
IslNodeBuilder::getScheduleForAstNode(__isl_keep isl_ast_node *For) {
304
60
  return IslAstInfo::getSchedule(For);
305
60
}
306
307
void IslNodeBuilder::getReferencesInSubtree(__isl_keep isl_ast_node *For,
308
                                            SetVector<Value *> &Values,
309
41
                                            SetVector<const Loop *> &Loops) {
310
41
  SetVector<const SCEV *> SCEVs;
311
41
  struct SubtreeReferences References = {
312
41
      LI, SE, S, ValueMap, Values, SCEVs, getBlockGenerator(), nullptr};
313
41
314
41
  for (const auto &I : IDToValue)
315
29
    Values.insert(I.second);
316
41
317
41
  // NOTE: this is populated in IslNodeBuilder::addParameters
318
41
  for (const auto &I : OutsideLoopIterations)
319
4
    Values.insert(cast<SCEVUnknown>(I.second)->getValue());
320
41
321
41
  isl::union_set Schedule =
322
41
      isl::manage(isl_union_map_domain(getScheduleForAstNode(For)));
323
41
  addReferencesFromStmtUnionSet(Schedule, References);
324
41
325
265
  for (const SCEV *Expr : SCEVs) {
326
265
    findValues(Expr, SE, Values);
327
265
    findLoops(Expr, Loops);
328
265
  }
329
41
330
102
  Values.remove_if([](const Value *V) { return isa<GlobalValue>(V); });
331
41
332
41
  /// Note: Code generation of induction variables of loops outside Scops
333
41
  ///
334
41
  /// Remove loops that contain the scop or that are part of the scop, as they
335
41
  /// are considered local. This leaves only loops that are before the scop, but
336
41
  /// do not contain the scop itself.
337
41
  /// We ignore loops perfectly contained in the Scop because these are already
338
41
  /// generated at `IslNodeBuilder::addParameters`. These `Loops` are loops
339
41
  /// whose induction variables are referred to by the Scop, but the Scop is not
340
41
  /// fully contained in these Loops. Since there can be many of these,
341
41
  /// we choose to codegen these on-demand.
342
41
  /// @see IslNodeBuilder::materializeNonScopLoopInductionVariable.
343
64
  Loops.remove_if([this](const Loop *L) {
344
64
    return S.contains(L) || 
L->contains(S.getEntry())5
;
345
64
  });
346
41
347
41
  // Contains Values that may need to be replaced with other values
348
41
  // due to replacements from the ValueMap. We should make sure
349
41
  // that we return correctly remapped values.
350
41
  // NOTE: this code path is tested by:
351
41
  //     1.  test/Isl/CodeGen/OpenMP/single_loop_with_loop_invariant_baseptr.ll
352
41
  //     2.  test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll
353
41
  SetVector<Value *> ReplacedValues;
354
83
  for (Value *V : Values) {
355
83
    ReplacedValues.insert(getLatestValue(V));
356
83
  }
357
41
  Values = ReplacedValues;
358
41
}
359
360
41
void IslNodeBuilder::updateValues(ValueMapT &NewValues) {
361
41
  SmallPtrSet<Value *, 5> Inserted;
362
41
363
41
  for (const auto &I : IDToValue) {
364
29
    IDToValue[I.first] = NewValues[I.second];
365
29
    Inserted.insert(I.second);
366
29
  }
367
41
368
81
  for (const auto &I : NewValues) {
369
81
    if (Inserted.count(I.first))
370
0
      continue;
371
81
372
81
    ValueMap[I.first] = I.second;
373
81
  }
374
41
}
375
376
83
Value *IslNodeBuilder::getLatestValue(Value *Original) const {
377
83
  auto It = ValueMap.find(Original);
378
83
  if (It == ValueMap.end())
379
80
    return Original;
380
3
  return It->second;
381
3
}
382
383
void IslNodeBuilder::createUserVector(__isl_take isl_ast_node *User,
384
                                      std::vector<Value *> &IVS,
385
                                      __isl_take isl_id *IteratorID,
386
20
                                      __isl_take isl_union_map *Schedule) {
387
20
  isl_ast_expr *Expr = isl_ast_node_user_get_expr(User);
388
20
  isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0);
389
20
  isl_id *Id = isl_ast_expr_get_id(StmtExpr);
390
20
  isl_ast_expr_free(StmtExpr);
391
20
  ScopStmt *Stmt = (ScopStmt *)isl_id_get_user(Id);
392
20
  std::vector<LoopToScevMapT> VLTS(IVS.size());
393
20
394
20
  isl_union_set *Domain = isl_union_set_from_set(Stmt->getDomain().release());
395
20
  Schedule = isl_union_map_intersect_domain(Schedule, Domain);
396
20
  isl_map *S = isl_map_from_union_map(Schedule);
397
20
398
20
  auto *NewAccesses = createNewAccesses(Stmt, User);
399
20
  createSubstitutionsVector(Expr, Stmt, VLTS, IVS, IteratorID);
400
20
  VectorBlockGenerator::generate(BlockGen, *Stmt, VLTS, S, NewAccesses);
401
20
  isl_id_to_ast_expr_free(NewAccesses);
402
20
  isl_map_free(S);
403
20
  isl_id_free(Id);
404
20
  isl_ast_node_free(User);
405
20
}
406
407
43
void IslNodeBuilder::createMark(__isl_take isl_ast_node *Node) {
408
43
  auto *Id = isl_ast_node_mark_get_id(Node);
409
43
  auto Child = isl_ast_node_mark_get_node(Node);
410
43
  isl_ast_node_free(Node);
411
43
  // If a child node of a 'SIMD mark' is a loop that has a single iteration,
412
43
  // it will be optimized away and we should skip it.
413
43
  if (strcmp(isl_id_get_name(Id), "SIMD") == 0 &&
414
43
      
isl_ast_node_get_type(Child) == isl_ast_node_for14
) {
415
2
    bool Vector = PollyVectorizerChoice == VECTORIZER_POLLY;
416
2
    int VectorWidth = getNumberOfIterations(isl::manage_copy(Child));
417
2
    if (Vector && 1 < VectorWidth && VectorWidth <= 16)
418
2
      createForVector(Child, VectorWidth);
419
0
    else
420
0
      createForSequential(isl::manage(Child), true);
421
2
    isl_id_free(Id);
422
2
    return;
423
2
  }
424
41
  if (strcmp(isl_id_get_name(Id), "Inter iteration alias-free") == 0) {
425
2
    auto *BasePtr = static_cast<Value *>(isl_id_get_user(Id));
426
2
    Annotator.addInterIterationAliasFreeBasePtr(BasePtr);
427
2
  }
428
41
  create(Child);
429
41
  isl_id_free(Id);
430
41
}
431
432
void IslNodeBuilder::createForVector(__isl_take isl_ast_node *For,
433
19
                                     int VectorWidth) {
434
19
  isl_ast_node *Body = isl_ast_node_for_get_body(For);
435
19
  isl_ast_expr *Init = isl_ast_node_for_get_init(For);
436
19
  isl_ast_expr *Inc = isl_ast_node_for_get_inc(For);
437
19
  isl_ast_expr *Iterator = isl_ast_node_for_get_iterator(For);
438
19
  isl_id *IteratorID = isl_ast_expr_get_id(Iterator);
439
19
440
19
  Value *ValueLB = ExprBuilder.create(Init);
441
19
  Value *ValueInc = ExprBuilder.create(Inc);
442
19
443
19
  Type *MaxType = ExprBuilder.getType(Iterator);
444
19
  MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType());
445
19
  MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType());
446
19
447
19
  if (MaxType != ValueLB->getType())
448
0
    ValueLB = Builder.CreateSExt(ValueLB, MaxType);
449
19
  if (MaxType != ValueInc->getType())
450
0
    ValueInc = Builder.CreateSExt(ValueInc, MaxType);
451
19
452
19
  std::vector<Value *> IVS(VectorWidth);
453
19
  IVS[0] = ValueLB;
454
19
455
119
  for (int i = 1; i < VectorWidth; 
i++100
)
456
100
    IVS[i] = Builder.CreateAdd(IVS[i - 1], ValueInc, "p_vector_iv");
457
19
458
19
  isl_union_map *Schedule = getScheduleForAstNode(For);
459
19
  assert(Schedule && "For statement annotation does not contain its schedule");
460
19
461
19
  IDToValue[IteratorID] = ValueLB;
462
19
463
19
  switch (isl_ast_node_get_type(Body)) {
464
19
  case isl_ast_node_user:
465
18
    createUserVector(Body, IVS, isl_id_copy(IteratorID),
466
18
                     isl_union_map_copy(Schedule));
467
18
    break;
468
19
  case isl_ast_node_block: {
469
1
    isl_ast_node_list *List = isl_ast_node_block_get_children(Body);
470
1
471
3
    for (int i = 0; i < isl_ast_node_list_n_ast_node(List); 
++i2
)
472
2
      createUserVector(isl_ast_node_list_get_ast_node(List, i), IVS,
473
2
                       isl_id_copy(IteratorID), isl_union_map_copy(Schedule));
474
1
475
1
    isl_ast_node_free(Body);
476
1
    isl_ast_node_list_free(List);
477
1
    break;
478
19
  }
479
19
  default:
480
0
    isl_ast_node_dump(Body);
481
0
    llvm_unreachable("Unhandled isl_ast_node in vectorizer");
482
19
  }
483
19
484
19
  IDToValue.erase(IDToValue.find(IteratorID));
485
19
  isl_id_free(IteratorID);
486
19
  isl_union_map_free(Schedule);
487
19
488
19
  isl_ast_node_free(For);
489
19
  isl_ast_expr_free(Iterator);
490
19
491
19
  VectorLoops++;
492
19
}
493
494
/// Restore the initial ordering of dimensions of the band node
495
///
496
/// In case the band node represents all the dimensions of the iteration
497
/// domain, recreate the band node to restore the initial ordering of the
498
/// dimensions.
499
///
500
/// @param Node The band node to be modified.
501
/// @return The modified schedule node.
502
282
static bool IsLoopVectorizerDisabled(isl::ast_node Node) {
503
282
  assert(isl_ast_node_get_type(Node.get()) == isl_ast_node_for);
504
282
  auto Body = Node.for_get_body();
505
282
  if (isl_ast_node_get_type(Body.get()) != isl_ast_node_mark)
506
263
    return false;
507
19
  auto Id = Body.mark_get_id();
508
19
  if (strcmp(Id.get_name().c_str(), "Loop Vectorizer Disabled") == 0)
509
2
    return true;
510
17
  return false;
511
17
}
512
513
282
void IslNodeBuilder::createForSequential(isl::ast_node For, bool MarkParallel) {
514
282
  Value *ValueLB, *ValueUB, *ValueInc;
515
282
  Type *MaxType;
516
282
  BasicBlock *ExitBlock;
517
282
  Value *IV;
518
282
  CmpInst::Predicate Predicate;
519
282
520
282
  bool LoopVectorizerDisabled = IsLoopVectorizerDisabled(For);
521
282
522
282
  isl::ast_node Body = For.for_get_body();
523
282
524
282
  // isl_ast_node_for_is_degenerate(For)
525
282
  //
526
282
  // TODO: For degenerated loops we could generate a plain assignment.
527
282
  //       However, for now we just reuse the logic for normal loops, which will
528
282
  //       create a loop with a single iteration.
529
282
530
282
  isl::ast_expr Init = For.for_get_init();
531
282
  isl::ast_expr Inc = For.for_get_inc();
532
282
  isl::ast_expr Iterator = For.for_get_iterator();
533
282
  isl::id IteratorID = Iterator.get_id();
534
282
  isl::ast_expr UB = getUpperBound(For, Predicate);
535
282
536
282
  ValueLB = ExprBuilder.create(Init.release());
537
282
  ValueUB = ExprBuilder.create(UB.release());
538
282
  ValueInc = ExprBuilder.create(Inc.release());
539
282
540
282
  MaxType = ExprBuilder.getType(Iterator.get());
541
282
  MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType());
542
282
  MaxType = ExprBuilder.getWidestType(MaxType, ValueUB->getType());
543
282
  MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType());
544
282
545
282
  if (MaxType != ValueLB->getType())
546
0
    ValueLB = Builder.CreateSExt(ValueLB, MaxType);
547
282
  if (MaxType != ValueUB->getType())
548
37
    ValueUB = Builder.CreateSExt(ValueUB, MaxType);
549
282
  if (MaxType != ValueInc->getType())
550
0
    ValueInc = Builder.CreateSExt(ValueInc, MaxType);
551
282
552
282
  // If we can show that LB <Predicate> UB holds at least once, we can
553
282
  // omit the GuardBB in front of the loop.
554
282
  bool UseGuardBB =
555
282
      !SE.isKnownPredicate(Predicate, SE.getSCEV(ValueLB), SE.getSCEV(ValueUB));
556
282
  IV = createLoop(ValueLB, ValueUB, ValueInc, Builder, LI, DT, ExitBlock,
557
282
                  Predicate, &Annotator, MarkParallel, UseGuardBB,
558
282
                  LoopVectorizerDisabled);
559
282
  IDToValue[IteratorID.get()] = IV;
560
282
561
282
  create(Body.release());
562
282
563
282
  Annotator.popLoop(MarkParallel);
564
282
565
282
  IDToValue.erase(IDToValue.find(IteratorID.get()));
566
282
567
282
  Builder.SetInsertPoint(&ExitBlock->front());
568
282
569
282
  SequentialLoops++;
570
282
}
571
572
/// Remove the BBs contained in a (sub)function from the dominator tree.
573
///
574
/// This function removes the basic blocks that are part of a subfunction from
575
/// the dominator tree. Specifically, when generating code it may happen that at
576
/// some point the code generation continues in a new sub-function (e.g., when
577
/// generating OpenMP code). The basic blocks that are created in this
578
/// sub-function are then still part of the dominator tree of the original
579
/// function, such that the dominator tree reaches over function boundaries.
580
/// This is not only incorrect, but also causes crashes. This function now
581
/// removes from the dominator tree all basic blocks that are dominated (and
582
/// consequently reachable) from the entry block of this (sub)function.
583
///
584
/// FIXME: A LLVM (function or region) pass should not touch anything outside of
585
/// the function/region it runs on. Hence, the pure need for this function shows
586
/// that we do not comply to this rule. At the moment, this does not cause any
587
/// issues, but we should be aware that such issues may appear. Unfortunately
588
/// the current LLVM pass infrastructure does not allow to make Polly a module
589
/// or call-graph pass to solve this issue, as such a pass would not have access
590
/// to the per-function analyses passes needed by Polly. A future pass manager
591
/// infrastructure is supposed to enable such kind of access possibly allowing
592
/// us to create a cleaner solution here.
593
///
594
/// FIXME: Instead of adding the dominance information and then dropping it
595
/// later on, we should try to just not add it in the first place. This requires
596
/// some careful testing to make sure this does not break in interaction with
597
/// the SCEVBuilder and SplitBlock which may rely on the dominator tree or
598
/// which may try to update it.
599
///
600
/// @param F The function which contains the BBs to removed.
601
/// @param DT The dominator tree from which to remove the BBs.
602
41
static void removeSubFuncFromDomTree(Function *F, DominatorTree &DT) {
603
41
  DomTreeNode *N = DT.getNode(&F->getEntryBlock());
604
41
  std::vector<BasicBlock *> Nodes;
605
41
606
41
  // We can only remove an element from the dominator tree, if all its children
607
41
  // have been removed. To ensure this we obtain the list of nodes to remove
608
41
  // using a post-order tree traversal.
609
598
  for (po_iterator<DomTreeNode *> I = po_begin(N), E = po_end(N); I != E; 
++I557
)
610
557
    Nodes.push_back(I->getBlock());
611
41
612
41
  for (BasicBlock *BB : Nodes)
613
557
    DT.eraseNode(BB);
614
41
}
615
616
41
void IslNodeBuilder::createForParallel(__isl_take isl_ast_node *For) {
617
41
  isl_ast_node *Body;
618
41
  isl_ast_expr *Init, *Inc, *Iterator, *UB;
619
41
  isl_id *IteratorID;
620
41
  Value *ValueLB, *ValueUB, *ValueInc;
621
41
  Type *MaxType;
622
41
  Value *IV;
623
41
  CmpInst::Predicate Predicate;
624
41
625
41
  // The preamble of parallel code interacts different than normal code with
626
41
  // e.g., scalar initialization. Therefore, we ensure the parallel code is
627
41
  // separated from the last basic block.
628
41
  BasicBlock *ParBB = SplitBlock(Builder.GetInsertBlock(),
629
41
                                 &*Builder.GetInsertPoint(), &DT, &LI);
630
41
  ParBB->setName("polly.parallel.for");
631
41
  Builder.SetInsertPoint(&ParBB->front());
632
41
633
41
  Body = isl_ast_node_for_get_body(For);
634
41
  Init = isl_ast_node_for_get_init(For);
635
41
  Inc = isl_ast_node_for_get_inc(For);
636
41
  Iterator = isl_ast_node_for_get_iterator(For);
637
41
  IteratorID = isl_ast_expr_get_id(Iterator);
638
41
  UB = getUpperBound(isl::manage_copy(For), Predicate).release();
639
41
640
41
  ValueLB = ExprBuilder.create(Init);
641
41
  ValueUB = ExprBuilder.create(UB);
642
41
  ValueInc = ExprBuilder.create(Inc);
643
41
644
41
  // OpenMP always uses SLE. In case the isl generated AST uses a SLT
645
41
  // expression, we need to adjust the loop bound by one.
646
41
  if (Predicate == CmpInst::ICMP_SLT)
647
8
    ValueUB = Builder.CreateAdd(
648
8
        ValueUB, Builder.CreateSExt(Builder.getTrue(), ValueUB->getType()));
649
41
650
41
  MaxType = ExprBuilder.getType(Iterator);
651
41
  MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType());
652
41
  MaxType = ExprBuilder.getWidestType(MaxType, ValueUB->getType());
653
41
  MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType());
654
41
655
41
  if (MaxType != ValueLB->getType())
656
0
    ValueLB = Builder.CreateSExt(ValueLB, MaxType);
657
41
  if (MaxType != ValueUB->getType())
658
4
    ValueUB = Builder.CreateSExt(ValueUB, MaxType);
659
41
  if (MaxType != ValueInc->getType())
660
0
    ValueInc = Builder.CreateSExt(ValueInc, MaxType);
661
41
662
41
  BasicBlock::iterator LoopBody;
663
41
664
41
  SetVector<Value *> SubtreeValues;
665
41
  SetVector<const Loop *> Loops;
666
41
667
41
  getReferencesInSubtree(For, SubtreeValues, Loops);
668
41
669
41
  // Create for all loops we depend on values that contain the current loop
670
41
  // iteration. These values are necessary to generate code for SCEVs that
671
41
  // depend on such loops. As a result we need to pass them to the subfunction.
672
41
  // See [Code generation of induction variables of loops outside Scops]
673
41
  for (const Loop *L : Loops) {
674
1
    Value *LoopInductionVar = materializeNonScopLoopInductionVariable(L);
675
1
    SubtreeValues.insert(LoopInductionVar);
676
1
  }
677
41
678
41
  ValueMapT NewValues;
679
41
680
41
  std::unique_ptr<ParallelLoopGenerator> ParallelLoopGenPtr;
681
41
682
41
  switch (PollyOmpBackend) {
683
41
  case OpenMPBackend::GNU:
684
31
    ParallelLoopGenPtr.reset(
685
31
        new ParallelLoopGeneratorGOMP(Builder, LI, DT, DL));
686
31
    break;
687
41
  case OpenMPBackend::LLVM:
688
10
    ParallelLoopGenPtr.reset(new ParallelLoopGeneratorKMP(Builder, LI, DT, DL));
689
10
    break;
690
41
  }
691
41
692
41
  IV = ParallelLoopGenPtr->createParallelLoop(
693
41
      ValueLB, ValueUB, ValueInc, SubtreeValues, NewValues, &LoopBody);
694
41
  BasicBlock::iterator AfterLoop = Builder.GetInsertPoint();
695
41
  Builder.SetInsertPoint(&*LoopBody);
696
41
697
41
  // Remember the parallel subfunction
698
41
  ParallelSubfunctions.push_back(LoopBody->getFunction());
699
41
700
41
  // Save the current values.
701
41
  auto ValueMapCopy = ValueMap;
702
41
  IslExprBuilder::IDToValueTy IDToValueCopy = IDToValue;
703
41
704
41
  updateValues(NewValues);
705
41
  IDToValue[IteratorID] = IV;
706
41
707
41
  ValueMapT NewValuesReverse;
708
41
709
41
  for (auto P : NewValues)
710
81
    NewValuesReverse[P.second] = P.first;
711
41
712
41
  Annotator.addAlternativeAliasBases(NewValuesReverse);
713
41
714
41
  create(Body);
715
41
716
41
  Annotator.resetAlternativeAliasBases();
717
41
  // Restore the original values.
718
41
  ValueMap = ValueMapCopy;
719
41
  IDToValue = IDToValueCopy;
720
41
721
41
  Builder.SetInsertPoint(&*AfterLoop);
722
41
  removeSubFuncFromDomTree((*LoopBody).getParent()->getParent(), DT);
723
41
724
41
  for (const Loop *L : Loops)
725
1
    OutsideLoopIterations.erase(L);
726
41
727
41
  isl_ast_node_free(For);
728
41
  isl_ast_expr_free(Iterator);
729
41
  isl_id_free(IteratorID);
730
41
731
41
  ParallelLoops++;
732
41
}
733
734
/// Return whether any of @p Node's statements contain partial accesses.
735
///
736
/// Partial accesses are not supported by Polly's vector code generator.
737
17
static bool hasPartialAccesses(__isl_take isl_ast_node *Node) {
738
17
  return isl_ast_node_foreach_descendant_top_down(
739
17
             Node,
740
36
             [](isl_ast_node *Node, void *User) -> isl_bool {
741
36
               if (isl_ast_node_get_type(Node) != isl_ast_node_user)
742
18
                 return isl_bool_true;
743
18
744
18
               isl::ast_expr Expr =
745
18
                   isl::manage(isl_ast_node_user_get_expr(Node));
746
18
               isl::ast_expr StmtExpr = Expr.get_op_arg(0);
747
18
               isl::id Id = StmtExpr.get_id();
748
18
749
18
               ScopStmt *Stmt =
750
18
                   static_cast<ScopStmt *>(isl_id_get_user(Id.get()));
751
18
               isl::set StmtDom = Stmt->getDomain();
752
27
               for (auto *MA : *Stmt) {
753
27
                 if (MA->isLatestPartialAccess())
754
0
                   return isl_bool_error;
755
27
               }
756
18
               return isl_bool_true;
757
18
             },
758
17
             nullptr) == isl_stat_error;
759
17
}
760
761
340
void IslNodeBuilder::createFor(__isl_take isl_ast_node *For) {
762
340
  bool Vector = PollyVectorizerChoice == VECTORIZER_POLLY;
763
340
764
340
  if (Vector && 
IslAstInfo::isInnermostParallel(For)27
&&
765
340
      
!IslAstInfo::isReductionParallel(For)20
) {
766
20
    int VectorWidth = getNumberOfIterations(isl::manage_copy(For));
767
20
    if (1 < VectorWidth && 
VectorWidth <= 1617
&&
!hasPartialAccesses(For)17
) {
768
17
      createForVector(For, VectorWidth);
769
17
      return;
770
17
    }
771
323
  }
772
323
773
323
  if (IslAstInfo::isExecutedInParallel(For)) {
774
41
    createForParallel(For);
775
41
    return;
776
41
  }
777
282
  bool Parallel =
778
282
      (IslAstInfo::isParallel(For) && 
!IslAstInfo::isReductionParallel(For)30
);
779
282
  createForSequential(isl::manage(For), Parallel);
780
282
}
781
782
85
void IslNodeBuilder::createIf(__isl_take isl_ast_node *If) {
783
85
  isl_ast_expr *Cond = isl_ast_node_if_get_cond(If);
784
85
785
85
  Function *F = Builder.GetInsertBlock()->getParent();
786
85
  LLVMContext &Context = F->getContext();
787
85
788
85
  BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(),
789
85
                                  &*Builder.GetInsertPoint(), &DT, &LI);
790
85
  CondBB->setName("polly.cond");
791
85
  BasicBlock *MergeBB = SplitBlock(CondBB, &CondBB->front(), &DT, &LI);
792
85
  MergeBB->setName("polly.merge");
793
85
  BasicBlock *ThenBB = BasicBlock::Create(Context, "polly.then", F);
794
85
  BasicBlock *ElseBB = BasicBlock::Create(Context, "polly.else", F);
795
85
796
85
  DT.addNewBlock(ThenBB, CondBB);
797
85
  DT.addNewBlock(ElseBB, CondBB);
798
85
  DT.changeImmediateDominator(MergeBB, CondBB);
799
85
800
85
  Loop *L = LI.getLoopFor(CondBB);
801
85
  if (L) {
802
51
    L->addBasicBlockToLoop(ThenBB, LI);
803
51
    L->addBasicBlockToLoop(ElseBB, LI);
804
51
  }
805
85
806
85
  CondBB->getTerminator()->eraseFromParent();
807
85
808
85
  Builder.SetInsertPoint(CondBB);
809
85
  Value *Predicate = ExprBuilder.create(Cond);
810
85
  Builder.CreateCondBr(Predicate, ThenBB, ElseBB);
811
85
  Builder.SetInsertPoint(ThenBB);
812
85
  Builder.CreateBr(MergeBB);
813
85
  Builder.SetInsertPoint(ElseBB);
814
85
  Builder.CreateBr(MergeBB);
815
85
  Builder.SetInsertPoint(&ThenBB->front());
816
85
817
85
  create(isl_ast_node_if_get_then(If));
818
85
819
85
  Builder.SetInsertPoint(&ElseBB->front());
820
85
821
85
  if (isl_ast_node_if_has_else(If))
822
11
    create(isl_ast_node_if_get_else(If));
823
85
824
85
  Builder.SetInsertPoint(&MergeBB->front());
825
85
826
85
  isl_ast_node_free(If);
827
85
828
85
  IfConditions++;
829
85
}
830
831
__isl_give isl_id_to_ast_expr *
832
IslNodeBuilder::createNewAccesses(ScopStmt *Stmt,
833
531
                                  __isl_keep isl_ast_node *Node) {
834
531
  isl_id_to_ast_expr *NewAccesses =
835
531
      isl_id_to_ast_expr_alloc(Stmt->getParent()->getIslCtx().get(), 0);
836
531
837
531
  auto *Build = IslAstInfo::getBuild(Node);
838
531
  assert(Build && "Could not obtain isl_ast_build from user node");
839
531
  Stmt->setAstBuild(isl::manage_copy(Build));
840
531
841
1.03k
  for (auto *MA : *Stmt) {
842
1.03k
    if (!MA->hasNewAccessRelation()) {
843
807
      if (PollyGenerateExpressions) {
844
2
        if (!MA->isAffine())
845
0
          continue;
846
2
        if (MA->getLatestScopArrayInfo()->getBasePtrOriginSAI())
847
0
          continue;
848
2
849
2
        auto *BasePtr =
850
2
            dyn_cast<Instruction>(MA->getLatestScopArrayInfo()->getBasePtr());
851
2
        if (BasePtr && 
Stmt->getParent()->getRegion().contains(BasePtr)0
)
852
0
          continue;
853
805
      } else {
854
805
        continue;
855
805
      }
856
226
    }
857
226
    assert(MA->isAffine() &&
858
226
           "Only affine memory accesses can be code generated");
859
226
860
226
    auto Schedule = isl_ast_build_get_schedule(Build);
861
226
862
#ifndef NDEBUG
863
    if (MA->isRead()) {
864
      auto Dom = Stmt->getDomain().release();
865
      auto SchedDom = isl_set_from_union_set(
866
          isl_union_map_domain(isl_union_map_copy(Schedule)));
867
      auto AccDom = isl_map_domain(MA->getAccessRelation().release());
868
      Dom = isl_set_intersect_params(Dom,
869
                                     Stmt->getParent()->getContext().release());
870
      SchedDom = isl_set_intersect_params(
871
          SchedDom, Stmt->getParent()->getContext().release());
872
      assert(isl_set_is_subset(SchedDom, AccDom) &&
873
             "Access relation not defined on full schedule domain");
874
      assert(isl_set_is_subset(Dom, AccDom) &&
875
             "Access relation not defined on full domain");
876
      isl_set_free(AccDom);
877
      isl_set_free(SchedDom);
878
      isl_set_free(Dom);
879
    }
880
#endif
881
882
226
    auto PWAccRel =
883
226
        MA->applyScheduleToAccessRelation(isl::manage(Schedule)).release();
884
226
885
226
    // isl cannot generate an index expression for access-nothing accesses.
886
226
    isl::set AccDomain =
887
226
        isl::manage(isl_pw_multi_aff_domain(isl_pw_multi_aff_copy(PWAccRel)));
888
226
    isl::set Context = S.getContext();
889
226
    AccDomain = AccDomain.intersect_params(Context);
890
226
    if (AccDomain.is_empty()) {
891
3
      isl_pw_multi_aff_free(PWAccRel);
892
3
      continue;
893
3
    }
894
223
895
223
    auto AccessExpr = isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel);
896
223
    NewAccesses =
897
223
        isl_id_to_ast_expr_set(NewAccesses, MA->getId().release(), AccessExpr);
898
223
  }
899
531
900
531
  return NewAccesses;
901
531
}
902
903
void IslNodeBuilder::createSubstitutions(__isl_take isl_ast_expr *Expr,
904
630
                                         ScopStmt *Stmt, LoopToScevMapT &LTS) {
905
630
  assert(isl_ast_expr_get_type(Expr) == isl_ast_expr_op &&
906
630
         "Expression of type 'op' expected");
907
630
  assert(isl_ast_expr_get_op_type(Expr) == isl_ast_op_call &&
908
630
         "Operation of type 'call' expected");
909
1.35k
  for (int i = 0; i < isl_ast_expr_get_op_n_arg(Expr) - 1; 
++i727
) {
910
727
    isl_ast_expr *SubExpr;
911
727
    Value *V;
912
727
913
727
    SubExpr = isl_ast_expr_get_op_arg(Expr, i + 1);
914
727
    V = ExprBuilder.create(SubExpr);
915
727
    ScalarEvolution *SE = Stmt->getParent()->getSE();
916
727
    LTS[Stmt->getLoopForDimension(i)] = SE->getUnknown(V);
917
727
  }
918
630
919
630
  isl_ast_expr_free(Expr);
920
630
}
921
922
void IslNodeBuilder::createSubstitutionsVector(
923
    __isl_take isl_ast_expr *Expr, ScopStmt *Stmt,
924
    std::vector<LoopToScevMapT> &VLTS, std::vector<Value *> &IVS,
925
20
    __isl_take isl_id *IteratorID) {
926
20
  int i = 0;
927
20
928
20
  Value *OldValue = IDToValue[IteratorID];
929
123
  for (Value *IV : IVS) {
930
123
    IDToValue[IteratorID] = IV;
931
123
    createSubstitutions(isl_ast_expr_copy(Expr), Stmt, VLTS[i]);
932
123
    i++;
933
123
  }
934
20
935
20
  IDToValue[IteratorID] = OldValue;
936
20
  isl_id_free(IteratorID);
937
20
  isl_ast_expr_free(Expr);
938
20
}
939
940
void IslNodeBuilder::generateCopyStmt(
941
4
    ScopStmt *Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses) {
942
4
  assert(Stmt->size() == 2);
943
4
  auto ReadAccess = Stmt->begin();
944
4
  auto WriteAccess = ReadAccess++;
945
4
  assert((*ReadAccess)->isRead() && (*WriteAccess)->isMustWrite());
946
4
  assert((*ReadAccess)->getElementType() == (*WriteAccess)->getElementType() &&
947
4
         "Accesses use the same data type");
948
4
  assert((*ReadAccess)->isArrayKind() && (*WriteAccess)->isArrayKind());
949
4
  auto *AccessExpr =
950
4
      isl_id_to_ast_expr_get(NewAccesses, (*ReadAccess)->getId().release());
951
4
  auto *LoadValue = ExprBuilder.create(AccessExpr);
952
4
  AccessExpr =
953
4
      isl_id_to_ast_expr_get(NewAccesses, (*WriteAccess)->getId().release());
954
4
  auto *StoreAddr = ExprBuilder.createAccessAddress(AccessExpr);
955
4
  Builder.CreateStore(LoadValue, StoreAddr);
956
4
}
957
958
26
Value *IslNodeBuilder::materializeNonScopLoopInductionVariable(const Loop *L) {
959
26
  assert(OutsideLoopIterations.find(L) == OutsideLoopIterations.end() &&
960
26
         "trying to materialize loop induction variable twice");
961
26
  const SCEV *OuterLIV = SE.getAddRecExpr(SE.getUnknown(Builder.getInt64(0)),
962
26
                                          SE.getUnknown(Builder.getInt64(1)), L,
963
26
                                          SCEV::FlagAnyWrap);
964
26
  Value *V = generateSCEV(OuterLIV);
965
26
  OutsideLoopIterations[L] = SE.getUnknown(V);
966
26
  return V;
967
26
}
968
969
511
void IslNodeBuilder::createUser(__isl_take isl_ast_node *User) {
970
511
  LoopToScevMapT LTS;
971
511
  isl_id *Id;
972
511
  ScopStmt *Stmt;
973
511
974
511
  isl_ast_expr *Expr = isl_ast_node_user_get_expr(User);
975
511
  isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0);
976
511
  Id = isl_ast_expr_get_id(StmtExpr);
977
511
  isl_ast_expr_free(StmtExpr);
978
511
979
511
  LTS.insert(OutsideLoopIterations.begin(), OutsideLoopIterations.end());
980
511
981
511
  Stmt = (ScopStmt *)isl_id_get_user(Id);
982
511
  auto *NewAccesses = createNewAccesses(Stmt, User);
983
511
  if (Stmt->isCopyStmt()) {
984
4
    generateCopyStmt(Stmt, NewAccesses);
985
4
    isl_ast_expr_free(Expr);
986
507
  } else {
987
507
    createSubstitutions(Expr, Stmt, LTS);
988
507
989
507
    if (Stmt->isBlockStmt())
990
472
      BlockGen.copyStmt(*Stmt, LTS, NewAccesses);
991
35
    else
992
35
      RegionGen.copyStmt(*Stmt, LTS, NewAccesses);
993
507
  }
994
511
995
511
  isl_id_to_ast_expr_free(NewAccesses);
996
511
  isl_ast_node_free(User);
997
511
  isl_id_free(Id);
998
511
}
999
1000
147
void IslNodeBuilder::createBlock(__isl_take isl_ast_node *Block) {
1001
147
  isl_ast_node_list *List = isl_ast_node_block_get_children(Block);
1002
147
1003
513
  for (int i = 0; i < isl_ast_node_list_n_ast_node(List); 
++i366
)
1004
366
    create(isl_ast_node_list_get_ast_node(List, i));
1005
147
1006
147
  isl_ast_node_free(Block);
1007
147
  isl_ast_node_list_free(List);
1008
147
}
1009
1010
1.12k
void IslNodeBuilder::create(__isl_take isl_ast_node *Node) {
1011
1.12k
  switch (isl_ast_node_get_type(Node)) {
1012
1.12k
  case isl_ast_node_error:
1013
0
    llvm_unreachable("code generation error");
1014
1.12k
  case isl_ast_node_mark:
1015
43
    createMark(Node);
1016
43
    return;
1017
1.12k
  case isl_ast_node_for:
1018
340
    createFor(Node);
1019
340
    return;
1020
1.12k
  case isl_ast_node_if:
1021
85
    createIf(Node);
1022
85
    return;
1023
1.12k
  case isl_ast_node_user:
1024
511
    createUser(Node);
1025
511
    return;
1026
1.12k
  case isl_ast_node_block:
1027
147
    createBlock(Node);
1028
147
    return;
1029
0
  }
1030
0
1031
0
  llvm_unreachable("Unknown isl_ast_node type");
1032
0
}
1033
1034
248
bool IslNodeBuilder::materializeValue(isl_id *Id) {
1035
248
  // If the Id is already mapped, skip it.
1036
248
  if (!IDToValue.count(Id)) {
1037
176
    auto *ParamSCEV = (const SCEV *)isl_id_get_user(Id);
1038
176
    Value *V = nullptr;
1039
176
1040
176
    // Parameters could refer to invariant loads that need to be
1041
176
    // preloaded before we can generate code for the parameter. Thus,
1042
176
    // check if any value referred to in ParamSCEV is an invariant load
1043
176
    // and if so make sure its equivalence class is preloaded.
1044
176
    SetVector<Value *> Values;
1045
176
    findValues(ParamSCEV, SE, Values);
1046
180
    for (auto *Val : Values) {
1047
180
      // Check if the value is an instruction in a dead block within the SCoP
1048
180
      // and if so do not code generate it.
1049
180
      if (auto *Inst = dyn_cast<Instruction>(Val)) {
1050
40
        if (S.contains(Inst)) {
1051
16
          bool IsDead = true;
1052
16
1053
16
          // Check for "undef" loads first, then if there is a statement for
1054
16
          // the parent of Inst and lastly if the parent of Inst has an empty
1055
16
          // domain. In the first and last case the instruction is dead but if
1056
16
          // there is a statement or the domain is not empty Inst is not dead.
1057
16
          auto MemInst = MemAccInst::dyn_cast(Inst);
1058
16
          auto Address = MemInst ? 
MemInst.getPointerOperand()15
:
nullptr1
;
1059
16
          if (Address && SE.getUnknown(UndefValue::get(Address->getType())) ==
1060
14
                             SE.getPointerBase(SE.getSCEV(Address))) {
1061
16
          } else if (S.getStmtFor(Inst)) {
1062
0
            IsDead = false;
1063
16
          } else {
1064
16
            auto *Domain = S.getDomainConditions(Inst->getParent()).release();
1065
16
            IsDead = isl_set_is_empty(Domain);
1066
16
            isl_set_free(Domain);
1067
16
          }
1068
16
1069
16
          if (IsDead) {
1070
2
            V = UndefValue::get(ParamSCEV->getType());
1071
2
            break;
1072
2
          }
1073
178
        }
1074
40
      }
1075
178
1076
178
      if (auto *IAClass = S.lookupInvariantEquivClass(Val)) {
1077
12
        // Check if this invariant access class is empty, hence if we never
1078
12
        // actually added a loads instruction to it. In that case it has no
1079
12
        // (meaningful) users and we should not try to code generate it.
1080
12
        if (IAClass->InvariantAccesses.empty())
1081
0
          V = UndefValue::get(ParamSCEV->getType());
1082
12
1083
12
        if (!preloadInvariantEquivClass(*IAClass)) {
1084
3
          isl_id_free(Id);
1085
3
          return false;
1086
3
        }
1087
12
      }
1088
178
    }
1089
176
1090
176
    
V = V 173
?
V2
:
generateSCEV(ParamSCEV)171
;
1091
173
    IDToValue[Id] = V;
1092
173
  }
1093
248
1094
248
  isl_id_free(Id);
1095
245
  return true;
1096
248
}
1097
1098
132
bool IslNodeBuilder::materializeParameters(isl_set *Set) {
1099
341
  for (unsigned i = 0, e = isl_set_dim(Set, isl_dim_param); i < e; 
++i209
) {
1100
212
    if (!isl_set_involves_dims(Set, isl_dim_param, i, 1))
1101
155
      continue;
1102
57
    isl_id *Id = isl_set_get_dim_id(Set, isl_dim_param, i);
1103
57
    if (!materializeValue(Id))
1104
3
      return false;
1105
57
  }
1106
132
  
return true129
;
1107
132
}
1108
1109
300
bool IslNodeBuilder::materializeParameters() {
1110
300
  for (const SCEV *Param : S.parameters()) {
1111
191
    isl_id *Id = S.getIdForParam(Param).release();
1112
191
    if (!materializeValue(Id))
1113
0
      return false;
1114
191
  }
1115
300
  return true;
1116
300
}
1117
1118
/// Generate the computation of the size of the outermost dimension from the
1119
/// Fortran array descriptor (in this case, `@g_arr`). The final `%size`
1120
/// contains the size of the array.
1121
///
1122
/// %arrty = type { i8*, i64, i64, [3 x %desc.dimensionty] }
1123
/// %desc.dimensionty = type { i64, i64, i64 }
1124
/// @g_arr = global %arrty zeroinitializer, align 32
1125
/// ...
1126
/// %0 = load i64, i64* getelementptr inbounds
1127
///                       (%arrty, %arrty* @g_arr, i64 0, i32 3, i64 0, i32 2)
1128
/// %1 = load i64, i64* getelementptr inbounds
1129
///                      (%arrty, %arrty* @g_arr, i64 0, i32 3, i64 0, i32 1)
1130
/// %2 = sub nsw i64 %0, %1
1131
/// %size = add nsw i64 %2, 1
1132
static Value *buildFADOutermostDimensionLoad(Value *GlobalDescriptor,
1133
                                             PollyIRBuilder &Builder,
1134
2
                                             std::string ArrayName) {
1135
2
  assert(GlobalDescriptor && "invalid global descriptor given");
1136
2
1137
2
  Value *endIdx[4] = {Builder.getInt64(0), Builder.getInt32(3),
1138
2
                      Builder.getInt64(0), Builder.getInt32(2)};
1139
2
  Value *endPtr = Builder.CreateInBoundsGEP(GlobalDescriptor, endIdx,
1140
2
                                            ArrayName + "_end_ptr");
1141
2
  Value *end = Builder.CreateLoad(endPtr, ArrayName + "_end");
1142
2
1143
2
  Value *beginIdx[4] = {Builder.getInt64(0), Builder.getInt32(3),
1144
2
                        Builder.getInt64(0), Builder.getInt32(1)};
1145
2
  Value *beginPtr = Builder.CreateInBoundsGEP(GlobalDescriptor, beginIdx,
1146
2
                                              ArrayName + "_begin_ptr");
1147
2
  Value *begin = Builder.CreateLoad(beginPtr, ArrayName + "_begin");
1148
2
1149
2
  Value *size =
1150
2
      Builder.CreateNSWSub(end, begin, ArrayName + "_end_begin_delta");
1151
2
  Type *endType = dyn_cast<IntegerType>(end->getType());
1152
2
  assert(endType && "expected type of end to be integral");
1153
2
1154
2
  size = Builder.CreateNSWAdd(end,
1155
2
                              ConstantInt::get(endType, 1, /* signed = */ true),
1156
2
                              ArrayName + "_size");
1157
2
1158
2
  return size;
1159
2
}
1160
1161
300
bool IslNodeBuilder::materializeFortranArrayOutermostDimension() {
1162
637
  for (ScopArrayInfo *Array : S.arrays()) {
1163
637
    if (Array->getNumberOfDimensions() == 0)
1164
149
      continue;
1165
488
1166
488
    Value *FAD = Array->getFortranArrayDescriptor();
1167
488
    if (!FAD)
1168
486
      continue;
1169
2
1170
2
    isl_pw_aff *ParametricPwAff = Array->getDimensionSizePw(0).release();
1171
2
    assert(ParametricPwAff && "parametric pw_aff corresponding "
1172
2
                              "to outermost dimension does not "
1173
2
                              "exist");
1174
2
1175
2
    isl_id *Id = isl_pw_aff_get_dim_id(ParametricPwAff, isl_dim_param, 0);
1176
2
    isl_pw_aff_free(ParametricPwAff);
1177
2
1178
2
    assert(Id && "pw_aff is not parametric");
1179
2
1180
2
    if (IDToValue.count(Id)) {
1181
0
      isl_id_free(Id);
1182
0
      continue;
1183
0
    }
1184
2
1185
2
    Value *FinalValue =
1186
2
        buildFADOutermostDimensionLoad(FAD, Builder, Array->getName());
1187
2
    assert(FinalValue && "unable to build Fortran array "
1188
2
                         "descriptor load of outermost dimension");
1189
2
    IDToValue[Id] = FinalValue;
1190
2
    isl_id_free(Id);
1191
2
  }
1192
300
  return true;
1193
300
}
1194
1195
Value *IslNodeBuilder::preloadUnconditionally(isl_set *AccessRange,
1196
                                              isl_ast_build *Build,
1197
105
                                              Instruction *AccInst) {
1198
105
  isl_pw_multi_aff *PWAccRel = isl_pw_multi_aff_from_set(AccessRange);
1199
105
  isl_ast_expr *Access =
1200
105
      isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel);
1201
105
  auto *Address = isl_ast_expr_address_of(Access);
1202
105
  auto *AddressValue = ExprBuilder.create(Address);
1203
105
  Value *PreloadVal;
1204
105
1205
105
  // Correct the type as the SAI might have a different type than the user
1206
105
  // expects, especially if the base pointer is a struct.
1207
105
  Type *Ty = AccInst->getType();
1208
105
1209
105
  auto *Ptr = AddressValue;
1210
105
  auto Name = Ptr->getName();
1211
105
  auto AS = Ptr->getType()->getPointerAddressSpace();
1212
105
  Ptr = Builder.CreatePointerCast(Ptr, Ty->getPointerTo(AS), Name + ".cast");
1213
105
  PreloadVal = Builder.CreateLoad(Ptr, Name + ".load");
1214
105
  if (LoadInst *PreloadInst = dyn_cast<LoadInst>(PreloadVal))
1215
105
    PreloadInst->setAlignment(dyn_cast<LoadInst>(AccInst)->getAlignment());
1216
105
1217
105
  // TODO: This is only a hot fix for SCoP sequences that use the same load
1218
105
  //       instruction contained and hoisted by one of the SCoPs.
1219
105
  if (SE.isSCEVable(Ty))
1220
90
    SE.forgetValue(AccInst);
1221
105
1222
105
  return PreloadVal;
1223
105
}
1224
1225
Value *IslNodeBuilder::preloadInvariantLoad(const MemoryAccess &MA,
1226
108
                                            isl_set *Domain) {
1227
108
  isl_set *AccessRange = isl_map_range(MA.getAddressFunction().release());
1228
108
  AccessRange = isl_set_gist_params(AccessRange, S.getContext().release());
1229
108
1230
108
  if (!materializeParameters(AccessRange)) {
1231
0
    isl_set_free(AccessRange);
1232
0
    isl_set_free(Domain);
1233
0
    return nullptr;
1234
0
  }
1235
108
1236
108
  auto *Build =
1237
108
      isl_ast_build_from_context(isl_set_universe(S.getParamSpace().release()));
1238
108
  isl_set *Universe = isl_set_universe(isl_set_get_space(Domain));
1239
108
  bool AlwaysExecuted = isl_set_is_equal(Domain, Universe);
1240
108
  isl_set_free(Universe);
1241
108
1242
108
  Instruction *AccInst = MA.getAccessInstruction();
1243
108
  Type *AccInstTy = AccInst->getType();
1244
108
1245
108
  Value *PreloadVal = nullptr;
1246
108
  if (AlwaysExecuted) {
1247
84
    PreloadVal = preloadUnconditionally(AccessRange, Build, AccInst);
1248
84
    isl_ast_build_free(Build);
1249
84
    isl_set_free(Domain);
1250
84
    return PreloadVal;
1251
84
  }
1252
24
1253
24
  if (!materializeParameters(Domain)) {
1254
3
    isl_ast_build_free(Build);
1255
3
    isl_set_free(AccessRange);
1256
3
    isl_set_free(Domain);
1257
3
    return nullptr;
1258
3
  }
1259
21
1260
21
  isl_ast_expr *DomainCond = isl_ast_build_expr_from_set(Build, Domain);
1261
21
  Domain = nullptr;
1262
21
1263
21
  ExprBuilder.setTrackOverflow(true);
1264
21
  Value *Cond = ExprBuilder.create(DomainCond);
1265
21
  Value *OverflowHappened = Builder.CreateNot(ExprBuilder.getOverflowState(),
1266
21
                                              "polly.preload.cond.overflown");
1267
21
  Cond = Builder.CreateAnd(Cond, OverflowHappened, "polly.preload.cond.result");
1268
21
  ExprBuilder.setTrackOverflow(false);
1269
21
1270
21
  if (!Cond->getType()->isIntegerTy(1))
1271
0
    Cond = Builder.CreateIsNotNull(Cond);
1272
21
1273
21
  BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(),
1274
21
                                  &*Builder.GetInsertPoint(), &DT, &LI);
1275
21
  CondBB->setName("polly.preload.cond");
1276
21
1277
21
  BasicBlock *MergeBB = SplitBlock(CondBB, &CondBB->front(), &DT, &LI);
1278
21
  MergeBB->setName("polly.preload.merge");
1279
21
1280
21
  Function *F = Builder.GetInsertBlock()->getParent();
1281
21
  LLVMContext &Context = F->getContext();
1282
21
  BasicBlock *ExecBB = BasicBlock::Create(Context, "polly.preload.exec", F);
1283
21
1284
21
  DT.addNewBlock(ExecBB, CondBB);
1285
21
  if (Loop *L = LI.getLoopFor(CondBB))
1286
6
    L->addBasicBlockToLoop(ExecBB, LI);
1287
21
1288
21
  auto *CondBBTerminator = CondBB->getTerminator();
1289
21
  Builder.SetInsertPoint(CondBBTerminator);
1290
21
  Builder.CreateCondBr(Cond, ExecBB, MergeBB);
1291
21
  CondBBTerminator->eraseFromParent();
1292
21
1293
21
  Builder.SetInsertPoint(ExecBB);
1294
21
  Builder.CreateBr(MergeBB);
1295
21
1296
21
  Builder.SetInsertPoint(ExecBB->getTerminator());
1297
21
  Value *PreAccInst = preloadUnconditionally(AccessRange, Build, AccInst);
1298
21
  Builder.SetInsertPoint(MergeBB->getTerminator());
1299
21
  auto *MergePHI = Builder.CreatePHI(
1300
21
      AccInstTy, 2, "polly.preload." + AccInst->getName() + ".merge");
1301
21
  PreloadVal = MergePHI;
1302
21
1303
21
  if (!PreAccInst) {
1304
0
    PreloadVal = nullptr;
1305
0
    PreAccInst = UndefValue::get(AccInstTy);
1306
0
  }
1307
21
1308
21
  MergePHI->addIncoming(PreAccInst, ExecBB);
1309
21
  MergePHI->addIncoming(Constant::getNullValue(AccInstTy), CondBB);
1310
21
1311
21
  isl_ast_build_free(Build);
1312
21
  return PreloadVal;
1313
21
}
1314
1315
bool IslNodeBuilder::preloadInvariantEquivClass(
1316
147
    InvariantEquivClassTy &IAClass) {
1317
147
  // For an equivalence class of invariant loads we pre-load the representing
1318
147
  // element with the unified execution context. However, we have to map all
1319
147
  // elements of the class to the one preloaded load as they are referenced
1320
147
  // during the code generation and therefor need to be mapped.
1321
147
  const MemoryAccessList &MAs = IAClass.InvariantAccesses;
1322
147
  if (MAs.empty())
1323
2
    return true;
1324
145
1325
145
  MemoryAccess *MA = MAs.front();
1326
145
  assert(MA->isArrayKind() && MA->isRead());
1327
145
1328
145
  // If the access function was already mapped, the preload of this equivalence
1329
145
  // class was triggered earlier already and doesn't need to be done again.
1330
145
  if (ValueMap.count(MA->getAccessInstruction()))
1331
30
    return true;
1332
115
1333
115
  // Check for recursion which can be caused by additional constraints, e.g.,
1334
115
  // non-finite loop constraints. In such a case we have to bail out and insert
1335
115
  // a "false" runtime check that will cause the original code to be executed.
1336
115
  auto PtrId = std::make_pair(IAClass.IdentifyingPointer, IAClass.AccessType);
1337
115
  if (!PreloadedPtrs.insert(PtrId).second)
1338
4
    return false;
1339
111
1340
111
  // The execution context of the IAClass.
1341
111
  isl::set &ExecutionCtx = IAClass.ExecutionContext;
1342
111
1343
111
  // If the base pointer of this class is dependent on another one we have to
1344
111
  // make sure it was preloaded already.
1345
111
  auto *SAI = MA->getScopArrayInfo();
1346
111
  if (auto *BaseIAClass = S.lookupInvariantEquivClass(SAI->getBasePtr())) {
1347
22
    if (!preloadInvariantEquivClass(*BaseIAClass))
1348
1
      return false;
1349
21
1350
21
    // After we preloaded the BaseIAClass we adjusted the BaseExecutionCtx and
1351
21
    // we need to refine the ExecutionCtx.
1352
21
    isl::set BaseExecutionCtx = BaseIAClass->ExecutionContext;
1353
21
    ExecutionCtx = ExecutionCtx.intersect(BaseExecutionCtx);
1354
21
  }
1355
111
1356
111
  // If the size of a dimension is dependent on another class, make sure it is
1357
111
  // preloaded.
1358
111
  
for (unsigned i = 1, e = SAI->getNumberOfDimensions(); 110
i < e110
;
++i0
) {
1359
2
    const SCEV *Dim = SAI->getDimensionSize(i);
1360
2
    SetVector<Value *> Values;
1361
2
    findValues(Dim, SE, Values);
1362
2
    for (auto *Val : Values) {
1363
2
      if (auto *BaseIAClass = S.lookupInvariantEquivClass(Val)) {
1364
2
        if (!preloadInvariantEquivClass(*BaseIAClass))
1365
2
          return false;
1366
0
1367
0
        // After we preloaded the BaseIAClass we adjusted the BaseExecutionCtx
1368
0
        // and we need to refine the ExecutionCtx.
1369
0
        isl::set BaseExecutionCtx = BaseIAClass->ExecutionContext;
1370
0
        ExecutionCtx = ExecutionCtx.intersect(BaseExecutionCtx);
1371
0
      }
1372
2
    }
1373
2
  }
1374
110
1375
110
  Instruction *AccInst = MA->getAccessInstruction();
1376
108
  Type *AccInstTy = AccInst->getType();
1377
108
1378
108
  Value *PreloadVal = preloadInvariantLoad(*MA, ExecutionCtx.copy());
1379
108
  if (!PreloadVal)
1380
3
    return false;
1381
105
1382
115
  
for (const MemoryAccess *MA : MAs)105
{
1383
115
    Instruction *MAAccInst = MA->getAccessInstruction();
1384
115
    assert(PreloadVal->getType() == MAAccInst->getType());
1385
115
    ValueMap[MAAccInst] = PreloadVal;
1386
115
  }
1387
105
1388
105
  if (SE.isSCEVable(AccInstTy)) {
1389
90
    isl_id *ParamId = S.getIdForParam(SE.getSCEV(AccInst)).release();
1390
90
    if (ParamId)
1391
21
      IDToValue[ParamId] = PreloadVal;
1392
90
    isl_id_free(ParamId);
1393
90
  }
1394
105
1395
105
  BasicBlock *EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock();
1396
105
  auto *Alloca = new AllocaInst(AccInstTy, DL.getAllocaAddrSpace(),
1397
105
                                AccInst->getName() + ".preload.s2a");
1398
105
  Alloca->insertBefore(&*EntryBB->getFirstInsertionPt());
1399
105
  Builder.CreateStore(PreloadVal, Alloca);
1400
105
  ValueMapT PreloadedPointer;
1401
105
  PreloadedPointer[PreloadVal] = AccInst;
1402
105
  Annotator.addAlternativeAliasBases(PreloadedPointer);
1403
105
1404
105
  for (auto *DerivedSAI : SAI->getDerivedSAIs()) {
1405
46
    Value *BasePtr = DerivedSAI->getBasePtr();
1406
46
1407
52
    for (const MemoryAccess *MA : MAs) {
1408
52
      // As the derived SAI information is quite coarse, any load from the
1409
52
      // current SAI could be the base pointer of the derived SAI, however we
1410
52
      // should only change the base pointer of the derived SAI if we actually
1411
52
      // preloaded it.
1412
52
      if (BasePtr == MA->getOriginalBaseAddr()) {
1413
0
        assert(BasePtr->getType() == PreloadVal->getType());
1414
0
        DerivedSAI->setBasePtr(PreloadVal);
1415
0
      }
1416
52
1417
52
      // For scalar derived SAIs we remap the alloca used for the derived value.
1418
52
      if (BasePtr == MA->getAccessInstruction())
1419
34
        ScalarMap[DerivedSAI] = Alloca;
1420
52
    }
1421
46
  }
1422
105
1423
115
  for (const MemoryAccess *MA : MAs) {
1424
115
    Instruction *MAAccInst = MA->getAccessInstruction();
1425
115
    // Use the escape system to get the correct value to users outside the SCoP.
1426
115
    BlockGenerator::EscapeUserVectorTy EscapeUsers;
1427
115
    for (auto *U : MAAccInst->users())
1428
120
      if (Instruction *UI = dyn_cast<Instruction>(U))
1429
120
        if (!S.contains(UI))
1430
7
          EscapeUsers.push_back(UI);
1431
115
1432
115
    if (EscapeUsers.empty())
1433
109
      continue;
1434
6
1435
6
    EscapeMap[MA->getAccessInstruction()] =
1436
6
        std::make_pair(Alloca, std::move(EscapeUsers));
1437
6
  }
1438
105
1439
105
  return true;
1440
105
}
1441
1442
304
void IslNodeBuilder::allocateNewArrays(BBPair StartExitBlocks) {
1443
650
  for (auto &SAI : S.arrays()) {
1444
650
    if (SAI->getBasePtr())
1445
640
      continue;
1446
10
1447
10
    assert(SAI->getNumberOfDimensions() > 0 && SAI->getDimensionSize(0) &&
1448
10
           "The size of the outermost dimension is used to declare newly "
1449
10
           "created arrays that require memory allocation.");
1450
10
1451
10
    Type *NewArrayType = nullptr;
1452
10
1453
10
    // Get the size of the array = size(dim_1)*...*size(dim_n)
1454
10
    uint64_t ArraySizeInt = 1;
1455
30
    for (int i = SAI->getNumberOfDimensions() - 1; i >= 0; 
i--20
) {
1456
20
      auto *DimSize = SAI->getDimensionSize(i);
1457
20
      unsigned UnsignedDimSize = static_cast<const SCEVConstant *>(DimSize)
1458
20
                                     ->getAPInt()
1459
20
                                     .getLimitedValue();
1460
20
1461
20
      if (!NewArrayType)
1462
10
        NewArrayType = SAI->getElementType();
1463
20
1464
20
      NewArrayType = ArrayType::get(NewArrayType, UnsignedDimSize);
1465
20
      ArraySizeInt *= UnsignedDimSize;
1466
20
    }
1467
10
1468
10
    if (SAI->isOnHeap()) {
1469
3
      LLVMContext &Ctx = NewArrayType->getContext();
1470
3
1471
3
      // Get the IntPtrTy from the Datalayout
1472
3
      auto IntPtrTy = DL.getIntPtrType(Ctx);
1473
3
1474
3
      // Get the size of the element type in bits
1475
3
      unsigned Size = SAI->getElemSizeInBytes();
1476
3
1477
3
      // Insert the malloc call at polly.start
1478
3
      auto InstIt = std::get<0>(StartExitBlocks)->getTerminator();
1479
3
      auto *CreatedArray = CallInst::CreateMalloc(
1480
3
          &*InstIt, IntPtrTy, SAI->getElementType(),
1481
3
          ConstantInt::get(Type::getInt64Ty(Ctx), Size),
1482
3
          ConstantInt::get(Type::getInt64Ty(Ctx), ArraySizeInt), nullptr,
1483
3
          SAI->getName());
1484
3
1485
3
      SAI->setBasePtr(CreatedArray);
1486
3
1487
3
      // Insert the free call at polly.exiting
1488
3
      CallInst::CreateFree(CreatedArray,
1489
3
                           std::get<1>(StartExitBlocks)->getTerminator());
1490
7
    } else {
1491
7
      auto InstIt = Builder.GetInsertBlock()
1492
7
                        ->getParent()
1493
7
                        ->getEntryBlock()
1494
7
                        .getTerminator();
1495
7
1496
7
      auto *CreatedArray = new AllocaInst(NewArrayType, DL.getAllocaAddrSpace(),
1497
7
                                          SAI->getName(), &*InstIt);
1498
7
      CreatedArray->setAlignment(PollyTargetFirstLevelCacheLineSize);
1499
7
      SAI->setBasePtr(CreatedArray);
1500
7
    }
1501
10
  }
1502
304
}
1503
1504
304
bool IslNodeBuilder::preloadInvariantLoads() {
1505
304
  auto &InvariantEquivClasses = S.getInvariantAccesses();
1506
304
  if (InvariantEquivClasses.empty())
1507
240
    return true;
1508
64
1509
64
  BasicBlock *PreLoadBB = SplitBlock(Builder.GetInsertBlock(),
1510
64
                                     &*Builder.GetInsertPoint(), &DT, &LI);
1511
64
  PreLoadBB->setName("polly.preload.begin");
1512
64
  Builder.SetInsertPoint(&PreLoadBB->front());
1513
64
1514
64
  for (auto &IAClass : InvariantEquivClasses)
1515
111
    if (!preloadInvariantEquivClass(IAClass))
1516
4
      return false;
1517
64
1518
64
  
return true60
;
1519
64
}
1520
1521
300
void IslNodeBuilder::addParameters(__isl_take isl_set *Context) {
1522
300
  // Materialize values for the parameters of the SCoP.
1523
300
  materializeParameters();
1524
300
1525
300
  // materialize the outermost dimension parameters for a Fortran array.
1526
300
  // NOTE: materializeParameters() does not work since it looks through
1527
300
  // the SCEVs. We don't have a corresponding SCEV for the array size
1528
300
  // parameter
1529
300
  materializeFortranArrayOutermostDimension();
1530
300
1531
300
  // Generate values for the current loop iteration for all surrounding loops.
1532
300
  //
1533
300
  // We may also reference loops outside of the scop which do not contain the
1534
300
  // scop itself, but as the number of such scops may be arbitrarily large we do
1535
300
  // not generate code for them here, but only at the point of code generation
1536
300
  // where these values are needed.
1537
300
  Loop *L = LI.getLoopFor(S.getEntry());
1538
300
1539
531
  while (L != nullptr && 
S.contains(L)256
)
1540
231
    L = L->getParentLoop();
1541
300
1542
325
  while (L != nullptr) {
1543
25
    materializeNonScopLoopInductionVariable(L);
1544
25
    L = L->getParentLoop();
1545
25
  }
1546
300
1547
300
  isl_set_free(Context);
1548
300
}
1549
1550
197
Value *IslNodeBuilder::generateSCEV(const SCEV *Expr) {
1551
197
  /// We pass the insert location of our Builder, as Polly ensures during IR
1552
197
  /// generation that there is always a valid CFG into which instructions are
1553
197
  /// inserted. As a result, the insertpoint is known to be always followed by a
1554
197
  /// terminator instruction. This means the insert point may be specified by a
1555
197
  /// terminator instruction, but it can never point to an ->end() iterator
1556
197
  /// which does not have a corresponding instruction. Hence, dereferencing
1557
197
  /// the insertpoint to obtain an instruction is known to be save.
1558
197
  ///
1559
197
  /// We also do not need to update the Builder here, as new instructions are
1560
197
  /// always inserted _before_ the given InsertLocation. As a result, the
1561
197
  /// insert location remains valid.
1562
197
  assert(Builder.GetInsertBlock()->end() != Builder.GetInsertPoint() &&
1563
197
         "Insert location points after last valid instruction");
1564
197
  Instruction *InsertLocation = &*Builder.GetInsertPoint();
1565
197
  return expandCodeFor(S, SE, DL, "polly", Expr, Expr->getType(),
1566
197
                       InsertLocation, &ValueMap,
1567
197
                       StartBlock->getSinglePredecessor());
1568
197
}
1569
1570
/// The AST expression we generate to perform the run-time check assumes
1571
/// computations on integer types of infinite size. As we only use 64-bit
1572
/// arithmetic we check for overflows, in case of which we set the result
1573
/// of this run-time check to false to be conservatively correct,
1574
300
Value *IslNodeBuilder::createRTC(isl_ast_expr *Condition) {
1575
300
  auto ExprBuilder = getExprBuilder();
1576
300
1577
300
  // In case the AST expression has integers larger than 64 bit, bail out. The
1578
300
  // resulting LLVM-IR will contain operations on types that use more than 64
1579
300
  // bits. These are -- in case wrapping intrinsics are used -- translated to
1580
300
  // runtime library calls that are not available on all systems (e.g., Android)
1581
300
  // and consequently will result in linker errors.
1582
300
  if (ExprBuilder.hasLargeInts(isl::manage_copy(Condition))) {
1583
7
    isl_ast_expr_free(Condition);
1584
7
    return Builder.getFalse();
1585
7
  }
1586
293
1587
293
  ExprBuilder.setTrackOverflow(true);
1588
293
  Value *RTC = ExprBuilder.create(Condition);
1589
293
  if (!RTC->getType()->isIntegerTy(1))
1590
185
    RTC = Builder.CreateIsNotNull(RTC);
1591
293
  Value *OverflowHappened =
1592
293
      Builder.CreateNot(ExprBuilder.getOverflowState(), "polly.rtc.overflown");
1593
293
1594
293
  if (PollyGenerateRTCPrint) {
1595
0
    auto *F = Builder.GetInsertBlock()->getParent();
1596
0
    RuntimeDebugBuilder::createCPUPrinter(
1597
0
        Builder,
1598
0
        "F: " + F->getName().str() + " R: " + S.getRegion().getNameStr() +
1599
0
            "RTC: ",
1600
0
        RTC, " Overflow: ", OverflowHappened,
1601
0
        "\n"
1602
0
        "  (0 failed, -1 succeeded)\n"
1603
0
        "  (if one or both are 0 falling back to original code, if both are -1 "
1604
0
        "executing Polly code)\n");
1605
0
  }
1606
293
1607
293
  RTC = Builder.CreateAnd(RTC, OverflowHappened, "polly.rtc.result");
1608
293
  ExprBuilder.setTrackOverflow(false);
1609
293
1610
293
  if (!isa<ConstantInt>(RTC))
1611
108
    VersionedScops++;
1612
293
1613
293
  return RTC;
1614
293
}