Coverage Report

Created: 2017-06-23 12:40

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/tools/polly/lib/CodeGen/BlockGenerators.cpp
Line
Count
Source (jump to first uncovered line)
1
//===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// This file implements the BlockGenerator and VectorBlockGenerator classes,
11
// which generate sequential code and vectorized code for a polyhedral
12
// statement, respectively.
13
//
14
//===----------------------------------------------------------------------===//
15
16
#include "polly/CodeGen/BlockGenerators.h"
17
#include "polly/CodeGen/CodeGeneration.h"
18
#include "polly/CodeGen/IslExprBuilder.h"
19
#include "polly/CodeGen/RuntimeDebugBuilder.h"
20
#include "polly/Options.h"
21
#include "polly/ScopInfo.h"
22
#include "polly/Support/GICHelper.h"
23
#include "polly/Support/SCEVValidator.h"
24
#include "polly/Support/ScopHelper.h"
25
#include "polly/Support/VirtualInstruction.h"
26
#include "llvm/Analysis/LoopInfo.h"
27
#include "llvm/Analysis/RegionInfo.h"
28
#include "llvm/Analysis/ScalarEvolution.h"
29
#include "llvm/IR/IntrinsicInst.h"
30
#include "llvm/IR/Module.h"
31
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
32
#include "llvm/Transforms/Utils/Local.h"
33
#include "isl/aff.h"
34
#include "isl/ast.h"
35
#include "isl/ast_build.h"
36
#include "isl/set.h"
37
#include <deque>
38
39
using namespace llvm;
40
using namespace polly;
41
42
static cl::opt<bool> Aligned("enable-polly-aligned",
43
                             cl::desc("Assumed aligned memory accesses."),
44
                             cl::Hidden, cl::init(false), cl::ZeroOrMore,
45
                             cl::cat(PollyCategory));
46
47
bool PollyDebugPrinting;
48
static cl::opt<bool, true> DebugPrintingX(
49
    "polly-codegen-add-debug-printing",
50
    cl::desc("Add printf calls that show the values loaded/stored."),
51
    cl::location(PollyDebugPrinting), cl::Hidden, cl::init(false),
52
    cl::ZeroOrMore, cl::cat(PollyCategory));
53
54
BlockGenerator::BlockGenerator(
55
    PollyIRBuilder &B, LoopInfo &LI, ScalarEvolution &SE, DominatorTree &DT,
56
    AllocaMapTy &ScalarMap, EscapeUsersAllocaMapTy &EscapeMap,
57
    ValueMapT &GlobalMap, IslExprBuilder *ExprBuilder, BasicBlock *StartBlock)
58
    : Builder(B), LI(LI), SE(SE), ExprBuilder(ExprBuilder), DT(DT),
59
      EntryBB(nullptr), ScalarMap(ScalarMap), EscapeMap(EscapeMap),
60
280
      GlobalMap(GlobalMap), StartBlock(StartBlock) {}
61
62
Value *BlockGenerator::trySynthesizeNewValue(ScopStmt &Stmt, Value *Old,
63
                                             ValueMapT &BBMap,
64
                                             LoopToScevMapT &LTS,
65
716
                                             Loop *L) const {
66
716
  if (!SE.isSCEVable(Old->getType()))
67
0
    return nullptr;
68
716
69
716
  const SCEV *Scev = SE.getSCEVAtScope(Old, L);
70
716
  if (!Scev)
71
0
    return nullptr;
72
716
73
716
  
if (716
isa<SCEVCouldNotCompute>(Scev)716
)
74
0
    return nullptr;
75
716
76
716
  const SCEV *NewScev = SCEVLoopAddRecRewriter::rewrite(Scev, LTS, SE);
77
716
  ValueMapT VTV;
78
716
  VTV.insert(BBMap.begin(), BBMap.end());
79
716
  VTV.insert(GlobalMap.begin(), GlobalMap.end());
80
716
81
716
  Scop &S = *Stmt.getParent();
82
716
  const DataLayout &DL = S.getFunction().getParent()->getDataLayout();
83
716
  auto IP = Builder.GetInsertPoint();
84
716
85
716
  assert(IP != Builder.GetInsertBlock()->end() &&
86
716
         "Only instructions can be insert points for SCEVExpander");
87
716
  Value *Expanded =
88
716
      expandCodeFor(S, SE, DL, "polly", NewScev, Old->getType(), &*IP, &VTV,
89
716
                    StartBlock->getSinglePredecessor());
90
716
91
716
  BBMap[Old] = Expanded;
92
716
  return Expanded;
93
716
}
94
95
Value *BlockGenerator::getNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap,
96
2.19k
                                   LoopToScevMapT &LTS, Loop *L) const {
97
2.19k
98
1.38k
  auto lookupGlobally = [this](Value *Old) -> Value * {
99
1.38k
    Value *New = GlobalMap.lookup(Old);
100
1.38k
    if (!New)
101
1.28k
      return nullptr;
102
1.38k
103
1.38k
    // Required by:
104
1.38k
    // * Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded.ll
105
1.38k
    // * Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_different_bb.ll
106
1.38k
    // * Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_pass_only_needed.ll
107
1.38k
    // * Isl/CodeGen/OpenMP/invariant_base_pointers_preloaded.ll
108
1.38k
    // * Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll
109
1.38k
    // * Isl/CodeGen/OpenMP/single_loop_with_loop_invariant_baseptr.ll
110
1.38k
    // GlobalMap should be a mapping from (value in original SCoP) to (copied
111
1.38k
    // value in generated SCoP), without intermediate mappings, which might
112
1.38k
    // easily require transitiveness as well.
113
105
    
if (Value *105
NewRemapped105
= GlobalMap.lookup(New))
114
9
      New = NewRemapped;
115
105
116
105
    // No test case for this code.
117
105
    if (Old->getType()->getScalarSizeInBits() <
118
105
        New->getType()->getScalarSizeInBits())
119
0
      New = Builder.CreateTruncOrBitCast(New, Old->getType());
120
105
121
105
    return New;
122
1.38k
  };
123
2.19k
124
2.19k
  Value *New = nullptr;
125
2.19k
  auto VUse = VirtualUse::create(&Stmt, L, Old, true);
126
2.19k
  switch (VUse.getKind()) {
127
136
  case VirtualUse::Block:
128
136
    // BasicBlock are constants, but the BlockGenerator copies them.
129
136
    New = BBMap.lookup(Old);
130
136
    break;
131
136
132
447
  case VirtualUse::Constant:
133
447
    // Used by:
134
447
    // * Isl/CodeGen/OpenMP/reference-argument-from-non-affine-region.ll
135
447
    // Constants should not be redefined. In this case, the GlobalMap just
136
447
    // contains a mapping to the same constant, which is unnecessary, but
137
447
    // harmless.
138
447
    if ((New = lookupGlobally(Old)))
139
1
      break;
140
447
141
446
    assert(!BBMap.count(Old));
142
446
    New = Old;
143
446
    break;
144
447
145
19
  case VirtualUse::ReadOnly:
146
19
    assert(!GlobalMap.count(Old));
147
19
148
19
    // Required for:
149
19
    // * Isl/CodeGen/MemAccess/create_arrays.ll
150
19
    // * Isl/CodeGen/read-only-scalars.ll
151
19
    // * ScheduleOptimizer/pattern-matching-based-opts_10.ll
152
19
    // For some reason these reload a read-only value. The reloaded value ends
153
19
    // up in BBMap, buts its value should be identical.
154
19
    //
155
19
    // Required for:
156
19
    // * Isl/CodeGen/OpenMP/single_loop_with_param.ll
157
19
    // The parallel subfunctions need to reference the read-only value from the
158
19
    // parent function, this is done by reloading them locally.
159
19
    if ((New = BBMap.lookup(Old)))
160
18
      break;
161
19
162
1
    New = Old;
163
1
    break;
164
19
165
841
  case VirtualUse::Synthesizable:
166
841
    // Used by:
167
841
    // * Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll
168
841
    // * Isl/CodeGen/OpenMP/recomputed-srem.ll
169
841
    // * Isl/CodeGen/OpenMP/reference-other-bb.ll
170
841
    // * Isl/CodeGen/OpenMP/two-parallel-loops-reference-outer-indvar.ll
171
841
    // For some reason synthesizable values end up in GlobalMap. Their values
172
841
    // are the same as trySynthesizeNewValue would return. The legacy
173
841
    // implementation prioritized GlobalMap, so this is what we do here as well.
174
841
    // Ideally, synthesizable values should not end up in GlobalMap.
175
841
    if ((New = lookupGlobally(Old)))
176
6
      break;
177
841
178
841
    // Required for:
179
841
    // * Isl/CodeGen/RuntimeDebugBuilder/combine_different_values.ll
180
841
    // * Isl/CodeGen/getNumberOfIterations.ll
181
841
    // * Isl/CodeGen/non_affine_float_compare.ll
182
841
    // * ScheduleOptimizer/pattern-matching-based-opts_10.ll
183
841
    // Ideally, synthesizable values are synthesized by trySynthesizeNewValue,
184
841
    // not precomputed (SCEVExpander has its own caching mechanism).
185
841
    // These tests fail without this, but I think trySynthesizeNewValue would
186
841
    // just re-synthesize the same instructions.
187
835
    
if (835
(New = BBMap.lookup(Old))835
)
188
119
      break;
189
835
190
716
    New = trySynthesizeNewValue(Stmt, Old, BBMap, LTS, L);
191
716
    break;
192
835
193
98
  case VirtualUse::Hoisted:
194
98
    // TODO: Hoisted invariant loads should be found in GlobalMap only, but not
195
98
    // redefined locally (which will be ignored anyway). That is, the following
196
98
    // assertion should apply: assert(!BBMap.count(Old))
197
98
198
98
    New = lookupGlobally(Old);
199
98
    break;
200
835
201
655
  case VirtualUse::Intra:
202
655
  case VirtualUse::Inter:
203
655
    assert(!GlobalMap.count(Old) &&
204
655
           "Intra and inter-stmt values are never global");
205
655
    New = BBMap.lookup(Old);
206
655
    break;
207
2.19k
  }
208
2.19k
  assert(New && "Unexpected scalar dependence in region!");
209
2.19k
  return New;
210
2.19k
}
211
212
void BlockGenerator::copyInstScalar(ScopStmt &Stmt, Instruction *Inst,
213
621
                                    ValueMapT &BBMap, LoopToScevMapT &LTS) {
214
621
  // We do not generate debug intrinsics as we did not investigate how to
215
621
  // copy them correctly. At the current state, they just crash the code
216
621
  // generation as the meta-data operands are not correctly copied.
217
621
  if (isa<DbgInfoIntrinsic>(Inst))
218
0
    return;
219
621
220
621
  Instruction *NewInst = Inst->clone();
221
621
222
621
  // Replace old operands with the new ones.
223
1.17k
  for (Value *OldOperand : Inst->operands()) {
224
1.17k
    Value *NewOperand =
225
1.17k
        getNewValue(Stmt, OldOperand, BBMap, LTS, getLoopForStmt(Stmt));
226
1.17k
227
1.17k
    if (
!NewOperand1.17k
)
{0
228
0
      assert(!isa<StoreInst>(NewInst) &&
229
0
             "Store instructions are always needed!");
230
0
      NewInst->deleteValue();
231
0
      return;
232
0
    }
233
1.17k
234
1.17k
    NewInst->replaceUsesOfWith(OldOperand, NewOperand);
235
1.17k
  }
236
621
237
621
  Builder.Insert(NewInst);
238
621
  BBMap[Inst] = NewInst;
239
621
240
621
  if (!NewInst->getType()->isVoidTy())
241
520
    NewInst->setName("p_" + Inst->getName());
242
621
}
243
244
Value *
245
BlockGenerator::generateLocationAccessed(ScopStmt &Stmt, MemAccInst Inst,
246
                                         ValueMapT &BBMap, LoopToScevMapT &LTS,
247
592
                                         isl_id_to_ast_expr *NewAccesses) {
248
592
  const MemoryAccess &MA = Stmt.getArrayAccessFor(Inst);
249
592
  return generateLocationAccessed(
250
592
      Stmt, getLoopForStmt(Stmt),
251
592
      Inst.isNull() ? 
nullptr0
:
Inst.getPointerOperand()592
, BBMap, LTS,
252
592
      NewAccesses, MA.getId(), MA.getAccessValue()->getType());
253
592
}
254
255
Value *BlockGenerator::generateLocationAccessed(
256
    ScopStmt &Stmt, Loop *L, Value *Pointer, ValueMapT &BBMap,
257
    LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses, __isl_take isl_id *Id,
258
606
    Type *ExpectedType) {
259
606
  isl_ast_expr *AccessExpr = isl_id_to_ast_expr_get(NewAccesses, Id);
260
606
261
606
  if (
AccessExpr606
)
{86
262
86
    AccessExpr = isl_ast_expr_address_of(AccessExpr);
263
86
    auto Address = ExprBuilder->create(AccessExpr);
264
86
265
86
    // Cast the address of this memory access to a pointer type that has the
266
86
    // same element type as the original access, but uses the address space of
267
86
    // the newly generated pointer.
268
86
    auto OldPtrTy = ExpectedType->getPointerTo();
269
86
    auto NewPtrTy = Address->getType();
270
86
    OldPtrTy = PointerType::get(OldPtrTy->getElementType(),
271
86
                                NewPtrTy->getPointerAddressSpace());
272
86
273
86
    if (OldPtrTy != NewPtrTy)
274
4
      Address = Builder.CreateBitOrPointerCast(Address, OldPtrTy);
275
86
    return Address;
276
86
  }
277
520
  assert(
278
520
      Pointer &&
279
520
      "If expression was not generated, must use the original pointer value");
280
520
  return getNewValue(Stmt, Pointer, BBMap, LTS, L);
281
606
}
282
283
Value *
284
BlockGenerator::getImplicitAddress(MemoryAccess &Access, Loop *L,
285
                                   LoopToScevMapT &LTS, ValueMapT &BBMap,
286
235
                                   __isl_keep isl_id_to_ast_expr *NewAccesses) {
287
235
  if (Access.isLatestArrayKind())
288
14
    return generateLocationAccessed(*Access.getStatement(), L, nullptr, BBMap,
289
14
                                    LTS, NewAccesses, Access.getId(),
290
14
                                    Access.getAccessValue()->getType());
291
235
292
221
  return getOrCreateAlloca(Access);
293
235
}
294
295
3.70k
Loop *BlockGenerator::getLoopForStmt(const ScopStmt &Stmt) const {
296
3.70k
  auto *StmtBB = Stmt.getEntryBlock();
297
3.70k
  return LI.getLoopFor(StmtBB);
298
3.70k
}
299
300
Value *BlockGenerator::generateArrayLoad(ScopStmt &Stmt, LoadInst *Load,
301
                                         ValueMapT &BBMap, LoopToScevMapT &LTS,
302
299
                                         isl_id_to_ast_expr *NewAccesses) {
303
299
  if (Value *PreloadLoad = GlobalMap.lookup(Load))
304
85
    return PreloadLoad;
305
299
306
214
  Value *NewPointer =
307
214
      generateLocationAccessed(Stmt, Load, BBMap, LTS, NewAccesses);
308
214
  Value *ScalarLoad = Builder.CreateAlignedLoad(
309
214
      NewPointer, Load->getAlignment(), Load->getName() + "_p_scalar_");
310
214
311
214
  if (PollyDebugPrinting)
312
5
    RuntimeDebugBuilder::createCPUPrinter(Builder, "Load from ", NewPointer,
313
5
                                          ": ", ScalarLoad, "\n");
314
214
315
214
  return ScalarLoad;
316
299
}
317
318
void BlockGenerator::generateArrayStore(ScopStmt &Stmt, StoreInst *Store,
319
                                        ValueMapT &BBMap, LoopToScevMapT &LTS,
320
325
                                        isl_id_to_ast_expr *NewAccesses) {
321
325
  MemoryAccess &MA = Stmt.getArrayAccessFor(Store);
322
325
  isl::set AccDom = give(isl_map_domain(MA.getAccessRelation()));
323
325
  const char *Subject = isl_id_get_name(give(MA.getId()).keep());
324
325
325
324
  generateConditionalExecution(Stmt, AccDom, Subject, [&, this]() {
326
324
    Value *NewPointer =
327
324
        generateLocationAccessed(Stmt, Store, BBMap, LTS, NewAccesses);
328
324
    Value *ValueOperand = getNewValue(Stmt, Store->getValueOperand(), BBMap,
329
324
                                      LTS, getLoopForStmt(Stmt));
330
324
331
324
    if (PollyDebugPrinting)
332
1
      RuntimeDebugBuilder::createCPUPrinter(Builder, "Store to  ", NewPointer,
333
1
                                            ": ", ValueOperand, "\n");
334
324
335
324
    Builder.CreateAlignedStore(ValueOperand, NewPointer, Store->getAlignment());
336
324
  });
337
325
}
338
339
1.34k
bool BlockGenerator::canSyntheziseInStmt(ScopStmt &Stmt, Instruction *Inst) {
340
1.34k
  Loop *L = getLoopForStmt(Stmt);
341
199
  return (Stmt.isBlockStmt() || !Stmt.getRegion()->contains(L)) &&
342
1.33k
         canSynthesize(Inst, *Stmt.getParent(), &SE, L);
343
1.34k
}
344
345
void BlockGenerator::copyInstruction(ScopStmt &Stmt, Instruction *Inst,
346
                                     ValueMapT &BBMap, LoopToScevMapT &LTS,
347
1.34k
                                     isl_id_to_ast_expr *NewAccesses) {
348
1.34k
  // Terminator instructions control the control flow. They are explicitly
349
1.34k
  // expressed in the clast and do not need to be copied.
350
1.34k
  if (Inst->isTerminator())
351
96
    return;
352
1.34k
353
1.34k
  // Synthesizable statements will be generated on-demand.
354
1.24k
  
if (1.24k
canSyntheziseInStmt(Stmt, Inst)1.24k
)
355
56
    return;
356
1.24k
357
1.19k
  
if (auto *1.19k
Load1.19k
= dyn_cast<LoadInst>(Inst))
{299
358
299
    Value *NewLoad = generateArrayLoad(Stmt, Load, BBMap, LTS, NewAccesses);
359
299
    // Compute NewLoad before its insertion in BBMap to make the insertion
360
299
    // deterministic.
361
299
    BBMap[Load] = NewLoad;
362
299
    return;
363
299
  }
364
1.19k
365
892
  
if (auto *892
Store892
= dyn_cast<StoreInst>(Inst))
{325
366
325
    // Identified as redundant by -polly-simplify.
367
325
    if (!Stmt.getArrayAccessOrNULLFor(Store))
368
0
      return;
369
325
370
325
    generateArrayStore(Stmt, Store, BBMap, LTS, NewAccesses);
371
325
    return;
372
325
  }
373
892
374
567
  
if (auto *567
PHI567
= dyn_cast<PHINode>(Inst))
{42
375
42
    copyPHIInstruction(Stmt, PHI, BBMap, LTS);
376
42
    return;
377
42
  }
378
567
379
567
  // Skip some special intrinsics for which we do not adjust the semantics to
380
567
  // the new schedule. All others are handled like every other instruction.
381
525
  
if (525
isIgnoredIntrinsic(Inst)525
)
382
0
    return;
383
525
384
525
  copyInstScalar(Stmt, Inst, BBMap, LTS);
385
525
}
386
387
341
void BlockGenerator::removeDeadInstructions(BasicBlock *BB, ValueMapT &BBMap) {
388
341
  auto NewBB = Builder.GetInsertBlock();
389
3.28k
  for (auto I = NewBB->rbegin(); 
I != NewBB->rend()3.28k
;
I++2.94k
)
{2.94k
390
2.94k
    Instruction *NewInst = &*I;
391
2.94k
392
2.94k
    if (!isInstructionTriviallyDead(NewInst))
393
2.70k
      continue;
394
2.94k
395
247
    for (auto Pair : BBMap)
396
1.33k
      
if (1.33k
Pair.second == NewInst1.33k
)
{230
397
230
        BBMap.erase(Pair.first);
398
230
      }
399
247
400
247
    NewInst->eraseFromParent();
401
247
    I = NewBB->rbegin();
402
247
  }
403
341
}
404
405
void BlockGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
406
341
                              isl_id_to_ast_expr *NewAccesses) {
407
341
  assert(Stmt.isBlockStmt() &&
408
341
         "Only block statements can be copied by the block generator");
409
341
410
341
  ValueMapT BBMap;
411
341
412
341
  BasicBlock *BB = Stmt.getBasicBlock();
413
341
  copyBB(Stmt, BB, BBMap, LTS, NewAccesses);
414
341
  removeDeadInstructions(BB, BBMap);
415
341
}
416
417
437
BasicBlock *BlockGenerator::splitBB(BasicBlock *BB) {
418
437
  BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
419
437
                                  &*Builder.GetInsertPoint(), &DT, &LI);
420
437
  CopyBB->setName("polly.stmt." + BB->getName());
421
437
  return CopyBB;
422
437
}
423
424
BasicBlock *BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB,
425
                                   ValueMapT &BBMap, LoopToScevMapT &LTS,
426
341
                                   isl_id_to_ast_expr *NewAccesses) {
427
341
  BasicBlock *CopyBB = splitBB(BB);
428
341
  Builder.SetInsertPoint(&CopyBB->front());
429
341
  generateScalarLoads(Stmt, LTS, BBMap, NewAccesses);
430
341
431
341
  copyBB(Stmt, BB, CopyBB, BBMap, LTS, NewAccesses);
432
341
433
341
  // After a basic block was copied store all scalars that escape this block in
434
341
  // their alloca.
435
341
  generateScalarStores(Stmt, LTS, BBMap, NewAccesses);
436
341
  return CopyBB;
437
341
}
438
439
void BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *CopyBB,
440
                            ValueMapT &BBMap, LoopToScevMapT &LTS,
441
437
                            isl_id_to_ast_expr *NewAccesses) {
442
437
  EntryBB = &CopyBB->getParent()->getEntryBlock();
443
437
444
437
  if (Stmt.isBlockStmt())
445
341
    for (Instruction *Inst : Stmt.getInstructions())
446
972
      copyInstruction(Stmt, Inst, BBMap, LTS, NewAccesses);
447
437
  else
448
96
    for (Instruction &Inst : *BB)
449
295
      copyInstruction(Stmt, &Inst, BBMap, LTS, NewAccesses);
450
437
}
451
452
224
Value *BlockGenerator::getOrCreateAlloca(const MemoryAccess &Access) {
453
224
  assert(!Access.isLatestArrayKind() && "Trying to get alloca for array kind");
454
224
455
224
  return getOrCreateAlloca(Access.getLatestScopArrayInfo());
456
224
}
457
458
303
Value *BlockGenerator::getOrCreateAlloca(const ScopArrayInfo *Array) {
459
303
  assert(!Array->isArrayKind() && "Trying to get alloca for array kind");
460
303
461
303
  auto &Addr = ScalarMap[Array];
462
303
463
303
  if (
Addr303
)
{172
464
172
    // Allow allocas to be (temporarily) redirected once by adding a new
465
172
    // old-alloca-addr to new-addr mapping to GlobalMap. This functionality
466
172
    // is used for example by the OpenMP code generation where a first use
467
172
    // of a scalar while still in the host code allocates a normal alloca with
468
172
    // getOrCreateAlloca. When the values of this scalar are accessed during
469
172
    // the generation of the parallel subfunction, these values are copied over
470
172
    // to the parallel subfunction and each request for a scalar alloca slot
471
172
    // must be forwarded to the temporary in-subfunction slot. This mapping is
472
172
    // removed when the subfunction has been generated and again normal host
473
172
    // code is generated. Due to the following reasons it is not possible to
474
172
    // perform the GlobalMap lookup right after creating the alloca below, but
475
172
    // instead we need to check GlobalMap at each call to getOrCreateAlloca:
476
172
    //
477
172
    //   1) GlobalMap may be changed multiple times (for each parallel loop),
478
172
    //   2) The temporary mapping is commonly only known after the initial
479
172
    //      alloca has already been generated, and
480
172
    //   3) The original alloca value must be restored after leaving the
481
172
    //      sub-function.
482
172
    if (Value *NewAddr = GlobalMap.lookup(&*Addr))
483
2
      return NewAddr;
484
170
    return Addr;
485
172
  }
486
303
487
131
  Type *Ty = Array->getElementType();
488
131
  Value *ScalarBase = Array->getBasePtr();
489
131
  std::string NameExt;
490
131
  if (Array->isPHIKind())
491
35
    NameExt = ".phiops";
492
131
  else
493
96
    NameExt = ".s2a";
494
131
495
131
  const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout();
496
131
497
131
  Addr = new AllocaInst(Ty, DL.getAllocaAddrSpace(),
498
131
                        ScalarBase->getName() + NameExt);
499
131
  EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock();
500
131
  Addr->insertBefore(&*EntryBB->getFirstInsertionPt());
501
131
502
131
  return Addr;
503
303
}
504
505
74
void BlockGenerator::handleOutsideUsers(const Scop &S, ScopArrayInfo *Array) {
506
74
  Instruction *Inst = cast<Instruction>(Array->getBasePtr());
507
74
508
74
  // If there are escape users we get the alloca for this instruction and put it
509
74
  // in the EscapeMap for later finalization. Lastly, if the instruction was
510
74
  // copied multiple times we already did this and can exit.
511
74
  if (EscapeMap.count(Inst))
512
6
    return;
513
74
514
68
  EscapeUserVectorTy EscapeUsers;
515
104
  for (User *U : Inst->users()) {
516
104
517
104
    // Non-instruction user will never escape.
518
104
    Instruction *UI = dyn_cast<Instruction>(U);
519
104
    if (!UI)
520
0
      continue;
521
104
522
104
    
if (104
S.contains(UI)104
)
523
66
      continue;
524
104
525
38
    EscapeUsers.push_back(UI);
526
38
  }
527
68
528
68
  // Exit if no escape uses were found.
529
68
  if (EscapeUsers.empty())
530
36
    return;
531
68
532
68
  // Get or create an escape alloca for this instruction.
533
32
  auto *ScalarAddr = getOrCreateAlloca(Array);
534
32
535
32
  // Remember that this instruction has escape uses and the escape alloca.
536
32
  EscapeMap[Inst] = std::make_pair(ScalarAddr, std::move(EscapeUsers));
537
32
}
538
539
void BlockGenerator::generateScalarLoads(
540
    ScopStmt &Stmt, LoopToScevMapT &LTS, ValueMapT &BBMap,
541
374
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
542
756
  for (MemoryAccess *MA : Stmt) {
543
756
    if (
MA->isOriginalArrayKind() || 756
MA->isWrite()235
)
544
663
      continue;
545
756
546
756
#ifndef NDEBUG
547
    auto *StmtDom = Stmt.getDomain();
548
    auto *AccDom = isl_map_domain(MA->getAccessRelation());
549
    assert(isl_set_is_subset(StmtDom, AccDom) &&
550
           "Scalar must be loaded in all statement instances");
551
    isl_set_free(StmtDom);
552
    isl_set_free(AccDom);
553
#endif
554
756
555
93
    auto *Address =
556
93
        getImplicitAddress(*MA, getLoopForStmt(Stmt), LTS, BBMap, NewAccesses);
557
93
    assert((!isa<Instruction>(Address) ||
558
93
            DT.dominates(cast<Instruction>(Address)->getParent(),
559
93
                         Builder.GetInsertBlock())) &&
560
93
           "Domination violation");
561
93
    BBMap[MA->getAccessValue()] =
562
93
        Builder.CreateLoad(Address, Address->getName() + ".reload");
563
93
  }
564
374
}
565
566
Value *BlockGenerator::buildContainsCondition(ScopStmt &Stmt,
567
6
                                              const isl::set &Subdomain) {
568
6
  isl::ast_build AstBuild = give(isl_ast_build_copy(Stmt.getAstBuild()));
569
6
  isl::set Domain = give(Stmt.getDomain());
570
6
571
6
  isl::union_map USchedule = AstBuild.get_schedule();
572
6
  USchedule = USchedule.intersect_domain(Domain);
573
6
574
6
  assert(!USchedule.is_empty());
575
6
  isl::map Schedule = isl::map::from_union_map(USchedule);
576
6
577
6
  isl::set ScheduledDomain = Schedule.range();
578
6
  isl::set ScheduledSet = Subdomain.apply(Schedule);
579
6
580
6
  isl::ast_build RestrictedBuild = AstBuild.restrict(ScheduledDomain);
581
6
582
6
  isl::ast_expr IsInSet = RestrictedBuild.expr_from(ScheduledSet);
583
6
  Value *IsInSetExpr = ExprBuilder->create(IsInSet.copy());
584
6
  IsInSetExpr = Builder.CreateICmpNE(
585
6
      IsInSetExpr, ConstantInt::get(IsInSetExpr->getType(), 0));
586
6
587
6
  return IsInSetExpr;
588
6
}
589
590
void BlockGenerator::generateConditionalExecution(
591
    ScopStmt &Stmt, const isl::set &Subdomain, StringRef Subject,
592
467
    const std::function<void()> &GenThenFunc) {
593
467
  isl::set StmtDom = give(Stmt.getDomain());
594
467
595
467
  // Don't call GenThenFunc if it is never executed. An ast index expression
596
467
  // might not be defined in this case.
597
467
  if (Subdomain.is_empty())
598
1
    return;
599
467
600
467
  // If the condition is a tautology, don't generate a condition around the
601
467
  // code.
602
466
  bool IsPartialWrite =
603
466
      !StmtDom.intersect_params(give(Stmt.getParent()->getContext()))
604
466
           .is_subset(Subdomain);
605
466
  if (
!IsPartialWrite466
)
{460
606
460
    GenThenFunc();
607
460
    return;
608
460
  }
609
466
610
466
  // Generate the condition.
611
6
  Value *Cond = buildContainsCondition(Stmt, Subdomain);
612
6
  BasicBlock *HeadBlock = Builder.GetInsertBlock();
613
6
  StringRef BlockName = HeadBlock->getName();
614
6
615
6
  // Generate the conditional block.
616
6
  SplitBlockAndInsertIfThen(Cond, &*Builder.GetInsertPoint(), false, nullptr,
617
6
                            &DT, &LI);
618
6
  BranchInst *Branch = cast<BranchInst>(HeadBlock->getTerminator());
619
6
  BasicBlock *ThenBlock = Branch->getSuccessor(0);
620
6
  BasicBlock *TailBlock = Branch->getSuccessor(1);
621
6
622
6
  // Assign descriptive names.
623
6
  if (auto *CondInst = dyn_cast<Instruction>(Cond))
624
6
    CondInst->setName("polly." + Subject + ".cond");
625
6
  ThenBlock->setName(BlockName + "." + Subject + ".partial");
626
6
  TailBlock->setName(BlockName + ".cont");
627
6
628
6
  // Put the client code into the conditional block and continue in the merge
629
6
  // block afterwards.
630
6
  Builder.SetInsertPoint(ThenBlock, ThenBlock->getFirstInsertionPt());
631
6
  GenThenFunc();
632
6
  Builder.SetInsertPoint(TailBlock, TailBlock->getFirstInsertionPt());
633
6
}
634
635
void BlockGenerator::generateScalarStores(
636
    ScopStmt &Stmt, LoopToScevMapT &LTS, ValueMapT &BBMap,
637
341
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
638
341
  Loop *L = LI.getLoopFor(Stmt.getBasicBlock());
639
341
640
341
  assert(Stmt.isBlockStmt() &&
641
341
         "Region statements need to use the generateScalarStores() function in "
642
341
         "the RegionGenerator");
643
341
644
663
  for (MemoryAccess *MA : Stmt) {
645
663
    if (
MA->isOriginalArrayKind() || 663
MA->isRead()203
)
646
542
      continue;
647
663
648
121
    isl::set AccDom = give(isl_map_domain(MA->getAccessRelation()));
649
121
    const char *Subject = isl_id_get_name(give(MA->getId()).keep());
650
121
651
121
    generateConditionalExecution(Stmt, AccDom, Subject, [&, this, MA]() {
652
121
      Value *Val = MA->getAccessValue();
653
121
      if (
MA->isAnyPHIKind()121
)
{59
654
59
        assert(
655
59
            MA->getIncoming().size() >= 1 &&
656
59
            "Block statements have exactly one exiting block, or multiple but "
657
59
            "with same incoming block and value");
658
59
        assert(std::all_of(MA->getIncoming().begin(), MA->getIncoming().end(),
659
59
                           [&](std::pair<BasicBlock *, Value *> p) -> bool {
660
59
                             return p.first == Stmt.getBasicBlock();
661
59
                           }) &&
662
59
               "Incoming block must be statement's block");
663
59
        Val = MA->getIncoming()[0].second;
664
59
      }
665
121
      auto Address = getImplicitAddress(*MA, getLoopForStmt(Stmt), LTS, BBMap,
666
121
                                        NewAccesses);
667
121
668
121
      Val = getNewValue(Stmt, Val, BBMap, LTS, L);
669
121
      assert((!isa<Instruction>(Val) ||
670
121
              DT.dominates(cast<Instruction>(Val)->getParent(),
671
121
                           Builder.GetInsertBlock())) &&
672
121
             "Domination violation");
673
121
      assert((!isa<Instruction>(Address) ||
674
121
              DT.dominates(cast<Instruction>(Address)->getParent(),
675
121
                           Builder.GetInsertBlock())) &&
676
121
             "Domination violation");
677
121
      Builder.CreateStore(Val, Address);
678
121
679
121
    });
680
121
  }
681
341
}
682
683
275
void BlockGenerator::createScalarInitialization(Scop &S) {
684
275
  BasicBlock *ExitBB = S.getExit();
685
275
  BasicBlock *PreEntryBB = S.getEnteringBlock();
686
275
687
275
  Builder.SetInsertPoint(&*StartBlock->begin());
688
275
689
581
  for (auto &Array : S.arrays()) {
690
581
    if (Array->getNumberOfDimensions() != 0)
691
437
      continue;
692
144
    
if (144
Array->isPHIKind()144
)
{36
693
36
      // For PHI nodes, the only values we need to store are the ones that
694
36
      // reach the PHI node from outside the region. In general there should
695
36
      // only be one such incoming edge and this edge should enter through
696
36
      // 'PreEntryBB'.
697
36
      auto PHI = cast<PHINode>(Array->getBasePtr());
698
36
699
105
      for (auto BI = PHI->block_begin(), BE = PHI->block_end(); 
BI != BE105
;
BI++69
)
700
69
        
if (69
!S.contains(*BI) && 69
*BI != PreEntryBB14
)
701
0
          llvm_unreachable("Incoming edges from outside the scop should always "
702
36
                           "come from PreEntryBB");
703
36
704
36
      int Idx = PHI->getBasicBlockIndex(PreEntryBB);
705
36
      if (Idx < 0)
706
22
        continue;
707
36
708
14
      Value *ScalarValue = PHI->getIncomingValue(Idx);
709
14
710
14
      Builder.CreateStore(ScalarValue, getOrCreateAlloca(Array));
711
14
      continue;
712
36
    }
713
144
714
108
    auto *Inst = dyn_cast<Instruction>(Array->getBasePtr());
715
108
716
108
    if (
Inst && 108
S.contains(Inst)98
)
717
74
      continue;
718
108
719
108
    // PHI nodes that are not marked as such in their SAI object are either exit
720
108
    // PHI nodes we model as common scalars but without initialization, or
721
108
    // incoming phi nodes that need to be initialized. Check if the first is the
722
108
    // case for Inst and do not create and initialize memory if so.
723
34
    
if (auto *34
PHI34
= dyn_cast_or_null<PHINode>(Inst))
724
23
      
if (23
!S.hasSingleExitEdge() && 23
PHI->getBasicBlockIndex(ExitBB) >= 022
)
725
22
        continue;
726
34
727
12
    Builder.CreateStore(Array->getBasePtr(), getOrCreateAlloca(Array));
728
12
  }
729
275
}
730
731
275
void BlockGenerator::createScalarFinalization(Scop &S) {
732
275
  // The exit block of the __unoptimized__ region.
733
275
  BasicBlock *ExitBB = S.getExitingBlock();
734
275
  // The merge block __just after__ the region and the optimized region.
735
275
  BasicBlock *MergeBB = S.getExit();
736
275
737
275
  // The exit block of the __optimized__ region.
738
275
  BasicBlock *OptExitBB = *(pred_begin(MergeBB));
739
275
  if (OptExitBB == ExitBB)
740
0
    OptExitBB = *(++pred_begin(MergeBB));
741
275
742
275
  Builder.SetInsertPoint(OptExitBB->getTerminator());
743
38
  for (const auto &EscapeMapping : EscapeMap) {
744
38
    // Extract the escaping instruction and the escaping users as well as the
745
38
    // alloca the instruction was demoted to.
746
38
    Instruction *EscapeInst = EscapeMapping.first;
747
38
    const auto &EscapeMappingValue = EscapeMapping.second;
748
38
    const EscapeUserVectorTy &EscapeUsers = EscapeMappingValue.second;
749
38
    Value *ScalarAddr = EscapeMappingValue.first;
750
38
751
38
    // Reload the demoted instruction in the optimized version of the SCoP.
752
38
    Value *EscapeInstReload =
753
38
        Builder.CreateLoad(ScalarAddr, EscapeInst->getName() + ".final_reload");
754
38
    EscapeInstReload =
755
38
        Builder.CreateBitOrPointerCast(EscapeInstReload, EscapeInst->getType());
756
38
757
38
    // Create the merge PHI that merges the optimized and unoptimized version.
758
38
    PHINode *MergePHI = PHINode::Create(EscapeInst->getType(), 2,
759
38
                                        EscapeInst->getName() + ".merge");
760
38
    MergePHI->insertBefore(&*MergeBB->getFirstInsertionPt());
761
38
762
38
    // Add the respective values to the merge PHI.
763
38
    MergePHI->addIncoming(EscapeInstReload, OptExitBB);
764
38
    MergePHI->addIncoming(EscapeInst, ExitBB);
765
38
766
38
    // The information of scalar evolution about the escaping instruction needs
767
38
    // to be revoked so the new merged instruction will be used.
768
38
    if (SE.isSCEVable(EscapeInst->getType()))
769
31
      SE.forgetValue(EscapeInst);
770
38
771
38
    // Replace all uses of the demoted instruction with the merge PHI.
772
38
    for (Instruction *EUser : EscapeUsers)
773
45
      EUser->replaceUsesOfWith(EscapeInst, MergePHI);
774
38
  }
775
275
}
776
777
275
void BlockGenerator::findOutsideUsers(Scop &S) {
778
581
  for (auto &Array : S.arrays()) {
779
581
780
581
    if (Array->getNumberOfDimensions() != 0)
781
437
      continue;
782
581
783
144
    
if (144
Array->isPHIKind()144
)
784
36
      continue;
785
144
786
108
    auto *Inst = dyn_cast<Instruction>(Array->getBasePtr());
787
108
788
108
    if (!Inst)
789
10
      continue;
790
108
791
108
    // Scop invariant hoisting moves some of the base pointers out of the scop.
792
108
    // We can ignore these, as the invariant load hoisting already registers the
793
108
    // relevant outside users.
794
98
    
if (98
!S.contains(Inst)98
)
795
24
      continue;
796
98
797
74
    handleOutsideUsers(S, Array);
798
74
  }
799
275
}
800
801
275
void BlockGenerator::createExitPHINodeMerges(Scop &S) {
802
275
  if (S.hasSingleExitEdge())
803
216
    return;
804
275
805
59
  auto *ExitBB = S.getExitingBlock();
806
59
  auto *MergeBB = S.getExit();
807
59
  auto *AfterMergeBB = MergeBB->getSingleSuccessor();
808
59
  BasicBlock *OptExitBB = *(pred_begin(MergeBB));
809
59
  if (OptExitBB == ExitBB)
810
0
    OptExitBB = *(++pred_begin(MergeBB));
811
59
812
59
  Builder.SetInsertPoint(OptExitBB->getTerminator());
813
59
814
124
  for (auto &SAI : S.arrays()) {
815
124
    auto *Val = SAI->getBasePtr();
816
124
817
124
    // Only Value-like scalars need a merge PHI. Exit block PHIs receive either
818
124
    // the original PHI's value or the reloaded incoming values from the
819
124
    // generated code. An llvm::Value is merged between the original code's
820
124
    // value or the generated one.
821
124
    if (!SAI->isExitPHIKind())
822
103
      continue;
823
124
824
21
    PHINode *PHI = dyn_cast<PHINode>(Val);
825
21
    if (!PHI)
826
0
      continue;
827
21
828
21
    
if (21
PHI->getParent() != AfterMergeBB21
)
829
0
      continue;
830
21
831
21
    std::string Name = PHI->getName();
832
21
    Value *ScalarAddr = getOrCreateAlloca(SAI);
833
21
    Value *Reload = Builder.CreateLoad(ScalarAddr, Name + ".ph.final_reload");
834
21
    Reload = Builder.CreateBitOrPointerCast(Reload, PHI->getType());
835
21
    Value *OriginalValue = PHI->getIncomingValueForBlock(MergeBB);
836
21
    assert((!isa<Instruction>(OriginalValue) ||
837
21
            cast<Instruction>(OriginalValue)->getParent() != MergeBB) &&
838
21
           "Original value must no be one we just generated.");
839
21
    auto *MergePHI = PHINode::Create(PHI->getType(), 2, Name + ".ph.merge");
840
21
    MergePHI->insertBefore(&*MergeBB->getFirstInsertionPt());
841
21
    MergePHI->addIncoming(Reload, OptExitBB);
842
21
    MergePHI->addIncoming(OriginalValue, ExitBB);
843
21
    int Idx = PHI->getBasicBlockIndex(MergeBB);
844
21
    PHI->setIncomingValue(Idx, MergePHI);
845
21
  }
846
59
}
847
848
275
void BlockGenerator::invalidateScalarEvolution(Scop &S) {
849
275
  for (auto &Stmt : S)
850
375
    
if (375
Stmt.isCopyStmt()375
)
851
2
      continue;
852
373
    else 
if (373
Stmt.isBlockStmt()373
)
853
341
      for (auto &Inst : *Stmt.getBasicBlock())
854
1.95k
        SE.forgetValue(&Inst);
855
32
    else 
if (32
Stmt.isRegionStmt()32
)
856
32
      for (auto *BB : Stmt.getRegion()->blocks())
857
93
        for (auto &Inst : *BB)
858
281
          SE.forgetValue(&Inst);
859
32
    else
860
0
      llvm_unreachable("Unexpected statement type found");
861
275
862
275
  // Invalidate SCEV of loops surrounding the EscapeUsers.
863
38
  for (const auto &EscapeMapping : EscapeMap) {
864
38
    const EscapeUserVectorTy &EscapeUsers = EscapeMapping.second.second;
865
45
    for (Instruction *EUser : EscapeUsers) {
866
45
      if (Loop *L = LI.getLoopFor(EUser->getParent()))
867
34
        
while (17
L34
)
{17
868
17
          SE.forgetLoop(L);
869
17
          L = L->getParentLoop();
870
17
        }
871
45
    }
872
38
  }
873
275
}
874
875
275
void BlockGenerator::finalizeSCoP(Scop &S) {
876
275
  findOutsideUsers(S);
877
275
  createScalarInitialization(S);
878
275
  createExitPHINodeMerges(S);
879
275
  createScalarFinalization(S);
880
275
  invalidateScalarEvolution(S);
881
275
}
882
883
VectorBlockGenerator::VectorBlockGenerator(BlockGenerator &BlockGen,
884
                                           std::vector<LoopToScevMapT> &VLTS,
885
                                           isl_map *Schedule)
886
20
    : BlockGenerator(BlockGen), VLTS(VLTS), Schedule(Schedule) {
887
20
  assert(Schedule && "No statement domain provided");
888
20
}
889
890
Value *VectorBlockGenerator::getVectorValue(ScopStmt &Stmt, Value *Old,
891
                                            ValueMapT &VectorMap,
892
                                            VectorValueMapT &ScalarMaps,
893
32
                                            Loop *L) {
894
32
  if (Value *NewValue = VectorMap.lookup(Old))
895
29
    return NewValue;
896
32
897
3
  int Width = getVectorWidth();
898
3
899
3
  Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));
900
3
901
15
  for (int Lane = 0; 
Lane < Width15
;
Lane++12
)
902
12
    Vector = Builder.CreateInsertElement(
903
12
        Vector, getNewValue(Stmt, Old, ScalarMaps[Lane], VLTS[Lane], L),
904
12
        Builder.getInt32(Lane));
905
3
906
3
  VectorMap[Old] = Vector;
907
3
908
3
  return Vector;
909
32
}
910
911
27
Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
912
27
  PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
913
27
  assert(PointerTy && "PointerType expected");
914
27
915
27
  Type *ScalarType = PointerTy->getElementType();
916
27
  VectorType *VectorType = VectorType::get(ScalarType, Width);
917
27
918
27
  return PointerType::getUnqual(VectorType);
919
27
}
920
921
Value *VectorBlockGenerator::generateStrideOneLoad(
922
    ScopStmt &Stmt, LoadInst *Load, VectorValueMapT &ScalarMaps,
923
8
    __isl_keep isl_id_to_ast_expr *NewAccesses, bool NegativeStride = false) {
924
8
  unsigned VectorWidth = getVectorWidth();
925
8
  auto *Pointer = Load->getPointerOperand();
926
8
  Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
927
7
  unsigned Offset = NegativeStride ? 
VectorWidth - 11
:
07
;
928
8
929
8
  Value *NewPointer = generateLocationAccessed(Stmt, Load, ScalarMaps[Offset],
930
8
                                               VLTS[Offset], NewAccesses);
931
8
  Value *VectorPtr =
932
8
      Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
933
8
  LoadInst *VecLoad =
934
8
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full");
935
8
  if (!Aligned)
936
8
    VecLoad->setAlignment(8);
937
8
938
8
  if (
NegativeStride8
)
{1
939
1
    SmallVector<Constant *, 16> Indices;
940
5
    for (int i = VectorWidth - 1; 
i >= 05
;
i--4
)
941
4
      Indices.push_back(ConstantInt::get(Builder.getInt32Ty(), i));
942
1
    Constant *SV = llvm::ConstantVector::get(Indices);
943
1
    Value *RevVecLoad = Builder.CreateShuffleVector(
944
1
        VecLoad, VecLoad, SV, Load->getName() + "_reverse");
945
1
    return RevVecLoad;
946
1
  }
947
8
948
7
  return VecLoad;
949
8
}
950
951
Value *VectorBlockGenerator::generateStrideZeroLoad(
952
    ScopStmt &Stmt, LoadInst *Load, ValueMapT &BBMap,
953
3
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
954
3
  auto *Pointer = Load->getPointerOperand();
955
3
  Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
956
3
  Value *NewPointer =
957
3
      generateLocationAccessed(Stmt, Load, BBMap, VLTS[0], NewAccesses);
958
3
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
959
3
                                           Load->getName() + "_p_vec_p");
960
3
  LoadInst *ScalarLoad =
961
3
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one");
962
3
963
3
  if (!Aligned)
964
3
    ScalarLoad->setAlignment(8);
965
3
966
3
  Constant *SplatVector = Constant::getNullValue(
967
3
      VectorType::get(Builder.getInt32Ty(), getVectorWidth()));
968
3
969
3
  Value *VectorLoad = Builder.CreateShuffleVector(
970
3
      ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat");
971
3
  return VectorLoad;
972
3
}
973
974
Value *VectorBlockGenerator::generateUnknownStrideLoad(
975
    ScopStmt &Stmt, LoadInst *Load, VectorValueMapT &ScalarMaps,
976
3
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
977
3
  int VectorWidth = getVectorWidth();
978
3
  auto *Pointer = Load->getPointerOperand();
979
3
  VectorType *VectorType = VectorType::get(
980
3
      dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth);
981
3
982
3
  Value *Vector = UndefValue::get(VectorType);
983
3
984
23
  for (int i = 0; 
i < VectorWidth23
;
i++20
)
{20
985
20
    Value *NewPointer = generateLocationAccessed(Stmt, Load, ScalarMaps[i],
986
20
                                                 VLTS[i], NewAccesses);
987
20
    Value *ScalarLoad =
988
20
        Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
989
20
    Vector = Builder.CreateInsertElement(
990
20
        Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_");
991
20
  }
992
3
993
3
  return Vector;
994
3
}
995
996
void VectorBlockGenerator::generateLoad(
997
    ScopStmt &Stmt, LoadInst *Load, ValueMapT &VectorMap,
998
20
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
999
20
  if (Value *
PreloadLoad20
= GlobalMap.lookup(Load))
{6
1000
6
    VectorMap[Load] = Builder.CreateVectorSplat(getVectorWidth(), PreloadLoad,
1001
6
                                                Load->getName() + "_p");
1002
6
    return;
1003
6
  }
1004
20
1005
14
  
if (14
!VectorType::isValidElementType(Load->getType())14
)
{0
1006
0
    for (int i = 0; 
i < getVectorWidth()0
;
i++0
)
1007
0
      ScalarMaps[i][Load] =
1008
0
          generateArrayLoad(Stmt, Load, ScalarMaps[i], VLTS[i], NewAccesses);
1009
0
    return;
1010
0
  }
1011
14
1012
14
  const MemoryAccess &Access = Stmt.getArrayAccessFor(Load);
1013
14
1014
14
  // Make sure we have scalar values available to access the pointer to
1015
14
  // the data location.
1016
14
  extractScalarValues(Load, VectorMap, ScalarMaps);
1017
14
1018
14
  Value *NewLoad;
1019
14
  if (Access.isStrideZero(isl_map_copy(Schedule)))
1020
3
    NewLoad = generateStrideZeroLoad(Stmt, Load, ScalarMaps[0], NewAccesses);
1021
11
  else 
if (11
Access.isStrideOne(isl_map_copy(Schedule))11
)
1022
7
    NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps, NewAccesses);
1023
4
  else 
if (4
Access.isStrideX(isl_map_copy(Schedule), -1)4
)
1024
1
    NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps, NewAccesses, true);
1025
4
  else
1026
3
    NewLoad = generateUnknownStrideLoad(Stmt, Load, ScalarMaps, NewAccesses);
1027
14
1028
14
  VectorMap[Load] = NewLoad;
1029
14
}
1030
1031
void VectorBlockGenerator::copyUnaryInst(ScopStmt &Stmt, UnaryInstruction *Inst,
1032
                                         ValueMapT &VectorMap,
1033
1
                                         VectorValueMapT &ScalarMaps) {
1034
1
  int VectorWidth = getVectorWidth();
1035
1
  Value *NewOperand = getVectorValue(Stmt, Inst->getOperand(0), VectorMap,
1036
1
                                     ScalarMaps, getLoopForStmt(Stmt));
1037
1
1038
1
  assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction");
1039
1
1040
1
  const CastInst *Cast = dyn_cast<CastInst>(Inst);
1041
1
  VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
1042
1
  VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
1043
1
}
1044
1045
void VectorBlockGenerator::copyBinaryInst(ScopStmt &Stmt, BinaryOperator *Inst,
1046
                                          ValueMapT &VectorMap,
1047
7
                                          VectorValueMapT &ScalarMaps) {
1048
7
  Loop *L = getLoopForStmt(Stmt);
1049
7
  Value *OpZero = Inst->getOperand(0);
1050
7
  Value *OpOne = Inst->getOperand(1);
1051
7
1052
7
  Value *NewOpZero, *NewOpOne;
1053
7
  NewOpZero = getVectorValue(Stmt, OpZero, VectorMap, ScalarMaps, L);
1054
7
  NewOpOne = getVectorValue(Stmt, OpOne, VectorMap, ScalarMaps, L);
1055
7
1056
7
  Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne,
1057
7
                                       Inst->getName() + "p_vec");
1058
7
  VectorMap[Inst] = NewInst;
1059
7
}
1060
1061
void VectorBlockGenerator::copyStore(
1062
    ScopStmt &Stmt, StoreInst *Store, ValueMapT &VectorMap,
1063
17
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
1064
17
  const MemoryAccess &Access = Stmt.getArrayAccessFor(Store);
1065
17
1066
17
  auto *Pointer = Store->getPointerOperand();
1067
17
  Value *Vector = getVectorValue(Stmt, Store->getValueOperand(), VectorMap,
1068
17
                                 ScalarMaps, getLoopForStmt(Stmt));
1069
17
1070
17
  // Make sure we have scalar values available to access the pointer to
1071
17
  // the data location.
1072
17
  extractScalarValues(Store, VectorMap, ScalarMaps);
1073
17
1074
17
  if (
Access.isStrideOne(isl_map_copy(Schedule))17
)
{15
1075
15
    Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
1076
15
    Value *NewPointer = generateLocationAccessed(Stmt, Store, ScalarMaps[0],
1077
15
                                                 VLTS[0], NewAccesses);
1078
15
1079
15
    Value *VectorPtr =
1080
15
        Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
1081
15
    StoreInst *Store = Builder.CreateStore(Vector, VectorPtr);
1082
15
1083
15
    if (!Aligned)
1084
15
      Store->setAlignment(8);
1085
2
  } else {
1086
10
    for (unsigned i = 0; 
i < ScalarMaps.size()10
;
i++8
)
{8
1087
8
      Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i));
1088
8
      Value *NewPointer = generateLocationAccessed(Stmt, Store, ScalarMaps[i],
1089
8
                                                   VLTS[i], NewAccesses);
1090
8
      Builder.CreateStore(Scalar, NewPointer);
1091
8
    }
1092
2
  }
1093
17
}
1094
1095
bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
1096
40
                                             ValueMapT &VectorMap) {
1097
40
  for (Value *Operand : Inst->operands())
1098
51
    
if (51
VectorMap.count(Operand)51
)
1099
28
      return true;
1100
12
  return false;
1101
40
}
1102
1103
bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
1104
                                               ValueMapT &VectorMap,
1105
46
                                               VectorValueMapT &ScalarMaps) {
1106
46
  bool HasVectorOperand = false;
1107
46
  int VectorWidth = getVectorWidth();
1108
46
1109
77
  for (Value *Operand : Inst->operands()) {
1110
77
    ValueMapT::iterator VecOp = VectorMap.find(Operand);
1111
77
1112
77
    if (VecOp == VectorMap.end())
1113
57
      continue;
1114
77
1115
20
    HasVectorOperand = true;
1116
20
    Value *NewVector = VecOp->second;
1117
20
1118
115
    for (int i = 0; 
i < VectorWidth115
;
++i95
)
{98
1119
98
      ValueMapT &SM = ScalarMaps[i];
1120
98
1121
98
      // If there is one scalar extracted, all scalar elements should have
1122
98
      // already been extracted by the code here. So no need to check for the
1123
98
      // existence of all of them.
1124
98
      if (SM.count(Operand))
1125
3
        break;
1126
98
1127
95
      SM[Operand] =
1128
95
          Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
1129
95
    }
1130
20
  }
1131
46
1132
46
  return HasVectorOperand;
1133
46
}
1134
1135
void VectorBlockGenerator::copyInstScalarized(
1136
    ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap,
1137
15
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
1138
15
  bool HasVectorOperand;
1139
15
  int VectorWidth = getVectorWidth();
1140
15
1141
15
  HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);
1142
15
1143
91
  for (int VectorLane = 0; 
VectorLane < getVectorWidth()91
;
VectorLane++76
)
1144
76
    BlockGenerator::copyInstruction(Stmt, Inst, ScalarMaps[VectorLane],
1145
76
                                    VLTS[VectorLane], NewAccesses);
1146
15
1147
15
  if (
!VectorType::isValidElementType(Inst->getType()) || 15
!HasVectorOperand12
)
1148
12
    return;
1149
15
1150
15
  // Make the result available as vector value.
1151
3
  VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
1152
3
  Value *Vector = UndefValue::get(VectorType);
1153
3
1154
15
  for (int i = 0; 
i < VectorWidth15
;
i++12
)
1155
12
    Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
1156
12
                                         Builder.getInt32(i));
1157
3
1158
3
  VectorMap[Inst] = Vector;
1159
3
}
1160
1161
212
int VectorBlockGenerator::getVectorWidth() { return VLTS.size(); }
1162
1163
void VectorBlockGenerator::copyInstruction(
1164
    ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap,
1165
122
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
1166
122
  // Terminator instructions control the control flow. They are explicitly
1167
122
  // expressed in the clast and do not need to be copied.
1168
122
  if (Inst->isTerminator())
1169
20
    return;
1170
122
1171
102
  
if (102
canSyntheziseInStmt(Stmt, Inst)102
)
1172
42
    return;
1173
102
1174
60
  
if (auto *60
Load60
= dyn_cast<LoadInst>(Inst))
{20
1175
20
    generateLoad(Stmt, Load, VectorMap, ScalarMaps, NewAccesses);
1176
20
    return;
1177
20
  }
1178
60
1179
40
  
if (40
hasVectorOperands(Inst, VectorMap)40
)
{28
1180
28
    if (auto *
Store28
= dyn_cast<StoreInst>(Inst))
{17
1181
17
      // Identified as redundant by -polly-simplify.
1182
17
      if (!Stmt.getArrayAccessOrNULLFor(Store))
1183
0
        return;
1184
17
1185
17
      copyStore(Stmt, Store, VectorMap, ScalarMaps, NewAccesses);
1186
17
      return;
1187
17
    }
1188
28
1189
11
    
if (auto *11
Unary11
= dyn_cast<UnaryInstruction>(Inst))
{1
1190
1
      copyUnaryInst(Stmt, Unary, VectorMap, ScalarMaps);
1191
1
      return;
1192
1
    }
1193
11
1194
10
    
if (auto *10
Binary10
= dyn_cast<BinaryOperator>(Inst))
{7
1195
7
      copyBinaryInst(Stmt, Binary, VectorMap, ScalarMaps);
1196
7
      return;
1197
7
    }
1198
10
1199
10
    // Fallthrough: We generate scalar instructions, if we don't know how to
1200
10
    // generate vector code.
1201
10
  }
1202
40
1203
15
  copyInstScalarized(Stmt, Inst, VectorMap, ScalarMaps, NewAccesses);
1204
15
}
1205
1206
void VectorBlockGenerator::generateScalarVectorLoads(
1207
20
    ScopStmt &Stmt, ValueMapT &VectorBlockMap) {
1208
35
  for (MemoryAccess *MA : Stmt) {
1209
35
    if (
MA->isArrayKind() || 35
MA->isWrite()1
)
1210
34
      continue;
1211
35
1212
1
    auto *Address = getOrCreateAlloca(*MA);
1213
1
    Type *VectorPtrType = getVectorPtrTy(Address, 1);
1214
1
    Value *VectorPtr = Builder.CreateBitCast(Address, VectorPtrType,
1215
1
                                             Address->getName() + "_p_vec_p");
1216
1
    auto *Val = Builder.CreateLoad(VectorPtr, Address->getName() + ".reload");
1217
1
    Constant *SplatVector = Constant::getNullValue(
1218
1
        VectorType::get(Builder.getInt32Ty(), getVectorWidth()));
1219
1
1220
1
    Value *VectorVal = Builder.CreateShuffleVector(
1221
1
        Val, Val, SplatVector, Address->getName() + "_p_splat");
1222
1
    VectorBlockMap[MA->getAccessValue()] = VectorVal;
1223
1
  }
1224
20
}
1225
1226
20
void VectorBlockGenerator::verifyNoScalarStores(ScopStmt &Stmt) {
1227
35
  for (MemoryAccess *MA : Stmt) {
1228
35
    if (
MA->isArrayKind() || 35
MA->isRead()1
)
1229
35
      continue;
1230
35
1231
0
    
llvm_unreachable0
("Scalar stores not expected in vector loop");0
1232
0
  }
1233
20
}
1234
1235
void VectorBlockGenerator::copyStmt(
1236
20
    ScopStmt &Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses) {
1237
20
  assert(Stmt.isBlockStmt() &&
1238
20
         "TODO: Only block statements can be copied by the vector block "
1239
20
         "generator");
1240
20
1241
20
  BasicBlock *BB = Stmt.getBasicBlock();
1242
20
  BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
1243
20
                                  &*Builder.GetInsertPoint(), &DT, &LI);
1244
20
  CopyBB->setName("polly.stmt." + BB->getName());
1245
20
  Builder.SetInsertPoint(&CopyBB->front());
1246
20
1247
20
  // Create two maps that store the mapping from the original instructions of
1248
20
  // the old basic block to their copies in the new basic block. Those maps
1249
20
  // are basic block local.
1250
20
  //
1251
20
  // As vector code generation is supported there is one map for scalar values
1252
20
  // and one for vector values.
1253
20
  //
1254
20
  // In case we just do scalar code generation, the vectorMap is not used and
1255
20
  // the scalarMap has just one dimension, which contains the mapping.
1256
20
  //
1257
20
  // In case vector code generation is done, an instruction may either appear
1258
20
  // in the vector map once (as it is calculating >vectorwidth< values at a
1259
20
  // time. Or (if the values are calculated using scalar operations), it
1260
20
  // appears once in every dimension of the scalarMap.
1261
20
  VectorValueMapT ScalarBlockMap(getVectorWidth());
1262
20
  ValueMapT VectorBlockMap;
1263
20
1264
20
  generateScalarVectorLoads(Stmt, VectorBlockMap);
1265
20
1266
20
  for (Instruction &Inst : *BB)
1267
122
    copyInstruction(Stmt, &Inst, VectorBlockMap, ScalarBlockMap, NewAccesses);
1268
20
1269
20
  verifyNoScalarStores(Stmt);
1270
20
}
1271
1272
BasicBlock *RegionGenerator::repairDominance(BasicBlock *BB,
1273
96
                                             BasicBlock *BBCopy) {
1274
96
1275
96
  BasicBlock *BBIDom = DT.getNode(BB)->getIDom()->getBlock();
1276
96
  BasicBlock *BBCopyIDom = EndBlockMap.lookup(BBIDom);
1277
96
1278
96
  if (BBCopyIDom)
1279
95
    DT.changeImmediateDominator(BBCopy, BBCopyIDom);
1280
96
1281
96
  return StartBlockMap.lookup(BBIDom);
1282
96
}
1283
1284
// This is to determine whether an llvm::Value (defined in @p BB) is usable when
1285
// leaving a subregion. The straight-forward DT.dominates(BB, R->getExitBlock())
1286
// does not work in cases where the exit block has edges from outside the
1287
// region. In that case the llvm::Value would never be usable in in the exit
1288
// block. The RegionGenerator however creates an new exit block ('ExitBBCopy')
1289
// for the subregion's exiting edges only. We need to determine whether an
1290
// llvm::Value is usable in there. We do this by checking whether it dominates
1291
// all exiting blocks individually.
1292
static bool isDominatingSubregionExit(const DominatorTree &DT, Region *R,
1293
96
                                      BasicBlock *BB) {
1294
178
  for (auto ExitingBB : predecessors(R->getExit())) {
1295
178
    // Check for non-subregion incoming edges.
1296
178
    if (!R->contains(ExitingBB))
1297
28
      continue;
1298
178
1299
150
    
if (150
!DT.dominates(BB, ExitingBB)150
)
1300
55
      return false;
1301
150
  }
1302
96
1303
41
  return true;
1304
96
}
1305
1306
// Find the direct dominator of the subregion's exit block if the subregion was
1307
// simplified.
1308
33
static BasicBlock *findExitDominator(DominatorTree &DT, Region *R) {
1309
33
  BasicBlock *Common = nullptr;
1310
68
  for (auto ExitingBB : predecessors(R->getExit())) {
1311
68
    // Check for non-subregion incoming edges.
1312
68
    if (!R->contains(ExitingBB))
1313
10
      continue;
1314
68
1315
68
    // First exiting edge.
1316
58
    
if (58
!Common58
)
{33
1317
33
      Common = ExitingBB;
1318
33
      continue;
1319
33
    }
1320
58
1321
25
    Common = DT.findNearestCommonDominator(Common, ExitingBB);
1322
25
  }
1323
33
1324
33
  assert(Common && R->contains(Common));
1325
33
  return Common;
1326
33
}
1327
1328
void RegionGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
1329
33
                               isl_id_to_ast_expr *IdToAstExp) {
1330
33
  assert(Stmt.isRegionStmt() &&
1331
33
         "Only region statements can be copied by the region generator");
1332
33
1333
33
  // Forget all old mappings.
1334
33
  StartBlockMap.clear();
1335
33
  EndBlockMap.clear();
1336
33
  RegionMaps.clear();
1337
33
  IncompletePHINodeMap.clear();
1338
33
1339
33
  // Collection of all values related to this subregion.
1340
33
  ValueMapT ValueMap;
1341
33
1342
33
  // The region represented by the statement.
1343
33
  Region *R = Stmt.getRegion();
1344
33
1345
33
  // Create a dedicated entry for the region where we can reload all demoted
1346
33
  // inputs.
1347
33
  BasicBlock *EntryBB = R->getEntry();
1348
33
  BasicBlock *EntryBBCopy = SplitBlock(Builder.GetInsertBlock(),
1349
33
                                       &*Builder.GetInsertPoint(), &DT, &LI);
1350
33
  EntryBBCopy->setName("polly.stmt." + EntryBB->getName() + ".entry");
1351
33
  Builder.SetInsertPoint(&EntryBBCopy->front());
1352
33
1353
33
  ValueMapT &EntryBBMap = RegionMaps[EntryBBCopy];
1354
33
  generateScalarLoads(Stmt, LTS, EntryBBMap, IdToAstExp);
1355
33
1356
81
  for (auto PI = pred_begin(EntryBB), PE = pred_end(EntryBB); 
PI != PE81
;
++PI48
)
1357
48
    
if (48
!R->contains(*PI)48
)
{45
1358
45
      StartBlockMap[*PI] = EntryBBCopy;
1359
45
      EndBlockMap[*PI] = EntryBBCopy;
1360
45
    }
1361
33
1362
33
  // Iterate over all blocks in the region in a breadth-first search.
1363
33
  std::deque<BasicBlock *> Blocks;
1364
33
  SmallSetVector<BasicBlock *, 8> SeenBlocks;
1365
33
  Blocks.push_back(EntryBB);
1366
33
  SeenBlocks.insert(EntryBB);
1367
33
1368
129
  while (
!Blocks.empty()129
)
{96
1369
96
    BasicBlock *BB = Blocks.front();
1370
96
    Blocks.pop_front();
1371
96
1372
96
    // First split the block and update dominance information.
1373
96
    BasicBlock *BBCopy = splitBB(BB);
1374
96
    BasicBlock *BBCopyIDom = repairDominance(BB, BBCopy);
1375
96
1376
96
    // Get the mapping for this block and initialize it with either the scalar
1377
96
    // loads from the generated entering block (which dominates all blocks of
1378
96
    // this subregion) or the maps of the immediate dominator, if part of the
1379
96
    // subregion. The latter necessarily includes the former.
1380
96
    ValueMapT *InitBBMap;
1381
96
    if (
BBCopyIDom96
)
{95
1382
95
      assert(RegionMaps.count(BBCopyIDom));
1383
95
      InitBBMap = &RegionMaps[BBCopyIDom];
1384
95
    } else
1385
1
      InitBBMap = &EntryBBMap;
1386
96
    auto Inserted = RegionMaps.insert(std::make_pair(BBCopy, *InitBBMap));
1387
96
    ValueMapT &RegionMap = Inserted.first->second;
1388
96
1389
96
    // Copy the block with the BlockGenerator.
1390
96
    Builder.SetInsertPoint(&BBCopy->front());
1391
96
    copyBB(Stmt, BB, BBCopy, RegionMap, LTS, IdToAstExp);
1392
96
1393
96
    // In order to remap PHI nodes we store also basic block mappings.
1394
96
    StartBlockMap[BB] = BBCopy;
1395
96
    EndBlockMap[BB] = Builder.GetInsertBlock();
1396
96
1397
96
    // Add values to incomplete PHI nodes waiting for this block to be copied.
1398
96
    for (const PHINodePairTy &PHINodePair : IncompletePHINodeMap[BB])
1399
4
      addOperandToPHI(Stmt, PHINodePair.first, PHINodePair.second, BB, LTS);
1400
96
    IncompletePHINodeMap[BB].clear();
1401
96
1402
96
    // And continue with new successors inside the region.
1403
232
    for (auto SI = succ_begin(BB), SE = succ_end(BB); 
SI != SE232
;
SI++136
)
1404
136
      
if (136
R->contains(*SI) && 136
SeenBlocks.insert(*SI)78
)
1405
63
        Blocks.push_back(*SI);
1406
96
1407
96
    // Remember value in case it is visible after this subregion.
1408
96
    if (isDominatingSubregionExit(DT, R, BB))
1409
41
      ValueMap.insert(RegionMap.begin(), RegionMap.end());
1410
96
  }
1411
33
1412
33
  // Now create a new dedicated region exit block and add it to the region map.
1413
33
  BasicBlock *ExitBBCopy = SplitBlock(Builder.GetInsertBlock(),
1414
33
                                      &*Builder.GetInsertPoint(), &DT, &LI);
1415
33
  ExitBBCopy->setName("polly.stmt." + R->getExit()->getName() + ".exit");
1416
33
  StartBlockMap[R->getExit()] = ExitBBCopy;
1417
33
  EndBlockMap[R->getExit()] = ExitBBCopy;
1418
33
1419
33
  BasicBlock *ExitDomBBCopy = EndBlockMap.lookup(findExitDominator(DT, R));
1420
33
  assert(ExitDomBBCopy &&
1421
33
         "Common exit dominator must be within region; at least the entry node "
1422
33
         "must match");
1423
33
  DT.changeImmediateDominator(ExitBBCopy, ExitDomBBCopy);
1424
33
1425
33
  // As the block generator doesn't handle control flow we need to add the
1426
33
  // region control flow by hand after all blocks have been copied.
1427
96
  for (BasicBlock *BB : SeenBlocks) {
1428
96
1429
96
    BasicBlock *BBCopyStart = StartBlockMap[BB];
1430
96
    BasicBlock *BBCopyEnd = EndBlockMap[BB];
1431
96
    TerminatorInst *TI = BB->getTerminator();
1432
96
    if (
isa<UnreachableInst>(TI)96
)
{0
1433
0
      while (!BBCopyEnd->empty())
1434
0
        BBCopyEnd->begin()->eraseFromParent();
1435
0
      new UnreachableInst(BBCopyEnd->getContext(), BBCopyEnd);
1436
0
      continue;
1437
0
    }
1438
96
1439
96
    Instruction *BICopy = BBCopyEnd->getTerminator();
1440
96
1441
96
    ValueMapT &RegionMap = RegionMaps[BBCopyStart];
1442
96
    RegionMap.insert(StartBlockMap.begin(), StartBlockMap.end());
1443
96
1444
96
    Builder.SetInsertPoint(BICopy);
1445
96
    copyInstScalar(Stmt, TI, RegionMap, LTS);
1446
96
    BICopy->eraseFromParent();
1447
96
  }
1448
33
1449
33
  // Add counting PHI nodes to all loops in the region that can be used as
1450
33
  // replacement for SCEVs referring to the old loop.
1451
96
  for (BasicBlock *BB : SeenBlocks) {
1452
96
    Loop *L = LI.getLoopFor(BB);
1453
96
    if (
L == nullptr || 96
L->getHeader() != BB70
||
!R->contains(L)11
)
1454
94
      continue;
1455
96
1456
2
    BasicBlock *BBCopy = StartBlockMap[BB];
1457
2
    Value *NullVal = Builder.getInt32(0);
1458
2
    PHINode *LoopPHI =
1459
2
        PHINode::Create(Builder.getInt32Ty(), 2, "polly.subregion.iv");
1460
2
    Instruction *LoopPHIInc = BinaryOperator::CreateAdd(
1461
2
        LoopPHI, Builder.getInt32(1), "polly.subregion.iv.inc");
1462
2
    LoopPHI->insertBefore(&BBCopy->front());
1463
2
    LoopPHIInc->insertBefore(BBCopy->getTerminator());
1464
2
1465
4
    for (auto *PredBB : make_range(pred_begin(BB), pred_end(BB))) {
1466
4
      if (!R->contains(PredBB))
1467
2
        continue;
1468
2
      
if (2
L->contains(PredBB)2
)
1469
2
        LoopPHI->addIncoming(LoopPHIInc, EndBlockMap[PredBB]);
1470
2
      else
1471
0
        LoopPHI->addIncoming(NullVal, EndBlockMap[PredBB]);
1472
2
    }
1473
2
1474
2
    for (auto *PredBBCopy : make_range(pred_begin(BBCopy), pred_end(BBCopy)))
1475
4
      
if (4
LoopPHI->getBasicBlockIndex(PredBBCopy) < 04
)
1476
2
        LoopPHI->addIncoming(NullVal, PredBBCopy);
1477
2
1478
2
    LTS[L] = SE.getUnknown(LoopPHI);
1479
2
  }
1480
33
1481
33
  // Continue generating code in the exit block.
1482
33
  Builder.SetInsertPoint(&*ExitBBCopy->getFirstInsertionPt());
1483
33
1484
33
  // Write values visible to other statements.
1485
33
  generateScalarStores(Stmt, LTS, ValueMap, IdToAstExp);
1486
33
  StartBlockMap.clear();
1487
33
  EndBlockMap.clear();
1488
33
  RegionMaps.clear();
1489
33
  IncompletePHINodeMap.clear();
1490
33
}
1491
1492
PHINode *RegionGenerator::buildExitPHI(MemoryAccess *MA, LoopToScevMapT &LTS,
                                       ValueMapT &BBMap, Loop *L) {
  // Materialize a PHI node in the generated subregion exit that merges the
  // incoming values recorded for this PHI-kind access.
  ScopStmt *Stmt = MA->getStatement();
  Region *SubRegion = Stmt->getRegion();
  auto IncomingPairs = MA->getIncoming();

  // Restore the builder's insertion point when we leave this function; it is
  // temporarily moved into each incoming block below.
  PollyIRBuilder::InsertPointGuard IPGuard(Builder);
  PHINode *OrigPHI = cast<PHINode>(MA->getAccessInstruction());
  BasicBlock *MergeBlock = Builder.GetInsertBlock();

  // This can happen if the subregion is simplified after the ScopStmts
  // have been created; simplification happens as part of CodeGeneration.
  if (OrigPHI->getParent() != SubRegion->getExit())
    if (BasicBlock *FormerExit = SubRegion->getExitingBlock())
      MergeBlock = StartBlockMap.lookup(FormerExit);

  PHINode *MergePHI = PHINode::Create(OrigPHI->getType(), IncomingPairs.size(),
                                      "polly." + OrigPHI->getName(),
                                      MergeBlock->getFirstNonPHI());

  // Translate every recorded incoming value inside its own copied block and
  // wire the result into the new PHI.
  for (auto &Pair : IncomingPairs) {
    BasicBlock *OrigBlock = Pair.first;
    Value *OrigValue = Pair.second;
    BasicBlock *CopyStart = StartBlockMap.lookup(OrigBlock);
    BasicBlock *CopyEnd = EndBlockMap.lookup(OrigBlock);
    Builder.SetInsertPoint(CopyEnd->getTerminator());
    assert(RegionMaps.count(CopyStart));
    assert(RegionMaps.count(CopyEnd));
    ValueMapT &LocalMap = RegionMaps[CopyStart];

    Value *CopiedValue = getNewValue(*Stmt, OrigValue, LocalMap, LTS, L);
    MergePHI->addIncoming(CopiedValue, CopyEnd);
  }

  return MergePHI;
}
1532
1533
Value *RegionGenerator::getExitScalar(MemoryAccess *MA, LoopToScevMapT &LTS,
                                      ValueMapT &BBMap) {
  // Compute the value written by the implicit (scalar) write access MA at the
  // subregion's exit: either the merged PHI of all incoming values (for
  // PHI-kind accesses) or the copied access value itself.
  ScopStmt *Stmt = MA->getStatement();

  // TODO: Add some test cases that ensure this is really the right choice.
  Loop *L = LI.getLoopFor(Stmt->getRegion()->getExit());

  if (MA->isAnyPHIKind()) {
    auto Incoming = MA->getIncoming();
    // Fixed diagnostic grammar: "must have originate" -> "must have
    // originated".
    assert(!Incoming.empty() &&
           "PHI WRITEs must have originated from at least one incoming block");

    // If there is only one incoming value, we do not need to create a PHI.
    if (Incoming.size() == 1) {
      Value *OldVal = Incoming[0].second;
      return getNewValue(*Stmt, OldVal, BBMap, LTS, L);
    }

    return buildExitPHI(MA, LTS, BBMap, L);
  }

  // MemoryKind::Value accesses leaving the subregion must dominate the exit
  // block; just pass the copied value.
  Value *OldVal = MA->getAccessValue();
  return getNewValue(*Stmt, OldVal, BBMap, LTS, L);
}
1559
1560
void RegionGenerator::generateScalarStores(
1561
    ScopStmt &Stmt, LoopToScevMapT &LTS, ValueMapT &BBMap,
1562
33
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
1563
33
  assert(Stmt.getRegion() &&
1564
33
         "Block statements need to use the generateScalarStores() "
1565
33
         "function in the BlockGenerator");
1566
33
1567
93
  for (MemoryAccess *MA : Stmt) {
1568
93
    if (
MA->isOriginalArrayKind() || 93
MA->isRead()32
)
1569
72
      continue;
1570
93
1571
21
    isl::set AccDom = give(isl_map_domain(MA->getAccessRelation()));
1572
21
    const char *Subject = isl_id_get_name(give(MA->getId()).keep());
1573
21
    generateConditionalExecution(Stmt, AccDom, Subject, [&, this, MA]() {
1574
21
1575
21
      Value *NewVal = getExitScalar(MA, LTS, BBMap);
1576
21
      Value *Address = getImplicitAddress(*MA, getLoopForStmt(Stmt), LTS, BBMap,
1577
21
                                          NewAccesses);
1578
21
      assert((!isa<Instruction>(NewVal) ||
1579
21
              DT.dominates(cast<Instruction>(NewVal)->getParent(),
1580
21
                           Builder.GetInsertBlock())) &&
1581
21
             "Domination violation");
1582
21
      assert((!isa<Instruction>(Address) ||
1583
21
              DT.dominates(cast<Instruction>(Address)->getParent(),
1584
21
                           Builder.GetInsertBlock())) &&
1585
21
             "Domination violation");
1586
21
      Builder.CreateStore(NewVal, Address);
1587
21
    });
1588
21
  }
1589
33
}
1590
1591
void RegionGenerator::addOperandToPHI(ScopStmt &Stmt, PHINode *PHI,
1592
                                      PHINode *PHICopy, BasicBlock *IncomingBB,
1593
20
                                      LoopToScevMapT &LTS) {
1594
20
  // If the incoming block was not yet copied mark this PHI as incomplete.
1595
20
  // Once the block will be copied the incoming value will be added.
1596
20
  BasicBlock *BBCopyStart = StartBlockMap[IncomingBB];
1597
20
  BasicBlock *BBCopyEnd = EndBlockMap[IncomingBB];
1598
20
  if (
!BBCopyStart20
)
{4
1599
4
    assert(!BBCopyEnd);
1600
4
    assert(Stmt.contains(IncomingBB) &&
1601
4
           "Bad incoming block for PHI in non-affine region");
1602
4
    IncompletePHINodeMap[IncomingBB].push_back(std::make_pair(PHI, PHICopy));
1603
4
    return;
1604
4
  }
1605
20
1606
16
  assert(RegionMaps.count(BBCopyStart) &&
1607
16
         "Incoming PHI block did not have a BBMap");
1608
16
  ValueMapT &BBCopyMap = RegionMaps[BBCopyStart];
1609
16
1610
16
  Value *OpCopy = nullptr;
1611
16
1612
16
  if (
Stmt.contains(IncomingBB)16
)
{7
1613
7
    Value *Op = PHI->getIncomingValueForBlock(IncomingBB);
1614
7
1615
7
    // If the current insert block is different from the PHIs incoming block
1616
7
    // change it, otherwise do not.
1617
7
    auto IP = Builder.GetInsertPoint();
1618
7
    if (IP->getParent() != BBCopyEnd)
1619
3
      Builder.SetInsertPoint(BBCopyEnd->getTerminator());
1620
7
    OpCopy = getNewValue(Stmt, Op, BBCopyMap, LTS, getLoopForStmt(Stmt));
1621
7
    if (IP->getParent() != BBCopyEnd)
1622
3
      Builder.SetInsertPoint(&*IP);
1623
9
  } else {
1624
9
    // All edges from outside the non-affine region become a single edge
1625
9
    // in the new copy of the non-affine region. Make sure to only add the
1626
9
    // corresponding edge the first time we encounter a basic block from
1627
9
    // outside the non-affine region.
1628
9
    if (PHICopy->getBasicBlockIndex(BBCopyEnd) >= 0)
1629
2
      return;
1630
9
1631
9
    // Get the reloaded value.
1632
7
    OpCopy = getNewValue(Stmt, PHI, BBCopyMap, LTS, getLoopForStmt(Stmt));
1633
7
  }
1634
16
1635
14
  assert(OpCopy && "Incoming PHI value was not copied properly");
1636
14
  PHICopy->addIncoming(OpCopy, BBCopyEnd);
1637
14
}
1638
1639
void RegionGenerator::copyPHIInstruction(ScopStmt &Stmt, PHINode *PHI,
1640
                                         ValueMapT &BBMap,
1641
9
                                         LoopToScevMapT &LTS) {
1642
9
  unsigned NumIncoming = PHI->getNumIncomingValues();
1643
9
  PHINode *PHICopy =
1644
9
      Builder.CreatePHI(PHI->getType(), NumIncoming, "polly." + PHI->getName());
1645
9
  PHICopy->moveBefore(PHICopy->getParent()->getFirstNonPHI());
1646
9
  BBMap[PHI] = PHICopy;
1647
9
1648
9
  for (BasicBlock *IncomingBB : PHI->blocks())
1649
16
    addOperandToPHI(Stmt, PHI, PHICopy, IncomingBB, LTS);
1650
9
}