Coverage Report

Created: 2017-04-29 12:21

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/tools/polly/include/polly/CodeGen/BlockGenerators.h
Line
Count
Source (jump to first uncovered line)
1
//===-BlockGenerators.h - Helper to generate code for statements-*- C++ -*-===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// This file defines the BlockGenerator and VectorBlockGenerator classes, which
11
// generate sequential code and vectorized code for a polyhedral statement,
12
// respectively.
13
//
14
//===----------------------------------------------------------------------===//
15
16
#ifndef POLLY_BLOCK_GENERATORS_H
17
#define POLLY_BLOCK_GENERATORS_H
18
19
#include "polly/CodeGen/IRBuilder.h"
20
#include "polly/Support/ScopHelper.h"
21
#include "llvm/ADT/MapVector.h"
22
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
23
#include "isl/map.h"
24
25
struct isl_ast_build;
26
struct isl_id_to_ast_expr;
27
28
namespace llvm {
29
class Pass;
30
class Region;
31
class ScalarEvolution;
32
} // namespace llvm
33
34
namespace polly {
35
using namespace llvm;
36
class ScopStmt;
37
class MemoryAccess;
38
class ScopArrayInfo;
39
class IslExprBuilder;
40
41
/// Generate a new basic block for a polyhedral statement.
42
class BlockGenerator {
43
public:
44
  typedef llvm::SmallVector<ValueMapT, 8> VectorValueMapT;
45
46
  /// Map types to resolve scalar dependences.
47
  ///
48
  ///@{
49
  using AllocaMapTy = DenseMap<const ScopArrayInfo *, AssertingVH<AllocaInst>>;
50
51
  /// Simple vector of instructions to store escape users.
52
  using EscapeUserVectorTy = SmallVector<Instruction *, 4>;
53
54
  /// Map type to resolve escaping users for scalar instructions.
55
  ///
56
  /// @see The EscapeMap member.
57
  using EscapeUsersAllocaMapTy =
58
      MapVector<Instruction *,
59
                std::pair<AssertingVH<Value>, EscapeUserVectorTy>>;
60
61
  ///@}
62
63
  /// Create a generator for basic blocks.
64
  ///
65
  /// @param Builder     The LLVM-IR Builder used to generate the statement. The
66
  ///                    code is generated at the location the Builder points
67
  ///                    to.
68
  /// @param LI          The loop info for the current function
69
  /// @param SE          The scalar evolution info for the current function
70
  /// @param DT          The dominator tree of this function.
71
  /// @param ScalarMap   Map from scalars to their demoted location.
72
  /// @param EscapeMap   Map from scalars to their escape users and locations.
73
  /// @param GlobalMap   A mapping from llvm::Values used in the original scop
74
  ///                    region to a new set of llvm::Values. Each reference to
75
  ///                    an original value appearing in this mapping is replaced
76
  ///                    with the new value it is mapped to.
77
  /// @param ExprBuilder An expression builder to generate new access functions.
78
  /// @param StartBlock  The first basic block after the RTC.
79
  BlockGenerator(PollyIRBuilder &Builder, LoopInfo &LI, ScalarEvolution &SE,
80
                 DominatorTree &DT, AllocaMapTy &ScalarMap,
81
                 EscapeUsersAllocaMapTy &EscapeMap, ValueMapT &GlobalMap,
82
                 IslExprBuilder *ExprBuilder, BasicBlock *StartBlock);
83
84
  /// Copy the basic block.
85
  ///
86
  /// This copies the entire basic block and updates references to old values
87
  /// with references to new values, as defined by GlobalMap.
88
  ///
89
  /// @param Stmt        The block statement to code generate.
90
  /// @param LTS         A map from old loops to new induction variables as
91
  ///                    SCEVs.
92
  /// @param NewAccesses A map from memory access ids to new ast expressions,
93
  ///                    which may contain new access expressions for certain
94
  ///                    memory accesses.
95
  void copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
96
                isl_id_to_ast_expr *NewAccesses);
97
98
  /// Remove a ScopArrayInfo's allocation from the ScalarMap.
99
  ///
100
  /// This function allows removing values from the ScalarMap. This is useful
101
  /// if the corresponding alloca instruction will be deleted (or moved into
102
  /// another module), as without removing these values the underlying
103
  /// AssertingVH will trigger due to us still keeping a reference to this
104
  /// scalar.
105
  ///
106
  /// @param Array The array for which the alloca was generated.
107
0
  void freeScalarAlloc(ScopArrayInfo *Array) { ScalarMap.erase(Array); }
108
109
  /// Return the alloca for @p Access.
110
  ///
111
  /// If no alloca was mapped for @p Access a new one is created.
112
  ///
113
  /// @param Access    The memory access for which to generate the alloca.
114
  ///
115
  /// @returns The alloca for @p Access or a replacement value taken from
116
  ///          GlobalMap.
117
  Value *getOrCreateAlloca(const MemoryAccess &Access);
118
119
  /// Return the alloca for @p Array.
120
  ///
121
  /// If no alloca was mapped for @p Array a new one is created.
122
  ///
123
  /// @param Array The array for which to generate the alloca.
124
  ///
125
  /// @returns The alloca for @p Array or a replacement value taken from
126
  ///          GlobalMap.
127
  Value *getOrCreateAlloca(const ScopArrayInfo *Array);
128
129
  /// Finalize the code generation for the SCoP @p S.
130
  ///
131
  /// This will initialize and finalize the scalar variables we demoted during
132
  /// the code generation.
133
  ///
134
  /// @see createScalarInitialization(Scop &)
135
  /// @see createScalarFinalization(Region &)
136
  void finalizeSCoP(Scop &S);
137
138
  /// An empty destructor
139
544
  virtual ~BlockGenerator() {}
140
141
282
  BlockGenerator(const BlockGenerator &) = default;
142
143
protected:
144
  PollyIRBuilder &Builder;
145
  LoopInfo &LI;
146
  ScalarEvolution &SE;
147
  IslExprBuilder *ExprBuilder;
148
149
  /// The dominator tree of this function.
150
  DominatorTree &DT;
151
152
  /// The entry block of the current function.
153
  BasicBlock *EntryBB;
154
155
  /// Map to resolve scalar dependences for PHI operands and scalars.
156
  ///
157
  /// When translating code that contains scalar dependences as they result from
158
  /// inter-block scalar dependences (including the use of data carrying PHI
159
  /// nodes), we do not directly regenerate in-register SSA code, but instead
160
  /// allocate some stack memory through which these scalar values are passed.
161
  /// Only a later pass of -mem2reg will then (re)introduce in-register
162
  /// computations.
163
  ///
164
  /// To keep track of the memory location(s) used to store the data computed by
165
  /// a given SSA instruction, we use the map 'ScalarMap'. ScalarMap maps a
166
  /// given ScopArrayInfo to the chunk of stack-allocated memory that is
167
  /// used for code generation.
168
  ///
169
  /// Up to two different ScopArrayInfo objects are associated with each
170
  /// llvm::Value:
171
  ///
172
  /// MemoryType::Value objects are used for normal scalar dependences that go
173
  /// from a scalar definition to its use. Such dependences are lowered by
174
  /// directly writing the value an instruction computes into the corresponding
175
  /// chunk of memory and reading it back from this chunk of memory right before
176
  /// every use of this original scalar value. The memory allocations for
177
  /// MemoryType::Value objects end with '.s2a'.
178
  ///
179
  /// MemoryType::PHI (and MemoryType::ExitPHI) objects are used to model PHI
180
  /// nodes. For each PHI node we introduce, besides the Array of type
181
  /// MemoryType::Value, a second chunk of memory into which we write at the end
182
  /// of each basic block preceding the PHI instruction the value passed
183
  /// through this basic block. At the place where the PHI node is executed, we
184
  /// replace the PHI node with a load from the corresponding MemoryType::PHI
185
  /// memory location. The memory allocations for MemoryType::PHI end with
186
  /// '.phiops'.
187
  ///
188
  /// Example:
189
  ///
190
  ///                              Input C Code
191
  ///                              ============
192
  ///
193
  ///                 S1:      x1 = ...
194
  ///                          for (i=0...N) {
195
  ///                 S2:           x2 = phi(x1, add)
196
  ///                 S3:           add = x2 + 42;
197
  ///                          }
198
  ///                 S4:      print(x1)
199
  ///                          print(x2)
200
  ///                          print(add)
201
  ///
202
  ///
203
  ///        Unmodified IR                         IR After expansion
204
  ///        =============                         ==================
205
  ///
206
  /// S1:   x1 = ...                     S1:    x1 = ...
207
  ///                                           x1.s2a = s1
208
  ///                                           x2.phiops = s1
209
  ///        |                                    |
210
  ///        |   <--<--<--<--<                    |   <--<--<--<--<
211
  ///        | /              \                   | /              \     .
212
  ///        V V               \                  V V               \    .
213
  /// S2:  x2 = phi (x1, add)   |        S2:    x2 = x2.phiops       |
214
  ///                           |               x2.s2a = x2          |
215
  ///                           |                                    |
216
  /// S3:  add = x2 + 42        |        S3:    add = x2 + 42        |
217
  ///                           |               add.s2a = add        |
218
  ///                           |               x2.phiops = add      |
219
  ///        | \               /                  | \               /
220
  ///        |  \             /                   |  \             /
221
  ///        |   >-->-->-->-->                    |   >-->-->-->-->
222
  ///        V                                    V
223
  ///
224
  ///                                    S4:    x1 = x1.s2a
225
  /// S4:  ... = x1                             ... = x1
226
  ///                                           x2 = x2.s2a
227
  ///      ... = x2                             ... = x2
228
  ///                                           add = add.s2a
229
  ///      ... = add                            ... = add
230
  ///
231
  ///      ScalarMap = { x1:Value -> x1.s2a, x2:Value -> x2.s2a,
232
  ///                    add:Value -> add.s2a, x2:PHI -> x2.phiops }
233
  ///
234
  ///  ??? Why does a PHI-node require two memory chunks ???
235
  ///
236
  ///  One may wonder why a PHI node requires two memory chunks instead of
237
  ///  storing all data in a single location. The following example tries
238
  ///  to store all data in .s2a and drops the .phiops location:
239
  ///
240
  ///      S1:    x1 = ...
241
  ///             x1.s2a = s1
242
  ///             x2.s2a = s1             // use .s2a instead of .phiops
243
  ///               |
244
  ///               |   <--<--<--<--<
245
  ///               | /              \    .
246
  ///               V V               \   .
247
  ///      S2:    x2 = x2.s2a          |  // value is same as above, but read
248
  ///                                  |  // from .s2a
249
  ///                                  |
250
  ///             x2.s2a = x2          |  // store into .s2a as normal
251
  ///                                  |
252
  ///      S3:    add = x2 + 42        |
253
  ///             add.s2a = add        |
254
  ///             x2.s2a = add         |  // use s2a instead of .phiops
255
  ///               | \               /   // !!! This is wrong, as x2.s2a now
256
  ///               |   >-->-->-->-->     // contains add instead of x2.
257
  ///               V
258
  ///
259
  ///      S4:    x1 = x1.s2a
260
  ///             ... = x1
261
  ///             x2 = x2.s2a             // !!! We now read 'add' instead of
262
  ///             ... = x2                // 'x2'
263
  ///             add = add.s2a
264
  ///             ... = add
265
  ///
266
  ///  As visible in the example, the SSA value of the PHI node may still be
267
  ///  needed _after_ the basic block, which could conceptually branch to the
268
  ///  PHI node, has been run and has overwritten the PHI's old value. Hence, a
269
  ///  single memory location is not enough to code-generate a PHI node.
270
  ///
271
  /// Memory locations used for the special PHI node modeling.
272
  AllocaMapTy &ScalarMap;
273
274
  /// Map from instructions to their escape users as well as the alloca.
275
  EscapeUsersAllocaMapTy &EscapeMap;
276
277
  /// A map from llvm::Values referenced in the old code to a new set of
278
  ///        llvm::Values, which is used to replace these old values during
279
  ///        code generation.
280
  ValueMapT &GlobalMap;
281
282
  /// The first basic block after the RTC.
283
  BasicBlock *StartBlock;
284
285
  /// Split @p BB to create a new one we can use to clone @p BB in.
286
  BasicBlock *splitBB(BasicBlock *BB);
287
288
  /// Copy the given basic block.
289
  ///
290
  /// @param Stmt      The statement to code generate.
291
  /// @param BB        The basic block to code generate.
292
  /// @param BBMap     A mapping from old values to their new values in this
293
  /// block.
294
  /// @param LTS         A map from old loops to new induction variables as
295
  ///                    SCEVs.
296
  /// @param NewAccesses A map from memory access ids to new ast expressions,
297
  ///                    which may contain new access expressions for certain
298
  ///                    memory accesses.
299
  ///
300
  /// @returns The copy of the basic block.
301
  BasicBlock *copyBB(ScopStmt &Stmt, BasicBlock *BB, ValueMapT &BBMap,
302
                     LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses);
303
304
  /// Copy the given basic block.
305
  ///
306
  /// @param Stmt      The statement to code generate.
307
  /// @param BB        The basic block to code generate.
308
  /// @param BBCopy    The new basic block to generate code in.
309
  /// @param BBMap     A mapping from old values to their new values in this
310
  /// block.
311
  /// @param LTS         A map from old loops to new induction variables as
312
  ///                    SCEVs.
313
  /// @param NewAccesses A map from memory access ids to new ast expressions,
314
  ///                    which may contain new access expressions for certain
315
  ///                    memory accesses.
316
  void copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *BBCopy,
317
              ValueMapT &BBMap, LoopToScevMapT &LTS,
318
              isl_id_to_ast_expr *NewAccesses);
319
320
  /// Generate reload of scalars demoted to memory and needed by @p Stmt.
321
  ///
322
  /// @param Stmt  The statement we generate code for.
323
  /// @param LTS   A mapping from loops virtual canonical induction
324
  ///              variable to their new values.
325
  /// @param BBMap A mapping from old values to their new values in this block.
326
  /// @param NewAccesses A map from memory access ids to new ast expressions.
327
  void generateScalarLoads(ScopStmt &Stmt, LoopToScevMapT &LTS,
328
                           ValueMapT &BBMap,
329
                           __isl_keep isl_id_to_ast_expr *NewAccesses);
330
331
  /// Generate the scalar stores for the given statement.
332
  ///
333
  /// After the statement @p Stmt was copied all inner-SCoP scalar dependences
334
  /// starting in @p Stmt (hence all scalar write accesses in @p Stmt) need to
335
  /// be demoted to memory.
336
  ///
337
  /// @param Stmt  The statement we generate code for.
338
  /// @param LTS   A mapping from loops virtual canonical induction
339
  ///              variable to their new values
340
  ///              (for values recalculated in the new ScoP, but not
341
  ///               within this basic block)
342
  /// @param BBMap A mapping from old values to their new values in this block.
343
  /// @param NewAccesses A map from memory access ids to new ast expressions.
344
  virtual void generateScalarStores(ScopStmt &Stmt, LoopToScevMapT &LTS,
345
                                    ValueMapT &BBMap,
346
                                    __isl_keep isl_id_to_ast_expr *NewAccesses);
347
348
  /// Handle users of @p Array outside the SCoP.
349
  ///
350
  /// @param S         The current SCoP.
351
  /// @param Array     The ScopArrayInfo to handle.
352
  void handleOutsideUsers(const Scop &S, ScopArrayInfo *Array);
353
354
  /// Find scalar statements that have outside users.
355
  ///
356
  /// We register these scalar values to later update subsequent scalar uses of
357
  /// these values to either use the newly computed value from within the scop
358
  /// (if the scop was executed) or the unchanged original code (if the run-time
359
  /// check failed).
360
  ///
361
  /// @param S The scop for which to find the outside users.
362
  void findOutsideUsers(Scop &S);
363
364
  /// Initialize the memory of demoted scalars.
365
  ///
366
  /// @param S The scop for which to generate the scalar initializers.
367
  void createScalarInitialization(Scop &S);
368
369
  /// Create exit PHI node merges for PHI nodes with more than two edges
370
  ///        from inside the scop.
371
  ///
372
  /// For scops which have a PHI node in the exit block that has more than two
373
  /// incoming edges from inside the scop region, we require some special
374
  /// handling to understand which of the possible values will be passed to the
375
  /// PHI node from inside the optimized version of the scop. To do so ScopInfo
376
  /// models the possible incoming values as write accesses of the ScopStmts.
377
  ///
378
  /// This function creates corresponding code to reload the computed outgoing
379
  /// value from the stack slot it has been stored into and to pass it on to the
380
  /// PHI node in the original exit block.
381
  ///
382
  /// @param S The scop for which to generate the exiting PHI nodes.
383
  void createExitPHINodeMerges(Scop &S);
384
385
  /// Promote the values of demoted scalars after the SCoP.
386
  ///
387
  /// If a scalar value was used outside the SCoP we need to promote the value
388
  /// stored in the memory cell allocated for that scalar and combine it with
389
  /// the original value in the non-optimized SCoP.
390
  void createScalarFinalization(Scop &S);
391
392
  /// Try to synthesize a new value
393
  ///
394
  /// Given an old value, we try to synthesize it in a new context from its
395
  /// original SCEV expression. We start from the original SCEV expression,
396
  /// then replace outdated parameter and loop references, and finally
397
  /// expand it to code that computes this updated expression.
398
  ///
399
  /// @param Stmt      The statement to code generate
400
  /// @param Old       The old Value
401
  /// @param BBMap     A mapping from old values to their new values
402
  ///                  (for values recalculated within this basic block)
403
  /// @param LTS       A mapping from loops virtual canonical induction
404
  ///                  variable to their new values
405
  ///                  (for values recalculated in the new ScoP, but not
406
  ///                   within this basic block)
407
  /// @param L         The loop that surrounded the instruction that referenced
408
  ///                  this value in the original code. This loop is used to
409
  ///                  evaluate the scalar evolution at the right scope.
410
  ///
411
  /// @returns  o A newly synthesized value.
412
  ///           o NULL, if synthesizing the value failed.
413
  Value *trySynthesizeNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap,
414
                               LoopToScevMapT &LTS, Loop *L) const;
415
416
  /// Get the new version of a value.
417
  ///
418
  /// Given an old value, we first check if a new version of this value is
419
  /// available in the BBMap or GlobalMap. In case it is not and the value can
420
  /// be recomputed using SCEV, we do so. If we can not recompute a value
421
  /// using SCEV, but we understand that the value is constant within the scop,
422
  /// we return the old value.  If the value can still not be derived, this
423
  /// function will assert.
424
  ///
425
  /// @param Stmt      The statement to code generate.
426
  /// @param Old       The old Value.
427
  /// @param BBMap     A mapping from old values to their new values
428
  ///                  (for values recalculated within this basic block).
429
  /// @param LTS       A mapping from loops virtual canonical induction
430
  ///                  variable to their new values
431
  ///                  (for values recalculated in the new ScoP, but not
432
  ///                   within this basic block).
433
  /// @param L         The loop that surrounded the instruction that referenced
434
  ///                  this value in the original code. This loop is used to
435
  ///                  evaluate the scalar evolution at the right scope.
436
  ///
437
  /// @returns  o The old value, if it is still valid.
438
  ///           o The new value, if available.
439
  ///           o NULL, if no value is found.
440
  Value *getNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap,
441
                     LoopToScevMapT &LTS, Loop *L) const;
442
443
  void copyInstScalar(ScopStmt &Stmt, Instruction *Inst, ValueMapT &BBMap,
444
                      LoopToScevMapT &LTS);
445
446
  /// Get the innermost loop that surrounds the statement @p Stmt.
447
  Loop *getLoopForStmt(const ScopStmt &Stmt) const;
448
449
  /// Generate the operand address
450
  /// @param NewAccesses A map from memory access ids to new ast expressions,
451
  ///                    which may contain new access expressions for certain
452
  ///                    memory accesses.
453
  Value *generateLocationAccessed(ScopStmt &Stmt, MemAccInst Inst,
454
                                  ValueMapT &BBMap, LoopToScevMapT &LTS,
455
                                  isl_id_to_ast_expr *NewAccesses);
456
457
  /// Generate the operand address.
458
  ///
459
  /// @param Stmt         The statement to generate code for.
460
  /// @param L            The innermost loop that surrounds the statement.
461
  /// @param Pointer      If the access expression is not changed (ie. not found
462
  ///                     in @p LTS), use this Pointer from the original code
463
  ///                     instead.
464
  /// @param BBMap        A mapping from old values to their new values.
465
  /// @param LTS          A mapping from loops virtual canonical induction
466
  ///                     variable to their new values.
467
  /// @param NewAccesses  Ahead-of-time generated access expressions.
468
  /// @param Id           Identifier of the MemoryAccess to generate.
469
  /// @param ExpectedType The type the returned value should have.
470
  ///
471
  /// @return The generated address.
472
  Value *generateLocationAccessed(ScopStmt &Stmt, Loop *L, Value *Pointer,
473
                                  ValueMapT &BBMap, LoopToScevMapT &LTS,
474
                                  isl_id_to_ast_expr *NewAccesses,
475
                                  __isl_take isl_id *Id, Type *ExpectedType);
476
477
  /// Generate the pointer value that is accessed by @p Access.
478
  ///
479
  /// For write accesses, generate the target address. For read accesses,
480
  /// generate the source address.
481
  /// The access can be either an array access or a scalar access. In the first
482
  /// case, the returned address will point to an element into that array. In
483
  /// the scalar case, an alloca is used.
484
  /// If a new AccessRelation is set for the MemoryAccess, the new relation will
485
  /// be used.
486
  ///
487
  /// @param Access      The access to generate a pointer for.
488
  /// @param L           The innermost loop that surrounds the statement.
489
  /// @param LTS         A mapping from loops virtual canonical induction
490
  ///                    variable to their new values.
491
  /// @param BBMap       A mapping from old values to their new values.
492
  /// @param NewAccesses A map from memory access ids to new ast expressions.
493
  ///
494
  /// @return The generated address.
495
  Value *getImplicitAddress(MemoryAccess &Access, Loop *L, LoopToScevMapT &LTS,
496
                            ValueMapT &BBMap,
497
                            __isl_keep isl_id_to_ast_expr *NewAccesses);
498
499
  /// @param NewAccesses A map from memory access ids to new ast expressions,
500
  ///                    which may contain new access expressions for certain
501
  ///                    memory accesses.
502
  Value *generateArrayLoad(ScopStmt &Stmt, LoadInst *load, ValueMapT &BBMap,
503
                           LoopToScevMapT &LTS,
504
                           isl_id_to_ast_expr *NewAccesses);
505
506
  /// @param NewAccesses A map from memory access ids to new ast expressions,
507
  ///                    which may contain new access expressions for certain
508
  ///                    memory accesses.
509
  void generateArrayStore(ScopStmt &Stmt, StoreInst *store, ValueMapT &BBMap,
510
                          LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses);
511
512
  /// Copy a single PHI instruction.
513
  ///
514
  /// The implementation in the BlockGenerator is trivial, however it allows
515
  /// subclasses to handle PHIs differently.
516
  virtual void copyPHIInstruction(ScopStmt &, PHINode *, ValueMapT &,
517
32
                                  LoopToScevMapT &) {}
518
519
  /// Copy a single Instruction.
520
  ///
521
  /// This copies a single Instruction and updates references to old values
522
  /// with references to new values, as defined by GlobalMap and BBMap.
523
  ///
524
  /// @param Stmt        The statement to code generate.
525
  /// @param Inst        The instruction to copy.
526
  /// @param BBMap       A mapping from old values to their new values
527
  ///                    (for values recalculated within this basic block).
528
  /// @param GlobalMap   A mapping from old values to their new values
529
  ///                    (for values recalculated in the new ScoP, but not
530
  ///                    within this basic block).
531
  /// @param LTS         A mapping from loops virtual canonical induction
532
  ///                    variable to their new values
533
  ///                    (for values recalculated in the new ScoP, but not
534
  ///                     within this basic block).
535
  /// @param NewAccesses A map from memory access ids to new ast expressions,
536
  ///                    which may contain new access expressions for certain
537
  ///                    memory accesses.
538
  void copyInstruction(ScopStmt &Stmt, Instruction *Inst, ValueMapT &BBMap,
539
                       LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses);
540
541
  /// Helper to determine if @p Inst can be synthesized in @p Stmt.
542
  ///
543
  /// @returns true, iff @p Inst can be synthesized in @p Stmt.
544
  bool canSyntheziseInStmt(ScopStmt &Stmt, Instruction *Inst);
545
546
  /// Remove dead instructions generated for BB
547
  ///
548
  /// @param BB The basic block for which code has been generated.
549
  /// @param BBMap A local map from old to new instructions.
550
  void removeDeadInstructions(BasicBlock *BB, ValueMapT &BBMap);
551
552
  /// Invalidate the scalar evolution expressions for a scop.
553
  ///
554
  /// This function invalidates the scalar evolution results for all
555
  /// instructions that are part of a given scop. This is necessary to ensure
556
  /// that later scops do not obtain scalar evolution expressions that reference
557
  /// values that earlier dominated the later scop, but have been moved in the
558
  /// conditional part of an earlier scop and consequently no longer
559
  /// dominate the later scop.
560
  ///
561
  /// @param S The scop to invalidate.
562
  void invalidateScalarEvolution(Scop &S);
563
};
564
565
/// Generate a new vector basic block for a polyhedral statement.
566
///
567
/// The only public function exposed is generate().
568
class VectorBlockGenerator : BlockGenerator {
569
public:
570
  /// Generate a new vector basic block for a ScoPStmt.
571
  ///
572
  /// This code generation is similar to the normal, scalar code generation,
573
  /// except that each instruction is code generated for several vector lanes
574
  /// at a time. If possible instructions are issued as actual vector
575
  /// instructions, but e.g. for address calculation instructions we currently
576
  /// generate scalar instructions for each vector lane.
577
  ///
578
  /// @param BlockGen    A block generator object used as parent.
579
  /// @param Stmt        The statement to code generate.
580
  /// @param VLTS        A mapping from loops virtual canonical induction
581
  ///                    variable to their new values
582
  ///                    (for values recalculated in the new ScoP, but not
583
  ///                     within this basic block), one for each lane.
584
  /// @param Schedule    A map from the statement to a schedule where the
585
  ///                    innermost dimension is the dimension of the innermost
586
  ///                    loop containing the statement.
587
  /// @param NewAccesses A map from memory access ids to new ast expressions,
588
  ///                    which may contain new access expressions for certain
589
  ///                    memory accesses.
590
  static void generate(BlockGenerator &BlockGen, ScopStmt &Stmt,
591
                       std::vector<LoopToScevMapT> &VLTS,
592
                       __isl_keep isl_map *Schedule,
593
20
                       __isl_keep isl_id_to_ast_expr *NewAccesses) {
594
20
    VectorBlockGenerator Generator(BlockGen, VLTS, Schedule);
595
20
    Generator.copyStmt(Stmt, NewAccesses);
596
20
  }
597
598
private:
599
  // This is a vector of loop->scev maps.  The first map is used for the first
600
  // vector lane, ...
601
  // Each map contains information about instructions in the old SCoP, which
602
  // are recalculated in the new SCoP. When copying the basic block, we replace
603
  // all references to the old instructions with their recalculated values.
604
  //
605
  // For example, when the code generator produces this AST:
606
  //
607
  //   for (int c1 = 0; c1 <= 1023; c1 += 1)
608
  //     for (int c2 = 0; c2 <= 1023; c2 += VF)
609
  //       for (int lane = 0; lane < VF; lane += 1)
610
  //         Stmt(c2 + lane + 3, c1);
611
  //
612
  // VLTS[lane] contains a map:
613
  //   "outer loop in the old loop nest" -> SCEV("c2 + lane + 3"),
614
  //   "inner loop in the old loop nest" -> SCEV("c1").
615
  std::vector<LoopToScevMapT> &VLTS;
616
617
  // A map from the statement to a schedule where the innermost dimension is the
618
  // dimension of the innermost loop containing the statement.
619
  isl_map *Schedule;
620
621
  VectorBlockGenerator(BlockGenerator &BlockGen,
622
                       std::vector<LoopToScevMapT> &VLTS,
623
                       __isl_keep isl_map *Schedule);
624
625
  int getVectorWidth();
626
627
  Value *getVectorValue(ScopStmt &Stmt, Value *Old, ValueMapT &VectorMap,
628
                        VectorValueMapT &ScalarMaps, Loop *L);
629
630
  Type *getVectorPtrTy(const Value *V, int Width);
631
632
  /// Load a vector from a set of adjacent scalars
633
  ///
634
  /// In case a set of scalars is known to be next to each other in memory,
635
  /// create a vector load that loads those scalars
636
  ///
637
  /// %vector_ptr = bitcast double* %p to <4 x double>*
638
  /// %vec_full = load <4 x double>* %vector_ptr
639
  ///
640
  /// @param Stmt           The statement to code generate.
641
  /// @param NegativeStride This is used to indicate a -1 stride. In such
642
  ///                       a case we load the end of a base address and
643
  ///                       shuffle the accesses in reverse order into the
644
  ///                       vector. By default we would do only positive
645
  ///                       strides.
646
  ///
647
  /// @param NewAccesses    A map from memory access ids to new ast
648
  ///                       expressions, which may contain new access
649
  ///                       expressions for certain memory accesses.
650
  Value *generateStrideOneLoad(ScopStmt &Stmt, LoadInst *Load,
651
                               VectorValueMapT &ScalarMaps,
652
                               __isl_keep isl_id_to_ast_expr *NewAccesses,
653
                               bool NegativeStride);
654
655
  /// Load a vector initialized from a single scalar in memory
656
  ///
657
  /// In case all elements of a vector are initialized to the same
658
  /// scalar value, this value is loaded and shuffled into all elements
659
  /// of the vector.
660
  ///
661
  /// %splat_one = load <1 x double>* %p
662
  /// %splat = shufflevector <1 x double> %splat_one, <1 x
663
  ///       double> %splat_one, <4 x i32> zeroinitializer
664
  ///
665
  /// @param NewAccesses A map from memory access ids to new ast expressions,
666
  ///                    which may contain new access expressions for certain
667
  ///                    memory accesses.
668
  Value *generateStrideZeroLoad(ScopStmt &Stmt, LoadInst *Load,
669
                                ValueMapT &BBMap,
670
                                __isl_keep isl_id_to_ast_expr *NewAccesses);
671
672
  /// Load a vector from scalars distributed in memory
673
  ///
674
  /// In case some scalars are distributed randomly in memory, create a vector
675
  /// by loading each scalar and by inserting one after the other into the
676
  /// vector.
677
  ///
678
  /// %scalar_1 = load double* %p_1
679
  /// %vec_1 = insertelement <2 x double> undef, double %scalar_1, i32 0
680
  /// %scalar_2 = load double* %p_2
681
  /// %vec_2 = insertelement <2 x double> %vec_1, double %scalar_2, i32 1
682
  ///
683
  /// @param NewAccesses A map from memory access ids to new ast expressions,
684
  ///                    which may contain new access expressions for certain
685
  ///                    memory accesses.
686
  Value *generateUnknownStrideLoad(ScopStmt &Stmt, LoadInst *Load,
687
                                   VectorValueMapT &ScalarMaps,
688
                                   __isl_keep isl_id_to_ast_expr *NewAccesses);
689
690
  /// @param NewAccesses A map from memory access ids to new ast expressions,
691
  ///                    which may contain new access expressions for certain
692
  ///                    memory accesses.
693
  void generateLoad(ScopStmt &Stmt, LoadInst *Load, ValueMapT &VectorMap,
694
                    VectorValueMapT &ScalarMaps,
695
                    __isl_keep isl_id_to_ast_expr *NewAccesses);
696
697
  void copyUnaryInst(ScopStmt &Stmt, UnaryInstruction *Inst,
698
                     ValueMapT &VectorMap, VectorValueMapT &ScalarMaps);
699
700
  void copyBinaryInst(ScopStmt &Stmt, BinaryOperator *Inst,
701
                      ValueMapT &VectorMap, VectorValueMapT &ScalarMaps);
702
703
  /// @param NewAccesses A map from memory access ids to new ast expressions,
704
  ///                    which may contain new access expressions for certain
705
  ///                    memory accesses.
706
  void copyStore(ScopStmt &Stmt, StoreInst *Store, ValueMapT &VectorMap,
707
                 VectorValueMapT &ScalarMaps,
708
                 __isl_keep isl_id_to_ast_expr *NewAccesses);
709
710
  /// @param NewAccesses A map from memory access ids to new ast expressions,
711
  ///                    which may contain new access expressions for certain
712
  ///                    memory accesses.
713
  void copyInstScalarized(ScopStmt &Stmt, Instruction *Inst,
714
                          ValueMapT &VectorMap, VectorValueMapT &ScalarMaps,
715
                          __isl_keep isl_id_to_ast_expr *NewAccesses);
716
717
  bool extractScalarValues(const Instruction *Inst, ValueMapT &VectorMap,
718
                           VectorValueMapT &ScalarMaps);
719
720
  bool hasVectorOperands(const Instruction *Inst, ValueMapT &VectorMap);
721
722
  /// Generate vector loads for scalars.
723
  ///
724
  /// @param Stmt           The scop statement for which to generate the loads.
725
  /// @param VectorBlockMap A map that will be updated to relate the original
726
  ///                       values with the newly generated vector loads.
727
  void generateScalarVectorLoads(ScopStmt &Stmt, ValueMapT &VectorBlockMap);
728
729
  /// Verify absence of scalar stores.
730
  ///
731
  /// @param Stmt The scop statement to check for scalar stores.
732
  void verifyNoScalarStores(ScopStmt &Stmt);
733
734
  /// @param NewAccesses A map from memory access ids to new ast expressions,
735
  ///                    which may contain new access expressions for certain
736
  ///                    memory accesses.
737
  void copyInstruction(ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap,
738
                       VectorValueMapT &ScalarMaps,
739
                       __isl_keep isl_id_to_ast_expr *NewAccesses);
740
741
  /// @param NewAccesses A map from memory access ids to new ast expressions,
742
  ///                    which may contain new access expressions for certain
743
  ///                    memory accesses.
744
  void copyStmt(ScopStmt &Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses);
745
};
746
747
/// Generator for new versions of polyhedral region statements.
748
class RegionGenerator : public BlockGenerator {
749
public:
750
  /// Create a generator for regions.
751
  ///
752
  /// @param BlockGen A generator for basic blocks.
753
262
  RegionGenerator(BlockGenerator &BlockGen) : BlockGenerator(BlockGen) {}
754
755
262
  virtual ~RegionGenerator() {}
756
757
  /// Copy the region statement @p Stmt.
758
  ///
759
  /// This copies the entire region represented by @p Stmt and updates
760
  /// references to old values with references to new values, as defined by
761
  /// GlobalMap.
762
  ///
763
  /// @param Stmt      The statement to code generate.
764
  /// @param LTS       A map from old loops to new induction variables as SCEVs.
765
  void copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
766
                __isl_keep isl_id_to_ast_expr *IdToAstExp);
767
768
private:
769
  /// A map from old to new blocks in the region.
770
  DenseMap<BasicBlock *, BasicBlock *> BlockMap;
771
772
  /// The "BBMaps" for the whole region (one for each block).
773
  DenseMap<BasicBlock *, ValueMapT> RegionMaps;
774
775
  /// Mapping to remember PHI nodes that still need incoming values.
776
  using PHINodePairTy = std::pair<PHINode *, PHINode *>;
777
  DenseMap<BasicBlock *, SmallVector<PHINodePairTy, 4>> IncompletePHINodeMap;
778
779
  /// Repair the dominance tree after we created a copy block for @p BB.
780
  ///
781
  /// @returns The immediate dominator in the DT for @p BBCopy if in the region.
782
  BasicBlock *repairDominance(BasicBlock *BB, BasicBlock *BBCopy);
783
784
  /// Add the new operand from the copy of @p IncomingBB to @p PHICopy.
785
  ///
786
  /// PHI nodes, which may have (multiple) edges that enter from outside the
787
  /// non-affine subregion and even from outside the scop, are code generated as
788
  /// follows:
789
  ///
790
  /// # Original
791
  ///
792
  ///   Region: %A -> %exit
793
  ///   NonAffine Stmt: %nonaffB -> %D (includes %nonaffB, %nonaffC)
794
  ///
795
  ///     pre:
796
  ///       %val = add i64 1, 1
797
  ///
798
  ///     A:
799
  ///       br label %nonaffB
800
  ///
801
  ///     nonaffB:
802
  ///       %phi = phi i64 [%val, %A], [%valC, %nonaffC], [%valD, %D]
803
  ///       %cmp = <nonaff>
804
  ///       br i1 %cmp, label %D, label %nonaffC
805
  ///
806
  ///     nonaffC:
807
  ///       %valC = add i64 1, 1
808
  ///       br i1 undef, label %D, label %nonaffB
809
  ///
810
  ///     D:
811
  ///       %valD = ...
812
  ///       %exit_cond = <loopexit>
813
  ///       br i1 %exit_cond, label %nonaffB, label %exit
814
  ///
815
  ///     exit:
816
  ///       ...
817
  ///
818
  ///  - %A and %D enter from outside the non-affine region.
819
  ///  - %nonaffC enters from within the non-affine region.
820
  ///
821
  ///  # New
822
  ///
823
  ///    polly.A:
824
  ///       store i64 %val, i64* %phi.phiops
825
  ///       br label %polly.nonaffB.entry
826
  ///
827
  ///    polly.nonaffB.entry:
828
  ///       %phi.phiops.reload = load i64, i64* %phi.phiops
829
  ///       br label %polly.nonaffB
830
  ///
831
  ///    polly.nonaffB:
832
  ///       %polly.phi = phi i64 [%phi.phiops.reload, %polly.nonaffB.entry],
833
  ///                            [%p.valC, %polly.nonaffC]
834
  ///
835
  ///    polly.nonaffC:
836
  ///       %p.valC = add i64 1, 1
837
  ///       br i1 undef, label %polly.D, label %polly.nonaffB
838
  ///
839
  ///    polly.D:
840
  ///        %p.valD = ...
841
  ///        store i64 %p.valD, i64* %phi.phiops
842
  ///        %p.exit_cond = <loopexit>
843
  ///        br i1 %p.exit_cond, label %polly.nonaffB, label %exit
844
  ///
845
  /// Values that enter the PHI from outside the non-affine region are stored
846
  /// into the stack slot %phi.phiops by statements %polly.A and %polly.D and
847
  /// reloaded in %polly.nonaffB.entry, a basic block generated before the
848
  /// actual non-affine region.
849
  ///
850
  /// When generating the PHI node of the non-affine region in %polly.nonaffB,
851
  /// incoming edges from outside the region are combined into a single branch
852
  /// from %polly.nonaffB.entry which has as incoming value the value reloaded
853
  /// from the %phi.phiops stack slot. Incoming edges from within the region
854
  /// refer to the copied instructions (%p.valC) and basic blocks
855
  /// (%polly.nonaffC) of the non-affine region.
856
  ///
857
  /// @param Stmt       The statement to code generate.
858
  /// @param PHI        The original PHI we copy.
859
  /// @param PHICopy    The copy of @p PHI.
860
  /// @param IncomingBB An incoming block of @p PHI.
861
  /// @param LTS        A map from old loops to new induction variables as
862
  /// SCEVs.
863
  void addOperandToPHI(ScopStmt &Stmt, PHINode *PHI, PHINode *PHICopy,
864
                       BasicBlock *IncomingBB, LoopToScevMapT &LTS);
865
866
  /// Create a PHI that combines the incoming values from all incoming blocks
867
  /// that are in the subregion.
868
  ///
869
  /// PHIs in the subregion's exit block can have incoming edges from within and
870
  /// outside the subregion. This function combines the incoming values from
871
  /// within the subregion to appear as if there is only one incoming edge from
872
  /// the subregion (an additional exit block is created by RegionGenerator).
873
  /// This is to avoid that a value is written to the .phiops location without
874
  /// leaving the subregion because the exiting block has an edge back into the
875
  /// subregion.
876
  ///
877
  /// @param MA    The WRITE of MemoryKind::PHI/MemoryKind::ExitPHI for a PHI in
878
  ///              the subregion's exit block.
879
  /// @param LTS   Virtual induction variable mapping.
880
  /// @param BBMap A mapping from old values to their new values in this block.
881
  /// @param L     Loop surrounding this region statement.
882
  ///
883
  /// @returns The constructed PHI node.
884
  PHINode *buildExitPHI(MemoryAccess *MA, LoopToScevMapT &LTS, ValueMapT &BBMap,
885
                        Loop *L);
886
887
  /// Return the new value of a scalar write, creating a PHINode if
888
  /// necessary.
889
  ///
890
  /// @param MA    A scalar WRITE MemoryAccess.
891
  /// @param LTS   Virtual induction variable mapping.
892
  /// @param BBMap A mapping from old values to their new values in this block.
893
  ///
894
  /// @returns The effective value of @p MA's written value when leaving the
895
  ///          subregion.
896
  /// @see buildExitPHI
897
  Value *getExitScalar(MemoryAccess *MA, LoopToScevMapT &LTS, ValueMapT &BBMap);
898
899
  /// Generate the scalar stores for the given statement.
900
  ///
901
  /// After the statement @p Stmt was copied all inner-SCoP scalar dependences
902
  /// starting in @p Stmt (hence all scalar write accesses in @p Stmt) need to
903
  /// be demoted to memory.
904
  ///
905
  /// @param Stmt  The statement we generate code for.
906
  /// @param LTS   A mapping from loops virtual canonical induction variable to
907
  ///              their new values (for values recalculated in the new ScoP,
908
  ///              but not within this basic block)
909
  /// @param BBMap A mapping from old values to their new values in this block.
910
  /// @param NewAccesses A map from memory access ids to new ast expressions,
911
  ///                    which may contain new access expressions.
912
  virtual void
913
  generateScalarStores(ScopStmt &Stmt, LoopToScevMapT &LTS, ValueMapT &BBMAp,
914
                       __isl_keep isl_id_to_ast_expr *NewAccesses) override;
915
916
  /// Copy a single PHI instruction.
917
  ///
918
  /// This copies a single PHI instruction and updates references to old values
919
  /// with references to new values, as defined by GlobalMap and BBMap.
920
  ///
921
  /// @param Stmt      The statement to code generate.
922
  /// @param PHI       The PHI instruction to copy.
923
  /// @param BBMap     A mapping from old values to their new values
924
  ///                  (for values recalculated within this basic block).
925
  /// @param LTS       A map from old loops to new induction variables as SCEVs.
926
  virtual void copyPHIInstruction(ScopStmt &Stmt, PHINode *Inst,
927
                                  ValueMapT &BBMap,
928
                                  LoopToScevMapT &LTS) override;
929
};
930
} // namespace polly
931
#endif