Coverage Report

Created: 2018-10-23 09:19

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/polly/lib/Transform/ForwardOpTree.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- ForwardOpTree.cpp ----------------------------------------*- C++ -*-===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// Move instructions between statements.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "polly/ForwardOpTree.h"
15
#include "polly/Options.h"
16
#include "polly/ScopBuilder.h"
17
#include "polly/ScopInfo.h"
18
#include "polly/ScopPass.h"
19
#include "polly/Support/GICHelper.h"
20
#include "polly/Support/ISLOStream.h"
21
#include "polly/Support/ISLTools.h"
22
#include "polly/Support/VirtualInstruction.h"
23
#include "polly/ZoneAlgo.h"
24
#include "llvm/ADT/STLExtras.h"
25
#include "llvm/ADT/SmallVector.h"
26
#include "llvm/ADT/Statistic.h"
27
#include "llvm/Analysis/LoopInfo.h"
28
#include "llvm/Analysis/ValueTracking.h"
29
#include "llvm/IR/Instruction.h"
30
#include "llvm/IR/Instructions.h"
31
#include "llvm/IR/Value.h"
32
#include "llvm/Pass.h"
33
#include "llvm/Support/Casting.h"
34
#include "llvm/Support/CommandLine.h"
35
#include "llvm/Support/Compiler.h"
36
#include "llvm/Support/Debug.h"
37
#include "llvm/Support/ErrorHandling.h"
38
#include "llvm/Support/raw_ostream.h"
39
#include "isl/ctx.h"
40
#include "isl/isl-noexceptions.h"
41
#include <cassert>
42
#include <memory>
43
44
#define DEBUG_TYPE "polly-optree"
45
46
using namespace llvm;
47
using namespace polly;
48
49
static cl::opt<bool>
50
    AnalyzeKnown("polly-optree-analyze-known",
51
                 cl::desc("Analyze array contents for load forwarding"),
52
                 cl::cat(PollyCategory), cl::init(true), cl::Hidden);
53
54
static cl::opt<bool>
55
    NormalizePHIs("polly-optree-normalize-phi",
56
                  cl::desc("Replace PHIs by their incoming values"),
57
                  cl::cat(PollyCategory), cl::init(false), cl::Hidden);
58
59
static cl::opt<unsigned>
60
    MaxOps("polly-optree-max-ops",
61
           cl::desc("Maximum number of ISL operations to invest for known "
62
                    "analysis; 0=no limit"),
63
           cl::init(1000000), cl::cat(PollyCategory), cl::Hidden);
64
65
STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs");
66
STATISTIC(KnownOutOfQuota,
67
          "Analyses aborted because max_operations was reached");
68
69
STATISTIC(TotalInstructionsCopied, "Number of copied instructions");
70
STATISTIC(TotalKnownLoadsForwarded,
71
          "Number of forwarded loads because their value was known");
72
STATISTIC(TotalReloads, "Number of reloaded values");
73
STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses");
74
STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees");
75
STATISTIC(TotalModifiedStmts,
76
          "Number of statements with at least one forwarded tree");
77
78
STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree");
79
80
STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree");
81
STATISTIC(NumValueWritesInLoops,
82
          "Number of scalar value writes nested in affine loops after OpTree");
83
STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree");
84
STATISTIC(NumPHIWritesInLoops,
85
          "Number of scalar phi writes nested in affine loops after OpTree");
86
STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree");
87
STATISTIC(NumSingletonWritesInLoops,
88
          "Number of singleton writes nested in affine loops after OpTree");
89
90
namespace {
91
92
/// The state of whether an operand tree was/can be forwarded.
93
///
94
/// The items apply to an instruction and its operand tree with the instruction
95
/// as the root element. If the value in question is not an instruction in the
96
/// SCoP, it can be a leaf of an instruction's operand tree.
97
enum ForwardingDecision {
98
  /// The root instruction or value cannot be forwarded at all.
99
  FD_CannotForward,
100
101
  /// The root instruction or value can be forwarded as a leaf of a larger
102
  /// operand tree.
103
  /// It does not make sense to move the value itself, it would just replace it
104
  /// by a use of itself. For instance, a constant "5" used in a statement can
105
  /// be forwarded, but it would just replace it by the same constant "5".
106
  /// However, it makes sense to move as an operand of
107
  ///
108
  ///   %add = add 5, 5
109
  ///
110
  /// where "5" is moved as part of a larger operand tree. "5" would be placed
111
  /// (disregarding for a moment that literal constants don't have a location
112
  /// and can be used anywhere) into the same statement as %add would.
113
  FD_CanForwardLeaf,
114
115
  /// The root instruction can be forwarded and doing so avoids a scalar
116
  /// dependency.
117
  ///
118
  /// This can be either because the operand tree can be moved to the target
119
  /// statement, or a memory access is redirected to read from a different
120
  /// location.
121
  FD_CanForwardProfitably,
122
123
  /// Used to indicate that a forwarding has been carried out successfully, and
124
  /// the forwarded memory access can be deleted.
125
  FD_DidForwardTree,
126
127
  /// Used to indicate that a forwarding has been carried out successfully, and
128
  /// the forwarded memory access is being reused.
129
  FD_DidForwardLeaf,
130
131
  /// A forwarding method cannot be applied to the operand tree.
132
  /// The difference to FD_CannotForward is that there might be other methods
133
  /// that can handle it.
134
  /// The conditions that make an operand tree applicable must be checked even
135
  /// with DoIt==true because a method following the one that returned
136
  /// FD_NotApplicable might have returned FD_CanForwardProfitably.
137
  FD_NotApplicable
138
};
139
140
/// Implementation of operand tree forwarding for a specific SCoP.
141
///
142
/// For a statement that requires a scalar value (through a value read
143
/// MemoryAccess), see if its operand can be moved into the statement. If so,
144
  /// the MemoryAccess is removed and all the operand tree instructions are
145
/// moved into the statement. All original instructions are left in the source
146
/// statements. The simplification pass can clean these up.
147
class ForwardOpTreeImpl : ZoneAlgorithm {
148
private:
149
  /// Scope guard to limit the number of isl operations for this pass.
150
  IslMaxOperationsGuard &MaxOpGuard;
151
152
  /// How many instructions have been copied to other statements.
153
  int NumInstructionsCopied = 0;
154
155
  /// Number of loads forwarded because their value was known.
156
  int NumKnownLoadsForwarded = 0;
157
158
  /// Number of values reloaded from known array elements.
159
  int NumReloads = 0;
160
161
  /// How many read-only accesses have been copied.
162
  int NumReadOnlyCopied = 0;
163
164
  /// How many operand trees have been forwarded.
165
  int NumForwardedTrees = 0;
166
167
  /// Number of statements with at least one forwarded operand tree.
168
  int NumModifiedStmts = 0;
169
170
  /// Whether we carried out at least one change to the SCoP.
171
  bool Modified = false;
172
173
  /// Contains the zones where array elements are known to contain a specific
174
  /// value.
175
  /// { [Element[] -> Zone[]] -> ValInst[] }
176
  /// @see computeKnown()
177
  isl::union_map Known;
178
179
  /// Translator for newly introduced ValInsts to already existing ValInsts such
180
  /// that new introduced load instructions can reuse the Known analysis of its
181
  /// original load. { ValInst[] -> ValInst[] }
182
  isl::union_map Translator;
183
184
  /// Get list of array elements that do contain the same ValInst[] at Domain[].
185
  ///
186
  /// @param ValInst { Domain[] -> ValInst[] }
187
  ///                The values for which we search for alternative locations,
188
  ///                per statement instance.
189
  ///
190
  /// @return { Domain[] -> Element[] }
191
  ///         For each statement instance, the array elements that contain the
192
  ///         same ValInst.
193
65
  isl::union_map findSameContentElements(isl::union_map ValInst) {
194
65
    assert(!ValInst.is_single_valued().is_false());
195
65
196
65
    // { Domain[] }
197
65
    isl::union_set Domain = ValInst.domain();
198
65
199
65
    // { Domain[] -> Scatter[] }
200
65
    isl::union_map Schedule = getScatterFor(Domain);
201
65
202
65
    // { Element[] -> [Scatter[] -> ValInst[]] }
203
65
    isl::union_map MustKnownCurried =
204
65
        convertZoneToTimepoints(Known, isl::dim::in, false, true).curry();
205
65
206
65
    // { [Domain[] -> ValInst[]] -> Scatter[] }
207
65
    isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule);
208
65
209
65
    // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] }
210
65
    isl::union_map SchedValDomVal =
211
65
        DomValSched.range_product(ValInst.range_map()).reverse();
212
65
213
65
    // { Element[] -> [Domain[] -> ValInst[]] }
214
65
    isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal);
215
65
216
65
    // { Domain[] -> Element[] }
217
65
    isl::union_map MustKnownMap =
218
65
        MustKnownInst.uncurry().domain().unwrap().reverse();
219
65
    simplify(MustKnownMap);
220
65
221
65
    return MustKnownMap;
222
65
  }
223
224
  /// Find a single array element for each statement instance, within a single
225
  /// array.
226
  ///
227
  /// @param MustKnown { Domain[] -> Element[] }
228
  ///                  Set of candidate array elements.
229
  /// @param Domain    { Domain[] }
230
  ///                  The statement instances for which we need elements.
231
  ///
232
  /// @return { Domain[] -> Element[] }
233
  ///         For each statement instance, an array element out of @p MustKnown.
234
  ///         All array elements must be in the same array (Polly does not yet
235
  ///         support reading from different accesses using the same
236
  ///         MemoryAccess). If no mapping for all of @p Domain exists, returns
237
  ///         null.
238
65
  isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) {
239
65
    // { Domain[] -> Element[] }
240
65
    isl::map Result;
241
65
242
65
    // MemoryAccesses can read only elements from a single array
243
65
    // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }).
244
65
    // Look through all spaces until we find one that contains at least the
245
65
    // wanted statement instances.
246
65
    for (isl::map Map : MustKnown.get_map_list()) {
247
54
      // Get the array this is accessing.
248
54
      isl::id ArrayId = Map.get_tuple_id(isl::dim::out);
249
54
      ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user());
250
54
251
54
      // No support for generation of indirect array accesses.
252
54
      if (SAI->getBasePtrOriginSAI())
253
0
        continue;
254
54
255
54
      // Determine whether this map contains all wanted values.
256
54
      isl::set MapDom = Map.domain();
257
54
      if (!Domain.is_subset(MapDom).is_true())
258
2
        continue;
259
52
260
52
      // There might be multiple array elements that contain the same value, but
261
52
      // choose only one of them. lexmin is used because it returns a one-value
262
52
      // mapping, we do not care about which one.
263
52
      // TODO: Get the simplest access function.
264
52
      Result = Map.lexmin();
265
52
      break;
266
52
    }
267
65
268
65
    return Result;
269
65
  }
270
271
public:
272
  ForwardOpTreeImpl(Scop *S, LoopInfo *LI, IslMaxOperationsGuard &MaxOpGuard)
273
34
      : ZoneAlgorithm("polly-optree", S, LI), MaxOpGuard(MaxOpGuard) {}
274
275
  /// Compute the zones of known array element contents.
276
  ///
277
  /// @return True if the computed #Known is usable.
278
34
  bool computeKnownValues() {
279
34
    isl::union_map MustKnown, KnownFromLoad, KnownFromInit;
280
34
281
34
    // Check that nothing strange occurs.
282
34
    collectCompatibleElts();
283
34
284
34
    {
285
34
      IslQuotaScope QuotaScope = MaxOpGuard.enter();
286
34
287
34
      computeCommon();
288
34
      if (NormalizePHIs)
289
4
        computeNormalizedPHIs();
290
34
      Known = computeKnown(true, true);
291
34
292
34
      // Preexisting ValInsts use the known content analysis of themselves.
293
34
      Translator = makeIdentityMap(Known.range(), false);
294
34
    }
295
34
296
34
    if (!Known || !Translator || !NormalizeMap) {
297
0
      assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota);
298
0
      Known = nullptr;
299
0
      Translator = nullptr;
300
0
      NormalizeMap = nullptr;
301
0
      LLVM_DEBUG(dbgs() << "Known analysis exceeded max_operations\n");
302
0
      return false;
303
0
    }
304
34
305
34
    KnownAnalyzed++;
306
34
    LLVM_DEBUG(dbgs() << "All known: " << Known << "\n");
307
34
308
34
    return true;
309
34
  }
310
311
33
  void printStatistics(raw_ostream &OS, int Indent = 0) {
312
33
    OS.indent(Indent) << "Statistics {\n";
313
33
    OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied
314
33
                          << '\n';
315
33
    OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded
316
33
                          << '\n';
317
33
    OS.indent(Indent + 4) << "Reloads: " << NumReloads << '\n';
318
33
    OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied
319
33
                          << '\n';
320
33
    OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees
321
33
                          << '\n';
322
33
    OS.indent(Indent + 4) << "Statements with forwarded operand trees: "
323
33
                          << NumModifiedStmts << '\n';
324
33
    OS.indent(Indent) << "}\n";
325
33
  }
326
327
25
  void printStatements(raw_ostream &OS, int Indent = 0) const {
328
25
    OS.indent(Indent) << "After statements {\n";
329
62
    for (auto &Stmt : *S) {
330
62
      OS.indent(Indent + 4) << Stmt.getBaseName() << "\n";
331
62
      for (auto *MA : Stmt)
332
120
        MA->print(OS);
333
62
334
62
      OS.indent(Indent + 12);
335
62
      Stmt.printInstructions(OS);
336
62
    }
337
25
    OS.indent(Indent) << "}\n";
338
25
  }
339
340
  /// Create a new MemoryAccess of type read and MemoryKind::Array.
341
  ///
342
  /// @param Stmt           The statement in which the access occurs.
343
  /// @param LI             The instruction that does the access.
344
  /// @param AccessRelation The array element that each statement instance
345
  ///                       accesses.
346
  ///
347
  /// @return The newly created access.
348
  MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI,
349
16
                                    isl::map AccessRelation) {
350
16
    isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out);
351
16
    ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user());
352
16
353
16
    // Create a dummy SCEV access, to be replaced anyway.
354
16
    SmallVector<const SCEV *, 4> Sizes;
355
16
    Sizes.reserve(SAI->getNumberOfDimensions());
356
16
    SmallVector<const SCEV *, 4> Subscripts;
357
16
    Subscripts.reserve(SAI->getNumberOfDimensions());
358
32
    for (unsigned i = 0; i < SAI->getNumberOfDimensions(); 
i += 116
) {
359
16
      Sizes.push_back(SAI->getDimensionSize(i));
360
16
      Subscripts.push_back(nullptr);
361
16
    }
362
16
363
16
    MemoryAccess *Access =
364
16
        new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(),
365
16
                         LI->getType(), true, {}, Sizes, LI, MemoryKind::Array);
366
16
    S->addAccessFunction(Access);
367
16
    Stmt->addAccess(Access, true);
368
16
369
16
    Access->setNewAccessRelation(AccessRelation);
370
16
371
16
    return Access;
372
16
  }
373
374
  /// Forward a load by reading from an array element that contains the same
375
  /// value. Typically the location it was loaded from.
376
  ///
377
  /// @param TargetStmt  The statement the operand tree will be copied to.
378
  /// @param Inst        The (possibly speculatable) instruction to forward.
379
  /// @param UseStmt     The statement that uses @p Inst.
380
  /// @param UseLoop     The loop @p Inst is used in.
381
  /// @param DefStmt     The statement @p Inst is defined in.
382
  /// @param DefLoop     The loop which contains @p Inst.
383
  /// @param DoIt        If false, only determine whether an operand tree can be
384
  ///                    forwarded. If true, carry out the forwarding. Do not
385
  ///                    use DoIt==true if an operand tree is not known to be
386
  ///                    forwardable.
387
  ///
388
  /// @return FD_NotApplicable  if @p Inst cannot be forwarded by creating a new
389
  ///                           load.
390
  ///         FD_CannotForward  if the pointer operand cannot be forwarded.
391
  ///         FD_CanForwardProfitably if @p Inst is forwardable.
392
  ///         FD_DidForwardTree if @p DoIt was true.
393
  ForwardingDecision forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst,
394
                                      ScopStmt *UseStmt, Loop *UseLoop,
395
                                      ScopStmt *DefStmt, Loop *DefLoop,
396
66
                                      bool DoIt) {
397
66
    // Cannot do anything without successful known analysis.
398
66
    if (Known.is_null() || Translator.is_null() ||
399
66
        MaxOpGuard.hasQuotaExceeded())
400
0
      return FD_NotApplicable;
401
66
402
66
    LoadInst *LI = dyn_cast<LoadInst>(Inst);
403
66
    if (!LI)
404
21
      return FD_NotApplicable;
405
45
406
45
    // If the load is already in the statement, no forwarding is necessary.
407
45
    // However, it might happen that the LoadInst is already present in the
408
45
    // statement's instruction list. In that case we do as follows:
409
45
    // - For the evaluation (DoIt==false), we can trivially forward it and
410
45
    //   report it as profitable since the instruction is already present.
411
45
    // - For the execution (DoIt==true), prepend the instruction (to make it
412
45
    //   available to all instructions following in the instruction list), but
413
45
    //   do not add another MemoryAccess.
414
45
    MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI);
415
45
    if (Access && 
!DoIt9
)
416
4
      return FD_CanForwardProfitably;
417
41
418
41
    ForwardingDecision OpDecision = forwardTree(
419
41
        TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop, DoIt);
420
41
    switch (OpDecision) {
421
41
    case FD_CannotForward:
422
0
      assert(!DoIt);
423
0
      return OpDecision;
424
41
425
41
    case FD_CanForwardLeaf:
426
20
    case FD_CanForwardProfitably:
427
20
      assert(!DoIt);
428
20
      break;
429
20
430
21
    case FD_DidForwardLeaf:
431
21
    case FD_DidForwardTree:
432
21
      assert(DoIt);
433
21
      break;
434
21
435
21
    default:
436
0
      llvm_unreachable("Shouldn't return this");
437
41
    }
438
41
439
41
    IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
440
41
441
41
    // { DomainUse[] -> ValInst[] }
442
41
    isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
443
41
    assert(!isNormalized(ExpectedVal).is_false() &&
444
41
           "LoadInsts are always normalized");
445
41
446
41
    // { DomainUse[] -> DomainTarget[] }
447
41
    isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt);
448
41
449
41
    // { DomainTarget[] -> ValInst[] }
450
41
    isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
451
41
    isl::union_map TranslatedExpectedVal =
452
41
        isl::union_map(TargetExpectedVal).apply_range(Translator);
453
41
454
41
    // { DomainTarget[] -> Element[] }
455
41
    isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
456
41
457
41
    isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
458
41
    if (!SameVal)
459
3
      return FD_NotApplicable;
460
38
461
38
    if (DoIt)
462
21
      TargetStmt->prependInstruction(LI);
463
38
464
38
    if (!DoIt)
465
17
      return FD_CanForwardProfitably;
466
21
467
21
    if (Access) {
468
5
      LLVM_DEBUG(
469
5
          dbgs() << "    forwarded known load with preexisting MemoryAccess"
470
5
                 << Access << "\n");
471
16
    } else {
472
16
      Access = makeReadArrayAccess(TargetStmt, LI, SameVal);
473
16
      LLVM_DEBUG(dbgs() << "    forwarded known load with new MemoryAccess"
474
16
                        << Access << "\n");
475
16
476
16
      // { ValInst[] }
477
16
      isl::space ValInstSpace = ExpectedVal.get_space().range();
478
16
479
16
      // After adding a new load to the SCoP, also update the Known content
480
16
      // about it. The new load will have a known ValInst of
481
16
      // { [DomainTarget[] -> Value[]] }
482
16
      // but which -- because it is a copy of it -- has the same value as the
483
16
      // { [DomainDef[] -> Value[]] }
484
16
      // that it replicates. Instead of  cloning the known content of
485
16
      // [DomainDef[] -> Value[]]
486
16
      // for DomainTarget[], we add a 'translator' that maps
487
16
      // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]]
488
16
      // before comparing to the known content.
489
16
      // TODO: 'Translator' could also be used to map PHINodes to their incoming
490
16
      // ValInsts.
491
16
      if (ValInstSpace.is_wrapping()) {
492
16
        // { DefDomain[] -> Value[] }
493
16
        isl::map ValInsts = ExpectedVal.range().unwrap();
494
16
495
16
        // { DefDomain[] }
496
16
        isl::set DefDomain = ValInsts.domain();
497
16
498
16
        // { Value[] }
499
16
        isl::space ValSpace = ValInstSpace.unwrap().range();
500
16
501
16
        // { Value[] -> Value[] }
502
16
        isl::map ValToVal =
503
16
            isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace));
504
16
505
16
        // { DomainDef[] -> DomainTarget[] }
506
16
        isl::map DefToTarget = getDefToTarget(DefStmt, TargetStmt);
507
16
508
16
        // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value[]] }
509
16
        isl::map LocalTranslator = DefToTarget.reverse().product(ValToVal);
510
16
511
16
        Translator = Translator.add_map(LocalTranslator);
512
16
        LLVM_DEBUG(dbgs() << "      local translator is " << LocalTranslator
513
16
                          << "\n");
514
16
      }
515
16
    }
516
21
    LLVM_DEBUG(dbgs() << "      expected values where " << TargetExpectedVal
517
21
                      << "\n");
518
21
    LLVM_DEBUG(dbgs() << "      candidate elements where " << Candidates
519
21
                      << "\n");
520
21
    assert(Access);
521
21
522
21
    NumKnownLoadsForwarded++;
523
21
    TotalKnownLoadsForwarded++;
524
21
    return FD_DidForwardTree;
525
21
  }
526
527
  /// Forward a scalar by redirecting the access to an array element that stores
528
  /// the same value.
529
  ///
530
  /// @param TargetStmt  The statement the operand tree will be copied to.
531
  /// @param Inst        The scalar to forward.
532
  /// @param UseStmt     The statement that uses @p Inst.
533
  /// @param UseLoop     The loop @p Inst is used in.
534
  /// @param DefStmt     The statement @p Inst is defined in.
535
  /// @param DefLoop     The loop which contains @p Inst.
536
  /// @param DoIt        If false, only determine whether an operand tree can be
537
  ///                    forwarded. If true, carry out the forwarding. Do not
538
  ///                    use DoIt==true if an operand tree is not known to be
539
  ///                    forwardable.
540
  ///
541
  /// @return FD_NotApplicable        if @p Inst cannot be reloaded.
542
  ///         FD_CanForwardLeaf       if @p Inst is already read from an array.
543
  ///         FD_CanForwardProfitably if @p Inst can be reloaded.
544
  ///         FD_DidForwardLeaf       if @p DoIt was true.
545
  ForwardingDecision reloadKnownContent(ScopStmt *TargetStmt, Instruction *Inst,
546
                                        ScopStmt *UseStmt, Loop *UseLoop,
547
                                        ScopStmt *DefStmt, Loop *DefLoop,
548
24
                                        bool DoIt) {
549
24
    // Cannot do anything without successful known analysis.
550
24
    if (Known.is_null() || Translator.is_null() ||
551
24
        MaxOpGuard.hasQuotaExceeded())
552
0
      return FD_NotApplicable;
553
24
554
24
    MemoryAccess *Access = TargetStmt->lookupInputAccessOf(Inst);
555
24
    if (Access && 
Access->isLatestArrayKind()22
) {
556
0
      if (DoIt)
557
0
        return FD_DidForwardLeaf;
558
0
      return FD_CanForwardLeaf;
559
0
    }
560
24
561
24
    // Don't spend too much time analyzing whether it can be reloaded. When
562
24
    // carrying-out the forwarding, we cannot bail-out in the middle of the
563
24
    // transformation. It also shouldn't take as long because some results are
564
24
    // cached.
565
24
    IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
566
24
567
24
    // { DomainUse[] -> ValInst[] }
568
24
    isl::union_map ExpectedVal = makeNormalizedValInst(Inst, UseStmt, UseLoop);
569
24
570
24
    // { DomainUse[] -> DomainTarget[] }
571
24
    isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt);
572
24
573
24
    // { DomainTarget[] -> ValInst[] }
574
24
    isl::union_map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
575
24
    isl::union_map TranslatedExpectedVal =
576
24
        TargetExpectedVal.apply_range(Translator);
577
24
578
24
    // { DomainTarget[] -> Element[] }
579
24
    isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
580
24
581
24
    isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
582
24
    if (!SameVal)
583
10
      return FD_NotApplicable;
584
14
585
14
    if (!DoIt)
586
7
      return FD_CanForwardProfitably;
587
7
588
7
    if (!Access)
589
0
      Access = TargetStmt->ensureValueRead(Inst);
590
7
591
7
    simplify(SameVal);
592
7
    Access->setNewAccessRelation(SameVal);
593
7
594
7
    TotalReloads++;
595
7
    NumReloads++;
596
7
    return FD_DidForwardLeaf;
597
7
  }
598
599
  /// Forwards a speculatively executable instruction.
600
  ///
601
  /// @param TargetStmt  The statement the operand tree will be copied to.
602
  /// @param UseInst     The (possibly speculatable) instruction to forward.
603
  /// @param DefStmt     The statement @p UseInst is defined in.
604
  /// @param DefLoop     The loop which contains @p UseInst.
605
  /// @param DoIt        If false, only determine whether an operand tree can be
606
  ///                    forwarded. If true, carry out the forwarding. Do not
607
  ///                    use DoIt==true if an operand tree is not known to be
608
  ///                    forwardable.
609
  ///
610
  /// @return FD_NotApplicable  if @p UseInst is not speculatable.
611
  ///         FD_CannotForward  if one of @p UseInst's operands is not
612
  ///                           forwardable.
613
  ///         FD_CanForwardProfitably if @p UseInst is forwardable.
614
  ///         FD_DidForwardTree if @p DoIt was true.
615
  ForwardingDecision forwardSpeculatable(ScopStmt *TargetStmt,
616
                                         Instruction *UseInst,
617
                                         ScopStmt *DefStmt, Loop *DefLoop,
618
100
                                         bool DoIt) {
619
100
    // PHIs, unless synthesizable, are not yet supported.
620
100
    if (isa<PHINode>(UseInst))
621
17
      return FD_NotApplicable;
622
83
623
83
    // Compatible instructions must satisfy the following conditions:
624
83
    // 1. Idempotent (instruction will be copied, not moved; although its
625
83
    //    original instance might be removed by simplification)
626
83
    // 2. Not access memory (There might be memory writes between)
627
83
    // 3. Not cause undefined behaviour (we might copy to a location when the
628
83
    //    original instruction was not executed; this is currently not possible
629
83
    //    because we do not forward PHINodes)
630
83
    // 4. Not leak memory if executed multiple times (i.e. malloc)
631
83
    //
632
83
    // Instruction::mayHaveSideEffects is not sufficient because it considers
633
83
    // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is
634
83
    // not sufficient because it allows memory accesses.
635
83
    if (mayBeMemoryDependent(*UseInst))
636
49
      return FD_NotApplicable;
637
34
638
34
    if (DoIt) {
639
16
      // To ensure the right order, prepend this instruction before its
640
16
      // operands. This ensures that its operands are inserted before the
641
16
      // instruction using them.
642
16
      // TODO: The operand tree is not really a tree, but a DAG. We should be
643
16
      // able to handle DAGs without duplication.
644
16
      TargetStmt->prependInstruction(UseInst);
645
16
      NumInstructionsCopied++;
646
16
      TotalInstructionsCopied++;
647
16
    }
648
34
649
60
    for (Value *OpVal : UseInst->operand_values()) {
650
60
      ForwardingDecision OpDecision =
651
60
          forwardTree(TargetStmt, OpVal, DefStmt, DefLoop, DoIt);
652
60
      switch (OpDecision) {
653
60
      case FD_CannotForward:
654
2
        assert(!DoIt);
655
2
        return FD_CannotForward;
656
60
657
60
      case FD_CanForwardLeaf:
658
29
      case FD_CanForwardProfitably:
659
29
        assert(!DoIt);
660
29
        break;
661
29
662
29
      case FD_DidForwardLeaf:
663
29
      case FD_DidForwardTree:
664
29
        assert(DoIt);
665
29
        break;
666
29
667
29
      case FD_NotApplicable:
668
0
        llvm_unreachable("forwardTree should never return FD_NotApplicable");
669
60
      }
670
60
    }
671
34
672
34
    
if (32
DoIt32
)
673
16
      return FD_DidForwardTree;
674
16
    return FD_CanForwardProfitably;
675
16
  }
676
677
  /// Determines whether an operand tree can be forwarded or carries out a
678
  /// forwarding, depending on the @p DoIt flag.
679
  ///
680
  /// @param TargetStmt  The statement the operand tree will be copied to.
681
  /// @param UseVal      The value (usually an instruction) which is root of an
682
  ///                    operand tree.
683
  /// @param UseStmt     The statement that uses @p UseVal.
684
  /// @param UseLoop     The loop @p UseVal is used in.
685
  /// @param DoIt        If false, only determine whether an operand tree can be
686
  ///                    forwarded. If true, carry out the forwarding. Do not
687
  ///                    use DoIt==true if an operand tree is not known to be
688
  ///                    forwardable.
689
  ///
690
  /// @return If DoIt==false, return whether the operand tree can be forwarded.
691
  ///         If DoIt==true, return FD_DidForward.
692
  ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal,
693
186
                                 ScopStmt *UseStmt, Loop *UseLoop, bool DoIt) {
694
186
    ScopStmt *DefStmt = nullptr;
695
186
    Loop *DefLoop = nullptr;
696
186
697
186
    // { DefDomain[] -> TargetDomain[] }
698
186
    isl::map DefToTarget;
699
186
700
186
    VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true);
701
186
    switch (VUse.getKind()) {
702
186
    case VirtualUse::Constant:
703
42
    case VirtualUse::Block:
704
42
    case VirtualUse::Hoisted:
705
42
      // These can be used anywhere without special considerations.
706
42
      if (DoIt)
707
21
        return FD_DidForwardTree;
708
21
      return FD_CanForwardLeaf;
709
21
710
39
    case VirtualUse::Synthesizable: {
711
39
      // ScopExpander will take care of generating the code at the new
712
39
      // location.
713
39
      if (DoIt)
714
20
        return FD_DidForwardTree;
715
19
716
19
      // Check if the value is synthesizable at the new location as well. This
717
19
      // might be possible when leaving a loop for which ScalarEvolution is
718
19
      // unable to derive the exit value for.
719
19
      // TODO: If there is a LCSSA PHI at the loop exit, use that one.
720
19
      // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we
721
19
      // do not forward past its loop header. This would require us to use a
722
19
      // previous loop induction variable instead of the current one. We currently
723
19
      // do not allow forwarding PHI nodes, thus this should never occur (the
724
19
      // only exception where no phi is necessary being an unreachable loop
725
19
      // without edge from the outside).
726
19
      VirtualUse TargetUse = VirtualUse::create(
727
19
          S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true);
728
19
      if (TargetUse.getKind() == VirtualUse::Synthesizable)
729
19
        return FD_CanForwardLeaf;
730
0
731
0
      LLVM_DEBUG(
732
0
          dbgs() << "    Synthesizable would not be synthesizable anymore: "
733
0
                 << *UseVal << "\n");
734
0
      return FD_CannotForward;
735
0
    }
736
0
737
5
    case VirtualUse::ReadOnly:
738
5
      // Note that we cannot return FD_CanForwardTree here. With an operand tree
739
5
      // depth of 0, UseVal is the use in TargetStmt that we try to replace.
740
5
      // With -polly-analyze-read-only-scalars=true we would ensure the
741
5
      // existence of a MemoryAccess (which already exists for a leaf) and be
742
5
      // removed again by tryForwardTree because its goal is to remove this
743
5
      // scalar MemoryAccess. It interprets FD_CanForwardTree as the permission
744
5
      // to do so.
745
5
      if (!DoIt)
746
3
        return FD_CanForwardLeaf;
747
2
748
2
      // If we model read-only scalars, we need to create a MemoryAccess for it.
749
2
      if (ModelReadOnlyScalars)
750
1
        TargetStmt->ensureValueRead(UseVal);
751
2
752
2
      NumReadOnlyCopied++;
753
2
      TotalReadOnlyCopied++;
754
2
      return FD_DidForwardLeaf;
755
2
756
28
    case VirtualUse::Intra:
757
28
      // Knowing that UseStmt and DefStmt are the same statement instance, just
758
28
      // reuse the information about UseStmt for DefStmt
759
28
      DefStmt = UseStmt;
760
28
761
28
      LLVM_FALLTHROUGH;
762
100
    case VirtualUse::Inter:
763
100
      Instruction *Inst = cast<Instruction>(UseVal);
764
100
765
100
      if (!DefStmt) {
766
72
        DefStmt = S->getStmtFor(Inst);
767
72
        if (!DefStmt)
768
0
          return FD_CannotForward;
769
100
      }
770
100
771
100
      DefLoop = LI->getLoopFor(Inst->getParent());
772
100
773
100
      ForwardingDecision SpeculativeResult =
774
100
          forwardSpeculatable(TargetStmt, Inst, DefStmt, DefLoop, DoIt);
775
100
      if (SpeculativeResult != FD_NotApplicable)
776
34
        return SpeculativeResult;
777
66
778
66
      ForwardingDecision KnownResult = forwardKnownLoad(
779
66
          TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop, DoIt);
780
66
      if (KnownResult != FD_NotApplicable)
781
42
        return KnownResult;
782
24
783
24
      ForwardingDecision ReloadResult = reloadKnownContent(
784
24
          TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop, DoIt);
785
24
      if (ReloadResult != FD_NotApplicable)
786
14
        return ReloadResult;
787
10
788
10
      // When no method is found to forward the operand tree, we effectively
789
10
      // cannot handle it.
790
10
      LLVM_DEBUG(dbgs() << "    Cannot forward instruction: " << *Inst << "\n");
791
10
      return FD_CannotForward;
792
0
    }
793
0
794
0
    llvm_unreachable("Case unhandled");
795
0
  }
796
797
  /// Try to forward an operand tree rooted in @p RA.
798
48
  bool tryForwardTree(MemoryAccess *RA) {
799
48
    assert(RA->isLatestScalarKind());
800
48
    LLVM_DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n");
801
48
802
48
    ScopStmt *Stmt = RA->getStatement();
803
48
    Loop *InLoop = Stmt->getSurroundingLoop();
804
48
805
48
    isl::map TargetToUse;
806
48
    if (!Known.is_null()) {
807
48
      isl::space DomSpace = Stmt->getDomainSpace();
808
48
      TargetToUse =
809
48
          isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace));
810
48
    }
811
48
812
48
    ForwardingDecision Assessment =
813
48
        forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop, false);
814
48
    assert(Assessment != FD_DidForwardTree && Assessment != FD_DidForwardLeaf);
815
48
    if (Assessment != FD_CanForwardProfitably)
816
11
      return false;
817
37
818
37
    ForwardingDecision Execution =
819
37
        forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop, true);
820
37
    assert(((Execution == FD_DidForwardTree) ||
821
37
            (Execution == FD_DidForwardLeaf)) &&
822
37
           "A previous positive assessment must also be executable");
823
37
824
37
    if (Execution == FD_DidForwardTree)
825
30
      Stmt->removeSingleMemoryAccess(RA);
826
37
    return true;
827
37
  }
828
829
  /// Return which SCoP this instance is processing.
830
0
  Scop *getScop() const { return S; }
831
832
  /// Run the algorithm: Use value read accesses as operand tree roots and try
833
  /// to forward them into the statement.
834
34
  bool forwardOperandTrees() {
835
87
    for (ScopStmt &Stmt : *S) {
836
87
      bool StmtModified = false;
837
87
838
87
      // Because we are modifying the MemoryAccess list, collect them first to
839
87
      // avoid iterator invalidation.
840
87
      SmallVector<MemoryAccess *, 16> Accs;
841
177
      for (MemoryAccess *RA : Stmt) {
842
177
        if (!RA->isRead())
843
105
          continue;
844
72
        if (!RA->isLatestScalarKind())
845
24
          continue;
846
48
847
48
        Accs.push_back(RA);
848
48
      }
849
87
850
87
      for (MemoryAccess *RA : Accs) {
851
48
        if (tryForwardTree(RA)) {
852
37
          Modified = true;
853
37
          StmtModified = true;
854
37
          NumForwardedTrees++;
855
37
          TotalForwardedTrees++;
856
37
        }
857
48
      }
858
87
859
87
      if (StmtModified) {
860
33
        NumModifiedStmts++;
861
33
        TotalModifiedStmts++;
862
33
      }
863
87
    }
864
34
865
34
    if (Modified)
866
26
      ScopsModified++;
867
34
    return Modified;
868
34
  }
869
870
  /// Print the pass result, performed transformations and the SCoP after the
871
  /// transformation.
872
33
  void print(raw_ostream &OS, int Indent = 0) {
873
33
    printStatistics(OS, Indent);
874
33
875
33
    if (!Modified) {
876
8
      // This line can easily be checked in regression tests.
877
8
      OS << "ForwardOpTree executed, but did not modify anything\n";
878
8
      return;
879
8
    }
880
25
881
25
    printStatements(OS, Indent);
882
25
  }
883
};
884
885
/// Pass that redirects scalar reads to array elements that are known to contain
886
/// the same value.
887
///
888
/// This reduces the number of scalar accesses and therefore potentially
889
/// increases the freedom of the scheduler. In the ideal case, all reads of a
890
/// scalar definition are redirected (We currently do not care about removing
891
/// the write in this case).  This is also useful for the main DeLICM pass as
892
/// there are fewer scalars to be mapped.
893
class ForwardOpTree : public ScopPass {
894
private:
895
  /// The pass implementation, also holding per-scop data.
896
  std::unique_ptr<ForwardOpTreeImpl> Impl;
897
898
public:
899
  static char ID;
900
901
33
  explicit ForwardOpTree() : ScopPass(ID) {}
902
  ForwardOpTree(const ForwardOpTree &) = delete;
903
  ForwardOpTree &operator=(const ForwardOpTree &) = delete;
904
905
33
  void getAnalysisUsage(AnalysisUsage &AU) const override {
906
33
    AU.addRequiredTransitive<ScopInfoRegionPass>();
907
33
    AU.addRequired<LoopInfoWrapperPass>();
908
33
    AU.setPreservesAll();
909
33
  }
910
911
34
  bool runOnScop(Scop &S) override {
912
34
    // Free resources for previous SCoP's computation, if not yet done.
913
34
    releaseMemory();
914
34
915
34
    LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
916
34
917
34
    {
918
34
      IslMaxOperationsGuard MaxOpGuard(S.getIslCtx().get(), MaxOps, false);
919
34
      Impl = llvm::make_unique<ForwardOpTreeImpl>(&S, &LI, MaxOpGuard);
920
34
921
34
      if (AnalyzeKnown) {
922
34
        LLVM_DEBUG(dbgs() << "Prepare forwarders...\n");
923
34
        Impl->computeKnownValues();
924
34
      }
925
34
926
34
      LLVM_DEBUG(dbgs() << "Forwarding operand trees...\n");
927
34
      Impl->forwardOperandTrees();
928
34
929
34
      if (MaxOpGuard.hasQuotaExceeded()) {
930
0
        LLVM_DEBUG(dbgs() << "Not all operations completed because of "
931
0
                             "max_operations exceeded\n");
932
0
        KnownOutOfQuota++;
933
0
      }
934
34
    }
935
34
936
34
    LLVM_DEBUG(dbgs() << "\nFinal Scop:\n");
937
34
    LLVM_DEBUG(dbgs() << S);
938
34
939
34
    // Update statistics
940
34
    auto ScopStats = S.getStatistics();
941
34
    NumValueWrites += ScopStats.NumValueWrites;
942
34
    NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
943
34
    NumPHIWrites += ScopStats.NumPHIWrites;
944
34
    NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
945
34
    NumSingletonWrites += ScopStats.NumSingletonWrites;
946
34
    NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
947
34
948
34
    return false;
949
34
  }
950
951
33
  void printScop(raw_ostream &OS, Scop &S) const override {
952
33
    if (!Impl)
953
0
      return;
954
33
955
33
    assert(Impl->getScop() == &S);
956
33
    Impl->print(OS);
957
33
  }
958
959
151
  void releaseMemory() override { Impl.reset(); }
960
}; // class ForwardOpTree
961
962
char ForwardOpTree::ID;
963
} // namespace
964
965
0
ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); }
966
967
45.1k
INITIALIZE_PASS_BEGIN(ForwardOpTree, "polly-optree",
968
45.1k
                      "Polly - Forward operand tree", false, false)
969
45.1k
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
970
45.1k
INITIALIZE_PASS_END(ForwardOpTree, "polly-optree",
971
                    "Polly - Forward operand tree", false, false)