Coverage Report

Created: 2019-04-21 19:17

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/polly/lib/Transform/ForwardOpTree.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- ForwardOpTree.cpp ----------------------------------------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// Move instructions between statements.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "polly/ForwardOpTree.h"
14
#include "polly/Options.h"
15
#include "polly/ScopBuilder.h"
16
#include "polly/ScopInfo.h"
17
#include "polly/ScopPass.h"
18
#include "polly/Support/GICHelper.h"
19
#include "polly/Support/ISLOStream.h"
20
#include "polly/Support/ISLTools.h"
21
#include "polly/Support/VirtualInstruction.h"
22
#include "polly/ZoneAlgo.h"
23
#include "llvm/ADT/STLExtras.h"
24
#include "llvm/ADT/SmallVector.h"
25
#include "llvm/ADT/Statistic.h"
26
#include "llvm/Analysis/LoopInfo.h"
27
#include "llvm/Analysis/ValueTracking.h"
28
#include "llvm/IR/Instruction.h"
29
#include "llvm/IR/Instructions.h"
30
#include "llvm/IR/Value.h"
31
#include "llvm/Support/Casting.h"
32
#include "llvm/Support/CommandLine.h"
33
#include "llvm/Support/Compiler.h"
34
#include "llvm/Support/Debug.h"
35
#include "llvm/Support/ErrorHandling.h"
36
#include "llvm/Support/raw_ostream.h"
37
#include "isl/ctx.h"
38
#include "isl/isl-noexceptions.h"
39
#include <cassert>
40
#include <memory>
41
42
#define DEBUG_TYPE "polly-optree"
43
44
using namespace llvm;
45
using namespace polly;
46
47
static cl::opt<bool>
48
    AnalyzeKnown("polly-optree-analyze-known",
49
                 cl::desc("Analyze array contents for load forwarding"),
50
                 cl::cat(PollyCategory), cl::init(true), cl::Hidden);
51
52
static cl::opt<bool>
53
    NormalizePHIs("polly-optree-normalize-phi",
54
                  cl::desc("Replace PHIs by their incoming values"),
55
                  cl::cat(PollyCategory), cl::init(false), cl::Hidden);
56
57
static cl::opt<unsigned>
58
    MaxOps("polly-optree-max-ops",
59
           cl::desc("Maximum number of ISL operations to invest for known "
60
                    "analysis; 0=no limit"),
61
           cl::init(1000000), cl::cat(PollyCategory), cl::Hidden);
62
63
STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs");
64
STATISTIC(KnownOutOfQuota,
65
          "Analyses aborted because max_operations was reached");
66
67
STATISTIC(TotalInstructionsCopied, "Number of copied instructions");
68
STATISTIC(TotalKnownLoadsForwarded,
69
          "Number of forwarded loads because their value was known");
70
STATISTIC(TotalReloads, "Number of reloaded values");
71
STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses");
72
STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees");
73
STATISTIC(TotalModifiedStmts,
74
          "Number of statements with at least one forwarded tree");
75
76
STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree");
77
78
STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree");
79
STATISTIC(NumValueWritesInLoops,
80
          "Number of scalar value writes nested in affine loops after OpTree");
81
STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree");
82
STATISTIC(NumPHIWritesInLoops,
83
          "Number of scalar phi writes nested in affine loops after OpTree");
84
STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree");
85
STATISTIC(NumSingletonWritesInLoops,
86
          "Number of singleton writes nested in affine loops after OpTree");
87
88
namespace {
89
90
/// The state of whether an operand tree was/can be forwarded.
91
///
92
/// The items apply to an instruction and its operand tree with the instruction
93
/// as the root element. If the value in question is not an instruction in the
94
/// SCoP, it can be a leaf of an instruction's operand tree.
95
enum ForwardingDecision {
96
  /// The root instruction or value cannot be forwarded at all.
97
  FD_CannotForward,
98
99
  /// The root instruction or value can be forwarded as a leaf of a larger
100
  /// operand tree.
101
  /// It does not make sense to move the value itself, it would just replace it
102
  /// by a use of itself. For instance, a constant "5" used in a statement can
103
  /// be forwarded, but it would just replace it by the same constant "5".
104
  /// However, it makes sense to move as an operand of
105
  ///
106
  ///   %add = add 5, 5
107
  ///
108
  /// where "5" is moved as part of a larger operand tree. "5" would be placed
109
  /// (disregarding for a moment that literal constants don't have a location
110
  /// and can be used anywhere) into the same statement as %add would.
111
  FD_CanForwardLeaf,
112
113
  /// The root instruction can be forwarded and doing so avoids a scalar
114
  /// dependency.
115
  ///
116
  /// This can be either because the operand tree can be moved to the target
117
  /// statement, or a memory access is redirected to read from a different
118
  /// location.
119
  FD_CanForwardProfitably,
120
121
  /// Used to indicate that a forwarding has been carried out successfully, and
122
  /// the forwarded memory access can be deleted.
123
  FD_DidForwardTree,
124
125
  /// Used to indicate that a forwarding has been carried out successfully, and
126
  /// the forwarded memory access is being reused.
127
  FD_DidForwardLeaf,
128
129
  /// A forwarding method cannot be applied to the operand tree.
130
  /// The difference to FD_CannotForward is that there might be other methods
131
  /// that can handle it.
132
  /// The conditions that make an operand tree applicable must be checked even
133
  /// with DoIt==true because a method following the one that returned
134
  /// FD_NotApplicable might have returned FD_CanForwardProfitably.
135
  FD_NotApplicable
136
};
137
138
/// Implementation of operand tree forwarding for a specific SCoP.
139
///
140
/// For a statement that requires a scalar value (through a value read
141
/// MemoryAccess), see if its operand can be moved into the statement. If so,
142
/// the MemoryAccess is removed and all the operand tree instructions are
143
/// moved into the statement. All original instructions are left in the source
144
/// statements. The simplification pass can clean these up.
145
class ForwardOpTreeImpl : ZoneAlgorithm {
146
private:
147
  /// Scope guard to limit the number of isl operations for this pass.
148
  IslMaxOperationsGuard &MaxOpGuard;
149
150
  /// How many instructions have been copied to other statements.
151
  int NumInstructionsCopied = 0;
152
153
  /// Number of loads forwarded because their value was known.
154
  int NumKnownLoadsForwarded = 0;
155
156
  /// Number of values reloaded from known array elements.
157
  int NumReloads = 0;
158
159
  /// How many read-only accesses have been copied.
160
  int NumReadOnlyCopied = 0;
161
162
  /// How many operand trees have been forwarded.
163
  int NumForwardedTrees = 0;
164
165
  /// Number of statements with at least one forwarded operand tree.
166
  int NumModifiedStmts = 0;
167
168
  /// Whether we carried out at least one change to the SCoP.
169
  bool Modified = false;
170
171
  /// Contains the zones where array elements are known to contain a specific
172
  /// value.
173
  /// { [Element[] -> Zone[]] -> ValInst[] }
174
  /// @see computeKnown()
175
  isl::union_map Known;
176
177
  /// Translator for newly introduced ValInsts to already existing ValInsts such
178
  /// that new introduced load instructions can reuse the Known analysis of its
179
  /// original load. { ValInst[] -> ValInst[] }
180
  isl::union_map Translator;
181
182
  /// Get list of array elements that do contain the same ValInst[] at Domain[].
183
  ///
184
  /// @param ValInst { Domain[] -> ValInst[] }
185
  ///                The values for which we search for alternative locations,
186
  ///                per statement instance.
187
  ///
188
  /// @return { Domain[] -> Element[] }
189
  ///         For each statement instance, the array elements that contain the
190
  ///         same ValInst.
191
65
  isl::union_map findSameContentElements(isl::union_map ValInst) {
192
65
    assert(!ValInst.is_single_valued().is_false());
193
65
194
65
    // { Domain[] }
195
65
    isl::union_set Domain = ValInst.domain();
196
65
197
65
    // { Domain[] -> Scatter[] }
198
65
    isl::union_map Schedule = getScatterFor(Domain);
199
65
200
65
    // { Element[] -> [Scatter[] -> ValInst[]] }
201
65
    isl::union_map MustKnownCurried =
202
65
        convertZoneToTimepoints(Known, isl::dim::in, false, true).curry();
203
65
204
65
    // { [Domain[] -> ValInst[]] -> Scatter[] }
205
65
    isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule);
206
65
207
65
    // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] }
208
65
    isl::union_map SchedValDomVal =
209
65
        DomValSched.range_product(ValInst.range_map()).reverse();
210
65
211
65
    // { Element[] -> [Domain[] -> ValInst[]] }
212
65
    isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal);
213
65
214
65
    // { Domain[] -> Element[] }
215
65
    isl::union_map MustKnownMap =
216
65
        MustKnownInst.uncurry().domain().unwrap().reverse();
217
65
    simplify(MustKnownMap);
218
65
219
65
    return MustKnownMap;
220
65
  }
221
222
  /// Find a single array element for each statement instance, within a single
223
  /// array.
224
  ///
225
  /// @param MustKnown { Domain[] -> Element[] }
226
  ///                  Set of candidate array elements.
227
  /// @param Domain    { Domain[] }
228
  ///                  The statement instance for which we need elements for.
229
  ///
230
  /// @return { Domain[] -> Element[] }
231
  ///         For each statement instance, an array element out of @p MustKnown.
232
  ///         All array elements must be in the same array (Polly does not yet
233
  ///         support reading from different arrays using the same
234
  ///         MemoryAccess). If no mapping for all of @p Domain exists, returns
235
  ///         null.
236
65
  isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) {
237
65
    // { Domain[] -> Element[] }
238
65
    isl::map Result;
239
65
240
65
    // MemoryAccesses can read only elements from a single array
241
65
    // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }).
242
65
    // Look through all spaces until we find one that contains at least the
243
65
    // wanted statement instances.
244
65
    for (isl::map Map : MustKnown.get_map_list()) {
245
54
      // Get the array this is accessing.
246
54
      isl::id ArrayId = Map.get_tuple_id(isl::dim::out);
247
54
      ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user());
248
54
249
54
      // No support for generation of indirect array accesses.
250
54
      if (SAI->getBasePtrOriginSAI())
251
0
        continue;
252
54
253
54
      // Determine whether this map contains all wanted values.
254
54
      isl::set MapDom = Map.domain();
255
54
      if (!Domain.is_subset(MapDom).is_true())
256
2
        continue;
257
52
258
52
      // There might be multiple array elements that contain the same value, but
259
52
      // choose only one of them. lexmin is used because it returns a one-value
260
52
      // mapping, we do not care about which one.
261
52
      // TODO: Get the simplest access function.
262
52
      Result = Map.lexmin();
263
52
      break;
264
52
    }
265
65
266
65
    return Result;
267
65
  }
268
269
public:
270
  ForwardOpTreeImpl(Scop *S, LoopInfo *LI, IslMaxOperationsGuard &MaxOpGuard)
271
34
      : ZoneAlgorithm("polly-optree", S, LI), MaxOpGuard(MaxOpGuard) {}
272
273
  /// Compute the zones of known array element contents.
274
  ///
275
  /// @return True if the computed #Known is usable.
276
34
  bool computeKnownValues() {
277
34
    isl::union_map MustKnown, KnownFromLoad, KnownFromInit;
278
34
279
34
    // Check that nothing strange occurs.
280
34
    collectCompatibleElts();
281
34
282
34
    {
283
34
      IslQuotaScope QuotaScope = MaxOpGuard.enter();
284
34
285
34
      computeCommon();
286
34
      if (NormalizePHIs)
287
4
        computeNormalizedPHIs();
288
34
      Known = computeKnown(true, true);
289
34
290
34
      // Preexisting ValInsts use the known content analysis of themselves.
291
34
      Translator = makeIdentityMap(Known.range(), false);
292
34
    }
293
34
294
34
    if (!Known || !Translator || !NormalizeMap) {
295
0
      assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota);
296
0
      Known = nullptr;
297
0
      Translator = nullptr;
298
0
      NormalizeMap = nullptr;
299
0
      LLVM_DEBUG(dbgs() << "Known analysis exceeded max_operations\n");
300
0
      return false;
301
0
    }
302
34
303
34
    KnownAnalyzed++;
304
34
    LLVM_DEBUG(dbgs() << "All known: " << Known << "\n");
305
34
306
34
    return true;
307
34
  }
308
309
33
  void printStatistics(raw_ostream &OS, int Indent = 0) {
310
33
    OS.indent(Indent) << "Statistics {\n";
311
33
    OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied
312
33
                          << '\n';
313
33
    OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded
314
33
                          << '\n';
315
33
    OS.indent(Indent + 4) << "Reloads: " << NumReloads << '\n';
316
33
    OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied
317
33
                          << '\n';
318
33
    OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees
319
33
                          << '\n';
320
33
    OS.indent(Indent + 4) << "Statements with forwarded operand trees: "
321
33
                          << NumModifiedStmts << '\n';
322
33
    OS.indent(Indent) << "}\n";
323
33
  }
324
325
25
  void printStatements(raw_ostream &OS, int Indent = 0) const {
326
25
    OS.indent(Indent) << "After statements {\n";
327
62
    for (auto &Stmt : *S) {
328
62
      OS.indent(Indent + 4) << Stmt.getBaseName() << "\n";
329
62
      for (auto *MA : Stmt)
330
120
        MA->print(OS);
331
62
332
62
      OS.indent(Indent + 12);
333
62
      Stmt.printInstructions(OS);
334
62
    }
335
25
    OS.indent(Indent) << "}\n";
336
25
  }
337
338
  /// Create a new MemoryAccess of type read and MemoryKind::Array.
339
  ///
340
  /// @param Stmt           The statement in which the access occurs.
341
  /// @param LI             The instruction that does the access.
342
  /// @param AccessRelation The array element that each statement instance
343
  ///                       accesses.
344
  ///
345
  /// @return The newly created access.
346
  MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI,
347
16
                                    isl::map AccessRelation) {
348
16
    isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out);
349
16
    ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user());
350
16
351
16
    // Create a dummy SCEV access, to be replaced anyway.
352
16
    SmallVector<const SCEV *, 4> Sizes;
353
16
    Sizes.reserve(SAI->getNumberOfDimensions());
354
16
    SmallVector<const SCEV *, 4> Subscripts;
355
16
    Subscripts.reserve(SAI->getNumberOfDimensions());
356
32
    for (unsigned i = 0; i < SAI->getNumberOfDimensions(); 
i += 116
) {
357
16
      Sizes.push_back(SAI->getDimensionSize(i));
358
16
      Subscripts.push_back(nullptr);
359
16
    }
360
16
361
16
    MemoryAccess *Access =
362
16
        new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(),
363
16
                         LI->getType(), true, {}, Sizes, LI, MemoryKind::Array);
364
16
    S->addAccessFunction(Access);
365
16
    Stmt->addAccess(Access, true);
366
16
367
16
    Access->setNewAccessRelation(AccessRelation);
368
16
369
16
    return Access;
370
16
  }
371
372
  /// Forward a load by reading from an array element that contains the same
373
  /// value. Typically the location it was loaded from.
374
  ///
375
  /// @param TargetStmt  The statement the operand tree will be copied to.
376
  /// @param Inst        The (possibly speculatable) instruction to forward.
377
  /// @param UseStmt     The statement that uses @p Inst.
378
  /// @param UseLoop     The loop @p Inst is used in.
379
  /// @param DefStmt     The statement @p Inst is defined in.
380
  /// @param DefLoop     The loop which contains @p Inst.
381
  /// @param DoIt        If false, only determine whether an operand tree can be
382
  ///                    forwarded. If true, carry out the forwarding. Do not
383
  ///                    use DoIt==true if an operand tree is not known to be
384
  ///                    forwardable.
385
  ///
386
  /// @return FD_NotApplicable  if @p Inst cannot be forwarded by creating a new
387
  ///                           load.
388
  ///         FD_CannotForward  if the pointer operand cannot be forwarded.
389
  ///         FD_CanForwardProfitably if @p Inst is forwardable.
390
  ///         FD_DidForwardTree if @p DoIt was true.
391
  ForwardingDecision forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst,
392
                                      ScopStmt *UseStmt, Loop *UseLoop,
393
                                      ScopStmt *DefStmt, Loop *DefLoop,
394
66
                                      bool DoIt) {
395
66
    // Cannot do anything without successful known analysis.
396
66
    if (Known.is_null() || Translator.is_null() ||
397
66
        MaxOpGuard.hasQuotaExceeded())
398
0
      return FD_NotApplicable;
399
66
400
66
    LoadInst *LI = dyn_cast<LoadInst>(Inst);
401
66
    if (!LI)
402
21
      return FD_NotApplicable;
403
45
404
45
    // If the load is already in the statement, no forwarding is necessary.
405
45
    // However, it might happen that the LoadInst is already present in the
406
45
    // statement's instruction list. In that case we do as follows:
407
45
    // - For the evaluation (DoIt==false), we can trivially forward it as it is
408
45
    //   beneficial to forward an already present instruction.
409
45
    // - For the execution (DoIt==true), prepend the instruction (to make it
410
45
    //   available to all instructions following in the instruction list), but
411
45
    //   do not add another MemoryAccess.
412
45
    MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI);
413
45
    if (Access && 
!DoIt9
)
414
4
      return FD_CanForwardProfitably;
415
41
416
41
    ForwardingDecision OpDecision = forwardTree(
417
41
        TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop, DoIt);
418
41
    switch (OpDecision) {
419
41
    case FD_CannotForward:
420
0
      assert(!DoIt);
421
0
      return OpDecision;
422
41
423
41
    case FD_CanForwardLeaf:
424
20
    case FD_CanForwardProfitably:
425
20
      assert(!DoIt);
426
20
      break;
427
20
428
21
    case FD_DidForwardLeaf:
429
21
    case FD_DidForwardTree:
430
21
      assert(DoIt);
431
21
      break;
432
21
433
21
    default:
434
0
      llvm_unreachable("Shouldn't return this");
435
41
    }
436
41
437
41
    IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
438
41
439
41
    // { DomainUse[] -> ValInst[] }
440
41
    isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
441
41
    assert(!isNormalized(ExpectedVal).is_false() &&
442
41
           "LoadInsts are always normalized");
443
41
444
41
    // { DomainUse[] -> DomainTarget[] }
445
41
    isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt);
446
41
447
41
    // { DomainTarget[] -> ValInst[] }
448
41
    isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
449
41
    isl::union_map TranslatedExpectedVal =
450
41
        isl::union_map(TargetExpectedVal).apply_range(Translator);
451
41
452
41
    // { DomainTarget[] -> Element[] }
453
41
    isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
454
41
455
41
    isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
456
41
    if (!SameVal)
457
3
      return FD_NotApplicable;
458
38
459
38
    if (DoIt)
460
21
      TargetStmt->prependInstruction(LI);
461
38
462
38
    if (!DoIt)
463
17
      return FD_CanForwardProfitably;
464
21
465
21
    if (Access) {
466
5
      LLVM_DEBUG(
467
5
          dbgs() << "    forwarded known load with preexisting MemoryAccess"
468
5
                 << Access << "\n");
469
16
    } else {
470
16
      Access = makeReadArrayAccess(TargetStmt, LI, SameVal);
471
16
      LLVM_DEBUG(dbgs() << "    forwarded known load with new MemoryAccess"
472
16
                        << Access << "\n");
473
16
474
16
      // { ValInst[] }
475
16
      isl::space ValInstSpace = ExpectedVal.get_space().range();
476
16
477
16
      // After adding a new load to the SCoP, also update the Known content
478
16
      // about it. The new load will have a known ValInst of
479
16
      // { [DomainTarget[] -> Value[]] }
480
16
      // but which -- because it is a copy of it -- has same value as the
481
16
      // { [DomainDef[] -> Value[]] }
482
16
      // that it replicates. Instead of cloning the known content of
483
16
      // [DomainDef[] -> Value[]]
484
16
      // for DomainTarget[], we add a 'translator' that maps
485
16
      // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]]
486
16
      // before comparing to the known content.
487
16
      // TODO: 'Translator' could also be used to map PHINodes to their incoming
488
16
      // ValInsts.
489
16
      if (ValInstSpace.is_wrapping()) {
490
16
        // { DefDomain[] -> Value[] }
491
16
        isl::map ValInsts = ExpectedVal.range().unwrap();
492
16
493
16
        // { DefDomain[] }
494
16
        isl::set DefDomain = ValInsts.domain();
495
16
496
16
        // { Value[] }
497
16
        isl::space ValSpace = ValInstSpace.unwrap().range();
498
16
499
16
        // { Value[] -> Value[] }
500
16
        isl::map ValToVal =
501
16
            isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace));
502
16
503
16
        // { DomainDef[] -> DomainTarget[] }
504
16
        isl::map DefToTarget = getDefToTarget(DefStmt, TargetStmt);
505
16
506
16
        // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value[]] }
507
16
        isl::map LocalTranslator = DefToTarget.reverse().product(ValToVal);
508
16
509
16
        Translator = Translator.add_map(LocalTranslator);
510
16
        LLVM_DEBUG(dbgs() << "      local translator is " << LocalTranslator
511
16
                          << "\n");
512
16
      }
513
16
    }
514
21
    LLVM_DEBUG(dbgs() << "      expected values where " << TargetExpectedVal
515
21
                      << "\n");
516
21
    LLVM_DEBUG(dbgs() << "      candidate elements where " << Candidates
517
21
                      << "\n");
518
21
    assert(Access);
519
21
520
21
    NumKnownLoadsForwarded++;
521
21
    TotalKnownLoadsForwarded++;
522
21
    return FD_DidForwardTree;
523
21
  }
524
525
  /// Forward a scalar by redirecting the access to an array element that stores
526
  /// the same value.
527
  ///
528
  /// @param TargetStmt  The statement the operand tree will be copied to.
529
  /// @param Inst        The scalar to forward.
530
  /// @param UseStmt     The statement that uses @p Inst.
531
  /// @param UseLoop     The loop @p Inst is used in.
532
  /// @param DefStmt     The statement @p Inst is defined in.
533
  /// @param DefLoop     The loop which contains @p Inst.
534
  /// @param DoIt        If false, only determine whether an operand tree can be
535
  ///                    forwarded. If true, carry out the forwarding. Do not
536
  ///                    use DoIt==true if an operand tree is not known to be
537
  ///                    forwardable.
538
  ///
539
  /// @return FD_NotApplicable        if @p Inst cannot be reloaded.
540
  ///         FD_CanForwardLeaf       if @p Inst already has an array access.
541
  ///         FD_CanForwardProfitably if @p Inst can be reloaded.
542
  ///         FD_DidForwardLeaf       if @p DoIt was true.
543
  ForwardingDecision reloadKnownContent(ScopStmt *TargetStmt, Instruction *Inst,
544
                                        ScopStmt *UseStmt, Loop *UseLoop,
545
                                        ScopStmt *DefStmt, Loop *DefLoop,
546
24
                                        bool DoIt) {
547
24
    // Cannot do anything without successful known analysis.
548
24
    if (Known.is_null() || Translator.is_null() ||
549
24
        MaxOpGuard.hasQuotaExceeded())
550
0
      return FD_NotApplicable;
551
24
552
24
    MemoryAccess *Access = TargetStmt->lookupInputAccessOf(Inst);
553
24
    if (Access && 
Access->isLatestArrayKind()22
) {
554
0
      if (DoIt)
555
0
        return FD_DidForwardLeaf;
556
0
      return FD_CanForwardLeaf;
557
0
    }
558
24
559
24
    // Don't spend too much time analyzing whether it can be reloaded. When
560
24
    // carrying-out the forwarding, we cannot bail-out in the middle of the
561
24
    // transformation. It also shouldn't take as long because some results are
562
24
    // cached.
563
24
    IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
564
24
565
24
    // { DomainUse[] -> ValInst[] }
566
24
    isl::union_map ExpectedVal = makeNormalizedValInst(Inst, UseStmt, UseLoop);
567
24
568
24
    // { DomainUse[] -> DomainTarget[] }
569
24
    isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt);
570
24
571
24
    // { DomainTarget[] -> ValInst[] }
572
24
    isl::union_map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
573
24
    isl::union_map TranslatedExpectedVal =
574
24
        TargetExpectedVal.apply_range(Translator);
575
24
576
24
    // { DomainTarget[] -> Element[] }
577
24
    isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
578
24
579
24
    isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
580
24
    if (!SameVal)
581
10
      return FD_NotApplicable;
582
14
583
14
    if (!DoIt)
584
7
      return FD_CanForwardProfitably;
585
7
586
7
    if (!Access)
587
0
      Access = TargetStmt->ensureValueRead(Inst);
588
7
589
7
    simplify(SameVal);
590
7
    Access->setNewAccessRelation(SameVal);
591
7
592
7
    TotalReloads++;
593
7
    NumReloads++;
594
7
    return FD_DidForwardLeaf;
595
7
  }
596
597
  /// Forwards a speculatively executable instruction.
598
  ///
599
  /// @param TargetStmt  The statement the operand tree will be copied to.
600
  /// @param UseInst     The (possibly speculatable) instruction to forward.
601
  /// @param DefStmt     The statement @p UseInst is defined in.
602
  /// @param DefLoop     The loop which contains @p UseInst.
603
  /// @param DoIt        If false, only determine whether an operand tree can be
604
  ///                    forwarded. If true, carry out the forwarding. Do not
605
  ///                    use DoIt==true if an operand tree is not known to be
606
  ///                    forwardable.
607
  ///
608
  /// @return FD_NotApplicable  if @p UseInst is not speculatable.
609
  ///         FD_CannotForward  if one of @p UseInst's operands is not
610
  ///                           forwardable.
611
  ///         FD_CanForwardProfitably if @p UseInst is forwardable.
612
  ///         FD_DidForwardTree       if @p DoIt was true.
613
  ForwardingDecision forwardSpeculatable(ScopStmt *TargetStmt,
614
                                         Instruction *UseInst,
615
                                         ScopStmt *DefStmt, Loop *DefLoop,
616
100
                                         bool DoIt) {
617
100
    // PHIs, unless synthesizable, are not yet supported.
618
100
    if (isa<PHINode>(UseInst))
619
17
      return FD_NotApplicable;
620
83
621
83
    // Compatible instructions must satisfy the following conditions:
622
83
    // 1. Idempotent (instruction will be copied, not moved; although its
623
83
    //    original instance might be removed by simplification)
624
83
    // 2. Not access memory (There might be memory writes between)
625
83
    // 3. Not cause undefined behaviour (we might copy to a location when the
626
83
    //    original instruction was not executed; this is currently not possible
627
83
    //    because we do not forward PHINodes)
628
83
    // 4. Not leak memory if executed multiple times (i.e. malloc)
629
83
    //
630
83
    // Instruction::mayHaveSideEffects is not sufficient because it considers
631
83
    // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is
632
83
    // not sufficient because it allows memory accesses.
633
83
    if (mayBeMemoryDependent(*UseInst))
634
49
      return FD_NotApplicable;
635
34
636
34
    if (DoIt) {
637
16
      // To ensure the right order, prepend this instruction before its
638
16
      // operands. This ensures that its operands are inserted before the
639
16
      // instruction using them.
640
16
      // TODO: The operand tree is not really a tree, but a DAG. We should be
641
16
      // able to handle DAGs without duplication.
642
16
      TargetStmt->prependInstruction(UseInst);
643
16
      NumInstructionsCopied++;
644
16
      TotalInstructionsCopied++;
645
16
    }
646
34
647
60
    for (Value *OpVal : UseInst->operand_values()) {
648
60
      ForwardingDecision OpDecision =
649
60
          forwardTree(TargetStmt, OpVal, DefStmt, DefLoop, DoIt);
650
60
      switch (OpDecision) {
651
60
      case FD_CannotForward:
652
2
        assert(!DoIt);
653
2
        return FD_CannotForward;
654
60
655
60
      case FD_CanForwardLeaf:
656
29
      case FD_CanForwardProfitably:
657
29
        assert(!DoIt);
658
29
        break;
659
29
660
29
      case FD_DidForwardLeaf:
661
29
      case FD_DidForwardTree:
662
29
        assert(DoIt);
663
29
        break;
664
29
665
29
      case FD_NotApplicable:
666
0
        llvm_unreachable("forwardTree should never return FD_NotApplicable");
667
60
      }
668
60
    }
669
34
670
34
    
if (32
DoIt32
)
671
16
      return FD_DidForwardTree;
672
16
    return FD_CanForwardProfitably;
673
16
  }
674
675
  /// Determines whether an operand tree can be forwarded or carries out a
676
  /// forwarding, depending on the @p DoIt flag.
677
  ///
678
  /// @param TargetStmt  The statement the operand tree will be copied to.
679
  /// @param UseVal      The value (usually an instruction) which is root of an
680
  ///                    operand tree.
681
  /// @param UseStmt     The statement that uses @p UseVal.
682
  /// @param UseLoop     The loop @p UseVal is used in.
683
  /// @param DoIt        If false, only determine whether an operand tree can be
684
  ///                    forwarded. If true, carry out the forwarding. Do not
685
  ///                    use DoIt==true if an operand tree is not known to be
686
  ///                    forwardable.
687
  ///
688
  /// @return If DoIt==false, return whether the operand tree can be forwarded.
689
  ///         If DoIt==true, return FD_DidForwardTree or FD_DidForwardLeaf.
690
  ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal,
691
186
                                 ScopStmt *UseStmt, Loop *UseLoop, bool DoIt) {
692
186
    ScopStmt *DefStmt = nullptr;
693
186
    Loop *DefLoop = nullptr;
694
186
695
186
    // { DefDomain[] -> TargetDomain[] }
    // NOTE(review): DefToTarget is declared here but never referenced in the
    // rest of this function.
696
186
    isl::map DefToTarget;
697
186
698
186
    VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true);
699
186
    switch (VUse.getKind()) {
700
186
    case VirtualUse::Constant:
701
42
    case VirtualUse::Block:
702
42
    case VirtualUse::Hoisted:
703
42
      // These can be used anywhere without special considerations.
704
42
      if (DoIt)
705
21
        return FD_DidForwardTree;
706
21
      return FD_CanForwardLeaf;
707
21
708
39
    case VirtualUse::Synthesizable: {
709
39
      // ScopExpander will take care of generating the code at the new
710
39
      // location.
711
39
      if (DoIt)
712
20
        return FD_DidForwardTree;
713
19
714
19
      // Check if the value is synthesizable at the new location as well. This
715
19
      // might be possible when leaving a loop for which ScalarEvolution is
716
19
      // unable to derive the exit value for.
717
19
      // TODO: If there is a LCSSA PHI at the loop exit, use that one.
718
19
      // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we
719
19
      // do not forward past its loop header. This would require us to use a
720
19
      // previous loop induction variable instead of the current one. We
721
19
      // currently do not allow forwarding PHI nodes, thus this should never
722
19
      // occur (the only exception where no phi is necessary being an
723
19
      // unreachable loop without edge from the outside).
724
19
      VirtualUse TargetUse = VirtualUse::create(
725
19
          S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true);
726
19
      if (TargetUse.getKind() == VirtualUse::Synthesizable)
727
19
        return FD_CanForwardLeaf;
728
0
729
0
      LLVM_DEBUG(
730
0
          dbgs() << "    Synthesizable would not be synthesizable anymore: "
731
0
                 << *UseVal << "\n");
732
0
      return FD_CannotForward;
733
0
    }
734
0
735
5
    case VirtualUse::ReadOnly:
736
5
      // Note that we cannot return FD_CanForwardTree here. With an operand
737
5
      // tree depth of 0, UseVal is the use in TargetStmt that we try to
738
5
      // replace. With -polly-analyze-read-only-scalars=true we would ensure
739
5
      // the existence of a MemoryAccess (which already exists for a leaf) and
740
5
      // it would be removed again by tryForwardTree because its goal is to
741
5
      // remove this scalar MemoryAccess. It interprets FD_CanForwardTree as
742
5
      // the permission to do so.
      // NOTE(review): FD_CanForwardTree is not used anywhere in the visible
      // code; presumably FD_CanForwardProfitably is meant -- confirm.
743
5
      if (!DoIt)
744
3
        return FD_CanForwardLeaf;
745
2
746
2
      // If we model read-only scalars, we need to create a MemoryAccess for it.
747
2
      if (ModelReadOnlyScalars)
748
1
        TargetStmt->ensureValueRead(UseVal);
749
2
750
2
      NumReadOnlyCopied++;
751
2
      TotalReadOnlyCopied++;
752
2
      return FD_DidForwardLeaf;
753
2
754
28
    case VirtualUse::Intra:
755
28
      // Knowing that UseStmt and DefStmt are the same statement instance, just
756
28
      // reuse the information about UseStmt for DefStmt
757
28
      DefStmt = UseStmt;
758
28
759
28
      LLVM_FALLTHROUGH;
760
100
    case VirtualUse::Inter:
761
100
      Instruction *Inst = cast<Instruction>(UseVal);
762
100
      // DefStmt is already set when we fell through from the Intra case;
      // otherwise look up the statement that contains the definition.
763
100
      if (!DefStmt) {
764
72
        DefStmt = S->getStmtFor(Inst);
765
72
        if (!DefStmt)
766
0
          return FD_CannotForward;
767
100
      }
768
100
769
100
      DefLoop = LI->getLoopFor(Inst->getParent());
770
100
      // Try the forwarding strategies in order; the first one that does not
      // return FD_NotApplicable decides the result.
771
100
      ForwardingDecision SpeculativeResult =
772
100
          forwardSpeculatable(TargetStmt, Inst, DefStmt, DefLoop, DoIt);
773
100
      if (SpeculativeResult != FD_NotApplicable)
774
34
        return SpeculativeResult;
775
66
776
66
      ForwardingDecision KnownResult = forwardKnownLoad(
777
66
          TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop, DoIt);
778
66
      if (KnownResult != FD_NotApplicable)
779
42
        return KnownResult;
780
24
781
24
      ForwardingDecision ReloadResult = reloadKnownContent(
782
24
          TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop, DoIt);
783
24
      if (ReloadResult != FD_NotApplicable)
784
14
        return ReloadResult;
785
10
786
10
      // When no method is found to forward the operand tree, we effectively
787
10
      // cannot handle it.
788
10
      LLVM_DEBUG(dbgs() << "    Cannot forward instruction: " << *Inst << "\n");
789
10
      return FD_CannotForward;
790
0
    }
791
0
792
0
    llvm_unreachable("Case unhandled");
793
0
  }
794
795
  /// Try to forward an operand tree rooted in @p RA.
  ///
  /// forwardTree is first called with DoIt==false to assess the tree; only if
  /// that yields FD_CanForwardProfitably is it called again with DoIt==true.
  ///
  /// @param RA A scalar-kind access (asserted) whose access value is the root
  ///           of the operand tree.
  ///
  /// @return true if forwarding was carried out, false if the assessment was
  ///         anything other than FD_CanForwardProfitably. @p RA is removed
  ///         from its statement only when the result is FD_DidForwardTree.
796
48
  bool tryForwardTree(MemoryAccess *RA) {
797
48
    assert(RA->isLatestScalarKind());
798
48
    LLVM_DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n");
799
48
800
48
    ScopStmt *Stmt = RA->getStatement();
801
48
    Loop *InLoop = Stmt->getSurroundingLoop();
802
48
    // NOTE(review): TargetToUse is computed below but never read in the rest
    // of this function.
803
48
    isl::map TargetToUse;
804
48
    if (!Known.is_null()) {
805
48
      isl::space DomSpace = Stmt->getDomainSpace();
806
48
      TargetToUse =
807
48
          isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace));
808
48
    }
809
48
    // Dry run: target and use statement are both Stmt.
810
48
    ForwardingDecision Assessment =
811
48
        forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop, false);
812
48
    assert(Assessment != FD_DidForwardTree && Assessment != FD_DidForwardLeaf);
813
48
    if (Assessment != FD_CanForwardProfitably)
814
11
      return false;
815
37
    // Same arguments again, this time actually performing the forwarding.
816
37
    ForwardingDecision Execution =
817
37
        forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop, true);
818
37
    assert(((Execution == FD_DidForwardTree) ||
819
37
            (Execution == FD_DidForwardLeaf)) &&
820
37
           "A previous positive assessment must also be executable");
821
37
    // For FD_DidForwardLeaf the access is kept.
822
37
    if (Execution == FD_DidForwardTree)
823
30
      Stmt->removeSingleMemoryAccess(RA);
824
37
    return true;
825
37
  }
826
827
  /// Return which SCoP this instance is processing (the stored @c S pointer).
828
0
  Scop *getScop() const { return S; }
829
830
  /// Run the algorithm: Use value read accesses as operand tree roots and try
831
  /// to forward them into the statement.
  ///
  /// @return The value of Modified, which is set to true here as soon as one
  ///         tree has been forwarded.
832
34
  bool forwardOperandTrees() {
833
87
    for (ScopStmt &Stmt : *S) {
834
87
      bool StmtModified = false;
835
87
836
87
      // Because we are modifying the MemoryAccess list, collect them first to
837
87
      // avoid iterator invalidation.
838
87
      SmallVector<MemoryAccess *, 16> Accs;
839
177
      for (MemoryAccess *RA : Stmt) {
840
177
        if (!RA->isRead())
841
105
          continue;
842
72
        if (!RA->isLatestScalarKind())
843
24
          continue;
844
48
845
48
        Accs.push_back(RA);
846
48
      }
847
87
      // Second pass over the collected candidates; tryForwardTree may remove
      // the access from Stmt.
848
87
      for (MemoryAccess *RA : Accs) {
849
48
        if (tryForwardTree(RA)) {
850
37
          Modified = true;
851
37
          StmtModified = true;
852
37
          NumForwardedTrees++;
853
37
          TotalForwardedTrees++;
854
37
        }
855
48
      }
856
87
      // Count each statement at most once, however many trees were forwarded.
857
87
      if (StmtModified) {
858
33
        NumModifiedStmts++;
859
33
        TotalModifiedStmts++;
860
33
      }
861
87
    }
862
34
863
34
    if (Modified)
864
26
      ScopsModified++;
865
34
    return Modified;
866
34
  }
867
868
  /// Print the pass result, performed transformations and the SCoP after the
869
  /// transformation.
  ///
  /// The statistics are always printed. The statements are printed only if
  /// Modified is set; otherwise a fixed marker line is emitted instead.
  ///
  /// @param OS     The stream to print to.
  /// @param Indent Passed on to printStatistics and printStatements.
870
33
  void print(raw_ostream &OS, int Indent = 0) {
871
33
    printStatistics(OS, Indent);
872
33
873
33
    if (!Modified) {
874
8
      // This line can easily be checked in regression tests.
875
8
      OS << "ForwardOpTree executed, but did not modify anything\n";
876
8
      return;
877
8
    }
878
25
879
25
    printStatements(OS, Indent);
880
25
  }
881
};
882
883
/// Pass that redirects scalar reads to array elements that are known to contain
884
/// the same value.
885
///
886
/// This reduces the number of scalar accesses and therefore potentially
887
/// increases the freedom of the scheduler. In the ideal case, all reads of a
888
/// scalar definition are redirected (we currently do not care about removing
889
/// the write in this case). This is also useful for the main DeLICM pass as
890
/// there are fewer scalars to be mapped.
891
class ForwardOpTree : public ScopPass {
892
private:
893
  /// The pass implementation, also holding per-scop data.
894
  std::unique_ptr<ForwardOpTreeImpl> Impl;
895
896
public:
897
  static char ID;
898
899
33
  explicit ForwardOpTree() : ScopPass(ID) {}
900
  ForwardOpTree(const ForwardOpTree &) = delete;
901
  ForwardOpTree &operator=(const ForwardOpTree &) = delete;
902
  /// Declare ScopInfoRegionPass and LoopInfoWrapperPass as required analyses
  /// and mark all analyses as preserved.
903
33
  void getAnalysisUsage(AnalysisUsage &AU) const override {
904
33
    AU.addRequiredTransitive<ScopInfoRegionPass>();
905
33
    AU.addRequired<LoopInfoWrapperPass>();
906
33
    AU.setPreservesAll();
907
33
  }
908
  /// Build a fresh ForwardOpTreeImpl for @p S, optionally compute the known
  /// values (if AnalyzeKnown is set) and forward the operand trees.
  ///
  /// @return Always false; the result of forwardOperandTrees() is not used.
909
34
  bool runOnScop(Scop &S) override {
910
34
    // Free resources for previous SCoP's computation, if not yet done.
911
34
    releaseMemory();
912
34
913
34
    LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
914
34
    // The guard lives only for this block, which contains all of Impl's work.
    // NOTE(review): presumably it limits isl operations to MaxOps -- confirm.
915
34
    {
916
34
      IslMaxOperationsGuard MaxOpGuard(S.getIslCtx().get(), MaxOps, false);
917
34
      Impl = llvm::make_unique<ForwardOpTreeImpl>(&S, &LI, MaxOpGuard);
918
34
919
34
      if (AnalyzeKnown) {
920
34
        LLVM_DEBUG(dbgs() << "Prepare forwarders...\n");
921
34
        Impl->computeKnownValues();
922
34
      }
923
34
924
34
      LLVM_DEBUG(dbgs() << "Forwarding operand trees...\n");
925
34
      Impl->forwardOperandTrees();
926
34
927
34
      if (MaxOpGuard.hasQuotaExceeded()) {
928
0
        LLVM_DEBUG(dbgs() << "Not all operations completed because of "
929
0
                             "max_operations exceeded\n");
930
0
        KnownOutOfQuota++;
931
0
      }
932
34
    }
933
34
934
34
    LLVM_DEBUG(dbgs() << "\nFinal Scop:\n");
935
34
    LLVM_DEBUG(dbgs() << S);
936
34
937
34
    // Update statistics
938
34
    auto ScopStats = S.getStatistics();
939
34
    NumValueWrites += ScopStats.NumValueWrites;
940
34
    NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
941
34
    NumPHIWrites += ScopStats.NumPHIWrites;
942
34
    NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
943
34
    NumSingletonWrites += ScopStats.NumSingletonWrites;
944
34
    NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
945
34
946
34
    return false;
947
34
  }
948
  /// Print the result of the last runOnScop() to @p OS; prints nothing if no
  /// implementation object exists.
949
33
  void printScop(raw_ostream &OS, Scop &S) const override {
950
33
    if (!Impl)
951
0
      return;
952
33
953
33
    assert(Impl->getScop() == &S);
954
33
    Impl->print(OS);
955
33
  }
956
  /// Destroy the implementation object and the per-scop data it holds.
957
151
  void releaseMemory() override { Impl.reset(); }
958
}; // class ForwardOpTree
959
960
// Out-of-class definition of the static ID member that the ForwardOpTree
// constructor passes to ScopPass.
char ForwardOpTree::ID;
961
} // namespace
962
963
0
/// Return a newly heap-allocated ForwardOpTree pass instance.
ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); }
964
965
47.0k
// Pass registration for ForwardOpTree under the argument "polly-optree", with
// LoopInfoWrapperPass listed as a dependency.
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags of the LLVM macros -- confirm.
INITIALIZE_PASS_BEGIN(ForwardOpTree, "polly-optree",
966
47.0k
                      "Polly - Forward operand tree", false, false)
967
47.0k
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
968
47.0k
INITIALIZE_PASS_END(ForwardOpTree, "polly-optree",
969
                    "Polly - Forward operand tree", false, false)