Coverage Report

Created: 2019-02-20 07:29

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/polly/lib/Transform/ForwardOpTree.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- ForwardOpTree.h ------------------------------------------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// Move instructions between statements.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "polly/ForwardOpTree.h"
14
#include "polly/Options.h"
15
#include "polly/ScopBuilder.h"
16
#include "polly/ScopInfo.h"
17
#include "polly/ScopPass.h"
18
#include "polly/Support/GICHelper.h"
19
#include "polly/Support/ISLOStream.h"
20
#include "polly/Support/ISLTools.h"
21
#include "polly/Support/VirtualInstruction.h"
22
#include "polly/ZoneAlgo.h"
23
#include "llvm/ADT/STLExtras.h"
24
#include "llvm/ADT/SmallVector.h"
25
#include "llvm/ADT/Statistic.h"
26
#include "llvm/Analysis/LoopInfo.h"
27
#include "llvm/Analysis/ValueTracking.h"
28
#include "llvm/IR/Instruction.h"
29
#include "llvm/IR/Instructions.h"
30
#include "llvm/IR/Value.h"
31
#include "llvm/Pass.h"
32
#include "llvm/Support/Casting.h"
33
#include "llvm/Support/CommandLine.h"
34
#include "llvm/Support/Compiler.h"
35
#include "llvm/Support/Debug.h"
36
#include "llvm/Support/ErrorHandling.h"
37
#include "llvm/Support/raw_ostream.h"
38
#include "isl/ctx.h"
39
#include "isl/isl-noexceptions.h"
40
#include <cassert>
41
#include <memory>
42
43
#define DEBUG_TYPE "polly-optree"
44
45
using namespace llvm;
46
using namespace polly;
47
48
static cl::opt<bool>
49
    AnalyzeKnown("polly-optree-analyze-known",
50
                 cl::desc("Analyze array contents for load forwarding"),
51
                 cl::cat(PollyCategory), cl::init(true), cl::Hidden);
52
53
static cl::opt<bool>
54
    NormalizePHIs("polly-optree-normalize-phi",
55
                  cl::desc("Replace PHIs by their incoming values"),
56
                  cl::cat(PollyCategory), cl::init(false), cl::Hidden);
57
58
static cl::opt<unsigned>
59
    MaxOps("polly-optree-max-ops",
60
           cl::desc("Maximum number of ISL operations to invest for known "
61
                    "analysis; 0=no limit"),
62
           cl::init(1000000), cl::cat(PollyCategory), cl::Hidden);
63
64
STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs");
65
STATISTIC(KnownOutOfQuota,
66
          "Analyses aborted because max_operations was reached");
67
68
STATISTIC(TotalInstructionsCopied, "Number of copied instructions");
69
STATISTIC(TotalKnownLoadsForwarded,
70
          "Number of forwarded loads because their value was known");
71
STATISTIC(TotalReloads, "Number of reloaded values");
72
STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses");
73
STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees");
74
STATISTIC(TotalModifiedStmts,
75
          "Number of statements with at least one forwarded tree");
76
77
STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree");
78
79
STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree");
80
STATISTIC(NumValueWritesInLoops,
81
          "Number of scalar value writes nested in affine loops after OpTree");
82
STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree");
83
STATISTIC(NumPHIWritesInLoops,
84
          "Number of scalar phi writes nested in affine loops after OpTree");
85
STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree");
86
STATISTIC(NumSingletonWritesInLoops,
87
          "Number of singleton writes nested in affine loops after OpTree");
88
89
namespace {
90
91
/// The state of whether an operand tree was/can be forwarded.
92
///
93
/// The items apply to an instructions and its operand tree with the instruction
94
/// as the root element. If the value in question is not an instruction in the
95
/// SCoP, it can be a leaf of an instruction's operand tree.
96
enum ForwardingDecision {
97
  /// The root instruction or value cannot be forwarded at all.
98
  FD_CannotForward,
99
100
  /// The root instruction or value can be forwarded as a leaf of a larger
101
  /// operand tree.
102
  /// It does not make sense to move the value itself, it would just replace it
103
  /// by a use of itself. For instance, a constant "5" used in a statement can
104
  /// be forwarded, but it would just replace it by the same constant "5".
105
  /// However, it makes sense to move as an operand of
106
  ///
107
  ///   %add = add 5, 5
108
  ///
109
  /// where "5" is moved as part of a larger operand tree. "5" would be placed
110
  /// (disregarding for a moment that literal constants don't have a location
111
  /// and can be used anywhere) into the same statement as %add would.
112
  FD_CanForwardLeaf,
113
114
  /// The root instruction can be forwarded and doing so avoids a scalar
115
  /// dependency.
116
  ///
117
  /// This can be either because the operand tree can be moved to the target
118
  /// statement, or a memory access is redirected to read from a different
119
  /// location.
120
  FD_CanForwardProfitably,
121
122
  /// Used to indicate that a forwarding has be carried out successfully, and
123
  /// the forwarded memory access can be deleted.
124
  FD_DidForwardTree,
125
126
  /// Used to indicate that a forwarding has be carried out successfully, and
127
  /// the forwarded memory access is being reused.
128
  FD_DidForwardLeaf,
129
130
  /// A forwarding method cannot be applied to the operand tree.
131
  /// The difference to FD_CannotForward is that there might be other methods
132
  /// that can handle it.
133
  /// The conditions that make an operand tree applicable must be checked even
134
  /// with DoIt==true because a method following the one that returned
135
  /// FD_NotApplicable might have returned FD_CanForwardTree.
136
  FD_NotApplicable
137
};
138
139
/// Implementation of operand tree forwarding for a specific SCoP.
140
///
141
/// For a statement that requires a scalar value (through a value read
142
/// MemoryAccess), see if its operand can be moved into the statement. If so,
143
/// the MemoryAccess is removed and the all the operand tree instructions are
144
/// moved into the statement. All original instructions are left in the source
145
/// statements. The simplification pass can clean these up.
146
class ForwardOpTreeImpl : ZoneAlgorithm {
147
private:
148
  /// Scope guard to limit the number of isl operations for this pass.
149
  IslMaxOperationsGuard &MaxOpGuard;
150
151
  /// How many instructions have been copied to other statements.
152
  int NumInstructionsCopied = 0;
153
154
  /// Number of loads forwarded because their value was known.
155
  int NumKnownLoadsForwarded = 0;
156
157
  /// Number of values reloaded from known array elements.
158
  int NumReloads = 0;
159
160
  /// How many read-only accesses have been copied.
161
  int NumReadOnlyCopied = 0;
162
163
  /// How many operand trees have been forwarded.
164
  int NumForwardedTrees = 0;
165
166
  /// Number of statements with at least one forwarded operand tree.
167
  int NumModifiedStmts = 0;
168
169
  /// Whether we carried out at least one change to the SCoP.
170
  bool Modified = false;
171
172
  /// Contains the zones where array elements are known to contain a specific
173
  /// value.
174
  /// { [Element[] -> Zone[]] -> ValInst[] }
175
  /// @see computeKnown()
176
  isl::union_map Known;
177
178
  /// Translator for newly introduced ValInsts to already existing ValInsts such
179
  /// that new introduced load instructions can reuse the Known analysis of its
180
  /// original load. { ValInst[] -> ValInst[] }
181
  isl::union_map Translator;
182
183
  /// Get list of array elements that do contain the same ValInst[] at Domain[].
184
  ///
185
  /// @param ValInst { Domain[] -> ValInst[] }
186
  ///                The values for which we search for alternative locations,
187
  ///                per statement instance.
188
  ///
189
  /// @return { Domain[] -> Element[] }
190
  ///         For each statement instance, the array elements that contain the
191
  ///         same ValInst.
192
65
  isl::union_map findSameContentElements(isl::union_map ValInst) {
193
65
    assert(!ValInst.is_single_valued().is_false());
194
65
195
65
    // { Domain[] }
196
65
    isl::union_set Domain = ValInst.domain();
197
65
198
65
    // { Domain[] -> Scatter[] }
199
65
    isl::union_map Schedule = getScatterFor(Domain);
200
65
201
65
    // { Element[] -> [Scatter[] -> ValInst[]] }
202
65
    isl::union_map MustKnownCurried =
203
65
        convertZoneToTimepoints(Known, isl::dim::in, false, true).curry();
204
65
205
65
    // { [Domain[] -> ValInst[]] -> Scatter[] }
206
65
    isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule);
207
65
208
65
    // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] }
209
65
    isl::union_map SchedValDomVal =
210
65
        DomValSched.range_product(ValInst.range_map()).reverse();
211
65
212
65
    // { Element[] -> [Domain[] -> ValInst[]] }
213
65
    isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal);
214
65
215
65
    // { Domain[] -> Element[] }
216
65
    isl::union_map MustKnownMap =
217
65
        MustKnownInst.uncurry().domain().unwrap().reverse();
218
65
    simplify(MustKnownMap);
219
65
220
65
    return MustKnownMap;
221
65
  }
222
223
  /// Find a single array element for each statement instance, within a single
224
  /// array.
225
  ///
226
  /// @param MustKnown { Domain[] -> Element[] }
227
  ///                  Set of candidate array elements.
228
  /// @param Domain    { Domain[] }
229
  ///                  The statement instance for which we need elements for.
230
  ///
231
  /// @return { Domain[] -> Element[] }
232
  ///         For each statement instance, an array element out of @p MustKnown.
233
  ///         All array elements must be in the same array (Polly does not yet
234
  ///         support reading from different accesses using the same
235
  ///         MemoryAccess). If no mapping for all of @p Domain exists, returns
236
  ///         null.
237
65
  isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) {
238
65
    // { Domain[] -> Element[] }
239
65
    isl::map Result;
240
65
241
65
    // MemoryAccesses can read only elements from a single array
242
65
    // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }).
243
65
    // Look through all spaces until we find one that contains at least the
244
65
    // wanted statement instance.s
245
65
    for (isl::map Map : MustKnown.get_map_list()) {
246
54
      // Get the array this is accessing.
247
54
      isl::id ArrayId = Map.get_tuple_id(isl::dim::out);
248
54
      ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user());
249
54
250
54
      // No support for generation of indirect array accesses.
251
54
      if (SAI->getBasePtrOriginSAI())
252
0
        continue;
253
54
254
54
      // Determine whether this map contains all wanted values.
255
54
      isl::set MapDom = Map.domain();
256
54
      if (!Domain.is_subset(MapDom).is_true())
257
2
        continue;
258
52
259
52
      // There might be multiple array elements that contain the same value, but
260
52
      // choose only one of them. lexmin is used because it returns a one-value
261
52
      // mapping, we do not care about which one.
262
52
      // TODO: Get the simplest access function.
263
52
      Result = Map.lexmin();
264
52
      break;
265
52
    }
266
65
267
65
    return Result;
268
65
  }
269
270
public:
271
  ForwardOpTreeImpl(Scop *S, LoopInfo *LI, IslMaxOperationsGuard &MaxOpGuard)
272
34
      : ZoneAlgorithm("polly-optree", S, LI), MaxOpGuard(MaxOpGuard) {}
273
274
  /// Compute the zones of known array element contents.
275
  ///
276
  /// @return True if the computed #Known is usable.
277
34
  bool computeKnownValues() {
278
34
    isl::union_map MustKnown, KnownFromLoad, KnownFromInit;
279
34
280
34
    // Check that nothing strange occurs.
281
34
    collectCompatibleElts();
282
34
283
34
    {
284
34
      IslQuotaScope QuotaScope = MaxOpGuard.enter();
285
34
286
34
      computeCommon();
287
34
      if (NormalizePHIs)
288
4
        computeNormalizedPHIs();
289
34
      Known = computeKnown(true, true);
290
34
291
34
      // Preexisting ValInsts use the known content analysis of themselves.
292
34
      Translator = makeIdentityMap(Known.range(), false);
293
34
    }
294
34
295
34
    if (!Known || !Translator || !NormalizeMap) {
296
0
      assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota);
297
0
      Known = nullptr;
298
0
      Translator = nullptr;
299
0
      NormalizeMap = nullptr;
300
0
      LLVM_DEBUG(dbgs() << "Known analysis exceeded max_operations\n");
301
0
      return false;
302
0
    }
303
34
304
34
    KnownAnalyzed++;
305
34
    LLVM_DEBUG(dbgs() << "All known: " << Known << "\n");
306
34
307
34
    return true;
308
34
  }
309
310
33
  void printStatistics(raw_ostream &OS, int Indent = 0) {
311
33
    OS.indent(Indent) << "Statistics {\n";
312
33
    OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied
313
33
                          << '\n';
314
33
    OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded
315
33
                          << '\n';
316
33
    OS.indent(Indent + 4) << "Reloads: " << NumReloads << '\n';
317
33
    OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied
318
33
                          << '\n';
319
33
    OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees
320
33
                          << '\n';
321
33
    OS.indent(Indent + 4) << "Statements with forwarded operand trees: "
322
33
                          << NumModifiedStmts << '\n';
323
33
    OS.indent(Indent) << "}\n";
324
33
  }
325
326
25
  void printStatements(raw_ostream &OS, int Indent = 0) const {
327
25
    OS.indent(Indent) << "After statements {\n";
328
62
    for (auto &Stmt : *S) {
329
62
      OS.indent(Indent + 4) << Stmt.getBaseName() << "\n";
330
62
      for (auto *MA : Stmt)
331
120
        MA->print(OS);
332
62
333
62
      OS.indent(Indent + 12);
334
62
      Stmt.printInstructions(OS);
335
62
    }
336
25
    OS.indent(Indent) << "}\n";
337
25
  }
338
339
  /// Create a new MemoryAccess of type read and MemoryKind::Array.
340
  ///
341
  /// @param Stmt           The statement in which the access occurs.
342
  /// @param LI             The instruction that does the access.
343
  /// @param AccessRelation The array element that each statement instance
344
  ///                       accesses.
345
  ///
346
  /// @param The newly created access.
347
  MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI,
348
16
                                    isl::map AccessRelation) {
349
16
    isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out);
350
16
    ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user());
351
16
352
16
    // Create a dummy SCEV access, to be replaced anyway.
353
16
    SmallVector<const SCEV *, 4> Sizes;
354
16
    Sizes.reserve(SAI->getNumberOfDimensions());
355
16
    SmallVector<const SCEV *, 4> Subscripts;
356
16
    Subscripts.reserve(SAI->getNumberOfDimensions());
357
32
    for (unsigned i = 0; i < SAI->getNumberOfDimensions(); 
i += 116
) {
358
16
      Sizes.push_back(SAI->getDimensionSize(i));
359
16
      Subscripts.push_back(nullptr);
360
16
    }
361
16
362
16
    MemoryAccess *Access =
363
16
        new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(),
364
16
                         LI->getType(), true, {}, Sizes, LI, MemoryKind::Array);
365
16
    S->addAccessFunction(Access);
366
16
    Stmt->addAccess(Access, true);
367
16
368
16
    Access->setNewAccessRelation(AccessRelation);
369
16
370
16
    return Access;
371
16
  }
372
373
  /// Forward a load by reading from an array element that contains the same
374
  /// value. Typically the location it was loaded from.
375
  ///
376
  /// @param TargetStmt  The statement the operand tree will be copied to.
377
  /// @param Inst        The (possibly speculatable) instruction to forward.
378
  /// @param UseStmt     The statement that uses @p Inst.
379
  /// @param UseLoop     The loop @p Inst is used in.
380
  /// @param DefStmt     The statement @p Inst is defined in.
381
  /// @param DefLoop     The loop which contains @p Inst.
382
  /// @param DoIt        If false, only determine whether an operand tree can be
383
  ///                    forwarded. If true, carry out the forwarding. Do not
384
  ///                    use DoIt==true if an operand tree is not known to be
385
  ///                    forwardable.
386
  ///
387
  /// @return FD_NotApplicable  if @p Inst cannot be forwarded by creating a new
388
  ///                           load.
389
  ///         FD_CannotForward  if the pointer operand cannot be forwarded.
390
  ///         FD_CanForwardProfitably if @p Inst is forwardable.
391
  ///         FD_DidForwardTree if @p DoIt was true.
392
  ForwardingDecision forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst,
393
                                      ScopStmt *UseStmt, Loop *UseLoop,
394
                                      ScopStmt *DefStmt, Loop *DefLoop,
395
66
                                      bool DoIt) {
396
66
    // Cannot do anything without successful known analysis.
397
66
    if (Known.is_null() || Translator.is_null() ||
398
66
        MaxOpGuard.hasQuotaExceeded())
399
0
      return FD_NotApplicable;
400
66
401
66
    LoadInst *LI = dyn_cast<LoadInst>(Inst);
402
66
    if (!LI)
403
21
      return FD_NotApplicable;
404
45
405
45
    // If the load is already in the statement, no forwarding is necessary.
406
45
    // However, it might happen that the LoadInst is already present in the
407
45
    // statement's instruction list. In that case we do as follows:
408
45
    // - For the evaluation (DoIt==false), we can trivially forward it as it is
409
45
    //   benefit of forwarding an already present instruction.
410
45
    // - For the execution (DoIt==true), prepend the instruction (to make it
411
45
    //   available to all instructions following in the instruction list), but
412
45
    //   do not add another MemoryAccess.
413
45
    MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI);
414
45
    if (Access && 
!DoIt9
)
415
4
      return FD_CanForwardProfitably;
416
41
417
41
    ForwardingDecision OpDecision = forwardTree(
418
41
        TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop, DoIt);
419
41
    switch (OpDecision) {
420
41
    case FD_CannotForward:
421
0
      assert(!DoIt);
422
0
      return OpDecision;
423
41
424
41
    case FD_CanForwardLeaf:
425
20
    case FD_CanForwardProfitably:
426
20
      assert(!DoIt);
427
20
      break;
428
20
429
21
    case FD_DidForwardLeaf:
430
21
    case FD_DidForwardTree:
431
21
      assert(DoIt);
432
21
      break;
433
21
434
21
    default:
435
0
      llvm_unreachable("Shouldn't return this");
436
41
    }
437
41
438
41
    IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
439
41
440
41
    // { DomainDef[] -> ValInst[] }
441
41
    isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
442
41
    assert(!isNormalized(ExpectedVal).is_false() &&
443
41
           "LoadInsts are always normalized");
444
41
445
41
    // { DomainUse[] -> DomainTarget[] }
446
41
    isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt);
447
41
448
41
    // { DomainTarget[] -> ValInst[] }
449
41
    isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
450
41
    isl::union_map TranslatedExpectedVal =
451
41
        isl::union_map(TargetExpectedVal).apply_range(Translator);
452
41
453
41
    // { DomainTarget[] -> Element[] }
454
41
    isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
455
41
456
41
    isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
457
41
    if (!SameVal)
458
3
      return FD_NotApplicable;
459
38
460
38
    if (DoIt)
461
21
      TargetStmt->prependInstruction(LI);
462
38
463
38
    if (!DoIt)
464
17
      return FD_CanForwardProfitably;
465
21
466
21
    if (Access) {
467
5
      LLVM_DEBUG(
468
5
          dbgs() << "    forwarded known load with preexisting MemoryAccess"
469
5
                 << Access << "\n");
470
16
    } else {
471
16
      Access = makeReadArrayAccess(TargetStmt, LI, SameVal);
472
16
      LLVM_DEBUG(dbgs() << "    forwarded known load with new MemoryAccess"
473
16
                        << Access << "\n");
474
16
475
16
      // { ValInst[] }
476
16
      isl::space ValInstSpace = ExpectedVal.get_space().range();
477
16
478
16
      // After adding a new load to the SCoP, also update the Known content
479
16
      // about it. The new load will have a known ValInst of
480
16
      // { [DomainTarget[] -> Value[]] }
481
16
      // but which -- because it is a copy of it -- has same value as the
482
16
      // { [DomainDef[] -> Value[]] }
483
16
      // that it replicates. Instead of  cloning the known content of
484
16
      // [DomainDef[] -> Value[]]
485
16
      // for DomainTarget[], we add a 'translator' that maps
486
16
      // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]]
487
16
      // before comparing to the known content.
488
16
      // TODO: 'Translator' could also be used to map PHINodes to their incoming
489
16
      // ValInsts.
490
16
      if (ValInstSpace.is_wrapping()) {
491
16
        // { DefDomain[] -> Value[] }
492
16
        isl::map ValInsts = ExpectedVal.range().unwrap();
493
16
494
16
        // { DefDomain[] }
495
16
        isl::set DefDomain = ValInsts.domain();
496
16
497
16
        // { Value[] }
498
16
        isl::space ValSpace = ValInstSpace.unwrap().range();
499
16
500
16
        // { Value[] -> Value[] }
501
16
        isl::map ValToVal =
502
16
            isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace));
503
16
504
16
        // { DomainDef[] -> DomainTarget[] }
505
16
        isl::map DefToTarget = getDefToTarget(DefStmt, TargetStmt);
506
16
507
16
        // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] }
508
16
        isl::map LocalTranslator = DefToTarget.reverse().product(ValToVal);
509
16
510
16
        Translator = Translator.add_map(LocalTranslator);
511
16
        LLVM_DEBUG(dbgs() << "      local translator is " << LocalTranslator
512
16
                          << "\n");
513
16
      }
514
16
    }
515
21
    LLVM_DEBUG(dbgs() << "      expected values where " << TargetExpectedVal
516
21
                      << "\n");
517
21
    LLVM_DEBUG(dbgs() << "      candidate elements where " << Candidates
518
21
                      << "\n");
519
21
    assert(Access);
520
21
521
21
    NumKnownLoadsForwarded++;
522
21
    TotalKnownLoadsForwarded++;
523
21
    return FD_DidForwardTree;
524
21
  }
525
526
  /// Forward a scalar by redirecting the access to an array element that stores
527
  /// the same value.
528
  ///
529
  /// @param TargetStmt  The statement the operand tree will be copied to.
530
  /// @param Inst        The scalar to forward.
531
  /// @param UseStmt     The statement that uses @p Inst.
532
  /// @param UseLoop     The loop @p Inst is used in.
533
  /// @param DefStmt     The statement @p Inst is defined in.
534
  /// @param DefLoop     The loop which contains @p Inst.
535
  /// @param DoIt        If false, only determine whether an operand tree can be
536
  ///                    forwarded. If true, carry out the forwarding. Do not
537
  ///                    use DoIt==true if an operand tree is not known to be
538
  ///                    forwardable.
539
  ///
540
  /// @return FD_NotApplicable        if @p Inst cannot be reloaded.
541
  ///         FD_CanForwardLeaf       if @p Inst can be reloaded.
542
  ///         FD_CanForwardProfitably if @p Inst has been reloaded.
543
  ///         FD_DidForwardLeaf       if @p DoIt was true.
544
  ForwardingDecision reloadKnownContent(ScopStmt *TargetStmt, Instruction *Inst,
545
                                        ScopStmt *UseStmt, Loop *UseLoop,
546
                                        ScopStmt *DefStmt, Loop *DefLoop,
547
24
                                        bool DoIt) {
548
24
    // Cannot do anything without successful known analysis.
549
24
    if (Known.is_null() || Translator.is_null() ||
550
24
        MaxOpGuard.hasQuotaExceeded())
551
0
      return FD_NotApplicable;
552
24
553
24
    MemoryAccess *Access = TargetStmt->lookupInputAccessOf(Inst);
554
24
    if (Access && 
Access->isLatestArrayKind()22
) {
555
0
      if (DoIt)
556
0
        return FD_DidForwardLeaf;
557
0
      return FD_CanForwardLeaf;
558
0
    }
559
24
560
24
    // Don't spend too much time analyzing whether it can be reloaded. When
561
24
    // carrying-out the forwarding, we cannot bail-out in the middle of the
562
24
    // transformation. It also shouldn't take as long because some results are
563
24
    // cached.
564
24
    IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
565
24
566
24
    // { DomainDef[] -> ValInst[] }
567
24
    isl::union_map ExpectedVal = makeNormalizedValInst(Inst, UseStmt, UseLoop);
568
24
569
24
    // { DomainUse[] -> DomainTarget[] }
570
24
    isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt);
571
24
572
24
    // { DomainTarget[] -> ValInst[] }
573
24
    isl::union_map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
574
24
    isl::union_map TranslatedExpectedVal =
575
24
        TargetExpectedVal.apply_range(Translator);
576
24
577
24
    // { DomainTarget[] -> Element[] }
578
24
    isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
579
24
580
24
    isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
581
24
    if (!SameVal)
582
10
      return FD_NotApplicable;
583
14
584
14
    if (!DoIt)
585
7
      return FD_CanForwardProfitably;
586
7
587
7
    if (!Access)
588
0
      Access = TargetStmt->ensureValueRead(Inst);
589
7
590
7
    simplify(SameVal);
591
7
    Access->setNewAccessRelation(SameVal);
592
7
593
7
    TotalReloads++;
594
7
    NumReloads++;
595
7
    return FD_DidForwardLeaf;
596
7
  }
597
598
  /// Forwards a speculatively executable instruction.
599
  ///
600
  /// @param TargetStmt  The statement the operand tree will be copied to.
601
  /// @param UseInst     The (possibly speculatable) instruction to forward.
602
  /// @param DefStmt     The statement @p UseInst is defined in.
603
  /// @param DefLoop     The loop which contains @p UseInst.
604
  /// @param DoIt        If false, only determine whether an operand tree can be
605
  ///                    forwarded. If true, carry out the forwarding. Do not
606
  ///                    use DoIt==true if an operand tree is not known to be
607
  ///                    forwardable.
608
  ///
609
  /// @return FD_NotApplicable  if @p UseInst is not speculatable.
610
  ///         FD_CannotForward  if one of @p UseInst's operands is not
611
  ///                           forwardable.
612
  ///         FD_CanForwardTree if @p UseInst is forwardable.
613
  ///         FD_DidForward     if @p DoIt was true.
614
  ForwardingDecision forwardSpeculatable(ScopStmt *TargetStmt,
615
                                         Instruction *UseInst,
616
                                         ScopStmt *DefStmt, Loop *DefLoop,
617
100
                                         bool DoIt) {
618
100
    // PHIs, unless synthesizable, are not yet supported.
619
100
    if (isa<PHINode>(UseInst))
620
17
      return FD_NotApplicable;
621
83
622
83
    // Compatible instructions must satisfy the following conditions:
623
83
    // 1. Idempotent (instruction will be copied, not moved; although its
624
83
    //    original instance might be removed by simplification)
625
83
    // 2. Not access memory (There might be memory writes between)
626
83
    // 3. Not cause undefined behaviour (we might copy to a location when the
627
83
    //    original instruction was no executed; this is currently not possible
628
83
    //    because we do not forward PHINodes)
629
83
    // 4. Not leak memory if executed multiple times (i.e. malloc)
630
83
    //
631
83
    // Instruction::mayHaveSideEffects is not sufficient because it considers
632
83
    // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is
633
83
    // not sufficient because it allows memory accesses.
634
83
    if (mayBeMemoryDependent(*UseInst))
635
49
      return FD_NotApplicable;
636
34
637
34
    if (DoIt) {
638
16
      // To ensure the right order, prepend this instruction before its
639
16
      // operands. This ensures that its operands are inserted before the
640
16
      // instruction using them.
641
16
      // TODO: The operand tree is not really a tree, but a DAG. We should be
642
16
      // able to handle DAGs without duplication.
643
16
      TargetStmt->prependInstruction(UseInst);
644
16
      NumInstructionsCopied++;
645
16
      TotalInstructionsCopied++;
646
16
    }
647
34
648
60
    for (Value *OpVal : UseInst->operand_values()) {
649
60
      ForwardingDecision OpDecision =
650
60
          forwardTree(TargetStmt, OpVal, DefStmt, DefLoop, DoIt);
651
60
      switch (OpDecision) {
652
60
      case FD_CannotForward:
653
2
        assert(!DoIt);
654
2
        return FD_CannotForward;
655
60
656
60
      case FD_CanForwardLeaf:
657
29
      case FD_CanForwardProfitably:
658
29
        assert(!DoIt);
659
29
        break;
660
29
661
29
      case FD_DidForwardLeaf:
662
29
      case FD_DidForwardTree:
663
29
        assert(DoIt);
664
29
        break;
665
29
666
29
      case FD_NotApplicable:
667
0
        llvm_unreachable("forwardTree should never return FD_NotApplicable");
668
60
      }
669
60
    }
670
34
671
34
    
if (32
DoIt32
)
672
16
      return FD_DidForwardTree;
673
16
    return FD_CanForwardProfitably;
674
16
  }
675
676
  /// Determines whether an operand tree can be forwarded or carries out a
677
  /// forwarding, depending on the @p DoIt flag.
678
  ///
679
  /// @param TargetStmt  The statement the operand tree will be copied to.
680
  /// @param UseVal      The value (usually an instruction) which is root of an
681
  ///                    operand tree.
682
  /// @param UseStmt     The statement that uses @p UseVal.
683
  /// @param UseLoop     The loop @p UseVal is used in.
684
  /// @param DoIt        If false, only determine whether an operand tree can be
685
  ///                    forwarded. If true, carry out the forwarding. Do not
686
  ///                    use DoIt==true if an operand tree is not known to be
687
  ///                    forwardable.
688
  ///
689
  /// @return If DoIt==false, return whether the operand tree can be forwarded.
690
  ///         If DoIt==true, return FD_DidForward.
691
  ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal,
692
186
                                 ScopStmt *UseStmt, Loop *UseLoop, bool DoIt) {
693
186
    ScopStmt *DefStmt = nullptr;
694
186
    Loop *DefLoop = nullptr;
695
186
696
186
    // { DefDomain[] -> TargetDomain[] }
697
186
    isl::map DefToTarget;
698
186
699
186
    VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true);
700
186
    switch (VUse.getKind()) {
701
186
    case VirtualUse::Constant:
702
42
    case VirtualUse::Block:
703
42
    case VirtualUse::Hoisted:
704
42
      // These can be used anywhere without special considerations.
705
42
      if (DoIt)
706
21
        return FD_DidForwardTree;
707
21
      return FD_CanForwardLeaf;
708
21
709
39
    case VirtualUse::Synthesizable: {
710
39
      // ScopExpander will take care for of generating the code at the new
711
39
      // location.
712
39
      if (DoIt)
713
20
        return FD_DidForwardTree;
714
19
715
19
      // Check if the value is synthesizable at the new location as well. This
716
19
      // might be possible when leaving a loop for which ScalarEvolution is
717
19
      // unable to derive the exit value for.
718
19
      // TODO: If there is a LCSSA PHI at the loop exit, use that one.
719
19
      // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we
720
19
      // do not forward past its loop header. This would require us to use a
721
19
      // previous loop induction variable instead the current one. We currently
722
19
      // do not allow forwarding PHI nodes, thus this should never occur (the
723
19
      // only exception where no phi is necessary being an unreachable loop
724
19
      // without edge from the outside).
725
19
      VirtualUse TargetUse = VirtualUse::create(
726
19
          S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true);
727
19
      if (TargetUse.getKind() == VirtualUse::Synthesizable)
728
19
        return FD_CanForwardLeaf;
729
0
730
0
      LLVM_DEBUG(
731
0
          dbgs() << "    Synthesizable would not be synthesizable anymore: "
732
0
                 << *UseVal << "\n");
733
0
      return FD_CannotForward;
734
0
    }
735
0
736
5
    case VirtualUse::ReadOnly:
737
5
      // Note that we cannot return FD_CanForwardTree here. With a operand tree
738
5
      // depth of 0, UseVal is the use in TargetStmt that we try to replace.
739
5
      // With -polly-analyze-read-only-scalars=true we would ensure the
740
5
      // existence of a MemoryAccess (which already exists for a leaf) and be
741
5
      // removed again by tryForwardTree because it's goal is to remove this
742
5
      // scalar MemoryAccess. It interprets FD_CanForwardTree as the permission
743
5
      // to do so.
744
5
      if (!DoIt)
745
3
        return FD_CanForwardLeaf;
746
2
747
2
      // If we model read-only scalars, we need to create a MemoryAccess for it.
748
2
      if (ModelReadOnlyScalars)
749
1
        TargetStmt->ensureValueRead(UseVal);
750
2
751
2
      NumReadOnlyCopied++;
752
2
      TotalReadOnlyCopied++;
753
2
      return FD_DidForwardLeaf;
754
2
755
28
    case VirtualUse::Intra:
756
28
      // Knowing that UseStmt and DefStmt are the same statement instance, just
757
28
      // reuse the information about UseStmt for DefStmt
758
28
      DefStmt = UseStmt;
759
28
760
28
      LLVM_FALLTHROUGH;
761
100
    case VirtualUse::Inter:
762
100
      Instruction *Inst = cast<Instruction>(UseVal);
763
100
764
100
      if (!DefStmt) {
765
72
        DefStmt = S->getStmtFor(Inst);
766
72
        if (!DefStmt)
767
0
          return FD_CannotForward;
768
100
      }
769
100
770
100
      DefLoop = LI->getLoopFor(Inst->getParent());
771
100
772
100
      ForwardingDecision SpeculativeResult =
773
100
          forwardSpeculatable(TargetStmt, Inst, DefStmt, DefLoop, DoIt);
774
100
      if (SpeculativeResult != FD_NotApplicable)
775
34
        return SpeculativeResult;
776
66
777
66
      ForwardingDecision KnownResult = forwardKnownLoad(
778
66
          TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop, DoIt);
779
66
      if (KnownResult != FD_NotApplicable)
780
42
        return KnownResult;
781
24
782
24
      ForwardingDecision ReloadResult = reloadKnownContent(
783
24
          TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop, DoIt);
784
24
      if (ReloadResult != FD_NotApplicable)
785
14
        return ReloadResult;
786
10
787
10
      // When no method is found to forward the operand tree, we effectively
788
10
      // cannot handle it.
789
10
      LLVM_DEBUG(dbgs() << "    Cannot forward instruction: " << *Inst << "\n");
790
10
      return FD_CannotForward;
791
0
    }
792
0
793
0
    llvm_unreachable("Case unhandled");
794
0
  }
795
796
  /// Try to forward an operand tree rooted in @p RA.
797
48
  bool tryForwardTree(MemoryAccess *RA) {
798
48
    assert(RA->isLatestScalarKind());
799
48
    LLVM_DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n");
800
48
801
48
    ScopStmt *Stmt = RA->getStatement();
802
48
    Loop *InLoop = Stmt->getSurroundingLoop();
803
48
804
48
    isl::map TargetToUse;
805
48
    if (!Known.is_null()) {
806
48
      isl::space DomSpace = Stmt->getDomainSpace();
807
48
      TargetToUse =
808
48
          isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace));
809
48
    }
810
48
811
48
    ForwardingDecision Assessment =
812
48
        forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop, false);
813
48
    assert(Assessment != FD_DidForwardTree && Assessment != FD_DidForwardLeaf);
814
48
    if (Assessment != FD_CanForwardProfitably)
815
11
      return false;
816
37
817
37
    ForwardingDecision Execution =
818
37
        forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop, true);
819
37
    assert(((Execution == FD_DidForwardTree) ||
820
37
            (Execution == FD_DidForwardLeaf)) &&
821
37
           "A previous positive assessment must also be executable");
822
37
823
37
    if (Execution == FD_DidForwardTree)
824
30
      Stmt->removeSingleMemoryAccess(RA);
825
37
    return true;
826
37
  }
827
828
  /// Return which SCoP this instance is processing.
829
0
  Scop *getScop() const { return S; }
830
831
  /// Run the algorithm: Use value read accesses as operand tree roots and try
832
  /// to forward them into the statement.
833
34
  bool forwardOperandTrees() {
834
87
    for (ScopStmt &Stmt : *S) {
835
87
      bool StmtModified = false;
836
87
837
87
      // Because we are modifying the MemoryAccess list, collect them first to
838
87
      // avoid iterator invalidation.
839
87
      SmallVector<MemoryAccess *, 16> Accs;
840
177
      for (MemoryAccess *RA : Stmt) {
841
177
        if (!RA->isRead())
842
105
          continue;
843
72
        if (!RA->isLatestScalarKind())
844
24
          continue;
845
48
846
48
        Accs.push_back(RA);
847
48
      }
848
87
849
87
      for (MemoryAccess *RA : Accs) {
850
48
        if (tryForwardTree(RA)) {
851
37
          Modified = true;
852
37
          StmtModified = true;
853
37
          NumForwardedTrees++;
854
37
          TotalForwardedTrees++;
855
37
        }
856
48
      }
857
87
858
87
      if (StmtModified) {
859
33
        NumModifiedStmts++;
860
33
        TotalModifiedStmts++;
861
33
      }
862
87
    }
863
34
864
34
    if (Modified)
865
26
      ScopsModified++;
866
34
    return Modified;
867
34
  }
868
869
  /// Print the pass result, performed transformations and the SCoP after the
870
  /// transformation.
871
33
  void print(raw_ostream &OS, int Indent = 0) {
872
33
    printStatistics(OS, Indent);
873
33
874
33
    if (!Modified) {
875
8
      // This line can easily be checked in regression tests.
876
8
      OS << "ForwardOpTree executed, but did not modify anything\n";
877
8
      return;
878
8
    }
879
25
880
25
    printStatements(OS, Indent);
881
25
  }
882
};
883
884
/// Pass that redirects scalar reads to array elements that are known to contain
885
/// the same value.
886
///
887
/// This reduces the number of scalar accesses and therefore potentially
888
/// increases the freedom of the scheduler. In the ideal case, all reads of a
889
/// scalar definition are redirected (We currently do not care about removing
890
/// the write in this case).  This is also useful for the main DeLICM pass as
891
/// there are less scalars to be mapped.
892
class ForwardOpTree : public ScopPass {
893
private:
894
  /// The pass implementation, also holding per-scop data.
895
  std::unique_ptr<ForwardOpTreeImpl> Impl;
896
897
public:
898
  static char ID;
899
900
33
  explicit ForwardOpTree() : ScopPass(ID) {}
901
  ForwardOpTree(const ForwardOpTree &) = delete;
902
  ForwardOpTree &operator=(const ForwardOpTree &) = delete;
903
904
33
  void getAnalysisUsage(AnalysisUsage &AU) const override {
905
33
    AU.addRequiredTransitive<ScopInfoRegionPass>();
906
33
    AU.addRequired<LoopInfoWrapperPass>();
907
33
    AU.setPreservesAll();
908
33
  }
909
910
34
  bool runOnScop(Scop &S) override {
911
34
    // Free resources for previous SCoP's computation, if not yet done.
912
34
    releaseMemory();
913
34
914
34
    LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
915
34
916
34
    {
917
34
      IslMaxOperationsGuard MaxOpGuard(S.getIslCtx().get(), MaxOps, false);
918
34
      Impl = llvm::make_unique<ForwardOpTreeImpl>(&S, &LI, MaxOpGuard);
919
34
920
34
      if (AnalyzeKnown) {
921
34
        LLVM_DEBUG(dbgs() << "Prepare forwarders...\n");
922
34
        Impl->computeKnownValues();
923
34
      }
924
34
925
34
      LLVM_DEBUG(dbgs() << "Forwarding operand trees...\n");
926
34
      Impl->forwardOperandTrees();
927
34
928
34
      if (MaxOpGuard.hasQuotaExceeded()) {
929
0
        LLVM_DEBUG(dbgs() << "Not all operations completed because of "
930
0
                             "max_operations exceeded\n");
931
0
        KnownOutOfQuota++;
932
0
      }
933
34
    }
934
34
935
34
    LLVM_DEBUG(dbgs() << "\nFinal Scop:\n");
936
34
    LLVM_DEBUG(dbgs() << S);
937
34
938
34
    // Update statistics
939
34
    auto ScopStats = S.getStatistics();
940
34
    NumValueWrites += ScopStats.NumValueWrites;
941
34
    NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
942
34
    NumPHIWrites += ScopStats.NumPHIWrites;
943
34
    NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
944
34
    NumSingletonWrites += ScopStats.NumSingletonWrites;
945
34
    NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
946
34
947
34
    return false;
948
34
  }
949
950
33
  void printScop(raw_ostream &OS, Scop &S) const override {
951
33
    if (!Impl)
952
0
      return;
953
33
954
33
    assert(Impl->getScop() == &S);
955
33
    Impl->print(OS);
956
33
  }
957
958
151
  void releaseMemory() override { Impl.reset(); }
959
}; // class ForwardOpTree
960
961
char ForwardOpTree::ID;
962
} // namespace
963
964
0
ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); }
965
966
46.4k
INITIALIZE_PASS_BEGIN(ForwardOpTree, "polly-optree",
967
46.4k
                      "Polly - Forward operand tree", false, false)
968
46.4k
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
969
46.4k
INITIALIZE_PASS_END(ForwardOpTree, "polly-optree",
970
                    "Polly - Forward operand tree", false, false)