Coverage Report

Created: 2017-11-21 16:49

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/polly/lib/Transform/ForwardOpTree.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- ForwardOpTree.h ------------------------------------------*- C++ -*-===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// Move instructions between statements.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "polly/ForwardOpTree.h"
15
#include "polly/Options.h"
16
#include "polly/ScopBuilder.h"
17
#include "polly/ScopInfo.h"
18
#include "polly/ScopPass.h"
19
#include "polly/Support/GICHelper.h"
20
#include "polly/Support/ISLOStream.h"
21
#include "polly/Support/ISLTools.h"
22
#include "polly/Support/VirtualInstruction.h"
23
#include "polly/ZoneAlgo.h"
24
#include "llvm/ADT/STLExtras.h"
25
#include "llvm/ADT/SmallVector.h"
26
#include "llvm/ADT/Statistic.h"
27
#include "llvm/Analysis/LoopInfo.h"
28
#include "llvm/Analysis/ValueTracking.h"
29
#include "llvm/IR/Instruction.h"
30
#include "llvm/IR/Instructions.h"
31
#include "llvm/IR/Value.h"
32
#include "llvm/Pass.h"
33
#include "llvm/Support/Casting.h"
34
#include "llvm/Support/CommandLine.h"
35
#include "llvm/Support/Compiler.h"
36
#include "llvm/Support/Debug.h"
37
#include "llvm/Support/ErrorHandling.h"
38
#include "llvm/Support/raw_ostream.h"
39
#include "isl/ctx.h"
40
#include "isl/isl-noexceptions.h"
41
#include <cassert>
42
#include <memory>
43
44
#define DEBUG_TYPE "polly-optree"
45
46
using namespace llvm;
47
using namespace polly;
48
49
static cl::opt<bool>
50
    AnalyzeKnown("polly-optree-analyze-known",
51
                 cl::desc("Analyze array contents for load forwarding"),
52
                 cl::cat(PollyCategory), cl::init(true), cl::Hidden);
53
54
static cl::opt<bool>
55
    NormalizePHIs("polly-optree-normalize-phi",
56
                  cl::desc("Replace PHIs by their incoming values"),
57
                  cl::cat(PollyCategory), cl::init(false), cl::Hidden);
58
59
static cl::opt<unsigned>
60
    MaxOps("polly-optree-max-ops",
61
           cl::desc("Maximum number of ISL operations to invest for known "
62
                    "analysis; 0=no limit"),
63
           cl::init(1000000), cl::cat(PollyCategory), cl::Hidden);
64
65
STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs");
66
STATISTIC(KnownOutOfQuota,
67
          "Analyses aborted because max_operations was reached");
68
69
STATISTIC(TotalInstructionsCopied, "Number of copied instructions");
70
STATISTIC(TotalKnownLoadsForwarded,
71
          "Number of forwarded loads because their value was known");
72
STATISTIC(TotalReloads, "Number of reloaded values");
73
STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses");
74
STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees");
75
STATISTIC(TotalModifiedStmts,
76
          "Number of statements with at least one forwarded tree");
77
78
STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree");
79
80
STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree");
81
STATISTIC(NumValueWritesInLoops,
82
          "Number of scalar value writes nested in affine loops after OpTree");
83
STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree");
84
STATISTIC(NumPHIWritesInLoops,
85
          "Number of scalar phi writes nested in affine loops after OpTree");
86
STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree");
87
STATISTIC(NumSingletonWritesInLoops,
88
          "Number of singleton writes nested in affine loops after OpTree");
89
90
namespace {
91
92
/// The state of whether an operand tree was/can be forwarded.
93
///
94
/// The items apply to an instructions and its operand tree with the instruction
95
/// as the root element. If the value in question is not an instruction in the
96
/// SCoP, it can be a leaf of an instruction's operand tree.
97
enum ForwardingDecision {
98
  /// The root instruction or value cannot be forwarded at all.
99
  FD_CannotForward,
100
101
  /// The root instruction or value can be forwarded as a leaf of a larger
102
  /// operand tree.
103
  /// It does not make sense to move the value itself, it would just replace it
104
  /// by a use of itself. For instance, a constant "5" used in a statement can
105
  /// be forwarded, but it would just replace it by the same constant "5".
106
  /// However, it makes sense to move as an operand of
107
  ///
108
  ///   %add = add 5, 5
109
  ///
110
  /// where "5" is moved as part of a larger operand tree. "5" would be placed
111
  /// (disregarding for a moment that literal constants don't have a location
112
  /// and can be used anywhere) into the same statement as %add would.
113
  FD_CanForwardLeaf,
114
115
  /// The root instruction can be forwarded and doing so avoids a scalar
116
  /// dependency.
117
  ///
118
  /// This can be either because the operand tree can be moved to the target
119
  /// statement, or a memory access is redirected to read from a different
120
  /// location.
121
  FD_CanForwardProfitably,
122
123
  /// Used to indicate that a forwarding has be carried out successfully, and
124
  /// the forwarded memory access can be deleted.
125
  FD_DidForwardTree,
126
127
  /// Used to indicate that a forwarding has be carried out successfully, and
128
  /// the forwarded memory access is being reused.
129
  FD_DidForwardLeaf,
130
131
  /// A forwarding method cannot be applied to the operand tree.
132
  /// The difference to FD_CannotForward is that there might be other methods
133
  /// that can handle it.
134
  /// The conditions that make an operand tree applicable must be checked even
135
  /// with DoIt==true because a method following the one that returned
136
  /// FD_NotApplicable might have returned FD_CanForwardTree.
137
  FD_NotApplicable
138
};
139
140
/// Implementation of operand tree forwarding for a specific SCoP.
141
///
142
/// For a statement that requires a scalar value (through a value read
143
/// MemoryAccess), see if its operand can be moved into the statement. If so,
144
/// the MemoryAccess is removed and the all the operand tree instructions are
145
/// moved into the statement. All original instructions are left in the source
146
/// statements. The simplification pass can clean these up.
147
class ForwardOpTreeImpl : ZoneAlgorithm {
148
private:
149
  /// Scope guard to limit the number of isl operations for this pass.
150
  IslMaxOperationsGuard &MaxOpGuard;
151
152
  /// How many instructions have been copied to other statements.
153
  int NumInstructionsCopied = 0;
154
155
  /// Number of loads forwarded because their value was known.
156
  int NumKnownLoadsForwarded = 0;
157
158
  /// Number of values reloaded from known array elements.
159
  int NumReloads = 0;
160
161
  /// How many read-only accesses have been copied.
162
  int NumReadOnlyCopied = 0;
163
164
  /// How many operand trees have been forwarded.
165
  int NumForwardedTrees = 0;
166
167
  /// Number of statements with at least one forwarded operand tree.
168
  int NumModifiedStmts = 0;
169
170
  /// Whether we carried out at least one change to the SCoP.
171
  bool Modified = false;
172
173
  /// Contains the zones where array elements are known to contain a specific
174
  /// value.
175
  /// { [Element[] -> Zone[]] -> ValInst[] }
176
  /// @see computeKnown()
177
  isl::union_map Known;
178
179
  /// Translator for newly introduced ValInsts to already existing ValInsts such
180
  /// that new introduced load instructions can reuse the Known analysis of its
181
  /// original load. { ValInst[] -> ValInst[] }
182
  isl::union_map Translator;
183
184
  /// Get list of array elements that do contain the same ValInst[] at Domain[].
185
  ///
186
  /// @param ValInst { Domain[] -> ValInst[] }
187
  ///                The values for which we search for alternative locations,
188
  ///                per statement instance.
189
  ///
190
  /// @return { Domain[] -> Element[] }
191
  ///         For each statement instance, the array elements that contain the
192
  ///         same ValInst.
193
10
  isl::union_map findSameContentElements(isl::union_map ValInst) {
194
10
    assert(!ValInst.is_single_valued().is_false());
195
10
196
10
    // { Domain[] }
197
10
    isl::union_set Domain = ValInst.domain();
198
10
199
10
    // { Domain[] -> Scatter[] }
200
10
    isl::union_map Schedule = getScatterFor(Domain);
201
10
202
10
    // { Element[] -> [Scatter[] -> ValInst[]] }
203
10
    isl::union_map MustKnownCurried =
204
10
        convertZoneToTimepoints(Known, isl::dim::in, false, true).curry();
205
10
206
10
    // { [Domain[] -> ValInst[]] -> Scatter[] }
207
10
    isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule);
208
10
209
10
    // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] }
210
10
    isl::union_map SchedValDomVal =
211
10
        DomValSched.range_product(ValInst.range_map()).reverse();
212
10
213
10
    // { Element[] -> [Domain[] -> ValInst[]] }
214
10
    isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal);
215
10
216
10
    // { Domain[] -> Element[] }
217
10
    isl::union_map MustKnownMap =
218
10
        MustKnownInst.uncurry().domain().unwrap().reverse();
219
10
    simplify(MustKnownMap);
220
10
221
10
    return MustKnownMap;
222
10
  }
223
224
  /// Find a single array element for each statement instance, within a single
225
  /// array.
226
  ///
227
  /// @param MustKnown { Domain[] -> Element[] }
228
  ///                  Set of candidate array elements.
229
  /// @param Domain    { Domain[] }
230
  ///                  The statement instance for which we need elements for.
231
  ///
232
  /// @return { Domain[] -> Element[] }
233
  ///         For each statement instance, an array element out of @p MustKnown.
234
  ///         All array elements must be in the same array (Polly does not yet
235
  ///         support reading from different accesses using the same
236
  ///         MemoryAccess). If no mapping for all of @p Domain exists, returns
237
  ///         null.
238
10
  isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) {
239
10
    // { Domain[] -> Element[] }
240
10
    isl::map Result;
241
10
242
10
    // MemoryAccesses can read only elements from a single array
243
10
    // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }).
244
10
    // Look through all spaces until we find one that contains at least the
245
10
    // wanted statement instance.s
246
10
    MustKnown.foreach_map([&](isl::map Map) -> isl::stat {
247
6
      // Get the array this is accessing.
248
6
      isl::id ArrayId = Map.get_tuple_id(isl::dim::out);
249
6
      ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user());
250
6
251
6
      // No support for generation of indirect array accesses.
252
6
      if (SAI->getBasePtrOriginSAI())
253
0
        return isl::stat::ok; // continue
254
6
255
6
      // Determine whether this map contains all wanted values.
256
6
      isl::set MapDom = Map.domain();
257
6
      if (!Domain.is_subset(MapDom).is_true())
258
0
        return isl::stat::ok; // continue
259
6
260
6
      // There might be multiple array elements that contain the same value, but
261
6
      // choose only one of them. lexmin is used because it returns a one-value
262
6
      // mapping, we do not care about which one.
263
6
      // TODO: Get the simplest access function.
264
6
      Result = Map.lexmin();
265
6
      return isl::stat::error; // break
266
6
    });
267
10
268
10
    return Result;
269
10
  }
270
271
public:
272
  ForwardOpTreeImpl(Scop *S, LoopInfo *LI, IslMaxOperationsGuard &MaxOpGuard)
273
7
      : ZoneAlgorithm("polly-optree", S, LI), MaxOpGuard(MaxOpGuard) {}
274
275
  /// Compute the zones of known array element contents.
276
  ///
277
  /// @return True if the computed #Known is usable.
278
7
  bool computeKnownValues() {
279
7
    isl::union_map MustKnown, KnownFromLoad, KnownFromInit;
280
7
281
7
    // Check that nothing strange occurs.
282
7
    collectCompatibleElts();
283
7
284
7
    {
285
7
      IslQuotaScope QuotaScope = MaxOpGuard.enter();
286
7
287
7
      computeCommon();
288
7
      if (NormalizePHIs)
289
0
        computeNormalizedPHIs();
290
7
      Known = computeKnown(true, true);
291
7
292
7
      // Preexisting ValInsts use the known content analysis of themselves.
293
7
      Translator = makeIdentityMap(Known.range(), false);
294
7
    }
295
7
296
7
    if (!Known || !Translator || !NormalizeMap) {
297
0
      assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota);
298
0
      Known = nullptr;
299
0
      Translator = nullptr;
300
0
      NormalizeMap = nullptr;
301
0
      DEBUG(dbgs() << "Known analysis exceeded max_operations\n");
302
0
      return false;
303
0
    }
304
7
305
7
    KnownAnalyzed++;
306
7
    DEBUG(dbgs() << "All known: " << Known << "\n");
307
7
308
7
    return true;
309
7
  }
310
311
7
  void printStatistics(raw_ostream &OS, int Indent = 0) {
312
7
    OS.indent(Indent) << "Statistics {\n";
313
7
    OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied
314
7
                          << '\n';
315
7
    OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded
316
7
                          << '\n';
317
7
    OS.indent(Indent + 4) << "Reloads: " << NumReloads << '\n';
318
7
    OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied
319
7
                          << '\n';
320
7
    OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees
321
7
                          << '\n';
322
7
    OS.indent(Indent + 4) << "Statements with forwarded operand trees: "
323
7
                          << NumModifiedStmts << '\n';
324
7
    OS.indent(Indent) << "}\n";
325
7
  }
326
327
4
  void printStatements(raw_ostream &OS, int Indent = 0) const {
328
4
    OS.indent(Indent) << "After statements {\n";
329
9
    for (auto &Stmt : *S) {
330
9
      OS.indent(Indent + 4) << Stmt.getBaseName() << "\n";
331
9
      for (auto *MA : Stmt)
332
17
        MA->print(OS);
333
9
334
9
      OS.indent(Indent + 12);
335
9
      Stmt.printInstructions(OS);
336
9
    }
337
4
    OS.indent(Indent) << "}\n";
338
4
  }
339
340
  /// Create a new MemoryAccess of type read and MemoryKind::Array.
341
  ///
342
  /// @param Stmt           The statement in which the access occurs.
343
  /// @param LI             The instruction that does the access.
344
  /// @param AccessRelation The array element that each statement instance
345
  ///                       accesses.
346
  ///
347
  /// @param The newly created access.
348
  MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI,
349
3
                                    isl::map AccessRelation) {
350
3
    isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out);
351
3
    ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user());
352
3
353
3
    // Create a dummy SCEV access, to be replaced anyway.
354
3
    SmallVector<const SCEV *, 4> Sizes;
355
3
    Sizes.reserve(SAI->getNumberOfDimensions());
356
3
    SmallVector<const SCEV *, 4> Subscripts;
357
3
    Subscripts.reserve(SAI->getNumberOfDimensions());
358
6
    for (unsigned i = 0; i < SAI->getNumberOfDimensions(); 
i += 13
) {
359
3
      Sizes.push_back(SAI->getDimensionSize(i));
360
3
      Subscripts.push_back(nullptr);
361
3
    }
362
3
363
3
    MemoryAccess *Access =
364
3
        new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(),
365
3
                         LI->getType(), true, {}, Sizes, LI, MemoryKind::Array);
366
3
    S->addAccessFunction(Access);
367
3
    Stmt->addAccess(Access, true);
368
3
369
3
    Access->setNewAccessRelation(AccessRelation);
370
3
371
3
    return Access;
372
3
  }
373
374
  /// For an llvm::Value defined in @p DefStmt, compute the RAW dependency for a
375
  /// use in every instance of @p UseStmt.
376
  ///
377
  /// @param UseStmt Statement a scalar is used in.
378
  /// @param DefStmt Statement a scalar is defined in.
379
  ///
380
  /// @return { DomainUse[] -> DomainDef[] }
381
12
  isl::map computeUseToDefFlowDependency(ScopStmt *UseStmt, ScopStmt *DefStmt) {
382
12
    // { DomainUse[] -> Scatter[] }
383
12
    isl::map UseScatter = getScatterFor(UseStmt);
384
12
385
12
    // { Zone[] -> DomainDef[] }
386
12
    isl::map ReachDefZone = getScalarReachingDefinition(DefStmt);
387
12
388
12
    // { Scatter[] -> DomainDef[] }
389
12
    isl::map ReachDefTimepoints =
390
12
        convertZoneToTimepoints(ReachDefZone, isl::dim::in, false, true);
391
12
392
12
    // { DomainUse[] -> DomainDef[] }
393
12
    return UseScatter.apply_range(ReachDefTimepoints);
394
12
  }
395
396
  /// Forward a load by reading from an array element that contains the same
397
  /// value. Typically the location it was loaded from.
398
  ///
399
  /// @param TargetStmt  The statement the operand tree will be copied to.
400
  /// @param Inst        The (possibly speculatable) instruction to forward.
401
  /// @param UseStmt     The statement that uses @p Inst.
402
  /// @param UseLoop     The loop @p Inst is used in.
403
  /// @param UseToTarget { DomainUse[] -> DomainTarget[] }
404
  ///                    A mapping from the statement instance @p Inst is used
405
  ///                    to the statement instance it is forwarded to.
406
  /// @param DefStmt     The statement @p Inst is defined in.
407
  /// @param DefLoop     The loop which contains @p Inst.
408
  /// @param DefToTarget { DomainDef[] -> DomainTarget[] }
409
  ///                    A mapping from the statement instance @p Inst is
410
  ///                    defined to the statement instance it is forwarded to.
411
  /// @param DoIt        If false, only determine whether an operand tree can be
412
  ///                    forwarded. If true, carry out the forwarding. Do not
413
  ///                    use DoIt==true if an operand tree is not known to be
414
  ///                    forwardable.
415
  ///
416
  /// @return FD_NotApplicable  if @p Inst cannot be forwarded by creating a new
417
  ///                           load.
418
  ///         FD_CannotForward  if the pointer operand cannot be forwarded.
419
  ///         FD_CanForwardProfitably if @p Inst is forwardable.
420
  ///         FD_DidForwardTree if @p DoIt was true.
421
  ForwardingDecision forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst,
422
                                      ScopStmt *UseStmt, Loop *UseLoop,
423
                                      isl::map UseToTarget, ScopStmt *DefStmt,
424
                                      Loop *DefLoop, isl::map DefToTarget,
425
9
                                      bool DoIt) {
426
9
    // Cannot do anything without successful known analysis.
427
9
    if (Known.is_null() || Translator.is_null() || UseToTarget.is_null() ||
428
9
        DefToTarget.is_null() || MaxOpGuard.hasQuotaExceeded())
429
0
      return FD_NotApplicable;
430
9
431
9
    LoadInst *LI = dyn_cast<LoadInst>(Inst);
432
9
    if (!LI)
433
2
      return FD_NotApplicable;
434
7
435
7
    // If the load is already in the statement, no forwarding is necessary.
436
7
    // However, it might happen that the LoadInst is already present in the
437
7
    // statement's instruction list. In that case we do as follows:
438
7
    // - For the evaluation (DoIt==false), we can trivially forward it as it is
439
7
    //   benefit of forwarding an already present instruction.
440
7
    // - For the execution (DoIt==true), prepend the instruction (to make it
441
7
    //   available to all instructions following in the instruction list), but
442
7
    //   do not add another MemoryAccess.
443
7
    MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI);
444
7
    if (Access && 
!DoIt0
)
445
0
      return FD_CanForwardProfitably;
446
7
447
7
    ForwardingDecision OpDecision =
448
7
        forwardTree(TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop,
449
7
                    DefToTarget, DoIt);
450
7
    switch (OpDecision) {
451
7
    case FD_CannotForward:
452
0
      assert(!DoIt);
453
0
      return OpDecision;
454
7
455
7
    case FD_CanForwardLeaf:
456
4
    case FD_CanForwardProfitably:
457
4
      assert(!DoIt);
458
4
      break;
459
4
460
4
    case FD_DidForwardLeaf:
461
3
    case FD_DidForwardTree:
462
3
      assert(DoIt);
463
3
      break;
464
3
465
3
    default:
466
0
      llvm_unreachable("Shouldn't return this");
467
7
    }
468
7
469
7
    IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
470
7
471
7
    // { DomainDef[] -> ValInst[] }
472
7
    isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
473
7
    assert(isNormalized(ExpectedVal) && "LoadInsts are always normalized");
474
7
475
7
    // { DomainTarget[] -> ValInst[] }
476
7
    isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
477
7
    isl::union_map TranslatedExpectedVal =
478
7
        isl::union_map(TargetExpectedVal).apply_range(Translator);
479
7
480
7
    // { DomainTarget[] -> Element[] }
481
7
    isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
482
7
483
7
    isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
484
7
    if (!SameVal)
485
1
      return FD_NotApplicable;
486
6
487
6
    if (DoIt)
488
3
      TargetStmt->prependInstruction(LI);
489
6
490
6
    if (!DoIt)
491
3
      return FD_CanForwardProfitably;
492
3
493
3
    if (Access) {
494
0
      DEBUG(dbgs() << "    forwarded known load with preexisting MemoryAccess"
495
0
                   << Access << "\n");
496
3
    } else {
497
3
      Access = makeReadArrayAccess(TargetStmt, LI, SameVal);
498
3
      DEBUG(dbgs() << "    forwarded known load with new MemoryAccess" << Access
499
3
                   << "\n");
500
3
501
3
      // { ValInst[] }
502
3
      isl::space ValInstSpace = ExpectedVal.get_space().range();
503
3
504
3
      // After adding a new load to the SCoP, also update the Known content
505
3
      // about it. The new load will have a known ValInst of
506
3
      // { [DomainTarget[] -> Value[]] }
507
3
      // but which -- because it is a copy of it -- has same value as the
508
3
      // { [DomainDef[] -> Value[]] }
509
3
      // that it replicates. Instead of  cloning the known content of
510
3
      // [DomainDef[] -> Value[]]
511
3
      // for DomainTarget[], we add a 'translator' that maps
512
3
      // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]]
513
3
      // before comparing to the known content.
514
3
      // TODO: 'Translator' could also be used to map PHINodes to their incoming
515
3
      // ValInsts.
516
3
      if (ValInstSpace.is_wrapping()) {
517
3
        // { DefDomain[] -> Value[] }
518
3
        isl::map ValInsts = ExpectedVal.range().unwrap();
519
3
520
3
        // { DefDomain[] }
521
3
        isl::set DefDomain = ValInsts.domain();
522
3
523
3
        // { Value[] }
524
3
        isl::space ValSpace = ValInstSpace.unwrap().range();
525
3
526
3
        // { Value[] -> Value[] }
527
3
        isl::map ValToVal =
528
3
            isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace));
529
3
530
3
        // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] }
531
3
        isl::map LocalTranslator = DefToTarget.reverse().product(ValToVal);
532
3
533
3
        Translator = Translator.add_map(LocalTranslator);
534
3
        DEBUG(dbgs() << "      local translator is " << LocalTranslator
535
3
                     << "\n");
536
3
      }
537
3
    }
538
3
    DEBUG(dbgs() << "      expected values where " << TargetExpectedVal
539
3
                 << "\n");
540
3
    DEBUG(dbgs() << "      candidate elements where " << Candidates << "\n");
541
9
    assert(Access);
542
9
543
9
    NumKnownLoadsForwarded++;
544
9
    TotalKnownLoadsForwarded++;
545
9
    return FD_DidForwardTree;
546
9
  }
547
548
  /// Forward a scalar by redirecting the access to an array element that stores
549
  /// the same value.
550
  ///
551
  /// @param TargetStmt  The statement the operand tree will be copied to.
552
  /// @param Inst        The scalar to forward.
553
  /// @param UseStmt     The statement that uses @p Inst.
554
  /// @param UseLoop     The loop @p Inst is used in.
555
  /// @param UseToTarget { DomainUse[] -> DomainTarget[] }
556
  ///                    A mapping from the statement instance @p Inst is used
557
  ///                    in, to the statement instance it is forwarded to.
558
  /// @param DefStmt     The statement @p Inst is defined in.
559
  /// @param DefLoop     The loop which contains @p Inst.
560
  /// @param DefToTarget { DomainDef[] -> DomainTarget[] }
561
  ///                    A mapping from the statement instance @p Inst is
562
  ///                    defined in, to the statement instance it is forwarded
563
  ///                    to.
564
  /// @param DoIt        If false, only determine whether an operand tree can be
565
  ///                    forwarded. If true, carry out the forwarding. Do not
566
  ///                    use DoIt==true if an operand tree is not known to be
567
  ///                    forwardable.
568
  ///
569
  /// @return FD_NotApplicable        if @p Inst cannot be reloaded.
570
  ///         FD_CanForwardLeaf       if @p Inst can be reloaded.
571
  ///         FD_CanForwardProfitably if @p Inst has been reloaded.
572
  ///         FD_DidForwardLeaf       if @p DoIt was true.
573
  ForwardingDecision reloadKnownContent(ScopStmt *TargetStmt, Instruction *Inst,
574
                                        ScopStmt *UseStmt, Loop *UseLoop,
575
                                        isl::map UseToTarget, ScopStmt *DefStmt,
576
                                        Loop *DefLoop, isl::map DefToTarget,
577
3
                                        bool DoIt) {
578
3
    // Cannot do anything without successful known analysis.
579
3
    if (Known.is_null() || Translator.is_null() || UseToTarget.is_null() ||
580
3
        DefToTarget.is_null() || MaxOpGuard.hasQuotaExceeded())
581
0
      return FD_NotApplicable;
582
3
583
3
    MemoryAccess *Access = TargetStmt->lookupInputAccessOf(Inst);
584
3
    if (Access && Access->isLatestArrayKind()) {
585
0
      if (DoIt)
586
0
        return FD_DidForwardLeaf;
587
0
      return FD_CanForwardLeaf;
588
0
    }
589
3
590
3
    // Don't spend too much time analyzing whether it can be reloaded. When
591
3
    // carrying-out the forwarding, we cannot bail-out in the middle of the
592
3
    // transformation. It also shouldn't take as long because some results are
593
3
    // cached.
594
3
    IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
595
3
596
3
    // { DomainDef[] -> ValInst[] }
597
3
    isl::union_map ExpectedVal = makeNormalizedValInst(Inst, UseStmt, UseLoop);
598
3
599
3
    // { DomainTarget[] -> ValInst[] }
600
3
    isl::union_map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
601
3
    isl::union_map TranslatedExpectedVal =
602
3
        TargetExpectedVal.apply_range(Translator);
603
3
604
3
    // { DomainTarget[] -> Element[] }
605
3
    isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
606
3
607
3
    isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
608
3
    if (!SameVal)
609
3
      return FD_NotApplicable;
610
0
611
0
    if (!DoIt)
612
0
      return FD_CanForwardProfitably;
613
0
614
0
    if (!Access)
615
0
      Access = TargetStmt->ensureValueRead(Inst);
616
3
617
3
    simplify(SameVal);
618
3
    Access->setNewAccessRelation(SameVal);
619
3
620
3
    TotalReloads++;
621
3
    NumReloads++;
622
3
    return FD_DidForwardLeaf;
623
3
  }
624
625
  /// Forwards a speculatively executable instruction.
626
  ///
627
  /// @param TargetStmt  The statement the operand tree will be copied to.
628
  /// @param UseInst     The (possibly speculatable) instruction to forward.
629
  /// @param DefStmt     The statement @p UseInst is defined in.
630
  /// @param DefLoop     The loop which contains @p UseInst.
631
  /// @param DefToTarget { DomainDef[] -> DomainTarget[] }
632
  ///                    A mapping from the statement instance @p UseInst is
633
  ///                    defined to the statement instance it is forwarded to.
634
  /// @param DoIt        If false, only determine whether an operand tree can be
635
  ///                    forwarded. If true, carry out the forwarding. Do not
636
  ///                    use DoIt==true if an operand tree is not known to be
637
  ///                    forwardable.
638
  ///
639
  /// @return FD_NotApplicable  if @p UseInst is not speculatable.
640
  ///         FD_CannotForward  if one of @p UseInst's operands is not
641
  ///                           forwardable.
642
  ///         FD_CanForwardTree if @p UseInst is forwardable.
643
  ///         FD_DidForward     if @p DoIt was true.
644
  ForwardingDecision forwardSpeculatable(ScopStmt *TargetStmt,
645
                                         Instruction *UseInst,
646
                                         ScopStmt *DefStmt, Loop *DefLoop,
647
13
                                         isl::map DefToTarget, bool DoIt) {
648
13
    // PHIs, unless synthesizable, are not yet supported.
649
13
    if (isa<PHINode>(UseInst))
650
2
      return FD_NotApplicable;
651
11
652
11
    // Compatible instructions must satisfy the following conditions:
653
11
    // 1. Idempotent (instruction will be copied, not moved; although its
654
11
    //    original instance might be removed by simplification)
655
11
    // 2. Not access memory (There might be memory writes between)
656
11
    // 3. Not cause undefined behaviour (we might copy to a location when the
657
11
    //    original instruction was no executed; this is currently not possible
658
11
    //    because we do not forward PHINodes)
659
11
    // 4. Not leak memory if executed multiple times (i.e. malloc)
660
11
    //
661
11
    // Instruction::mayHaveSideEffects is not sufficient because it considers
662
11
    // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is
663
11
    // not sufficient because it allows memory accesses.
664
11
    if (mayBeMemoryDependent(*UseInst))
665
7
      return FD_NotApplicable;
666
4
667
4
    if (DoIt) {
668
2
      // To ensure the right order, prepend this instruction before its
669
2
      // operands. This ensures that its operands are inserted before the
670
2
      // instruction using them.
671
2
      // TODO: The operand tree is not really a tree, but a DAG. We should be
672
2
      // able to handle DAGs without duplication.
673
2
      TargetStmt->prependInstruction(UseInst);
674
2
      NumInstructionsCopied++;
675
2
      TotalInstructionsCopied++;
676
2
    }
677
4
678
8
    for (Value *OpVal : UseInst->operand_values()) {
679
8
      ForwardingDecision OpDecision =
680
8
          forwardTree(TargetStmt, OpVal, DefStmt, DefLoop, DefToTarget, DoIt);
681
8
      switch (OpDecision) {
682
8
      case FD_CannotForward:
683
0
        assert(!DoIt);
684
0
        return FD_CannotForward;
685
8
686
8
      case FD_CanForwardLeaf:
687
4
      case FD_CanForwardProfitably:
688
4
        assert(!DoIt);
689
4
        break;
690
4
691
4
      case FD_DidForwardLeaf:
692
4
      case FD_DidForwardTree:
693
4
        assert(DoIt);
694
4
        break;
695
4
696
4
      case FD_NotApplicable:
697
0
        llvm_unreachable("forwardTree should never return FD_NotApplicable");
698
4
      }
699
4
    }
700
4
701
4
    if (DoIt)
702
2
      return FD_DidForwardTree;
703
2
    return FD_CanForwardProfitably;
704
2
  }
705
706
  /// Determines whether an operand tree can be forwarded or carries out a
707
  /// forwarding, depending on the @p DoIt flag.
708
  ///
709
  /// @param TargetStmt  The statement the operand tree will be copied to.
710
  /// @param UseVal      The value (usually an instruction) which is root of an
711
  ///                    operand tree.
712
  /// @param UseStmt     The statement that uses @p UseVal.
713
  /// @param UseLoop     The loop @p UseVal is used in.
714
  /// @param UseToTarget { DomainUse[] -> DomainTarget[] }
715
  ///                    A mapping from the statement instance @p UseVal is used
716
  ///                    to the statement instance it is forwarded to.
717
  /// @param DoIt        If false, only determine whether an operand tree can be
718
  ///                    forwarded. If true, carry out the forwarding. Do not
719
  ///                    use DoIt==true if an operand tree is not known to be
720
  ///                    forwardable.
721
  ///
722
  /// @return If DoIt==false, return whether the operand tree can be forwarded.
723
  ///         If DoIt==true, return FD_DidForward.
724
  ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal,
725
                                 ScopStmt *UseStmt, Loop *UseLoop,
726
30
                                 isl::map UseToTarget, bool DoIt) {
727
30
    ScopStmt *DefStmt = nullptr;
728
30
    Loop *DefLoop = nullptr;
729
30
730
30
    // { DefDomain[] -> TargetDomain[] }
731
30
    isl::map DefToTarget;
732
30
733
30
    VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true);
734
30
    switch (VUse.getKind()) {
735
30
    case VirtualUse::Constant:
736
4
    case VirtualUse::Block:
737
4
    case VirtualUse::Hoisted:
738
4
      // These can be used anywhere without special considerations.
739
4
      if (DoIt)
740
2
        return FD_DidForwardTree;
741
2
      return FD_CanForwardLeaf;
742
2
743
7
    case VirtualUse::Synthesizable: {
744
7
      // ScopExpander will take care for of generating the code at the new
745
7
      // location.
746
7
      if (DoIt)
747
3
        return FD_DidForwardTree;
748
4
749
4
      // Check if the value is synthesizable at the new location as well. This
750
4
      // might be possible when leaving a loop for which ScalarEvolution is
751
4
      // unable to derive the exit value for.
752
4
      // TODO: If there is a LCSSA PHI at the loop exit, use that one.
753
4
      // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we
754
4
      // do not forward past its loop header. This would require us to use a
755
4
      // previous loop induction variable instead the current one. We currently
756
4
      // do not allow forwarding PHI nodes, thus this should never occur (the
757
4
      // only exception where no phi is necessary being an unreachable loop
758
4
      // without edge from the outside).
759
4
      VirtualUse TargetUse = VirtualUse::create(
760
4
          S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true);
761
4
      if (TargetUse.getKind() == VirtualUse::Synthesizable)
762
4
        return FD_CanForwardLeaf;
763
0
764
0
      DEBUG(dbgs() << "    Synthesizable would not be synthesizable anymore: "
765
0
                   << *UseVal << "\n");
766
0
      return FD_CannotForward;
767
0
    }
768
0
769
5
    case VirtualUse::ReadOnly:
770
5
      // Note that we cannot return FD_CanForwardTree here. With a operand tree
771
5
      // depth of 0, UseVal is the use in TargetStmt that we try to replace.
772
5
      // With -polly-analyze-read-only-scalars=true we would ensure the
773
5
      // existence of a MemoryAccess (which already exists for a leaf) and be
774
5
      // removed again by tryForwardTree because it's goal is to remove this
775
5
      // scalar MemoryAccess. It interprets FD_CanForwardTree as the permission
776
5
      // to do so.
777
5
      if (!DoIt)
778
3
        return FD_CanForwardLeaf;
779
2
780
2
      // If we model read-only scalars, we need to create a MemoryAccess for it.
781
2
      if (ModelReadOnlyScalars)
782
1
        TargetStmt->ensureValueRead(UseVal);
783
2
784
2
      NumReadOnlyCopied++;
785
2
      TotalReadOnlyCopied++;
786
2
      return FD_DidForwardLeaf;
787
2
788
2
    case VirtualUse::Intra:
789
1
      // Knowing that UseStmt and DefStmt are the same statement instance, just
790
1
      // reuse the information about UseStmt for DefStmt
791
1
      DefStmt = UseStmt;
792
1
      DefToTarget = UseToTarget;
793
1
794
1
      LLVM_FALLTHROUGH;
795
14
    case VirtualUse::Inter:
796
14
      Instruction *Inst = cast<Instruction>(UseVal);
797
14
798
14
      if (!DefStmt) {
799
13
        DefStmt = S->getStmtFor(Inst);
800
13
        if (!DefStmt)
801
1
          return FD_CannotForward;
802
13
      }
803
13
804
13
      DefLoop = LI->getLoopFor(Inst->getParent());
805
13
806
13
      if (DefToTarget.is_null() && 
!Known.is_null()12
) {
807
12
        IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
808
12
809
12
        // { UseDomain[] -> DefDomain[] }
810
12
        isl::map UseToDef = computeUseToDefFlowDependency(UseStmt, DefStmt);
811
12
812
12
        // { DefDomain[] -> UseDomain[] -> TargetDomain[] } shortened to
813
12
        // { DefDomain[] -> TargetDomain[] }
814
12
        DefToTarget = UseToTarget.apply_domain(UseToDef);
815
12
        simplify(DefToTarget);
816
12
      }
817
13
818
13
      ForwardingDecision SpeculativeResult = forwardSpeculatable(
819
13
          TargetStmt, Inst, DefStmt, DefLoop, DefToTarget, DoIt);
820
13
      if (SpeculativeResult != FD_NotApplicable)
821
4
        return SpeculativeResult;
822
9
823
9
      ForwardingDecision KnownResult =
824
9
          forwardKnownLoad(TargetStmt, Inst, UseStmt, UseLoop, UseToTarget,
825
9
                           DefStmt, DefLoop, DefToTarget, DoIt);
826
9
      if (KnownResult != FD_NotApplicable)
827
6
        return KnownResult;
828
3
829
3
      ForwardingDecision ReloadResult =
830
3
          reloadKnownContent(TargetStmt, Inst, UseStmt, UseLoop, UseToTarget,
831
3
                             DefStmt, DefLoop, DefToTarget, DoIt);
832
3
      if (ReloadResult != FD_NotApplicable)
833
0
        return ReloadResult;
834
3
835
3
      // When no method is found to forward the operand tree, we effectively
836
3
      // cannot handle it.
837
3
      DEBUG(dbgs() << "    Cannot forward instruction: " << *Inst << "\n");
838
3
      return FD_CannotForward;
839
0
    }
840
0
841
0
    llvm_unreachable("Case unhandled");
842
0
  }
843
844
  /// Try to forward an operand tree rooted in @p RA.
845
10
  bool tryForwardTree(MemoryAccess *RA) {
846
10
    assert(RA->isLatestScalarKind());
847
10
    DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n");
848
10
849
10
    ScopStmt *Stmt = RA->getStatement();
850
10
    Loop *InLoop = Stmt->getSurroundingLoop();
851
10
852
10
    isl::map TargetToUse;
853
10
    if (!Known.is_null()) {
854
10
      isl::space DomSpace = Stmt->getDomainSpace();
855
10
      TargetToUse =
856
10
          isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace));
857
10
    }
858
10
859
10
    ForwardingDecision Assessment = forwardTree(
860
10
        Stmt, RA->getAccessValue(), Stmt, InLoop, TargetToUse, false);
861
10
    assert(Assessment != FD_DidForwardTree && Assessment != FD_DidForwardLeaf);
862
10
    if (Assessment != FD_CanForwardProfitably)
863
5
      return false;
864
5
865
5
    ForwardingDecision Execution = forwardTree(Stmt, RA->getAccessValue(), Stmt,
866
5
                                               InLoop, TargetToUse, true);
867
5
    assert(((Execution == FD_DidForwardTree) ||
868
5
            (Execution == FD_DidForwardLeaf)) &&
869
5
           "A previous positive assessment must also be executable");
870
5
871
5
    if (Execution == FD_DidForwardTree)
872
5
      Stmt->removeSingleMemoryAccess(RA);
873
10
    return true;
874
10
  }
875
876
  /// Return which SCoP this instance is processing.
877
0
  Scop *getScop() const { return S; }
878
879
  /// Run the algorithm: Use value read accesses as operand tree roots and try
880
  /// to forward them into the statement.
881
7
  bool forwardOperandTrees() {
882
16
    for (ScopStmt &Stmt : *S) {
883
16
      bool StmtModified = false;
884
16
885
16
      // Because we are modifying the MemoryAccess list, collect them first to
886
16
      // avoid iterator invalidation.
887
16
      SmallVector<MemoryAccess *, 16> Accs;
888
31
      for (MemoryAccess *RA : Stmt) {
889
31
        if (!RA->isRead())
890
18
          continue;
891
13
        if (!RA->isLatestScalarKind())
892
3
          continue;
893
10
894
10
        Accs.push_back(RA);
895
10
      }
896
16
897
16
      for (MemoryAccess *RA : Accs) {
898
10
        if (tryForwardTree(RA)) {
899
5
          Modified = true;
900
5
          StmtModified = true;
901
5
          NumForwardedTrees++;
902
5
          TotalForwardedTrees++;
903
5
        }
904
10
      }
905
16
906
16
      if (StmtModified) {
907
5
        NumModifiedStmts++;
908
5
        TotalModifiedStmts++;
909
5
      }
910
16
    }
911
7
912
7
    if (Modified)
913
4
      ScopsModified++;
914
7
    return Modified;
915
7
  }
916
917
  /// Print the pass result, performed transformations and the SCoP after the
918
  /// transformation.
919
7
  void print(raw_ostream &OS, int Indent = 0) {
920
7
    printStatistics(OS, Indent);
921
7
922
7
    if (!Modified) {
923
3
      // This line can easily be checked in regression tests.
924
3
      OS << "ForwardOpTree executed, but did not modify anything\n";
925
3
      return;
926
3
    }
927
4
928
4
    printStatements(OS, Indent);
929
4
  }
930
};
931
932
/// Pass that redirects scalar reads to array elements that are known to contain
933
/// the same value.
934
///
935
/// This reduces the number of scalar accesses and therefore potentially
936
/// increases the freedom of the scheduler. In the ideal case, all reads of a
937
/// scalar definition are redirected (We currently do not care about removing
938
/// the write in this case).  This is also useful for the main DeLICM pass as
939
/// there are less scalars to be mapped.
940
class ForwardOpTree : public ScopPass {
941
private:
942
  /// The pass implementation, also holding per-scop data.
943
  std::unique_ptr<ForwardOpTreeImpl> Impl;
944
945
public:
946
  static char ID;
947
948
7
  explicit ForwardOpTree() : ScopPass(ID) {}
949
  ForwardOpTree(const ForwardOpTree &) = delete;
950
  ForwardOpTree &operator=(const ForwardOpTree &) = delete;
951
952
7
  void getAnalysisUsage(AnalysisUsage &AU) const override {
953
7
    AU.addRequiredTransitive<ScopInfoRegionPass>();
954
7
    AU.addRequired<LoopInfoWrapperPass>();
955
7
    AU.setPreservesAll();
956
7
  }
957
958
7
  bool runOnScop(Scop &S) override {
959
7
    // Free resources for previous SCoP's computation, if not yet done.
960
7
    releaseMemory();
961
7
962
7
    LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
963
7
964
7
    {
965
7
      IslMaxOperationsGuard MaxOpGuard(S.getIslCtx().get(), MaxOps, false);
966
7
      Impl = llvm::make_unique<ForwardOpTreeImpl>(&S, &LI, MaxOpGuard);
967
7
968
7
      if (AnalyzeKnown) {
969
7
        DEBUG(dbgs() << "Prepare forwarders...\n");
970
7
        Impl->computeKnownValues();
971
7
      }
972
7
973
7
      DEBUG(dbgs() << "Forwarding operand trees...\n");
974
7
      Impl->forwardOperandTrees();
975
7
976
7
      if (MaxOpGuard.hasQuotaExceeded()) {
977
0
        DEBUG(dbgs() << "Not all operations completed because of "
978
0
                        "max_operations exceeded\n");
979
0
        KnownOutOfQuota++;
980
0
      }
981
7
    }
982
7
983
7
    DEBUG(dbgs() << "\nFinal Scop:\n");
984
7
    DEBUG(dbgs() << S);
985
7
986
7
    // Update statistics
987
7
    auto ScopStats = S.getStatistics();
988
7
    NumValueWrites += ScopStats.NumValueWrites;
989
7
    NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
990
7
    NumPHIWrites += ScopStats.NumPHIWrites;
991
7
    NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
992
7
    NumSingletonWrites += ScopStats.NumSingletonWrites;
993
7
    NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
994
7
995
7
    return false;
996
7
  }
997
998
7
  void printScop(raw_ostream &OS, Scop &S) const override {
999
7
    if (!Impl)
1000
0
      return;
1001
7
1002
7
    assert(Impl->getScop() == &S);
1003
7
    Impl->print(OS);
1004
7
  }
1005
1006
31
  void releaseMemory() override { Impl.reset(); }
1007
}; // class ForwardOpTree
1008
1009
char ForwardOpTree::ID;
1010
1011
} // namespace
1012
1013
0
ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); }
1014
1015
17.2k
INITIALIZE_PASS_BEGIN(ForwardOpTree, "polly-optree",
1016
17.2k
                      "Polly - Forward operand tree", false, false)
1017
17.2k
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
1018
17.2k
INITIALIZE_PASS_END(ForwardOpTree, "polly-optree",
1019
                    "Polly - Forward operand tree", false, false)