Coverage Report

Created: 2018-02-20 23:11

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/polly/lib/Transform/ForwardOpTree.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- ForwardOpTree.h ------------------------------------------*- C++ -*-===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// Move instructions between statements.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "polly/ForwardOpTree.h"
15
#include "polly/Options.h"
16
#include "polly/ScopBuilder.h"
17
#include "polly/ScopInfo.h"
18
#include "polly/ScopPass.h"
19
#include "polly/Support/GICHelper.h"
20
#include "polly/Support/ISLOStream.h"
21
#include "polly/Support/ISLTools.h"
22
#include "polly/Support/VirtualInstruction.h"
23
#include "polly/ZoneAlgo.h"
24
#include "llvm/ADT/STLExtras.h"
25
#include "llvm/ADT/SmallVector.h"
26
#include "llvm/ADT/Statistic.h"
27
#include "llvm/Analysis/LoopInfo.h"
28
#include "llvm/Analysis/ValueTracking.h"
29
#include "llvm/IR/Instruction.h"
30
#include "llvm/IR/Instructions.h"
31
#include "llvm/IR/Value.h"
32
#include "llvm/Pass.h"
33
#include "llvm/Support/Casting.h"
34
#include "llvm/Support/CommandLine.h"
35
#include "llvm/Support/Compiler.h"
36
#include "llvm/Support/Debug.h"
37
#include "llvm/Support/ErrorHandling.h"
38
#include "llvm/Support/raw_ostream.h"
39
#include "isl/ctx.h"
40
#include "isl/isl-noexceptions.h"
41
#include <cassert>
42
#include <memory>
43
44
#define DEBUG_TYPE "polly-optree"
45
46
using namespace llvm;
47
using namespace polly;
48
49
static cl::opt<bool>
50
    AnalyzeKnown("polly-optree-analyze-known",
51
                 cl::desc("Analyze array contents for load forwarding"),
52
                 cl::cat(PollyCategory), cl::init(true), cl::Hidden);
53
54
static cl::opt<bool>
55
    NormalizePHIs("polly-optree-normalize-phi",
56
                  cl::desc("Replace PHIs by their incoming values"),
57
                  cl::cat(PollyCategory), cl::init(false), cl::Hidden);
58
59
static cl::opt<unsigned>
60
    MaxOps("polly-optree-max-ops",
61
           cl::desc("Maximum number of ISL operations to invest for known "
62
                    "analysis; 0=no limit"),
63
           cl::init(1000000), cl::cat(PollyCategory), cl::Hidden);
64
65
STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs");
66
STATISTIC(KnownOutOfQuota,
67
          "Analyses aborted because max_operations was reached");
68
69
STATISTIC(TotalInstructionsCopied, "Number of copied instructions");
70
STATISTIC(TotalKnownLoadsForwarded,
71
          "Number of forwarded loads because their value was known");
72
STATISTIC(TotalReloads, "Number of reloaded values");
73
STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses");
74
STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees");
75
STATISTIC(TotalModifiedStmts,
76
          "Number of statements with at least one forwarded tree");
77
78
STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree");
79
80
STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree");
81
STATISTIC(NumValueWritesInLoops,
82
          "Number of scalar value writes nested in affine loops after OpTree");
83
STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree");
84
STATISTIC(NumPHIWritesInLoops,
85
          "Number of scalar phi writes nested in affine loops after OpTree");
86
STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree");
87
STATISTIC(NumSingletonWritesInLoops,
88
          "Number of singleton writes nested in affine loops after OpTree");
89
90
namespace {
91
92
/// The state of whether an operand tree was/can be forwarded.
93
///
94
/// The items apply to an instructions and its operand tree with the instruction
95
/// as the root element. If the value in question is not an instruction in the
96
/// SCoP, it can be a leaf of an instruction's operand tree.
97
enum ForwardingDecision {
98
  /// The root instruction or value cannot be forwarded at all.
99
  FD_CannotForward,
100
101
  /// The root instruction or value can be forwarded as a leaf of a larger
102
  /// operand tree.
103
  /// It does not make sense to move the value itself, it would just replace it
104
  /// by a use of itself. For instance, a constant "5" used in a statement can
105
  /// be forwarded, but it would just replace it by the same constant "5".
106
  /// However, it makes sense to move as an operand of
107
  ///
108
  ///   %add = add 5, 5
109
  ///
110
  /// where "5" is moved as part of a larger operand tree. "5" would be placed
111
  /// (disregarding for a moment that literal constants don't have a location
112
  /// and can be used anywhere) into the same statement as %add would.
113
  FD_CanForwardLeaf,
114
115
  /// The root instruction can be forwarded and doing so avoids a scalar
116
  /// dependency.
117
  ///
118
  /// This can be either because the operand tree can be moved to the target
119
  /// statement, or a memory access is redirected to read from a different
120
  /// location.
121
  FD_CanForwardProfitably,
122
123
  /// Used to indicate that a forwarding has be carried out successfully, and
124
  /// the forwarded memory access can be deleted.
125
  FD_DidForwardTree,
126
127
  /// Used to indicate that a forwarding has be carried out successfully, and
128
  /// the forwarded memory access is being reused.
129
  FD_DidForwardLeaf,
130
131
  /// A forwarding method cannot be applied to the operand tree.
132
  /// The difference to FD_CannotForward is that there might be other methods
133
  /// that can handle it.
134
  /// The conditions that make an operand tree applicable must be checked even
135
  /// with DoIt==true because a method following the one that returned
136
  /// FD_NotApplicable might have returned FD_CanForwardTree.
137
  FD_NotApplicable
138
};
139
140
/// Implementation of operand tree forwarding for a specific SCoP.
141
///
142
/// For a statement that requires a scalar value (through a value read
143
/// MemoryAccess), see if its operand can be moved into the statement. If so,
144
/// the MemoryAccess is removed and the all the operand tree instructions are
145
/// moved into the statement. All original instructions are left in the source
146
/// statements. The simplification pass can clean these up.
147
class ForwardOpTreeImpl : ZoneAlgorithm {
148
private:
149
  /// Scope guard to limit the number of isl operations for this pass.
150
  IslMaxOperationsGuard &MaxOpGuard;
151
152
  /// How many instructions have been copied to other statements.
153
  int NumInstructionsCopied = 0;
154
155
  /// Number of loads forwarded because their value was known.
156
  int NumKnownLoadsForwarded = 0;
157
158
  /// Number of values reloaded from known array elements.
159
  int NumReloads = 0;
160
161
  /// How many read-only accesses have been copied.
162
  int NumReadOnlyCopied = 0;
163
164
  /// How many operand trees have been forwarded.
165
  int NumForwardedTrees = 0;
166
167
  /// Number of statements with at least one forwarded operand tree.
168
  int NumModifiedStmts = 0;
169
170
  /// Whether we carried out at least one change to the SCoP.
171
  bool Modified = false;
172
173
  /// Contains the zones where array elements are known to contain a specific
174
  /// value.
175
  /// { [Element[] -> Zone[]] -> ValInst[] }
176
  /// @see computeKnown()
177
  isl::union_map Known;
178
179
  /// Translator for newly introduced ValInsts to already existing ValInsts such
180
  /// that new introduced load instructions can reuse the Known analysis of its
181
  /// original load. { ValInst[] -> ValInst[] }
182
  isl::union_map Translator;
183
184
  /// Get list of array elements that do contain the same ValInst[] at Domain[].
185
  ///
186
  /// @param ValInst { Domain[] -> ValInst[] }
187
  ///                The values for which we search for alternative locations,
188
  ///                per statement instance.
189
  ///
190
  /// @return { Domain[] -> Element[] }
191
  ///         For each statement instance, the array elements that contain the
192
  ///         same ValInst.
193
65
  isl::union_map findSameContentElements(isl::union_map ValInst) {
194
65
    assert(!ValInst.is_single_valued().is_false());
195
65
196
65
    // { Domain[] }
197
65
    isl::union_set Domain = ValInst.domain();
198
65
199
65
    // { Domain[] -> Scatter[] }
200
65
    isl::union_map Schedule = getScatterFor(Domain);
201
65
202
65
    // { Element[] -> [Scatter[] -> ValInst[]] }
203
65
    isl::union_map MustKnownCurried =
204
65
        convertZoneToTimepoints(Known, isl::dim::in, false, true).curry();
205
65
206
65
    // { [Domain[] -> ValInst[]] -> Scatter[] }
207
65
    isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule);
208
65
209
65
    // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] }
210
65
    isl::union_map SchedValDomVal =
211
65
        DomValSched.range_product(ValInst.range_map()).reverse();
212
65
213
65
    // { Element[] -> [Domain[] -> ValInst[]] }
214
65
    isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal);
215
65
216
65
    // { Domain[] -> Element[] }
217
65
    isl::union_map MustKnownMap =
218
65
        MustKnownInst.uncurry().domain().unwrap().reverse();
219
65
    simplify(MustKnownMap);
220
65
221
65
    return MustKnownMap;
222
65
  }
223
224
  /// Find a single array element for each statement instance, within a single
225
  /// array.
226
  ///
227
  /// @param MustKnown { Domain[] -> Element[] }
228
  ///                  Set of candidate array elements.
229
  /// @param Domain    { Domain[] }
230
  ///                  The statement instance for which we need elements for.
231
  ///
232
  /// @return { Domain[] -> Element[] }
233
  ///         For each statement instance, an array element out of @p MustKnown.
234
  ///         All array elements must be in the same array (Polly does not yet
235
  ///         support reading from different accesses using the same
236
  ///         MemoryAccess). If no mapping for all of @p Domain exists, returns
237
  ///         null.
238
65
  isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) {
239
65
    // { Domain[] -> Element[] }
240
65
    isl::map Result;
241
65
242
65
    // MemoryAccesses can read only elements from a single array
243
65
    // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }).
244
65
    // Look through all spaces until we find one that contains at least the
245
65
    // wanted statement instance.s
246
65
    MustKnown.foreach_map([&](isl::map Map) -> isl::stat {
247
54
      // Get the array this is accessing.
248
54
      isl::id ArrayId = Map.get_tuple_id(isl::dim::out);
249
54
      ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user());
250
54
251
54
      // No support for generation of indirect array accesses.
252
54
      if (SAI->getBasePtrOriginSAI())
253
0
        return isl::stat::ok; // continue
254
54
255
54
      // Determine whether this map contains all wanted values.
256
54
      isl::set MapDom = Map.domain();
257
54
      if (!Domain.is_subset(MapDom).is_true())
258
2
        return isl::stat::ok; // continue
259
52
260
52
      // There might be multiple array elements that contain the same value, but
261
52
      // choose only one of them. lexmin is used because it returns a one-value
262
52
      // mapping, we do not care about which one.
263
52
      // TODO: Get the simplest access function.
264
52
      Result = Map.lexmin();
265
52
      return isl::stat::error; // break
266
52
    });
267
65
268
65
    return Result;
269
65
  }
270
271
public:
272
  ForwardOpTreeImpl(Scop *S, LoopInfo *LI, IslMaxOperationsGuard &MaxOpGuard)
273
34
      : ZoneAlgorithm("polly-optree", S, LI), MaxOpGuard(MaxOpGuard) {}
274
275
  /// Compute the zones of known array element contents.
276
  ///
277
  /// @return True if the computed #Known is usable.
278
34
  bool computeKnownValues() {
279
34
    isl::union_map MustKnown, KnownFromLoad, KnownFromInit;
280
34
281
34
    // Check that nothing strange occurs.
282
34
    collectCompatibleElts();
283
34
284
34
    {
285
34
      IslQuotaScope QuotaScope = MaxOpGuard.enter();
286
34
287
34
      computeCommon();
288
34
      if (NormalizePHIs)
289
4
        computeNormalizedPHIs();
290
34
      Known = computeKnown(true, true);
291
34
292
34
      // Preexisting ValInsts use the known content analysis of themselves.
293
34
      Translator = makeIdentityMap(Known.range(), false);
294
34
    }
295
34
296
34
    if (!Known || !Translator || !NormalizeMap) {
297
0
      assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota);
298
0
      Known = nullptr;
299
0
      Translator = nullptr;
300
0
      NormalizeMap = nullptr;
301
0
      DEBUG(dbgs() << "Known analysis exceeded max_operations\n");
302
0
      return false;
303
0
    }
304
34
305
34
    KnownAnalyzed++;
306
34
    DEBUG(dbgs() << "All known: " << Known << "\n");
307
34
308
34
    return true;
309
34
  }
310
311
33
  void printStatistics(raw_ostream &OS, int Indent = 0) {
312
33
    OS.indent(Indent) << "Statistics {\n";
313
33
    OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied
314
33
                          << '\n';
315
33
    OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded
316
33
                          << '\n';
317
33
    OS.indent(Indent + 4) << "Reloads: " << NumReloads << '\n';
318
33
    OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied
319
33
                          << '\n';
320
33
    OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees
321
33
                          << '\n';
322
33
    OS.indent(Indent + 4) << "Statements with forwarded operand trees: "
323
33
                          << NumModifiedStmts << '\n';
324
33
    OS.indent(Indent) << "}\n";
325
33
  }
326
327
25
  void printStatements(raw_ostream &OS, int Indent = 0) const {
328
25
    OS.indent(Indent) << "After statements {\n";
329
62
    for (auto &Stmt : *S) {
330
62
      OS.indent(Indent + 4) << Stmt.getBaseName() << "\n";
331
62
      for (auto *MA : Stmt)
332
120
        MA->print(OS);
333
62
334
62
      OS.indent(Indent + 12);
335
62
      Stmt.printInstructions(OS);
336
62
    }
337
25
    OS.indent(Indent) << "}\n";
338
25
  }
339
340
  /// Create a new MemoryAccess of type read and MemoryKind::Array.
341
  ///
342
  /// @param Stmt           The statement in which the access occurs.
343
  /// @param LI             The instruction that does the access.
344
  /// @param AccessRelation The array element that each statement instance
345
  ///                       accesses.
346
  ///
347
  /// @param The newly created access.
348
  MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI,
349
16
                                    isl::map AccessRelation) {
350
16
    isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out);
351
16
    ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user());
352
16
353
16
    // Create a dummy SCEV access, to be replaced anyway.
354
16
    SmallVector<const SCEV *, 4> Sizes;
355
16
    Sizes.reserve(SAI->getNumberOfDimensions());
356
16
    SmallVector<const SCEV *, 4> Subscripts;
357
16
    Subscripts.reserve(SAI->getNumberOfDimensions());
358
32
    for (unsigned i = 0; i < SAI->getNumberOfDimensions(); 
i += 116
) {
359
16
      Sizes.push_back(SAI->getDimensionSize(i));
360
16
      Subscripts.push_back(nullptr);
361
16
    }
362
16
363
16
    MemoryAccess *Access =
364
16
        new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(),
365
16
                         LI->getType(), true, {}, Sizes, LI, MemoryKind::Array);
366
16
    S->addAccessFunction(Access);
367
16
    Stmt->addAccess(Access, true);
368
16
369
16
    Access->setNewAccessRelation(AccessRelation);
370
16
371
16
    return Access;
372
16
  }
373
374
  /// For an llvm::Value defined in @p DefStmt, compute the RAW dependency for a
375
  /// use in every instance of @p UseStmt.
376
  ///
377
  /// @param UseStmt Statement a scalar is used in.
378
  /// @param DefStmt Statement a scalar is defined in.
379
  ///
380
  /// @return { DomainUse[] -> DomainDef[] }
381
72
  isl::map computeUseToDefFlowDependency(ScopStmt *UseStmt, ScopStmt *DefStmt) {
382
72
    // { DomainUse[] -> Scatter[] }
383
72
    isl::map UseScatter = getScatterFor(UseStmt);
384
72
385
72
    // { Zone[] -> DomainDef[] }
386
72
    isl::map ReachDefZone = getScalarReachingDefinition(DefStmt);
387
72
388
72
    // { Scatter[] -> DomainDef[] }
389
72
    isl::map ReachDefTimepoints =
390
72
        convertZoneToTimepoints(ReachDefZone, isl::dim::in, false, true);
391
72
392
72
    // { DomainUse[] -> DomainDef[] }
393
72
    return UseScatter.apply_range(ReachDefTimepoints);
394
72
  }
395
396
  /// Forward a load by reading from an array element that contains the same
397
  /// value. Typically the location it was loaded from.
398
  ///
399
  /// @param TargetStmt  The statement the operand tree will be copied to.
400
  /// @param Inst        The (possibly speculatable) instruction to forward.
401
  /// @param UseStmt     The statement that uses @p Inst.
402
  /// @param UseLoop     The loop @p Inst is used in.
403
  /// @param UseToTarget { DomainUse[] -> DomainTarget[] }
404
  ///                    A mapping from the statement instance @p Inst is used
405
  ///                    to the statement instance it is forwarded to.
406
  /// @param DefStmt     The statement @p Inst is defined in.
407
  /// @param DefLoop     The loop which contains @p Inst.
408
  /// @param DefToTarget { DomainDef[] -> DomainTarget[] }
409
  ///                    A mapping from the statement instance @p Inst is
410
  ///                    defined to the statement instance it is forwarded to.
411
  /// @param DoIt        If false, only determine whether an operand tree can be
412
  ///                    forwarded. If true, carry out the forwarding. Do not
413
  ///                    use DoIt==true if an operand tree is not known to be
414
  ///                    forwardable.
415
  ///
416
  /// @return FD_NotApplicable  if @p Inst cannot be forwarded by creating a new
417
  ///                           load.
418
  ///         FD_CannotForward  if the pointer operand cannot be forwarded.
419
  ///         FD_CanForwardProfitably if @p Inst is forwardable.
420
  ///         FD_DidForwardTree if @p DoIt was true.
421
  ForwardingDecision forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst,
422
                                      ScopStmt *UseStmt, Loop *UseLoop,
423
                                      isl::map UseToTarget, ScopStmt *DefStmt,
424
                                      Loop *DefLoop, isl::map DefToTarget,
425
66
                                      bool DoIt) {
426
66
    // Cannot do anything without successful known analysis.
427
66
    if (Known.is_null() || Translator.is_null() || UseToTarget.is_null() ||
428
66
        DefToTarget.is_null() || MaxOpGuard.hasQuotaExceeded())
429
0
      return FD_NotApplicable;
430
66
431
66
    LoadInst *LI = dyn_cast<LoadInst>(Inst);
432
66
    if (!LI)
433
21
      return FD_NotApplicable;
434
45
435
45
    // If the load is already in the statement, no forwarding is necessary.
436
45
    // However, it might happen that the LoadInst is already present in the
437
45
    // statement's instruction list. In that case we do as follows:
438
45
    // - For the evaluation (DoIt==false), we can trivially forward it as it is
439
45
    //   benefit of forwarding an already present instruction.
440
45
    // - For the execution (DoIt==true), prepend the instruction (to make it
441
45
    //   available to all instructions following in the instruction list), but
442
45
    //   do not add another MemoryAccess.
443
45
    MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI);
444
45
    if (Access && 
!DoIt9
)
445
4
      return FD_CanForwardProfitably;
446
41
447
41
    ForwardingDecision OpDecision =
448
41
        forwardTree(TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop,
449
41
                    DefToTarget, DoIt);
450
41
    switch (OpDecision) {
451
41
    case FD_CannotForward:
452
0
      assert(!DoIt);
453
0
      return OpDecision;
454
41
455
41
    case FD_CanForwardLeaf:
456
20
    case FD_CanForwardProfitably:
457
20
      assert(!DoIt);
458
20
      break;
459
20
460
21
    case FD_DidForwardLeaf:
461
21
    case FD_DidForwardTree:
462
21
      assert(DoIt);
463
21
      break;
464
21
465
21
    default:
466
0
      llvm_unreachable("Shouldn't return this");
467
41
    }
468
41
469
41
    IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
470
41
471
41
    // { DomainDef[] -> ValInst[] }
472
41
    isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
473
41
    assert(isNormalized(ExpectedVal) && "LoadInsts are always normalized");
474
41
475
41
    // { DomainTarget[] -> ValInst[] }
476
41
    isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
477
41
    isl::union_map TranslatedExpectedVal =
478
41
        isl::union_map(TargetExpectedVal).apply_range(Translator);
479
41
480
41
    // { DomainTarget[] -> Element[] }
481
41
    isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
482
41
483
41
    isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
484
41
    if (!SameVal)
485
3
      return FD_NotApplicable;
486
38
487
38
    if (DoIt)
488
21
      TargetStmt->prependInstruction(LI);
489
38
490
38
    if (!DoIt)
491
17
      return FD_CanForwardProfitably;
492
21
493
21
    if (Access) {
494
5
      DEBUG(dbgs() << "    forwarded known load with preexisting MemoryAccess"
495
5
                   << Access << "\n");
496
16
    } else {
497
16
      Access = makeReadArrayAccess(TargetStmt, LI, SameVal);
498
16
      DEBUG(dbgs() << "    forwarded known load with new MemoryAccess" << Access
499
16
                   << "\n");
500
16
501
16
      // { ValInst[] }
502
16
      isl::space ValInstSpace = ExpectedVal.get_space().range();
503
16
504
16
      // After adding a new load to the SCoP, also update the Known content
505
16
      // about it. The new load will have a known ValInst of
506
16
      // { [DomainTarget[] -> Value[]] }
507
16
      // but which -- because it is a copy of it -- has same value as the
508
16
      // { [DomainDef[] -> Value[]] }
509
16
      // that it replicates. Instead of  cloning the known content of
510
16
      // [DomainDef[] -> Value[]]
511
16
      // for DomainTarget[], we add a 'translator' that maps
512
16
      // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]]
513
16
      // before comparing to the known content.
514
16
      // TODO: 'Translator' could also be used to map PHINodes to their incoming
515
16
      // ValInsts.
516
16
      if (ValInstSpace.is_wrapping()) {
517
16
        // { DefDomain[] -> Value[] }
518
16
        isl::map ValInsts = ExpectedVal.range().unwrap();
519
16
520
16
        // { DefDomain[] }
521
16
        isl::set DefDomain = ValInsts.domain();
522
16
523
16
        // { Value[] }
524
16
        isl::space ValSpace = ValInstSpace.unwrap().range();
525
16
526
16
        // { Value[] -> Value[] }
527
16
        isl::map ValToVal =
528
16
            isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace));
529
16
530
16
        // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] }
531
16
        isl::map LocalTranslator = DefToTarget.reverse().product(ValToVal);
532
16
533
16
        Translator = Translator.add_map(LocalTranslator);
534
16
        DEBUG(dbgs() << "      local translator is " << LocalTranslator
535
16
                     << "\n");
536
16
      }
537
16
    }
538
21
    DEBUG(dbgs() << "      expected values where " << TargetExpectedVal
539
21
                 << "\n");
540
21
    DEBUG(dbgs() << "      candidate elements where " << Candidates << "\n");
541
21
    assert(Access);
542
21
543
21
    NumKnownLoadsForwarded++;
544
21
    TotalKnownLoadsForwarded++;
545
21
    return FD_DidForwardTree;
546
21
  }
547
548
  /// Forward a scalar by redirecting the access to an array element that stores
549
  /// the same value.
550
  ///
551
  /// @param TargetStmt  The statement the operand tree will be copied to.
552
  /// @param Inst        The scalar to forward.
553
  /// @param UseStmt     The statement that uses @p Inst.
554
  /// @param UseLoop     The loop @p Inst is used in.
555
  /// @param UseToTarget { DomainUse[] -> DomainTarget[] }
556
  ///                    A mapping from the statement instance @p Inst is used
557
  ///                    in, to the statement instance it is forwarded to.
558
  /// @param DefStmt     The statement @p Inst is defined in.
559
  /// @param DefLoop     The loop which contains @p Inst.
560
  /// @param DefToTarget { DomainDef[] -> DomainTarget[] }
561
  ///                    A mapping from the statement instance @p Inst is
562
  ///                    defined in, to the statement instance it is forwarded
563
  ///                    to.
564
  /// @param DoIt        If false, only determine whether an operand tree can be
565
  ///                    forwarded. If true, carry out the forwarding. Do not
566
  ///                    use DoIt==true if an operand tree is not known to be
567
  ///                    forwardable.
568
  ///
569
  /// @return FD_NotApplicable        if @p Inst cannot be reloaded.
570
  ///         FD_CanForwardLeaf       if @p Inst can be reloaded.
571
  ///         FD_CanForwardProfitably if @p Inst has been reloaded.
572
  ///         FD_DidForwardLeaf       if @p DoIt was true.
573
  ForwardingDecision reloadKnownContent(ScopStmt *TargetStmt, Instruction *Inst,
574
                                        ScopStmt *UseStmt, Loop *UseLoop,
575
                                        isl::map UseToTarget, ScopStmt *DefStmt,
576
                                        Loop *DefLoop, isl::map DefToTarget,
577
24
                                        bool DoIt) {
578
24
    // Cannot do anything without successful known analysis.
579
24
    if (Known.is_null() || Translator.is_null() || UseToTarget.is_null() ||
580
24
        DefToTarget.is_null() || MaxOpGuard.hasQuotaExceeded())
581
0
      return FD_NotApplicable;
582
24
583
24
    MemoryAccess *Access = TargetStmt->lookupInputAccessOf(Inst);
584
24
    if (Access && 
Access->isLatestArrayKind()22
) {
585
0
      if (DoIt)
586
0
        return FD_DidForwardLeaf;
587
0
      return FD_CanForwardLeaf;
588
0
    }
589
24
590
24
    // Don't spend too much time analyzing whether it can be reloaded. When
591
24
    // carrying-out the forwarding, we cannot bail-out in the middle of the
592
24
    // transformation. It also shouldn't take as long because some results are
593
24
    // cached.
594
24
    IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
595
24
596
24
    // { DomainDef[] -> ValInst[] }
597
24
    isl::union_map ExpectedVal = makeNormalizedValInst(Inst, UseStmt, UseLoop);
598
24
599
24
    // { DomainTarget[] -> ValInst[] }
600
24
    isl::union_map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
601
24
    isl::union_map TranslatedExpectedVal =
602
24
        TargetExpectedVal.apply_range(Translator);
603
24
604
24
    // { DomainTarget[] -> Element[] }
605
24
    isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
606
24
607
24
    isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
608
24
    if (!SameVal)
609
10
      return FD_NotApplicable;
610
14
611
14
    if (!DoIt)
612
7
      return FD_CanForwardProfitably;
613
7
614
7
    if (!Access)
615
0
      Access = TargetStmt->ensureValueRead(Inst);
616
7
617
7
    simplify(SameVal);
618
7
    Access->setNewAccessRelation(SameVal);
619
7
620
7
    TotalReloads++;
621
7
    NumReloads++;
622
7
    return FD_DidForwardLeaf;
623
7
  }
624
625
  /// Forwards a speculatively executable instruction.
626
  ///
627
  /// @param TargetStmt  The statement the operand tree will be copied to.
628
  /// @param UseInst     The (possibly speculatable) instruction to forward.
629
  /// @param DefStmt     The statement @p UseInst is defined in.
630
  /// @param DefLoop     The loop which contains @p UseInst.
631
  /// @param DefToTarget { DomainDef[] -> DomainTarget[] }
632
  ///                    A mapping from the statement instance @p UseInst is
633
  ///                    defined to the statement instance it is forwarded to.
634
  /// @param DoIt        If false, only determine whether an operand tree can be
635
  ///                    forwarded. If true, carry out the forwarding. Do not
636
  ///                    use DoIt==true if an operand tree is not known to be
637
  ///                    forwardable.
638
  ///
639
  /// @return FD_NotApplicable  if @p UseInst is not speculatable.
640
  ///         FD_CannotForward  if one of @p UseInst's operands is not
641
  ///                           forwardable.
642
  ///         FD_CanForwardTree if @p UseInst is forwardable.
643
  ///         FD_DidForward     if @p DoIt was true.
644
  ForwardingDecision forwardSpeculatable(ScopStmt *TargetStmt,
645
                                         Instruction *UseInst,
646
                                         ScopStmt *DefStmt, Loop *DefLoop,
647
100
                                         isl::map DefToTarget, bool DoIt) {
648
100
    // PHIs, unless synthesizable, are not yet supported.
649
100
    if (isa<PHINode>(UseInst))
650
17
      return FD_NotApplicable;
651
83
652
83
    // Compatible instructions must satisfy the following conditions:
653
83
    // 1. Idempotent (instruction will be copied, not moved; although its
654
83
    //    original instance might be removed by simplification)
655
83
    // 2. Not access memory (There might be memory writes between)
656
83
    // 3. Not cause undefined behaviour (we might copy to a location when the
657
83
    //    original instruction was no executed; this is currently not possible
658
83
    //    because we do not forward PHINodes)
659
83
    // 4. Not leak memory if executed multiple times (i.e. malloc)
660
83
    //
661
83
    // Instruction::mayHaveSideEffects is not sufficient because it considers
662
83
    // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is
663
83
    // not sufficient because it allows memory accesses.
664
83
    if (mayBeMemoryDependent(*UseInst))
665
49
      return FD_NotApplicable;
666
34
667
34
    if (DoIt) {
668
16
      // To ensure the right order, prepend this instruction before its
669
16
      // operands. This ensures that its operands are inserted before the
670
16
      // instruction using them.
671
16
      // TODO: The operand tree is not really a tree, but a DAG. We should be
672
16
      // able to handle DAGs without duplication.
673
16
      TargetStmt->prependInstruction(UseInst);
674
16
      NumInstructionsCopied++;
675
16
      TotalInstructionsCopied++;
676
16
    }
677
34
678
60
    for (Value *OpVal : UseInst->operand_values()) {
679
60
      ForwardingDecision OpDecision =
680
60
          forwardTree(TargetStmt, OpVal, DefStmt, DefLoop, DefToTarget, DoIt);
681
60
      switch (OpDecision) {
682
60
      case FD_CannotForward:
683
2
        assert(!DoIt);
684
2
        return FD_CannotForward;
685
60
686
60
      case FD_CanForwardLeaf:
687
29
      case FD_CanForwardProfitably:
688
29
        assert(!DoIt);
689
29
        break;
690
29
691
29
      case FD_DidForwardLeaf:
692
29
      case FD_DidForwardTree:
693
29
        assert(DoIt);
694
29
        break;
695
29
696
29
      case FD_NotApplicable:
697
0
        llvm_unreachable("forwardTree should never return FD_NotApplicable");
698
60
      }
699
60
    }
700
34
701
34
    
if (32
DoIt32
)
702
16
      return FD_DidForwardTree;
703
16
    return FD_CanForwardProfitably;
704
16
  }
705
706
  /// Determines whether an operand tree can be forwarded or carries out a
707
  /// forwarding, depending on the @p DoIt flag.
708
  ///
709
  /// @param TargetStmt  The statement the operand tree will be copied to.
710
  /// @param UseVal      The value (usually an instruction) which is root of an
711
  ///                    operand tree.
712
  /// @param UseStmt     The statement that uses @p UseVal.
713
  /// @param UseLoop     The loop @p UseVal is used in.
714
  /// @param UseToTarget { DomainUse[] -> DomainTarget[] }
715
  ///                    A mapping from the statement instance @p UseVal is used
716
  ///                    to the statement instance it is forwarded to.
717
  /// @param DoIt        If false, only determine whether an operand tree can be
718
  ///                    forwarded. If true, carry out the forwarding. Do not
719
  ///                    use DoIt==true if an operand tree is not known to be
720
  ///                    forwardable.
721
  ///
722
  /// @return If DoIt==false, return whether the operand tree can be forwarded.
723
  ///         If DoIt==true, return FD_DidForward.
724
  ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal,
725
                                 ScopStmt *UseStmt, Loop *UseLoop,
726
187
                                 isl::map UseToTarget, bool DoIt) {
727
187
    ScopStmt *DefStmt = nullptr;
728
187
    Loop *DefLoop = nullptr;
729
187
730
187
    // { DefDomain[] -> TargetDomain[] }
731
187
    isl::map DefToTarget;
732
187
733
187
    VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true);
734
187
    switch (VUse.getKind()) {
735
187
    case VirtualUse::Constant:
736
42
    case VirtualUse::Block:
737
42
    case VirtualUse::Hoisted:
738
42
      // These can be used anywhere without special considerations.
739
42
      if (DoIt)
740
21
        return FD_DidForwardTree;
741
21
      return FD_CanForwardLeaf;
742
21
743
39
    case VirtualUse::Synthesizable: {
744
39
      // ScopExpander will take care for of generating the code at the new
745
39
      // location.
746
39
      if (DoIt)
747
20
        return FD_DidForwardTree;
748
19
749
19
      // Check if the value is synthesizable at the new location as well. This
750
19
      // might be possible when leaving a loop for which ScalarEvolution is
751
19
      // unable to derive the exit value for.
752
19
      // TODO: If there is a LCSSA PHI at the loop exit, use that one.
753
19
      // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we
754
19
      // do not forward past its loop header. This would require us to use a
755
19
      // previous loop induction variable instead the current one. We currently
756
19
      // do not allow forwarding PHI nodes, thus this should never occur (the
757
19
      // only exception where no phi is necessary being an unreachable loop
758
19
      // without edge from the outside).
759
19
      VirtualUse TargetUse = VirtualUse::create(
760
19
          S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true);
761
19
      if (TargetUse.getKind() == VirtualUse::Synthesizable)
762
19
        return FD_CanForwardLeaf;
763
0
764
0
      DEBUG(dbgs() << "    Synthesizable would not be synthesizable anymore: "
765
0
                   << *UseVal << "\n");
766
0
      return FD_CannotForward;
767
0
    }
768
0
769
5
    case VirtualUse::ReadOnly:
770
5
      // Note that we cannot return FD_CanForwardTree here. With a operand tree
771
5
      // depth of 0, UseVal is the use in TargetStmt that we try to replace.
772
5
      // With -polly-analyze-read-only-scalars=true we would ensure the
773
5
      // existence of a MemoryAccess (which already exists for a leaf) and be
774
5
      // removed again by tryForwardTree because it's goal is to remove this
775
5
      // scalar MemoryAccess. It interprets FD_CanForwardTree as the permission
776
5
      // to do so.
777
5
      if (!DoIt)
778
3
        return FD_CanForwardLeaf;
779
2
780
2
      // If we model read-only scalars, we need to create a MemoryAccess for it.
781
2
      if (ModelReadOnlyScalars)
782
1
        TargetStmt->ensureValueRead(UseVal);
783
2
784
2
      NumReadOnlyCopied++;
785
2
      TotalReadOnlyCopied++;
786
2
      return FD_DidForwardLeaf;
787
2
788
28
    case VirtualUse::Intra:
789
28
      // Knowing that UseStmt and DefStmt are the same statement instance, just
790
28
      // reuse the information about UseStmt for DefStmt
791
28
      DefStmt = UseStmt;
792
28
      DefToTarget = UseToTarget;
793
28
794
28
      LLVM_FALLTHROUGH;
795
101
    case VirtualUse::Inter:
796
101
      Instruction *Inst = cast<Instruction>(UseVal);
797
101
798
101
      if (!DefStmt) {
799
73
        DefStmt = S->getStmtFor(Inst);
800
73
        if (!DefStmt)
801
1
          return FD_CannotForward;
802
100
      }
803
100
804
100
      DefLoop = LI->getLoopFor(Inst->getParent());
805
100
806
100
      if (DefToTarget.is_null() && 
!Known.is_null()72
) {
807
72
        IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
808
72
809
72
        // { UseDomain[] -> DefDomain[] }
810
72
        isl::map UseToDef = computeUseToDefFlowDependency(UseStmt, DefStmt);
811
72
812
72
        // { DefDomain[] -> UseDomain[] -> TargetDomain[] } shortened to
813
72
        // { DefDomain[] -> TargetDomain[] }
814
72
        DefToTarget = UseToTarget.apply_domain(UseToDef);
815
72
        simplify(DefToTarget);
816
72
      }
817
100
818
100
      ForwardingDecision SpeculativeResult = forwardSpeculatable(
819
100
          TargetStmt, Inst, DefStmt, DefLoop, DefToTarget, DoIt);
820
100
      if (SpeculativeResult != FD_NotApplicable)
821
34
        return SpeculativeResult;
822
66
823
66
      ForwardingDecision KnownResult =
824
66
          forwardKnownLoad(TargetStmt, Inst, UseStmt, UseLoop, UseToTarget,
825
66
                           DefStmt, DefLoop, DefToTarget, DoIt);
826
66
      if (KnownResult != FD_NotApplicable)
827
42
        return KnownResult;
828
24
829
24
      ForwardingDecision ReloadResult =
830
24
          reloadKnownContent(TargetStmt, Inst, UseStmt, UseLoop, UseToTarget,
831
24
                             DefStmt, DefLoop, DefToTarget, DoIt);
832
24
      if (ReloadResult != FD_NotApplicable)
833
14
        return ReloadResult;
834
10
835
10
      // When no method is found to forward the operand tree, we effectively
836
10
      // cannot handle it.
837
10
      DEBUG(dbgs() << "    Cannot forward instruction: " << *Inst << "\n");
838
10
      return FD_CannotForward;
839
0
    }
840
0
841
0
    llvm_unreachable("Case unhandled");
842
0
  }
843
844
  /// Try to forward an operand tree rooted in @p RA.
845
49
  bool tryForwardTree(MemoryAccess *RA) {
846
49
    assert(RA->isLatestScalarKind());
847
49
    DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n");
848
49
849
49
    ScopStmt *Stmt = RA->getStatement();
850
49
    Loop *InLoop = Stmt->getSurroundingLoop();
851
49
852
49
    isl::map TargetToUse;
853
49
    if (!Known.is_null()) {
854
49
      isl::space DomSpace = Stmt->getDomainSpace();
855
49
      TargetToUse =
856
49
          isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace));
857
49
    }
858
49
859
49
    ForwardingDecision Assessment = forwardTree(
860
49
        Stmt, RA->getAccessValue(), Stmt, InLoop, TargetToUse, false);
861
49
    assert(Assessment != FD_DidForwardTree && Assessment != FD_DidForwardLeaf);
862
49
    if (Assessment != FD_CanForwardProfitably)
863
12
      return false;
864
37
865
37
    ForwardingDecision Execution = forwardTree(Stmt, RA->getAccessValue(), Stmt,
866
37
                                               InLoop, TargetToUse, true);
867
37
    assert(((Execution == FD_DidForwardTree) ||
868
37
            (Execution == FD_DidForwardLeaf)) &&
869
37
           "A previous positive assessment must also be executable");
870
37
871
37
    if (Execution == FD_DidForwardTree)
872
30
      Stmt->removeSingleMemoryAccess(RA);
873
37
    return true;
874
37
  }
875
876
  /// Return which SCoP this instance is processing.
877
0
  Scop *getScop() const { return S; }
878
879
  /// Run the algorithm: Use value read accesses as operand tree roots and try
880
  /// to forward them into the statement.
881
34
  bool forwardOperandTrees() {
882
88
    for (ScopStmt &Stmt : *S) {
883
88
      bool StmtModified = false;
884
88
885
88
      // Because we are modifying the MemoryAccess list, collect them first to
886
88
      // avoid iterator invalidation.
887
88
      SmallVector<MemoryAccess *, 16> Accs;
888
179
      for (MemoryAccess *RA : Stmt) {
889
179
        if (!RA->isRead())
890
106
          continue;
891
73
        if (!RA->isLatestScalarKind())
892
24
          continue;
893
49
894
49
        Accs.push_back(RA);
895
49
      }
896
88
897
88
      for (MemoryAccess *RA : Accs) {
898
49
        if (tryForwardTree(RA)) {
899
37
          Modified = true;
900
37
          StmtModified = true;
901
37
          NumForwardedTrees++;
902
37
          TotalForwardedTrees++;
903
37
        }
904
49
      }
905
88
906
88
      if (StmtModified) {
907
33
        NumModifiedStmts++;
908
33
        TotalModifiedStmts++;
909
33
      }
910
88
    }
911
34
912
34
    if (Modified)
913
26
      ScopsModified++;
914
34
    return Modified;
915
34
  }
916
917
  /// Print the pass result, performed transformations and the SCoP after the
918
  /// transformation.
919
33
  void print(raw_ostream &OS, int Indent = 0) {
920
33
    printStatistics(OS, Indent);
921
33
922
33
    if (!Modified) {
923
8
      // This line can easily be checked in regression tests.
924
8
      OS << "ForwardOpTree executed, but did not modify anything\n";
925
8
      return;
926
8
    }
927
25
928
25
    printStatements(OS, Indent);
929
25
  }
930
};
931
932
/// Pass that redirects scalar reads to array elements that are known to contain
933
/// the same value.
934
///
935
/// This reduces the number of scalar accesses and therefore potentially
936
/// increases the freedom of the scheduler. In the ideal case, all reads of a
937
/// scalar definition are redirected (We currently do not care about removing
938
/// the write in this case).  This is also useful for the main DeLICM pass as
939
/// there are less scalars to be mapped.
940
class ForwardOpTree : public ScopPass {
941
private:
942
  /// The pass implementation, also holding per-scop data.
943
  std::unique_ptr<ForwardOpTreeImpl> Impl;
944
945
public:
946
  static char ID;
947
948
33
  explicit ForwardOpTree() : ScopPass(ID) {}
949
  ForwardOpTree(const ForwardOpTree &) = delete;
950
  ForwardOpTree &operator=(const ForwardOpTree &) = delete;
951
952
33
  void getAnalysisUsage(AnalysisUsage &AU) const override {
953
33
    AU.addRequiredTransitive<ScopInfoRegionPass>();
954
33
    AU.addRequired<LoopInfoWrapperPass>();
955
33
    AU.setPreservesAll();
956
33
  }
957
958
34
  bool runOnScop(Scop &S) override {
959
34
    // Free resources for previous SCoP's computation, if not yet done.
960
34
    releaseMemory();
961
34
962
34
    LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
963
34
964
34
    {
965
34
      IslMaxOperationsGuard MaxOpGuard(S.getIslCtx().get(), MaxOps, false);
966
34
      Impl = llvm::make_unique<ForwardOpTreeImpl>(&S, &LI, MaxOpGuard);
967
34
968
34
      if (AnalyzeKnown) {
969
34
        DEBUG(dbgs() << "Prepare forwarders...\n");
970
34
        Impl->computeKnownValues();
971
34
      }
972
34
973
34
      DEBUG(dbgs() << "Forwarding operand trees...\n");
974
34
      Impl->forwardOperandTrees();
975
34
976
34
      if (MaxOpGuard.hasQuotaExceeded()) {
977
0
        DEBUG(dbgs() << "Not all operations completed because of "
978
0
                        "max_operations exceeded\n");
979
0
        KnownOutOfQuota++;
980
0
      }
981
34
    }
982
34
983
34
    DEBUG(dbgs() << "\nFinal Scop:\n");
984
34
    DEBUG(dbgs() << S);
985
34
986
34
    // Update statistics
987
34
    auto ScopStats = S.getStatistics();
988
34
    NumValueWrites += ScopStats.NumValueWrites;
989
34
    NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
990
34
    NumPHIWrites += ScopStats.NumPHIWrites;
991
34
    NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
992
34
    NumSingletonWrites += ScopStats.NumSingletonWrites;
993
34
    NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
994
34
995
34
    return false;
996
34
  }
997
998
33
  void printScop(raw_ostream &OS, Scop &S) const override {
999
33
    if (!Impl)
1000
0
      return;
1001
33
1002
33
    assert(Impl->getScop() == &S);
1003
33
    Impl->print(OS);
1004
33
  }
1005
1006
151
  void releaseMemory() override { Impl.reset(); }
1007
}; // class ForwardOpTree
1008
1009
char ForwardOpTree::ID;
1010
1011
} // namespace
1012
1013
0
ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); }
1014
1015
42.1k
INITIALIZE_PASS_BEGIN(ForwardOpTree, "polly-optree",
1016
42.1k
                      "Polly - Forward operand tree", false, false)
1017
42.1k
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
1018
42.1k
INITIALIZE_PASS_END(ForwardOpTree, "polly-optree",
1019
                    "Polly - Forward operand tree", false, false)