Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/tools/polly/lib/Transform/ForwardOpTree.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- ForwardOpTree.h ------------------------------------------*- C++ -*-===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// Move instructions between statements.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "polly/ForwardOpTree.h"
15
#include "polly/Options.h"
16
#include "polly/ScopBuilder.h"
17
#include "polly/ScopInfo.h"
18
#include "polly/ScopPass.h"
19
#include "polly/Support/GICHelper.h"
20
#include "polly/Support/ISLOStream.h"
21
#include "polly/Support/ISLTools.h"
22
#include "polly/Support/VirtualInstruction.h"
23
#include "polly/ZoneAlgo.h"
24
#include "llvm/ADT/STLExtras.h"
25
#include "llvm/ADT/SmallVector.h"
26
#include "llvm/ADT/Statistic.h"
27
#include "llvm/Analysis/LoopInfo.h"
28
#include "llvm/Analysis/ValueTracking.h"
29
#include "llvm/IR/Instruction.h"
30
#include "llvm/IR/Instructions.h"
31
#include "llvm/IR/Value.h"
32
#include "llvm/Pass.h"
33
#include "llvm/Support/Casting.h"
34
#include "llvm/Support/CommandLine.h"
35
#include "llvm/Support/Compiler.h"
36
#include "llvm/Support/Debug.h"
37
#include "llvm/Support/ErrorHandling.h"
38
#include "llvm/Support/raw_ostream.h"
39
#include "isl/ctx.h"
40
#include "isl/isl-noexceptions.h"
41
#include <cassert>
42
#include <memory>
43
44
#define DEBUG_TYPE "polly-optree"
45
46
using namespace llvm;
47
using namespace polly;
48
49
static cl::opt<bool>
50
    AnalyzeKnown("polly-optree-analyze-known",
51
                 cl::desc("Analyze array contents for load forwarding"),
52
                 cl::cat(PollyCategory), cl::init(true), cl::Hidden);
53
54
static cl::opt<unsigned>
55
    MaxOps("polly-optree-max-ops",
56
           cl::desc("Maximum number of ISL operations to invest for known "
57
                    "analysis; 0=no limit"),
58
           cl::init(1000000), cl::cat(PollyCategory), cl::Hidden);
59
60
STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs");
61
STATISTIC(KnownOutOfQuota,
62
          "Analyses aborted because max_operations was reached");
63
64
STATISTIC(TotalInstructionsCopied, "Number of copied instructions");
65
STATISTIC(TotalKnownLoadsForwarded,
66
          "Number of forwarded loads because their value was known");
67
STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses");
68
STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees");
69
STATISTIC(TotalModifiedStmts,
70
          "Number of statements with at least one forwarded tree");
71
72
STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree");
73
74
STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree");
75
STATISTIC(NumValueWritesInLoops,
76
          "Number of scalar value writes nested in affine loops after OpTree");
77
STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree");
78
STATISTIC(NumPHIWritesInLoops,
79
          "Number of scalar phi writes nested in affine loops after OpTree");
80
STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree");
81
STATISTIC(NumSingletonWritesInLoops,
82
          "Number of singleton writes nested in affine loops after OpTree");
83
84
namespace {
85
86
/// The state of whether an operand tree was/can be forwarded.
87
///
88
/// The items apply to an instructions and its operand tree with the instruction
89
/// as the root element. If the value in question is not an instruction in the
90
/// SCoP, it can be a leaf of an instruction's operand tree.
91
enum ForwardingDecision {
92
  /// The root instruction or value cannot be forwarded at all.
93
  FD_CannotForward,
94
95
  /// The root instruction or value can be forwarded as a leaf of a larger
96
  /// operand tree.
97
  /// It does not make sense to move the value itself, it would just replace it
98
  /// by a use of itself. For instance, a constant "5" used in a statement can
99
  /// be forwarded, but it would just replace it by the same constant "5".
100
  /// However, it makes sense to move as an operand of
101
  ///
102
  ///   %add = add 5, 5
103
  ///
104
  /// where "5" is moved as part of a larger operand tree. "5" would be placed
105
  /// (disregarding for a moment that literal constants don't have a location
106
  /// and can be used anywhere) into the same statement as %add would.
107
  FD_CanForwardLeaf,
108
109
  /// The root instruction can be forwarded in a non-trivial way. This requires
110
  /// the operand tree root to be an instruction in some statement.
111
  FD_CanForwardTree,
112
113
  /// Used to indicate that a forwarding has be carried out successfully.
114
  FD_DidForward,
115
116
  /// A forwarding method cannot be applied to the operand tree.
117
  /// The difference to FD_CannotForward is that there might be other methods
118
  /// that can handle it.
119
  /// The conditions that make an operand tree applicable must be checked even
120
  /// with DoIt==true because a method following the one that returned
121
  /// FD_NotApplicable might have returned FD_CanForwardTree.
122
  FD_NotApplicable
123
};
124
125
/// Implementation of operand tree forwarding for a specific SCoP.
126
///
127
/// For a statement that requires a scalar value (through a value read
128
/// MemoryAccess), see if its operand can be moved into the statement. If so,
129
/// the MemoryAccess is removed and the all the operand tree instructions are
130
/// moved into the statement. All original instructions are left in the source
131
/// statements. The simplification pass can clean these up.
132
class ForwardOpTreeImpl : ZoneAlgorithm {
133
private:
134
  /// Scope guard to limit the number of isl operations for this pass.
135
  IslMaxOperationsGuard &MaxOpGuard;
136
137
  /// How many instructions have been copied to other statements.
138
  int NumInstructionsCopied = 0;
139
140
  /// Number of loads forwarded because their value was known.
141
  int NumKnownLoadsForwarded = 0;
142
143
  /// How many read-only accesses have been copied.
144
  int NumReadOnlyCopied = 0;
145
146
  /// How many operand trees have been forwarded.
147
  int NumForwardedTrees = 0;
148
149
  /// Number of statements with at least one forwarded operand tree.
150
  int NumModifiedStmts = 0;
151
152
  /// Whether we carried out at least one change to the SCoP.
153
  bool Modified = false;
154
155
  /// Contains the zones where array elements are known to contain a specific
156
  /// value.
157
  /// { [Element[] -> Zone[]] -> ValInst[] }
158
  /// @see computeKnown()
159
  isl::union_map Known;
160
161
  /// Translator for newly introduced ValInsts to already existing ValInsts such
162
  /// that new introduced load instructions can reuse the Known analysis of its
163
  /// original load. { ValInst[] -> ValInst[] }
164
  isl::union_map Translator;
165
166
  /// Get list of array elements that do contain the same ValInst[] at Domain[].
167
  ///
168
  /// @param ValInst { Domain[] -> ValInst[] }
169
  ///                The values for which we search for alternative locations,
170
  ///                per statement instance.
171
  ///
172
  /// @return { Domain[] -> Element[] }
173
  ///         For each statement instance, the array elements that contain the
174
  ///         same ValInst.
175
33
  isl::union_map findSameContentElements(isl::union_map ValInst) {
176
33
    assert(ValInst.is_single_valued().is_true());
177
33
178
33
    // { Domain[] }
179
33
    isl::union_set Domain = ValInst.domain();
180
33
181
33
    // { Domain[] -> Scatter[] }
182
33
    isl::union_map Schedule = getScatterFor(Domain);
183
33
184
33
    // { Element[] -> [Scatter[] -> ValInst[]] }
185
33
    isl::union_map MustKnownCurried =
186
33
        convertZoneToTimepoints(Known, isl::dim::in, false, true).curry();
187
33
188
33
    // { [Domain[] -> ValInst[]] -> Scatter[] }
189
33
    isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule);
190
33
191
33
    // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] }
192
33
    isl::union_map SchedValDomVal =
193
33
        DomValSched.range_product(ValInst.range_map()).reverse();
194
33
195
33
    // { Element[] -> [Domain[] -> ValInst[]] }
196
33
    isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal);
197
33
198
33
    // { Domain[] -> Element[] }
199
33
    isl::union_map MustKnownMap =
200
33
        MustKnownInst.uncurry().domain().unwrap().reverse();
201
33
    simplify(MustKnownMap);
202
33
203
33
    return MustKnownMap;
204
33
  }
205
206
  /// Find a single array element for each statement instance, within a single
207
  /// array.
208
  ///
209
  /// @param MustKnown { Domain[] -> Element[] }
210
  ///                  Set of candidate array elements.
211
  /// @param Domain    { Domain[] }
212
  ///                  The statement instance for which we need elements for.
213
  ///
214
  /// @return { Domain[] -> Element[] }
215
  ///         For each statement instance, an array element out of @p MustKnown.
216
  ///         All array elements must be in the same array (Polly does not yet
217
  ///         support reading from different accesses using the same
218
  ///         MemoryAccess). If no mapping for all of @p Domain exists, returns
219
  ///         null.
220
33
  isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) {
221
33
    // { Domain[] -> Element[] }
222
33
    isl::map Result;
223
33
224
33
    // MemoryAccesses can read only elements from a single array
225
33
    // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }).
226
33
    // Look through all spaces until we find one that contains at least the
227
33
    // wanted statement instance.s
228
31
    MustKnown.foreach_map([&](isl::map Map) -> isl::stat {
229
31
      // Get the array this is accessing.
230
31
      isl::id ArrayId = Map.get_tuple_id(isl::dim::out);
231
31
      ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user());
232
31
233
31
      // No support for generation of indirect array accesses.
234
31
      if (SAI->getBasePtrOriginSAI())
235
0
        return isl::stat::ok; // continue
236
31
237
31
      // Determine whether this map contains all wanted values.
238
31
      isl::set MapDom = Map.domain();
239
31
      if (!Domain.is_subset(MapDom).is_true())
240
1
        return isl::stat::ok; // continue
241
30
242
30
      // There might be multiple array elements that contain the same value, but
243
30
      // choose only one of them. lexmin is used because it returns a one-value
244
30
      // mapping, we do not care about which one.
245
30
      // TODO: Get the simplest access function.
246
30
      Result = Map.lexmin();
247
30
      return isl::stat::error; // break
248
30
    });
249
33
250
33
    return Result;
251
33
  }
252
253
public:
254
  ForwardOpTreeImpl(Scop *S, LoopInfo *LI, IslMaxOperationsGuard &MaxOpGuard)
255
28
      : ZoneAlgorithm("polly-optree", S, LI), MaxOpGuard(MaxOpGuard) {}
256
257
  /// Compute the zones of known array element contents.
258
  ///
259
  /// @return True if the computed #Known is usable.
260
28
  bool computeKnownValues() {
261
28
    isl::union_map MustKnown, KnownFromLoad, KnownFromInit;
262
28
263
28
    // Check that nothing strange occurs.
264
28
    collectCompatibleElts();
265
28
266
28
    {
267
28
      IslQuotaScope QuotaScope = MaxOpGuard.enter();
268
28
269
28
      computeCommon();
270
28
      Known = computeKnown(true, true);
271
28
272
28
      // Preexisting ValInsts use the known content analysis of themselves.
273
28
      Translator = makeIdentityMap(Known.range(), false);
274
28
    }
275
28
276
28
    if (
!Known || 28
!Translator28
) {
277
0
      assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota);
278
0
      Known = nullptr;
279
0
      Translator = nullptr;
280
0
      DEBUG(dbgs() << "Known analysis exceeded max_operations\n");
281
0
      return false;
282
0
    }
283
28
284
28
    KnownAnalyzed++;
285
28
    DEBUG(dbgs() << "All known: " << Known << "\n");
286
28
287
28
    return true;
288
28
  }
289
290
28
  void printStatistics(raw_ostream &OS, int Indent = 0) {
291
28
    OS.indent(Indent) << "Statistics {\n";
292
28
    OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied
293
28
                          << '\n';
294
28
    OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded
295
28
                          << '\n';
296
28
    OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied
297
28
                          << '\n';
298
28
    OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees
299
28
                          << '\n';
300
28
    OS.indent(Indent + 4) << "Statements with forwarded operand trees: "
301
28
                          << NumModifiedStmts << '\n';
302
28
    OS.indent(Indent) << "}\n";
303
28
  }
304
305
21
  void printStatements(raw_ostream &OS, int Indent = 0) const {
306
21
    OS.indent(Indent) << "After statements {\n";
307
46
    for (auto &Stmt : *S) {
308
46
      OS.indent(Indent + 4) << Stmt.getBaseName() << "\n";
309
46
      for (auto *MA : Stmt)
310
84
        MA->print(OS);
311
46
312
46
      OS.indent(Indent + 12);
313
46
      Stmt.printInstructions(OS);
314
46
    }
315
21
    OS.indent(Indent) << "}\n";
316
21
  }
317
318
  /// Create a new MemoryAccess of type read and MemoryKind::Array.
319
  ///
320
  /// @param Stmt           The statement in which the access occurs.
321
  /// @param LI             The instruction that does the access.
322
  /// @param AccessRelation The array element that each statement instance
323
  ///                       accesses.
324
  ///
325
  /// @param The newly created access.
326
  MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI,
327
12
                                    isl::map AccessRelation) {
328
12
    isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out);
329
12
    ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user());
330
12
331
12
    // Create a dummy SCEV access, to be replaced anyway.
332
12
    SmallVector<const SCEV *, 4> Sizes;
333
12
    Sizes.reserve(SAI->getNumberOfDimensions());
334
12
    SmallVector<const SCEV *, 4> Subscripts;
335
12
    Subscripts.reserve(SAI->getNumberOfDimensions());
336
24
    for (unsigned i = 0; 
i < SAI->getNumberOfDimensions()24
;
i += 112
) {
337
12
      Sizes.push_back(SAI->getDimensionSize(i));
338
12
      Subscripts.push_back(nullptr);
339
12
    }
340
12
341
12
    MemoryAccess *Access =
342
12
        new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(),
343
12
                         LI->getType(), true, {}, Sizes, LI, MemoryKind::Array);
344
12
    S->addAccessFunction(Access);
345
12
    Stmt->addAccess(Access, true);
346
12
347
12
    Access->setNewAccessRelation(AccessRelation);
348
12
349
12
    return Access;
350
12
  }
351
352
  /// For an llvm::Value defined in @p DefStmt, compute the RAW dependency for a
353
  /// use in every instance of @p UseStmt.
354
  ///
355
  /// @param UseStmt Statement a scalar is used in.
356
  /// @param DefStmt Statement a scalar is defined in.
357
  ///
358
  /// @return { DomainUse[] -> DomainDef[] }
359
58
  isl::map computeUseToDefFlowDependency(ScopStmt *UseStmt, ScopStmt *DefStmt) {
360
58
    // { DomainUse[] -> Scatter[] }
361
58
    isl::map UseScatter = getScatterFor(UseStmt);
362
58
363
58
    // { Zone[] -> DomainDef[] }
364
58
    isl::map ReachDefZone = getScalarReachingDefinition(DefStmt);
365
58
366
58
    // { Scatter[] -> DomainDef[] }
367
58
    isl::map ReachDefTimepoints =
368
58
        convertZoneToTimepoints(ReachDefZone, isl::dim::in, false, true);
369
58
370
58
    // { DomainUse[] -> DomainDef[] }
371
58
    return UseScatter.apply_range(ReachDefTimepoints);
372
58
  }
373
374
  /// Forward a load by reading from an array element that contains the same
375
  /// value. Typically the location it was loaded from.
376
  ///
377
  /// @param TargetStmt  The statement the operand tree will be copied to.
378
  /// @param Inst        The (possibly speculatable) instruction to forward.
379
  /// @param UseStmt     The statement that uses @p Inst.
380
  /// @param UseLoop     The loop @p Inst is used in.
381
  /// @param UseToTarget { DomainUse[] -> DomainTarget[] }
382
  ///                    A mapping from the statement instance @p Inst is used
383
  ///                    to the statement instance it is forwarded to.
384
  /// @param DefStmt     The statement @p Inst is defined in.
385
  /// @param DefLoop     The loop which contains @p Inst.
386
  /// @param DefToTarget { DomainDef[] -> DomainTarget[] }
387
  ///                    A mapping from the statement instance @p Inst is
388
  ///                    defined to the statement instance it is forwarded to.
389
  /// @param DoIt        If false, only determine whether an operand tree can be
390
  ///                    forwarded. If true, carry out the forwarding. Do not
391
  ///                    use DoIt==true if an operand tree is not known to be
392
  ///                    forwardable.
393
  ///
394
  /// @return FD_NotApplicable  if @p Inst is not a LoadInst.
395
  ///         FD_CannotForward  if no array element to load from was found.
396
  ///         FD_CanForwardLeaf if the load is already in the target statement
397
  ///                           instance.
398
  ///         FD_CanForwardTree if @p Inst is forwardable.
399
  ///         FD_DidForward     if @p DoIt was true.
400
  ForwardingDecision forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst,
401
                                      ScopStmt *UseStmt, Loop *UseLoop,
402
                                      isl::map UseToTarget, ScopStmt *DefStmt,
403
                                      Loop *DefLoop, isl::map DefToTarget,
404
41
                                      bool DoIt) {
405
41
    // Cannot do anything without successful known analysis.
406
41
    if (
Known.is_null() || 41
Translator.is_null()41
||
UseToTarget.is_null()41
||
407
41
        
DefToTarget.is_null()41
||
MaxOpGuard.hasQuotaExceeded()41
)
408
0
      return FD_NotApplicable;
409
41
410
41
    LoadInst *LI = dyn_cast<LoadInst>(Inst);
411
41
    if (!LI)
412
4
      return FD_NotApplicable;
413
37
414
37
    // If the load is already in the statement, no forwarding is necessary.
415
37
    // However, it might happen that the LoadInst is already present in the
416
37
    // statement's instruction list. In that case we do as follows:
417
37
    // - For the evaluation (DoIt==false), we can trivially forward it as it is
418
37
    //   benefit of forwarding an already present instruction.
419
37
    // - For the execution (DoIt==true), prepend the instruction (to make it
420
37
    //   available to all instructions following in the instruction list), but
421
37
    //   do not add another MemoryAccess.
422
37
    MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI);
423
37
    if (
Access && 37
!DoIt9
)
424
4
      return FD_CanForwardTree;
425
33
426
33
    
if (33
DoIt33
)
427
17
      TargetStmt->prependInstruction(LI);
428
33
429
33
    ForwardingDecision OpDecision =
430
33
        forwardTree(TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop,
431
33
                    DefToTarget, DoIt);
432
33
    switch (OpDecision) {
433
0
    case FD_CannotForward:
434
0
      assert(!DoIt);
435
0
      return OpDecision;
436
33
437
16
    case FD_CanForwardLeaf:
438
16
    case FD_CanForwardTree:
439
16
      assert(!DoIt);
440
16
      break;
441
16
442
17
    case FD_DidForward:
443
17
      assert(DoIt);
444
17
      break;
445
16
446
0
    default:
447
0
      llvm_unreachable("Shouldn't return this");
448
33
    }
449
33
450
33
    IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
451
33
452
33
    // { DomainDef[] -> ValInst[] }
453
33
    isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
454
33
455
33
    // { DomainTarget[] -> ValInst[] }
456
33
    isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
457
33
    isl::union_map TranslatedExpectedVal =
458
33
        isl::union_map(TargetExpectedVal).apply_range(Translator);
459
33
460
33
    // { DomainTarget[] -> Element[] }
461
33
    isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
462
33
463
33
    isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
464
33
    if (!SameVal)
465
3
      return FD_CannotForward;
466
30
467
30
    
if (30
!DoIt30
)
468
13
      return FD_CanForwardTree;
469
17
470
17
    
if (17
Access17
) {
471
5
      DEBUG(dbgs() << "    forwarded known load with preexisting MemoryAccess"
472
5
                   << Access << "\n");
473
17
    } else {
474
12
      Access = makeReadArrayAccess(TargetStmt, LI, SameVal);
475
12
      DEBUG(dbgs() << "    forwarded known load with new MemoryAccess" << Access
476
12
                   << "\n");
477
12
478
12
      // { ValInst[] }
479
12
      isl::space ValInstSpace = ExpectedVal.get_space().range();
480
12
481
12
      // After adding a new load to the SCoP, also update the Known content
482
12
      // about it. The new load will have a known ValInst of
483
12
      // { [DomainTarget[] -> Value[]] }
484
12
      // but which -- because it is a copy of it -- has same value as the
485
12
      // { [DomainDef[] -> Value[]] }
486
12
      // that it replicates. Instead of  cloning the known content of
487
12
      // [DomainDef[] -> Value[]]
488
12
      // for DomainTarget[], we add a 'translator' that maps
489
12
      // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]]
490
12
      // before comparing to the known content.
491
12
      // TODO: 'Translator' could also be used to map PHINodes to their incoming
492
12
      // ValInsts.
493
12
      if (
ValInstSpace.is_wrapping()12
) {
494
12
        // { DefDomain[] -> Value[] }
495
12
        isl::map ValInsts = ExpectedVal.range().unwrap();
496
12
497
12
        // { DefDomain[] }
498
12
        isl::set DefDomain = ValInsts.domain();
499
12
500
12
        // { Value[] }
501
12
        isl::space ValSpace = ValInstSpace.unwrap().range();
502
12
503
12
        // { Value[] -> Value[] }
504
12
        isl::map ValToVal =
505
12
            isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace));
506
12
507
12
        // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] }
508
12
        isl::map LocalTranslator = DefToTarget.reverse().product(ValToVal);
509
12
510
12
        Translator = Translator.add_map(LocalTranslator);
511
12
        DEBUG(dbgs() << "      local translator is " << LocalTranslator
512
12
                     << "\n");
513
12
      }
514
12
    }
515
17
    DEBUG(dbgs() << "      expected values where " << TargetExpectedVal
516
17
                 << "\n");
517
17
    DEBUG(dbgs() << "      candidate elements where " << Candidates << "\n");
518
41
    assert(Access);
519
41
520
41
    NumKnownLoadsForwarded++;
521
41
    TotalKnownLoadsForwarded++;
522
41
    return FD_DidForward;
523
41
  }
524
525
  /// Forwards a speculatively executable instruction.
526
  ///
527
  /// @param TargetStmt  The statement the operand tree will be copied to.
528
  /// @param UseInst     The (possibly speculatable) instruction to forward.
529
  /// @param DefStmt     The statement @p UseInst is defined in.
530
  /// @param DefLoop     The loop which contains @p UseInst.
531
  /// @param DefToTarget { DomainDef[] -> DomainTarget[] }
532
  ///                    A mapping from the statement instance @p UseInst is
533
  ///                    defined to the statement instance it is forwarded to.
534
  /// @param DoIt        If false, only determine whether an operand tree can be
535
  ///                    forwarded. If true, carry out the forwarding. Do not
536
  ///                    use DoIt==true if an operand tree is not known to be
537
  ///                    forwardable.
538
  ///
539
  /// @return FD_NotApplicable  if @p UseInst is not speculatable.
540
  ///         FD_CannotForward  if one of @p UseInst's operands is not
541
  ///                           forwardable.
542
  ///         FD_CanForwardTree if @p UseInst is forwardable.
543
  ///         FD_DidForward     if @p DoIt was true.
544
  ForwardingDecision forwardSpeculatable(ScopStmt *TargetStmt,
545
                                         Instruction *UseInst,
546
                                         ScopStmt *DefStmt, Loop *DefLoop,
547
74
                                         isl::map DefToTarget, bool DoIt) {
548
74
    // PHIs, unless synthesizable, are not yet supported.
549
74
    if (isa<PHINode>(UseInst))
550
2
      return FD_NotApplicable;
551
72
552
72
    // Compatible instructions must satisfy the following conditions:
553
72
    // 1. Idempotent (instruction will be copied, not moved; although its
554
72
    //    original instance might be removed by simplification)
555
72
    // 2. Not access memory (There might be memory writes between)
556
72
    // 3. Not cause undefined behaviour (we might copy to a location when the
557
72
    //    original instruction was no executed; this is currently not possible
558
72
    //    because we do not forward PHINodes)
559
72
    // 4. Not leak memory if executed multiple times (i.e. malloc)
560
72
    //
561
72
    // Instruction::mayHaveSideEffects is not sufficient because it considers
562
72
    // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is
563
72
    // not sufficient because it allows memory accesses.
564
72
    
if (72
mayBeMemoryDependent(*UseInst)72
)
565
39
      return FD_NotApplicable;
566
33
567
33
    
if (33
DoIt33
) {
568
16
      // To ensure the right order, prepend this instruction before its
569
16
      // operands. This ensures that its operands are inserted before the
570
16
      // instruction using them.
571
16
      // TODO: The operand tree is not really a tree, but a DAG. We should be
572
16
      // able to handle DAGs without duplication.
573
16
      TargetStmt->prependInstruction(UseInst);
574
16
      NumInstructionsCopied++;
575
16
      TotalInstructionsCopied++;
576
16
    }
577
33
578
59
    for (Value *OpVal : UseInst->operand_values()) {
579
59
      ForwardingDecision OpDecision =
580
59
          forwardTree(TargetStmt, OpVal, DefStmt, DefLoop, DefToTarget, DoIt);
581
59
      switch (OpDecision) {
582
1
      case FD_CannotForward:
583
1
        assert(!DoIt);
584
1
        return FD_CannotForward;
585
59
586
29
      case FD_CanForwardLeaf:
587
29
      case FD_CanForwardTree:
588
29
        assert(!DoIt);
589
29
        break;
590
29
591
29
      case FD_DidForward:
592
29
        assert(DoIt);
593
29
        break;
594
29
595
0
      case FD_NotApplicable:
596
0
        llvm_unreachable("forwardTree should never return FD_NotApplicable");
597
32
      }
598
32
    }
599
32
600
32
    
if (32
DoIt32
)
601
16
      return FD_DidForward;
602
16
    return FD_CanForwardTree;
603
16
  }
604
605
  /// Determines whether an operand tree can be forwarded or carries out a
606
  /// forwarding, depending on the @p DoIt flag.
607
  ///
608
  /// @param TargetStmt  The statement the operand tree will be copied to.
609
  /// @param UseVal      The value (usually an instruction) which is root of an
610
  ///                    operand tree.
611
  /// @param UseStmt     The statement that uses @p UseVal.
612
  /// @param UseLoop     The loop @p UseVal is used in.
613
  /// @param UseToTarget { DomainUse[] -> DomainTarget[] }
614
  ///                    A mapping from the statement instance @p UseVal is used
615
  ///                    to the statement instance it is forwarded to.
616
  /// @param DoIt        If false, only determine whether an operand tree can be
617
  ///                    forwarded. If true, carry out the forwarding. Do not
618
  ///                    use DoIt==true if an operand tree is not known to be
619
  ///                    forwardable.
620
  ///
621
  /// @return If DoIt==false, return whether the operand tree can be forwarded.
622
  ///         If DoIt==true, return FD_DidForward.
623
  ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal,
624
                                 ScopStmt *UseStmt, Loop *UseLoop,
625
153
                                 isl::map UseToTarget, bool DoIt) {
626
153
    ScopStmt *DefStmt = nullptr;
627
153
    Loop *DefLoop = nullptr;
628
153
629
153
    // { DefDomain[] -> TargetDomain[] }
630
153
    isl::map DefToTarget;
631
153
632
153
    VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true);
633
153
    switch (VUse.getKind()) {
634
34
    case VirtualUse::Constant:
635
34
    case VirtualUse::Block:
636
34
    case VirtualUse::Hoisted:
637
34
      // These can be used anywhere without special considerations.
638
34
      if (DoIt)
639
17
        return FD_DidForward;
640
17
      return FD_CanForwardLeaf;
641
17
642
39
    case VirtualUse::Synthesizable: {
643
39
      // ScopExpander will take care for of generating the code at the new
644
39
      // location.
645
39
      if (DoIt)
646
20
        return FD_DidForward;
647
19
648
19
      // Check if the value is synthesizable at the new location as well. This
649
19
      // might be possible when leaving a loop for which ScalarEvolution is
650
19
      // unable to derive the exit value for.
651
19
      // TODO: If there is a LCSSA PHI at the loop exit, use that one.
652
19
      // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we
653
19
      // do not forward past its loop header. This would require us to use a
654
19
      // previous loop induction variable instead the current one. We currently
655
19
      // do not allow forwarding PHI nodes, thus this should never occur (the
656
19
      // only exception where no phi is necessary being an unreachable loop
657
19
      // without edge from the outside).
658
19
      VirtualUse TargetUse = VirtualUse::create(
659
19
          S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true);
660
19
      if (TargetUse.getKind() == VirtualUse::Synthesizable)
661
19
        return FD_CanForwardLeaf;
662
0
663
0
      
DEBUG0
(dbgs() << " Synthesizable would not be synthesizable anymore: "
664
0
                   << *UseVal << "\n");
665
0
      return FD_CannotForward;
666
0
    }
667
0
668
5
    case VirtualUse::ReadOnly:
669
5
      // Note that we cannot return FD_CanForwardTree here. With a operand tree
670
5
      // depth of 0, UseVal is the use in TargetStmt that we try to replace.
671
5
      // With -polly-analyze-read-only-scalars=true we would ensure the
672
5
      // existence of a MemoryAccess (which already exists for a leaf) and be
673
5
      // removed again by tryForwardTree because it's goal is to remove this
674
5
      // scalar MemoryAccess. It interprets FD_CanForwardTree as the permission
675
5
      // to do so.
676
5
      if (!DoIt)
677
3
        return FD_CanForwardLeaf;
678
2
679
2
      // If we model read-only scalars, we need to create a MemoryAccess for it.
680
2
      
if (2
ModelReadOnlyScalars2
)
681
1
        TargetStmt->ensureValueRead(UseVal);
682
2
683
2
      NumReadOnlyCopied++;
684
2
      TotalReadOnlyCopied++;
685
2
      return FD_DidForward;
686
2
687
16
    case VirtualUse::Intra:
688
16
      // Knowing that UseStmt and DefStmt are the same statement instance, just
689
16
      // reuse the information about UseStmt for DefStmt
690
16
      DefStmt = UseStmt;
691
16
      DefToTarget = UseToTarget;
692
16
693
16
      LLVM_FALLTHROUGH;
694
75
    case VirtualUse::Inter:
695
75
      Instruction *Inst = cast<Instruction>(UseVal);
696
75
697
75
      if (
!DefStmt75
) {
698
59
        DefStmt = S->getStmtFor(Inst);
699
59
        if (!DefStmt)
700
1
          return FD_CannotForward;
701
74
      }
702
74
703
74
      DefLoop = LI->getLoopFor(Inst->getParent());
704
74
705
74
      if (
DefToTarget.is_null() && 74
!Known.is_null()58
) {
706
58
        IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
707
58
708
58
        // { UseDomain[] -> DefDomain[] }
709
58
        isl::map UseToDef = computeUseToDefFlowDependency(UseStmt, DefStmt);
710
58
711
58
        // { DefDomain[] -> UseDomain[] -> TargetDomain[] } shortened to
712
58
        // { DefDomain[] -> TargetDomain[] }
713
58
        DefToTarget = UseToTarget.apply_domain(UseToDef);
714
58
        simplify(DefToTarget);
715
58
      }
716
74
717
74
      ForwardingDecision SpeculativeResult = forwardSpeculatable(
718
74
          TargetStmt, Inst, DefStmt, DefLoop, DefToTarget, DoIt);
719
74
      if (SpeculativeResult != FD_NotApplicable)
720
33
        return SpeculativeResult;
721
41
722
41
      ForwardingDecision KnownResult =
723
41
          forwardKnownLoad(TargetStmt, Inst, UseStmt, UseLoop, UseToTarget,
724
41
                           DefStmt, DefLoop, DefToTarget, DoIt);
725
41
      if (KnownResult != FD_NotApplicable)
726
37
        return KnownResult;
727
4
728
4
      // When no method is found to forward the operand tree, we effectively
729
4
      // cannot handle it.
730
4
      
DEBUG4
(dbgs() << " Cannot forward instruction: " << *Inst << "\n");
731
4
      return FD_CannotForward;
732
0
    }
733
0
734
0
    
llvm_unreachable0
("Case unhandled");
735
0
  }
736
737
  /// Try to forward an operand tree rooted in @p RA.
738
35
  bool tryForwardTree(MemoryAccess *RA) {
739
35
    assert(RA->isLatestScalarKind());
740
35
    DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n");
741
35
742
35
    ScopStmt *Stmt = RA->getStatement();
743
35
    Loop *InLoop = Stmt->getSurroundingLoop();
744
35
745
35
    isl::map TargetToUse;
746
35
    if (
!Known.is_null()35
) {
747
35
      isl::space DomSpace = Stmt->getDomainSpace();
748
35
      TargetToUse =
749
35
          isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace));
750
35
    }
751
35
752
35
    ForwardingDecision Assessment = forwardTree(
753
35
        Stmt, RA->getAccessValue(), Stmt, InLoop, TargetToUse, false);
754
35
    assert(Assessment != FD_DidForward);
755
35
    if (Assessment != FD_CanForwardTree)
756
9
      return false;
757
26
758
26
    ForwardingDecision Execution = forwardTree(Stmt, RA->getAccessValue(), Stmt,
759
26
                                               InLoop, TargetToUse, true);
760
26
    assert(Execution == FD_DidForward &&
761
26
           "A previous positive assessment must also be executable");
762
26
    (void)Execution;
763
26
764
26
    Stmt->removeSingleMemoryAccess(RA);
765
26
    return true;
766
26
  }
767
768
  /// Return which SCoP this instance is processing.
769
0
  Scop *getScop() const { return S; }
770
771
  /// Run the algorithm: Use value read accesses as operand tree roots and try
772
  /// to forward them into the statement.
773
28
  bool forwardOperandTrees() {
774
63
    for (ScopStmt &Stmt : *S) {
775
63
      bool StmtModified = false;
776
63
777
63
      // Because we are modifying the MemoryAccess list, collect them first to
778
63
      // avoid iterator invalidation.
779
63
      SmallVector<MemoryAccess *, 16> Accs;
780
126
      for (MemoryAccess *RA : Stmt) {
781
126
        if (!RA->isRead())
782
77
          continue;
783
49
        
if (49
!RA->isLatestScalarKind()49
)
784
14
          continue;
785
35
786
35
        Accs.push_back(RA);
787
35
      }
788
63
789
35
      for (MemoryAccess *RA : Accs) {
790
35
        if (
tryForwardTree(RA)35
) {
791
26
          Modified = true;
792
26
          StmtModified = true;
793
26
          NumForwardedTrees++;
794
26
          TotalForwardedTrees++;
795
26
        }
796
35
      }
797
63
798
63
      if (
StmtModified63
) {
799
23
        NumModifiedStmts++;
800
23
        TotalModifiedStmts++;
801
23
      }
802
63
    }
803
28
804
28
    if (Modified)
805
21
      ScopsModified++;
806
28
    return Modified;
807
28
  }
808
809
  /// Print the pass result, performed transformations and the SCoP after the
810
  /// transformation.
811
28
  void print(raw_ostream &OS, int Indent = 0) {
812
28
    printStatistics(OS, Indent);
813
28
814
28
    if (
!Modified28
) {
815
7
      // This line can easily be checked in regression tests.
816
7
      OS << "ForwardOpTree executed, but did not modify anything\n";
817
7
      return;
818
7
    }
819
21
820
21
    printStatements(OS, Indent);
821
21
  }
822
};
823
824
/// Pass that redirects scalar reads to array elements that are known to contain
825
/// the same value.
826
///
827
/// This reduces the number of scalar accesses and therefore potentially
828
/// increases the freedom of the scheduler. In the ideal case, all reads of a
829
/// scalar definition are redirected (We currently do not care about removing
830
/// the write in this case).  This is also useful for the main DeLICM pass as
831
/// there are less scalars to be mapped.
832
class ForwardOpTree : public ScopPass {
833
private:
834
  /// The pass implementation, also holding per-scop data.
835
  std::unique_ptr<ForwardOpTreeImpl> Impl;
836
837
public:
838
  static char ID;
839
840
27
  explicit ForwardOpTree() : ScopPass(ID) {}
841
  ForwardOpTree(const ForwardOpTree &) = delete;
842
  ForwardOpTree &operator=(const ForwardOpTree &) = delete;
843
844
27
  void getAnalysisUsage(AnalysisUsage &AU) const override {
845
27
    AU.addRequiredTransitive<ScopInfoRegionPass>();
846
27
    AU.addRequired<LoopInfoWrapperPass>();
847
27
    AU.setPreservesAll();
848
27
  }
849
850
28
  bool runOnScop(Scop &S) override {
851
28
    // Free resources for previous SCoP's computation, if not yet done.
852
28
    releaseMemory();
853
28
854
28
    LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
855
28
856
28
    {
857
28
      IslMaxOperationsGuard MaxOpGuard(S.getIslCtx(), MaxOps, false);
858
28
      Impl = llvm::make_unique<ForwardOpTreeImpl>(&S, &LI, MaxOpGuard);
859
28
860
28
      if (
AnalyzeKnown28
) {
861
28
        DEBUG(dbgs() << "Prepare forwarders...\n");
862
28
        Impl->computeKnownValues();
863
28
      }
864
28
865
28
      DEBUG(dbgs() << "Forwarding operand trees...\n");
866
28
      Impl->forwardOperandTrees();
867
28
868
28
      if (
MaxOpGuard.hasQuotaExceeded()28
) {
869
0
        DEBUG(dbgs() << "Not all operations completed because of "
870
0
                        "max_operations exceeded\n");
871
0
        KnownOutOfQuota++;
872
0
      }
873
28
    }
874
28
875
28
    DEBUG(dbgs() << "\nFinal Scop:\n");
876
28
    DEBUG(dbgs() << S);
877
28
878
28
    // Update statistics
879
28
    auto ScopStats = S.getStatistics();
880
28
    NumValueWrites += ScopStats.NumValueWrites;
881
28
    NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
882
28
    NumPHIWrites += ScopStats.NumPHIWrites;
883
28
    NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
884
28
    NumSingletonWrites += ScopStats.NumSingletonWrites;
885
28
    NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
886
28
887
28
    return false;
888
28
  }
889
890
28
  void printScop(raw_ostream &OS, Scop &S) const override {
891
28
    if (!Impl)
892
0
      return;
893
28
894
28
    assert(Impl->getScop() == &S);
895
28
    Impl->print(OS);
896
28
  }
897
898
120
  void releaseMemory() override { Impl.reset(); }
899
}; // class ForwardOpTree
900
901
char ForwardOpTree::ID;
902
903
} // namespace
904
905
0
ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); }
906
907
41.7k
INITIALIZE_PASS_BEGIN41.7k
(ForwardOpTree, "polly-optree",
908
41.7k
                      "Polly - Forward operand tree", false, false)
909
41.7k
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
910
41.7k
INITIALIZE_PASS_END(ForwardOpTree, "polly-optree",
911
                    "Polly - Forward operand tree", false, false)