Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/CodeGen/MacroFusion.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- MacroFusion.cpp - Macro Fusion -------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
/// \file This file contains the implementation of the DAG scheduling mutation
10
/// to pair instructions back to back.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "llvm/CodeGen/MacroFusion.h"
15
#include "llvm/ADT/STLExtras.h"
16
#include "llvm/ADT/Statistic.h"
17
#include "llvm/CodeGen/MachineInstr.h"
18
#include "llvm/CodeGen/MachineScheduler.h"
19
#include "llvm/CodeGen/ScheduleDAG.h"
20
#include "llvm/CodeGen/ScheduleDAGMutation.h"
21
#include "llvm/CodeGen/TargetInstrInfo.h"
22
#include "llvm/Support/CommandLine.h"
23
#include "llvm/Support/Debug.h"
24
#include "llvm/Support/raw_ostream.h"
25
26
#define DEBUG_TYPE "machine-scheduler"
27
28
STATISTIC(NumFused, "Number of instr pairs fused");
29
30
using namespace llvm;
31
32
static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
33
  cl::desc("Enable scheduling for macro fusion."), cl::init(true));
34
35
1.27M
static bool isHazard(const SDep &Dep) {
36
1.27M
  return Dep.getKind() == SDep::Anti || 
Dep.getKind() == SDep::Output1.27M
;
37
1.27M
}
38
39
static bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU,
40
459k
                                SUnit &SecondSU) {
41
459k
  // Check that neither instr is already paired with another along the edge
42
459k
  // between them.
43
459k
  for (SDep &SI : FirstSU.Succs)
44
485k
    if (SI.isCluster())
45
1.44k
      return false;
46
459k
47
459k
  
for (SDep &SI : SecondSU.Preds)457k
48
538k
    if (SI.isCluster())
49
0
      return false;
50
457k
  // Though the reachability checks above could be made more generic,
51
457k
  // perhaps as part of ScheduleDAGInstrs::addEdge(), since such edges are valid,
52
457k
  // the extra computation cost makes it less interesting in general cases.
53
457k
54
457k
  // Create a single weak edge between the adjacent instrs. The only effect is
55
457k
  // to cause bottom-up scheduling to heavily prioritize the clustered instrs.
56
457k
  if (!DAG.addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster)))
57
0
    return false;
58
457k
59
457k
  // Adjust the latency between both instrs.
60
457k
  for (SDep &SI : FirstSU.Succs)
61
936k
    if (SI.getSUnit() == &SecondSU)
62
916k
      SI.setLatency(0);
63
457k
64
457k
  for (SDep &SI : SecondSU.Preds)
65
996k
    if (SI.getSUnit() == &FirstSU)
66
916k
      SI.setLatency(0);
67
457k
68
457k
  LLVM_DEBUG(
69
457k
      dbgs() << "Macro fuse: "; DAG.dumpNodeName(FirstSU); dbgs() << " - ";
70
457k
      DAG.dumpNodeName(SecondSU); dbgs() << " /  ";
71
457k
      dbgs() << DAG.TII->getName(FirstSU.getInstr()->getOpcode()) << " - "
72
457k
             << DAG.TII->getName(SecondSU.getInstr()->getOpcode()) << '\n';);
73
457k
74
457k
  // Make data dependencies from the FirstSU also dependent on the SecondSU to
75
457k
  // prevent them from being scheduled between the FirstSU and the SecondSU.
76
457k
  if (&SecondSU != &DAG.ExitSU)
77
33.4k
    
for (const SDep &SI : FirstSU.Succs)11.1k
{
78
33.4k
      SUnit *SU = SI.getSUnit();
79
33.4k
      if (SI.isWeak() || 
isHazard(SI)22.2k
||
80
33.4k
          
SU == &DAG.ExitSU21.6k
||
SU == &SecondSU21.6k
||
SU->isPred(&SecondSU)10.4k
)
81
29.3k
        continue;
82
4.06k
      LLVM_DEBUG(dbgs() << "  Bind "; DAG.dumpNodeName(SecondSU);
83
4.06k
                 dbgs() << " - "; DAG.dumpNodeName(*SU); dbgs() << '\n';);
84
4.06k
      DAG.addEdge(SU, SDep(&SecondSU, SDep::Artificial));
85
4.06k
    }
86
457k
87
457k
  // Make the FirstSU also dependent on the dependencies of the SecondSU to
88
457k
  // prevent them from being scheduled between the FirstSU and the SecondSU.
89
457k
  if (&FirstSU != &DAG.EntrySU) {
90
996k
    for (const SDep &SI : SecondSU.Preds) {
91
996k
      SUnit *SU = SI.getSUnit();
92
996k
      if (SI.isWeak() || 
isHazard(SI)538k
||
&FirstSU == SU537k
||
FirstSU.isSucc(SU)79.9k
)
93
916k
        continue;
94
79.9k
      LLVM_DEBUG(dbgs() << "  Bind "; DAG.dumpNodeName(*SU); dbgs() << " - ";
95
79.9k
                 DAG.dumpNodeName(FirstSU); dbgs() << '\n';);
96
79.9k
      DAG.addEdge(&FirstSU, SDep(SU, SDep::Artificial));
97
79.9k
    }
98
457k
    // ExitSU comes last by design, which acts like an implicit dependency
99
457k
    // between ExitSU and any bottom root in the graph. We should transfer
100
457k
    // this to FirstSU as well.
101
457k
    if (&SecondSU == &DAG.ExitSU) {
102
2.13M
      for (SUnit &SU : DAG.SUnits) {
103
2.13M
        if (SU.Succs.empty())
104
225k
          DAG.addEdge(&FirstSU, SDep(&SU, SDep::Artificial));
105
2.13M
      }
106
446k
    }
107
457k
  }
108
457k
109
457k
  ++NumFused;
110
457k
  return true;
111
457k
}
112
113
namespace {
114
115
/// Post-process the DAG to create cluster edges between instrs that may
116
/// be fused by the processor into a single operation.
117
class MacroFusion : public ScheduleDAGMutation {
118
  ShouldSchedulePredTy shouldScheduleAdjacent;
119
  bool FuseBlock;
120
  bool scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU);
121
122
public:
123
  MacroFusion(ShouldSchedulePredTy shouldScheduleAdjacent, bool FuseBlock)
124
430k
    : shouldScheduleAdjacent(shouldScheduleAdjacent), FuseBlock(FuseBlock) {}
125
126
  void apply(ScheduleDAGInstrs *DAGInstrs) override;
127
};
128
129
} // end anonymous namespace
130
131
2.60M
void MacroFusion::apply(ScheduleDAGInstrs *DAG) {
132
2.60M
  if (FuseBlock)
133
2.24M
    // For each of the SUnits in the scheduling block, try to fuse the instr in
134
2.24M
    // it with one in its predecessors.
135
2.24M
    for (SUnit &ISU : DAG->SUnits)
136
9.84M
        scheduleAdjacentImpl(*DAG, ISU);
137
2.60M
138
2.60M
  if (DAG->ExitSU.getInstr())
139
2.41M
    // Try to fuse the instr in the ExitSU with one in its predecessors.
140
2.41M
    scheduleAdjacentImpl(*DAG, DAG->ExitSU);
141
2.60M
}
142
143
/// Implement the fusion of instr pairs in the scheduling DAG,
144
/// anchored at the instr in AnchorSU..
145
12.2M
bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU) {
146
12.2M
  const MachineInstr &AnchorMI = *AnchorSU.getInstr();
147
12.2M
  const TargetInstrInfo &TII = *DAG.TII;
148
12.2M
  const TargetSubtargetInfo &ST = DAG.MF.getSubtarget();
149
12.2M
150
12.2M
  // Check if the anchor instr may be fused.
151
12.2M
  if (!shouldScheduleAdjacent(TII, ST, nullptr, AnchorMI))
152
11.5M
    return false;
153
671k
154
671k
  // Explorer for fusion candidates among the dependencies of the anchor instr.
155
716k
  
for (SDep &Dep : AnchorSU.Preds)671k
{
156
716k
    // Ignore dependencies other than data or strong ordering.
157
716k
    if (Dep.isWeak() || 
isHazard(Dep)716k
)
158
312
      continue;
159
716k
160
716k
    SUnit &DepSU = *Dep.getSUnit();
161
716k
    if (DepSU.isBoundaryNode())
162
0
      continue;
163
716k
164
716k
    const MachineInstr *DepMI = DepSU.getInstr();
165
716k
    if (!shouldScheduleAdjacent(TII, ST, DepMI, AnchorMI))
166
256k
      continue;
167
459k
168
459k
    if (fuseInstructionPair(DAG, DepSU, AnchorSU))
169
457k
      return true;
170
459k
  }
171
671k
172
671k
  
return false213k
;
173
671k
}
174
175
std::unique_ptr<ScheduleDAGMutation>
176
llvm::createMacroFusionDAGMutation(
177
293k
     ShouldSchedulePredTy shouldScheduleAdjacent) {
178
293k
  if(EnableMacroFusion)
179
293k
    return llvm::make_unique<MacroFusion>(shouldScheduleAdjacent, true);
180
0
  return nullptr;
181
0
}
182
183
std::unique_ptr<ScheduleDAGMutation>
184
llvm::createBranchMacroFusionDAGMutation(
185
136k
     ShouldSchedulePredTy shouldScheduleAdjacent) {
186
136k
  if(EnableMacroFusion)
187
136k
    return llvm::make_unique<MacroFusion>(shouldScheduleAdjacent, false);
188
0
  return nullptr;
189
0
}