Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/X86/X86MacroFusion.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- X86MacroFusion.cpp - X86 Macro Fusion ------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
/// \file This file contains the X86 implementation of the DAG scheduling
10
/// mutation to pair instructions back to back.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "X86MacroFusion.h"
15
#include "X86Subtarget.h"
16
#include "llvm/CodeGen/MacroFusion.h"
17
#include "llvm/CodeGen/TargetInstrInfo.h"
18
19
using namespace llvm;
20
21
namespace {
22
23
// Classification of the first instruction of a candidate pair, as assigned by classifyFirst(); Invalid marks opcodes it does not list.
24
enum class FirstInstrKind { Test, Cmp, And, ALU, IncDec, Invalid };
25
26
// Classification of the second instruction (jump), as assigned by classifySecond() from the branch condition code.
27
enum class JumpKind {
28
  // JE, JL, JG and variants: COND_E, COND_NE, COND_L, COND_LE, COND_G, COND_GE.
29
  ELG,
30
  // JA, JB and variants: COND_B, COND_BE, COND_A, COND_AE.
31
  AB,
32
  // JS, JP, JO and variants: COND_S, COND_NS, COND_P, COND_NP, COND_O, COND_NO.
33
  SPO,
34
  // Not a fusable jump: COND_INVALID or any condition code classifySecond() does not list.
35
  Invalid,
36
};
37
38
} // namespace
39
/// Map MI's opcode to the FirstInstrKind consumed by shouldScheduleAdjacent(); opcodes not listed in the switch yield Invalid.
40
85.7k
static FirstInstrKind classifyFirst(const MachineInstr &MI) {
41
85.7k
  switch (MI.getOpcode()) {
42
85.7k
  default:
43
16.4k
    return FirstInstrKind::Invalid; // Any opcode not listed in the cases below.
44
85.7k
  case X86::TEST8rr:
45
33.8k
  case X86::TEST16rr:
46
33.8k
  case X86::TEST32rr:
47
33.8k
  case X86::TEST64rr:
48
33.8k
  case X86::TEST8ri:
49
33.8k
  case X86::TEST16ri:
50
33.8k
  case X86::TEST32ri:
51
33.8k
  case X86::TEST64ri32:
52
33.8k
  case X86::TEST8mr:
53
33.8k
  case X86::TEST16mr:
54
33.8k
  case X86::TEST32mr:
55
33.8k
  case X86::TEST64mr:
56
33.8k
    return FirstInstrKind::Test; // The TEST*rr, TEST*ri and TEST*mr opcodes.
57
33.8k
  case X86::AND16ri:
58
721
  case X86::AND16ri8:
59
721
  case X86::AND16rm:
60
721
  case X86::AND16rr:
61
721
  case X86::AND32ri:
62
721
  case X86::AND32ri8:
63
721
  case X86::AND32rm:
64
721
  case X86::AND32rr:
65
721
  case X86::AND64ri32:
66
721
  case X86::AND64ri8:
67
721
  case X86::AND64rm:
68
721
  case X86::AND64rr:
69
721
  case X86::AND8ri:
70
721
  case X86::AND8rm:
71
721
  case X86::AND8rr:
72
721
    return FirstInstrKind::And; // The AND*ri*, AND*rm and AND*rr opcodes; no AND*mr opcode is listed.
73
31.0k
  case X86::CMP16ri:
74
31.0k
  case X86::CMP16ri8:
75
31.0k
  case X86::CMP16rm:
76
31.0k
  case X86::CMP16rr:
77
31.0k
  case X86::CMP16mr:
78
31.0k
  case X86::CMP32ri:
79
31.0k
  case X86::CMP32ri8:
80
31.0k
  case X86::CMP32rm:
81
31.0k
  case X86::CMP32rr:
82
31.0k
  case X86::CMP32mr:
83
31.0k
  case X86::CMP64ri32:
84
31.0k
  case X86::CMP64ri8:
85
31.0k
  case X86::CMP64rm:
86
31.0k
  case X86::CMP64rr:
87
31.0k
  case X86::CMP64mr:
88
31.0k
  case X86::CMP8ri:
89
31.0k
  case X86::CMP8rm:
90
31.0k
  case X86::CMP8rr:
91
31.0k
  case X86::CMP8mr:
92
31.0k
    return FirstInstrKind::Cmp; // The CMP*ri*, CMP*rm, CMP*rr and CMP*mr opcodes.
93
31.0k
  case X86::ADD16ri:
94
2.67k
  case X86::ADD16ri8:
95
2.67k
  case X86::ADD16ri8_DB:
96
2.67k
  case X86::ADD16ri_DB:
97
2.67k
  case X86::ADD16rm:
98
2.67k
  case X86::ADD16rr:
99
2.67k
  case X86::ADD16rr_DB:
100
2.67k
  case X86::ADD32ri:
101
2.67k
  case X86::ADD32ri8:
102
2.67k
  case X86::ADD32ri8_DB:
103
2.67k
  case X86::ADD32ri_DB:
104
2.67k
  case X86::ADD32rm:
105
2.67k
  case X86::ADD32rr:
106
2.67k
  case X86::ADD32rr_DB:
107
2.67k
  case X86::ADD64ri32:
108
2.67k
  case X86::ADD64ri32_DB:
109
2.67k
  case X86::ADD64ri8:
110
2.67k
  case X86::ADD64ri8_DB:
111
2.67k
  case X86::ADD64rm:
112
2.67k
  case X86::ADD64rr:
113
2.67k
  case X86::ADD64rr_DB:
114
2.67k
  case X86::ADD8ri:
115
2.67k
  case X86::ADD8ri_DB:
116
2.67k
  case X86::ADD8rm:
117
2.67k
  case X86::ADD8rr:
118
2.67k
  case X86::ADD8rr_DB:
119
2.67k
  case X86::SUB16ri:
120
2.67k
  case X86::SUB16ri8:
121
2.67k
  case X86::SUB16rm:
122
2.67k
  case X86::SUB16rr:
123
2.67k
  case X86::SUB32ri:
124
2.67k
  case X86::SUB32ri8:
125
2.67k
  case X86::SUB32rm:
126
2.67k
  case X86::SUB32rr:
127
2.67k
  case X86::SUB64ri32:
128
2.67k
  case X86::SUB64ri8:
129
2.67k
  case X86::SUB64rm:
130
2.67k
  case X86::SUB64rr:
131
2.67k
  case X86::SUB8ri:
132
2.67k
  case X86::SUB8rm:
133
2.67k
  case X86::SUB8rr:
134
2.67k
    return FirstInstrKind::ALU; // ADD (including the *_DB opcodes) and SUB, in their ri*/rm/rr forms.
135
2.67k
  case X86::INC16r:
136
1.02k
  case X86::INC32r:
137
1.02k
  case X86::INC64r:
138
1.02k
  case X86::INC8r:
139
1.02k
  case X86::DEC16r:
140
1.02k
  case X86::DEC32r:
141
1.02k
  case X86::DEC64r:
142
1.02k
  case X86::DEC8r:
143
1.02k
    return FirstInstrKind::IncDec; // The INC*r and DEC*r opcodes only.
144
85.7k
  }
145
85.7k
}
146
/// Map the condition code that X86::getCondFromBranch(MI) reports to a JumpKind; COND_INVALID and unlisted codes yield Invalid.
147
288k
static JumpKind classifySecond(const MachineInstr &MI) {
148
288k
  X86::CondCode CC = X86::getCondFromBranch(MI);
149
288k
  if (CC == X86::COND_INVALID)
150
117k
    return JumpKind::Invalid; // getCondFromBranch() reported no usable condition code for MI.
151
170k
152
170k
  switch (CC) {
153
170k
  default:
154
0
    return JumpKind::Invalid; // Any condition code not listed in the cases below.
155
170k
  case X86::COND_E:
156
127k
  case X86::COND_NE:
157
127k
  case X86::COND_L:
158
127k
  case X86::COND_LE:
159
127k
  case X86::COND_G:
160
127k
  case X86::COND_GE:
161
127k
    return JumpKind::ELG;
162
127k
  case X86::COND_B:
163
38.6k
  case X86::COND_BE:
164
38.6k
  case X86::COND_A:
165
38.6k
  case X86::COND_AE:
166
38.6k
    return JumpKind::AB;
167
38.6k
  case X86::COND_S:
168
3.40k
  case X86::COND_NS:
169
3.40k
  case X86::COND_P:
170
3.40k
  case X86::COND_NP:
171
3.40k
  case X86::COND_O:
172
3.40k
  case X86::COND_NO:
173
3.40k
    return JumpKind::SPO;
174
170k
  }
175
170k
}
176
/// Fusion predicate that createX86MacroFusionDAGMutation() passes to createBranchMacroFusionDAGMutation().
177
/// Check if the instr pair, FirstMI and SecondMI, should be fused together.
178
/// Given SecondMI, when FirstMI is unspecified (null), then only check if SecondMI may be part of a fused pair at all.
179
/// TSI is cast to X86Subtarget and queried for hasBranchFusion()/hasMacroFusion(); TII is not used in this function.
180
static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
181
                                   const TargetSubtargetInfo &TSI,
182
                                   const MachineInstr *FirstMI,
183
429k
                                   const MachineInstr &SecondMI) {
184
429k
  const X86Subtarget &ST = static_cast<const X86Subtarget &>(TSI);
185
429k
186
429k
  // Check if this processor supports any kind of fusion.
187
429k
  if (!(ST.hasBranchFusion() || 
ST.hasMacroFusion()428k
))
188
141k
    return false;
189
288k
190
288k
  const JumpKind BranchKind = classifySecond(SecondMI);
191
288k
192
288k
  if (BranchKind == JumpKind::Invalid)
193
117k
    return false; // Second cannot be fused with anything.
194
170k
195
170k
  if (FirstMI == nullptr)
196
84.3k
    return true; // We're only checking whether Second can be fused at all.
197
85.7k
198
85.7k
  const FirstInstrKind TestKind = classifyFirst(*FirstMI);
199
85.7k
200
85.7k
  if (ST.hasBranchFusion()) {
201
27
    // Branch fusion can merge CMP and TEST with all conditional jumps.
202
27
    return (TestKind == FirstInstrKind::Cmp ||
203
27
            
TestKind == FirstInstrKind::Test22
);
204
27
  }
205
85.7k
206
85.7k
  
if (85.7k
ST.hasMacroFusion()85.7k
) {
207
85.7k
    // Macro Fusion rules are a bit more complex. See Agner Fog's
208
85.7k
    // Microarchitecture table 9.2 "Instruction Fusion".
209
85.7k
    switch (TestKind) {
210
85.7k
    case FirstInstrKind::Test:
211
34.5k
    case FirstInstrKind::And:
212
34.5k
      return true; // Test/And pair with every JumpKind that reached this point (ELG, AB, SPO).
213
34.5k
    case FirstInstrKind::Cmp:
214
33.6k
    case FirstInstrKind::ALU:
215
33.6k
      return BranchKind == JumpKind::ELG || 
BranchKind == JumpKind::AB19.6k
; // Cmp/ALU pair with ELG and AB jumps, but not SPO.
216
33.6k
    case FirstInstrKind::IncDec:
217
1.01k
      return BranchKind == JumpKind::ELG; // IncDec pairs only with ELG jumps.
218
33.6k
    case FirstInstrKind::Invalid:
219
16.4k
      return false;
220
18.4E
    }
221
18.4E
  }
222
18.4E
223
18.4E
  llvm_unreachable("unknown branch fusion type"); // Every FirstInstrKind enumerator returns in the switch above; presumably only hit if neither fusion query holds here.
224
18.4E
}
225
226
namespace llvm {
227
/// Returns the result of createBranchMacroFusionDAGMutation() called with shouldScheduleAdjacent as its argument.
228
std::unique_ptr<ScheduleDAGMutation>
229
136k
createX86MacroFusionDAGMutation () {
230
136k
  return createBranchMacroFusionDAGMutation(shouldScheduleAdjacent);
231
136k
}
232
233
} // end namespace llvm