/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/X86/X86EvexToVex.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- X86EvexToVex.cpp ---------------------------------------------------===// |
2 | | // Compress EVEX instructions to VEX encoding when possible to reduce code size |
3 | | // |
4 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
5 | | // See https://llvm.org/LICENSE.txt for license information. |
6 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | /// \file |
11 | | /// This file defines the pass that goes over all AVX-512 instructions which |
12 | | /// are encoded using the EVEX prefix and if possible replaces them by their |
13 | | /// corresponding VEX encoding which is usually shorter by 2 bytes. |
14 | | /// EVEX instructions may be encoded via the VEX prefix when the AVX-512 |
15 | | /// instruction has a corresponding AVX/AVX2 opcode, when vector length |
16 | | /// accessed by instruction is less than 512 bits and when it does not use |
/// the xmm or the mask registers or xmm/ymm registers with indexes higher
/// than 15.
18 | | /// The pass applies code reduction on the generated code for AVX-512 instrs. |
19 | | // |
20 | | //===----------------------------------------------------------------------===// |
21 | | |
22 | | #include "MCTargetDesc/X86BaseInfo.h" |
23 | | #include "MCTargetDesc/X86InstComments.h" |
24 | | #include "X86.h" |
25 | | #include "X86InstrInfo.h" |
26 | | #include "X86Subtarget.h" |
27 | | #include "llvm/ADT/StringRef.h" |
28 | | #include "llvm/CodeGen/MachineFunction.h" |
29 | | #include "llvm/CodeGen/MachineFunctionPass.h" |
30 | | #include "llvm/CodeGen/MachineInstr.h" |
31 | | #include "llvm/CodeGen/MachineOperand.h" |
32 | | #include "llvm/MC/MCInstrDesc.h" |
33 | | #include "llvm/Pass.h" |
34 | | #include <cassert> |
35 | | #include <cstdint> |
36 | | |
37 | | using namespace llvm; |
38 | | |
39 | | // Including the generated EVEX2VEX tables. |
// One row of the generated EVEX->VEX translation tables
// (X86GenEVEX2VEXTables.inc). The tables are kept sorted by EvexOpcode so
// they can be probed with a binary search.
struct X86EvexToVexCompressTableEntry {
  uint16_t EvexOpcode;
  uint16_t VexOpcode;

  // Order table rows by their EVEX opcode.
  bool operator<(const X86EvexToVexCompressTableEntry &RHS) const {
    return EvexOpcode < RHS.EvexOpcode;
  }

  // Heterogeneous comparison so lower_bound can probe the table with a bare
  // opcode value instead of a full entry.
  friend bool operator<(const X86EvexToVexCompressTableEntry &TE,
                        unsigned Opc) {
    return TE.EvexOpcode < Opc;
  }
};
53 | | #include "X86GenEVEX2VEXTables.inc" |
54 | | |
55 | 146k | #define EVEX2VEX_DESC "Compressing EVEX instrs to VEX encoding when possible" |
56 | | #define EVEX2VEX_NAME "x86-evex-to-vex-compress" |
57 | | |
58 | | #define DEBUG_TYPE EVEX2VEX_NAME |
59 | | |
60 | | namespace { |
61 | | |
62 | | class EvexToVexInstPass : public MachineFunctionPass { |
63 | | |
64 | | /// For EVEX instructions that can be encoded using VEX encoding, replace |
65 | | /// them by the VEX encoding in order to reduce size. |
66 | | bool CompressEvexToVexImpl(MachineInstr &MI) const; |
67 | | |
68 | | public: |
69 | | static char ID; |
70 | | |
71 | 11.4k | EvexToVexInstPass() : MachineFunctionPass(ID) { } |
72 | | |
73 | 146k | StringRef getPassName() const override { return EVEX2VEX_DESC; } |
74 | | |
75 | | /// Loop over all of the basic blocks, replacing EVEX instructions |
76 | | /// by equivalent VEX instructions when possible for reducing code size. |
77 | | bool runOnMachineFunction(MachineFunction &MF) override; |
78 | | |
79 | | // This pass runs after regalloc and doesn't support VReg operands. |
80 | 11.3k | MachineFunctionProperties getRequiredProperties() const override { |
81 | 11.3k | return MachineFunctionProperties().set( |
82 | 11.3k | MachineFunctionProperties::Property::NoVRegs); |
83 | 11.3k | } |
84 | | |
85 | | private: |
86 | | /// Machine instruction info used throughout the class. |
87 | | const X86InstrInfo *TII; |
88 | | }; |
89 | | |
90 | | } // end anonymous namespace |
91 | | |
92 | | char EvexToVexInstPass::ID = 0; |
93 | | |
94 | 135k | bool EvexToVexInstPass::runOnMachineFunction(MachineFunction &MF) { |
95 | 135k | TII = MF.getSubtarget<X86Subtarget>().getInstrInfo(); |
96 | 135k | |
97 | 135k | const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>(); |
98 | 135k | if (!ST.hasAVX512()) |
99 | 93.9k | return false; |
100 | 41.5k | |
101 | 41.5k | bool Changed = false; |
102 | 41.5k | |
103 | 41.5k | /// Go over all basic blocks in function and replace |
104 | 41.5k | /// EVEX encoded instrs by VEX encoding when possible. |
105 | 44.9k | for (MachineBasicBlock &MBB : MF) { |
106 | 44.9k | |
107 | 44.9k | // Traverse the basic block. |
108 | 44.9k | for (MachineInstr &MI : MBB) |
109 | 223k | Changed |= CompressEvexToVexImpl(MI); |
110 | 44.9k | } |
111 | 41.5k | |
112 | 41.5k | return Changed; |
113 | 41.5k | } |
114 | | |
115 | 37.3k | static bool usesExtendedRegister(const MachineInstr &MI) { |
116 | 110k | auto isHiRegIdx = [](unsigned Reg) { |
117 | 110k | // Check for XMM register with indexes between 16 - 31. |
118 | 110k | if (Reg >= X86::XMM16 && Reg <= X86::XMM3122.1k ) |
119 | 748 | return true; |
120 | 109k | |
121 | 109k | // Check for YMM register with indexes between 16 - 31. |
122 | 109k | if (Reg >= X86::YMM16 && Reg <= X86::YMM31511 ) |
123 | 470 | return true; |
124 | 109k | |
125 | 109k | return false; |
126 | 109k | }; |
127 | 37.3k | |
128 | 37.3k | // Check that operands are not ZMM regs or |
129 | 37.3k | // XMM/YMM regs with hi indexes between 16 - 31. |
130 | 137k | for (const MachineOperand &MO : MI.explicit_operands()) { |
131 | 137k | if (!MO.isReg()) |
132 | 27.1k | continue; |
133 | 110k | |
134 | 110k | unsigned Reg = MO.getReg(); |
135 | 110k | |
136 | 110k | assert(!(Reg >= X86::ZMM0 && Reg <= X86::ZMM31) && |
137 | 110k | "ZMM instructions should not be in the EVEX->VEX tables"); |
138 | 110k | |
139 | 110k | if (isHiRegIdx(Reg)) |
140 | 1.21k | return true; |
141 | 110k | } |
142 | 37.3k | |
143 | 37.3k | return false36.0k ; |
144 | 37.3k | } |
145 | | |
146 | | // Do any custom cleanup needed to finalize the conversion. |
147 | 36.0k | static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) { |
148 | 36.0k | (void)NewOpc; |
149 | 36.0k | unsigned Opc = MI.getOpcode(); |
150 | 36.0k | switch (Opc) { |
151 | 36.0k | case X86::VALIGNDZ128rri: |
152 | 32 | case X86::VALIGNDZ128rmi: |
153 | 32 | case X86::VALIGNQZ128rri: |
154 | 32 | case X86::VALIGNQZ128rmi: { |
155 | 32 | assert((NewOpc == X86::VPALIGNRrri || NewOpc == X86::VPALIGNRrmi) && |
156 | 32 | "Unexpected new opcode!"); |
157 | 32 | unsigned Scale = (Opc == X86::VALIGNQZ128rri || |
158 | 32 | Opc == X86::VALIGNQZ128rmi11 ) ? 822 : 410 ; |
159 | 32 | MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands()-1); |
160 | 32 | Imm.setImm(Imm.getImm() * Scale); |
161 | 32 | break; |
162 | 32 | } |
163 | 58 | case X86::VSHUFF32X4Z256rmi: |
164 | 58 | case X86::VSHUFF32X4Z256rri: |
165 | 58 | case X86::VSHUFF64X2Z256rmi: |
166 | 58 | case X86::VSHUFF64X2Z256rri: |
167 | 58 | case X86::VSHUFI32X4Z256rmi: |
168 | 58 | case X86::VSHUFI32X4Z256rri: |
169 | 58 | case X86::VSHUFI64X2Z256rmi: |
170 | 58 | case X86::VSHUFI64X2Z256rri: { |
171 | 58 | assert((NewOpc == X86::VPERM2F128rr || NewOpc == X86::VPERM2I128rr || |
172 | 58 | NewOpc == X86::VPERM2F128rm || NewOpc == X86::VPERM2I128rm) && |
173 | 58 | "Unexpected new opcode!"); |
174 | 58 | MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands()-1); |
175 | 58 | int64_t ImmVal = Imm.getImm(); |
176 | 58 | // Set bit 5, move bit 1 to bit 4, copy bit 0. |
177 | 58 | Imm.setImm(0x20 | ((ImmVal & 2) << 3) | (ImmVal & 1)); |
178 | 58 | break; |
179 | 58 | } |
180 | 226 | case X86::VRNDSCALEPDZ128rri: |
181 | 226 | case X86::VRNDSCALEPDZ128rmi: |
182 | 226 | case X86::VRNDSCALEPSZ128rri: |
183 | 226 | case X86::VRNDSCALEPSZ128rmi: |
184 | 226 | case X86::VRNDSCALEPDZ256rri: |
185 | 226 | case X86::VRNDSCALEPDZ256rmi: |
186 | 226 | case X86::VRNDSCALEPSZ256rri: |
187 | 226 | case X86::VRNDSCALEPSZ256rmi: |
188 | 226 | case X86::VRNDSCALESDZr: |
189 | 226 | case X86::VRNDSCALESDZm: |
190 | 226 | case X86::VRNDSCALESSZr: |
191 | 226 | case X86::VRNDSCALESSZm: |
192 | 226 | case X86::VRNDSCALESDZr_Int: |
193 | 226 | case X86::VRNDSCALESDZm_Int: |
194 | 226 | case X86::VRNDSCALESSZr_Int: |
195 | 226 | case X86::VRNDSCALESSZm_Int: |
196 | 226 | const MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands()-1); |
197 | 226 | int64_t ImmVal = Imm.getImm(); |
198 | 226 | // Ensure that only bits 3:0 of the immediate are used. |
199 | 226 | if ((ImmVal & 0xf) != ImmVal) |
200 | 22 | return false; |
201 | 204 | break; |
202 | 36.0k | } |
203 | 36.0k | |
204 | 36.0k | return true; |
205 | 36.0k | } |
206 | | |
207 | | |
208 | | // For EVEX instructions that can be encoded using VEX encoding |
209 | | // replace them by the VEX encoding in order to reduce size. |
210 | 223k | bool EvexToVexInstPass::CompressEvexToVexImpl(MachineInstr &MI) const { |
211 | 223k | // VEX format. |
212 | 223k | // # of bytes: 0,2,3 1 1 0,1 0,1,2,4 0,1 |
213 | 223k | // [Prefixes] [VEX] OPCODE ModR/M [SIB] [DISP] [IMM] |
214 | 223k | // |
215 | 223k | // EVEX format. |
216 | 223k | // # of bytes: 4 1 1 1 4 / 1 1 |
217 | 223k | // [Prefixes] EVEX Opcode ModR/M [SIB] [Disp32] / [Disp8*N] [Immediate] |
218 | 223k | |
219 | 223k | const MCInstrDesc &Desc = MI.getDesc(); |
220 | 223k | |
221 | 223k | // Check for EVEX instructions only. |
222 | 223k | if ((Desc.TSFlags & X86II::EncodingMask) != X86II::EVEX) |
223 | 133k | return false; |
224 | 90.1k | |
225 | 90.1k | // Check for EVEX instructions with mask or broadcast as in these cases |
226 | 90.1k | // the EVEX prefix is needed in order to carry this information |
227 | 90.1k | // thus preventing the transformation to VEX encoding. |
228 | 90.1k | if (Desc.TSFlags & (X86II::EVEX_K | X86II::EVEX_B)) |
229 | 17.3k | return false; |
230 | 72.8k | |
231 | 72.8k | // Check for EVEX instructions with L2 set. These instructions are 512-bits |
232 | 72.8k | // and can't be converted to VEX. |
233 | 72.8k | if (Desc.TSFlags & X86II::EVEX_L2) |
234 | 27.3k | return false; |
235 | 45.5k | |
236 | | #ifndef NDEBUG |
237 | | // Make sure the tables are sorted. |
238 | | static std::atomic<bool> TableChecked(false); |
239 | | if (!TableChecked.load(std::memory_order_relaxed)) { |
240 | | assert(std::is_sorted(std::begin(X86EvexToVex128CompressTable), |
241 | | std::end(X86EvexToVex128CompressTable)) && |
242 | | "X86EvexToVex128CompressTable is not sorted!"); |
243 | | assert(std::is_sorted(std::begin(X86EvexToVex256CompressTable), |
244 | | std::end(X86EvexToVex256CompressTable)) && |
245 | | "X86EvexToVex256CompressTable is not sorted!"); |
246 | | TableChecked.store(true, std::memory_order_relaxed); |
247 | | } |
248 | | #endif |
249 | | |
250 | 45.5k | // Use the VEX.L bit to select the 128 or 256-bit table. |
251 | 45.5k | ArrayRef<X86EvexToVexCompressTableEntry> Table = |
252 | 45.5k | (Desc.TSFlags & X86II::VEX_L) ? makeArrayRef(X86EvexToVex256CompressTable)15.0k |
253 | 45.5k | : makeArrayRef(X86EvexToVex128CompressTable)30.5k ; |
254 | 45.5k | |
255 | 45.5k | auto I = llvm::lower_bound(Table, MI.getOpcode()); |
256 | 45.5k | if (I == Table.end() || I->EvexOpcode != MI.getOpcode()) |
257 | 8.23k | return false; |
258 | 37.3k | |
259 | 37.3k | unsigned NewOpc = I->VexOpcode; |
260 | 37.3k | |
261 | 37.3k | if (usesExtendedRegister(MI)) |
262 | 1.21k | return false; |
263 | 36.0k | |
264 | 36.0k | if (!performCustomAdjustments(MI, NewOpc)) |
265 | 22 | return false; |
266 | 36.0k | |
267 | 36.0k | MI.setDesc(TII->get(NewOpc)); |
268 | 36.0k | MI.setAsmPrinterFlag(X86::AC_EVEX_2_VEX); |
269 | 36.0k | return true; |
270 | 36.0k | } |
271 | | |
272 | | INITIALIZE_PASS(EvexToVexInstPass, EVEX2VEX_NAME, EVEX2VEX_DESC, false, false) |
273 | | |
274 | 11.3k | FunctionPass *llvm::createX86EvexToVexInsts() { |
275 | 11.3k | return new EvexToVexInstPass(); |
276 | 11.3k | } |