/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
Line | Count | Source |
1 | | //===- AArch64MacroFusion.cpp - AArch64 Macro Fusion ----------------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | /// \file This file contains the AArch64 implementation of the DAG scheduling |
11 | | /// mutation to pair instructions back to back. |
12 | | // |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #include "AArch64MacroFusion.h" |
16 | | #include "AArch64Subtarget.h" |
17 | | #include "llvm/CodeGen/MacroFusion.h" |
18 | | #include "llvm/Target/TargetInstrInfo.h" |
19 | | |
20 | | using namespace llvm; |
21 | | |
22 | | namespace { |
23 | | |
24 | | /// \brief Check if the instr pair, FirstMI and SecondMI, should be fused |
25 | | /// together. Given SecondMI, when FirstMI is unspecified, then check if |
26 | | /// SecondMI may be part of a fused pair at all. |
27 | | static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, |
28 | | const TargetSubtargetInfo &TSI, |
29 | | const MachineInstr *FirstMI, |
30 | 20.2M | const MachineInstr &SecondMI) { |
31 | 20.2M | const AArch64InstrInfo &II = static_cast<const AArch64InstrInfo&>(TII); |
32 | 20.2M | const AArch64Subtarget &ST = static_cast<const AArch64Subtarget&>(TSI); |
33 | 20.2M | |
34 | 20.2M | // Assume wildcards for unspecified instrs. |
35 | 20.2M | unsigned FirstOpcode = |
36 | 1.08M | FirstMI ? FirstMI->getOpcode() |
37 | 19.1M | : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END); |
38 | 20.2M | unsigned SecondOpcode = SecondMI.getOpcode(); |
39 | 20.2M | |
40 | 20.2M | if (ST.hasArithmeticBccFusion()) |
41 | 20.2M | // Fuse CMN, CMP, TST followed by Bcc. |
42 | 20.1M | if (20.1M SecondOpcode == AArch64::Bcc20.1M ) |
43 | 1.40M | switch (FirstOpcode) { |
44 | 49.7k | default: |
45 | 49.7k | return false; |
46 | 653k | case AArch64::ADDSWri: |
47 | 653k | case AArch64::ADDSWrr: |
48 | 653k | case AArch64::ADDSXri: |
49 | 653k | case AArch64::ADDSXrr: |
50 | 653k | case AArch64::ANDSWri: |
51 | 653k | case AArch64::ANDSWrr: |
52 | 653k | case AArch64::ANDSXri: |
53 | 653k | case AArch64::ANDSXrr: |
54 | 653k | case AArch64::SUBSWri: |
55 | 653k | case AArch64::SUBSWrr: |
56 | 653k | case AArch64::SUBSXri: |
57 | 653k | case AArch64::SUBSXrr: |
58 | 653k | case AArch64::BICSWrr: |
59 | 653k | case AArch64::BICSXrr: |
60 | 653k | return true; |
61 | 710 | case AArch64::ADDSWrs: |
62 | 710 | case AArch64::ADDSXrs: |
63 | 710 | case AArch64::ANDSWrs: |
64 | 710 | case AArch64::ANDSXrs: |
65 | 710 | case AArch64::SUBSWrs: |
66 | 710 | case AArch64::SUBSXrs: |
67 | 710 | case AArch64::BICSWrs: |
68 | 710 | case AArch64::BICSXrs: |
69 | 710 | // Shift value can be 0 making these behave like the "rr" variant... |
70 | 710 | return !II.hasShiftedReg(*FirstMI); |
71 | 702k | case AArch64::INSTRUCTION_LIST_END: |
72 | 702k | return true; |
73 | 18.8M | } |
74 | 18.8M | |
75 | 18.8M | if (18.8M ST.hasArithmeticCbzFusion()18.8M ) |
76 | 18.8M | // Fuse ALU operations followed by CBZ/CBNZ. |
77 | 18.7M | if (18.7M SecondOpcode == AArch64::CBNZW || 18.7M SecondOpcode == AArch64::CBNZX18.3M || |
78 | 18.7M | SecondOpcode == AArch64::CBZW18.2M || SecondOpcode == AArch64::CBZX18.1M ) |
79 | 694k | switch (FirstOpcode) { |
80 | 379k | default: |
81 | 379k | return false; |
82 | 940 | case AArch64::ADDWri: |
83 | 940 | case AArch64::ADDWrr: |
84 | 940 | case AArch64::ADDXri: |
85 | 940 | case AArch64::ADDXrr: |
86 | 940 | case AArch64::ANDWri: |
87 | 940 | case AArch64::ANDWrr: |
88 | 940 | case AArch64::ANDXri: |
89 | 940 | case AArch64::ANDXrr: |
90 | 940 | case AArch64::EORWri: |
91 | 940 | case AArch64::EORWrr: |
92 | 940 | case AArch64::EORXri: |
93 | 940 | case AArch64::EORXrr: |
94 | 940 | case AArch64::ORRWri: |
95 | 940 | case AArch64::ORRWrr: |
96 | 940 | case AArch64::ORRXri: |
97 | 940 | case AArch64::ORRXrr: |
98 | 940 | case AArch64::SUBWri: |
99 | 940 | case AArch64::SUBWrr: |
100 | 940 | case AArch64::SUBXri: |
101 | 940 | case AArch64::SUBXrr: |
102 | 940 | return true; |
103 | 127 | case AArch64::ADDWrs: |
104 | 127 | case AArch64::ADDXrs: |
105 | 127 | case AArch64::ANDWrs: |
106 | 127 | case AArch64::ANDXrs: |
107 | 127 | case AArch64::SUBWrs: |
108 | 127 | case AArch64::SUBXrs: |
109 | 127 | case AArch64::BICWrs: |
110 | 127 | case AArch64::BICXrs: |
111 | 127 | // Shift value can be 0 making these behave like the "rr" variant... |
112 | 127 | return !II.hasShiftedReg(*FirstMI); |
113 | 314k | case AArch64::INSTRUCTION_LIST_END: |
114 | 314k | return true; |
115 | 18.1M | } |
116 | 18.1M | |
117 | 18.1M | if (18.1M ST.hasFuseAES()18.1M ) |
118 | 18.1M | // Fuse AES crypto operations. |
119 | 102k | switch(SecondOpcode) { |
120 | 102k | // AES encode. |
121 | 286 | case AArch64::AESMCrr: |
122 | 286 | case AArch64::AESMCrrTied: |
123 | 286 | return FirstOpcode == AArch64::AESErr || |
124 | 146 | FirstOpcode == AArch64::INSTRUCTION_LIST_END; |
125 | 286 | // AES decode. |
126 | 230 | case AArch64::AESIMCrr: |
127 | 230 | case AArch64::AESIMCrrTied: |
128 | 230 | return FirstOpcode == AArch64::AESDrr || |
129 | 118 | FirstOpcode == AArch64::INSTRUCTION_LIST_END; |
130 | 18.1M | } |
131 | 18.1M | |
132 | 18.1M | if (18.1M ST.hasFuseLiterals()18.1M ) |
133 | 18.1M | // Fuse literal generation operations. |
134 | 1.65k | switch (SecondOpcode) { |
135 | 1.65k | // PC relative address. |
136 | 36 | case AArch64::ADDXri: |
137 | 36 | return FirstOpcode == AArch64::ADRP || |
138 | 34 | FirstOpcode == AArch64::INSTRUCTION_LIST_END; |
139 | 1.65k | // 32 bit immediate. |
140 | 4 | case AArch64::MOVKWi: |
141 | 4 | return (FirstOpcode == AArch64::MOVZWi && |
142 | 2 | SecondMI.getOperand(3).getImm() == 16) || |
143 | 2 | FirstOpcode == AArch64::INSTRUCTION_LIST_END; |
144 | 1.65k | // Lower and upper half of 64 bit immediate. |
145 | 12 | case AArch64::MOVKXi: |
146 | 12 | return FirstOpcode == AArch64::INSTRUCTION_LIST_END || |
147 | 6 | (FirstOpcode == AArch64::MOVZXi && |
148 | 6 | SecondMI.getOperand(3).getImm() == 16) || |
149 | 4 | (FirstOpcode == AArch64::MOVKXi && |
150 | 4 | FirstMI->getOperand(3).getImm() == 32 && |
151 | 12 | SecondMI.getOperand(3).getImm() == 48); |
152 | 18.1M | } |
153 | 18.1M | |
154 | 18.1M | return false; |
155 | 18.1M | } |
156 | | |
157 | | } // end namespace |
158 | | |
159 | | |
160 | | namespace llvm { |
161 | | |
162 | 463k | std::unique_ptr<ScheduleDAGMutation> createAArch64MacroFusionDAGMutation () { |
163 | 463k | return createMacroFusionDAGMutation(shouldScheduleAdjacent); |
164 | 463k | } |
165 | | |
166 | | } // end namespace llvm |