/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- SystemZSelectionDAGInfo.cpp - SystemZ SelectionDAG Info -----------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file implements the SystemZSelectionDAGInfo class. |
10 | | // |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #include "SystemZTargetMachine.h" |
14 | | #include "llvm/CodeGen/SelectionDAG.h" |
15 | | |
16 | | using namespace llvm; |
17 | | |
18 | | #define DEBUG_TYPE "systemz-selectiondag-info" |
19 | | |
20 | | // Decide whether it is best to use a loop or straight-line code for |
21 | | // a block operation of Size bytes with source address Src and destination |
22 | | // address Dst. Sequence is the opcode to use for straight-line code |
23 | | // (such as MVC) and Loop is the opcode to use for loops (such as MVC_LOOP). |
24 | | // Return the chain for the completed operation. |
25 | | static SDValue emitMemMem(SelectionDAG &DAG, const SDLoc &DL, unsigned Sequence, |
26 | | unsigned Loop, SDValue Chain, SDValue Dst, |
27 | 70 | SDValue Src, uint64_t Size) { |
28 | 70 | EVT PtrVT = Src.getValueType(); |
29 | 70 | // The heuristic we use is to prefer loops for anything that would |
30 | 70 | // require 7 or more MVCs. With these kinds of sizes there isn't |
31 | 70 | // much to choose between straight-line code and looping code, |
32 | 70 | // since the time will be dominated by the MVCs themselves. |
33 | 70 | // However, the loop has 4 or 5 instructions (depending on whether |
34 | 70 | // the base addresses can be proved equal), so there doesn't seem |
35 | 70 | // much point using a loop for 5 * 256 bytes or fewer. Anything in |
36 | 70 | // the range (5 * 256, 6 * 256) will need another instruction after |
37 | 70 | // the loop, so it doesn't seem worth using a loop then either. |
38 | 70 | // The next value up, 6 * 256, can be implemented in the same |
39 | 70 | // number of straight-line MVCs as 6 * 256 - 1. |
40 | 70 | if (Size > 6 * 256) |
41 | 4 | return DAG.getNode(Loop, DL, MVT::Other, Chain, Dst, Src, |
42 | 4 | DAG.getConstant(Size, DL, PtrVT), |
43 | 4 | DAG.getConstant(Size / 256, DL, PtrVT)); |
44 | 66 | return DAG.getNode(Sequence, DL, MVT::Other, Chain, Dst, Src, |
45 | 66 | DAG.getConstant(Size, DL, PtrVT)); |
46 | 66 | } |
47 | | |
48 | | SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemcpy( |
49 | | SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src, |
50 | | SDValue Size, unsigned Align, bool IsVolatile, bool AlwaysInline, |
51 | 26 | MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { |
52 | 26 | if (IsVolatile) |
53 | 0 | return SDValue(); |
54 | 26 | |
55 | 26 | if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) |
56 | 24 | return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP, |
57 | 24 | Chain, Dst, Src, CSize->getZExtValue()); |
58 | 2 | return SDValue(); |
59 | 2 | } |
60 | | |
61 | | // Handle a memset of 1, 2, 4 or 8 bytes with the operands given by |
62 | | // Chain, Dst, ByteVal and Size. These cases are expected to use |
63 | | // MVI, MVHHI, MVHI and MVGHI respectively. |
64 | | static SDValue memsetStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, |
65 | | SDValue Dst, uint64_t ByteVal, uint64_t Size, |
66 | 82 | unsigned Align, MachinePointerInfo DstPtrInfo) { |
67 | 82 | uint64_t StoreVal = ByteVal; |
68 | 324 | for (unsigned I = 1; I < Size; ++I) |
69 | 242 | StoreVal |= ByteVal << (I * 8); |
70 | 82 | return DAG.getStore( |
71 | 82 | Chain, DL, DAG.getConstant(StoreVal, DL, MVT::getIntegerVT(Size * 8)), |
72 | 82 | Dst, DstPtrInfo, Align); |
73 | 82 | } |
74 | | |
75 | | SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset( |
76 | | SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, |
77 | | SDValue Byte, SDValue Size, unsigned Align, bool IsVolatile, |
78 | 103 | MachinePointerInfo DstPtrInfo) const { |
79 | 103 | EVT PtrVT = Dst.getValueType(); |
80 | 103 | |
81 | 103 | if (IsVolatile) |
82 | 0 | return SDValue(); |
83 | 103 | |
84 | 103 | if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) { |
85 | 102 | uint64_t Bytes = CSize->getZExtValue(); |
86 | 102 | if (Bytes == 0) |
87 | 0 | return SDValue(); |
88 | 102 | if (auto *CByte = dyn_cast<ConstantSDNode>(Byte)) { |
89 | 90 | // Handle cases that can be done using at most two of |
90 | 90 | // MVI, MVHI, MVHHI and MVGHI. The latter two can only be |
91 | 90 | // used if ByteVal is all zeros or all ones; in other cases, |
92 | 90 | // we can move at most 2 halfwords. |
93 | 90 | uint64_t ByteVal = CByte->getZExtValue(); |
94 | 90 | if (ByteVal == 0 || ByteVal == 255 ? |
95 | 76 | Bytes <= 16 && countPopulation(Bytes) <= 2 : |
96 | 90 | Bytes <= 4) { |
97 | 52 | unsigned Size1 = Bytes == 16 ? 8 : 1 << findLastSet(Bytes); |
98 | 52 | unsigned Size2 = Bytes - Size1; |
99 | 52 | SDValue Chain1 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size1, |
100 | 52 | Align, DstPtrInfo); |
101 | 52 | if (Size2 == 0) |
102 | 22 | return Chain1; |
103 | 30 | Dst = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, |
104 | 30 | DAG.getConstant(Size1, DL, PtrVT)); |
105 | 30 | DstPtrInfo = DstPtrInfo.getWithOffset(Size1); |
106 | 30 | SDValue Chain2 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size2, |
107 | 30 | std::min(Align, Size1), DstPtrInfo); |
108 | 30 | return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2); |
109 | 30 | } |
110 | 12 | } else { |
111 | 12 | // Handle one and two bytes using STC. |
112 | 12 | if (Bytes <= 2) { |
113 | 4 | SDValue Chain1 = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, Align); |
114 | 4 | if (Bytes == 1) |
115 | 2 | return Chain1; |
116 | 2 | SDValue Dst2 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, |
117 | 2 | DAG.getConstant(1, DL, PtrVT)); |
118 | 2 | SDValue Chain2 = |
119 | 2 | DAG.getStore(Chain, DL, Byte, Dst2, DstPtrInfo.getWithOffset(1), |
120 | 2 | /* Alignment = */ 1); |
121 | 2 | return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2); |
122 | 2 | } |
123 | 12 | } |
124 | 46 | assert(Bytes >= 2 && "Should have dealt with 0- and 1-byte cases already"); |
125 | 46 | |
126 | 46 | // Handle the special case of a memset of 0, which can use XC. |
127 | 46 | auto *CByte = dyn_cast<ConstantSDNode>(Byte); |
128 | 46 | if (CByte && CByte->getZExtValue() == 0) |
129 | 16 | return emitMemMem(DAG, DL, SystemZISD::XC, SystemZISD::XC_LOOP, |
130 | 16 | Chain, Dst, Dst, Bytes); |
131 | 30 | |
132 | 30 | // Copy the byte to the first location and then use MVC to copy |
133 | 30 | // it to the rest. |
134 | 30 | Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, Align); |
135 | 30 | SDValue DstPlus1 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, |
136 | 30 | DAG.getConstant(1, DL, PtrVT)); |
137 | 30 | return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP, |
138 | 30 | Chain, DstPlus1, Dst, Bytes - 1); |
139 | 30 | } |
140 | 1 | return SDValue(); |
141 | 1 | } |
142 | | |
143 | | // Use CLC to compare [Src1, Src1 + Size) with [Src2, Src2 + Size), |
144 | | // deciding whether to use a loop or straight-line code. |
145 | | static SDValue emitCLC(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, |
146 | 12 | SDValue Src1, SDValue Src2, uint64_t Size) { |
147 | 12 | SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); |
148 | 12 | EVT PtrVT = Src1.getValueType(); |
149 | 12 | // A two-CLC sequence is a clear win over a loop, not least because it |
150 | 12 | // needs only one branch. A three-CLC sequence needs the same number |
151 | 12 | // of branches as a loop (i.e. 2), but is shorter. That brings us to |
152 | 12 | // lengths greater than 768 bytes. It seems relatively likely that |
153 | 12 | // a difference will be found within the first 768 bytes, so we just |
154 | 12 | // optimize for the smallest number of branch instructions, in order |
155 | 12 | // to avoid polluting the prediction buffer too much. A loop only ever |
156 | 12 | // needs 2 branches, whereas a straight-line sequence would need 3 or more. |
157 | 12 | if (Size > 3 * 256) |
158 | 1 | return DAG.getNode(SystemZISD::CLC_LOOP, DL, VTs, Chain, Src1, Src2, |
159 | 1 | DAG.getConstant(Size, DL, PtrVT), |
160 | 1 | DAG.getConstant(Size / 256, DL, PtrVT)); |
161 | 11 | return DAG.getNode(SystemZISD::CLC, DL, VTs, Chain, Src1, Src2, |
162 | 11 | DAG.getConstant(Size, DL, PtrVT)); |
163 | 11 | } |
164 | | |
165 | | // Convert the current CC value into an integer that is 0 if CC == 0, |
166 | | // greater than zero if CC == 1 and less than zero if CC >= 2. |
167 | | // The sequence starts with IPM, which puts CC into bits 29 and 28 |
168 | | // of an integer and clears bits 30 and 31. |
169 | | static SDValue addIPMSequence(const SDLoc &DL, SDValue CCReg, |
170 | 16 | SelectionDAG &DAG) { |
171 | 16 | SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg); |
172 | 16 | SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, IPM, |
173 | 16 | DAG.getConstant(30 - SystemZ::IPM_CC, DL, MVT::i32)); |
174 | 16 | SDValue SRA = DAG.getNode(ISD::SRA, DL, MVT::i32, SHL, |
175 | 16 | DAG.getConstant(30, DL, MVT::i32)); |
176 | 16 | return SRA; |
177 | 16 | } |
178 | | |
179 | | std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemcmp( |
180 | | SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src1, |
181 | | SDValue Src2, SDValue Size, MachinePointerInfo Op1PtrInfo, |
182 | 12 | MachinePointerInfo Op2PtrInfo) const { |
183 | 12 | if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) { |
184 | 12 | uint64_t Bytes = CSize->getZExtValue(); |
185 | 12 | assert(Bytes > 0 && "Caller should have handled 0-size case"); |
186 | 12 | // Swap operands to invert CC == 1 vs. CC == 2 cases. |
187 | 12 | SDValue CCReg = emitCLC(DAG, DL, Chain, Src2, Src1, Bytes); |
188 | 12 | Chain = CCReg.getValue(1); |
189 | 12 | return std::make_pair(addIPMSequence(DL, CCReg, DAG), Chain); |
190 | 12 | } |
191 | 0 | return std::make_pair(SDValue(), SDValue()); |
192 | 0 | } |
193 | | |
194 | | std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemchr( |
195 | | SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src, |
196 | 5 | SDValue Char, SDValue Length, MachinePointerInfo SrcPtrInfo) const { |
197 | 5 | // Use SRST to find the character. End is its address on success. |
198 | 5 | EVT PtrVT = Src.getValueType(); |
199 | 5 | SDVTList VTs = DAG.getVTList(PtrVT, MVT::i32, MVT::Other); |
200 | 5 | Length = DAG.getZExtOrTrunc(Length, DL, PtrVT); |
201 | 5 | Char = DAG.getZExtOrTrunc(Char, DL, MVT::i32); |
202 | 5 | Char = DAG.getNode(ISD::AND, DL, MVT::i32, Char, |
203 | 5 | DAG.getConstant(255, DL, MVT::i32)); |
204 | 5 | SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, Length); |
205 | 5 | SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain, |
206 | 5 | Limit, Src, Char); |
207 | 5 | SDValue CCReg = End.getValue(1); |
208 | 5 | Chain = End.getValue(2); |
209 | 5 | |
210 | 5 | // Now select between End and null, depending on whether the character |
211 | 5 | // was found. |
212 | 5 | SDValue Ops[] = {End, DAG.getConstant(0, DL, PtrVT), |
213 | 5 | DAG.getConstant(SystemZ::CCMASK_SRST, DL, MVT::i32), |
214 | 5 | DAG.getConstant(SystemZ::CCMASK_SRST_FOUND, DL, MVT::i32), |
215 | 5 | CCReg}; |
216 | 5 | End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, PtrVT, Ops); |
217 | 5 | return std::make_pair(End, Chain); |
218 | 5 | } |
219 | | |
220 | | std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForStrcpy( |
221 | | SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dest, |
222 | | SDValue Src, MachinePointerInfo DestPtrInfo, MachinePointerInfo SrcPtrInfo, |
223 | 3 | bool isStpcpy) const { |
224 | 3 | SDVTList VTs = DAG.getVTList(Dest.getValueType(), MVT::Other); |
225 | 3 | SDValue EndDest = DAG.getNode(SystemZISD::STPCPY, DL, VTs, Chain, Dest, Src, |
226 | 3 | DAG.getConstant(0, DL, MVT::i32)); |
227 | 3 | return std::make_pair(isStpcpy ? EndDest : Dest, EndDest.getValue(1)); |
228 | 3 | } |
229 | | |
230 | | std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForStrcmp( |
231 | | SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src1, |
232 | | SDValue Src2, MachinePointerInfo Op1PtrInfo, |
233 | 4 | MachinePointerInfo Op2PtrInfo) const { |
234 | 4 | SDVTList VTs = DAG.getVTList(Src1.getValueType(), MVT::i32, MVT::Other); |
235 | 4 | // Swap operands to invert CC == 1 vs. CC == 2 cases. |
236 | 4 | SDValue Unused = DAG.getNode(SystemZISD::STRCMP, DL, VTs, Chain, Src2, Src1, |
237 | 4 | DAG.getConstant(0, DL, MVT::i32)); |
238 | 4 | SDValue CCReg = Unused.getValue(1); |
239 | 4 | Chain = Unused.getValue(2); |
240 | 4 | return std::make_pair(addIPMSequence(DL, CCReg, DAG), Chain); |
241 | 4 | } |
242 | | |
243 | | // Search from Src for a null character, stopping once Src reaches Limit. |
244 | | // Return a pair of values, the first being the number of nonnull characters |
245 | | // and the second being the out chain. |
246 | | // |
247 | | // This can be used for strlen by setting Limit to 0. |
248 | | static std::pair<SDValue, SDValue> getBoundedStrlen(SelectionDAG &DAG, |
249 | | const SDLoc &DL, |
250 | | SDValue Chain, SDValue Src, |
251 | 2 | SDValue Limit) { |
252 | 2 | EVT PtrVT = Src.getValueType(); |
253 | 2 | SDVTList VTs = DAG.getVTList(PtrVT, MVT::i32, MVT::Other); |
254 | 2 | SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain, |
255 | 2 | Limit, Src, DAG.getConstant(0, DL, MVT::i32)); |
256 | 2 | Chain = End.getValue(2); |
257 | 2 | SDValue Len = DAG.getNode(ISD::SUB, DL, PtrVT, End, Src); |
258 | 2 | return std::make_pair(Len, Chain); |
259 | 2 | } |
260 | | |
261 | | std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForStrlen( |
262 | | SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src, |
263 | 1 | MachinePointerInfo SrcPtrInfo) const { |
264 | 1 | EVT PtrVT = Src.getValueType(); |
265 | 1 | return getBoundedStrlen(DAG, DL, Chain, Src, DAG.getConstant(0, DL, PtrVT)); |
266 | 1 | } |
267 | | |
268 | | std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForStrnlen( |
269 | | SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src, |
270 | 1 | SDValue MaxLength, MachinePointerInfo SrcPtrInfo) const { |
271 | 1 | EVT PtrVT = Src.getValueType(); |
272 | 1 | MaxLength = DAG.getZExtOrTrunc(MaxLength, DL, PtrVT); |
273 | 1 | SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, MaxLength); |
274 | 1 | return getBoundedStrlen(DAG, DL, Chain, Src, Limit); |
275 | 1 | } |