/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/IR/AutoUpgrade.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file implements the auto-upgrade helper functions. |
10 | | // This is where deprecated IR intrinsics and other IR features are updated to |
11 | | // current specifications. |
12 | | // |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #include "llvm/IR/AutoUpgrade.h" |
16 | | #include "llvm/ADT/StringSwitch.h" |
17 | | #include "llvm/IR/Constants.h" |
18 | | #include "llvm/IR/DIBuilder.h" |
19 | | #include "llvm/IR/DebugInfo.h" |
20 | | #include "llvm/IR/DiagnosticInfo.h" |
21 | | #include "llvm/IR/Function.h" |
22 | | #include "llvm/IR/IRBuilder.h" |
23 | | #include "llvm/IR/Instruction.h" |
24 | | #include "llvm/IR/IntrinsicInst.h" |
25 | | #include "llvm/IR/LLVMContext.h" |
26 | | #include "llvm/IR/Module.h" |
27 | | #include "llvm/IR/Verifier.h" |
28 | | #include "llvm/Support/ErrorHandling.h" |
29 | | #include "llvm/Support/Regex.h" |
30 | | #include <cstring> |
31 | | using namespace llvm; |
32 | | |
33 | 11.6k | static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); } |
34 | | |
35 | | // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have |
36 | | // changed their type from v4f32 to v2i64. |
37 | | static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID, |
38 | 193 | Function *&NewFn) { |
39 | 193 | // Check whether this is an old version of the function, which received |
40 | 193 | // v4f32 arguments. |
41 | 193 | Type *Arg0Type = F->getFunctionType()->getParamType(0); |
42 | 193 | if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4)) |
43 | 185 | return false; |
44 | 8 | |
45 | 8 | // Yes, it's old, replace it with new version. |
46 | 8 | rename(F); |
47 | 8 | NewFn = Intrinsic::getDeclaration(F->getParent(), IID); |
48 | 8 | return true; |
49 | 8 | } |
50 | | |
51 | | // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask |
52 | | // arguments have changed their type from i32 to i8. |
53 | | static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, |
54 | 159 | Function *&NewFn) { |
55 | 159 | // Check that the last argument is an i32. |
56 | 159 | Type *LastArgType = F->getFunctionType()->getParamType( |
57 | 159 | F->getFunctionType()->getNumParams() - 1); |
58 | 159 | if (!LastArgType->isIntegerTy(32)) |
59 | 118 | return false; |
60 | 41 | |
61 | 41 | // Move this function aside and map down. |
62 | 41 | rename(F); |
63 | 41 | NewFn = Intrinsic::getDeclaration(F->getParent(), IID); |
64 | 41 | return true; |
65 | 41 | } |
66 | | |
67 | 11.6k | static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { |
68 | 11.6k | // All of the intrinsics matches below should be marked with which llvm |
69 | 11.6k | // version started autoupgrading them. At some point in the future we would |
70 | 11.6k | // like to use this information to remove upgrade code for some older |
71 | 11.6k | // intrinsics. It is currently undecided how we will determine that future |
72 | 11.6k | // point. |
73 | 11.6k | if (Name == "addcarryx.u32" || // Added in 8.0 |
74 | 11.6k | Name == "addcarryx.u64"11.6k || // Added in 8.0 |
75 | 11.6k | Name == "addcarry.u32"11.6k || // Added in 8.0 |
76 | 11.6k | Name == "addcarry.u64"11.6k || // Added in 8.0 |
77 | 11.6k | Name == "subborrow.u32"11.6k || // Added in 8.0 |
78 | 11.6k | Name == "subborrow.u64"11.6k || // Added in 8.0 |
79 | 11.6k | Name.startswith("sse2.padds.")11.6k || // Added in 8.0 |
80 | 11.6k | Name.startswith("sse2.psubs.")11.6k || // Added in 8.0 |
81 | 11.6k | Name.startswith("sse2.paddus.")11.6k || // Added in 8.0 |
82 | 11.6k | Name.startswith("sse2.psubus.")11.6k || // Added in 8.0 |
83 | 11.6k | Name.startswith("avx2.padds.")11.5k || // Added in 8.0 |
84 | 11.6k | Name.startswith("avx2.psubs.")11.5k || // Added in 8.0 |
85 | 11.6k | Name.startswith("avx2.paddus.")11.5k || // Added in 8.0 |
86 | 11.6k | Name.startswith("avx2.psubus.")11.5k || // Added in 8.0 |
87 | 11.6k | Name.startswith("avx512.padds.")11.5k || // Added in 8.0 |
88 | 11.6k | Name.startswith("avx512.psubs.")11.5k || // Added in 8.0 |
89 | 11.6k | Name.startswith("avx512.mask.padds.")11.5k || // Added in 8.0 |
90 | 11.6k | Name.startswith("avx512.mask.psubs.")11.5k || // Added in 8.0 |
91 | 11.6k | Name.startswith("avx512.mask.paddus.")11.5k || // Added in 8.0 |
92 | 11.6k | Name.startswith("avx512.mask.psubus.")11.5k || // Added in 8.0 |
93 | 11.6k | Name=="ssse3.pabs.b.128"11.5k || // Added in 6.0 |
94 | 11.6k | Name=="ssse3.pabs.w.128"11.4k || // Added in 6.0 |
95 | 11.6k | Name=="ssse3.pabs.d.128"11.4k || // Added in 6.0 |
96 | 11.6k | Name.startswith("fma4.vfmadd.s")11.4k || // Added in 7.0 |
97 | 11.6k | Name.startswith("fma.vfmadd.")11.4k || // Added in 7.0 |
98 | 11.6k | Name.startswith("fma.vfmsub.")11.3k || // Added in 7.0 |
99 | 11.6k | Name.startswith("fma.vfmaddsub.")11.3k || // Added in 7.0 |
100 | 11.6k | Name.startswith("fma.vfmsubadd.")11.2k || // Added in 7.0 |
101 | 11.6k | Name.startswith("fma.vfnmadd.")11.2k || // Added in 7.0 |
102 | 11.6k | Name.startswith("fma.vfnmsub.")11.2k || // Added in 7.0 |
103 | 11.6k | Name.startswith("avx512.mask.vfmadd.")11.1k || // Added in 7.0 |
104 | 11.6k | Name.startswith("avx512.mask.vfnmadd.")11.1k || // Added in 7.0 |
105 | 11.6k | Name.startswith("avx512.mask.vfnmsub.")11.1k || // Added in 7.0 |
106 | 11.6k | Name.startswith("avx512.mask3.vfmadd.")11.1k || // Added in 7.0 |
107 | 11.6k | Name.startswith("avx512.maskz.vfmadd.")11.0k || // Added in 7.0 |
108 | 11.6k | Name.startswith("avx512.mask3.vfmsub.")11.0k || // Added in 7.0 |
109 | 11.6k | Name.startswith("avx512.mask3.vfnmsub.")11.0k || // Added in 7.0 |
110 | 11.6k | Name.startswith("avx512.mask.vfmaddsub.")11.0k || // Added in 7.0 |
111 | 11.6k | Name.startswith("avx512.maskz.vfmaddsub.")11.0k || // Added in 7.0 |
112 | 11.6k | Name.startswith("avx512.mask3.vfmaddsub.")11.0k || // Added in 7.0 |
113 | 11.6k | Name.startswith("avx512.mask3.vfmsubadd.")11.0k || // Added in 7.0 |
114 | 11.6k | Name.startswith("avx512.mask.shuf.i")10.9k || // Added in 6.0 |
115 | 11.6k | Name.startswith("avx512.mask.shuf.f")10.9k || // Added in 6.0 |
116 | 11.6k | Name.startswith("avx512.kunpck")10.9k || //added in 6.0 |
117 | 11.6k | Name.startswith("avx2.pabs.")10.9k || // Added in 6.0 |
118 | 11.6k | Name.startswith("avx512.mask.pabs.")10.9k || // Added in 6.0 |
119 | 11.6k | Name.startswith("avx512.broadcastm")10.9k || // Added in 6.0 |
120 | 11.6k | Name == "sse.sqrt.ss"10.9k || // Added in 7.0 |
121 | 11.6k | Name == "sse2.sqrt.sd"10.8k || // Added in 7.0 |
122 | 11.6k | Name.startswith("avx512.mask.sqrt.p")10.8k || // Added in 7.0 |
123 | 11.6k | Name.startswith("avx.sqrt.p")10.8k || // Added in 7.0 |
124 | 11.6k | Name.startswith("sse2.sqrt.p")10.8k || // Added in 7.0 |
125 | 11.6k | Name.startswith("sse.sqrt.p")10.8k || // Added in 7.0 |
126 | 11.6k | Name.startswith("avx512.mask.pbroadcast")10.8k || // Added in 6.0 |
127 | 11.6k | Name.startswith("sse2.pcmpeq.")10.8k || // Added in 3.1 |
128 | 11.6k | Name.startswith("sse2.pcmpgt.")10.8k || // Added in 3.1 |
129 | 11.6k | Name.startswith("avx2.pcmpeq.")10.8k || // Added in 3.1 |
130 | 11.6k | Name.startswith("avx2.pcmpgt.")10.8k || // Added in 3.1 |
131 | 11.6k | Name.startswith("avx512.mask.pcmpeq.")10.8k || // Added in 3.9 |
132 | 11.6k | Name.startswith("avx512.mask.pcmpgt.")10.7k || // Added in 3.9 |
133 | 11.6k | Name.startswith("avx.vperm2f128.")10.7k || // Added in 6.0 |
134 | 11.6k | Name == "avx2.vperm2i128"10.7k || // Added in 6.0 |
135 | 11.6k | Name == "sse.add.ss"10.7k || // Added in 4.0 |
136 | 11.6k | Name == "sse2.add.sd"10.7k || // Added in 4.0 |
137 | 11.6k | Name == "sse.sub.ss"10.7k || // Added in 4.0 |
138 | 11.6k | Name == "sse2.sub.sd"10.6k || // Added in 4.0 |
139 | 11.6k | Name == "sse.mul.ss"10.6k || // Added in 4.0 |
140 | 11.6k | Name == "sse2.mul.sd"10.6k || // Added in 4.0 |
141 | 11.6k | Name == "sse.div.ss"10.6k || // Added in 4.0 |
142 | 11.6k | Name == "sse2.div.sd"10.6k || // Added in 4.0 |
143 | 11.6k | Name == "sse41.pmaxsb"10.6k || // Added in 3.9 |
144 | 11.6k | Name == "sse2.pmaxs.w"10.6k || // Added in 3.9 |
145 | 11.6k | Name == "sse41.pmaxsd"10.6k || // Added in 3.9 |
146 | 11.6k | Name == "sse2.pmaxu.b"10.6k || // Added in 3.9 |
147 | 11.6k | Name == "sse41.pmaxuw"10.5k || // Added in 3.9 |
148 | 11.6k | Name == "sse41.pmaxud"10.5k || // Added in 3.9 |
149 | 11.6k | Name == "sse41.pminsb"10.5k || // Added in 3.9 |
150 | 11.6k | Name == "sse2.pmins.w"10.5k || // Added in 3.9 |
151 | 11.6k | Name == "sse41.pminsd"10.5k || // Added in 3.9 |
152 | 11.6k | Name == "sse2.pminu.b"10.5k || // Added in 3.9 |
153 | 11.6k | Name == "sse41.pminuw"10.5k || // Added in 3.9 |
154 | 11.6k | Name == "sse41.pminud"10.5k || // Added in 3.9 |
155 | 11.6k | Name == "avx512.kand.w"10.4k || // Added in 7.0 |
156 | 11.6k | Name == "avx512.kandn.w"10.4k || // Added in 7.0 |
157 | 11.6k | Name == "avx512.knot.w"10.4k || // Added in 7.0 |
158 | 11.6k | Name == "avx512.kor.w"10.4k || // Added in 7.0 |
159 | 11.6k | Name == "avx512.kxor.w"10.4k || // Added in 7.0 |
160 | 11.6k | Name == "avx512.kxnor.w"10.4k || // Added in 7.0 |
161 | 11.6k | Name == "avx512.kortestc.w"10.4k || // Added in 7.0 |
162 | 11.6k | Name == "avx512.kortestz.w"10.4k || // Added in 7.0 |
163 | 11.6k | Name.startswith("avx512.mask.pshuf.b.")10.4k || // Added in 4.0 |
164 | 11.6k | Name.startswith("avx2.pmax")10.4k || // Added in 3.9 |
165 | 11.6k | Name.startswith("avx2.pmin")10.4k || // Added in 3.9 |
166 | 11.6k | Name.startswith("avx512.mask.pmax")10.3k || // Added in 4.0 |
167 | 11.6k | Name.startswith("avx512.mask.pmin")10.3k || // Added in 4.0 |
168 | 11.6k | Name.startswith("avx2.vbroadcast")10.2k || // Added in 3.8 |
169 | 11.6k | Name.startswith("avx2.pbroadcast")10.2k || // Added in 3.8 |
170 | 11.6k | Name.startswith("avx.vpermil.")10.2k || // Added in 3.1 |
171 | 11.6k | Name.startswith("sse2.pshuf")10.1k || // Added in 3.9 |
172 | 11.6k | Name.startswith("avx512.pbroadcast")10.1k || // Added in 3.9 |
173 | 11.6k | Name.startswith("avx512.mask.broadcast.s")10.1k || // Added in 3.9 |
174 | 11.6k | Name.startswith("avx512.mask.movddup")10.1k || // Added in 3.9 |
175 | 11.6k | Name.startswith("avx512.mask.movshdup")10.1k || // Added in 3.9 |
176 | 11.6k | Name.startswith("avx512.mask.movsldup")10.1k || // Added in 3.9 |
177 | 11.6k | Name.startswith("avx512.mask.pshuf.d.")10.1k || // Added in 3.9 |
178 | 11.6k | Name.startswith("avx512.mask.pshufl.w.")10.0k || // Added in 3.9 |
179 | 11.6k | Name.startswith("avx512.mask.pshufh.w.")10.0k || // Added in 3.9 |
180 | 11.6k | Name.startswith("avx512.mask.shuf.p")10.0k || // Added in 4.0 |
181 | 11.6k | Name.startswith("avx512.mask.vpermil.p")10.0k || // Added in 3.9 |
182 | 11.6k | Name.startswith("avx512.mask.perm.df.")10.0k || // Added in 3.9 |
183 | 11.6k | Name.startswith("avx512.mask.perm.di.")10.0k || // Added in 3.9 |
184 | 11.6k | Name.startswith("avx512.mask.punpckl")10.0k || // Added in 3.9 |
185 | 11.6k | Name.startswith("avx512.mask.punpckh")10.0k || // Added in 3.9 |
186 | 11.6k | Name.startswith("avx512.mask.unpckl.")10.0k || // Added in 3.9 |
187 | 11.6k | Name.startswith("avx512.mask.unpckh.")9.99k || // Added in 3.9 |
188 | 11.6k | Name.startswith("avx512.mask.pand.")9.97k || // Added in 3.9 |
189 | 11.6k | Name.startswith("avx512.mask.pandn.")9.97k || // Added in 3.9 |
190 | 11.6k | Name.startswith("avx512.mask.por.")9.96k || // Added in 3.9 |
191 | 11.6k | Name.startswith("avx512.mask.pxor.")9.95k || // Added in 3.9 |
192 | 11.6k | Name.startswith("avx512.mask.and.")9.94k || // Added in 3.9 |
193 | 11.6k | Name.startswith("avx512.mask.andn.")9.94k || // Added in 3.9 |
194 | 11.6k | Name.startswith("avx512.mask.or.")9.93k || // Added in 3.9 |
195 | 11.6k | Name.startswith("avx512.mask.xor.")9.92k || // Added in 3.9 |
196 | 11.6k | Name.startswith("avx512.mask.padd.")9.92k || // Added in 4.0 |
197 | 11.6k | Name.startswith("avx512.mask.psub.")9.90k || // Added in 4.0 |
198 | 11.6k | Name.startswith("avx512.mask.pmull.")9.89k || // Added in 4.0 |
199 | 11.6k | Name.startswith("avx512.mask.cvtdq2pd.")9.88k || // Added in 4.0 |
200 | 11.6k | Name.startswith("avx512.mask.cvtudq2pd.")9.87k || // Added in 4.0 |
201 | 11.6k | Name.startswith("avx512.mask.cvtudq2ps.")9.86k || // Added in 7.0 updated 9.0 |
202 | 11.6k | Name.startswith("avx512.mask.cvtqq2pd.")9.86k || // Added in 7.0 updated 9.0 |
203 | 11.6k | Name.startswith("avx512.mask.cvtuqq2pd.")9.85k || // Added in 7.0 updated 9.0 |
204 | 11.6k | Name.startswith("avx512.mask.cvtdq2ps.")9.85k || // Added in 7.0 updated 9.0 |
205 | 11.6k | Name == "avx512.mask.cvtqq2ps.256"9.84k || // Added in 9.0 |
206 | 11.6k | Name == "avx512.mask.cvtqq2ps.512"9.84k || // Added in 9.0 |
207 | 11.6k | Name == "avx512.mask.cvtuqq2ps.256"9.84k || // Added in 9.0 |
208 | 11.6k | Name == "avx512.mask.cvtuqq2ps.512"9.83k || // Added in 9.0 |
209 | 11.6k | Name == "avx512.mask.cvtpd2dq.256"9.83k || // Added in 7.0 |
210 | 11.6k | Name == "avx512.mask.cvtpd2ps.256"9.83k || // Added in 7.0 |
211 | 11.6k | Name == "avx512.mask.cvttpd2dq.256"9.83k || // Added in 7.0 |
212 | 11.6k | Name == "avx512.mask.cvttps2dq.128"9.82k || // Added in 7.0 |
213 | 11.6k | Name == "avx512.mask.cvttps2dq.256"9.82k || // Added in 7.0 |
214 | 11.6k | Name == "avx512.mask.cvtps2pd.128"9.82k || // Added in 7.0 |
215 | 11.6k | Name == "avx512.mask.cvtps2pd.256"9.82k || // Added in 7.0 |
216 | 11.6k | Name == "avx512.cvtusi2sd"9.82k || // Added in 7.0 |
217 | 11.6k | Name.startswith("avx512.mask.permvar.")9.81k || // Added in 7.0 |
218 | 11.6k | Name == "sse2.pmulu.dq"9.79k || // Added in 7.0 |
219 | 11.6k | Name == "sse41.pmuldq"9.77k || // Added in 7.0 |
220 | 11.6k | Name == "avx2.pmulu.dq"9.77k || // Added in 7.0 |
221 | 11.6k | Name == "avx2.pmul.dq"9.76k || // Added in 7.0 |
222 | 11.6k | Name == "avx512.pmulu.dq.512"9.75k || // Added in 7.0 |
223 | 11.6k | Name == "avx512.pmul.dq.512"9.75k || // Added in 7.0 |
224 | 11.6k | Name.startswith("avx512.mask.pmul.dq.")9.75k || // Added in 4.0 |
225 | 11.6k | Name.startswith("avx512.mask.pmulu.dq.")9.74k || // Added in 4.0 |
226 | 11.6k | Name.startswith("avx512.mask.pmul.hr.sw.")9.73k || // Added in 7.0 |
227 | 11.6k | Name.startswith("avx512.mask.pmulh.w.")9.73k || // Added in 7.0 |
228 | 11.6k | Name.startswith("avx512.mask.pmulhu.w.")9.72k || // Added in 7.0 |
229 | 11.6k | Name.startswith("avx512.mask.pmaddw.d.")9.72k || // Added in 7.0 |
230 | 11.6k | Name.startswith("avx512.mask.pmaddubs.w.")9.71k || // Added in 7.0 |
231 | 11.6k | Name.startswith("avx512.mask.packsswb.")9.70k || // Added in 5.0 |
232 | 11.6k | Name.startswith("avx512.mask.packssdw.")9.70k || // Added in 5.0 |
233 | 11.6k | Name.startswith("avx512.mask.packuswb.")9.69k || // Added in 5.0 |
234 | 11.6k | Name.startswith("avx512.mask.packusdw.")9.69k || // Added in 5.0 |
235 | 11.6k | Name.startswith("avx512.mask.cmp.b")9.68k || // Added in 5.0 |
236 | 11.6k | Name.startswith("avx512.mask.cmp.d")9.67k || // Added in 5.0 |
237 | 11.6k | Name.startswith("avx512.mask.cmp.q")9.67k || // Added in 5.0 |
238 | 11.6k | Name.startswith("avx512.mask.cmp.w")9.66k || // Added in 5.0 |
239 | 11.6k | Name.startswith("avx512.mask.cmp.p")9.66k || // Added in 7.0 |
240 | 11.6k | Name.startswith("avx512.mask.ucmp.")9.64k || // Added in 5.0 |
241 | 11.6k | Name.startswith("avx512.cvtb2mask.")9.62k || // Added in 7.0 |
242 | 11.6k | Name.startswith("avx512.cvtw2mask.")9.61k || // Added in 7.0 |
243 | 11.6k | Name.startswith("avx512.cvtd2mask.")9.61k || // Added in 7.0 |
244 | 11.6k | Name.startswith("avx512.cvtq2mask.")9.60k || // Added in 7.0 |
245 | 11.6k | Name.startswith("avx512.mask.vpermilvar.")9.59k || // Added in 4.0 |
246 | 11.6k | Name.startswith("avx512.mask.psll.d")9.58k || // Added in 4.0 |
247 | 11.6k | Name.startswith("avx512.mask.psll.q")9.57k || // Added in 4.0 |
248 | 11.6k | Name.startswith("avx512.mask.psll.w")9.56k || // Added in 4.0 |
249 | 11.6k | Name.startswith("avx512.mask.psra.d")9.55k || // Added in 4.0 |
250 | 11.6k | Name.startswith("avx512.mask.psra.q")9.54k || // Added in 4.0 |
251 | 11.6k | Name.startswith("avx512.mask.psra.w")9.53k || // Added in 4.0 |
252 | 11.6k | Name.startswith("avx512.mask.psrl.d")9.52k || // Added in 4.0 |
253 | 11.6k | Name.startswith("avx512.mask.psrl.q")9.51k || // Added in 4.0 |
254 | 11.6k | Name.startswith("avx512.mask.psrl.w")9.49k || // Added in 4.0 |
255 | 11.6k | Name.startswith("avx512.mask.pslli")9.48k || // Added in 4.0 |
256 | 11.6k | Name.startswith("avx512.mask.psrai")9.48k || // Added in 4.0 |
257 | 11.6k | Name.startswith("avx512.mask.psrli")9.47k || // Added in 4.0 |
258 | 11.6k | Name.startswith("avx512.mask.psllv")9.47k || // Added in 4.0 |
259 | 11.6k | Name.startswith("avx512.mask.psrav")9.45k || // Added in 4.0 |
260 | 11.6k | Name.startswith("avx512.mask.psrlv")9.43k || // Added in 4.0 |
261 | 11.6k | Name.startswith("sse41.pmovsx")9.41k || // Added in 3.8 |
262 | 11.6k | Name.startswith("sse41.pmovzx")9.35k || // Added in 3.9 |
263 | 11.6k | Name.startswith("avx2.pmovsx")9.28k || // Added in 3.9 |
264 | 11.6k | Name.startswith("avx2.pmovzx")9.26k || // Added in 3.9 |
265 | 11.6k | Name.startswith("avx512.mask.pmovsx")9.23k || // Added in 4.0 |
266 | 11.6k | Name.startswith("avx512.mask.pmovzx")9.20k || // Added in 4.0 |
267 | 11.6k | Name.startswith("avx512.mask.lzcnt.")9.16k || // Added in 5.0 |
268 | 11.6k | Name.startswith("avx512.mask.pternlog.")9.15k || // Added in 7.0 |
269 | 11.6k | Name.startswith("avx512.maskz.pternlog.")9.14k || // Added in 7.0 |
270 | 11.6k | Name.startswith("avx512.mask.vpmadd52")9.12k || // Added in 7.0 |
271 | 11.6k | Name.startswith("avx512.maskz.vpmadd52")9.11k || // Added in 7.0 |
272 | 11.6k | Name.startswith("avx512.mask.vpermi2var.")9.10k || // Added in 7.0 |
273 | 11.6k | Name.startswith("avx512.mask.vpermt2var.")9.03k || // Added in 7.0 |
274 | 11.6k | Name.startswith("avx512.maskz.vpermt2var.")9.00k || // Added in 7.0 |
275 | 11.6k | Name.startswith("avx512.mask.vpdpbusd.")8.94k || // Added in 7.0 |
276 | 11.6k | Name.startswith("avx512.maskz.vpdpbusd.")8.94k || // Added in 7.0 |
277 | 11.6k | Name.startswith("avx512.mask.vpdpbusds.")8.93k || // Added in 7.0 |
278 | 11.6k | Name.startswith("avx512.maskz.vpdpbusds.")8.93k || // Added in 7.0 |
279 | 11.6k | Name.startswith("avx512.mask.vpdpwssd.")8.92k || // Added in 7.0 |
280 | 11.6k | Name.startswith("avx512.maskz.vpdpwssd.")8.91k || // Added in 7.0 |
281 | 11.6k | Name.startswith("avx512.mask.vpdpwssds.")8.91k || // Added in 7.0 |
282 | 11.6k | Name.startswith("avx512.maskz.vpdpwssds.")8.90k || // Added in 7.0 |
283 | 11.6k | Name.startswith("avx512.mask.dbpsadbw.")8.90k || // Added in 7.0 |
284 | 11.6k | Name.startswith("avx512.mask.vpshld.")8.89k || // Added in 7.0 |
285 | 11.6k | Name.startswith("avx512.mask.vpshrd.")8.87k || // Added in 7.0 |
286 | 11.6k | Name.startswith("avx512.mask.vpshldv.")8.85k || // Added in 8.0 |
287 | 11.6k | Name.startswith("avx512.mask.vpshrdv.")8.84k || // Added in 8.0 |
288 | 11.6k | Name.startswith("avx512.maskz.vpshldv.")8.82k || // Added in 8.0 |
289 | 11.6k | Name.startswith("avx512.maskz.vpshrdv.")8.80k || // Added in 8.0 |
290 | 11.6k | Name.startswith("avx512.vpshld.")8.78k || // Added in 8.0 |
291 | 11.6k | Name.startswith("avx512.vpshrd.")8.76k || // Added in 8.0 |
292 | 11.6k | Name.startswith("avx512.mask.add.p")8.75k || // Added in 7.0. 128/256 in 4.0 |
293 | 11.6k | Name.startswith("avx512.mask.sub.p")8.74k || // Added in 7.0. 128/256 in 4.0 |
294 | 11.6k | Name.startswith("avx512.mask.mul.p")8.73k || // Added in 7.0. 128/256 in 4.0 |
295 | 11.6k | Name.startswith("avx512.mask.div.p")8.73k || // Added in 7.0. 128/256 in 4.0 |
296 | 11.6k | Name.startswith("avx512.mask.max.p")8.72k || // Added in 7.0. 128/256 in 5.0 |
297 | 11.6k | Name.startswith("avx512.mask.min.p")8.71k || // Added in 7.0. 128/256 in 5.0 |
298 | 11.6k | Name.startswith("avx512.mask.fpclass.p")8.71k || // Added in 7.0 |
299 | 11.6k | Name.startswith("avx512.mask.vpshufbitqmb.")8.70k || // Added in 8.0 |
300 | 11.6k | Name.startswith("avx512.mask.pmultishift.qb.")8.69k || // Added in 8.0 |
301 | 11.6k | Name.startswith("avx512.mask.conflict.")8.69k || // Added in 9.0 |
302 | 11.6k | Name == "avx512.mask.pmov.qd.256"8.67k || // Added in 9.0 |
303 | 11.6k | Name == "avx512.mask.pmov.qd.512"8.67k || // Added in 9.0 |
304 | 11.6k | Name == "avx512.mask.pmov.wb.256"8.67k || // Added in 9.0 |
305 | 11.6k | Name == "avx512.mask.pmov.wb.512"8.66k || // Added in 9.0 |
306 | 11.6k | Name == "sse.cvtsi2ss"8.66k || // Added in 7.0 |
307 | 11.6k | Name == "sse.cvtsi642ss"8.65k || // Added in 7.0 |
308 | 11.6k | Name == "sse2.cvtsi2sd"8.64k || // Added in 7.0 |
309 | 11.6k | Name == "sse2.cvtsi642sd"8.63k || // Added in 7.0 |
310 | 11.6k | Name == "sse2.cvtss2sd"8.62k || // Added in 7.0 |
311 | 11.6k | Name == "sse2.cvtdq2pd"8.61k || // Added in 3.9 |
312 | 11.6k | Name == "sse2.cvtdq2ps"8.60k || // Added in 7.0 |
313 | 11.6k | Name == "sse2.cvtps2pd"8.59k || // Added in 3.9 |
314 | 11.6k | Name == "avx.cvtdq2.pd.256"8.58k || // Added in 3.9 |
315 | 11.6k | Name == "avx.cvtdq2.ps.256"8.57k || // Added in 7.0 |
316 | 11.6k | Name == "avx.cvt.ps2.pd.256"8.56k || // Added in 3.9 |
317 | 11.6k | Name.startswith("avx.vinsertf128.")8.56k || // Added in 3.7 |
318 | 11.6k | Name == "avx2.vinserti128"8.54k || // Added in 3.7 |
319 | 11.6k | Name.startswith("avx512.mask.insert")8.54k || // Added in 4.0 |
320 | 11.6k | Name.startswith("avx.vextractf128.")8.52k || // Added in 3.7 |
321 | 11.6k | Name == "avx2.vextracti128"8.50k || // Added in 3.7 |
322 | 11.6k | Name.startswith("avx512.mask.vextract")8.49k || // Added in 4.0 |
323 | 11.6k | Name.startswith("sse4a.movnt.")8.48k || // Added in 3.9 |
324 | 11.6k | Name.startswith("avx.movnt.")8.47k || // Added in 3.2 |
325 | 11.6k | Name.startswith("avx512.storent.")8.46k || // Added in 3.9 |
326 | 11.6k | Name == "sse41.movntdqa"8.45k || // Added in 5.0 |
327 | 11.6k | Name == "avx2.movntdqa"8.44k || // Added in 5.0 |
328 | 11.6k | Name == "avx512.movntdqa"8.43k || // Added in 5.0 |
329 | 11.6k | Name == "sse2.storel.dq"8.43k || // Added in 3.9 |
330 | 11.6k | Name.startswith("sse.storeu.")8.42k || // Added in 3.9 |
331 | 11.6k | Name.startswith("sse2.storeu.")8.41k || // Added in 3.9 |
332 | 11.6k | Name.startswith("avx.storeu.")8.39k || // Added in 3.9 |
333 | 11.6k | Name.startswith("avx512.mask.storeu.")8.37k || // Added in 3.9 |
334 | 11.6k | Name.startswith("avx512.mask.store.p")8.33k || // Added in 3.9 |
335 | 11.6k | Name.startswith("avx512.mask.store.b.")8.32k || // Added in 3.9 |
336 | 11.6k | Name.startswith("avx512.mask.store.w.")8.32k || // Added in 3.9 |
337 | 11.6k | Name.startswith("avx512.mask.store.d.")8.32k || // Added in 3.9 |
338 | 11.6k | Name.startswith("avx512.mask.store.q.")8.31k || // Added in 3.9 |
339 | 11.6k | Name == "avx512.mask.store.ss"8.31k || // Added in 7.0 |
340 | 11.6k | Name.startswith("avx512.mask.loadu.")8.31k || // Added in 3.9 |
341 | 11.6k | Name.startswith("avx512.mask.load.")8.27k || // Added in 3.9 |
342 | 11.6k | Name.startswith("avx512.mask.expand.load.")8.25k || // Added in 7.0 |
343 | 11.6k | Name.startswith("avx512.mask.compress.store.")8.21k || // Added in 7.0 |
344 | 11.6k | Name.startswith("avx512.mask.expand.b")8.18k || // Added in 9.0 |
345 | 11.6k | Name.startswith("avx512.mask.expand.w")8.16k || // Added in 9.0 |
346 | 11.6k | Name.startswith("avx512.mask.expand.d")8.15k || // Added in 9.0 |
347 | 11.6k | Name.startswith("avx512.mask.expand.q")8.15k || // Added in 9.0 |
348 | 11.6k | Name.startswith("avx512.mask.expand.p")8.14k || // Added in 9.0 |
349 | 11.6k | Name.startswith("avx512.mask.compress.b")8.13k || // Added in 9.0 |
350 | 11.6k | Name.startswith("avx512.mask.compress.w")8.12k || // Added in 9.0 |
351 | 11.6k | Name.startswith("avx512.mask.compress.d")8.10k || // Added in 9.0 |
352 | 11.6k | Name.startswith("avx512.mask.compress.q")8.10k || // Added in 9.0 |
353 | 11.6k | Name.startswith("avx512.mask.compress.p")8.09k || // Added in 9.0 |
354 | 11.6k | Name == "sse42.crc32.64.8"8.08k || // Added in 3.4 |
355 | 11.6k | Name.startswith("avx.vbroadcast.s")8.07k || // Added in 3.5 |
356 | 11.6k | Name.startswith("avx512.vbroadcast.s")8.07k || // Added in 7.0 |
357 | 11.6k | Name.startswith("avx512.mask.palignr.")8.07k || // Added in 3.9 |
358 | 11.6k | Name.startswith("avx512.mask.valign.")8.06k || // Added in 4.0 |
359 | 11.6k | Name.startswith("sse2.psll.dq")8.05k || // Added in 3.7 |
360 | 11.6k | Name.startswith("sse2.psrl.dq")8.03k || // Added in 3.7 |
361 | 11.6k | Name.startswith("avx2.psll.dq")8.01k || // Added in 3.7 |
362 | 11.6k | Name.startswith("avx2.psrl.dq")8.01k || // Added in 3.7 |
363 | 11.6k | Name.startswith("avx512.psll.dq")8.00k || // Added in 3.9 |
364 | 11.6k | Name.startswith("avx512.psrl.dq")8.00k || // Added in 3.9 |
365 | 11.6k | Name == "sse41.pblendw"7.99k || // Added in 3.7 |
366 | 11.6k | Name.startswith("sse41.blendp")7.98k || // Added in 3.7 |
367 | 11.6k | Name.startswith("avx.blend.p")7.95k || // Added in 3.7 |
368 | 11.6k | Name == "avx2.pblendw"7.94k || // Added in 3.7 |
369 | 11.6k | Name.startswith("avx2.pblendd.")7.93k || // Added in 3.7 |
370 | 11.6k | Name.startswith("avx.vbroadcastf128")7.91k || // Added in 4.0 |
371 | 11.6k | Name == "avx2.vbroadcasti128"7.90k || // Added in 3.7 |
372 | 11.6k | Name.startswith("avx512.mask.broadcastf")7.90k || // Added in 6.0 |
373 | 11.6k | Name.startswith("avx512.mask.broadcasti")7.89k || // Added in 6.0 |
374 | 11.6k | Name == "xop.vpcmov"7.87k || // Added in 3.8 |
375 | 11.6k | Name == "xop.vpcmov.256"7.87k || // Added in 5.0 |
376 | 11.6k | Name.startswith("avx512.mask.move.s")7.86k || // Added in 4.0 |
377 | 11.6k | Name.startswith("avx512.cvtmask2")7.86k || // Added in 5.0 |
378 | 11.6k | Name.startswith("xop.vpcom")7.83k || // Added in 3.2, Updated in 9.0 |
379 | 11.6k | Name.startswith("xop.vprot")7.67k || // Added in 8.0 |
380 | 11.6k | Name.startswith("avx512.prol")7.66k || // Added in 8.0 |
381 | 11.6k | Name.startswith("avx512.pror")7.63k || // Added in 8.0 |
382 | 11.6k | Name.startswith("avx512.mask.prorv.")7.61k || // Added in 8.0 |
383 | 11.6k | Name.startswith("avx512.mask.pror.")7.59k || // Added in 8.0 |
384 | 11.6k | Name.startswith("avx512.mask.prolv.")7.58k || // Added in 8.0 |
385 | 11.6k | Name.startswith("avx512.mask.prol.")7.57k || // Added in 8.0 |
386 | 11.6k | Name.startswith("avx512.ptestm")7.55k || //Added in 6.0 |
387 | 11.6k | Name.startswith("avx512.ptestnm")7.53k || //Added in 6.0 |
388 | 11.6k | Name.startswith("avx512.mask.pavg")7.50k ) // Added in 6.0 |
389 | 4.14k | return true; |
390 | 7.49k | |
391 | 7.49k | return false; |
392 | 7.49k | } |
393 | | |
394 | | static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name, |
395 | 11.7k | Function *&NewFn) { |
396 | 11.7k | // Only handle intrinsics that start with "x86.". |
397 | 11.7k | if (!Name.startswith("x86.")) |
398 | 76 | return false; |
399 | 11.6k | // Remove "x86." prefix. |
400 | 11.6k | Name = Name.substr(4); |
401 | 11.6k | |
402 | 11.6k | if (ShouldUpgradeX86Intrinsic(F, Name)) { |
403 | 4.14k | NewFn = nullptr; |
404 | 4.14k | return true; |
405 | 4.14k | } |
406 | 7.49k | |
407 | 7.49k | if (Name == "rdtscp") { // Added in 8.0 |
408 | 4 | // If this intrinsic has 0 operands, it's the new version. |
409 | 4 | if (F->getFunctionType()->getNumParams() == 0) |
410 | 2 | return false; |
411 | 2 | |
412 | 2 | rename(F); |
413 | 2 | NewFn = Intrinsic::getDeclaration(F->getParent(), |
414 | 2 | Intrinsic::x86_rdtscp); |
415 | 2 | return true; |
416 | 2 | } |
417 | 7.49k | |
418 | 7.49k | // SSE4.1 ptest functions may have an old signature. |
419 | 7.49k | if (Name.startswith("sse41.ptest")) { // Added in 3.2 |
420 | 193 | if (Name.substr(11) == "c") |
421 | 66 | return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn); |
422 | 127 | if (Name.substr(11) == "z") |
423 | 65 | return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn); |
424 | 62 | if (Name.substr(11) == "nzc") |
425 | 62 | return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn); |
426 | 7.30k | } |
427 | 7.30k | // Several blend and other instructions with masks used the wrong number of |
428 | 7.30k | // bits. |
429 | 7.30k | if (Name == "sse41.insertps") // Added in 3.6 |
430 | 51 | return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps, |
431 | 51 | NewFn); |
432 | 7.24k | if (Name == "sse41.dppd") // Added in 3.6 |
433 | 26 | return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd, |
434 | 26 | NewFn); |
435 | 7.22k | if (Name == "sse41.dpps") // Added in 3.6 |
436 | 26 | return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps, |
437 | 26 | NewFn); |
438 | 7.19k | if (Name == "sse41.mpsadbw") // Added in 3.6 |
439 | 26 | return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw, |
440 | 26 | NewFn); |
441 | 7.17k | if (Name == "avx.dp.ps.256") // Added in 3.6 |
442 | 15 | return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256, |
443 | 15 | NewFn); |
444 | 7.15k | if (Name == "avx2.mpsadbw") // Added in 3.6 |
445 | 15 | return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw, |
446 | 15 | NewFn); |
447 | 7.14k | |
448 | 7.14k | // frcz.ss/sd may need to have an argument dropped. Added in 3.2 |
449 | 7.14k | if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 25 ) { |
450 | 0 | rename(F); |
451 | 0 | NewFn = Intrinsic::getDeclaration(F->getParent(), |
452 | 0 | Intrinsic::x86_xop_vfrcz_ss); |
453 | 0 | return true; |
454 | 0 | } |
455 | 7.14k | if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 25 ) { |
456 | 0 | rename(F); |
457 | 0 | NewFn = Intrinsic::getDeclaration(F->getParent(), |
458 | 0 | Intrinsic::x86_xop_vfrcz_sd); |
459 | 0 | return true; |
460 | 0 | } |
461 | 7.14k | // Upgrade any XOP PERMIL2 index operand still using a float/double vector. |
462 | 7.14k | if (Name.startswith("xop.vpermil2")) { // Added in 3.9 |
463 | 48 | auto Idx = F->getFunctionType()->getParamType(2); |
464 | 48 | if (Idx->isFPOrFPVectorTy()) { |
465 | 4 | rename(F); |
466 | 4 | unsigned IdxSize = Idx->getPrimitiveSizeInBits(); |
467 | 4 | unsigned EltSize = Idx->getScalarSizeInBits(); |
468 | 4 | Intrinsic::ID Permil2ID; |
469 | 4 | if (EltSize == 64 && IdxSize == 1282 ) |
470 | 1 | Permil2ID = Intrinsic::x86_xop_vpermil2pd; |
471 | 3 | else if (EltSize == 32 && IdxSize == 1282 ) |
472 | 1 | Permil2ID = Intrinsic::x86_xop_vpermil2ps; |
473 | 2 | else if (EltSize == 64 && IdxSize == 2561 ) |
474 | 1 | Permil2ID = Intrinsic::x86_xop_vpermil2pd_256; |
475 | 1 | else |
476 | 1 | Permil2ID = Intrinsic::x86_xop_vpermil2ps_256; |
477 | 4 | NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID); |
478 | 4 | return true; |
479 | 4 | } |
480 | 7.13k | } |
481 | 7.13k | |
482 | 7.13k | if (Name == "seh.recoverfp") { |
483 | 1 | NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp); |
484 | 1 | return true; |
485 | 1 | } |
486 | 7.13k | |
487 | 7.13k | return false; |
488 | 7.13k | } |
489 | | |
490 | 3.79M | static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { |
491 | 3.79M | assert(F && "Illegal to upgrade a non-existent Function."); |
492 | 3.79M | |
493 | 3.79M | // Upgrade intrinsics "clang.arc.use" which doesn't start with "llvm.". |
494 | 3.79M | if (F->getName() == "clang.arc.use") { |
495 | 3 | NewFn = nullptr; |
496 | 3 | return true; |
497 | 3 | } |
498 | 3.79M | |
499 | 3.79M | // Quickly eliminate it, if it's not a candidate. |
500 | 3.79M | StringRef Name = F->getName(); |
501 | 3.79M | if (Name.size() <= 8 || !Name.startswith("llvm.")2.20M ) |
502 | 3.28M | return false; |
503 | 512k | Name = Name.substr(5); // Strip off "llvm." |
504 | 512k | |
505 | 512k | switch (Name[0]) { |
506 | 512k | default: break96.5k ; |
507 | 512k | case 'a': { |
508 | 227k | if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")227k ) { |
509 | 3 | NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse, |
510 | 3 | F->arg_begin()->getType()); |
511 | 3 | return true; |
512 | 3 | } |
513 | 227k | if (Name.startswith("arm.neon.vclz")) { |
514 | 2 | Type* args[2] = { |
515 | 2 | F->arg_begin()->getType(), |
516 | 2 | Type::getInt1Ty(F->getContext()) |
517 | 2 | }; |
518 | 2 | // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to |
519 | 2 | // the end of the name. Change name from llvm.arm.neon.vclz.* to |
520 | 2 | // llvm.ctlz.* |
521 | 2 | FunctionType* fType = FunctionType::get(F->getReturnType(), args, false); |
522 | 2 | NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(), |
523 | 2 | "llvm.ctlz." + Name.substr(14), F->getParent()); |
524 | 2 | return true; |
525 | 2 | } |
526 | 227k | if (Name.startswith("arm.neon.vcnt")) { |
527 | 2 | NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop, |
528 | 2 | F->arg_begin()->getType()); |
529 | 2 | return true; |
530 | 2 | } |
531 | 227k | Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$"); |
532 | 227k | if (vldRegex.match(Name)) { |
533 | 7 | auto fArgs = F->getFunctionType()->params(); |
534 | 7 | SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end()); |
535 | 7 | // Can't use Intrinsic::getDeclaration here as the return types might |
536 | 7 | // then only be structurally equal. |
537 | 7 | FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false); |
538 | 7 | NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(), |
539 | 7 | "llvm." + Name + ".p0i8", F->getParent()); |
540 | 7 | return true; |
541 | 7 | } |
542 | 227k | Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$"); |
543 | 227k | if (vstRegex.match(Name)) { |
544 | 7 | static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1, |
545 | 7 | Intrinsic::arm_neon_vst2, |
546 | 7 | Intrinsic::arm_neon_vst3, |
547 | 7 | Intrinsic::arm_neon_vst4}; |
548 | 7 | |
549 | 7 | static const Intrinsic::ID StoreLaneInts[] = { |
550 | 7 | Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane, |
551 | 7 | Intrinsic::arm_neon_vst4lane |
552 | 7 | }; |
553 | 7 | |
554 | 7 | auto fArgs = F->getFunctionType()->params(); |
555 | 7 | Type *Tys[] = {fArgs[0], fArgs[1]}; |
556 | 7 | if (Name.find("lane") == StringRef::npos) |
557 | 4 | NewFn = Intrinsic::getDeclaration(F->getParent(), |
558 | 4 | StoreInts[fArgs.size() - 3], Tys); |
559 | 3 | else |
560 | 3 | NewFn = Intrinsic::getDeclaration(F->getParent(), |
561 | 3 | StoreLaneInts[fArgs.size() - 5], Tys); |
562 | 7 | return true; |
563 | 7 | } |
564 | 227k | if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer"227k ) { |
565 | 2 | NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer); |
566 | 2 | return true; |
567 | 2 | } |
568 | 227k | if (Name.startswith("aarch64.neon.addp")) { |
569 | 23 | if (F->arg_size() != 2) |
570 | 0 | break; // Invalid IR. |
571 | 23 | auto fArgs = F->getFunctionType()->params(); |
572 | 23 | VectorType *ArgTy = dyn_cast<VectorType>(fArgs[0]); |
573 | 23 | if (ArgTy && ArgTy->getElementType()->isFloatingPointTy()) { |
574 | 1 | NewFn = Intrinsic::getDeclaration(F->getParent(), |
575 | 1 | Intrinsic::aarch64_neon_faddp, fArgs); |
576 | 1 | return true; |
577 | 1 | } |
578 | 227k | } |
579 | 227k | break; |
580 | 227k | } |
581 | 227k | |
582 | 227k | case 'c': { |
583 | 22.7k | if (Name.startswith("ctlz.") && F->arg_size() == 13.55k ) { |
584 | 33 | rename(F); |
585 | 33 | NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, |
586 | 33 | F->arg_begin()->getType()); |
587 | 33 | return true; |
588 | 33 | } |
589 | 22.7k | if (Name.startswith("cttz.") && F->arg_size() == 1600 ) { |
590 | 29 | rename(F); |
591 | 29 | NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz, |
592 | 29 | F->arg_begin()->getType()); |
593 | 29 | return true; |
594 | 29 | } |
595 | 22.6k | break; |
596 | 22.6k | } |
597 | 22.6k | case 'd': { |
598 | 1.42k | if (Name == "dbg.value" && F->arg_size() == 4651 ) { |
599 | 127 | rename(F); |
600 | 127 | NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value); |
601 | 127 | return true; |
602 | 127 | } |
603 | 1.30k | break; |
604 | 1.30k | } |
605 | 25.5k | case 'e': { |
606 | 25.5k | SmallVector<StringRef, 2> Groups; |
607 | 25.5k | Regex R("^experimental.vector.reduce.([a-z]+)\\.[fi][0-9]+"); |
608 | 25.5k | if (R.match(Name, &Groups)) { |
609 | 8 | Intrinsic::ID ID = Intrinsic::not_intrinsic; |
610 | 8 | if (Groups[1] == "fadd") |
611 | 4 | ID = Intrinsic::experimental_vector_reduce_v2_fadd; |
612 | 8 | if (Groups[1] == "fmul") |
613 | 4 | ID = Intrinsic::experimental_vector_reduce_v2_fmul; |
614 | 8 | |
615 | 8 | if (ID != Intrinsic::not_intrinsic) { |
616 | 8 | rename(F); |
617 | 8 | auto Args = F->getFunctionType()->params(); |
618 | 8 | Type *Tys[] = {F->getFunctionType()->getReturnType(), Args[1]}; |
619 | 8 | NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys); |
620 | 8 | return true; |
621 | 8 | } |
622 | 25.5k | } |
623 | 25.5k | break; |
624 | 25.5k | } |
625 | 29.6k | case 'i': |
626 | 29.6k | case 'l': { |
627 | 29.6k | bool IsLifetimeStart = Name.startswith("lifetime.start"); |
628 | 29.6k | if (IsLifetimeStart || Name.startswith("invariant.start")23.6k ) { |
629 | 6.07k | Intrinsic::ID ID = IsLifetimeStart ? |
630 | 6.01k | Intrinsic::lifetime_start : Intrinsic::invariant_start63 ; |
631 | 6.07k | auto Args = F->getFunctionType()->params(); |
632 | 6.07k | Type* ObjectPtr[1] = {Args[1]}; |
633 | 6.07k | if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) { |
634 | 2.85k | rename(F); |
635 | 2.85k | NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr); |
636 | 2.85k | return true; |
637 | 2.85k | } |
638 | 26.8k | } |
639 | 26.8k | |
640 | 26.8k | bool IsLifetimeEnd = Name.startswith("lifetime.end"); |
641 | 26.8k | if (IsLifetimeEnd || Name.startswith("invariant.end")20.8k ) { |
642 | 6.03k | Intrinsic::ID ID = IsLifetimeEnd ? |
643 | 5.99k | Intrinsic::lifetime_end : Intrinsic::invariant_end48 ; |
644 | 6.03k | |
645 | 6.03k | auto Args = F->getFunctionType()->params(); |
646 | 6.03k | Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 15.99k : 248 ]}; |
647 | 6.03k | if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) { |
648 | 2.83k | rename(F); |
649 | 2.83k | NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr); |
650 | 2.83k | return true; |
651 | 2.83k | } |
652 | 23.9k | } |
653 | 23.9k | if (Name.startswith("invariant.group.barrier")) { |
654 | 3 | // Rename invariant.group.barrier to launder.invariant.group |
655 | 3 | auto Args = F->getFunctionType()->params(); |
656 | 3 | Type* ObjectPtr[1] = {Args[0]}; |
657 | 3 | rename(F); |
658 | 3 | NewFn = Intrinsic::getDeclaration(F->getParent(), |
659 | 3 | Intrinsic::launder_invariant_group, ObjectPtr); |
660 | 3 | return true; |
661 | 3 | |
662 | 3 | } |
663 | 23.9k | |
664 | 23.9k | break; |
665 | 23.9k | } |
666 | 23.9k | case 'm': { |
667 | 19.0k | if (Name.startswith("masked.load.")) { |
668 | 671 | Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() }; |
669 | 671 | if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) { |
670 | 11 | rename(F); |
671 | 11 | NewFn = Intrinsic::getDeclaration(F->getParent(), |
672 | 11 | Intrinsic::masked_load, |
673 | 11 | Tys); |
674 | 11 | return true; |
675 | 11 | } |
676 | 19.0k | } |
677 | 19.0k | if (Name.startswith("masked.store.")) { |
678 | 959 | auto Args = F->getFunctionType()->params(); |
679 | 959 | Type *Tys[] = { Args[0], Args[1] }; |
680 | 959 | if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) { |
681 | 10 | rename(F); |
682 | 10 | NewFn = Intrinsic::getDeclaration(F->getParent(), |
683 | 10 | Intrinsic::masked_store, |
684 | 10 | Tys); |
685 | 10 | return true; |
686 | 10 | } |
687 | 19.0k | } |
688 | 19.0k | // Renaming gather/scatter intrinsics with no address space overloading |
689 | 19.0k | // to the new overload which includes an address space |
690 | 19.0k | if (Name.startswith("masked.gather.")) { |
691 | 614 | Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()}; |
692 | 614 | if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) { |
693 | 49 | rename(F); |
694 | 49 | NewFn = Intrinsic::getDeclaration(F->getParent(), |
695 | 49 | Intrinsic::masked_gather, Tys); |
696 | 49 | return true; |
697 | 49 | } |
698 | 19.0k | } |
699 | 19.0k | if (Name.startswith("masked.scatter.")) { |
700 | 485 | auto Args = F->getFunctionType()->params(); |
701 | 485 | Type *Tys[] = {Args[0], Args[1]}; |
702 | 485 | if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) { |
703 | 9 | rename(F); |
704 | 9 | NewFn = Intrinsic::getDeclaration(F->getParent(), |
705 | 9 | Intrinsic::masked_scatter, Tys); |
706 | 9 | return true; |
707 | 9 | } |
708 | 19.0k | } |
709 | 19.0k | // Updating the memory intrinsics (memcpy/memmove/memset) that have an |
710 | 19.0k | // alignment parameter to embedding the alignment as an attribute of |
711 | 19.0k | // the pointer args. |
712 | 19.0k | if (Name.startswith("memcpy.") && F->arg_size() == 56.45k ) { |
713 | 2.80k | rename(F); |
714 | 2.80k | // Get the types of dest, src, and len |
715 | 2.80k | ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3); |
716 | 2.80k | NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy, |
717 | 2.80k | ParamTypes); |
718 | 2.80k | return true; |
719 | 2.80k | } |
720 | 16.2k | if (Name.startswith("memmove.") && F->arg_size() == 5159 ) { |
721 | 3 | rename(F); |
722 | 3 | // Get the types of dest, src, and len |
723 | 3 | ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3); |
724 | 3 | NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove, |
725 | 3 | ParamTypes); |
726 | 3 | return true; |
727 | 3 | } |
728 | 16.2k | if (Name.startswith("memset.") && F->arg_size() == 56.02k ) { |
729 | 2.80k | rename(F); |
730 | 2.80k | // Get the types of dest, and len |
731 | 2.80k | const auto *FT = F->getFunctionType(); |
732 | 2.80k | Type *ParamTypes[2] = { |
733 | 2.80k | FT->getParamType(0), // Dest |
734 | 2.80k | FT->getParamType(2) // len |
735 | 2.80k | }; |
736 | 2.80k | NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset, |
737 | 2.80k | ParamTypes); |
738 | 2.80k | return true; |
739 | 2.80k | } |
740 | 13.4k | break; |
741 | 13.4k | } |
742 | 17.5k | case 'n': { |
743 | 17.5k | if (Name.startswith("nvvm.")) { |
744 | 566 | Name = Name.substr(5); |
745 | 566 | |
746 | 566 | // The following nvvm intrinsics correspond exactly to an LLVM intrinsic. |
747 | 566 | Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name) |
748 | 566 | .Cases("brev32", "brev64", Intrinsic::bitreverse) |
749 | 566 | .Case("clz.i", Intrinsic::ctlz) |
750 | 566 | .Case("popc.i", Intrinsic::ctpop) |
751 | 566 | .Default(Intrinsic::not_intrinsic); |
752 | 566 | if (IID != Intrinsic::not_intrinsic && F->arg_size() == 18 ) { |
753 | 8 | NewFn = Intrinsic::getDeclaration(F->getParent(), IID, |
754 | 8 | {F->getReturnType()}); |
755 | 8 | return true; |
756 | 8 | } |
757 | 558 | |
758 | 558 | // The following nvvm intrinsics correspond exactly to an LLVM idiom, but |
759 | 558 | // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall. |
760 | 558 | // |
761 | 558 | // TODO: We could add lohi.i2d. |
762 | 558 | bool Expand = StringSwitch<bool>(Name) |
763 | 558 | .Cases("abs.i", "abs.ll", true) |
764 | 558 | .Cases("clz.ll", "popc.ll", "h2f", true) |
765 | 558 | .Cases("max.i", "max.ll", "max.ui", "max.ull", true) |
766 | 558 | .Cases("min.i", "min.ll", "min.ui", "min.ull", true) |
767 | 558 | .StartsWith("atomic.load.add.f32.p", true) |
768 | 558 | .StartsWith("atomic.load.add.f64.p", true) |
769 | 558 | .Default(false); |
770 | 558 | if (Expand) { |
771 | 35 | NewFn = nullptr; |
772 | 35 | return true; |
773 | 35 | } |
774 | 17.4k | } |
775 | 17.4k | break; |
776 | 17.4k | } |
777 | 17.4k | case 'o': |
778 | 405 | // We only need to change the name to match the mangling including the |
779 | 405 | // address space. |
780 | 405 | if (Name.startswith("objectsize.")) { |
781 | 142 | Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() }; |
782 | 142 | if (F->arg_size() == 2 || F->arg_size() == 3104 || |
783 | 142 | F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)101 ) { |
784 | 42 | rename(F); |
785 | 42 | NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize, |
786 | 42 | Tys); |
787 | 42 | return true; |
788 | 42 | } |
789 | 363 | } |
790 | 363 | break; |
791 | 363 | |
792 | 59.8k | case 's': |
793 | 59.8k | if (Name == "stackprotectorcheck") { |
794 | 6 | NewFn = nullptr; |
795 | 6 | return true; |
796 | 6 | } |
797 | 59.8k | break; |
798 | 59.8k | |
799 | 59.8k | case 'x': |
800 | 11.7k | if (UpgradeX86IntrinsicFunction(F, Name, NewFn)) |
801 | 4.20k | return true; |
802 | 496k | } |
803 | 496k | // Remangle our intrinsic since we upgrade the mangling |
804 | 496k | auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F); |
805 | 496k | if (Result != None) { |
806 | 452 | NewFn = Result.getValue(); |
807 | 452 | return true; |
808 | 452 | } |
809 | 495k | |
810 | 495k | // This may not belong here. This function is effectively being overloaded |
811 | 495k | // to both detect an intrinsic which needs upgrading, and to provide the |
812 | 495k | // upgraded form of the intrinsic. We should perhaps have two separate |
813 | 495k | // functions for this. |
814 | 495k | return false; |
815 | 495k | } |
816 | | |
817 | 3.79M | bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) { |
818 | 3.79M | NewFn = nullptr; |
819 | 3.79M | bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn); |
820 | 3.79M | assert(F != NewFn && "Intrinsic function upgraded to the same function"); |
821 | 3.79M | |
822 | 3.79M | // Upgrade intrinsic attributes. This does not change the function. |
823 | 3.79M | if (NewFn) |
824 | 12.1k | F = NewFn; |
825 | 3.79M | if (Intrinsic::ID id = F->getIntrinsicID()) |
826 | 507k | F->setAttributes(Intrinsic::getAttributes(F->getContext(), id)); |
827 | 3.79M | return Upgraded; |
828 | 3.79M | } |
829 | | |
830 | 960k | GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) { |
831 | 960k | if (!(GV->hasName() && (647k GV->getName() == "llvm.global_ctors"647k || |
832 | 647k | GV->getName() == "llvm.global_dtors"646k )) || |
833 | 960k | !GV->hasInitializer()3.84k ) |
834 | 956k | return nullptr; |
835 | 3.83k | ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType()); |
836 | 3.83k | if (!ATy) |
837 | 0 | return nullptr; |
838 | 3.83k | StructType *STy = dyn_cast<StructType>(ATy->getElementType()); |
839 | 3.83k | if (!STy3.83k || STy->getNumElements() != 2) |
840 | 3.83k | return nullptr; |
841 | 3 | |
842 | 3 | LLVMContext &C = GV->getContext(); |
843 | 3 | IRBuilder<> IRB(C); |
844 | 3 | auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1), |
845 | 3 | IRB.getInt8PtrTy()); |
846 | 3 | Constant *Init = GV->getInitializer(); |
847 | 3 | unsigned N = Init->getNumOperands(); |
848 | 3 | std::vector<Constant *> NewCtors(N); |
849 | 7 | for (unsigned i = 0; i != N; ++i4 ) { |
850 | 4 | auto Ctor = cast<Constant>(Init->getOperand(i)); |
851 | 4 | NewCtors[i] = ConstantStruct::get( |
852 | 4 | EltTy, Ctor->getAggregateElement(0u), Ctor->getAggregateElement(1), |
853 | 4 | Constant::getNullValue(IRB.getInt8PtrTy())); |
854 | 4 | } |
855 | 3 | Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors); |
856 | 3 | |
857 | 3 | return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(), |
858 | 3 | NewInit, GV->getName()); |
859 | 3 | } |
860 | | |
861 | | // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them |
862 | | // to byte shuffles. |
863 | | static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, |
864 | 30 | Value *Op, unsigned Shift) { |
865 | 30 | Type *ResultTy = Op->getType(); |
866 | 30 | unsigned NumElts = ResultTy->getVectorNumElements() * 8; |
867 | 30 | |
868 | 30 | // Bitcast from a 64-bit element type to a byte element type. |
869 | 30 | Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts); |
870 | 30 | Op = Builder.CreateBitCast(Op, VecTy, "cast"); |
871 | 30 | |
872 | 30 | // We'll be shuffling in zeroes. |
873 | 30 | Value *Res = Constant::getNullValue(VecTy); |
874 | 30 | |
875 | 30 | // If shift is less than 16, emit a shuffle to move the bytes. Otherwise, |
876 | 30 | // we'll just return the zero vector. |
877 | 30 | if (Shift < 16) { |
878 | 30 | uint32_t Idxs[64]; |
879 | 30 | // 256/512-bit version is split into 2/4 16-byte lanes. |
880 | 86 | for (unsigned l = 0; l != NumElts; l += 1656 ) |
881 | 952 | for (unsigned i = 0; 56 i != 16; ++i896 ) { |
882 | 896 | unsigned Idx = NumElts + i - Shift; |
883 | 896 | if (Idx < NumElts) |
884 | 244 | Idx -= NumElts - 16; // end of lane, switch operand. |
885 | 896 | Idxs[l + i] = Idx + l; |
886 | 896 | } |
887 | 30 | |
888 | 30 | Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts)); |
889 | 30 | } |
890 | 30 | |
891 | 30 | // Bitcast back to a 64-bit element type. |
892 | 30 | return Builder.CreateBitCast(Res, ResultTy, "cast"); |
893 | 30 | } |
894 | | |
895 | | // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them |
896 | | // to byte shuffles. |
897 | | static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, |
898 | 124 | unsigned Shift) { |
899 | 124 | Type *ResultTy = Op->getType(); |
900 | 124 | unsigned NumElts = ResultTy->getVectorNumElements() * 8; |
901 | 124 | |
902 | 124 | // Bitcast from a 64-bit element type to a byte element type. |
903 | 124 | Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts); |
904 | 124 | Op = Builder.CreateBitCast(Op, VecTy, "cast"); |
905 | 124 | |
906 | 124 | // We'll be shuffling in zeroes. |
907 | 124 | Value *Res = Constant::getNullValue(VecTy); |
908 | 124 | |
909 | 124 | // If shift is less than 16, emit a shuffle to move the bytes. Otherwise, |
910 | 124 | // we'll just return the zero vector. |
911 | 124 | if (Shift < 16) { |
912 | 124 | uint32_t Idxs[64]; |
913 | 124 | // 256/512-bit version is split into 2/4 16-byte lanes. |
914 | 274 | for (unsigned l = 0; l != NumElts; l += 16150 ) |
915 | 2.55k | for (unsigned i = 0; 150 i != 16; ++i2.40k ) { |
916 | 2.40k | unsigned Idx = i + Shift; |
917 | 2.40k | if (Idx >= 16) |
918 | 660 | Idx += NumElts - 16; // end of lane, switch operand. |
919 | 2.40k | Idxs[l + i] = Idx + l; |
920 | 2.40k | } |
921 | 124 | |
922 | 124 | Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts)); |
923 | 124 | } |
924 | 124 | |
925 | 124 | // Bitcast back to a 64-bit element type. |
926 | 124 | return Builder.CreateBitCast(Res, ResultTy, "cast"); |
927 | 124 | } |
928 | | |
929 | | static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask, |
930 | 4.38k | unsigned NumElts) { |
931 | 4.38k | llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(), |
932 | 4.38k | cast<IntegerType>(Mask->getType())->getBitWidth()); |
933 | 4.38k | Mask = Builder.CreateBitCast(Mask, MaskTy); |
934 | 4.38k | |
935 | 4.38k | // If we have less than 8 elements, then the starting mask was an i8 and |
936 | 4.38k | // we need to extract down to the right number of elements. |
937 | 4.38k | if (NumElts < 8) { |
938 | 1.29k | uint32_t Indices[4]; |
939 | 5.72k | for (unsigned i = 0; i != NumElts; ++i4.43k ) |
940 | 4.43k | Indices[i] = i; |
941 | 1.29k | Mask = Builder.CreateShuffleVector(Mask, Mask, |
942 | 1.29k | makeArrayRef(Indices, NumElts), |
943 | 1.29k | "extract"); |
944 | 1.29k | } |
945 | 4.38k | |
946 | 4.38k | return Mask; |
947 | 4.38k | } |
948 | | |
949 | | static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask, |
950 | 5.08k | Value *Op0, Value *Op1) { |
951 | 5.08k | // If the mask is all ones just emit the first operation. |
952 | 5.08k | if (const auto *C = dyn_cast<Constant>(Mask)) |
953 | 1.91k | if (C->isAllOnesValue()) |
954 | 1.91k | return Op0; |
955 | 3.17k | |
956 | 3.17k | Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements()); |
957 | 3.17k | return Builder.CreateSelect(Mask, Op0, Op1); |
958 | 3.17k | } |
959 | | |
960 | | static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, |
961 | 175 | Value *Op0, Value *Op1) { |
962 | 175 | // If the mask is all ones just emit the first operation. |
963 | 175 | if (const auto *C = dyn_cast<Constant>(Mask)) |
964 | 52 | if (C->isAllOnesValue()) |
965 | 44 | return Op0; |
966 | 131 | |
967 | 131 | llvm::VectorType *MaskTy = |
968 | 131 | llvm::VectorType::get(Builder.getInt1Ty(), |
969 | 131 | Mask->getType()->getIntegerBitWidth()); |
970 | 131 | Mask = Builder.CreateBitCast(Mask, MaskTy); |
971 | 131 | Mask = Builder.CreateExtractElement(Mask, (uint64_t)0); |
972 | 131 | return Builder.CreateSelect(Mask, Op0, Op1); |
973 | 131 | } |
974 | | |
975 | | // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics. |
976 | | // PALIGNR handles large immediates by shifting while VALIGN masks the immediate |
977 | | // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes. |
978 | | static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, |
979 | | Value *Op1, Value *Shift, |
980 | | Value *Passthru, Value *Mask, |
981 | 42 | bool IsVALIGN) { |
982 | 42 | unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue(); |
983 | 42 | |
984 | 42 | unsigned NumElts = Op0->getType()->getVectorNumElements(); |
985 | 42 | assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!"); |
986 | 42 | assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!"); |
987 | 42 | assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!"); |
988 | 42 | |
989 | 42 | // Mask the immediate for VALIGN. |
990 | 42 | if (IsVALIGN) |
991 | 24 | ShiftVal &= (NumElts - 1); |
992 | 42 | |
993 | 42 | // If palignr is shifting the pair of vectors more than the size of two |
994 | 42 | // lanes, emit zero. |
995 | 42 | if (ShiftVal >= 32) |
996 | 0 | return llvm::Constant::getNullValue(Op0->getType()); |
997 | 42 | |
998 | 42 | // If palignr is shifting the pair of input vectors more than one lane, |
999 | 42 | // but less than two lanes, convert to shifting in zeroes. |
1000 | 42 | if (ShiftVal > 16) { |
1001 | 0 | ShiftVal -= 16; |
1002 | 0 | Op1 = Op0; |
1003 | 0 | Op0 = llvm::Constant::getNullValue(Op0->getType()); |
1004 | 0 | } |
1005 | 42 | |
1006 | 42 | uint32_t Indices[64]; |
1007 | 42 | // 256-bit palignr operates on 128-bit lanes so we need to handle that |
1008 | 108 | for (unsigned l = 0; l < NumElts; l += 1666 ) { |
1009 | 1.12k | for (unsigned i = 0; i != 16; ++i1.05k ) { |
1010 | 1.05k | unsigned Idx = ShiftVal + i; |
1011 | 1.05k | if (!IsVALIGN && Idx >= 16672 ) // Disable wrap for VALIGN. |
1012 | 84 | Idx += NumElts - 16; // End of lane, switch operand. |
1013 | 1.05k | Indices[l + i] = Idx + l; |
1014 | 1.05k | } |
1015 | 66 | } |
1016 | 42 | |
1017 | 42 | Value *Align = Builder.CreateShuffleVector(Op1, Op0, |
1018 | 42 | makeArrayRef(Indices, NumElts), |
1019 | 42 | "palignr"); |
1020 | 42 | |
1021 | 42 | return EmitX86Select(Builder, Mask, Align, Passthru); |
1022 | 42 | } |
1023 | | |
1024 | | static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI, |
1025 | 397 | bool ZeroMask, bool IndexForm) { |
1026 | 397 | Type *Ty = CI.getType(); |
1027 | 397 | unsigned VecWidth = Ty->getPrimitiveSizeInBits(); |
1028 | 397 | unsigned EltWidth = Ty->getScalarSizeInBits(); |
1029 | 397 | bool IsFloat = Ty->isFPOrFPVectorTy(); |
1030 | 397 | Intrinsic::ID IID; |
1031 | 397 | if (VecWidth == 128 && EltWidth == 3286 && IsFloat22 ) |
1032 | 7 | IID = Intrinsic::x86_avx512_vpermi2var_ps_128; |
1033 | 390 | else if (VecWidth == 128 && EltWidth == 3279 && !IsFloat15 ) |
1034 | 15 | IID = Intrinsic::x86_avx512_vpermi2var_d_128; |
1035 | 375 | else if (VecWidth == 128 && EltWidth == 6464 && IsFloat17 ) |
1036 | 5 | IID = Intrinsic::x86_avx512_vpermi2var_pd_128; |
1037 | 370 | else if (VecWidth == 128 && EltWidth == 6459 && !IsFloat12 ) |
1038 | 12 | IID = Intrinsic::x86_avx512_vpermi2var_q_128; |
1039 | 358 | else if (VecWidth == 256 && EltWidth == 3292 && IsFloat21 ) |
1040 | 7 | IID = Intrinsic::x86_avx512_vpermi2var_ps_256; |
1041 | 351 | else if (VecWidth == 256 && EltWidth == 3285 && !IsFloat14 ) |
1042 | 14 | IID = Intrinsic::x86_avx512_vpermi2var_d_256; |
1043 | 337 | else if (VecWidth == 256 && EltWidth == 6471 && IsFloat17 ) |
1044 | 5 | IID = Intrinsic::x86_avx512_vpermi2var_pd_256; |
1045 | 332 | else if (VecWidth == 256 && EltWidth == 6466 && !IsFloat12 ) |
1046 | 12 | IID = Intrinsic::x86_avx512_vpermi2var_q_256; |
1047 | 320 | else if (VecWidth == 512 && EltWidth == 32219 && IsFloat101 ) |
1048 | 60 | IID = Intrinsic::x86_avx512_vpermi2var_ps_512; |
1049 | 260 | else if (VecWidth == 512 && EltWidth == 32159 && !IsFloat41 ) |
1050 | 41 | IID = Intrinsic::x86_avx512_vpermi2var_d_512; |
1051 | 219 | else if (VecWidth == 512 && EltWidth == 64118 && IsFloat68 ) |
1052 | 40 | IID = Intrinsic::x86_avx512_vpermi2var_pd_512; |
1053 | 179 | else if (VecWidth == 512 && EltWidth == 6478 && !IsFloat28 ) |
1054 | 28 | IID = Intrinsic::x86_avx512_vpermi2var_q_512; |
1055 | 151 | else if (VecWidth == 128 && EltWidth == 1647 ) |
1056 | 12 | IID = Intrinsic::x86_avx512_vpermi2var_hi_128; |
1057 | 139 | else if (VecWidth == 256 && EltWidth == 1654 ) |
1058 | 32 | IID = Intrinsic::x86_avx512_vpermi2var_hi_256; |
1059 | 107 | else if (VecWidth == 512 && EltWidth == 1650 ) |
1060 | 32 | IID = Intrinsic::x86_avx512_vpermi2var_hi_512; |
1061 | 75 | else if (VecWidth == 128 && EltWidth == 835 ) |
1062 | 35 | IID = Intrinsic::x86_avx512_vpermi2var_qi_128; |
1063 | 40 | else if (VecWidth == 256 && EltWidth == 822 ) |
1064 | 22 | IID = Intrinsic::x86_avx512_vpermi2var_qi_256; |
1065 | 18 | else if (VecWidth == 512 && EltWidth == 8) |
1066 | 18 | IID = Intrinsic::x86_avx512_vpermi2var_qi_512; |
1067 | 18 | else |
1068 | 18 | llvm_unreachable0 ("Unexpected intrinsic"); |
1069 | 397 | |
1070 | 397 | Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1), |
1071 | 397 | CI.getArgOperand(2) }; |
1072 | 397 | |
1073 | 397 | // If this isn't index form we need to swap operand 0 and 1. |
1074 | 397 | if (!IndexForm) |
1075 | 235 | std::swap(Args[0], Args[1]); |
1076 | 397 | |
1077 | 397 | Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID), |
1078 | 397 | Args); |
1079 | 397 | Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)179 |
1080 | 397 | : Builder.CreateBitCast(CI.getArgOperand(1), |
1081 | 218 | Ty); |
1082 | 397 | return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru); |
1083 | 397 | } |
1084 | | |
1085 | | static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI, |
1086 | 750 | bool IsSigned, bool IsAddition) { |
1087 | 750 | Type *Ty = CI.getType(); |
1088 | 750 | Value *Op0 = CI.getOperand(0); |
1089 | 750 | Value *Op1 = CI.getOperand(1); |
1090 | 750 | |
1091 | 750 | Intrinsic::ID IID = |
1092 | 750 | IsSigned ? (IsAddition 369 ? Intrinsic::sadd_sat204 : Intrinsic::ssub_sat165 ) |
1093 | 750 | : (IsAddition 381 ? Intrinsic::uadd_sat114 : Intrinsic::usub_sat267 ); |
1094 | 750 | Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty); |
1095 | 750 | Value *Res = Builder.CreateCall(Intrin, {Op0, Op1}); |
1096 | 750 | |
1097 | 750 | if (CI.getNumArgOperands() == 4) { // For masked intrinsics. |
1098 | 288 | Value *VecSrc = CI.getOperand(2); |
1099 | 288 | Value *Mask = CI.getOperand(3); |
1100 | 288 | Res = EmitX86Select(Builder, Mask, Res, VecSrc); |
1101 | 288 | } |
1102 | 750 | return Res; |
1103 | 750 | } |
1104 | | |
1105 | | static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI, |
1106 | 364 | bool IsRotateRight) { |
1107 | 364 | Type *Ty = CI.getType(); |
1108 | 364 | Value *Src = CI.getArgOperand(0); |
1109 | 364 | Value *Amt = CI.getArgOperand(1); |
1110 | 364 | |
1111 | 364 | // Amount may be scalar immediate, in which case create a splat vector. |
1112 | 364 | // Funnel shifts amounts are treated as modulo and types are all power-of-2 so |
1113 | 364 | // we only care about the lowest log2 bits anyway. |
1114 | 364 | if (Amt->getType() != Ty) { |
1115 | 152 | unsigned NumElts = Ty->getVectorNumElements(); |
1116 | 152 | Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false); |
1117 | 152 | Amt = Builder.CreateVectorSplat(NumElts, Amt); |
1118 | 152 | } |
1119 | 364 | |
1120 | 364 | Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr172 : Intrinsic::fshl192 ; |
1121 | 364 | Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty); |
1122 | 364 | Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt}); |
1123 | 364 | |
1124 | 364 | if (CI.getNumArgOperands() == 4) { // For masked intrinsics. |
1125 | 200 | Value *VecSrc = CI.getOperand(2); |
1126 | 200 | Value *Mask = CI.getOperand(3); |
1127 | 200 | Res = EmitX86Select(Builder, Mask, Res, VecSrc); |
1128 | 200 | } |
1129 | 364 | return Res; |
1130 | 364 | } |
1131 | | |
1132 | | static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallInst &CI, unsigned Imm, |
1133 | 113 | bool IsSigned) { |
1134 | 113 | Type *Ty = CI.getType(); |
1135 | 113 | Value *LHS = CI.getArgOperand(0); |
1136 | 113 | Value *RHS = CI.getArgOperand(1); |
1137 | 113 | |
1138 | 113 | CmpInst::Predicate Pred; |
1139 | 113 | switch (Imm) { |
1140 | 113 | case 0x0: |
1141 | 28 | Pred = IsSigned ? ICmpInst::ICMP_SLT15 : ICmpInst::ICMP_ULT13 ; |
1142 | 28 | break; |
1143 | 113 | case 0x1: |
1144 | 12 | Pred = IsSigned ? ICmpInst::ICMP_SLE7 : ICmpInst::ICMP_ULE5 ; |
1145 | 12 | break; |
1146 | 113 | case 0x2: |
1147 | 12 | Pred = IsSigned ? ICmpInst::ICMP_SGT7 : ICmpInst::ICMP_UGT5 ; |
1148 | 12 | break; |
1149 | 113 | case 0x3: |
1150 | 12 | Pred = IsSigned ? ICmpInst::ICMP_SGE5 : ICmpInst::ICMP_UGE7 ; |
1151 | 12 | break; |
1152 | 113 | case 0x4: |
1153 | 13 | Pred = ICmpInst::ICMP_EQ; |
1154 | 13 | break; |
1155 | 113 | case 0x5: |
1156 | 12 | Pred = ICmpInst::ICMP_NE; |
1157 | 12 | break; |
1158 | 113 | case 0x6: |
1159 | 12 | return Constant::getNullValue(Ty); // FALSE |
1160 | 113 | case 0x7: |
1161 | 12 | return Constant::getAllOnesValue(Ty); // TRUE |
1162 | 113 | default: |
1163 | 0 | llvm_unreachable("Unknown XOP vpcom/vpcomu predicate"); |
1164 | 89 | } |
1165 | 89 | |
1166 | 89 | Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS); |
1167 | 89 | Value *Ext = Builder.CreateSExt(Cmp, Ty); |
1168 | 89 | return Ext; |
1169 | 89 | } |
1170 | | |
1171 | | static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI, |
1172 | 260 | bool IsShiftRight, bool ZeroMask) { |
1173 | 260 | Type *Ty = CI.getType(); |
1174 | 260 | Value *Op0 = CI.getArgOperand(0); |
1175 | 260 | Value *Op1 = CI.getArgOperand(1); |
1176 | 260 | Value *Amt = CI.getArgOperand(2); |
1177 | 260 | |
1178 | 260 | if (IsShiftRight) |
1179 | 130 | std::swap(Op0, Op1); |
1180 | 260 | |
1181 | 260 | // Amount may be scalar immediate, in which case create a splat vector. |
1182 | 260 | // Funnel shifts amounts are treated as modulo and types are all power-of-2 so |
1183 | 260 | // we only care about the lowest log2 bits anyway. |
1184 | 260 | if (Amt->getType() != Ty) { |
1185 | 152 | unsigned NumElts = Ty->getVectorNumElements(); |
1186 | 152 | Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false); |
1187 | 152 | Amt = Builder.CreateVectorSplat(NumElts, Amt); |
1188 | 152 | } |
1189 | 260 | |
1190 | 260 | Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr130 : Intrinsic::fshl130 ; |
1191 | 260 | Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty); |
1192 | 260 | Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt}); |
1193 | 260 | |
1194 | 260 | unsigned NumArgs = CI.getNumArgOperands(); |
1195 | 260 | if (NumArgs >= 4) { // For masked intrinsics. |
1196 | 184 | Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3)76 : |
1197 | 184 | ZeroMask 108 ? ConstantAggregateZero::get(CI.getType())36 : |
1198 | 108 | CI.getArgOperand(0)72 ; |
1199 | 184 | Value *Mask = CI.getOperand(NumArgs - 1); |
1200 | 184 | Res = EmitX86Select(Builder, Mask, Res, VecSrc); |
1201 | 184 | } |
1202 | 260 | return Res; |
1203 | 260 | } |
1204 | | |
1205 | | static Value *UpgradeMaskedStore(IRBuilder<> &Builder, |
1206 | | Value *Ptr, Value *Data, Value *Mask, |
1207 | 120 | bool Aligned) { |
1208 | 120 | // Cast the pointer to the right type. |
1209 | 120 | Ptr = Builder.CreateBitCast(Ptr, |
1210 | 120 | llvm::PointerType::getUnqual(Data->getType())); |
1211 | 120 | unsigned Align = |
1212 | 120 | Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 848 : 172 ; |
1213 | 120 | |
1214 | 120 | // If the mask is all ones just emit a regular store. |
1215 | 120 | if (const auto *C = dyn_cast<Constant>(Mask)) |
1216 | 60 | if (C->isAllOnesValue()) |
1217 | 60 | return Builder.CreateAlignedStore(Data, Ptr, Align); |
1218 | 60 | |
1219 | 60 | // Convert the mask from an integer type to a vector of i1. |
1220 | 60 | unsigned NumElts = Data->getType()->getVectorNumElements(); |
1221 | 60 | Mask = getX86MaskVec(Builder, Mask, NumElts); |
1222 | 60 | return Builder.CreateMaskedStore(Data, Ptr, Align, Mask); |
1223 | 60 | } |
1224 | | |
1225 | | static Value *UpgradeMaskedLoad(IRBuilder<> &Builder, |
1226 | | Value *Ptr, Value *Passthru, Value *Mask, |
1227 | 180 | bool Aligned) { |
1228 | 180 | Type *ValTy = Passthru->getType(); |
1229 | 180 | // Cast the pointer to the right type. |
1230 | 180 | Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy)); |
1231 | 180 | unsigned Align = |
1232 | 180 | Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 872 : 1108 ; |
1233 | 180 | |
1234 | 180 | // If the mask is all ones just emit a regular store. |
1235 | 180 | if (const auto *C = dyn_cast<Constant>(Mask)) |
1236 | 60 | if (C->isAllOnesValue()) |
1237 | 60 | return Builder.CreateAlignedLoad(ValTy, Ptr, Align); |
1238 | 120 | |
1239 | 120 | // Convert the mask from an integer type to a vector of i1. |
1240 | 120 | unsigned NumElts = Passthru->getType()->getVectorNumElements(); |
1241 | 120 | Mask = getX86MaskVec(Builder, Mask, NumElts); |
1242 | 120 | return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru); |
1243 | 120 | } |
1244 | | |
1245 | 96 | static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) { |
1246 | 96 | Value *Op0 = CI.getArgOperand(0); |
1247 | 96 | llvm::Type *Ty = Op0->getType(); |
1248 | 96 | Value *Zero = llvm::Constant::getNullValue(Ty); |
1249 | 96 | Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Op0, Zero); |
1250 | 96 | Value *Neg = Builder.CreateNeg(Op0); |
1251 | 96 | Value *Res = Builder.CreateSelect(Cmp, Op0, Neg); |
1252 | 96 | |
1253 | 96 | if (CI.getNumArgOperands() == 3) |
1254 | 48 | Res = EmitX86Select(Builder,CI.getArgOperand(2), Res, CI.getArgOperand(1)); |
1255 | 96 | |
1256 | 96 | return Res; |
1257 | 96 | } |
1258 | | |
1259 | | static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI, |
1260 | 490 | ICmpInst::Predicate Pred) { |
1261 | 490 | Value *Op0 = CI.getArgOperand(0); |
1262 | 490 | Value *Op1 = CI.getArgOperand(1); |
1263 | 490 | Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1); |
1264 | 490 | Value *Res = Builder.CreateSelect(Cmp, Op0, Op1); |
1265 | 490 | |
1266 | 490 | if (CI.getNumArgOperands() == 4) |
1267 | 192 | Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2)); |
1268 | 490 | |
1269 | 490 | return Res; |
1270 | 490 | } |
1271 | | |
1272 | 224 | static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) { |
1273 | 224 | Type *Ty = CI.getType(); |
1274 | 224 | |
1275 | 224 | // Arguments have a vXi32 type so cast to vXi64. |
1276 | 224 | Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty); |
1277 | 224 | Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty); |
1278 | 224 | |
1279 | 224 | if (IsSigned) { |
1280 | 98 | // Shift left then arithmetic shift right. |
1281 | 98 | Constant *ShiftAmt = ConstantInt::get(Ty, 32); |
1282 | 98 | LHS = Builder.CreateShl(LHS, ShiftAmt); |
1283 | 98 | LHS = Builder.CreateAShr(LHS, ShiftAmt); |
1284 | 98 | RHS = Builder.CreateShl(RHS, ShiftAmt); |
1285 | 98 | RHS = Builder.CreateAShr(RHS, ShiftAmt); |
1286 | 126 | } else { |
1287 | 126 | // Clear the upper bits. |
1288 | 126 | Constant *Mask = ConstantInt::get(Ty, 0xffffffff); |
1289 | 126 | LHS = Builder.CreateAnd(LHS, Mask); |
1290 | 126 | RHS = Builder.CreateAnd(RHS, Mask); |
1291 | 126 | } |
1292 | 224 | |
1293 | 224 | Value *Res = Builder.CreateMul(LHS, RHS); |
1294 | 224 | |
1295 | 224 | if (CI.getNumArgOperands() == 4) |
1296 | 108 | Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2)); |
1297 | 224 | |
1298 | 224 | return Res; |
1299 | 224 | } |
1300 | | |
1301 | | // Applying mask on vector of i1's and make sure result is at least 8 bits wide. |
1302 | | static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, |
1303 | 1.03k | Value *Mask) { |
1304 | 1.03k | unsigned NumElts = Vec->getType()->getVectorNumElements(); |
1305 | 1.03k | if (Mask) { |
1306 | 1.00k | const auto *C = dyn_cast<Constant>(Mask); |
1307 | 1.00k | if (!C || !C->isAllOnesValue()508 ) |
1308 | 498 | Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts)); |
1309 | 1.00k | } |
1310 | 1.03k | |
1311 | 1.03k | if (NumElts < 8) { |
1312 | 265 | uint32_t Indices[8]; |
1313 | 1.14k | for (unsigned i = 0; i != NumElts; ++i884 ) |
1314 | 884 | Indices[i] = i; |
1315 | 1.50k | for (unsigned i = NumElts; i != 8; ++i1.23k ) |
1316 | 1.23k | Indices[i] = NumElts + i % NumElts; |
1317 | 265 | Vec = Builder.CreateShuffleVector(Vec, |
1318 | 265 | Constant::getNullValue(Vec->getType()), |
1319 | 265 | Indices); |
1320 | 265 | } |
1321 | 1.03k | return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U))); |
1322 | 1.03k | } |
1323 | | |
1324 | | static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI, |
1325 | 864 | unsigned CC, bool Signed) { |
1326 | 864 | Value *Op0 = CI.getArgOperand(0); |
1327 | 864 | unsigned NumElts = Op0->getType()->getVectorNumElements(); |
1328 | 864 | |
1329 | 864 | Value *Cmp; |
1330 | 864 | if (CC == 3) { |
1331 | 96 | Cmp = Constant::getNullValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts)); |
1332 | 768 | } else if (CC == 7) { |
1333 | 96 | Cmp = Constant::getAllOnesValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts)); |
1334 | 672 | } else { |
1335 | 672 | ICmpInst::Predicate Pred; |
1336 | 672 | switch (CC) { |
1337 | 672 | default: 0 llvm_unreachable0 ("Unknown condition code"); |
1338 | 672 | case 0: Pred = ICmpInst::ICMP_EQ; break144 ; |
1339 | 672 | case 1: Pred = Signed 96 ? ICmpInst::ICMP_SLT48 : ICmpInst::ICMP_ULT48 ; break; |
1340 | 672 | case 2: Pred = Signed 96 ? ICmpInst::ICMP_SLE48 : ICmpInst::ICMP_ULE48 ; break; |
1341 | 672 | case 4: Pred = ICmpInst::ICMP_NE; break96 ; |
1342 | 672 | case 5: Pred = Signed 96 ? ICmpInst::ICMP_SGE48 : ICmpInst::ICMP_UGE48 ; break; |
1343 | 672 | case 6: Pred = Signed 144 ? ICmpInst::ICMP_SGT96 : ICmpInst::ICMP_UGT48 ; break; |
1344 | 672 | } |
1345 | 672 | Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1)); |
1346 | 672 | } |
1347 | 864 | |
1348 | 864 | Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1); |
1349 | 864 | |
1350 | 864 | return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask); |
1351 | 864 | } |
1352 | | |
1353 | | // Replace a masked intrinsic with an older unmasked intrinsic. |
1354 | | static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI, |
1355 | 510 | Intrinsic::ID IID) { |
1356 | 510 | Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID); |
1357 | 510 | Value *Rep = Builder.CreateCall(Intrin, |
1358 | 510 | { CI.getArgOperand(0), CI.getArgOperand(1) }); |
1359 | 510 | return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2)); |
1360 | 510 | } |
1361 | | |
1362 | 8 | static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) { |
1363 | 8 | Value* A = CI.getArgOperand(0); |
1364 | 8 | Value* B = CI.getArgOperand(1); |
1365 | 8 | Value* Src = CI.getArgOperand(2); |
1366 | 8 | Value* Mask = CI.getArgOperand(3); |
1367 | 8 | |
1368 | 8 | Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1)); |
1369 | 8 | Value* Cmp = Builder.CreateIsNotNull(AndNode); |
1370 | 8 | Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0); |
1371 | 8 | Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0); |
1372 | 8 | Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2); |
1373 | 8 | return Builder.CreateInsertElement(A, Select, (uint64_t)0); |
1374 | 8 | } |
1375 | | |
1376 | | |
1377 | 24 | static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) { |
1378 | 24 | Value* Op = CI.getArgOperand(0); |
1379 | 24 | Type* ReturnOp = CI.getType(); |
1380 | 24 | unsigned NumElts = CI.getType()->getVectorNumElements(); |
1381 | 24 | Value *Mask = getX86MaskVec(Builder, Op, NumElts); |
1382 | 24 | return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2"); |
1383 | 24 | } |
1384 | | |
1385 | | // Replace intrinsic with unmasked version and a select. |
1386 | | static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, |
1387 | 546 | CallInst &CI, Value *&Rep) { |
1388 | 546 | Name = Name.substr(12); // Remove avx512.mask. |
1389 | 546 | |
1390 | 546 | unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits(); |
1391 | 546 | unsigned EltWidth = CI.getType()->getScalarSizeInBits(); |
1392 | 546 | Intrinsic::ID IID; |
1393 | 546 | if (Name.startswith("max.p")) { |
1394 | 12 | if (VecWidth == 128 && EltWidth == 326 ) |
1395 | 6 | IID = Intrinsic::x86_sse_max_ps; |
1396 | 6 | else if (VecWidth == 128 && EltWidth == 640 ) |
1397 | 0 | IID = Intrinsic::x86_sse2_max_pd; |
1398 | 6 | else if (VecWidth == 256 && EltWidth == 32) |
1399 | 6 | IID = Intrinsic::x86_avx_max_ps_256; |
1400 | 0 | else if (VecWidth == 256 && EltWidth == 64) |
1401 | 0 | IID = Intrinsic::x86_avx_max_pd_256; |
1402 | 0 | else |
1403 | 0 | llvm_unreachable("Unexpected intrinsic"); |
1404 | 534 | } else if (Name.startswith("min.p")) { |
1405 | 12 | if (VecWidth == 128 && EltWidth == 326 ) |
1406 | 6 | IID = Intrinsic::x86_sse_min_ps; |
1407 | 6 | else if (VecWidth == 128 && EltWidth == 640 ) |
1408 | 0 | IID = Intrinsic::x86_sse2_min_pd; |
1409 | 6 | else if (VecWidth == 256 && EltWidth == 32) |
1410 | 6 | IID = Intrinsic::x86_avx_min_ps_256; |
1411 | 0 | else if (VecWidth == 256 && EltWidth == 64) |
1412 | 0 | IID = Intrinsic::x86_avx_min_pd_256; |
1413 | 0 | else |
1414 | 0 | llvm_unreachable("Unexpected intrinsic"); |
1415 | 522 | } else if (Name.startswith("pshuf.b.")) { |
1416 | 36 | if (VecWidth == 128) |
1417 | 4 | IID = Intrinsic::x86_ssse3_pshuf_b_128; |
1418 | 32 | else if (VecWidth == 256) |
1419 | 4 | IID = Intrinsic::x86_avx2_pshuf_b; |
1420 | 28 | else if (VecWidth == 512) |
1421 | 28 | IID = Intrinsic::x86_avx512_pshuf_b_512; |
1422 | 28 | else |
1423 | 28 | llvm_unreachable0 ("Unexpected intrinsic"); |
1424 | 486 | } else if (Name.startswith("pmul.hr.sw.")) { |
1425 | 12 | if (VecWidth == 128) |
1426 | 4 | IID = Intrinsic::x86_ssse3_pmul_hr_sw_128; |
1427 | 8 | else if (VecWidth == 256) |
1428 | 4 | IID = Intrinsic::x86_avx2_pmul_hr_sw; |
1429 | 4 | else if (VecWidth == 512) |
1430 | 4 | IID = Intrinsic::x86_avx512_pmul_hr_sw_512; |
1431 | 4 | else |
1432 | 4 | llvm_unreachable0 ("Unexpected intrinsic"); |
1433 | 474 | } else if (Name.startswith("pmulh.w.")) { |
1434 | 12 | if (VecWidth == 128) |
1435 | 4 | IID = Intrinsic::x86_sse2_pmulh_w; |
1436 | 8 | else if (VecWidth == 256) |
1437 | 4 | IID = Intrinsic::x86_avx2_pmulh_w; |
1438 | 4 | else if (VecWidth == 512) |
1439 | 4 | IID = Intrinsic::x86_avx512_pmulh_w_512; |
1440 | 4 | else |
1441 | 4 | llvm_unreachable0 ("Unexpected intrinsic"); |
1442 | 462 | } else if (Name.startswith("pmulhu.w.")) { |
1443 | 12 | if (VecWidth == 128) |
1444 | 4 | IID = Intrinsic::x86_sse2_pmulhu_w; |
1445 | 8 | else if (VecWidth == 256) |
1446 | 4 | IID = Intrinsic::x86_avx2_pmulhu_w; |
1447 | 4 | else if (VecWidth == 512) |
1448 | 4 | IID = Intrinsic::x86_avx512_pmulhu_w_512; |
1449 | 4 | else |
1450 | 4 | llvm_unreachable0 ("Unexpected intrinsic"); |
1451 | 450 | } else if (Name.startswith("pmaddw.d.")) { |
1452 | 12 | if (VecWidth == 128) |
1453 | 4 | IID = Intrinsic::x86_sse2_pmadd_wd; |
1454 | 8 | else if (VecWidth == 256) |
1455 | 4 | IID = Intrinsic::x86_avx2_pmadd_wd; |
1456 | 4 | else if (VecWidth == 512) |
1457 | 4 | IID = Intrinsic::x86_avx512_pmaddw_d_512; |
1458 | 4 | else |
1459 | 4 | llvm_unreachable0 ("Unexpected intrinsic"); |
1460 | 438 | } else if (Name.startswith("pmaddubs.w.")) { |
1461 | 12 | if (VecWidth == 128) |
1462 | 4 | IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128; |
1463 | 8 | else if (VecWidth == 256) |
1464 | 4 | IID = Intrinsic::x86_avx2_pmadd_ub_sw; |
1465 | 4 | else if (VecWidth == 512) |
1466 | 4 | IID = Intrinsic::x86_avx512_pmaddubs_w_512; |
1467 | 4 | else |
1468 | 4 | llvm_unreachable0 ("Unexpected intrinsic"); |
1469 | 426 | } else if (Name.startswith("packsswb.")) { |
1470 | 36 | if (VecWidth == 128) |
1471 | 12 | IID = Intrinsic::x86_sse2_packsswb_128; |
1472 | 24 | else if (VecWidth == 256) |
1473 | 12 | IID = Intrinsic::x86_avx2_packsswb; |
1474 | 12 | else if (VecWidth == 512) |
1475 | 12 | IID = Intrinsic::x86_avx512_packsswb_512; |
1476 | 12 | else |
1477 | 12 | llvm_unreachable0 ("Unexpected intrinsic"); |
1478 | 390 | } else if (Name.startswith("packssdw.")) { |
1479 | 54 | if (VecWidth == 128) |
1480 | 18 | IID = Intrinsic::x86_sse2_packssdw_128; |
1481 | 36 | else if (VecWidth == 256) |
1482 | 18 | IID = Intrinsic::x86_avx2_packssdw; |
1483 | 18 | else if (VecWidth == 512) |
1484 | 18 | IID = Intrinsic::x86_avx512_packssdw_512; |
1485 | 18 | else |
1486 | 18 | llvm_unreachable0 ("Unexpected intrinsic"); |
1487 | 336 | } else if (Name.startswith("packuswb.")) { |
1488 | 36 | if (VecWidth == 128) |
1489 | 12 | IID = Intrinsic::x86_sse2_packuswb_128; |
1490 | 24 | else if (VecWidth == 256) |
1491 | 12 | IID = Intrinsic::x86_avx2_packuswb; |
1492 | 12 | else if (VecWidth == 512) |
1493 | 12 | IID = Intrinsic::x86_avx512_packuswb_512; |
1494 | 12 | else |
1495 | 12 | llvm_unreachable0 ("Unexpected intrinsic"); |
1496 | 300 | } else if (Name.startswith("packusdw.")) { |
1497 | 54 | if (VecWidth == 128) |
1498 | 18 | IID = Intrinsic::x86_sse41_packusdw; |
1499 | 36 | else if (VecWidth == 256) |
1500 | 18 | IID = Intrinsic::x86_avx2_packusdw; |
1501 | 18 | else if (VecWidth == 512) |
1502 | 18 | IID = Intrinsic::x86_avx512_packusdw_512; |
1503 | 18 | else |
1504 | 18 | llvm_unreachable0 ("Unexpected intrinsic"); |
1505 | 246 | } else if (Name.startswith("vpermilvar.")) { |
1506 | 46 | if (VecWidth == 128 && EltWidth == 3212 ) |
1507 | 6 | IID = Intrinsic::x86_avx_vpermilvar_ps; |
1508 | 40 | else if (VecWidth == 128 && EltWidth == 646 ) |
1509 | 6 | IID = Intrinsic::x86_avx_vpermilvar_pd; |
1510 | 34 | else if (VecWidth == 256 && EltWidth == 3212 ) |
1511 | 6 | IID = Intrinsic::x86_avx_vpermilvar_ps_256; |
1512 | 28 | else if (VecWidth == 256 && EltWidth == 646 ) |
1513 | 6 | IID = Intrinsic::x86_avx_vpermilvar_pd_256; |
1514 | 22 | else if (VecWidth == 512 && EltWidth == 32) |
1515 | 16 | IID = Intrinsic::x86_avx512_vpermilvar_ps_512; |
1516 | 6 | else if (VecWidth == 512 && EltWidth == 64) |
1517 | 6 | IID = Intrinsic::x86_avx512_vpermilvar_pd_512; |
1518 | 6 | else |
1519 | 6 | llvm_unreachable0 ("Unexpected intrinsic"); |
1520 | 200 | } else if (Name == "cvtpd2dq.256") { |
1521 | 4 | IID = Intrinsic::x86_avx_cvt_pd2dq_256; |
1522 | 196 | } else if (Name == "cvtpd2ps.256") { |
1523 | 4 | IID = Intrinsic::x86_avx_cvt_pd2_ps_256; |
1524 | 192 | } else if (Name == "cvttpd2dq.256") { |
1525 | 4 | IID = Intrinsic::x86_avx_cvtt_pd2dq_256; |
1526 | 188 | } else if (Name == "cvttps2dq.128") { |
1527 | 4 | IID = Intrinsic::x86_sse2_cvttps2dq; |
1528 | 184 | } else if (Name == "cvttps2dq.256") { |
1529 | 4 | IID = Intrinsic::x86_avx_cvtt_ps2dq_256; |
1530 | 180 | } else if (Name.startswith("permvar.")) { |
1531 | 84 | bool IsFloat = CI.getType()->isFPOrFPVectorTy(); |
1532 | 84 | if (VecWidth == 256 && EltWidth == 3236 && IsFloat12 ) |
1533 | 6 | IID = Intrinsic::x86_avx2_permps; |
1534 | 78 | else if (VecWidth == 256 && EltWidth == 3230 && !IsFloat6 ) |
1535 | 6 | IID = Intrinsic::x86_avx2_permd; |
1536 | 72 | else if (VecWidth == 256 && EltWidth == 6424 && IsFloat12 ) |
1537 | 6 | IID = Intrinsic::x86_avx512_permvar_df_256; |
1538 | 66 | else if (VecWidth == 256 && EltWidth == 6418 && !IsFloat6 ) |
1539 | 6 | IID = Intrinsic::x86_avx512_permvar_di_256; |
1540 | 60 | else if (VecWidth == 512 && EltWidth == 3236 && IsFloat12 ) |
1541 | 6 | IID = Intrinsic::x86_avx512_permvar_sf_512; |
1542 | 54 | else if (VecWidth == 512 && EltWidth == 3230 && !IsFloat6 ) |
1543 | 6 | IID = Intrinsic::x86_avx512_permvar_si_512; |
1544 | 48 | else if (VecWidth == 512 && EltWidth == 6424 && IsFloat12 ) |
1545 | 6 | IID = Intrinsic::x86_avx512_permvar_df_512; |
1546 | 42 | else if (VecWidth == 512 && EltWidth == 6418 && !IsFloat6 ) |
1547 | 6 | IID = Intrinsic::x86_avx512_permvar_di_512; |
1548 | 36 | else if (VecWidth == 128 && EltWidth == 1612 ) |
1549 | 6 | IID = Intrinsic::x86_avx512_permvar_hi_128; |
1550 | 30 | else if (VecWidth == 256 && EltWidth == 1612 ) |
1551 | 6 | IID = Intrinsic::x86_avx512_permvar_hi_256; |
1552 | 24 | else if (VecWidth == 512 && EltWidth == 1612 ) |
1553 | 6 | IID = Intrinsic::x86_avx512_permvar_hi_512; |
1554 | 18 | else if (VecWidth == 128 && EltWidth == 86 ) |
1555 | 6 | IID = Intrinsic::x86_avx512_permvar_qi_128; |
1556 | 12 | else if (VecWidth == 256 && EltWidth == 86 ) |
1557 | 6 | IID = Intrinsic::x86_avx512_permvar_qi_256; |
1558 | 6 | else if (VecWidth == 512 && EltWidth == 8) |
1559 | 6 | IID = Intrinsic::x86_avx512_permvar_qi_512; |
1560 | 6 | else |
1561 | 6 | llvm_unreachable0 ("Unexpected intrinsic"); |
1562 | 96 | } else if (Name.startswith("dbpsadbw.")) { |
1563 | 18 | if (VecWidth == 128) |
1564 | 6 | IID = Intrinsic::x86_avx512_dbpsadbw_128; |
1565 | 12 | else if (VecWidth == 256) |
1566 | 6 | IID = Intrinsic::x86_avx512_dbpsadbw_256; |
1567 | 6 | else if (VecWidth == 512) |
1568 | 6 | IID = Intrinsic::x86_avx512_dbpsadbw_512; |
1569 | 6 | else |
1570 | 6 | llvm_unreachable0 ("Unexpected intrinsic"); |
1571 | 78 | } else if (Name.startswith("pmultishift.qb.")) { |
1572 | 18 | if (VecWidth == 128) |
1573 | 6 | IID = Intrinsic::x86_avx512_pmultishift_qb_128; |
1574 | 12 | else if (VecWidth == 256) |
1575 | 6 | IID = Intrinsic::x86_avx512_pmultishift_qb_256; |
1576 | 6 | else if (VecWidth == 512) |
1577 | 6 | IID = Intrinsic::x86_avx512_pmultishift_qb_512; |
1578 | 6 | else |
1579 | 6 | llvm_unreachable0 ("Unexpected intrinsic"); |
1580 | 60 | } else if (Name.startswith("conflict.")) { |
1581 | 36 | if (Name[9] == 'd' && VecWidth == 12818 ) |
1582 | 6 | IID = Intrinsic::x86_avx512_conflict_d_128; |
1583 | 30 | else if (Name[9] == 'd' && VecWidth == 25612 ) |
1584 | 6 | IID = Intrinsic::x86_avx512_conflict_d_256; |
1585 | 24 | else if (Name[9] == 'd' && VecWidth == 5126 ) |
1586 | 6 | IID = Intrinsic::x86_avx512_conflict_d_512; |
1587 | 18 | else if (Name[9] == 'q' && VecWidth == 128) |
1588 | 6 | IID = Intrinsic::x86_avx512_conflict_q_128; |
1589 | 12 | else if (Name[9] == 'q' && VecWidth == 256) |
1590 | 6 | IID = Intrinsic::x86_avx512_conflict_q_256; |
1591 | 6 | else if (Name[9] == 'q' && VecWidth == 512) |
1592 | 6 | IID = Intrinsic::x86_avx512_conflict_q_512; |
1593 | 6 | else |
1594 | 6 | llvm_unreachable0 ("Unexpected intrinsic"); |
1595 | 36 | } else if (24 Name.startswith("pavg.")24 ) { |
1596 | 24 | if (Name[5] == 'b' && VecWidth == 12812 ) |
1597 | 4 | IID = Intrinsic::x86_sse2_pavg_b; |
1598 | 20 | else if (Name[5] == 'b' && VecWidth == 2568 ) |
1599 | 4 | IID = Intrinsic::x86_avx2_pavg_b; |
1600 | 16 | else if (Name[5] == 'b' && VecWidth == 5124 ) |
1601 | 4 | IID = Intrinsic::x86_avx512_pavg_b_512; |
1602 | 12 | else if (Name[5] == 'w' && VecWidth == 128) |
1603 | 4 | IID = Intrinsic::x86_sse2_pavg_w; |
1604 | 8 | else if (Name[5] == 'w' && VecWidth == 256) |
1605 | 4 | IID = Intrinsic::x86_avx2_pavg_w; |
1606 | 4 | else if (Name[5] == 'w' && VecWidth == 512) |
1607 | 4 | IID = Intrinsic::x86_avx512_pavg_w_512; |
1608 | 4 | else |
1609 | 4 | llvm_unreachable0 ("Unexpected intrinsic"); |
1610 | 24 | } else |
1611 | 0 | return false; |
1612 | 546 | |
1613 | 546 | SmallVector<Value *, 4> Args(CI.arg_operands().begin(), |
1614 | 546 | CI.arg_operands().end()); |
1615 | 546 | Args.pop_back(); |
1616 | 546 | Args.pop_back(); |
1617 | 546 | Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID), |
1618 | 546 | Args); |
1619 | 546 | unsigned NumArgs = CI.getNumArgOperands(); |
1620 | 546 | Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep, |
1621 | 546 | CI.getArgOperand(NumArgs - 2)); |
1622 | 546 | return true; |
1623 | 546 | } |
1624 | | |
1625 | | /// Upgrade comment in call to inline asm that represents an objc retain release |
1626 | | /// marker. |
1627 | 75 | void llvm::UpgradeInlineAsmString(std::string *AsmStr) { |
1628 | 75 | size_t Pos; |
1629 | 75 | if (AsmStr->find("mov\tfp") == 0 && |
1630 | 75 | AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos1 && |
1631 | 75 | (Pos = AsmStr->find("# marker")) != std::string::npos1 ) { |
1632 | 1 | AsmStr->replace(Pos, 1, ";"); |
1633 | 1 | } |
1634 | 75 | return; |
1635 | 75 | } |
1636 | | |
1637 | | /// Upgrade a call to an old intrinsic. All argument and return casting must be |
1638 | | /// provided to seamlessly integrate with existing context. |
1639 | 83.0k | void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { |
1640 | 83.0k | Function *F = CI->getCalledFunction(); |
1641 | 83.0k | LLVMContext &C = CI->getContext(); |
1642 | 83.0k | IRBuilder<> Builder(C); |
1643 | 83.0k | Builder.SetInsertPoint(CI->getParent(), CI->getIterator()); |
1644 | 83.0k | |
1645 | 83.0k | assert(F && "Intrinsic call is not direct?"); |
1646 | 83.0k | |
1647 | 83.0k | if (!NewFn) { |
1648 | 10.2k | // Get the Function's name. |
1649 | 10.2k | StringRef Name = F->getName(); |
1650 | 10.2k | |
1651 | 10.2k | // clang.arc.use is an old name for llvm.arc.clang.arc.use. It is dropped |
1652 | 10.2k | // from upgrader because the optimizer now only recognizes intrinsics for |
1653 | 10.2k | // ARC runtime calls. |
1654 | 10.2k | if (Name == "clang.arc.use") { |
1655 | 1 | CI->eraseFromParent(); |
1656 | 1 | return; |
1657 | 1 | } |
1658 | 10.2k | |
1659 | 10.2k | assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'"); |
1660 | 10.2k | Name = Name.substr(5); |
1661 | 10.2k | |
1662 | 10.2k | bool IsX86 = Name.startswith("x86."); |
1663 | 10.2k | if (IsX86) |
1664 | 10.1k | Name = Name.substr(4); |
1665 | 10.2k | bool IsNVVM = Name.startswith("nvvm."); |
1666 | 10.2k | if (IsNVVM) |
1667 | 35 | Name = Name.substr(5); |
1668 | 10.2k | |
1669 | 10.2k | if (IsX86 && Name.startswith("sse4a.movnt.")10.1k ) { |
1670 | 8 | Module *M = F->getParent(); |
1671 | 8 | SmallVector<Metadata *, 1> Elts; |
1672 | 8 | Elts.push_back( |
1673 | 8 | ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1))); |
1674 | 8 | MDNode *Node = MDNode::get(C, Elts); |
1675 | 8 | |
1676 | 8 | Value *Arg0 = CI->getArgOperand(0); |
1677 | 8 | Value *Arg1 = CI->getArgOperand(1); |
1678 | 8 | |
1679 | 8 | // Nontemporal (unaligned) store of the 0'th element of the float/double |
1680 | 8 | // vector. |
1681 | 8 | Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType(); |
1682 | 8 | PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy); |
1683 | 8 | Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast"); |
1684 | 8 | Value *Extract = |
1685 | 8 | Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement"); |
1686 | 8 | |
1687 | 8 | StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1); |
1688 | 8 | SI->setMetadata(M->getMDKindID("nontemporal"), Node); |
1689 | 8 | |
1690 | 8 | // Remove intrinsic. |
1691 | 8 | CI->eraseFromParent(); |
1692 | 8 | return; |
1693 | 8 | } |
1694 | 10.2k | |
1695 | 10.2k | if (IsX86 && (10.1k Name.startswith("avx.movnt.")10.1k || |
1696 | 10.1k | Name.startswith("avx512.storent.")10.1k )) { |
1697 | 18 | Module *M = F->getParent(); |
1698 | 18 | SmallVector<Metadata *, 1> Elts; |
1699 | 18 | Elts.push_back( |
1700 | 18 | ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1))); |
1701 | 18 | MDNode *Node = MDNode::get(C, Elts); |
1702 | 18 | |
1703 | 18 | Value *Arg0 = CI->getArgOperand(0); |
1704 | 18 | Value *Arg1 = CI->getArgOperand(1); |
1705 | 18 | |
1706 | 18 | // Convert the type of the pointer to a pointer to the stored type. |
1707 | 18 | Value *BC = Builder.CreateBitCast(Arg0, |
1708 | 18 | PointerType::getUnqual(Arg1->getType()), |
1709 | 18 | "cast"); |
1710 | 18 | VectorType *VTy = cast<VectorType>(Arg1->getType()); |
1711 | 18 | StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC, |
1712 | 18 | VTy->getBitWidth() / 8); |
1713 | 18 | SI->setMetadata(M->getMDKindID("nontemporal"), Node); |
1714 | 18 | |
1715 | 18 | // Remove intrinsic. |
1716 | 18 | CI->eraseFromParent(); |
1717 | 18 | return; |
1718 | 18 | } |
1719 | 10.1k | |
1720 | 10.1k | if (IsX86 && Name == "sse2.storel.dq"10.1k ) { |
1721 | 6 | Value *Arg0 = CI->getArgOperand(0); |
1722 | 6 | Value *Arg1 = CI->getArgOperand(1); |
1723 | 6 | |
1724 | 6 | Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2); |
1725 | 6 | Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast"); |
1726 | 6 | Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0); |
1727 | 6 | Value *BC = Builder.CreateBitCast(Arg0, |
1728 | 6 | PointerType::getUnqual(Elt->getType()), |
1729 | 6 | "cast"); |
1730 | 6 | Builder.CreateAlignedStore(Elt, BC, 1); |
1731 | 6 | |
1732 | 6 | // Remove intrinsic. |
1733 | 6 | CI->eraseFromParent(); |
1734 | 6 | return; |
1735 | 6 | } |
1736 | 10.1k | |
1737 | 10.1k | if (IsX86 && (10.1k Name.startswith("sse.storeu.")10.1k || |
1738 | 10.1k | Name.startswith("sse2.storeu.")10.1k || |
1739 | 10.1k | Name.startswith("avx.storeu.")10.0k )) { |
1740 | 87 | Value *Arg0 = CI->getArgOperand(0); |
1741 | 87 | Value *Arg1 = CI->getArgOperand(1); |
1742 | 87 | |
1743 | 87 | Arg0 = Builder.CreateBitCast(Arg0, |
1744 | 87 | PointerType::getUnqual(Arg1->getType()), |
1745 | 87 | "cast"); |
1746 | 87 | Builder.CreateAlignedStore(Arg1, Arg0, 1); |
1747 | 87 | |
1748 | 87 | // Remove intrinsic. |
1749 | 87 | CI->eraseFromParent(); |
1750 | 87 | return; |
1751 | 87 | } |
1752 | 10.0k | |
1753 | 10.0k | if (IsX86 && Name == "avx512.mask.store.ss"10.0k ) { |
1754 | 0 | Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1)); |
1755 | 0 | UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1), |
1756 | 0 | Mask, false); |
1757 | 0 |
|
1758 | 0 | // Remove intrinsic. |
1759 | 0 | CI->eraseFromParent(); |
1760 | 0 | return; |
1761 | 0 | } |
1762 | 10.0k | |
1763 | 10.0k | if (IsX86 && (Name.startswith("avx512.mask.store"))10.0k ) { |
1764 | 120 | // "avx512.mask.storeu." or "avx512.mask.store." |
1765 | 120 | bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu". |
1766 | 120 | UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1), |
1767 | 120 | CI->getArgOperand(2), Aligned); |
1768 | 120 | |
1769 | 120 | // Remove intrinsic. |
1770 | 120 | CI->eraseFromParent(); |
1771 | 120 | return; |
1772 | 120 | } |
1773 | 9.97k | |
1774 | 9.97k | Value *Rep; |
1775 | 9.97k | // Upgrade packed integer vector compare intrinsics to compare instructions. |
1776 | 9.97k | if (IsX86 && (9.93k Name.startswith("sse2.pcmp")9.93k || |
1777 | 9.93k | Name.startswith("avx2.pcmp")9.93k )) { |
1778 | 1 | // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt." |
1779 | 1 | bool CmpEq = Name[9] == 'e'; |
1780 | 1 | Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT0 , |
1781 | 1 | CI->getArgOperand(0), CI->getArgOperand(1)); |
1782 | 1 | Rep = Builder.CreateSExt(Rep, CI->getType(), ""); |
1783 | 9.97k | } else if (IsX86 && (Name.startswith("avx512.broadcastm"))9.93k ) { |
1784 | 12 | Type *ExtTy = Type::getInt32Ty(C); |
1785 | 12 | if (CI->getOperand(0)->getType()->isIntegerTy(8)) |
1786 | 6 | ExtTy = Type::getInt64Ty(C); |
1787 | 12 | unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / |
1788 | 12 | ExtTy->getPrimitiveSizeInBits(); |
1789 | 12 | Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy); |
1790 | 12 | Rep = Builder.CreateVectorSplat(NumElts, Rep); |
1791 | 9.96k | } else if (IsX86 && (9.92k Name == "sse.sqrt.ss"9.92k || |
1792 | 9.92k | Name == "sse2.sqrt.sd"9.89k )) { |
1793 | 68 | Value *Vec = CI->getArgOperand(0); |
1794 | 68 | Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0); |
1795 | 68 | Function *Intr = Intrinsic::getDeclaration(F->getParent(), |
1796 | 68 | Intrinsic::sqrt, Elt0->getType()); |
1797 | 68 | Elt0 = Builder.CreateCall(Intr, Elt0); |
1798 | 68 | Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0); |
1799 | 9.89k | } else if (IsX86 && (9.85k Name.startswith("avx.sqrt.p")9.85k || |
1800 | 9.85k | Name.startswith("sse2.sqrt.p")9.84k || |
1801 | 9.85k | Name.startswith("sse.sqrt.p")9.84k )) { |
1802 | 22 | Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), |
1803 | 22 | Intrinsic::sqrt, |
1804 | 22 | CI->getType()), |
1805 | 22 | {CI->getArgOperand(0)}); |
1806 | 9.87k | } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))9.83k ) { |
1807 | 28 | if (CI->getNumArgOperands() == 4 && |
1808 | 28 | (24 !isa<ConstantInt>(CI->getArgOperand(3))24 || |
1809 | 24 | cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) { |
1810 | 12 | Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_5126 |
1811 | 12 | : Intrinsic::x86_avx512_sqrt_pd_5126 ; |
1812 | 12 | |
1813 | 12 | Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) }; |
1814 | 12 | Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), |
1815 | 12 | IID), Args); |
1816 | 16 | } else { |
1817 | 16 | Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), |
1818 | 16 | Intrinsic::sqrt, |
1819 | 16 | CI->getType()), |
1820 | 16 | {CI->getArgOperand(0)}); |
1821 | 16 | } |
1822 | 28 | Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, |
1823 | 28 | CI->getArgOperand(1)); |
1824 | 9.84k | } else if (IsX86 && (9.80k Name.startswith("avx512.ptestm")9.80k || |
1825 | 9.80k | Name.startswith("avx512.ptestnm")9.75k )) { |
1826 | 100 | Value *Op0 = CI->getArgOperand(0); |
1827 | 100 | Value *Op1 = CI->getArgOperand(1); |
1828 | 100 | Value *Mask = CI->getArgOperand(2); |
1829 | 100 | Rep = Builder.CreateAnd(Op0, Op1); |
1830 | 100 | llvm::Type *Ty = Op0->getType(); |
1831 | 100 | Value *Zero = llvm::Constant::getNullValue(Ty); |
1832 | 100 | ICmpInst::Predicate Pred = |
1833 | 100 | Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE52 : ICmpInst::ICMP_EQ48 ; |
1834 | 100 | Rep = Builder.CreateICmp(Pred, Rep, Zero); |
1835 | 100 | Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask); |
1836 | 9.74k | } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))9.70k ){ |
1837 | 72 | unsigned NumElts = |
1838 | 72 | CI->getArgOperand(1)->getType()->getVectorNumElements(); |
1839 | 72 | Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0)); |
1840 | 72 | Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, |
1841 | 72 | CI->getArgOperand(1)); |
1842 | 9.67k | } else if (IsX86 && (Name.startswith("avx512.kunpck"))9.63k ) { |
1843 | 6 | unsigned NumElts = CI->getType()->getScalarSizeInBits(); |
1844 | 6 | Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts); |
1845 | 6 | Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts); |
1846 | 6 | uint32_t Indices[64]; |
1847 | 230 | for (unsigned i = 0; i != NumElts; ++i224 ) |
1848 | 224 | Indices[i] = i; |
1849 | 6 | |
1850 | 6 | // First extract half of each vector. This gives better codegen than |
1851 | 6 | // doing it in a single shuffle. |
1852 | 6 | LHS = Builder.CreateShuffleVector(LHS, LHS, |
1853 | 6 | makeArrayRef(Indices, NumElts / 2)); |
1854 | 6 | RHS = Builder.CreateShuffleVector(RHS, RHS, |
1855 | 6 | makeArrayRef(Indices, NumElts / 2)); |
1856 | 6 | // Concat the vectors. |
1857 | 6 | // NOTE: Operands have to be swapped to match intrinsic definition. |
1858 | 6 | Rep = Builder.CreateShuffleVector(RHS, LHS, |
1859 | 6 | makeArrayRef(Indices, NumElts)); |
1860 | 6 | Rep = Builder.CreateBitCast(Rep, CI->getType()); |
1861 | 9.66k | } else if (IsX86 && Name == "avx512.kand.w"9.62k ) { |
1862 | 4 | Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16); |
1863 | 4 | Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16); |
1864 | 4 | Rep = Builder.CreateAnd(LHS, RHS); |
1865 | 4 | Rep = Builder.CreateBitCast(Rep, CI->getType()); |
1866 | 9.66k | } else if (IsX86 && Name == "avx512.kandn.w"9.62k ) { |
1867 | 4 | Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16); |
1868 | 4 | Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16); |
1869 | 4 | LHS = Builder.CreateNot(LHS); |
1870 | 4 | Rep = Builder.CreateAnd(LHS, RHS); |
1871 | 4 | Rep = Builder.CreateBitCast(Rep, CI->getType()); |
1872 | 9.66k | } else if (IsX86 && Name == "avx512.kor.w"9.61k ) { |
1873 | 4 | Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16); |
1874 | 4 | Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16); |
1875 | 4 | Rep = Builder.CreateOr(LHS, RHS); |
1876 | 4 | Rep = Builder.CreateBitCast(Rep, CI->getType()); |
1877 | 9.65k | } else if (IsX86 && Name == "avx512.kxor.w"9.61k ) { |
1878 | 4 | Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16); |
1879 | 4 | Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16); |
1880 | 4 | Rep = Builder.CreateXor(LHS, RHS); |
1881 | 4 | Rep = Builder.CreateBitCast(Rep, CI->getType()); |
1882 | 9.65k | } else if (IsX86 && Name == "avx512.kxnor.w"9.61k ) { |
1883 | 4 | Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16); |
1884 | 4 | Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16); |
1885 | 4 | LHS = Builder.CreateNot(LHS); |
1886 | 4 | Rep = Builder.CreateXor(LHS, RHS); |
1887 | 4 | Rep = Builder.CreateBitCast(Rep, CI->getType()); |
1888 | 9.64k | } else if (IsX86 && Name == "avx512.knot.w"9.60k ) { |
1889 | 2 | Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16); |
1890 | 2 | Rep = Builder.CreateNot(Rep); |
1891 | 2 | Rep = Builder.CreateBitCast(Rep, CI->getType()); |
1892 | 9.64k | } else if (IsX86 && |
1893 | 9.64k | (9.60k Name == "avx512.kortestz.w"9.60k || Name == "avx512.kortestc.w"9.60k )) { |
1894 | 4 | Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16); |
1895 | 4 | Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16); |
1896 | 4 | Rep = Builder.CreateOr(LHS, RHS); |
1897 | 4 | Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty()); |
1898 | 4 | Value *C; |
1899 | 4 | if (Name[14] == 'c') |
1900 | 0 | C = ConstantInt::getAllOnesValue(Builder.getInt16Ty()); |
1901 | 4 | else |
1902 | 4 | C = ConstantInt::getNullValue(Builder.getInt16Ty()); |
1903 | 4 | Rep = Builder.CreateICmpEQ(Rep, C); |
1904 | 4 | Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty()); |
1905 | 9.64k | } else if (IsX86 && (9.60k Name == "sse.add.ss"9.60k || Name == "sse2.add.sd"9.59k || |
1906 | 9.60k | Name == "sse.sub.ss"9.58k || Name == "sse2.sub.sd"9.56k || |
1907 | 9.60k | Name == "sse.mul.ss"9.56k || Name == "sse2.mul.sd"9.54k || |
1908 | 9.60k | Name == "sse.div.ss"9.53k || Name == "sse2.div.sd"9.53k )) { |
1909 | 79 | Type *I32Ty = Type::getInt32Ty(C); |
1910 | 79 | Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0), |
1911 | 79 | ConstantInt::get(I32Ty, 0)); |
1912 | 79 | Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1), |
1913 | 79 | ConstantInt::get(I32Ty, 0)); |
1914 | 79 | Value *EltOp; |
1915 | 79 | if (Name.contains(".add.")) |
1916 | 17 | EltOp = Builder.CreateFAdd(Elt0, Elt1); |
1917 | 62 | else if (Name.contains(".sub.")) |
1918 | 23 | EltOp = Builder.CreateFSub(Elt0, Elt1); |
1919 | 39 | else if (Name.contains(".mul.")) |
1920 | 23 | EltOp = Builder.CreateFMul(Elt0, Elt1); |
1921 | 16 | else |
1922 | 16 | EltOp = Builder.CreateFDiv(Elt0, Elt1); |
1923 | 79 | Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp, |
1924 | 79 | ConstantInt::get(I32Ty, 0)); |
1925 | 9.56k | } else if (IsX86 && Name.startswith("avx512.mask.pcmp")9.52k ) { |
1926 | 96 | // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt." |
1927 | 96 | bool CmpEq = Name[16] == 'e'; |
1928 | 96 | Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 048 : 648 , true); |
1929 | 9.46k | } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")9.42k ) { |
1930 | 6 | Type *OpTy = CI->getArgOperand(0)->getType(); |
1931 | 6 | unsigned VecWidth = OpTy->getPrimitiveSizeInBits(); |
1932 | 6 | Intrinsic::ID IID; |
1933 | 6 | switch (VecWidth) { |
1934 | 6 | default: 0 llvm_unreachable0 ("Unexpected intrinsic"); |
1935 | 6 | case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break2 ; |
1936 | 6 | case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break2 ; |
1937 | 6 | case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break2 ; |
1938 | 6 | } |
1939 | 6 | |
1940 | 6 | Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), |
1941 | 6 | { CI->getOperand(0), CI->getArgOperand(1) }); |
1942 | 6 | Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2)); |
1943 | 9.46k | } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")9.42k ) { |
1944 | 24 | Type *OpTy = CI->getArgOperand(0)->getType(); |
1945 | 24 | unsigned VecWidth = OpTy->getPrimitiveSizeInBits(); |
1946 | 24 | unsigned EltWidth = OpTy->getScalarSizeInBits(); |
1947 | 24 | Intrinsic::ID IID; |
1948 | 24 | if (VecWidth == 128 && EltWidth == 328 ) |
1949 | 4 | IID = Intrinsic::x86_avx512_fpclass_ps_128; |
1950 | 20 | else if (VecWidth == 256 && EltWidth == 328 ) |
1951 | 4 | IID = Intrinsic::x86_avx512_fpclass_ps_256; |
1952 | 16 | else if (VecWidth == 512 && EltWidth == 328 ) |
1953 | 4 | IID = Intrinsic::x86_avx512_fpclass_ps_512; |
1954 | 12 | else if (VecWidth == 128 && EltWidth == 644 ) |
1955 | 4 | IID = Intrinsic::x86_avx512_fpclass_pd_128; |
1956 | 8 | else if (VecWidth == 256 && EltWidth == 644 ) |
1957 | 4 | IID = Intrinsic::x86_avx512_fpclass_pd_256; |
1958 | 4 | else if (VecWidth == 512 && EltWidth == 64) |
1959 | 4 | IID = Intrinsic::x86_avx512_fpclass_pd_512; |
1960 | 4 | else |
1961 | 4 | llvm_unreachable0 ("Unexpected intrinsic"); |
1962 | 24 | |
1963 | 24 | Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), |
1964 | 24 | { CI->getOperand(0), CI->getArgOperand(1) }); |
1965 | 24 | Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2)); |
1966 | 9.43k | } else if (IsX86 && Name.startswith("avx512.mask.cmp.p")9.39k ) { |
1967 | 12 | Type *OpTy = CI->getArgOperand(0)->getType(); |
1968 | 12 | unsigned VecWidth = OpTy->getPrimitiveSizeInBits(); |
1969 | 12 | unsigned EltWidth = OpTy->getScalarSizeInBits(); |
1970 | 12 | Intrinsic::ID IID; |
1971 | 12 | if (VecWidth == 128 && EltWidth == 324 ) |
1972 | 2 | IID = Intrinsic::x86_avx512_cmp_ps_128; |
1973 | 10 | else if (VecWidth == 256 && EltWidth == 324 ) |
1974 | 2 | IID = Intrinsic::x86_avx512_cmp_ps_256; |
1975 | 8 | else if (VecWidth == 512 && EltWidth == 324 ) |
1976 | 2 | IID = Intrinsic::x86_avx512_cmp_ps_512; |
1977 | 6 | else if (VecWidth == 128 && EltWidth == 642 ) |
1978 | 2 | IID = Intrinsic::x86_avx512_cmp_pd_128; |
1979 | 4 | else if (VecWidth == 256 && EltWidth == 642 ) |
1980 | 2 | IID = Intrinsic::x86_avx512_cmp_pd_256; |
1981 | 2 | else if (VecWidth == 512 && EltWidth == 64) |
1982 | 2 | IID = Intrinsic::x86_avx512_cmp_pd_512; |
1983 | 2 | else |
1984 | 2 | llvm_unreachable0 ("Unexpected intrinsic"); |
1985 | 12 | |
1986 | 12 | SmallVector<Value *, 4> Args; |
1987 | 12 | Args.push_back(CI->getArgOperand(0)); |
1988 | 12 | Args.push_back(CI->getArgOperand(1)); |
1989 | 12 | Args.push_back(CI->getArgOperand(2)); |
1990 | 12 | if (CI->getNumArgOperands() == 5) |
1991 | 4 | Args.push_back(CI->getArgOperand(4)); |
1992 | 12 | |
1993 | 12 | Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), |
1994 | 12 | Args); |
1995 | 12 | Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(3)); |
1996 | 9.42k | } else if (IsX86 && Name.startswith("avx512.mask.cmp.")9.38k && |
1997 | 9.42k | Name[16] != 'p'384 ) { |
1998 | 384 | // Integer compare intrinsics. |
1999 | 384 | unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); |
2000 | 384 | Rep = upgradeMaskedCompare(Builder, *CI, Imm, true); |
2001 | 9.04k | } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")9.00k ) { |
2002 | 384 | unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); |
2003 | 384 | Rep = upgradeMaskedCompare(Builder, *CI, Imm, false); |
2004 | 8.65k | } else if (IsX86 && (8.61k Name.startswith("avx512.cvtb2mask.")8.61k || |
2005 | 8.61k | Name.startswith("avx512.cvtw2mask.")8.61k || |
2006 | 8.61k | Name.startswith("avx512.cvtd2mask.")8.60k || |
2007 | 8.61k | Name.startswith("avx512.cvtq2mask.")8.59k )) { |
2008 | 26 | Value *Op = CI->getArgOperand(0); |
2009 | 26 | Value *Zero = llvm::Constant::getNullValue(Op->getType()); |
2010 | 26 | Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero); |
2011 | 26 | Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr); |
2012 | 8.63k | } else if(IsX86 && (8.59k Name == "ssse3.pabs.b.128"8.59k || |
2013 | 8.59k | Name == "ssse3.pabs.w.128"8.58k || |
2014 | 8.59k | Name == "ssse3.pabs.d.128"8.57k || |
2015 | 8.59k | Name.startswith("avx2.pabs")8.56k || |
2016 | 8.59k | Name.startswith("avx512.mask.pabs")8.54k )) { |
2017 | 96 | Rep = upgradeAbs(Builder, *CI); |
2018 | 8.53k | } else if (IsX86 && (8.49k Name == "sse41.pmaxsb"8.49k || |
2019 | 8.49k | Name == "sse2.pmaxs.w"8.48k || |
2020 | 8.49k | Name == "sse41.pmaxsd"8.47k || |
2021 | 8.49k | Name.startswith("avx2.pmaxs")8.45k || |
2022 | 8.49k | Name.startswith("avx512.mask.pmaxs")8.42k )) { |
2023 | 115 | Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT); |
2024 | 8.42k | } else if (IsX86 && (8.37k Name == "sse2.pmaxu.b"8.37k || |
2025 | 8.37k | Name == "sse41.pmaxuw"8.32k || |
2026 | 8.37k | Name == "sse41.pmaxud"8.31k || |
2027 | 8.37k | Name.startswith("avx2.pmaxu")8.29k || |
2028 | 8.37k | Name.startswith("avx512.mask.pmaxu")8.27k )) { |
2029 | 152 | Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT); |
2030 | 8.26k | } else if (IsX86 && (8.22k Name == "sse41.pminsb"8.22k || |
2031 | 8.22k | Name == "sse2.pmins.w"8.21k || |
2032 | 8.22k | Name == "sse41.pminsd"8.20k || |
2033 | 8.22k | Name.startswith("avx2.pmins")8.18k || |
2034 | 8.22k | Name.startswith("avx512.mask.pmins")8.16k )) { |
2035 | 111 | Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT); |
2036 | 8.15k | } else if (IsX86 && (8.11k Name == "sse2.pminu.b"8.11k || |
2037 | 8.11k | Name == "sse41.pminuw"8.10k || |
2038 | 8.11k | Name == "sse41.pminud"8.09k || |
2039 | 8.11k | Name.startswith("avx2.pminu")8.07k || |
2040 | 8.11k | Name.startswith("avx512.mask.pminu")8.05k )) { |
2041 | 112 | Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT); |
2042 | 8.04k | } else if (IsX86 && (8.00k Name == "sse2.pmulu.dq"8.00k || |
2043 | 8.00k | Name == "avx2.pmulu.dq"7.96k || |
2044 | 8.00k | Name == "avx512.pmulu.dq.512"7.95k || |
2045 | 8.00k | Name.startswith("avx512.mask.pmulu.dq.")7.93k )) { |
2046 | 126 | Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false); |
2047 | 7.91k | } else if (IsX86 && (7.87k Name == "sse41.pmuldq"7.87k || |
2048 | 7.87k | Name == "avx2.pmul.dq"7.86k || |
2049 | 7.87k | Name == "avx512.pmul.dq.512"7.85k || |
2050 | 7.87k | Name.startswith("avx512.mask.pmul.dq.")7.83k )) { |
2051 | 98 | Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true); |
2052 | 7.82k | } else if (IsX86 && (7.78k Name == "sse.cvtsi2ss"7.78k || |
2053 | 7.78k | Name == "sse2.cvtsi2sd"7.76k || |
2054 | 7.78k | Name == "sse.cvtsi642ss"7.75k || |
2055 | 7.78k | Name == "sse2.cvtsi642sd"7.74k )) { |
2056 | 38 | Rep = Builder.CreateSIToFP(CI->getArgOperand(1), |
2057 | 38 | CI->getType()->getVectorElementType()); |
2058 | 38 | Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0); |
2059 | 7.78k | } else if (IsX86 && Name == "avx512.cvtusi2sd"7.74k ) { |
2060 | 2 | Rep = Builder.CreateUIToFP(CI->getArgOperand(1), |
2061 | 2 | CI->getType()->getVectorElementType()); |
2062 | 2 | Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0); |
2063 | 7.78k | } else if (IsX86 && Name == "sse2.cvtss2sd"7.74k ) { |
2064 | 21 | Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0); |
2065 | 21 | Rep = Builder.CreateFPExt(Rep, CI->getType()->getVectorElementType()); |
2066 | 21 | Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0); |
2067 | 7.76k | } else if (IsX86 && (7.71k Name == "sse2.cvtdq2pd"7.71k || |
2068 | 7.71k | Name == "sse2.cvtdq2ps"7.70k || |
2069 | 7.71k | Name == "avx.cvtdq2.pd.256"7.69k || |
2070 | 7.71k | Name == "avx.cvtdq2.ps.256"7.68k || |
2071 | 7.71k | Name.startswith("avx512.mask.cvtdq2pd.")7.68k || |
2072 | 7.71k | Name.startswith("avx512.mask.cvtudq2pd.")7.67k || |
2073 | 7.71k | Name.startswith("avx512.mask.cvtdq2ps.")7.65k || |
2074 | 7.71k | Name.startswith("avx512.mask.cvtudq2ps.")7.64k || |
2075 | 7.71k | Name.startswith("avx512.mask.cvtqq2pd.")7.63k || |
2076 | 7.71k | Name.startswith("avx512.mask.cvtuqq2pd.")7.62k || |
2077 | 7.71k | Name == "avx512.mask.cvtqq2ps.256"7.61k || |
2078 | 7.71k | Name == "avx512.mask.cvtqq2ps.512"7.60k || |
2079 | 7.71k | Name == "avx512.mask.cvtuqq2ps.256"7.60k || |
2080 | 7.71k | Name == "avx512.mask.cvtuqq2ps.512"7.59k || |
2081 | 7.71k | Name == "sse2.cvtps2pd"7.59k || |
2082 | 7.71k | Name == "avx.cvt.ps2.pd.256"7.58k || |
2083 | 7.71k | Name == "avx512.mask.cvtps2pd.128"7.58k || |
2084 | 7.71k | Name == "avx512.mask.cvtps2pd.256"7.57k )) { |
2085 | 147 | Type *DstTy = CI->getType(); |
2086 | 147 | Rep = CI->getArgOperand(0); |
2087 | 147 | Type *SrcTy = Rep->getType(); |
2088 | 147 | |
2089 | 147 | unsigned NumDstElts = DstTy->getVectorNumElements(); |
2090 | 147 | if (NumDstElts < SrcTy->getVectorNumElements()) { |
2091 | 32 | assert(NumDstElts == 2 && "Unexpected vector size"); |
2092 | 32 | uint32_t ShuffleMask[2] = { 0, 1 }; |
2093 | 32 | Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask); |
2094 | 32 | } |
2095 | 147 | |
2096 | 147 | bool IsPS2PD = SrcTy->getVectorElementType()->isFloatTy(); |
2097 | 147 | bool IsUnsigned = (StringRef::npos != Name.find("cvtu")); |
2098 | 147 | if (IsPS2PD) |
2099 | 22 | Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd"); |
2100 | 125 | else if (CI->getNumArgOperands() == 4 && |
2101 | 125 | (24 !isa<ConstantInt>(CI->getArgOperand(3))24 || |
2102 | 24 | cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) { |
2103 | 12 | Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round6 |
2104 | 12 | : Intrinsic::x86_avx512_sitofp_round6 ; |
2105 | 12 | Function *F = Intrinsic::getDeclaration(CI->getModule(), IID, |
2106 | 12 | { DstTy, SrcTy }); |
2107 | 12 | Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) }); |
2108 | 113 | } else { |
2109 | 113 | Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")38 |
2110 | 113 | : Builder.CreateSIToFP(Rep, DstTy, "cvt")75 ; |
2111 | 113 | } |
2112 | 147 | |
2113 | 147 | if (CI->getNumArgOperands() >= 3) |
2114 | 96 | Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, |
2115 | 96 | CI->getArgOperand(1)); |
2116 | 7.61k | } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))7.57k ) { |
2117 | 108 | Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0), |
2118 | 108 | CI->getArgOperand(1), CI->getArgOperand(2), |
2119 | 108 | /*Aligned*/false); |
2120 | 7.50k | } else if (IsX86 && (Name.startswith("avx512.mask.load."))7.46k ) { |
2121 | 72 | Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0), |
2122 | 72 | CI->getArgOperand(1),CI->getArgOperand(2), |
2123 | 72 | /*Aligned*/true); |
2124 | 7.43k | } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")7.39k ) { |
2125 | 110 | Type *ResultTy = CI->getType(); |
2126 | 110 | Type *PtrTy = ResultTy->getVectorElementType(); |
2127 | 110 | |
2128 | 110 | // Cast the pointer to element type. |
2129 | 110 | Value *Ptr = Builder.CreateBitCast(CI->getOperand(0), |
2130 | 110 | llvm::PointerType::getUnqual(PtrTy)); |
2131 | 110 | |
2132 | 110 | Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2), |
2133 | 110 | ResultTy->getVectorNumElements()); |
2134 | 110 | |
2135 | 110 | Function *ELd = Intrinsic::getDeclaration(F->getParent(), |
2136 | 110 | Intrinsic::masked_expandload, |
2137 | 110 | ResultTy); |
2138 | 110 | Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) }); |
2139 | 7.32k | } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")7.28k ) { |
2140 | 72 | Type *ResultTy = CI->getArgOperand(1)->getType(); |
2141 | 72 | Type *PtrTy = ResultTy->getVectorElementType(); |
2142 | 72 | |
2143 | 72 | // Cast the pointer to element type. |
2144 | 72 | Value *Ptr = Builder.CreateBitCast(CI->getOperand(0), |
2145 | 72 | llvm::PointerType::getUnqual(PtrTy)); |
2146 | 72 | |
2147 | 72 | Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2), |
2148 | 72 | ResultTy->getVectorNumElements()); |
2149 | 72 | |
2150 | 72 | Function *CSt = Intrinsic::getDeclaration(F->getParent(), |
2151 | 72 | Intrinsic::masked_compressstore, |
2152 | 72 | ResultTy); |
2153 | 72 | Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec }); |
2154 | 7.25k | } else if (IsX86 && (7.21k Name.startswith("avx512.mask.compress.")7.21k || |
2155 | 7.21k | Name.startswith("avx512.mask.expand.")7.07k )) { |
2156 | 264 | Type *ResultTy = CI->getType(); |
2157 | 264 | |
2158 | 264 | Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2), |
2159 | 264 | ResultTy->getVectorNumElements()); |
2160 | 264 | |
2161 | 264 | bool IsCompress = Name[12] == 'c'; |
2162 | 264 | Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress132 |
2163 | 264 | : Intrinsic::x86_avx512_mask_expand132 ; |
2164 | 264 | Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy); |
2165 | 264 | Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1), |
2166 | 264 | MaskVec }); |
2167 | 6.98k | } else if (IsX86 && Name.startswith("xop.vpcom")6.94k ) { |
2168 | 113 | bool IsSigned; |
2169 | 113 | if (Name.endswith("ub") || Name.endswith("uw")99 || Name.endswith("ud")85 || |
2170 | 113 | Name.endswith("uq")71 ) |
2171 | 56 | IsSigned = false; |
2172 | 57 | else if (Name.endswith("b") || Name.endswith("w")42 || Name.endswith("d")28 || |
2173 | 57 | Name.endswith("q")14 ) |
2174 | 57 | IsSigned = true; |
2175 | 57 | else |
2176 | 57 | llvm_unreachable0 ("Unknown suffix"); |
2177 | 113 | |
2178 | 113 | unsigned Imm; |
2179 | 113 | if (CI->getNumArgOperands() == 3) { |
2180 | 32 | Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); |
2181 | 81 | } else { |
2182 | 81 | Name = Name.substr(9); // strip off "xop.vpcom" |
2183 | 81 | if (Name.startswith("lt")) |
2184 | 10 | Imm = 0; |
2185 | 71 | else if (Name.startswith("le")) |
2186 | 10 | Imm = 1; |
2187 | 61 | else if (Name.startswith("gt")) |
2188 | 10 | Imm = 2; |
2189 | 51 | else if (Name.startswith("ge")) |
2190 | 10 | Imm = 3; |
2191 | 41 | else if (Name.startswith("eq")) |
2192 | 11 | Imm = 4; |
2193 | 30 | else if (Name.startswith("ne")) |
2194 | 10 | Imm = 5; |
2195 | 20 | else if (Name.startswith("false")) |
2196 | 10 | Imm = 6; |
2197 | 10 | else if (Name.startswith("true")) |
2198 | 10 | Imm = 7; |
2199 | 10 | else |
2200 | 10 | llvm_unreachable0 ("Unknown condition"); |
2201 | 81 | } |
2202 | 113 | |
2203 | 113 | Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned); |
2204 | 6.87k | } else if (IsX86 && Name.startswith("xop.vpcmov")6.83k ) { |
2205 | 12 | Value *Sel = CI->getArgOperand(2); |
2206 | 12 | Value *NotSel = Builder.CreateNot(Sel); |
2207 | 12 | Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel); |
2208 | 12 | Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel); |
2209 | 12 | Rep = Builder.CreateOr(Sel0, Sel1); |
2210 | 6.86k | } else if (IsX86 && (6.82k Name.startswith("xop.vprot")6.82k || |
2211 | 6.82k | Name.startswith("avx512.prol")6.80k || |
2212 | 6.82k | Name.startswith("avx512.mask.prol")6.72k )) { |
2213 | 192 | Rep = upgradeX86Rotate(Builder, *CI, false); |
2214 | 6.67k | } else if (IsX86 && (6.62k Name.startswith("avx512.pror")6.62k || |
2215 | 6.62k | Name.startswith("avx512.mask.pror")6.55k )) { |
2216 | 172 | Rep = upgradeX86Rotate(Builder, *CI, true); |
2217 | 6.49k | } else if (IsX86 && (6.45k Name.startswith("avx512.vpshld.")6.45k || |
2218 | 6.45k | Name.startswith("avx512.mask.vpshld")6.41k || |
2219 | 6.45k | Name.startswith("avx512.maskz.vpshld")6.34k )) { |
2220 | 130 | bool ZeroMask = Name[11] == 'z'; |
2221 | 130 | Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask); |
2222 | 6.36k | } else if (IsX86 && (6.32k Name.startswith("avx512.vpshrd.")6.32k || |
2223 | 6.32k | Name.startswith("avx512.mask.vpshrd")6.28k || |
2224 | 6.32k | Name.startswith("avx512.maskz.vpshrd")6.21k )) { |
2225 | 130 | bool ZeroMask = Name[11] == 'z'; |
2226 | 130 | Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask); |
2227 | 6.23k | } else if (IsX86 && Name == "sse42.crc32.64.8"6.19k ) { |
2228 | 6 | Function *CRC32 = Intrinsic::getDeclaration(F->getParent(), |
2229 | 6 | Intrinsic::x86_sse42_crc32_32_8); |
2230 | 6 | Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C)); |
2231 | 6 | Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)}); |
2232 | 6 | Rep = Builder.CreateZExt(Rep, CI->getType(), ""); |
2233 | 6.23k | } else if (IsX86 && (6.19k Name.startswith("avx.vbroadcast.s")6.19k || |
2234 | 6.19k | Name.startswith("avx512.vbroadcast.s")6.18k )) { |
2235 | 7 | // Replace broadcasts with a series of insertelements. |
2236 | 7 | Type *VecTy = CI->getType(); |
2237 | 7 | Type *EltTy = VecTy->getVectorElementType(); |
2238 | 7 | unsigned EltNum = VecTy->getVectorNumElements(); |
2239 | 7 | Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0), |
2240 | 7 | EltTy->getPointerTo()); |
2241 | 7 | Value *Load = Builder.CreateLoad(EltTy, Cast); |
2242 | 7 | Type *I32Ty = Type::getInt32Ty(C); |
2243 | 7 | Rep = UndefValue::get(VecTy); |
2244 | 71 | for (unsigned I = 0; I < EltNum; ++I64 ) |
2245 | 64 | Rep = Builder.CreateInsertElement(Rep, Load, |
2246 | 64 | ConstantInt::get(I32Ty, I)); |
2247 | 6.22k | } else if (IsX86 && (6.18k Name.startswith("sse41.pmovsx")6.18k || |
2248 | 6.18k | Name.startswith("sse41.pmovzx")6.12k || |
2249 | 6.18k | Name.startswith("avx2.pmovsx")6.05k || |
2250 | 6.18k | Name.startswith("avx2.pmovzx")6.03k || |
2251 | 6.18k | Name.startswith("avx512.mask.pmovsx")6.00k || |
2252 | 6.18k | Name.startswith("avx512.mask.pmovzx")5.89k )) { |
2253 | 394 | VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType()); |
2254 | 394 | VectorType *DstTy = cast<VectorType>(CI->getType()); |
2255 | 394 | unsigned NumDstElts = DstTy->getNumElements(); |
2256 | 394 | |
2257 | 394 | // Extract a subvector of the first NumDstElts lanes and sign/zero extend. |
2258 | 394 | SmallVector<uint32_t, 8> ShuffleMask(NumDstElts); |
2259 | 3.07k | for (unsigned i = 0; i != NumDstElts; ++i2.67k ) |
2260 | 2.67k | ShuffleMask[i] = i; |
2261 | 394 | |
2262 | 394 | Value *SV = Builder.CreateShuffleVector( |
2263 | 394 | CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask); |
2264 | 394 | |
2265 | 394 | bool DoSext = (StringRef::npos != Name.find("pmovsx")); |
2266 | 394 | Rep = DoSext ? Builder.CreateSExt(SV, DstTy)192 |
2267 | 394 | : Builder.CreateZExt(SV, DstTy)202 ; |
2268 | 394 | // If there are 3 arguments, it's a masked intrinsic so we need a select. |
2269 | 394 | if (CI->getNumArgOperands() == 3) |
2270 | 216 | Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, |
2271 | 216 | CI->getArgOperand(1)); |
2272 | 5.83k | } else if (Name == "avx512.mask.pmov.qd.256" || |
2273 | 5.83k | Name == "avx512.mask.pmov.qd.512"5.82k || |
2274 | 5.83k | Name == "avx512.mask.pmov.wb.256"5.81k || |
2275 | 5.83k | Name == "avx512.mask.pmov.wb.512"5.81k ) { |
2276 | 24 | Type *Ty = CI->getArgOperand(1)->getType(); |
2277 | 24 | Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty); |
2278 | 24 | Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, |
2279 | 24 | CI->getArgOperand(1)); |
2280 | 5.80k | } else if (IsX86 && (5.76k Name.startswith("avx.vbroadcastf128")5.76k || |
2281 | 5.76k | Name == "avx2.vbroadcasti128"5.75k )) { |
2282 | 9 | // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle. |
2283 | 9 | Type *EltTy = CI->getType()->getVectorElementType(); |
2284 | 9 | unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits(); |
2285 | 9 | Type *VT = VectorType::get(EltTy, NumSrcElts); |
2286 | 9 | Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0), |
2287 | 9 | PointerType::getUnqual(VT)); |
2288 | 9 | Value *Load = Builder.CreateAlignedLoad(VT, Op, 1); |
2289 | 9 | if (NumSrcElts == 2) |
2290 | 5 | Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()), |
2291 | 5 | { 0, 1, 0, 1 }); |
2292 | 4 | else |
2293 | 4 | Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()), |
2294 | 4 | { 0, 1, 2, 3, 0, 1, 2, 3 }); |
2295 | 5.79k | } else if (IsX86 && (5.75k Name.startswith("avx512.mask.shuf.i")5.75k || |
2296 | 5.75k | Name.startswith("avx512.mask.shuf.f")5.74k )) { |
2297 | 38 | unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); |
2298 | 38 | Type *VT = CI->getType(); |
2299 | 38 | unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128; |
2300 | 38 | unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits(); |
2301 | 38 | unsigned ControlBitsMask = NumLanes - 1; |
2302 | 38 | unsigned NumControlBits = NumLanes / 2; |
2303 | 38 | SmallVector<uint32_t, 8> ShuffleMask(0); |
2304 | 38 | |
2305 | 150 | for (unsigned l = 0; l != NumLanes; ++l112 ) { |
2306 | 112 | unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask; |
2307 | 112 | // We actually need the other source. |
2308 | 112 | if (l >= NumLanes / 2) |
2309 | 56 | LaneMask += NumLanes; |
2310 | 440 | for (unsigned i = 0; i != NumElementsInLane; ++i328 ) |
2311 | 328 | ShuffleMask.push_back(LaneMask * NumElementsInLane + i); |
2312 | 112 | } |
2313 | 38 | Rep = Builder.CreateShuffleVector(CI->getArgOperand(0), |
2314 | 38 | CI->getArgOperand(1), ShuffleMask); |
2315 | 38 | Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, |
2316 | 38 | CI->getArgOperand(3)); |
2317 | 5.76k | }else if (IsX86 && (5.71k Name.startswith("avx512.mask.broadcastf")5.71k || |
2318 | 5.71k | Name.startswith("avx512.mask.broadcasti")5.65k )) { |
2319 | 126 | unsigned NumSrcElts = |
2320 | 126 | CI->getArgOperand(0)->getType()->getVectorNumElements(); |
2321 | 126 | unsigned NumDstElts = CI->getType()->getVectorNumElements(); |
2322 | 126 | |
2323 | 126 | SmallVector<uint32_t, 8> ShuffleMask(NumDstElts); |
2324 | 1.39k | for (unsigned i = 0; i != NumDstElts; ++i1.27k ) |
2325 | 1.27k | ShuffleMask[i] = i % NumSrcElts; |
2326 | 126 | |
2327 | 126 | Rep = Builder.CreateShuffleVector(CI->getArgOperand(0), |
2328 | 126 | CI->getArgOperand(0), |
2329 | 126 | ShuffleMask); |
2330 | 126 | Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, |
2331 | 126 | CI->getArgOperand(1)); |
2332 | 5.63k | } else if (IsX86 && (5.59k Name.startswith("avx2.pbroadcast")5.59k || |
2333 | 5.59k | Name.startswith("avx2.vbroadcast")5.56k || |
2334 | 5.59k | Name.startswith("avx512.pbroadcast")5.54k || |
2335 | 5.59k | Name.startswith("avx512.mask.broadcast.s")5.47k )) { |
2336 | 146 | // Replace vp?broadcasts with a vector shuffle. |
2337 | 146 | Value *Op = CI->getArgOperand(0); |
2338 | 146 | unsigned NumElts = CI->getType()->getVectorNumElements(); |
2339 | 146 | Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts); |
2340 | 146 | Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()), |
2341 | 146 | Constant::getNullValue(MaskTy)); |
2342 | 146 | |
2343 | 146 | if (CI->getNumArgOperands() == 3) |
2344 | 102 | Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, |
2345 | 102 | CI->getArgOperand(1)); |
2346 | 5.48k | } else if (IsX86 && (5.44k Name.startswith("sse2.padds.")5.44k || |
2347 | 5.44k | Name.startswith("sse2.psubs.")5.34k || |
2348 | 5.44k | Name.startswith("avx2.padds.")5.27k || |
2349 | 5.44k | Name.startswith("avx2.psubs.")5.26k || |
2350 | 5.44k | Name.startswith("avx512.padds.")5.24k || |
2351 | 5.44k | Name.startswith("avx512.psubs.")5.23k || |
2352 | 5.44k | Name.startswith("avx512.mask.padds.")5.22k || |
2353 | 5.44k | Name.startswith("avx512.mask.psubs.")5.15k )) { |
2354 | 369 | bool IsAdd = Name.contains(".padds"); |
2355 | 369 | Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, true, IsAdd); |
2356 | 5.11k | } else if (IsX86 && (5.07k Name.startswith("sse2.paddus.")5.07k || |
2357 | 5.07k | Name.startswith("sse2.psubus.")5.04k || |
2358 | 5.07k | Name.startswith("avx2.paddus.")4.85k || |
2359 | 5.07k | Name.startswith("avx2.psubus.")4.84k || |
2360 | 5.07k | Name.startswith("avx512.mask.paddus.")4.84k || |
2361 | 5.07k | Name.startswith("avx512.mask.psubus.")4.76k )) { |
2362 | 381 | bool IsAdd = Name.contains(".paddus"); |
2363 | 381 | Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, false, IsAdd); |
2364 | 4.73k | } else if (IsX86 && Name.startswith("avx512.mask.palignr.")4.69k ) { |
2365 | 18 | Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0), |
2366 | 18 | CI->getArgOperand(1), |
2367 | 18 | CI->getArgOperand(2), |
2368 | 18 | CI->getArgOperand(3), |
2369 | 18 | CI->getArgOperand(4), |
2370 | 18 | false); |
2371 | 4.72k | } else if (IsX86 && Name.startswith("avx512.mask.valign.")4.67k ) { |
2372 | 24 | Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0), |
2373 | 24 | CI->getArgOperand(1), |
2374 | 24 | CI->getArgOperand(2), |
2375 | 24 | CI->getArgOperand(3), |
2376 | 24 | CI->getArgOperand(4), |
2377 | 24 | true); |
2378 | 4.69k | } else if (IsX86 && (4.65k Name == "sse2.psll.dq"4.65k || |
2379 | 4.65k | Name == "avx2.psll.dq"4.64k )) { |
2380 | 14 | // 128/256-bit shift left specified in bits. |
2381 | 14 | unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); |
2382 | 14 | Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), |
2383 | 14 | Shift / 8); // Shift is in bits. |
2384 | 4.68k | } else if (IsX86 && (4.64k Name == "sse2.psrl.dq"4.64k || |
2385 | 4.64k | Name == "avx2.psrl.dq"4.53k )) { |
2386 | 108 | // 128/256-bit shift right specified in bits. |
2387 | 108 | unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); |
2388 | 108 | Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), |
2389 | 108 | Shift / 8); // Shift is in bits. |
2390 | 4.57k | } else if (IsX86 && (4.53k Name == "sse2.psll.dq.bs"4.53k || |
2391 | 4.53k | Name == "avx2.psll.dq.bs"4.52k || |
2392 | 4.53k | Name == "avx512.psll.dq.512"4.52k )) { |
2393 | 16 | // 128/256/512-bit shift left specified in bytes. |
2394 | 16 | unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); |
2395 | 16 | Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift); |
2396 | 4.55k | } else if (IsX86 && (4.51k Name == "sse2.psrl.dq.bs"4.51k || |
2397 | 4.51k | Name == "avx2.psrl.dq.bs"4.51k || |
2398 | 4.51k | Name == "avx512.psrl.dq.512"4.50k )) { |
2399 | 16 | // 128/256/512-bit shift right specified in bytes. |
2400 | 16 | unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); |
2401 | 16 | Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift); |
2402 | 4.54k | } else if (IsX86 && (4.50k Name == "sse41.pblendw"4.50k || |
2403 | 4.50k | Name.startswith("sse41.blendp")4.48k || |
2404 | 4.50k | Name.startswith("avx.blend.p")4.45k || |
2405 | 4.50k | Name == "avx2.pblendw"4.43k || |
2406 | 4.50k | Name.startswith("avx2.pblendd.")4.42k )) { |
2407 | 98 | Value *Op0 = CI->getArgOperand(0); |
2408 | 98 | Value *Op1 = CI->getArgOperand(1); |
2409 | 98 | unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue(); |
2410 | 98 | VectorType *VecTy = cast<VectorType>(CI->getType()); |
2411 | 98 | unsigned NumElts = VecTy->getNumElements(); |
2412 | 98 | |
2413 | 98 | SmallVector<uint32_t, 16> Idxs(NumElts); |
2414 | 748 | for (unsigned i = 0; i != NumElts; ++i650 ) |
2415 | 650 | Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts294 : i356 ; |
2416 | 98 | |
2417 | 98 | Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); |
2418 | 4.44k | } else if (IsX86 && (4.40k Name.startswith("avx.vinsertf128.")4.40k || |
2419 | 4.40k | Name == "avx2.vinserti128"4.37k || |
2420 | 4.40k | Name.startswith("avx512.mask.insert")4.37k )) { |
2421 | 102 | Value *Op0 = CI->getArgOperand(0); |
2422 | 102 | Value *Op1 = CI->getArgOperand(1); |
2423 | 102 | unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); |
2424 | 102 | unsigned DstNumElts = CI->getType()->getVectorNumElements(); |
2425 | 102 | unsigned SrcNumElts = Op1->getType()->getVectorNumElements(); |
2426 | 102 | unsigned Scale = DstNumElts / SrcNumElts; |
2427 | 102 | |
2428 | 102 | // Mask off the high bits of the immediate value; hardware ignores those. |
2429 | 102 | Imm = Imm % Scale; |
2430 | 102 | |
2431 | 102 | // Extend the second operand into a vector the size of the destination. |
2432 | 102 | Value *UndefV = UndefValue::get(Op1->getType()); |
2433 | 102 | SmallVector<uint32_t, 8> Idxs(DstNumElts); |
2434 | 492 | for (unsigned i = 0; i != SrcNumElts; ++i390 ) |
2435 | 390 | Idxs[i] = i; |
2436 | 636 | for (unsigned i = SrcNumElts; i != DstNumElts; ++i534 ) |
2437 | 534 | Idxs[i] = SrcNumElts; |
2438 | 102 | Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs); |
2439 | 102 | |
2440 | 102 | // Insert the second operand into the first operand. |
2441 | 102 | |
2442 | 102 | // Note that there is no guarantee that instruction lowering will actually |
2443 | 102 | // produce a vinsertf128 instruction for the created shuffles. In |
2444 | 102 | // particular, the 0 immediate case involves no lane changes, so it can |
2445 | 102 | // be handled as a blend. |
2446 | 102 | |
2447 | 102 | // Example of shuffle mask for 32-bit elements: |
2448 | 102 | // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> |
2449 | 102 | // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 > |
2450 | 102 | |
2451 | 102 | // First fill with identify mask. |
2452 | 1.02k | for (unsigned i = 0; i != DstNumElts; ++i924 ) |
2453 | 924 | Idxs[i] = i; |
2454 | 102 | // Then replace the elements where we need to insert. |
2455 | 492 | for (unsigned i = 0; i != SrcNumElts; ++i390 ) |
2456 | 390 | Idxs[i + Imm * SrcNumElts] = i + DstNumElts; |
2457 | 102 | Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs); |
2458 | 102 | |
2459 | 102 | // If the intrinsic has a mask operand, handle that. |
2460 | 102 | if (CI->getNumArgOperands() == 5) |
2461 | 72 | Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, |
2462 | 72 | CI->getArgOperand(3)); |
2463 | 4.34k | } else if (IsX86 && (4.30k Name.startswith("avx.vextractf128.")4.30k || |
2464 | 4.30k | Name == "avx2.vextracti128"4.26k || |
2465 | 4.30k | Name.startswith("avx512.mask.vextract")4.26k )) { |
2466 | 69 | Value *Op0 = CI->getArgOperand(0); |
2467 | 69 | unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); |
2468 | 69 | unsigned DstNumElts = CI->getType()->getVectorNumElements(); |
2469 | 69 | unsigned SrcNumElts = Op0->getType()->getVectorNumElements(); |
2470 | 69 | unsigned Scale = SrcNumElts / DstNumElts; |
2471 | 69 | |
2472 | 69 | // Mask off the high bits of the immediate value; hardware ignores those. |
2473 | 69 | Imm = Imm % Scale; |
2474 | 69 | |
2475 | 69 | // Get indexes for the subvector of the input vector. |
2476 | 69 | SmallVector<uint32_t, 8> Idxs(DstNumElts); |
2477 | 317 | for (unsigned i = 0; i != DstNumElts; ++i248 ) { |
2478 | 248 | Idxs[i] = i + (Imm * DstNumElts); |
2479 | 248 | } |
2480 | 69 | Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); |
2481 | 69 | |
2482 | 69 | // If the intrinsic has a mask operand, handle that. |
2483 | 69 | if (CI->getNumArgOperands() == 4) |
2484 | 32 | Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, |
2485 | 32 | CI->getArgOperand(2)); |
2486 | 4.27k | } else if (!IsX86 && Name == "stackprotectorcheck"41 ) { |
2487 | 6 | Rep = nullptr; |
2488 | 4.26k | } else if (IsX86 && (4.23k Name.startswith("avx512.mask.perm.df.")4.23k || |
2489 | 4.23k | Name.startswith("avx512.mask.perm.di.")4.22k )) { |
2490 | 24 | Value *Op0 = CI->getArgOperand(0); |
2491 | 24 | unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); |
2492 | 24 | VectorType *VecTy = cast<VectorType>(CI->getType()); |
2493 | 24 | unsigned NumElts = VecTy->getNumElements(); |
2494 | 24 | |
2495 | 24 | SmallVector<uint32_t, 8> Idxs(NumElts); |
2496 | 168 | for (unsigned i = 0; i != NumElts; ++i144 ) |
2497 | 144 | Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3); |
2498 | 24 | |
2499 | 24 | Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); |
2500 | 24 | |
2501 | 24 | if (CI->getNumArgOperands() == 4) |
2502 | 24 | Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, |
2503 | 24 | CI->getArgOperand(2)); |
2504 | 4.24k | } else if (IsX86 && (4.20k Name.startswith("avx.vperm2f128.")4.20k || |
2505 | 4.20k | Name == "avx2.vperm2i128"4.19k )) { |
2506 | 16 | // The immediate permute control byte looks like this: |
2507 | 16 | // [1:0] - select 128 bits from sources for low half of destination |
2508 | 16 | // [2] - ignore |
2509 | 16 | // [3] - zero low half of destination |
2510 | 16 | // [5:4] - select 128 bits from sources for high half of destination |
2511 | 16 | // [6] - ignore |
2512 | 16 | // [7] - zero high half of destination |
2513 | 16 | |
2514 | 16 | uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); |
2515 | 16 | |
2516 | 16 | unsigned NumElts = CI->getType()->getVectorNumElements(); |
2517 | 16 | unsigned HalfSize = NumElts / 2; |
2518 | 16 | SmallVector<uint32_t, 8> ShuffleMask(NumElts); |
2519 | 16 | |
2520 | 16 | // Determine which operand(s) are actually in use for this instruction. |
2521 | 16 | Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1)12 : CI->getArgOperand(0)4 ; |
2522 | 16 | Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1)0 : CI->getArgOperand(0); |
2523 | 16 | |
2524 | 16 | // If needed, replace operands based on zero mask. |
2525 | 16 | V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType())0 : V0; |
2526 | 16 | V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType())0 : V1; |
2527 | 16 | |
2528 | 16 | // Permute low half of result. |
2529 | 16 | unsigned StartIndex = (Imm & 0x01) ? HalfSize : 00 ; |
2530 | 64 | for (unsigned i = 0; i < HalfSize; ++i48 ) |
2531 | 48 | ShuffleMask[i] = StartIndex + i; |
2532 | 16 | |
2533 | 16 | // Permute high half of result. |
2534 | 16 | StartIndex = (Imm & 0x10) ? HalfSize0 : 0; |
2535 | 64 | for (unsigned i = 0; i < HalfSize; ++i48 ) |
2536 | 48 | ShuffleMask[i + HalfSize] = NumElts + StartIndex + i; |
2537 | 16 | |
2538 | 16 | Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask); |
2539 | 16 | |
2540 | 4.22k | } else if (IsX86 && (4.19k Name.startswith("avx.vpermil.")4.19k || |
2541 | 4.19k | Name == "sse2.pshuf.d"4.17k || |
2542 | 4.19k | Name.startswith("avx512.mask.vpermil.p")4.09k || |
2543 | 4.19k | Name.startswith("avx512.mask.pshuf.d.")4.06k )) { |
2544 | 148 | Value *Op0 = CI->getArgOperand(0); |
2545 | 148 | unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); |
2546 | 148 | VectorType *VecTy = cast<VectorType>(CI->getType()); |
2547 | 148 | unsigned NumElts = VecTy->getNumElements(); |
2548 | 148 | // Calculate the size of each index in the immediate. |
2549 | 148 | unsigned IdxSize = 64 / VecTy->getScalarSizeInBits(); |
2550 | 148 | unsigned IdxMask = ((1 << IdxSize) - 1); |
2551 | 148 | |
2552 | 148 | SmallVector<uint32_t, 8> Idxs(NumElts); |
2553 | 148 | // Lookup the bits for this element, wrapping around the immediate every |
2554 | 148 | // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need |
2555 | 148 | // to offset by the first index of each group. |
2556 | 952 | for (unsigned i = 0; i != NumElts; ++i804 ) |
2557 | 804 | Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask); |
2558 | 148 | |
2559 | 148 | Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); |
2560 | 148 | |
2561 | 148 | if (CI->getNumArgOperands() == 4) |
2562 | 54 | Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, |
2563 | 54 | CI->getArgOperand(2)); |
2564 | 4.07k | } else if (IsX86 && (4.04k Name == "sse2.pshufl.w"4.04k || |
2565 | 4.04k | Name.startswith("avx512.mask.pshufl.w.")3.98k )) { |
2566 | 78 | Value *Op0 = CI->getArgOperand(0); |
2567 | 78 | unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); |
2568 | 78 | unsigned NumElts = CI->getType()->getVectorNumElements(); |
2569 | 78 | |
2570 | 78 | SmallVector<uint32_t, 16> Idxs(NumElts); |
2571 | 180 | for (unsigned l = 0; l != NumElts; l += 8102 ) { |
2572 | 510 | for (unsigned i = 0; i != 4; ++i408 ) |
2573 | 408 | Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l; |
2574 | 510 | for (unsigned i = 4; i != 8; ++i408 ) |
2575 | 408 | Idxs[i + l] = i + l; |
2576 | 102 | } |
2577 | 78 | |
2578 | 78 | Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); |
2579 | 78 | |
2580 | 78 | if (CI->getNumArgOperands() == 4) |
2581 | 18 | Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, |
2582 | 18 | CI->getArgOperand(2)); |
2583 | 4.00k | } else if (IsX86 && (3.96k Name == "sse2.pshufh.w"3.96k || |
2584 | 3.96k | Name.startswith("avx512.mask.pshufh.w.")3.92k )) { |
2585 | 60 | Value *Op0 = CI->getArgOperand(0); |
2586 | 60 | unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); |
2587 | 60 | unsigned NumElts = CI->getType()->getVectorNumElements(); |
2588 | 60 | |
2589 | 60 | SmallVector<uint32_t, 16> Idxs(NumElts); |
2590 | 144 | for (unsigned l = 0; l != NumElts; l += 884 ) { |
2591 | 420 | for (unsigned i = 0; i != 4; ++i336 ) |
2592 | 336 | Idxs[i + l] = i + l; |
2593 | 420 | for (unsigned i = 0; i != 4; ++i336 ) |
2594 | 336 | Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l; |
2595 | 84 | } |
2596 | 60 | |
2597 | 60 | Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); |
2598 | 60 | |
2599 | 60 | if (CI->getNumArgOperands() == 4) |
2600 | 18 | Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, |
2601 | 18 | CI->getArgOperand(2)); |
2602 | 3.94k | } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")3.90k ) { |
2603 | 28 | Value *Op0 = CI->getArgOperand(0); |
2604 | 28 | Value *Op1 = CI->getArgOperand(1); |
2605 | 28 | unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); |
2606 | 28 | unsigned NumElts = CI->getType()->getVectorNumElements(); |
2607 | 28 | |
2608 | 28 | unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits(); |
2609 | 28 | unsigned HalfLaneElts = NumLaneElts / 2; |
2610 | 28 | |
2611 | 28 | SmallVector<uint32_t, 16> Idxs(NumElts); |
2612 | 216 | for (unsigned i = 0; i != NumElts; ++i188 ) { |
2613 | 188 | // Base index is the starting element of the lane. |
2614 | 188 | Idxs[i] = i - (i % NumLaneElts); |
2615 | 188 | // If we are half way through the lane switch to the other source. |
2616 | 188 | if ((i % NumLaneElts) >= HalfLaneElts) |
2617 | 94 | Idxs[i] += NumElts; |
2618 | 188 | // Now select the specific element. By adding HalfLaneElts bits from |
2619 | 188 | // the immediate. Wrapping around the immediate every 8-bits. |
2620 | 188 | Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1); |
2621 | 188 | } |
2622 | 28 | |
2623 | 28 | Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); |
2624 | 28 | |
2625 | 28 | Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, |
2626 | 28 | CI->getArgOperand(3)); |
2627 | 3.91k | } else if (IsX86 && (3.87k Name.startswith("avx512.mask.movddup")3.87k || |
2628 | 3.87k | Name.startswith("avx512.mask.movshdup")3.86k || |
2629 | 3.87k | Name.startswith("avx512.mask.movsldup")3.84k )) { |
2630 | 54 | Value *Op0 = CI->getArgOperand(0); |
2631 | 54 | unsigned NumElts = CI->getType()->getVectorNumElements(); |
2632 | 54 | unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits(); |
2633 | 54 | |
2634 | 54 | unsigned Offset = 0; |
2635 | 54 | if (Name.startswith("avx512.mask.movshdup.")) |
2636 | 18 | Offset = 1; |
2637 | 54 | |
2638 | 54 | SmallVector<uint32_t, 16> Idxs(NumElts); |
2639 | 180 | for (unsigned l = 0; l != NumElts; l += NumLaneElts126 ) |
2640 | 336 | for (unsigned i = 0; 126 i != NumLaneElts; i += 2210 ) { |
2641 | 210 | Idxs[i + l + 0] = i + l + Offset; |
2642 | 210 | Idxs[i + l + 1] = i + l + Offset; |
2643 | 210 | } |
2644 | 54 | |
2645 | 54 | Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); |
2646 | 54 | |
2647 | 54 | Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, |
2648 | 54 | CI->getArgOperand(1)); |
2649 | 3.85k | } else if (IsX86 && (3.82k Name.startswith("avx512.mask.punpckl")3.82k || |
2650 | 3.82k | Name.startswith("avx512.mask.unpckl.")3.77k )) { |
2651 | 74 | Value *Op0 = CI->getArgOperand(0); |
2652 | 74 | Value *Op1 = CI->getArgOperand(1); |
2653 | 74 | int NumElts = CI->getType()->getVectorNumElements(); |
2654 | 74 | int NumLaneElts = 128/CI->getType()->getScalarSizeInBits(); |
2655 | 74 | |
2656 | 74 | SmallVector<uint32_t, 64> Idxs(NumElts); |
2657 | 250 | for (int l = 0; l != NumElts; l += NumLaneElts176 ) |
2658 | 1.20k | for (int i = 0; 176 i != NumLaneElts; ++i1.02k ) |
2659 | 1.02k | Idxs[i + l] = l + (i / 2) + NumElts * (i % 2); |
2660 | 74 | |
2661 | 74 | Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); |
2662 | 74 | |
2663 | 74 | Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, |
2664 | 74 | CI->getArgOperand(2)); |
2665 | 3.78k | } else if (IsX86 && (3.75k Name.startswith("avx512.mask.punpckh")3.75k || |
2666 | 3.75k | Name.startswith("avx512.mask.unpckh.")3.70k )) { |
2667 | 72 | Value *Op0 = CI->getArgOperand(0); |
2668 | 72 | Value *Op1 = CI->getArgOperand(1); |
2669 | 72 | int NumElts = CI->getType()->getVectorNumElements(); |
2670 | 72 | int NumLaneElts = 128/CI->getType()->getScalarSizeInBits(); |
2671 | 72 | |
2672 | 72 | SmallVector<uint32_t, 64> Idxs(NumElts); |
2673 | 240 | for (int l = 0; l != NumElts; l += NumLaneElts168 ) |
2674 | 1.17k | for (int i = 0; 168 i != NumLaneElts; ++i1.00k ) |
2675 | 1.00k | Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2); |
2676 | 72 | |
2677 | 72 | Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); |
2678 | 72 | |
2679 | 72 | Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, |
2680 | 72 | CI->getArgOperand(2)); |
2681 | 3.71k | } else if (IsX86 && (3.67k Name.startswith("avx512.mask.and.")3.67k || |
2682 | 3.67k | Name.startswith("avx512.mask.pand.")3.62k )) { |
2683 | 98 | VectorType *FTy = cast<VectorType>(CI->getType()); |
2684 | 98 | VectorType *ITy = VectorType::getInteger(FTy); |
2685 | 98 | Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy), |
2686 | 98 | Builder.CreateBitCast(CI->getArgOperand(1), ITy)); |
2687 | 98 | Rep = Builder.CreateBitCast(Rep, FTy); |
2688 | 98 | Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, |
2689 | 98 | CI->getArgOperand(2)); |
2690 | 3.61k | } else if (IsX86 && (3.58k Name.startswith("avx512.mask.andn.")3.58k || |
2691 | 3.58k | Name.startswith("avx512.mask.pandn.")3.52k )) { |
2692 | 126 | VectorType *FTy = cast<VectorType>(CI->getType()); |
2693 | 126 | VectorType *ITy = VectorType::getInteger(FTy); |
2694 | 126 | Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy)); |
2695 | 126 | Rep = Builder.CreateAnd(Rep, |
2696 | 126 | Builder.CreateBitCast(CI->getArgOperand(1), ITy)); |
2697 | 126 | Rep = Builder.CreateBitCast(Rep, FTy); |
2698 | 126 | Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, |
2699 | 126 | CI->getArgOperand(2)); |
2700 | 3.48k | } else if (IsX86 && (3.45k Name.startswith("avx512.mask.or.")3.45k || |
2701 | 3.45k | Name.startswith("avx512.mask.por.")3.40k )) { |
2702 | 98 | VectorType *FTy = cast<VectorType>(CI->getType()); |
2703 | 98 | VectorType *ITy = VectorType::getInteger(FTy); |
2704 | 98 | Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy), |
2705 | 98 | Builder.CreateBitCast(CI->getArgOperand(1), ITy)); |
2706 | 98 | Rep = Builder.CreateBitCast(Rep, FTy); |
2707 | 98 | Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, |
2708 | 98 | CI->getArgOperand(2)); |
2709 | 3.39k | } else if (IsX86 && (3.35k Name.startswith("avx512.mask.xor.")3.35k || |
2710 | 3.35k | Name.startswith("avx512.mask.pxor.")3.30k )) { |
2711 | 98 | VectorType *FTy = cast<VectorType>(CI->getType()); |
2712 | 98 | VectorType *ITy = VectorType::getInteger(FTy); |
2713 | 98 | Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy), |
2714 | 98 | Builder.CreateBitCast(CI->getArgOperand(1), ITy)); |
2715 | 98 | Rep = Builder.CreateBitCast(Rep, FTy); |
2716 | 98 | Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, |
2717 | 98 | CI->getArgOperand(2)); |
2718 | 3.29k | } else if (IsX86 && Name.startswith("avx512.mask.padd.")3.25k ) { |
2719 | 108 | Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1)); |
2720 | 108 | Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, |
2721 | 108 | CI->getArgOperand(2)); |
2722 | 3.18k | } else if (IsX86 && Name.startswith("avx512.mask.psub.")3.15k ) { |
2723 | 108 | Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1)); |
2724 | 108 | Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, |
2725 | 108 | CI->getArgOperand(2)); |
2726 | 3.07k | } else if (IsX86 && Name.startswith("avx512.mask.pmull.")3.04k ) { |
2727 | 108 | Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1)); |
2728 | 108 | Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, |
2729 | 108 | CI->getArgOperand(2)); |
2730 | 2.96k | } else if (IsX86 && Name.startswith("avx512.mask.add.p")2.93k ) { |
2731 | 42 | if (Name.endswith(".512")) { |
2732 | 30 | Intrinsic::ID IID; |
2733 | 30 | if (Name[17] == 's') |
2734 | 30 | IID = Intrinsic::x86_avx512_add_ps_512; |
2735 | 0 | else |
2736 | 0 | IID = Intrinsic::x86_avx512_add_pd_512; |
2737 | 30 | |
2738 | 30 | Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), |
2739 | 30 | { CI->getArgOperand(0), CI->getArgOperand(1), |
2740 | 30 | CI->getArgOperand(4) }); |
2741 | 30 | } else { |
2742 | 12 | Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1)); |
2743 | 12 | } |
2744 | 42 | Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, |
2745 | 42 | CI->getArgOperand(2)); |
2746 | 2.92k | } else if (IsX86 && Name.startswith("avx512.mask.div.p")2.89k ) { |
2747 | 42 | if (Name.endswith(".512")) { |
2748 | 30 | Intrinsic::ID IID; |
2749 | 30 | if (Name[17] == 's') |
2750 | 30 | IID = Intrinsic::x86_avx512_div_ps_512; |
2751 | 0 | else |
2752 | 0 | IID = Intrinsic::x86_avx512_div_pd_512; |
2753 | 30 | |
2754 | 30 | Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), |
2755 | 30 | { CI->getArgOperand(0), CI->getArgOperand(1), |
2756 | 30 | CI->getArgOperand(4) }); |
2757 | 30 | } else { |
2758 | 12 | Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1)); |
2759 | 12 | } |
2760 | 42 | Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, |
2761 | 42 | CI->getArgOperand(2)); |
2762 | 2.88k | } else if (IsX86 && Name.startswith("avx512.mask.mul.p")2.85k ) { |
2763 | 44 | if (Name.endswith(".512")) { |
2764 | 32 | Intrinsic::ID IID; |
2765 | 32 | if (Name[17] == 's') |
2766 | 24 | IID = Intrinsic::x86_avx512_mul_ps_512; |
2767 | 8 | else |
2768 | 8 | IID = Intrinsic::x86_avx512_mul_pd_512; |
2769 | 32 | |
2770 | 32 | Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), |
2771 | 32 | { CI->getArgOperand(0), CI->getArgOperand(1), |
2772 | 32 | CI->getArgOperand(4) }); |
2773 | 32 | } else { |
2774 | 12 | Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1)); |
2775 | 12 | } |
2776 | 44 | Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, |
2777 | 44 | CI->getArgOperand(2)); |
2778 | 2.84k | } else if (IsX86 && Name.startswith("avx512.mask.sub.p")2.80k ) { |
2779 | 40 | if (Name.endswith(".512")) { |
2780 | 28 | Intrinsic::ID IID; |
2781 | 28 | if (Name[17] == 's') |
2782 | 28 | IID = Intrinsic::x86_avx512_sub_ps_512; |
2783 | 0 | else |
2784 | 0 | IID = Intrinsic::x86_avx512_sub_pd_512; |
2785 | 28 | |
2786 | 28 | Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), |
2787 | 28 | { CI->getArgOperand(0), CI->getArgOperand(1), |
2788 | 28 | CI->getArgOperand(4) }); |
2789 | 28 | } else { |
2790 | 12 | Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1)); |
2791 | 12 | } |
2792 | 40 | Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, |
2793 | 40 | CI->getArgOperand(2)); |
2794 | 2.80k | } else if (IsX86 && (2.76k Name.startswith("avx512.mask.max.p")2.76k || |
2795 | 2.76k | Name.startswith("avx512.mask.min.p")2.74k ) && |
2796 | 2.80k | Name.drop_front(18) == ".512"48 ) { |
2797 | 24 | bool IsDouble = Name[17] == 'd'; |
2798 | 24 | bool IsMin = Name[13] == 'i'; |
2799 | 24 | static const Intrinsic::ID MinMaxTbl[2][2] = { |
2800 | 24 | { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 }, |
2801 | 24 | { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 } |
2802 | 24 | }; |
2803 | 24 | Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble]; |
2804 | 24 | |
2805 | 24 | Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), |
2806 | 24 | { CI->getArgOperand(0), CI->getArgOperand(1), |
2807 | 24 | CI->getArgOperand(4) }); |
2808 | 24 | Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, |
2809 | 24 | CI->getArgOperand(2)); |
2810 | 2.77k | } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")2.74k ) { |
2811 | 26 | Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), |
2812 | 26 | Intrinsic::ctlz, |
2813 | 26 | CI->getType()), |
2814 | 26 | { CI->getArgOperand(0), Builder.getInt1(false) }); |
2815 | 26 | Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, |
2816 | 26 | CI->getArgOperand(1)); |
2817 | 2.75k | } else if (IsX86 && Name.startswith("avx512.mask.psll")2.71k ) { |
2818 | 156 | bool IsImmediate = Name[16] == 'i' || |
2819 | 156 | (144 Name.size() > 18144 && Name[18] == 'i'132 ); |
2820 | 156 | bool IsVariable = Name[16] == 'v'; |
2821 | 156 | char Size = Name[16] == '.' ? Name[17]90 : |
2822 | 156 | Name[17] == '.' 66 ? Name[18]24 : |
2823 | 66 | Name[18] == '.' 42 ? Name[19]30 : |
2824 | 42 | Name[20]12 ; |
2825 | 156 | |
2826 | 156 | Intrinsic::ID IID; |
2827 | 156 | if (IsVariable && Name[17] != '.'54 ) { |
2828 | 42 | if (Size == 'd' && Name[17] == '2'12 ) // avx512.mask.psllv2.di |
2829 | 6 | IID = Intrinsic::x86_avx2_psllv_q; |
2830 | 36 | else if (Size == 'd' && Name[17] == '4'6 ) // avx512.mask.psllv4.di |
2831 | 6 | IID = Intrinsic::x86_avx2_psllv_q_256; |
2832 | 30 | else if (Size == 's' && Name[17] == '4'12 ) // avx512.mask.psllv4.si |
2833 | 6 | IID = Intrinsic::x86_avx2_psllv_d; |
2834 | 24 | else if (Size == 's' && Name[17] == '8'6 ) // avx512.mask.psllv8.si |
2835 | 6 | IID = Intrinsic::x86_avx2_psllv_d_256; |
2836 | 18 | else if (Size == 'h' && Name[17] == '8'12 ) // avx512.mask.psllv8.hi |
2837 | 6 | IID = Intrinsic::x86_avx512_psllv_w_128; |
2838 | 12 | else if (Size == 'h' && Name[17] == '1'6 ) // avx512.mask.psllv16.hi |
2839 | 6 | IID = Intrinsic::x86_avx512_psllv_w_256; |
2840 | 6 | else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi |
2841 | 6 | IID = Intrinsic::x86_avx512_psllv_w_512; |
2842 | 6 | else |
2843 | 6 | llvm_unreachable0 ("Unexpected size"); |
2844 | 114 | } else if (Name.endswith(".128")) { |
2845 | 24 | if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128 |
2846 | 12 | IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d6 |
2847 | 12 | : Intrinsic::x86_sse2_psll_d6 ; |
2848 | 12 | else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128 |
2849 | 0 | IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q |
2850 | 0 | : Intrinsic::x86_sse2_psll_q; |
2851 | 12 | else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128 |
2852 | 12 | IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w6 |
2853 | 12 | : Intrinsic::x86_sse2_psll_w6 ; |
2854 | 12 | else |
2855 | 12 | llvm_unreachable0 ("Unexpected size"); |
2856 | 90 | } else if (Name.endswith(".256")) { |
2857 | 30 | if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256 |
2858 | 12 | IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d6 |
2859 | 12 | : Intrinsic::x86_avx2_psll_d6 ; |
2860 | 18 | else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256 |
2861 | 6 | IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q0 |
2862 | 6 | : Intrinsic::x86_avx2_psll_q; |
2863 | 12 | else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256 |
2864 | 12 | IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w6 |
2865 | 12 | : Intrinsic::x86_avx2_psll_w6 ; |
2866 | 12 | else |
2867 | 12 | llvm_unreachable0 ("Unexpected size"); |
2868 | 60 | } else { |
2869 | 60 | if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512 |
2870 | 24 | IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_51212 : |
2871 | 24 | IsVariable 12 ? Intrinsic::x86_avx512_psllv_d_5126 : |
2872 | 12 | Intrinsic::x86_avx512_psll_d_5126 ; |
2873 | 36 | else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512 |
2874 | 24 | IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_51212 : |
2875 | 24 | IsVariable 12 ? Intrinsic::x86_avx512_psllv_q_5126 : |
2876 | 12 | Intrinsic::x86_avx512_psll_q_5126 ; |
2877 | 12 | else if (Size == 'w') // psll.wi.512, pslli.w, psll.w |
2878 | 12 | IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_5126 |
2879 | 12 | : Intrinsic::x86_avx512_psll_w_5126 ; |
2880 | 12 | else |
2881 | 12 | llvm_unreachable0 ("Unexpected size"); |
2882 | 60 | } |
2883 | 156 | |
2884 | 156 | Rep = UpgradeX86MaskedShift(Builder, *CI, IID); |
2885 | 2.59k | } else if (IsX86 && Name.startswith("avx512.mask.psrl")2.56k ) { |
2886 | 188 | bool IsImmediate = Name[16] == 'i' || |
2887 | 188 | (176 Name.size() > 18176 && Name[18] == 'i'164 ); |
2888 | 188 | bool IsVariable = Name[16] == 'v'; |
2889 | 188 | char Size = Name[16] == '.' ? Name[17]108 : |
2890 | 188 | Name[17] == '.' 80 ? Name[18]26 : |
2891 | 80 | Name[18] == '.' 54 ? Name[19]36 : |
2892 | 54 | Name[20]18 ; |
2893 | 188 | |
2894 | 188 | Intrinsic::ID IID; |
2895 | 188 | if (IsVariable && Name[17] != '.'68 ) { |
2896 | 54 | if (Size == 'd' && Name[17] == '2'12 ) // avx512.mask.psrlv2.di |
2897 | 6 | IID = Intrinsic::x86_avx2_psrlv_q; |
2898 | 48 | else if (Size == 'd' && Name[17] == '4'6 ) // avx512.mask.psrlv4.di |
2899 | 6 | IID = Intrinsic::x86_avx2_psrlv_q_256; |
2900 | 42 | else if (Size == 's' && Name[17] == '4'12 ) // avx512.mask.psrlv4.si |
2901 | 6 | IID = Intrinsic::x86_avx2_psrlv_d; |
2902 | 36 | else if (Size == 's' && Name[17] == '8'6 ) // avx512.mask.psrlv8.si |
2903 | 6 | IID = Intrinsic::x86_avx2_psrlv_d_256; |
2904 | 30 | else if (Size == 'h' && Name[17] == '8'24 ) // avx512.mask.psrlv8.hi |
2905 | 12 | IID = Intrinsic::x86_avx512_psrlv_w_128; |
2906 | 18 | else if (Size == 'h' && Name[17] == '1'12 ) // avx512.mask.psrlv16.hi |
2907 | 12 | IID = Intrinsic::x86_avx512_psrlv_w_256; |
2908 | 6 | else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi |
2909 | 6 | IID = Intrinsic::x86_avx512_psrlv_w_512; |
2910 | 6 | else |
2911 | 6 | llvm_unreachable0 ("Unexpected size"); |
2912 | 134 | } else if (Name.endswith(".128")) { |
2913 | 36 | if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128 |
2914 | 12 | IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d6 |
2915 | 12 | : Intrinsic::x86_sse2_psrl_d6 ; |
2916 | 24 | else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128 |
2917 | 12 | IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q6 |
2918 | 12 | : Intrinsic::x86_sse2_psrl_q6 ; |
2919 | 12 | else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128 |
2920 | 12 | IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w6 |
2921 | 12 | : Intrinsic::x86_sse2_psrl_w6 ; |
2922 | 12 | else |
2923 | 12 | llvm_unreachable0 ("Unexpected size"); |
2924 | 98 | } else if (Name.endswith(".256")) { |
2925 | 36 | if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256 |
2926 | 12 | IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d6 |
2927 | 12 | : Intrinsic::x86_avx2_psrl_d6 ; |
2928 | 24 | else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256 |
2929 | 12 | IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q6 |
2930 | 12 | : Intrinsic::x86_avx2_psrl_q6 ; |
2931 | 12 | else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256 |
2932 | 12 | IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w6 |
2933 | 12 | : Intrinsic::x86_avx2_psrl_w6 ; |
2934 | 12 | else |
2935 | 12 | llvm_unreachable0 ("Unexpected size"); |
2936 | 62 | } else { |
2937 | 62 | if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512 |
2938 | 24 | IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_51212 : |
2939 | 24 | IsVariable 12 ? Intrinsic::x86_avx512_psrlv_d_5126 : |
2940 | 12 | Intrinsic::x86_avx512_psrl_d_5126 ; |
2941 | 38 | else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512 |
2942 | 26 | IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_51212 : |
2943 | 26 | IsVariable 14 ? Intrinsic::x86_avx512_psrlv_q_5128 : |
2944 | 14 | Intrinsic::x86_avx512_psrl_q_5126 ; |
2945 | 12 | else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w) |
2946 | 12 | IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_5126 |
2947 | 12 | : Intrinsic::x86_avx512_psrl_w_5126 ; |
2948 | 12 | else |
2949 | 12 | llvm_unreachable0 ("Unexpected size"); |
2950 | 62 | } |
2951 | 188 | |
2952 | 188 | Rep = UpgradeX86MaskedShift(Builder, *CI, IID); |
2953 | 2.40k | } else if (IsX86 && Name.startswith("avx512.mask.psra")2.37k ) { |
2954 | 166 | bool IsImmediate = Name[16] == 'i' || |
2955 | 166 | (154 Name.size() > 18154 && Name[18] == 'i'142 ); |
2956 | 166 | bool IsVariable = Name[16] == 'v'; |
2957 | 166 | char Size = Name[16] == '.' ? Name[17]96 : |
2958 | 166 | Name[17] == '.' 70 ? Name[18]38 : |
2959 | 70 | Name[18] == '.' 32 ? Name[19]20 : |
2960 | 32 | Name[20]12 ; |
2961 | 166 | |
2962 | 166 | Intrinsic::ID IID; |
2963 | 166 | if (IsVariable && Name[17] != '.'58 ) { |
2964 | 32 | if (Size == 's' && Name[17] == '4'14 ) // avx512.mask.psrav4.si |
2965 | 6 | IID = Intrinsic::x86_avx2_psrav_d; |
2966 | 26 | else if (Size == 's' && Name[17] == '8'8 ) // avx512.mask.psrav8.si |
2967 | 8 | IID = Intrinsic::x86_avx2_psrav_d_256; |
2968 | 18 | else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi |
2969 | 6 | IID = Intrinsic::x86_avx512_psrav_w_128; |
2970 | 12 | else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi |
2971 | 6 | IID = Intrinsic::x86_avx512_psrav_w_256; |
2972 | 6 | else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi |
2973 | 6 | IID = Intrinsic::x86_avx512_psrav_w_512; |
2974 | 6 | else |
2975 | 6 | llvm_unreachable0 ("Unexpected size"); |
2976 | 134 | } else if (Name.endswith(".128")) { |
2977 | 38 | if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128 |
2978 | 6 | IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d0 |
2979 | 6 | : Intrinsic::x86_sse2_psra_d; |
2980 | 32 | else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128 |
2981 | 20 | IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_1286 : |
2982 | 20 | IsVariable 14 ? Intrinsic::x86_avx512_psrav_q_1288 : |
2983 | 14 | Intrinsic::x86_avx512_psra_q_1286 ; |
2984 | 12 | else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128 |
2985 | 12 | IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w6 |
2986 | 12 | : Intrinsic::x86_sse2_psra_w6 ; |
2987 | 12 | else |
2988 | 12 | llvm_unreachable0 ("Unexpected size"); |
2989 | 96 | } else if (Name.endswith(".256")) { |
2990 | 36 | if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256 |
2991 | 6 | IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d0 |
2992 | 6 | : Intrinsic::x86_avx2_psra_d; |
2993 | 30 | else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256 |
2994 | 18 | IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_2566 : |
2995 | 18 | IsVariable 12 ? Intrinsic::x86_avx512_psrav_q_2566 : |
2996 | 12 | Intrinsic::x86_avx512_psra_q_2566 ; |
2997 | 12 | else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256 |
2998 | 12 | IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w6 |
2999 | 12 | : Intrinsic::x86_avx2_psra_w6 ; |
3000 | 12 | else |
3001 | 12 | llvm_unreachable0 ("Unexpected size"); |
3002 | 60 | } else { |
3003 | 60 | if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512 |
3004 | 24 | IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_51212 : |
3005 | 24 | IsVariable 12 ? Intrinsic::x86_avx512_psrav_d_5126 : |
3006 | 12 | Intrinsic::x86_avx512_psra_d_5126 ; |
3007 | 36 | else if (Size == 'q') // psra.qi.512, psrai.q, psra.q |
3008 | 24 | IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_51212 : |
3009 | 24 | IsVariable 12 ? Intrinsic::x86_avx512_psrav_q_5126 : |
3010 | 12 | Intrinsic::x86_avx512_psra_q_5126 ; |
3011 | 12 | else if (Size == 'w') // psra.wi.512, psrai.w, psra.w |
3012 | 12 | IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_5126 |
3013 | 12 | : Intrinsic::x86_avx512_psra_w_5126 ; |
3014 | 12 | else |
3015 | 12 | llvm_unreachable0 ("Unexpected size"); |
3016 | 60 | } |
3017 | 166 | |
3018 | 166 | Rep = UpgradeX86MaskedShift(Builder, *CI, IID); |
3019 | 2.24k | } else if (IsX86 && Name.startswith("avx512.mask.move.s")2.20k ) { |
3020 | 8 | Rep = upgradeMaskedMove(Builder, *CI); |
3021 | 2.23k | } else if (IsX86 && Name.startswith("avx512.cvtmask2")2.19k ) { |
3022 | 24 | Rep = UpgradeMaskToInt(Builder, *CI); |
3023 | 2.20k | } else if (IsX86 && Name.endswith(".movntdqa")2.17k ) { |
3024 | 20 | Module *M = F->getParent(); |
3025 | 20 | MDNode *Node = MDNode::get( |
3026 | 20 | C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1))); |
3027 | 20 | |
3028 | 20 | Value *Ptr = CI->getArgOperand(0); |
3029 | 20 | VectorType *VTy = cast<VectorType>(CI->getType()); |
3030 | 20 | |
3031 | 20 | // Convert the type of the pointer to a pointer to the stored type. |
3032 | 20 | Value *BC = |
3033 | 20 | Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast"); |
3034 | 20 | LoadInst *LI = Builder.CreateAlignedLoad(VTy, BC, VTy->getBitWidth() / 8); |
3035 | 20 | LI->setMetadata(M->getMDKindID("nontemporal"), Node); |
3036 | 20 | Rep = LI; |
3037 | 2.18k | } else if (IsX86 && (2.15k Name.startswith("fma.vfmadd.")2.15k || |
3038 | 2.15k | Name.startswith("fma.vfmsub.")2.02k || |
3039 | 2.15k | Name.startswith("fma.vfnmadd.")1.90k || |
3040 | 2.15k | Name.startswith("fma.vfnmsub.")1.79k )) { |
3041 | 470 | bool NegMul = Name[6] == 'n'; |
3042 | 470 | bool NegAcc = NegMul ? Name[8] == 's'222 : Name[7] == 's'248 ; |
3043 | 470 | bool IsScalar = NegMul ? Name[12] == 's'222 : Name[11] == 's'248 ; |
3044 | 470 | |
3045 | 470 | Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1), |
3046 | 470 | CI->getArgOperand(2) }; |
3047 | 470 | |
3048 | 470 | if (IsScalar) { |
3049 | 154 | Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0); |
3050 | 154 | Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0); |
3051 | 154 | Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0); |
3052 | 154 | } |
3053 | 470 | |
3054 | 470 | if (NegMul && !IsScalar222 ) |
3055 | 144 | Ops[0] = Builder.CreateFNeg(Ops[0]); |
3056 | 470 | if (NegMul && IsScalar222 ) |
3057 | 78 | Ops[1] = Builder.CreateFNeg(Ops[1]); |
3058 | 470 | if (NegAcc) |
3059 | 228 | Ops[2] = Builder.CreateFNeg(Ops[2]); |
3060 | 470 | |
3061 | 470 | Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), |
3062 | 470 | Intrinsic::fma, |
3063 | 470 | Ops[0]->getType()), |
3064 | 470 | Ops); |
3065 | 470 | |
3066 | 470 | if (IsScalar) |
3067 | 154 | Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, |
3068 | 154 | (uint64_t)0); |
3069 | 1.71k | } else if (IsX86 && Name.startswith("fma4.vfmadd.s")1.68k ) { |
3070 | 46 | Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1), |
3071 | 46 | CI->getArgOperand(2) }; |
3072 | 46 | |
3073 | 46 | Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0); |
3074 | 46 | Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0); |
3075 | 46 | Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0); |
3076 | 46 | |
3077 | 46 | Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), |
3078 | 46 | Intrinsic::fma, |
3079 | 46 | Ops[0]->getType()), |
3080 | 46 | Ops); |
3081 | 46 | |
3082 | 46 | Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()), |
3083 | 46 | Rep, (uint64_t)0); |
3084 | 1.67k | } else if (IsX86 && (1.63k Name.startswith("avx512.mask.vfmadd.s")1.63k || |
3085 | 1.63k | Name.startswith("avx512.maskz.vfmadd.s")1.57k || |
3086 | 1.63k | Name.startswith("avx512.mask3.vfmadd.s")1.54k || |
3087 | 1.63k | Name.startswith("avx512.mask3.vfmsub.s")1.50k || |
3088 | 1.63k | Name.startswith("avx512.mask3.vfnmsub.s")1.47k )) { |
3089 | 175 | bool IsMask3 = Name[11] == '3'; |
3090 | 175 | bool IsMaskZ = Name[11] == 'z'; |
3091 | 175 | // Drop the "avx512.mask." to make it easier. |
3092 | 175 | Name = Name.drop_front(IsMask3 || IsMaskZ95 ? 13115 : 1260 ); |
3093 | 175 | bool NegMul = Name[2] == 'n'; |
3094 | 175 | bool NegAcc = NegMul ? Name[4] == 's'16 : Name[3] == 's'159 ; |
3095 | 175 | |
3096 | 175 | Value *A = CI->getArgOperand(0); |
3097 | 175 | Value *B = CI->getArgOperand(1); |
3098 | 175 | Value *C = CI->getArgOperand(2); |
3099 | 175 | |
3100 | 175 | if (NegMul && (16 IsMask316 || IsMaskZ0 )) |
3101 | 16 | A = Builder.CreateFNeg(A); |
3102 | 175 | if (NegMul && !(16 IsMask316 || IsMaskZ0 )) |
3103 | 0 | B = Builder.CreateFNeg(B); |
3104 | 175 | if (NegAcc) |
3105 | 44 | C = Builder.CreateFNeg(C); |
3106 | 175 | |
3107 | 175 | A = Builder.CreateExtractElement(A, (uint64_t)0); |
3108 | 175 | B = Builder.CreateExtractElement(B, (uint64_t)0); |
3109 | 175 | C = Builder.CreateExtractElement(C, (uint64_t)0); |
3110 | 175 | |
3111 | 175 | if (!isa<ConstantInt>(CI->getArgOperand(4)) || |
3112 | 175 | cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) { |
3113 | 44 | Value *Ops[] = { A, B, C, CI->getArgOperand(4) }; |
3114 | 44 | |
3115 | 44 | Intrinsic::ID IID; |
3116 | 44 | if (Name.back() == 'd') |
3117 | 18 | IID = Intrinsic::x86_avx512_vfmadd_f64; |
3118 | 26 | else |
3119 | 26 | IID = Intrinsic::x86_avx512_vfmadd_f32; |
3120 | 44 | Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID); |
3121 | 44 | Rep = Builder.CreateCall(FMA, Ops); |
3122 | 131 | } else { |
3123 | 131 | Function *FMA = Intrinsic::getDeclaration(CI->getModule(), |
3124 | 131 | Intrinsic::fma, |
3125 | 131 | A->getType()); |
3126 | 131 | Rep = Builder.CreateCall(FMA, { A, B, C }); |
3127 | 131 | } |
3128 | 175 | |
3129 | 175 | Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())35 : |
3130 | 175 | IsMask3 140 ? C80 : A60 ; |
3131 | 175 | |
3132 | 175 | // For Mask3 with NegAcc, we need to create a new extractelement that |
3133 | 175 | // avoids the negation above. |
3134 | 175 | if (NegAcc && IsMask344 ) |
3135 | 44 | PassThru = Builder.CreateExtractElement(CI->getArgOperand(2), |
3136 | 44 | (uint64_t)0); |
3137 | 175 | |
3138 | 175 | Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3), |
3139 | 175 | Rep, PassThru); |
3140 | 175 | Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 280 : 095 ), |
3141 | 175 | Rep, (uint64_t)0); |
3142 | 1.49k | } else if (IsX86 && (1.46k Name.startswith("avx512.mask.vfmadd.p")1.46k || |
3143 | 1.46k | Name.startswith("avx512.mask.vfnmadd.p")1.40k || |
3144 | 1.46k | Name.startswith("avx512.mask.vfnmsub.p")1.37k || |
3145 | 1.46k | Name.startswith("avx512.mask3.vfmadd.p")1.33k || |
3146 | 1.46k | Name.startswith("avx512.mask3.vfmsub.p")1.32k || |
3147 | 1.46k | Name.startswith("avx512.mask3.vfnmsub.p")1.31k || |
3148 | 1.46k | Name.startswith("avx512.maskz.vfmadd.p")1.30k )) { |
3149 | 164 | bool IsMask3 = Name[11] == '3'; |
3150 | 164 | bool IsMaskZ = Name[11] == 'z'; |
3151 | 164 | // Drop the "avx512.mask." to make it easier. |
3152 | 164 | Name = Name.drop_front(IsMask3 || IsMaskZ134 ? 1340 : 12124 ); |
3153 | 164 | bool NegMul = Name[2] == 'n'; |
3154 | 164 | bool NegAcc = NegMul ? Name[4] == 's'72 : Name[3] == 's'92 ; |
3155 | 164 | |
3156 | 164 | Value *A = CI->getArgOperand(0); |
3157 | 164 | Value *B = CI->getArgOperand(1); |
3158 | 164 | Value *C = CI->getArgOperand(2); |
3159 | 164 | |
3160 | 164 | if (NegMul && (72 IsMask372 || IsMaskZ62 )) |
3161 | 10 | A = Builder.CreateFNeg(A); |
3162 | 164 | if (NegMul && !(72 IsMask372 || IsMaskZ62 )) |
3163 | 62 | B = Builder.CreateFNeg(B); |
3164 | 164 | if (NegAcc) |
3165 | 58 | C = Builder.CreateFNeg(C); |
3166 | 164 | |
3167 | 164 | if (CI->getNumArgOperands() == 5 && |
3168 | 164 | (60 !isa<ConstantInt>(CI->getArgOperand(4))60 || |
3169 | 60 | cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) { |
3170 | 28 | Intrinsic::ID IID; |
3171 | 28 | // Check the character before ".512" in string. |
3172 | 28 | if (Name[Name.size()-5] == 's') |
3173 | 12 | IID = Intrinsic::x86_avx512_vfmadd_ps_512; |
3174 | 16 | else |
3175 | 16 | IID = Intrinsic::x86_avx512_vfmadd_pd_512; |
3176 | 28 | |
3177 | 28 | Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), |
3178 | 28 | { A, B, C, CI->getArgOperand(4) }); |
3179 | 136 | } else { |
3180 | 136 | Function *FMA = Intrinsic::getDeclaration(CI->getModule(), |
3181 | 136 | Intrinsic::fma, |
3182 | 136 | A->getType()); |
3183 | 136 | Rep = Builder.CreateCall(FMA, { A, B, C }); |
3184 | 136 | } |
3185 | 164 | |
3186 | 164 | Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())10 : |
3187 | 164 | IsMask3 154 ? CI->getArgOperand(2)30 : |
3188 | 154 | CI->getArgOperand(0)124 ; |
3189 | 164 | |
3190 | 164 | Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); |
3191 | 1.33k | } else if (IsX86 && (1.29k Name.startswith("fma.vfmaddsub.p")1.29k || |
3192 | 1.29k | Name.startswith("fma.vfmsubadd.p")1.27k )) { |
3193 | 48 | bool IsSubAdd = Name[7] == 's'; |
3194 | 48 | int NumElts = CI->getType()->getVectorNumElements(); |
3195 | 48 | |
3196 | 48 | Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1), |
3197 | 48 | CI->getArgOperand(2) }; |
3198 | 48 | |
3199 | 48 | Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma, |
3200 | 48 | Ops[0]->getType()); |
3201 | 48 | Value *Odd = Builder.CreateCall(FMA, Ops); |
3202 | 48 | Ops[2] = Builder.CreateFNeg(Ops[2]); |
3203 | 48 | Value *Even = Builder.CreateCall(FMA, Ops); |
3204 | 48 | |
3205 | 48 | if (IsSubAdd) |
3206 | 24 | std::swap(Even, Odd); |
3207 | 48 | |
3208 | 48 | SmallVector<uint32_t, 32> Idxs(NumElts); |
3209 | 264 | for (int i = 0; i != NumElts; ++i216 ) |
3210 | 216 | Idxs[i] = i + (i % 2) * NumElts; |
3211 | 48 | |
3212 | 48 | Rep = Builder.CreateShuffleVector(Even, Odd, Idxs); |
3213 | 1.28k | } else if (IsX86 && (1.25k Name.startswith("avx512.mask.vfmaddsub.p")1.25k || |
3214 | 1.25k | Name.startswith("avx512.mask3.vfmaddsub.p")1.22k || |
3215 | 1.25k | Name.startswith("avx512.maskz.vfmaddsub.p")1.21k || |
3216 | 1.25k | Name.startswith("avx512.mask3.vfmsubadd.p")1.20k )) { |
3217 | 52 | bool IsMask3 = Name[11] == '3'; |
3218 | 52 | bool IsMaskZ = Name[11] == 'z'; |
3219 | 52 | // Drop the "avx512.mask." to make it easier. |
3220 | 52 | Name = Name.drop_front(IsMask3 || IsMaskZ32 ? 1330 : 1222 ); |
3221 | 52 | bool IsSubAdd = Name[3] == 's'; |
3222 | 52 | if (CI->getNumArgOperands() == 5 && |
3223 | 52 | (12 !isa<ConstantInt>(CI->getArgOperand(4))12 || |
3224 | 12 | cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) { |
3225 | 0 | Intrinsic::ID IID; |
3226 | 0 | // Check the character before ".512" in string. |
3227 | 0 | if (Name[Name.size()-5] == 's') |
3228 | 0 | IID = Intrinsic::x86_avx512_vfmaddsub_ps_512; |
3229 | 0 | else |
3230 | 0 | IID = Intrinsic::x86_avx512_vfmaddsub_pd_512; |
3231 | 0 |
|
3232 | 0 | Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1), |
3233 | 0 | CI->getArgOperand(2), CI->getArgOperand(4) }; |
3234 | 0 | if (IsSubAdd) |
3235 | 0 | Ops[2] = Builder.CreateFNeg(Ops[2]); |
3236 | 0 |
|
3237 | 0 | Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), |
3238 | 0 | {CI->getArgOperand(0), CI->getArgOperand(1), |
3239 | 0 | CI->getArgOperand(2), CI->getArgOperand(4)}); |
3240 | 52 | } else { |
3241 | 52 | int NumElts = CI->getType()->getVectorNumElements(); |
3242 | 52 | |
3243 | 52 | Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1), |
3244 | 52 | CI->getArgOperand(2) }; |
3245 | 52 | |
3246 | 52 | Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma, |
3247 | 52 | Ops[0]->getType()); |
3248 | 52 | Value *Odd = Builder.CreateCall(FMA, Ops); |
3249 | 52 | Ops[2] = Builder.CreateFNeg(Ops[2]); |
3250 | 52 | Value *Even = Builder.CreateCall(FMA, Ops); |
3251 | 52 | |
3252 | 52 | if (IsSubAdd) |
3253 | 10 | std::swap(Even, Odd); |
3254 | 52 | |
3255 | 52 | SmallVector<uint32_t, 32> Idxs(NumElts); |
3256 | 376 | for (int i = 0; i != NumElts; ++i324 ) |
3257 | 324 | Idxs[i] = i + (i % 2) * NumElts; |
3258 | 52 | |
3259 | 52 | Rep = Builder.CreateShuffleVector(Even, Odd, Idxs); |
3260 | 52 | } |
3261 | 52 | |
3262 | 52 | Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())10 : |
3263 | 52 | IsMask3 42 ? CI->getArgOperand(2)20 : |
3264 | 42 | CI->getArgOperand(0)22 ; |
3265 | 52 | |
3266 | 52 | Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); |
3267 | 1.23k | } else if (IsX86 && (1.19k Name.startswith("avx512.mask.pternlog.")1.19k || |
3268 | 1.19k | Name.startswith("avx512.maskz.pternlog.")1.17k )) { |
3269 | 48 | bool ZeroMask = Name[11] == 'z'; |
3270 | 48 | unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); |
3271 | 48 | unsigned EltWidth = CI->getType()->getScalarSizeInBits(); |
3272 | 48 | Intrinsic::ID IID; |
3273 | 48 | if (VecWidth == 128 && EltWidth == 3216 ) |
3274 | 8 | IID = Intrinsic::x86_avx512_pternlog_d_128; |
3275 | 40 | else if (VecWidth == 256 && EltWidth == 3216 ) |
3276 | 8 | IID = Intrinsic::x86_avx512_pternlog_d_256; |
3277 | 32 | else if (VecWidth == 512 && EltWidth == 3216 ) |
3278 | 8 | IID = Intrinsic::x86_avx512_pternlog_d_512; |
3279 | 24 | else if (VecWidth == 128 && EltWidth == 648 ) |
3280 | 8 | IID = Intrinsic::x86_avx512_pternlog_q_128; |
3281 | 16 | else if (VecWidth == 256 && EltWidth == 648 ) |
3282 | 8 | IID = Intrinsic::x86_avx512_pternlog_q_256; |
3283 | 8 | else if (VecWidth == 512 && EltWidth == 64) |
3284 | 8 | IID = Intrinsic::x86_avx512_pternlog_q_512; |
3285 | 8 | else |
3286 | 8 | llvm_unreachable0 ("Unexpected intrinsic"); |
3287 | 48 | |
3288 | 48 | Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1), |
3289 | 48 | CI->getArgOperand(2), CI->getArgOperand(3) }; |
3290 | 48 | Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID), |
3291 | 48 | Args); |
3292 | 48 | Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())24 |
3293 | 48 | : CI->getArgOperand(0)24 ; |
3294 | 48 | Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru); |
3295 | 1.18k | } else if (IsX86 && (1.15k Name.startswith("avx512.mask.vpmadd52")1.15k || |
3296 | 1.15k | Name.startswith("avx512.maskz.vpmadd52")1.08k )) { |
3297 | 120 | bool ZeroMask = Name[11] == 'z'; |
3298 | 120 | bool High = Name[20] == 'h' || Name[21] == 'h'80 ; |
3299 | 120 | unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); |
3300 | 120 | Intrinsic::ID IID; |
3301 | 120 | if (VecWidth == 128 && !High32 ) |
3302 | 16 | IID = Intrinsic::x86_avx512_vpmadd52l_uq_128; |
3303 | 104 | else if (VecWidth == 256 && !High32 ) |
3304 | 16 | IID = Intrinsic::x86_avx512_vpmadd52l_uq_256; |
3305 | 88 | else if (VecWidth == 512 && !High56 ) |
3306 | 16 | IID = Intrinsic::x86_avx512_vpmadd52l_uq_512; |
3307 | 72 | else if (VecWidth == 128 && High16 ) |
3308 | 16 | IID = Intrinsic::x86_avx512_vpmadd52h_uq_128; |
3309 | 56 | else if (VecWidth == 256 && High16 ) |
3310 | 16 | IID = Intrinsic::x86_avx512_vpmadd52h_uq_256; |
3311 | 40 | else if (VecWidth == 512 && High) |
3312 | 40 | IID = Intrinsic::x86_avx512_vpmadd52h_uq_512; |
3313 | 40 | else |
3314 | 40 | llvm_unreachable0 ("Unexpected intrinsic"); |
3315 | 120 | |
3316 | 120 | Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1), |
3317 | 120 | CI->getArgOperand(2) }; |
3318 | 120 | Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID), |
3319 | 120 | Args); |
3320 | 120 | Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())56 |
3321 | 120 | : CI->getArgOperand(0)64 ; |
3322 | 120 | Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); |
3323 | 1.06k | } else if (IsX86 && (1.03k Name.startswith("avx512.mask.vpermi2var.")1.03k || |
3324 | 1.03k | Name.startswith("avx512.mask.vpermt2var.")869 || |
3325 | 1.03k | Name.startswith("avx512.maskz.vpermt2var.")813 )) { |
3326 | 397 | bool ZeroMask = Name[11] == 'z'; |
3327 | 397 | bool IndexForm = Name[17] == 'i'; |
3328 | 397 | Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm); |
3329 | 669 | } else if (IsX86 && (634 Name.startswith("avx512.mask.vpdpbusd.")634 || |
3330 | 634 | Name.startswith("avx512.maskz.vpdpbusd.")622 || |
3331 | 634 | Name.startswith("avx512.mask.vpdpbusds.")616 || |
3332 | 634 | Name.startswith("avx512.maskz.vpdpbusds.")604 )) { |
3333 | 36 | bool ZeroMask = Name[11] == 'z'; |
3334 | 36 | bool IsSaturating = Name[ZeroMask ? 2112 : 2024 ] == 's'; |
3335 | 36 | unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); |
3336 | 36 | Intrinsic::ID IID; |
3337 | 36 | if (VecWidth == 128 && !IsSaturating12 ) |
3338 | 6 | IID = Intrinsic::x86_avx512_vpdpbusd_128; |
3339 | 30 | else if (VecWidth == 256 && !IsSaturating12 ) |
3340 | 6 | IID = Intrinsic::x86_avx512_vpdpbusd_256; |
3341 | 24 | else if (VecWidth == 512 && !IsSaturating12 ) |
3342 | 6 | IID = Intrinsic::x86_avx512_vpdpbusd_512; |
3343 | 18 | else if (VecWidth == 128 && IsSaturating6 ) |
3344 | 6 | IID = Intrinsic::x86_avx512_vpdpbusds_128; |
3345 | 12 | else if (VecWidth == 256 && IsSaturating6 ) |
3346 | 6 | IID = Intrinsic::x86_avx512_vpdpbusds_256; |
3347 | 6 | else if (VecWidth == 512 && IsSaturating) |
3348 | 6 | IID = Intrinsic::x86_avx512_vpdpbusds_512; |
3349 | 6 | else |
3350 | 6 | llvm_unreachable0 ("Unexpected intrinsic"); |
3351 | 36 | |
3352 | 36 | Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1), |
3353 | 36 | CI->getArgOperand(2) }; |
3354 | 36 | Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID), |
3355 | 36 | Args); |
3356 | 36 | Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())12 |
3357 | 36 | : CI->getArgOperand(0)24 ; |
3358 | 36 | Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); |
3359 | 633 | } else if (IsX86 && (598 Name.startswith("avx512.mask.vpdpwssd.")598 || |
3360 | 598 | Name.startswith("avx512.maskz.vpdpwssd.")586 || |
3361 | 598 | Name.startswith("avx512.mask.vpdpwssds.")580 || |
3362 | 598 | Name.startswith("avx512.maskz.vpdpwssds.")568 )) { |
3363 | 36 | bool ZeroMask = Name[11] == 'z'; |
3364 | 36 | bool IsSaturating = Name[ZeroMask ? 2112 : 2024 ] == 's'; |
3365 | 36 | unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); |
3366 | 36 | Intrinsic::ID IID; |
3367 | 36 | if (VecWidth == 128 && !IsSaturating12 ) |
3368 | 6 | IID = Intrinsic::x86_avx512_vpdpwssd_128; |
3369 | 30 | else if (VecWidth == 256 && !IsSaturating12 ) |
3370 | 6 | IID = Intrinsic::x86_avx512_vpdpwssd_256; |
3371 | 24 | else if (VecWidth == 512 && !IsSaturating12 ) |
3372 | 6 | IID = Intrinsic::x86_avx512_vpdpwssd_512; |
3373 | 18 | else if (VecWidth == 128 && IsSaturating6 ) |
3374 | 6 | IID = Intrinsic::x86_avx512_vpdpwssds_128; |
3375 | 12 | else if (VecWidth == 256 && IsSaturating6 ) |
3376 | 6 | IID = Intrinsic::x86_avx512_vpdpwssds_256; |
3377 | 6 | else if (VecWidth == 512 && IsSaturating) |
3378 | 6 | IID = Intrinsic::x86_avx512_vpdpwssds_512; |
3379 | 6 | else |
3380 | 6 | llvm_unreachable0 ("Unexpected intrinsic"); |
3381 | 36 | |
3382 | 36 | Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1), |
3383 | 36 | CI->getArgOperand(2) }; |
3384 | 36 | Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID), |
3385 | 36 | Args); |
3386 | 36 | Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())12 |
3387 | 36 | : CI->getArgOperand(0)24 ; |
3388 | 36 | Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); |
3389 | 597 | } else if (IsX86 && (562 Name == "addcarryx.u32"562 || Name == "addcarryx.u64"560 || |
3390 | 562 | Name == "addcarry.u32"554 || Name == "addcarry.u64"552 || |
3391 | 562 | Name == "subborrow.u32"550 || Name == "subborrow.u64"548 )) { |
3392 | 16 | Intrinsic::ID IID; |
3393 | 16 | if (Name[0] == 'a' && Name.back() == '2'12 ) |
3394 | 4 | IID = Intrinsic::x86_addcarry_32; |
3395 | 12 | else if (Name[0] == 'a' && Name.back() == '4'8 ) |
3396 | 8 | IID = Intrinsic::x86_addcarry_64; |
3397 | 4 | else if (Name[0] == 's' && Name.back() == '2') |
3398 | 2 | IID = Intrinsic::x86_subborrow_32; |
3399 | 2 | else if (Name[0] == 's' && Name.back() == '4') |
3400 | 2 | IID = Intrinsic::x86_subborrow_64; |
3401 | 2 | else |
3402 | 2 | llvm_unreachable0 ("Unexpected intrinsic"); |
3403 | 16 | |
3404 | 16 | // Make a call with 3 operands. |
3405 | 16 | Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1), |
3406 | 16 | CI->getArgOperand(2)}; |
3407 | 16 | Value *NewCall = Builder.CreateCall( |
3408 | 16 | Intrinsic::getDeclaration(CI->getModule(), IID), |
3409 | 16 | Args); |
3410 | 16 | |
3411 | 16 | // Extract the second result and store it. |
3412 | 16 | Value *Data = Builder.CreateExtractValue(NewCall, 1); |
3413 | 16 | // Cast the pointer to the right type. |
3414 | 16 | Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3), |
3415 | 16 | llvm::PointerType::getUnqual(Data->getType())); |
3416 | 16 | Builder.CreateAlignedStore(Data, Ptr, 1); |
3417 | 16 | // Replace the original call result with the first result of the new call. |
3418 | 16 | Value *CF = Builder.CreateExtractValue(NewCall, 0); |
3419 | 16 | |
3420 | 16 | CI->replaceAllUsesWith(CF); |
3421 | 16 | Rep = nullptr; |
3422 | 581 | } else if (IsX86 && Name.startswith("avx512.mask.")546 && |
3423 | 581 | upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)546 ) { |
3424 | 546 | // Rep will be updated by the call in the condition. |
3425 | 546 | } else if (35 IsNVVM35 && (35 Name == "abs.i"35 || Name == "abs.ll"33 )) { |
3426 | 4 | Value *Arg = CI->getArgOperand(0); |
3427 | 4 | Value *Neg = Builder.CreateNeg(Arg, "neg"); |
3428 | 4 | Value *Cmp = Builder.CreateICmpSGE( |
3429 | 4 | Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond"); |
3430 | 4 | Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs"); |
3431 | 31 | } else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") || |
3432 | 31 | Name.startswith("atomic.load.add.f64.p")28 )) { |
3433 | 9 | Value *Ptr = CI->getArgOperand(0); |
3434 | 9 | Value *Val = CI->getArgOperand(1); |
3435 | 9 | Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, |
3436 | 9 | AtomicOrdering::SequentiallyConsistent); |
3437 | 22 | } else if (IsNVVM && (Name == "max.i" || Name == "max.ll"20 || |
3438 | 22 | Name == "max.ui"18 || Name == "max.ull"16 )) { |
3439 | 8 | Value *Arg0 = CI->getArgOperand(0); |
3440 | 8 | Value *Arg1 = CI->getArgOperand(1); |
3441 | 8 | Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")6 |
3442 | 8 | ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")4 |
3443 | 8 | : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond")4 ; |
3444 | 8 | Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max"); |
3445 | 14 | } else if (IsNVVM && (Name == "min.i" || Name == "min.ll"12 || |
3446 | 14 | Name == "min.ui"10 || Name == "min.ull"8 )) { |
3447 | 8 | Value *Arg0 = CI->getArgOperand(0); |
3448 | 8 | Value *Arg1 = CI->getArgOperand(1); |
3449 | 8 | Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")6 |
3450 | 8 | ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")4 |
3451 | 8 | : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond")4 ; |
3452 | 8 | Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min"); |
3453 | 8 | } else if (6 IsNVVM6 && Name == "clz.ll"6 ) { |
3454 | 2 | // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 and returns an i64. |
3455 | 2 | Value *Arg = CI->getArgOperand(0); |
3456 | 2 | Value *Ctlz = Builder.CreateCall( |
3457 | 2 | Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, |
3458 | 2 | {Arg->getType()}), |
3459 | 2 | {Arg, Builder.getFalse()}, "ctlz"); |
3460 | 2 | Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc"); |
3461 | 4 | } else if (IsNVVM && Name == "popc.ll") { |
3462 | 2 | // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 and returns an |
3463 | 2 | // i64. |
3464 | 2 | Value *Arg = CI->getArgOperand(0); |
3465 | 2 | Value *Popc = Builder.CreateCall( |
3466 | 2 | Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop, |
3467 | 2 | {Arg->getType()}), |
3468 | 2 | Arg, "ctpop"); |
3469 | 2 | Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc"); |
3470 | 2 | } else if (IsNVVM && Name == "h2f") { |
3471 | 2 | Rep = Builder.CreateCall(Intrinsic::getDeclaration( |
3472 | 2 | F->getParent(), Intrinsic::convert_from_fp16, |
3473 | 2 | {Builder.getFloatTy()}), |
3474 | 2 | CI->getArgOperand(0), "h2f"); |
3475 | 2 | } else { |
3476 | 0 | llvm_unreachable("Unknown function for CallInst upgrade."); |
3477 | 0 | } |
3478 | 9.97k | |
3479 | 9.97k | if (Rep) |
3480 | 9.95k | CI->replaceAllUsesWith(Rep); |
3481 | 9.97k | CI->eraseFromParent(); |
3482 | 9.97k | return; |
3483 | 9.97k | } |
3484 | 72.8k | |
3485 | 72.8k | const auto &DefaultCase = [&NewFn, &CI]() -> void { |
3486 | 62.1k | // Handle generic mangling change, but nothing else |
3487 | 62.1k | assert( |
3488 | 62.1k | (CI->getCalledFunction()->getName() != NewFn->getName()) && |
3489 | 62.1k | "Unknown function for CallInst upgrade and isn't just a name change"); |
3490 | 62.1k | CI->setCalledFunction(NewFn); |
3491 | 62.1k | }; |
3492 | 72.8k | CallInst *NewCall = nullptr; |
3493 | 72.8k | switch (NewFn->getIntrinsicID()) { |
3494 | 72.8k | default: { |
3495 | 62.0k | DefaultCase(); |
3496 | 62.0k | return; |
3497 | 72.8k | } |
3498 | 72.8k | case Intrinsic::experimental_vector_reduce_v2_fmul: { |
3499 | 8 | SmallVector<Value *, 2> Args; |
3500 | 8 | if (CI->isFast()) |
3501 | 4 | Args.push_back(ConstantFP::get(CI->getOperand(0)->getType(), 1.0)); |
3502 | 4 | else |
3503 | 4 | Args.push_back(CI->getOperand(0)); |
3504 | 8 | Args.push_back(CI->getOperand(1)); |
3505 | 8 | NewCall = Builder.CreateCall(NewFn, Args); |
3506 | 8 | cast<Instruction>(NewCall)->copyFastMathFlags(CI); |
3507 | 8 | break; |
3508 | 72.8k | } |
3509 | 72.8k | case Intrinsic::experimental_vector_reduce_v2_fadd: { |
3510 | 8 | SmallVector<Value *, 2> Args; |
3511 | 8 | if (CI->isFast()) |
3512 | 4 | Args.push_back(Constant::getNullValue(CI->getOperand(0)->getType())); |
3513 | 4 | else |
3514 | 4 | Args.push_back(CI->getOperand(0)); |
3515 | 8 | Args.push_back(CI->getOperand(1)); |
3516 | 8 | NewCall = Builder.CreateCall(NewFn, Args); |
3517 | 8 | cast<Instruction>(NewCall)->copyFastMathFlags(CI); |
3518 | 8 | break; |
3519 | 72.8k | } |
3520 | 72.8k | case Intrinsic::arm_neon_vld1: |
3521 | 14 | case Intrinsic::arm_neon_vld2: |
3522 | 14 | case Intrinsic::arm_neon_vld3: |
3523 | 14 | case Intrinsic::arm_neon_vld4: |
3524 | 14 | case Intrinsic::arm_neon_vld2lane: |
3525 | 14 | case Intrinsic::arm_neon_vld3lane: |
3526 | 14 | case Intrinsic::arm_neon_vld4lane: |
3527 | 14 | case Intrinsic::arm_neon_vst1: |
3528 | 14 | case Intrinsic::arm_neon_vst2: |
3529 | 14 | case Intrinsic::arm_neon_vst3: |
3530 | 14 | case Intrinsic::arm_neon_vst4: |
3531 | 14 | case Intrinsic::arm_neon_vst2lane: |
3532 | 14 | case Intrinsic::arm_neon_vst3lane: |
3533 | 14 | case Intrinsic::arm_neon_vst4lane: { |
3534 | 14 | SmallVector<Value *, 4> Args(CI->arg_operands().begin(), |
3535 | 14 | CI->arg_operands().end()); |
3536 | 14 | NewCall = Builder.CreateCall(NewFn, Args); |
3537 | 14 | break; |
3538 | 14 | } |
3539 | 14 | |
3540 | 14 | case Intrinsic::bitreverse: |
3541 | 8 | NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)}); |
3542 | 8 | break; |
3543 | 14 | |
3544 | 72 | case Intrinsic::ctlz: |
3545 | 72 | case Intrinsic::cttz: |
3546 | 72 | assert(CI->getNumArgOperands() == 1 && |
3547 | 72 | "Mismatch between function args and call args"); |
3548 | 72 | NewCall = |
3549 | 72 | Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()}); |
3550 | 72 | break; |
3551 | 72 | |
3552 | 81 | case Intrinsic::objectsize: { |
3553 | 81 | Value *NullIsUnknownSize = CI->getNumArgOperands() == 2 |
3554 | 81 | ? Builder.getFalse()77 |
3555 | 81 | : CI->getArgOperand(2)4 ; |
3556 | 81 | Value *Dynamic = |
3557 | 81 | CI->getNumArgOperands() < 4 ? Builder.getFalse()78 : CI->getArgOperand(3)3 ; |
3558 | 81 | NewCall = Builder.CreateCall( |
3559 | 81 | NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic}); |
3560 | 81 | break; |
3561 | 72 | } |
3562 | 72 | |
3563 | 72 | case Intrinsic::ctpop: |
3564 | 6 | NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)}); |
3565 | 6 | break; |
3566 | 72 | |
3567 | 72 | case Intrinsic::convert_from_fp16: |
3568 | 0 | NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)}); |
3569 | 0 | break; |
3570 | 72 | |
3571 | 434 | case Intrinsic::dbg_value: |
3572 | 434 | // Upgrade from the old version that had an extra offset argument. |
3573 | 434 | assert(CI->getNumArgOperands() == 4); |
3574 | 434 | // Drop nonzero offsets instead of attempting to upgrade them. |
3575 | 434 | if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1))) |
3576 | 434 | if (Offset->isZeroValue()) { |
3577 | 432 | NewCall = Builder.CreateCall( |
3578 | 432 | NewFn, |
3579 | 432 | {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)}); |
3580 | 432 | break; |
3581 | 432 | } |
3582 | 2 | CI->eraseFromParent(); |
3583 | 2 | return; |
3584 | 2 | |
3585 | 2 | case Intrinsic::x86_xop_vfrcz_ss: |
3586 | 0 | case Intrinsic::x86_xop_vfrcz_sd: |
3587 | 0 | NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)}); |
3588 | 0 | break; |
3589 | 0 |
|
3590 | 8 | case Intrinsic::x86_xop_vpermil2pd: |
3591 | 8 | case Intrinsic::x86_xop_vpermil2ps: |
3592 | 8 | case Intrinsic::x86_xop_vpermil2pd_256: |
3593 | 8 | case Intrinsic::x86_xop_vpermil2ps_256: { |
3594 | 8 | SmallVector<Value *, 4> Args(CI->arg_operands().begin(), |
3595 | 8 | CI->arg_operands().end()); |
3596 | 8 | VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType()); |
3597 | 8 | VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy); |
3598 | 8 | Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy); |
3599 | 8 | NewCall = Builder.CreateCall(NewFn, Args); |
3600 | 8 | break; |
3601 | 8 | } |
3602 | 8 | |
3603 | 12 | case Intrinsic::x86_sse41_ptestc: |
3604 | 12 | case Intrinsic::x86_sse41_ptestz: |
3605 | 12 | case Intrinsic::x86_sse41_ptestnzc: { |
3606 | 12 | // The arguments for these intrinsics used to be v4f32, and changed |
3607 | 12 | // to v2i64. This is purely a nop, since those are bitwise intrinsics. |
3608 | 12 | // So, the only thing required is a bitcast for both arguments. |
3609 | 12 | // First, check the arguments have the old type. |
3610 | 12 | Value *Arg0 = CI->getArgOperand(0); |
3611 | 12 | if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4)) |
3612 | 0 | return; |
3613 | 12 | |
3614 | 12 | // Old intrinsic, add bitcasts |
3615 | 12 | Value *Arg1 = CI->getArgOperand(1); |
3616 | 12 | |
3617 | 12 | Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2); |
3618 | 12 | |
3619 | 12 | Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast"); |
3620 | 12 | Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast"); |
3621 | 12 | |
3622 | 12 | NewCall = Builder.CreateCall(NewFn, {BC0, BC1}); |
3623 | 12 | break; |
3624 | 12 | } |
3625 | 12 | |
3626 | 12 | case Intrinsic::x86_rdtscp: { |
3627 | 2 | // This used to take 1 arguments. If we have no arguments, it is already |
3628 | 2 | // upgraded. |
3629 | 2 | if (CI->getNumOperands() == 0) |
3630 | 0 | return; |
3631 | 2 | |
3632 | 2 | NewCall = Builder.CreateCall(NewFn); |
3633 | 2 | // Extract the second result and store it. |
3634 | 2 | Value *Data = Builder.CreateExtractValue(NewCall, 1); |
3635 | 2 | // Cast the pointer to the right type. |
3636 | 2 | Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0), |
3637 | 2 | llvm::PointerType::getUnqual(Data->getType())); |
3638 | 2 | Builder.CreateAlignedStore(Data, Ptr, 1); |
3639 | 2 | // Replace the original call result with the first result of the new call. |
3640 | 2 | Value *TSC = Builder.CreateExtractValue(NewCall, 0); |
3641 | 2 | |
3642 | 2 | std::string Name = CI->getName(); |
3643 | 2 | if (!Name.empty()) { |
3644 | 0 | CI->setName(Name + ".old"); |
3645 | 0 | NewCall->setName(Name); |
3646 | 0 | } |
3647 | 2 | CI->replaceAllUsesWith(TSC); |
3648 | 2 | CI->eraseFromParent(); |
3649 | 2 | return; |
3650 | 2 | } |
3651 | 2 | |
3652 | 111 | case Intrinsic::x86_sse41_insertps: |
3653 | 111 | case Intrinsic::x86_sse41_dppd: |
3654 | 111 | case Intrinsic::x86_sse41_dpps: |
3655 | 111 | case Intrinsic::x86_sse41_mpsadbw: |
3656 | 111 | case Intrinsic::x86_avx_dp_ps_256: |
3657 | 111 | case Intrinsic::x86_avx2_mpsadbw: { |
3658 | 111 | // Need to truncate the last argument from i32 to i8 -- this argument models |
3659 | 111 | // an inherently 8-bit immediate operand to these x86 instructions. |
3660 | 111 | SmallVector<Value *, 4> Args(CI->arg_operands().begin(), |
3661 | 111 | CI->arg_operands().end()); |
3662 | 111 | |
3663 | 111 | // Replace the last argument with a trunc. |
3664 | 111 | Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc"); |
3665 | 111 | NewCall = Builder.CreateCall(NewFn, Args); |
3666 | 111 | break; |
3667 | 111 | } |
3668 | 111 | |
3669 | 111 | case Intrinsic::thread_pointer: { |
3670 | 2 | NewCall = Builder.CreateCall(NewFn, {}); |
3671 | 2 | break; |
3672 | 111 | } |
3673 | 111 | |
3674 | 111 | case Intrinsic::invariant_start: |
3675 | 102 | case Intrinsic::invariant_end: |
3676 | 102 | case Intrinsic::masked_load: |
3677 | 102 | case Intrinsic::masked_store: |
3678 | 102 | case Intrinsic::masked_gather: |
3679 | 102 | case Intrinsic::masked_scatter: { |
3680 | 102 | SmallVector<Value *, 4> Args(CI->arg_operands().begin(), |
3681 | 102 | CI->arg_operands().end()); |
3682 | 102 | NewCall = Builder.CreateCall(NewFn, Args); |
3683 | 102 | break; |
3684 | 102 | } |
3685 | 102 | |
3686 | 9.91k | case Intrinsic::memcpy: |
3687 | 9.91k | case Intrinsic::memmove: |
3688 | 9.91k | case Intrinsic::memset: { |
3689 | 9.91k | // We have to make sure that the call signature is what we're expecting. |
3690 | 9.91k | // We only want to change the old signatures by removing the alignment arg: |
3691 | 9.91k | // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1) |
3692 | 9.91k | // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1) |
3693 | 9.91k | // @llvm.memset...(i8*, i8, i[32|64], i32, i1) |
3694 | 9.91k | // -> @llvm.memset...(i8*, i8, i[32|64], i1) |
3695 | 9.91k | // Note: i8*'s in the above can be any pointer type |
3696 | 9.91k | if (CI->getNumArgOperands() != 5) { |
3697 | 24 | DefaultCase(); |
3698 | 24 | return; |
3699 | 24 | } |
3700 | 9.89k | // Remove alignment argument (3), and add alignment attributes to the |
3701 | 9.89k | // dest/src pointers. |
3702 | 9.89k | Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1), |
3703 | 9.89k | CI->getArgOperand(2), CI->getArgOperand(4)}; |
3704 | 9.89k | NewCall = Builder.CreateCall(NewFn, Args); |
3705 | 9.89k | auto *MemCI = cast<MemIntrinsic>(NewCall); |
3706 | 9.89k | // All mem intrinsics support dest alignment. |
3707 | 9.89k | const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3)); |
3708 | 9.89k | MemCI->setDestAlignment(Align->getZExtValue()); |
3709 | 9.89k | // Memcpy/Memmove also support source alignment. |
3710 | 9.89k | if (auto *MTI = dyn_cast<MemTransferInst>(MemCI)) |
3711 | 7.07k | MTI->setSourceAlignment(Align->getZExtValue()); |
3712 | 9.89k | break; |
3713 | 9.89k | } |
3714 | 10.7k | } |
3715 | 10.7k | assert(NewCall && "Should have either set this variable or returned through " |
3716 | 10.7k | "the default case"); |
3717 | 10.7k | std::string Name = CI->getName(); |
3718 | 10.7k | if (!Name.empty()) { |
3719 | 253 | CI->setName(Name + ".old"); |
3720 | 253 | NewCall->setName(Name); |
3721 | 253 | } |
3722 | 10.7k | CI->replaceAllUsesWith(NewCall); |
3723 | 10.7k | CI->eraseFromParent(); |
3724 | 10.7k | } |
3725 | | |
3726 | 380k | void llvm::UpgradeCallsToIntrinsic(Function *F) { |
3727 | 380k | assert(F && "Illegal attempt to upgrade a non-existent intrinsic."); |
3728 | 380k | |
3729 | 380k | // Check if this function should be upgraded and get the replacement function |
3730 | 380k | // if there is one. |
3731 | 380k | Function *NewFn; |
3732 | 380k | if (UpgradeIntrinsicFunction(F, NewFn)) { |
3733 | 5.15k | // Replace all users of the old function with the new function or new |
3734 | 5.15k | // instructions. This is not a range loop because the call is deleted. |
3735 | 16.7k | for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; ) |
3736 | 11.6k | if (CallInst *CI = dyn_cast<CallInst>(*UI++)) |
3737 | 11.6k | UpgradeIntrinsicCall(CI, NewFn); |
3738 | 5.15k | |
3739 | 5.15k | // Remove old function, no longer used, from the module. |
3740 | 5.15k | F->eraseFromParent(); |
3741 | 5.15k | } |
3742 | 380k | } |
3743 | | |
3744 | 1.06M | MDNode *llvm::UpgradeTBAANode(MDNode &MD) { |
3745 | 1.06M | // Check if the tag uses struct-path aware TBAA format. |
3746 | 1.06M | if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 31.04M ) |
3747 | 1.04M | return &MD; |
3748 | 17.6k | |
3749 | 17.6k | auto &Context = MD.getContext(); |
3750 | 17.6k | if (MD.getNumOperands() == 3) { |
3751 | 26 | Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)}; |
3752 | 26 | MDNode *ScalarType = MDNode::get(Context, Elts); |
3753 | 26 | // Create a MDNode <ScalarType, ScalarType, offset 0, const> |
3754 | 26 | Metadata *Elts2[] = {ScalarType, ScalarType, |
3755 | 26 | ConstantAsMetadata::get( |
3756 | 26 | Constant::getNullValue(Type::getInt64Ty(Context))), |
3757 | 26 | MD.getOperand(2)}; |
3758 | 26 | return MDNode::get(Context, Elts2); |
3759 | 26 | } |
3760 | 17.5k | // Create a MDNode <MD, MD, offset 0> |
3761 | 17.5k | Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue( |
3762 | 17.5k | Type::getInt64Ty(Context)))}; |
3763 | 17.5k | return MDNode::get(Context, Elts); |
3764 | 17.5k | } |
3765 | | |
3766 | | Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, |
3767 | 718k | Instruction *&Temp) { |
3768 | 718k | if (Opc != Instruction::BitCast) |
3769 | 393k | return nullptr; |
3770 | 325k | |
3771 | 325k | Temp = nullptr; |
3772 | 325k | Type *SrcTy = V->getType(); |
3773 | 325k | if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy()321k && |
3774 | 325k | SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()321k ) { |
3775 | 0 | LLVMContext &Context = V->getContext(); |
3776 | 0 |
|
3777 | 0 | // We have no information about target data layout, so we assume that |
3778 | 0 | // the maximum pointer size is 64bit. |
3779 | 0 | Type *MidTy = Type::getInt64Ty(Context); |
3780 | 0 | Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy); |
3781 | 0 |
|
3782 | 0 | return CastInst::Create(Instruction::IntToPtr, Temp, DestTy); |
3783 | 0 | } |
3784 | 325k | |
3785 | 325k | return nullptr; |
3786 | 325k | } |
3787 | | |
3788 | 21.2k | Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) { |
3789 | 21.2k | if (Opc != Instruction::BitCast) |
3790 | 390 | return nullptr; |
3791 | 20.8k | |
3792 | 20.8k | Type *SrcTy = C->getType(); |
3793 | 20.8k | if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy()20.8k && |
3794 | 20.8k | SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()20.8k ) { |
3795 | 0 | LLVMContext &Context = C->getContext(); |
3796 | 0 |
|
3797 | 0 | // We have no information about target data layout, so we assume that |
3798 | 0 | // the maximum pointer size is 64bit. |
3799 | 0 | Type *MidTy = Type::getInt64Ty(Context); |
3800 | 0 |
|
3801 | 0 | return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy), |
3802 | 0 | DestTy); |
3803 | 0 | } |
3804 | 20.8k | |
3805 | 20.8k | return nullptr; |
3806 | 20.8k | } |
3807 | | |
3808 | | /// Check the debug info version number, if it is out-dated, drop the debug |
3809 | | /// info. Return true if module is modified. |
3810 | 46.6k | bool llvm::UpgradeDebugInfo(Module &M) { |
3811 | 46.6k | unsigned Version = getDebugMetadataVersionFromModule(M); |
3812 | 46.6k | if (Version == DEBUG_METADATA_VERSION) { |
3813 | 2.44k | bool BrokenDebugInfo = false; |
3814 | 2.44k | if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo)) |
3815 | 0 | report_fatal_error("Broken module found, compilation aborted!"); |
3816 | 2.44k | if (!BrokenDebugInfo) |
3817 | 2.38k | // Everything is ok. |
3818 | 2.38k | return false; |
3819 | 54 | else { |
3820 | 54 | // Diagnose malformed debug info. |
3821 | 54 | DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M); |
3822 | 54 | M.getContext().diagnose(Diag); |
3823 | 54 | } |
3824 | 2.44k | } |
3825 | 46.6k | bool Modified = StripDebugInfo(M); |
3826 | 44.2k | if (Modified && Version != DEBUG_METADATA_VERSION107 ) { |
3827 | 53 | // Diagnose a version mismatch. |
3828 | 53 | DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version); |
3829 | 53 | M.getContext().diagnose(DiagVersion); |
3830 | 53 | } |
3831 | 44.2k | return Modified; |
3832 | 46.6k | } |
3833 | | |
3834 | 6.98k | bool llvm::UpgradeRetainReleaseMarker(Module &M) { |
3835 | 6.98k | bool Changed = false; |
3836 | 6.98k | const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker"; |
3837 | 6.98k | NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey); |
3838 | 6.98k | if (ModRetainReleaseMarker) { |
3839 | 1 | MDNode *Op = ModRetainReleaseMarker->getOperand(0); |
3840 | 1 | if (Op) { |
3841 | 1 | MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0)); |
3842 | 1 | if (ID) { |
3843 | 1 | SmallVector<StringRef, 4> ValueComp; |
3844 | 1 | ID->getString().split(ValueComp, "#"); |
3845 | 1 | if (ValueComp.size() == 2) { |
3846 | 1 | std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str(); |
3847 | 1 | ID = MDString::get(M.getContext(), NewValue); |
3848 | 1 | } |
3849 | 1 | M.addModuleFlag(Module::Error, MarkerKey, ID); |
3850 | 1 | M.eraseNamedMetadata(ModRetainReleaseMarker); |
3851 | 1 | Changed = true; |
3852 | 1 | } |
3853 | 1 | } |
3854 | 1 | } |
3855 | 6.98k | return Changed; |
3856 | 6.98k | } |
3857 | | |
// Rewrite legacy module flags in place. Returns true if anything changed.
bool llvm::UpgradeModuleFlags(Module &M) {
  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
  if (!ModFlags)
    return false;

  bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
  for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
    MDNode *Op = ModFlags->getOperand(I);
    // Well-formed module flags are triples: (behavior, key, value).
    if (Op->getNumOperands() != 3)
      continue;
    MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
    if (!ID)
      continue;
    if (ID->getString() == "Objective-C Image Info Version")
      HasObjCFlag = true;
    if (ID->getString() == "Objective-C Class Properties")
      HasClassProperties = true;
    // Upgrade PIC/PIE Module Flags. The module flag behavior for these two
    // fields was Error and now they are Max.
    if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
        if (Behavior->getLimitedValue() == Module::Error) {
          // Rebuild the flag triple with behavior Max; key and value are kept.
          Type *Int32Ty = Type::getInt32Ty(M.getContext());
          Metadata *Ops[3] = {
              ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
              MDString::get(M.getContext(), ID->getString()),
              Op->getOperand(2)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }
    // Upgrade Objective-C Image Info Section. Remove the whitespace in the
    // section name so that llvm-lto will not complain about mismatching
    // module flags that are functionally the same.
    if (ID->getString() == "Objective-C Image Info Section") {
      if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
        SmallVector<StringRef, 4> ValueComp;
        Value->getString().split(ValueComp, " ");
        // More than one component means the section string contained spaces.
        if (ValueComp.size() != 1) {
          std::string NewValue;
          for (auto &S : ValueComp)
            NewValue += S.str();
          Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
                              MDString::get(M.getContext(), NewValue)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }
  }

  // "Objective-C Class Properties" is recently added for Objective-C. We
  // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
  // flag of value 0, so we can correctly downgrade this flag when trying to
  // link an ObjC bitcode without this module flag with an ObjC bitcode with
  // this module flag.
  if (HasObjCFlag && !HasClassProperties) {
    M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
                    (uint32_t)0);
    Changed = true;
  }

  return Changed;
}
3924 | | |
3925 | 37.7k | void llvm::UpgradeSectionAttributes(Module &M) { |
3926 | 37.7k | auto TrimSpaces = [](StringRef Section) -> std::string { |
3927 | 2 | SmallVector<StringRef, 5> Components; |
3928 | 2 | Section.split(Components, ','); |
3929 | 2 | |
3930 | 2 | SmallString<32> Buffer; |
3931 | 2 | raw_svector_ostream OS(Buffer); |
3932 | 2 | |
3933 | 2 | for (auto Component : Components) |
3934 | 8 | OS << ',' << Component.trim(); |
3935 | 2 | |
3936 | 2 | return OS.str().substr(1); |
3937 | 2 | }; |
3938 | 37.7k | |
3939 | 37.7k | for (auto &GV : M.globals()) { |
3940 | 30.4k | if (!GV.hasSection()) |
3941 | 29.8k | continue; |
3942 | 632 | |
3943 | 632 | StringRef Section = GV.getSection(); |
3944 | 632 | |
3945 | 632 | if (!Section.startswith("__DATA, __objc_catlist")) |
3946 | 630 | continue; |
3947 | 2 | |
3948 | 2 | // __DATA, __objc_catlist, regular, no_dead_strip |
3949 | 2 | // __DATA,__objc_catlist,regular,no_dead_strip |
3950 | 2 | GV.setSection(TrimSpaces(Section)); |
3951 | 2 | } |
3952 | 37.7k | } |
3953 | | |
3954 | 4 | static bool isOldLoopArgument(Metadata *MD) { |
3955 | 4 | auto *T = dyn_cast_or_null<MDTuple>(MD); |
3956 | 4 | if (!T) |
3957 | 0 | return false; |
3958 | 4 | if (T->getNumOperands() < 1) |
3959 | 0 | return false; |
3960 | 4 | auto *S = dyn_cast_or_null<MDString>(T->getOperand(0)); |
3961 | 4 | if (!S) |
3962 | 2 | return false; |
3963 | 2 | return S->getString().startswith("llvm.vectorizer."); |
3964 | 2 | } |
3965 | | |
3966 | 8 | static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) { |
3967 | 8 | StringRef OldPrefix = "llvm.vectorizer."; |
3968 | 8 | assert(OldTag.startswith(OldPrefix) && "Expected old prefix"); |
3969 | 8 | |
3970 | 8 | if (OldTag == "llvm.vectorizer.unroll") |
3971 | 2 | return MDString::get(C, "llvm.loop.interleave.count"); |
3972 | 6 | |
3973 | 6 | return MDString::get( |
3974 | 6 | C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size())) |
3975 | 6 | .str()); |
3976 | 6 | } |
3977 | | |
3978 | 10 | static Metadata *upgradeLoopArgument(Metadata *MD) { |
3979 | 10 | auto *T = dyn_cast_or_null<MDTuple>(MD); |
3980 | 10 | if (!T) |
3981 | 0 | return MD; |
3982 | 10 | if (T->getNumOperands() < 1) |
3983 | 0 | return MD; |
3984 | 10 | auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0)); |
3985 | 10 | if (!OldTag) |
3986 | 2 | return MD; |
3987 | 8 | if (!OldTag->getString().startswith("llvm.vectorizer.")) |
3988 | 0 | return MD; |
3989 | 8 | |
3990 | 8 | // This has an old tag. Upgrade it. |
3991 | 8 | SmallVector<Metadata *, 8> Ops; |
3992 | 8 | Ops.reserve(T->getNumOperands()); |
3993 | 8 | Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString())); |
3994 | 16 | for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I8 ) |
3995 | 8 | Ops.push_back(T->getOperand(I)); |
3996 | 8 | |
3997 | 8 | return MDTuple::get(T->getContext(), Ops); |
3998 | 8 | } |
3999 | | |
4000 | 2 | MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) { |
4001 | 2 | auto *T = dyn_cast<MDTuple>(&N); |
4002 | 2 | if (!T) |
4003 | 0 | return &N; |
4004 | 2 | |
4005 | 2 | if (none_of(T->operands(), isOldLoopArgument)) |
4006 | 0 | return &N; |
4007 | 2 | |
4008 | 2 | SmallVector<Metadata *, 8> Ops; |
4009 | 2 | Ops.reserve(T->getNumOperands()); |
4010 | 2 | for (Metadata *MD : T->operands()) |
4011 | 10 | Ops.push_back(upgradeLoopArgument(MD)); |
4012 | 2 | |
4013 | 2 | return MDTuple::get(T->getContext(), Ops); |
4014 | 2 | } |