Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
Line
Count
Source
1
//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// Define several functions to decode x86 specific shuffle semantics into a
10
// generic vector mask.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "X86ShuffleDecode.h"
15
#include "llvm/ADT/ArrayRef.h"
16
17
//===----------------------------------------------------------------------===//
18
//  Vector Mask Decoding
19
//===----------------------------------------------------------------------===//
20
21
namespace llvm {
22
23
21.8k
void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
24
21.8k
  // Defaults the copying the dest value.
25
21.8k
  ShuffleMask.push_back(0);
26
21.8k
  ShuffleMask.push_back(1);
27
21.8k
  ShuffleMask.push_back(2);
28
21.8k
  ShuffleMask.push_back(3);
29
21.8k
30
21.8k
  // Decode the immediate.
31
21.8k
  unsigned ZMask = Imm & 15;
32
21.8k
  unsigned CountD = (Imm >> 4) & 3;
33
21.8k
  unsigned CountS = (Imm >> 6) & 3;
34
21.8k
35
21.8k
  // CountS selects which input element to use.
36
21.8k
  unsigned InVal = 4 + CountS;
37
21.8k
  // CountD specifies which element of destination to update.
38
21.8k
  ShuffleMask[CountD] = InVal;
39
21.8k
  // ZMask zaps values, potentially overriding the CountD elt.
40
21.8k
  if (ZMask & 1) 
ShuffleMask[0] = SM_SentinelZero718
;
41
21.8k
  if (ZMask & 2) 
ShuffleMask[1] = SM_SentinelZero732
;
42
21.8k
  if (ZMask & 4) 
ShuffleMask[2] = SM_SentinelZero1.73k
;
43
21.8k
  if (ZMask & 8) 
ShuffleMask[3] = SM_SentinelZero1.75k
;
44
21.8k
}
45
46
void DecodeInsertElementMask(unsigned NumElts, unsigned Idx, unsigned Len,
47
712
                             SmallVectorImpl<int> &ShuffleMask) {
48
712
  assert((Idx + Len) <= NumElts && "Insertion out of range");
49
712
50
3.30k
  for (unsigned i = 0; i != NumElts; 
++i2.58k
)
51
2.58k
    ShuffleMask.push_back(i);
52
2.00k
  for (unsigned i = 0; i != Len; 
++i1.29k
)
53
1.29k
    ShuffleMask[Idx + i] = NumElts + i;
54
712
}
55
56
// <3,1> or <6,7,2,3>
57
498
void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
58
1.24k
  for (unsigned i = NElts / 2; i != NElts; 
++i747
)
59
747
    ShuffleMask.push_back(NElts + i);
60
498
61
1.24k
  for (unsigned i = NElts / 2; i != NElts; 
++i747
)
62
747
    ShuffleMask.push_back(i);
63
498
}
64
65
// <0,2> or <0,1,4,5>
66
1.91k
void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
67
4.74k
  for (unsigned i = 0; i != NElts / 2; 
++i2.83k
)
68
2.83k
    ShuffleMask.push_back(i);
69
1.91k
70
4.74k
  for (unsigned i = 0; i != NElts / 2; 
++i2.83k
)
71
2.83k
    ShuffleMask.push_back(NElts + i);
72
1.91k
}
73
74
1.06k
void DecodeMOVSLDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) {
75
5.20k
  for (int i = 0, e = NumElts / 2; i < e; 
++i4.14k
) {
76
4.14k
    ShuffleMask.push_back(2 * i);
77
4.14k
    ShuffleMask.push_back(2 * i);
78
4.14k
  }
79
1.06k
}
80
81
7.62k
void DecodeMOVSHDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) {
82
24.8k
  for (int i = 0, e = NumElts / 2; i < e; 
++i17.2k
) {
83
17.2k
    ShuffleMask.push_back(2 * i + 1);
84
17.2k
    ShuffleMask.push_back(2 * i + 1);
85
17.2k
  }
86
7.62k
}
87
88
2.79k
void DecodeMOVDDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) {
89
2.79k
  const unsigned NumLaneElts = 2;
90
2.79k
91
6.78k
  for (unsigned l = 0; l < NumElts; 
l += NumLaneElts3.98k
)
92
11.9k
    
for (unsigned i = 0; 3.98k
i < NumLaneElts;
++i7.97k
)
93
7.97k
      ShuffleMask.push_back(l);
94
2.79k
}
95
96
void DecodePSLLDQMask(unsigned NumElts, unsigned Imm,
97
12.1k
                      SmallVectorImpl<int> &ShuffleMask) {
98
12.1k
  const unsigned NumLaneElts = 16;
99
12.1k
100
26.6k
  for (unsigned l = 0; l < NumElts; 
l += NumLaneElts14.4k
)
101
245k
    
for (unsigned i = 0; 14.4k
i < NumLaneElts;
++i231k
) {
102
231k
      int M = SM_SentinelZero;
103
231k
      if (i >= Imm) 
M = i - Imm + l72.1k
;
104
231k
      ShuffleMask.push_back(M);
105
231k
    }
106
12.1k
}
107
108
void DecodePSRLDQMask(unsigned NumElts, unsigned Imm,
109
9.13k
                      SmallVectorImpl<int> &ShuffleMask) {
110
9.13k
  const unsigned NumLaneElts = 16;
111
9.13k
112
19.4k
  for (unsigned l = 0; l < NumElts; 
l += NumLaneElts10.2k
)
113
174k
    
for (unsigned i = 0; 10.2k
i < NumLaneElts;
++i164k
) {
114
164k
      unsigned Base = i + Imm;
115
164k
      int M = Base + l;
116
164k
      if (Base >= NumLaneElts) 
M = SM_SentinelZero107k
;
117
164k
      ShuffleMask.push_back(M);
118
164k
    }
119
9.13k
}
120
121
void DecodePALIGNRMask(unsigned NumElts, unsigned Imm,
122
20.0k
                       SmallVectorImpl<int> &ShuffleMask) {
123
20.0k
  const unsigned NumLaneElts = 16;
124
20.0k
125
46.7k
  for (unsigned l = 0; l != NumElts; 
l += NumLaneElts26.7k
) {
126
454k
    for (unsigned i = 0; i != NumLaneElts; 
++i427k
) {
127
427k
      unsigned Base = i + Imm;
128
427k
      // if i+imm is out of this lane then we actually need the other source
129
427k
      if (Base >= NumLaneElts) 
Base += NumElts - NumLaneElts196k
;
130
427k
      ShuffleMask.push_back(Base + l);
131
427k
    }
132
26.7k
  }
133
20.0k
}
134
135
void DecodeVALIGNMask(unsigned NumElts, unsigned Imm,
136
239
                      SmallVectorImpl<int> &ShuffleMask) {
137
239
  // Not all bits of the immediate are used so mask it.
138
239
  assert(isPowerOf2_32(NumElts) && "NumElts should be power of 2");
139
239
  Imm = Imm & (NumElts - 1);
140
2.64k
  for (unsigned i = 0; i != NumElts; 
++i2.40k
)
141
2.40k
    ShuffleMask.push_back(i + Imm);
142
239
}
143
144
/// DecodePSHUFMask - This decodes the shuffle masks for pshufw, pshufd, and vpermilp*.
145
/// VT indicates the type of the vector allowing it to handle different
146
/// datatypes and vector widths.
147
void DecodePSHUFMask(unsigned NumElts, unsigned ScalarBits, unsigned Imm,
148
381k
                     SmallVectorImpl<int> &ShuffleMask) {
149
381k
  unsigned Size = NumElts * ScalarBits;
150
381k
  unsigned NumLanes = Size / 128;
151
381k
  if (NumLanes == 0) 
NumLanes = 186
; // Handle MMX
152
381k
  unsigned NumLaneElts = NumElts / NumLanes;
153
381k
154
381k
  uint32_t SplatImm = (Imm & 0xff) * 0x01010101;
155
775k
  for (unsigned l = 0; l != NumElts; 
l += NumLaneElts393k
) {
156
1.93M
    for (unsigned i = 0; i != NumLaneElts; 
++i1.54M
) {
157
1.54M
      ShuffleMask.push_back(SplatImm % NumLaneElts + l);
158
1.54M
      SplatImm /= NumLaneElts;
159
1.54M
    }
160
393k
  }
161
381k
}
162
163
void DecodePSHUFHWMask(unsigned NumElts, unsigned Imm,
164
16.3k
                       SmallVectorImpl<int> &ShuffleMask) {
165
35.4k
  for (unsigned l = 0; l != NumElts; 
l += 819.0k
) {
166
19.0k
    unsigned NewImm = Imm;
167
95.1k
    for (unsigned i = 0, e = 4; i != e; 
++i76.1k
) {
168
76.1k
      ShuffleMask.push_back(l + i);
169
76.1k
    }
170
95.1k
    for (unsigned i = 4, e = 8; i != e; 
++i76.1k
) {
171
76.1k
      ShuffleMask.push_back(l + 4 + (NewImm & 3));
172
76.1k
      NewImm >>= 2;
173
76.1k
    }
174
19.0k
  }
175
16.3k
}
176
177
void DecodePSHUFLWMask(unsigned NumElts, unsigned Imm,
178
36.5k
                       SmallVectorImpl<int> &ShuffleMask) {
179
76.0k
  for (unsigned l = 0; l != NumElts; 
l += 839.5k
) {
180
39.5k
    unsigned NewImm = Imm;
181
197k
    for (unsigned i = 0, e = 4; i != e; 
++i158k
) {
182
158k
      ShuffleMask.push_back(l + (NewImm & 3));
183
158k
      NewImm >>= 2;
184
158k
    }
185
197k
    for (unsigned i = 4, e = 8; i != e; 
++i158k
) {
186
158k
      ShuffleMask.push_back(l + i);
187
158k
    }
188
39.5k
  }
189
36.5k
}
190
191
7
void DecodePSWAPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) {
192
7
  unsigned NumHalfElts = NumElts / 2;
193
7
194
14
  for (unsigned l = 0; l != NumHalfElts; 
++l7
)
195
7
    ShuffleMask.push_back(l + NumHalfElts);
196
14
  for (unsigned h = 0; h != NumHalfElts; 
++h7
)
197
7
    ShuffleMask.push_back(h);
198
7
}
199
200
/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates
201
/// the type of the vector allowing it to handle different datatypes and vector
202
/// widths.
203
void DecodeSHUFPMask(unsigned NumElts, unsigned ScalarBits,
204
36.2k
                     unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
205
36.2k
  unsigned NumLaneElts = 128 / ScalarBits;
206
36.2k
207
36.2k
  unsigned NewImm = Imm;
208
75.4k
  for (unsigned l = 0; l != NumElts; 
l += NumLaneElts39.2k
) {
209
39.2k
    // each half of a lane comes from different source
210
117k
    for (unsigned s = 0; s != NumElts * 2; 
s += NumElts78.4k
) {
211
227k
      for (unsigned i = 0; i != NumLaneElts / 2; 
++i149k
) {
212
149k
        ShuffleMask.push_back(NewImm % NumLaneElts + s + l);
213
149k
        NewImm /= NumLaneElts;
214
149k
      }
215
78.4k
    }
216
39.2k
    if (NumLaneElts == 4) 
NewImm = Imm35.4k
; // reload imm
217
39.2k
  }
218
36.2k
}
219
220
/// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd
221
/// and punpckh*. VT indicates the type of the vector allowing it to handle
222
/// different datatypes and vector widths.
223
void DecodeUNPCKHMask(unsigned NumElts, unsigned ScalarBits,
224
59.7k
                      SmallVectorImpl<int> &ShuffleMask) {
225
59.7k
  // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
226
59.7k
  // independently on 128-bit lanes.
227
59.7k
  unsigned NumLanes = (NumElts * ScalarBits) / 128;
228
59.7k
  if (NumLanes == 0) 
NumLanes = 1105
; // Handle MMX
229
59.7k
  unsigned NumLaneElts = NumElts / NumLanes;
230
59.7k
231
137k
  for (unsigned l = 0; l != NumElts; 
l += NumLaneElts77.6k
) {
232
503k
    for (unsigned i = l + NumLaneElts / 2, e = l + NumLaneElts; i != e; 
++i425k
) {
233
425k
      ShuffleMask.push_back(i);           // Reads from dest/src1
234
425k
      ShuffleMask.push_back(i + NumElts); // Reads from src/src2
235
425k
    }
236
77.6k
  }
237
59.7k
}
238
239
/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd
240
/// and punpckl*. VT indicates the type of the vector allowing it to handle
241
/// different datatypes and vector widths.
242
void DecodeUNPCKLMask(unsigned NumElts, unsigned ScalarBits,
243
157k
                      SmallVectorImpl<int> &ShuffleMask) {
244
157k
  // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
245
157k
  // independently on 128-bit lanes.
246
157k
  unsigned NumLanes = (NumElts * ScalarBits) / 128;
247
157k
  if (NumLanes == 0 ) 
NumLanes = 1439
; // Handle MMX
248
157k
  unsigned NumLaneElts = NumElts / NumLanes;
249
157k
250
344k
  for (unsigned l = 0; l != NumElts; 
l += NumLaneElts186k
) {
251
1.08M
    for (unsigned i = l, e = l + NumLaneElts / 2; i != e; 
++i902k
) {
252
902k
      ShuffleMask.push_back(i);           // Reads from dest/src1
253
902k
      ShuffleMask.push_back(i + NumElts); // Reads from src/src2
254
902k
    }
255
186k
  }
256
157k
}
257
258
/// Decodes a broadcast of the first element of a vector.
259
void DecodeVectorBroadcast(unsigned NumElts,
260
7.04k
                           SmallVectorImpl<int> &ShuffleMask) {
261
7.04k
  ShuffleMask.append(NumElts, 0);
262
7.04k
}
263
264
/// Decodes a broadcast of a subvector to a larger vector type.
265
void DecodeSubVectorBroadcast(unsigned DstNumElts, unsigned SrcNumElts,
266
1.27k
                              SmallVectorImpl<int> &ShuffleMask) {
267
1.27k
  unsigned Scale = DstNumElts / SrcNumElts;
268
1.27k
269
5.00k
  for (unsigned i = 0; i != Scale; 
++i3.73k
)
270
15.1k
    
for (unsigned j = 0; 3.73k
j != SrcNumElts;
++j11.4k
)
271
11.4k
      ShuffleMask.push_back(j);
272
1.27k
}
273
274
/// Decode a shuffle packed values at 128-bit granularity
275
/// (SHUFF32x4/SHUFF64x2/SHUFI32x4/SHUFI64x2)
276
/// immediate mask into a shuffle mask.
277
void decodeVSHUF64x2FamilyMask(unsigned NumElts, unsigned ScalarSize,
278
                               unsigned Imm,
279
2.15k
                               SmallVectorImpl<int> &ShuffleMask) {
280
2.15k
  unsigned NumElementsInLane = 128 / ScalarSize;
281
2.15k
  unsigned NumLanes = NumElts / NumElementsInLane;
282
2.15k
283
9.21k
  for (unsigned l = 0; l != NumElts; 
l += NumElementsInLane7.05k
) {
284
7.05k
    unsigned Index = (Imm % NumLanes) * NumElementsInLane;
285
7.05k
    Imm /= NumLanes; // Discard the bits we just used.
286
7.05k
    // We actually need the other source.
287
7.05k
    if (l >= (NumElts / 2))
288
3.52k
      Index += NumElts;
289
22.9k
    for (unsigned i = 0; i != NumElementsInLane; 
++i15.9k
)
290
15.9k
      ShuffleMask.push_back(Index + i);
291
7.05k
  }
292
2.15k
}
293
294
void DecodeVPERM2X128Mask(unsigned NumElts, unsigned Imm,
295
3.86k
                          SmallVectorImpl<int> &ShuffleMask) {
296
3.86k
  unsigned HalfSize = NumElts / 2;
297
3.86k
298
11.5k
  for (unsigned l = 0; l != 2; 
++l7.72k
) {
299
7.72k
    unsigned HalfMask = Imm >> (l * 4);
300
7.72k
    unsigned HalfBegin = (HalfMask & 0x3) * HalfSize;
301
23.1k
    for (unsigned i = HalfBegin, e = HalfBegin + HalfSize; i != e; 
++i15.4k
)
302
15.4k
      ShuffleMask.push_back((HalfMask & 8) ? 
SM_SentinelZero838
:
(int)i14.6k
);
303
7.72k
  }
304
3.86k
}
305
306
void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
307
108k
                      SmallVectorImpl<int> &ShuffleMask) {
308
2.40M
  for (int i = 0, e = RawMask.size(); i < e; 
++i2.29M
) {
309
2.29M
    uint64_t M = RawMask[i];
310
2.29M
    if (UndefElts[i]) {
311
414k
      ShuffleMask.push_back(SM_SentinelUndef);
312
414k
      continue;
313
414k
    }
314
1.88M
    // For 256/512-bit vectors the base of the shuffle is the 128-bit
315
1.88M
    // subvector we're inside.
316
1.88M
    int Base = (i / 16) * 16;
317
1.88M
    // If the high bit (7) of the byte is set, the element is zeroed.
318
1.88M
    if (M & (1 << 7))
319
110k
      ShuffleMask.push_back(SM_SentinelZero);
320
1.77M
    else {
321
1.77M
      // Only the least significant 4 bits of the byte are used.
322
1.77M
      int Index = Base + (M & 0xf);
323
1.77M
      ShuffleMask.push_back(Index);
324
1.77M
    }
325
1.88M
  }
326
108k
}
327
328
void DecodeBLENDMask(unsigned NumElts, unsigned Imm,
329
55.2k
                     SmallVectorImpl<int> &ShuffleMask) {
330
383k
  for (unsigned i = 0; i < NumElts; 
++i328k
) {
331
328k
    // If there are more than 8 elements in the vector, then any immediate blend
332
328k
    // mask wraps around.
333
328k
    unsigned Bit = i % 8;
334
328k
    ShuffleMask.push_back(((Imm >> Bit) & 1) ? 
NumElts + i121k
:
i206k
);
335
328k
  }
336
55.2k
}
337
338
void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
339
2.85k
                      SmallVectorImpl<int> &ShuffleMask) {
340
2.85k
  assert(RawMask.size() == 16 && "Illegal VPPERM shuffle mask size");
341
2.85k
342
2.85k
  // VPPERM Operation
343
2.85k
  // Bits[4:0] - Byte Index (0 - 31)
344
2.85k
  // Bits[7:5] - Permute Operation
345
2.85k
  //
346
2.85k
  // Permute Operation:
347
2.85k
  // 0 - Source byte (no logical operation).
348
2.85k
  // 1 - Invert source byte.
349
2.85k
  // 2 - Bit reverse of source byte.
350
2.85k
  // 3 - Bit reverse of inverted source byte.
351
2.85k
  // 4 - 00h (zero - fill).
352
2.85k
  // 5 - FFh (ones - fill).
353
2.85k
  // 6 - Most significant bit of source byte replicated in all bit positions.
354
2.85k
  // 7 - Invert most significant bit of source byte and replicate in all bit positions.
355
20.8k
  for (int i = 0, e = RawMask.size(); i < e; 
++i17.9k
) {
356
19.6k
    if (UndefElts[i]) {
357
409
      ShuffleMask.push_back(SM_SentinelUndef);
358
409
      continue;
359
409
    }
360
19.2k
361
19.2k
    uint64_t M = RawMask[i];
362
19.2k
    uint64_t PermuteOp = (M >> 5) & 0x7;
363
19.2k
    if (PermuteOp == 4) {
364
592
      ShuffleMask.push_back(SM_SentinelZero);
365
592
      continue;
366
592
    }
367
18.6k
    if (PermuteOp != 0) {
368
1.74k
      ShuffleMask.clear();
369
1.74k
      return;
370
1.74k
    }
371
16.9k
372
16.9k
    uint64_t Index = M & 0x1F;
373
16.9k
    ShuffleMask.push_back((int)Index);
374
16.9k
  }
375
2.85k
}
376
377
/// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD.
378
void DecodeVPERMMask(unsigned NumElts, unsigned Imm,
379
22.1k
                     SmallVectorImpl<int> &ShuffleMask) {
380
45.1k
  for (unsigned l = 0; l != NumElts; 
l += 423.0k
)
381
115k
    
for (unsigned i = 0; 23.0k
i != 4;
++i92.0k
)
382
92.0k
      ShuffleMask.push_back(l + ((Imm >> (2 * i)) & 3));
383
22.1k
}
384
385
void DecodeZeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits,
386
                          unsigned NumDstElts, bool IsAnyExtend,
387
13.4k
                          SmallVectorImpl<int> &Mask) {
388
13.4k
  unsigned Scale = DstScalarBits / SrcScalarBits;
389
13.4k
  assert(SrcScalarBits < DstScalarBits &&
390
13.4k
         "Expected zero extension mask to increase scalar size");
391
13.4k
392
97.4k
  for (unsigned i = 0; i != NumDstElts; 
i++83.9k
) {
393
83.9k
    Mask.push_back(i);
394
241k
    for (unsigned j = 1; j != Scale; 
j++157k
)
395
157k
      Mask.push_back(IsAnyExtend ? 
SM_SentinelUndef3.59k
:
SM_SentinelZero154k
);
396
83.9k
  }
397
13.4k
}
398
399
void DecodeZeroMoveLowMask(unsigned NumElts,
400
12.5k
                           SmallVectorImpl<int> &ShuffleMask) {
401
12.5k
  ShuffleMask.push_back(0);
402
44.4k
  for (unsigned i = 1; i < NumElts; 
i++31.8k
)
403
31.8k
    ShuffleMask.push_back(SM_SentinelZero);
404
12.5k
}
405
406
void DecodeScalarMoveMask(unsigned NumElts, bool IsLoad,
407
19.6k
                          SmallVectorImpl<int> &Mask) {
408
19.6k
  // First element comes from the first element of second source.
409
19.6k
  // Remaining elements: Load zero extends / Move copies from first source.
410
19.6k
  Mask.push_back(NumElts);
411
54.9k
  for (unsigned i = 1; i < NumElts; 
i++35.3k
)
412
35.3k
    Mask.push_back(IsLoad ? 
static_cast<int>(SM_SentinelZero)8.87k
:
i26.4k
);
413
19.6k
}
414
415
void DecodeEXTRQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx,
416
330
                      SmallVectorImpl<int> &ShuffleMask) {
417
330
  unsigned HalfElts = NumElts / 2;
418
330
419
330
  // Only the bottom 6 bits are valid for each immediate.
420
330
  Len &= 0x3F;
421
330
  Idx &= 0x3F;
422
330
423
330
  // We can only decode this bit extraction instruction as a shuffle if both the
424
330
  // length and index work with whole elements.
425
330
  if (0 != (Len % EltSize) || 
0 != (Idx % EltSize)249
)
426
81
    return;
427
249
428
249
  // A length of zero is equivalent to a bit length of 64.
429
249
  if (Len == 0)
430
8
    Len = 64;
431
249
432
249
  // If the length + index exceeds the bottom 64 bits the result is undefined.
433
249
  if ((Len + Idx) > 64) {
434
3
    ShuffleMask.append(NumElts, SM_SentinelUndef);
435
3
    return;
436
3
  }
437
246
438
246
  // Convert index and index to work with elements.
439
246
  Len /= EltSize;
440
246
  Idx /= EltSize;
441
246
442
246
  // EXTRQ: Extract Len elements starting from Idx. Zero pad the remaining
443
246
  // elements of the lower 64-bits. The upper 64-bits are undefined.
444
641
  for (int i = 0; i != Len; 
++i395
)
445
395
    ShuffleMask.push_back(i + Idx);
446
1.31k
  for (int i = Len; i != (int)HalfElts; 
++i1.07k
)
447
1.07k
    ShuffleMask.push_back(SM_SentinelZero);
448
1.71k
  for (int i = HalfElts; i != (int)NumElts; 
++i1.46k
)
449
1.46k
    ShuffleMask.push_back(SM_SentinelUndef);
450
246
}
451
452
void DecodeINSERTQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx,
453
313
                        SmallVectorImpl<int> &ShuffleMask) {
454
313
  unsigned HalfElts = NumElts / 2;
455
313
456
313
  // Only the bottom 6 bits are valid for each immediate.
457
313
  Len &= 0x3F;
458
313
  Idx &= 0x3F;
459
313
460
313
  // We can only decode this bit insertion instruction as a shuffle if both the
461
313
  // length and index work with whole elements.
462
313
  if (0 != (Len % EltSize) || 
0 != (Idx % EltSize)232
)
463
81
    return;
464
232
465
232
  // A length of zero is equivalent to a bit length of 64.
466
232
  if (Len == 0)
467
8
    Len = 64;
468
232
469
232
  // If the length + index exceeds the bottom 64 bits the result is undefined.
470
232
  if ((Len + Idx) > 64) {
471
3
    ShuffleMask.append(NumElts, SM_SentinelUndef);
472
3
    return;
473
3
  }
474
229
475
229
  // Convert index and index to work with elements.
476
229
  Len /= EltSize;
477
229
  Idx /= EltSize;
478
229
479
229
  // INSERTQ: Extract lowest Len elements from lower half of second source and
480
229
  // insert over first source starting at Idx element. The upper 64-bits are
481
229
  // undefined.
482
475
  for (int i = 0; i != Idx; 
++i246
)
483
246
    ShuffleMask.push_back(i);
484
648
  for (int i = 0; i != Len; 
++i419
)
485
419
    ShuffleMask.push_back(i + NumElts);
486
1.02k
  for (int i = Idx + Len; i != (int)HalfElts; 
++i793
)
487
793
    ShuffleMask.push_back(i);
488
1.68k
  for (int i = HalfElts; i != (int)NumElts; 
++i1.45k
)
489
1.45k
    ShuffleMask.push_back(SM_SentinelUndef);
490
229
}
491
492
void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits,
493
                        ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
494
1.33k
                        SmallVectorImpl<int> &ShuffleMask) {
495
1.33k
  unsigned VecSize = NumElts * ScalarBits;
496
1.33k
  unsigned NumLanes = VecSize / 128;
497
1.33k
  unsigned NumEltsPerLane = NumElts / NumLanes;
498
1.33k
  assert((VecSize == 128 || VecSize == 256 || VecSize == 512) &&
499
1.33k
         "Unexpected vector size");
500
1.33k
  assert((ScalarBits == 32 || ScalarBits == 64) && "Unexpected element size");
501
1.33k
502
11.7k
  for (unsigned i = 0, e = RawMask.size(); i < e; 
++i10.3k
) {
503
10.3k
    if (UndefElts[i]) {
504
800
      ShuffleMask.push_back(SM_SentinelUndef);
505
800
      continue;
506
800
    }
507
9.59k
    uint64_t M = RawMask[i];
508
9.59k
    M = (ScalarBits == 64 ? 
((M >> 1) & 0x1)416
:
(M & 0x3)9.17k
);
509
9.59k
    unsigned LaneOffset = i & ~(NumEltsPerLane - 1);
510
9.59k
    ShuffleMask.push_back((int)(LaneOffset + M));
511
9.59k
  }
512
1.33k
}
513
514
void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z,
515
                         ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
516
438
                         SmallVectorImpl<int> &ShuffleMask) {
517
438
  unsigned VecSize = NumElts * ScalarBits;
518
438
  unsigned NumLanes = VecSize / 128;
519
438
  unsigned NumEltsPerLane = NumElts / NumLanes;
520
438
  assert((VecSize == 128 || VecSize == 256) && "Unexpected vector size");
521
438
  assert((ScalarBits == 32 || ScalarBits == 64) && "Unexpected element size");
522
438
  assert((NumElts == RawMask.size()) && "Unexpected mask size");
523
438
524
2.74k
  for (unsigned i = 0, e = RawMask.size(); i < e; 
++i2.30k
) {
525
2.30k
    if (UndefElts[i]) {
526
74
      ShuffleMask.push_back(SM_SentinelUndef);
527
74
      continue;
528
74
    }
529
2.23k
530
2.23k
    // VPERMIL2 Operation.
531
2.23k
    // Bits[3] - Match Bit.
532
2.23k
    // Bits[2:1] - (Per Lane) PD Shuffle Mask.
533
2.23k
    // Bits[2:0] - (Per Lane) PS Shuffle Mask.
534
2.23k
    uint64_t Selector = RawMask[i];
535
2.23k
    unsigned MatchBit = (Selector >> 3) & 0x1;
536
2.23k
537
2.23k
    // M2Z[0:1]     MatchBit
538
2.23k
    //   0Xb           X        Source selected by Selector index.
539
2.23k
    //   10b           0        Source selected by Selector index.
540
2.23k
    //   10b           1        Zero.
541
2.23k
    //   11b           0        Zero.
542
2.23k
    //   11b           1        Source selected by Selector index.
543
2.23k
    if ((M2Z & 0x2) != 0 && 
MatchBit != (M2Z & 0x1)984
) {
544
464
      ShuffleMask.push_back(SM_SentinelZero);
545
464
      continue;
546
464
    }
547
1.77k
548
1.77k
    int Index = i & ~(NumEltsPerLane - 1);
549
1.77k
    if (ScalarBits == 64)
550
388
      Index += (Selector >> 1) & 0x1;
551
1.38k
    else
552
1.38k
      Index += Selector & 0x3;
553
1.77k
554
1.77k
    int Src = (Selector >> 2) & 0x1;
555
1.77k
    Index += Src * NumElts;
556
1.77k
    ShuffleMask.push_back(Index);
557
1.77k
  }
558
438
}
559
560
void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
561
15.7k
                      SmallVectorImpl<int> &ShuffleMask) {
562
15.7k
  uint64_t EltMaskSize = RawMask.size() - 1;
563
178k
  for (int i = 0, e = RawMask.size(); i != e; 
++i162k
) {
564
162k
    if (UndefElts[i]) {
565
12.3k
      ShuffleMask.push_back(SM_SentinelUndef);
566
12.3k
      continue;
567
12.3k
    }
568
150k
    uint64_t M = RawMask[i];
569
150k
    M &= EltMaskSize;
570
150k
    ShuffleMask.push_back((int)M);
571
150k
  }
572
15.7k
}
573
574
void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
575
5.54k
                      SmallVectorImpl<int> &ShuffleMask) {
576
5.54k
  uint64_t EltMaskSize = (RawMask.size() * 2) - 1;
577
77.9k
  for (int i = 0, e = RawMask.size(); i != e; 
++i72.4k
) {
578
72.4k
    if (UndefElts[i]) {
579
5.15k
      ShuffleMask.push_back(SM_SentinelUndef);
580
5.15k
      continue;
581
5.15k
    }
582
67.2k
    uint64_t M = RawMask[i];
583
67.2k
    M &= EltMaskSize;
584
67.2k
    ShuffleMask.push_back((int)M);
585
67.2k
  }
586
5.54k
}
587
588
} // llvm namespace