Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/X86/X86InterleavedAccess.cpp

//===- X86InterleavedAccess.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file contains the X86 implementation of the interleaved accesses
/// optimization generating X86-specific instructions/intrinsics for
/// interleaved access groups.
//
//===----------------------------------------------------------------------===//

#include "X86ISelLowering.h"
#include "X86Subtarget.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>

using namespace llvm;

namespace {

/// This class holds necessary information to represent an interleaved
/// access group and supports utilities to lower the group into
/// X86-specific instructions/intrinsics.
///  E.g. A group of interleaving access loads (Factor = 2; accessing every
///       other element)
///        %wide.vec = load <8 x i32>, <8 x i32>* %ptr
///        %v0 = shuffle <8 x i32> %wide.vec, <8 x i32> undef, <0, 2, 4, 6>
///        %v1 = shuffle <8 x i32> %wide.vec, <8 x i32> undef, <1, 3, 5, 7>
class X86InterleavedAccessGroup {
  /// Reference to the wide-load instruction of an interleaved access
  /// group.
  Instruction *const Inst;

  /// Reference to the shuffle(s), consumer(s) of the (load) 'Inst'.
  ArrayRef<ShuffleVectorInst *> Shuffles;

  /// Reference to the starting index of each user-shuffle.
  ArrayRef<unsigned> Indices;

  /// Reference to the interleaving stride in terms of elements.
  const unsigned Factor;

  /// Reference to the underlying target.
  const X86Subtarget &Subtarget;

  const DataLayout &DL;

  IRBuilder<> &Builder;

  /// Breaks down a vector \p 'Inst' of N elements into \p NumSubVectors
  /// sub vectors of type \p T. Returns the sub-vectors in \p DecomposedVectors.
  void decompose(Instruction *Inst, unsigned NumSubVectors, VectorType *T,
                 SmallVectorImpl<Instruction *> &DecomposedVectors);

  /// Performs matrix transposition on a 4x4 matrix \p InputVectors and
  /// returns the transposed-vectors in \p TransposedVectors.
  /// E.g.
  /// InputVectors:
  ///   In-V0 = p1, p2, p3, p4
  ///   In-V1 = q1, q2, q3, q4
  ///   In-V2 = r1, r2, r3, r4
  ///   In-V3 = s1, s2, s3, s4
  /// OutputVectors:
  ///   Out-V0 = p1, q1, r1, s1
  ///   Out-V1 = p2, q2, r2, s2
  ///   Out-V2 = p3, q3, r3, s3
  ///   Out-V3 = p4, q4, r4, s4
  void transpose_4x4(ArrayRef<Instruction *> InputVectors,
                     SmallVectorImpl<Value *> &TransposedMatrix);
  void interleave8bitStride4(ArrayRef<Instruction *> InputVectors,
                             SmallVectorImpl<Value *> &TransposedMatrix,
                             unsigned NumSubVecElems);
  void interleave8bitStride4VF8(ArrayRef<Instruction *> InputVectors,
                                SmallVectorImpl<Value *> &TransposedMatrix);
  void interleave8bitStride3(ArrayRef<Instruction *> InputVectors,
                             SmallVectorImpl<Value *> &TransposedMatrix,
                             unsigned NumSubVecElems);
  void deinterleave8bitStride3(ArrayRef<Instruction *> InputVectors,
                               SmallVectorImpl<Value *> &TransposedMatrix,
                               unsigned NumSubVecElems);

public:
  /// In order to form an interleaved access group, X86InterleavedAccessGroup
  /// requires a wide-load instruction \p 'I', a group of interleaved-vectors
  /// \p Shuffs, a reference to the first indices of each interleaved-vector
  /// \p 'Ind', and the interleaving stride factor \p F. In order to generate
  /// X86-specific instructions/intrinsics it also requires the underlying
  /// target information \p STarget.
  explicit X86InterleavedAccessGroup(Instruction *I,
                                     ArrayRef<ShuffleVectorInst *> Shuffs,
                                     ArrayRef<unsigned> Ind, const unsigned F,
                                     const X86Subtarget &STarget,
                                     IRBuilder<> &B)
      : Inst(I), Shuffles(Shuffs), Indices(Ind), Factor(F), Subtarget(STarget),
        DL(Inst->getModule()->getDataLayout()), Builder(B) {}

  /// Returns true if this interleaved access group can be lowered into
  /// x86-specific instructions/intrinsics, false otherwise.
  bool isSupported() const;

  /// Lowers this interleaved access group into X86-specific
  /// instructions/intrinsics.
  bool lowerIntoOptimizedSequence();
};

} // end anonymous namespace
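
// As a complement to the load example in the class comment above, an
// interleaved *store* group (Factor = 2; an illustrative IR sketch, not taken
// from an actual test case) that this lowering is handed looks roughly like:
//   %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1,
//                          <8 x i32> <i32 0, i32 4, i32 1, i32 5,
//                                     i32 2, i32 6, i32 3, i32 7>
//   store <8 x i32> %interleaved.vec, <8 x i32>* %ptr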

bool X86InterleavedAccessGroup::isSupported() const {
  VectorType *ShuffleVecTy = Shuffles[0]->getType();
  Type *ShuffleEltTy = ShuffleVecTy->getVectorElementType();
  unsigned ShuffleElemSize = DL.getTypeSizeInBits(ShuffleEltTy);
  unsigned WideInstSize;

  // Currently, lowering is supported for the following vectors:
  // Stride 4:
  //    1. Store and load of 4-element vectors of 64 bits on AVX.
  //    2. Store of 16/32-element vectors of 8 bits on AVX.
  // Stride 3:
  //    1. Load of 16/32-element vectors of 8 bits on AVX.
  if (!Subtarget.hasAVX() || (Factor != 4 && Factor != 3))
    return false;

  if (isa<LoadInst>(Inst)) {
    WideInstSize = DL.getTypeSizeInBits(Inst->getType());
    if (cast<LoadInst>(Inst)->getPointerAddressSpace())
      return false;
  } else
    WideInstSize = DL.getTypeSizeInBits(Shuffles[0]->getType());

  // We support a shuffle that represents stride 4 with 64-bit elements and a
  // wide-instruction size of 1024 bits.
  if (ShuffleElemSize == 64 && WideInstSize == 1024 && Factor == 4)
    return true;

  if (ShuffleElemSize == 8 && isa<StoreInst>(Inst) && Factor == 4 &&
      (WideInstSize == 256 || WideInstSize == 512 || WideInstSize == 1024 ||
       WideInstSize == 2048))
    return true;

  if (ShuffleElemSize == 8 && Factor == 3 &&
      (WideInstSize == 384 || WideInstSize == 768 || WideInstSize == 1536))
    return true;

  return false;
}
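
// For orientation (simple arithmetic on the checks above, not an exhaustive
// spec): a stride-3 load of <48 x i8> has WideInstSize = 48 * 8 = 384 bits,
// <96 x i8> gives 768, and <192 x i8> gives 1536, i.e. a per-shuffle VF of
// 16, 32 or 64 bytes respectively.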

void X86InterleavedAccessGroup::decompose(
    Instruction *VecInst, unsigned NumSubVectors, VectorType *SubVecTy,
    SmallVectorImpl<Instruction *> &DecomposedVectors) {
  assert((isa<LoadInst>(VecInst) || isa<ShuffleVectorInst>(VecInst)) &&
         "Expected Load or Shuffle");

  Type *VecWidth = VecInst->getType();
  (void)VecWidth;
  assert(VecWidth->isVectorTy() &&
         DL.getTypeSizeInBits(VecWidth) >=
             DL.getTypeSizeInBits(SubVecTy) * NumSubVectors &&
         "Invalid Inst-size!!!");

  if (auto *SVI = dyn_cast<ShuffleVectorInst>(VecInst)) {
    Value *Op0 = SVI->getOperand(0);
    Value *Op1 = SVI->getOperand(1);

    // Generate N(= NumSubVectors) shuffles of T(= SubVecTy) type.
    for (unsigned i = 0; i < NumSubVectors; ++i)
      DecomposedVectors.push_back(
          cast<ShuffleVectorInst>(Builder.CreateShuffleVector(
              Op0, Op1,
              createSequentialMask(Builder, Indices[i],
                                   SubVecTy->getVectorNumElements(), 0))));
    return;
  }

  // Decompose the load instruction.
  LoadInst *LI = cast<LoadInst>(VecInst);
  Type *VecBaseTy, *VecBasePtrTy;
  Value *VecBasePtr;
  unsigned int NumLoads = NumSubVectors;
  // In the case of stride 3 with a vector of 32 elements, load the
  // information in the following way:
  // [0,1...,VF/2-1,VF/2+VF,VF/2+VF+1,...,2VF-1]
  unsigned VecLength = DL.getTypeSizeInBits(VecWidth);
  if (VecLength == 768 || VecLength == 1536) {
    VecBaseTy = VectorType::get(Type::getInt8Ty(LI->getContext()), 16);
    VecBasePtrTy = VecBaseTy->getPointerTo(LI->getPointerAddressSpace());
    VecBasePtr = Builder.CreateBitCast(LI->getPointerOperand(), VecBasePtrTy);
    NumLoads = NumSubVectors * (VecLength / 384);
  } else {
    VecBaseTy = SubVecTy;
    VecBasePtrTy = VecBaseTy->getPointerTo(LI->getPointerAddressSpace());
    VecBasePtr = Builder.CreateBitCast(LI->getPointerOperand(), VecBasePtrTy);
  }
  // Generate N loads of T type.
  for (unsigned i = 0; i < NumLoads; i++) {
    // TODO: Support inbounds GEP.
    Value *NewBasePtr =
        Builder.CreateGEP(VecBaseTy, VecBasePtr, Builder.getInt32(i));
    Instruction *NewLoad =
        Builder.CreateAlignedLoad(VecBaseTy, NewBasePtr, LI->getAlignment());
    DecomposedVectors.push_back(NewLoad);
  }
}
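
// Worked example of the load path (the arithmetic follows directly from the
// code above): deinterleaving a stride-3 group of <32 x i8> shuffles means
// the wide load is <96 x i8>, so VecLength = 768; the load is then split
// into NumLoads = 3 * (768 / 384) = 6 loads of <16 x i8>, which
// concatSubVector later stitches back together.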

// Changing the scale of the vector type by reducing the number of elements and
// doubling the scalar size.
static MVT scaleVectorType(MVT VT) {
  unsigned ScalarSize = VT.getVectorElementType().getScalarSizeInBits() * 2;
  return MVT::getVectorVT(MVT::getIntegerVT(ScalarSize),
                          VT.getVectorNumElements() / 2);
}
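
// E.g. scaleVectorType(v32i8) yields v16i16: the element count halves while
// the element width doubles, so the total bit width is preserved.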

static uint32_t Concat[] = {
  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 };

// genShuffleBland - Creates a shuffle according to two vectors. This function
// only works on instructions with lanes inside 256-bit registers. According to
// the mask 'Mask' it creates a new mask 'Out' by adding an offset to each mask
// element. The offset amount depends on the two integers 'LowOffset' and
// 'HighOffset', where 'LowOffset' refers to the first vector and 'HighOffset'
// refers to the second vector.
// |a0....a5,b0....b4,c0....c4|a16..a21,b16..b20,c16..c20|
// |c5...c10,a5....a9,b5....b9|c21..c26,a22..a26,b21..b25|
// |b10..b15,c11..c15,a10..a15|b26..b31,c27..c31,a27..a31|
// For the sequence to work as a mirror of the load, we must consider the
// element order as above. In this function we are combining two kinds of
// shuffles: the first one is a vpshufd-style shuffle and the second is a
// "blend"-style shuffle. By computing the shuffle on a sequence of 16 elements
// (one lane) and adding the correct offset, we create a vpshufd + blend
// sequence between two shuffles.
static void genShuffleBland(MVT VT, ArrayRef<uint32_t> Mask,
                            SmallVectorImpl<uint32_t> &Out, int LowOffset,
                            int HighOffset) {
  assert(VT.getSizeInBits() >= 256 &&
         "This function doesn't accept width smaller than 256");
  unsigned NumOfElm = VT.getVectorNumElements();
  for (unsigned i = 0; i < Mask.size(); i++)
    Out.push_back(Mask[i] + LowOffset);
  for (unsigned i = 0; i < Mask.size(); i++)
    Out.push_back(Mask[i] + HighOffset + NumOfElm);
}
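
// A tiny worked example with hypothetical values: for VT = v32i8 (so
// NumOfElm = 32), Mask = {0, 3}, LowOffset = 0 and HighOffset = 16, the
// function produces Out = {0, 3, 48, 51}: the first copy of the mask indexes
// the first source, the second copy is shifted past it into the second
// source.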

// reorderSubVector returns the data to its original state, and is de facto the
// opposite of the function concatSubVector.

// For VecElems = 16
// Invec[0] -  |0|      TransposedMatrix[0] - |0|
// Invec[1] -  |1|  =>  TransposedMatrix[1] - |1|
// Invec[2] -  |2|      TransposedMatrix[2] - |2|

// For VecElems = 32
// Invec[0] -  |0|3|      TransposedMatrix[0] - |0|1|
// Invec[1] -  |1|4|  =>  TransposedMatrix[1] - |2|3|
// Invec[2] -  |2|5|      TransposedMatrix[2] - |4|5|

// For VecElems = 64
// Invec[0] -  |0|3|6|9 |     TransposedMatrix[0] - |0|1|2 |3 |
// Invec[1] -  |1|4|7|10| =>  TransposedMatrix[1] - |4|5|6 |7 |
// Invec[2] -  |2|5|8|11|     TransposedMatrix[2] - |8|9|10|11|

static void reorderSubVector(MVT VT, SmallVectorImpl<Value *> &TransposedMatrix,
                             ArrayRef<Value *> Vec, ArrayRef<uint32_t> VPShuf,
                             unsigned VecElems, unsigned Stride,
                             IRBuilder<> Builder) {
  if (VecElems == 16) {
    for (unsigned i = 0; i < Stride; i++)
      TransposedMatrix[i] = Builder.CreateShuffleVector(
          Vec[i], UndefValue::get(Vec[i]->getType()), VPShuf);
    return;
  }

  SmallVector<uint32_t, 32> OptimizeShuf;
  Value *Temp[8];

  for (unsigned i = 0; i < (VecElems / 16) * Stride; i += 2) {
    genShuffleBland(VT, VPShuf, OptimizeShuf, (i / Stride) * 16,
                    (i + 1) / Stride * 16);
    Temp[i / 2] = Builder.CreateShuffleVector(
        Vec[i % Stride], Vec[(i + 1) % Stride], OptimizeShuf);
    OptimizeShuf.clear();
  }

  if (VecElems == 32) {
    std::copy(Temp, Temp + Stride, TransposedMatrix.begin());
    return;
  }

  for (unsigned i = 0; i < Stride; i++)
    TransposedMatrix[i] =
        Builder.CreateShuffleVector(Temp[2 * i], Temp[2 * i + 1], Concat);
}

void X86InterleavedAccessGroup::interleave8bitStride4VF8(
    ArrayRef<Instruction *> Matrix,
    SmallVectorImpl<Value *> &TransposedMatrix) {
  // Assuming we start from the following vectors:
  // Matrix[0]= c0 c1 c2 c3 c4 ... c7
  // Matrix[1]= m0 m1 m2 m3 m4 ... m7
  // Matrix[2]= y0 y1 y2 y3 y4 ... y7
  // Matrix[3]= k0 k1 k2 k3 k4 ... k7

  MVT VT = MVT::v8i16;
  TransposedMatrix.resize(2);
  SmallVector<uint32_t, 16> MaskLow;
  SmallVector<uint32_t, 32> MaskLowTemp1, MaskLowWord;
  SmallVector<uint32_t, 32> MaskHighTemp1, MaskHighWord;

  for (unsigned i = 0; i < 8; ++i) {
    MaskLow.push_back(i);
    MaskLow.push_back(i + 8);
  }

  createUnpackShuffleMask<uint32_t>(VT, MaskLowTemp1, true, false);
  createUnpackShuffleMask<uint32_t>(VT, MaskHighTemp1, false, false);
  scaleShuffleMask<uint32_t>(2, MaskHighTemp1, MaskHighWord);
  scaleShuffleMask<uint32_t>(2, MaskLowTemp1, MaskLowWord);
  // IntrVec1Low = c0 m0 c1 m1 c2 m2 c3 m3 c4 m4 c5 m5 c6 m6 c7 m7
  // IntrVec2Low = y0 k0 y1 k1 y2 k2 y3 k3 y4 k4 y5 k5 y6 k6 y7 k7
  Value *IntrVec1Low =
      Builder.CreateShuffleVector(Matrix[0], Matrix[1], MaskLow);
  Value *IntrVec2Low =
      Builder.CreateShuffleVector(Matrix[2], Matrix[3], MaskLow);

  // TransposedMatrix[0] = c0 m0 y0 k0 c1 m1 y1 k1 c2 m2 y2 k2 c3 m3 y3 k3
  // TransposedMatrix[1] = c4 m4 y4 k4 c5 m5 y5 k5 c6 m6 y6 k6 c7 m7 y7 k7

  TransposedMatrix[0] =
      Builder.CreateShuffleVector(IntrVec1Low, IntrVec2Low, MaskLowWord);
  TransposedMatrix[1] =
      Builder.CreateShuffleVector(IntrVec1Low, IntrVec2Low, MaskHighWord);
}
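
// For reference, the loop above expands MaskLow to
// {0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15}, i.e. a full
// interleave of the two 8-element sources, which is exactly what produces
// the c/m (and y/k) pairing shown in the IntrVec comments.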

void X86InterleavedAccessGroup::interleave8bitStride4(
    ArrayRef<Instruction *> Matrix, SmallVectorImpl<Value *> &TransposedMatrix,
    unsigned NumOfElm) {
  // Example: Assuming we start from the following vectors:
  // Matrix[0]= c0 c1 c2 c3 c4 ... c31
  // Matrix[1]= m0 m1 m2 m3 m4 ... m31
  // Matrix[2]= y0 y1 y2 y3 y4 ... y31
  // Matrix[3]= k0 k1 k2 k3 k4 ... k31

  MVT VT = MVT::getVectorVT(MVT::i8, NumOfElm);
  MVT HalfVT = scaleVectorType(VT);

  TransposedMatrix.resize(4);
  SmallVector<uint32_t, 32> MaskHigh;
  SmallVector<uint32_t, 32> MaskLow;
  SmallVector<uint32_t, 32> LowHighMask[2];
  SmallVector<uint32_t, 32> MaskHighTemp;
  SmallVector<uint32_t, 32> MaskLowTemp;

  // MaskLow and MaskHigh are built according to the vpunpcklbw and vpunpckhbw
  // X86 shuffle patterns.

  createUnpackShuffleMask<uint32_t>(VT, MaskLow, true, false);
  createUnpackShuffleMask<uint32_t>(VT, MaskHigh, false, false);

  // MaskLowTemp and MaskHighTemp are built according to the vpunpckldw and
  // vpunpckhdw X86 shuffle patterns.

  createUnpackShuffleMask<uint32_t>(HalfVT, MaskLowTemp, true, false);
  createUnpackShuffleMask<uint32_t>(HalfVT, MaskHighTemp, false, false);
  scaleShuffleMask<uint32_t>(2, MaskLowTemp, LowHighMask[0]);
  scaleShuffleMask<uint32_t>(2, MaskHighTemp, LowHighMask[1]);

  // IntrVec1Low  = c0  m0  c1  m1 ... c7  m7  | c16 m16 c17 m17 ... c23 m23
  // IntrVec1High = c8  m8  c9  m9 ... c15 m15 | c24 m24 c25 m25 ... c31 m31
  // IntrVec2Low  = y0  k0  y1  k1 ... y7  k7  | y16 k16 y17 k17 ... y23 k23
  // IntrVec2High = y8  k8  y9  k9 ... y15 k15 | y24 k24 y25 k25 ... y31 k31
  Value *IntrVec[4];

  IntrVec[0] = Builder.CreateShuffleVector(Matrix[0], Matrix[1], MaskLow);
  IntrVec[1] = Builder.CreateShuffleVector(Matrix[0], Matrix[1], MaskHigh);
  IntrVec[2] = Builder.CreateShuffleVector(Matrix[2], Matrix[3], MaskLow);
  IntrVec[3] = Builder.CreateShuffleVector(Matrix[2], Matrix[3], MaskHigh);

  // cmyk4  cmyk5  cmyk6   cmyk7  | cmyk20 cmyk21 cmyk22 cmyk23
  // cmyk12 cmyk13 cmyk14  cmyk15 | cmyk28 cmyk29 cmyk30 cmyk31
  // cmyk0  cmyk1  cmyk2   cmyk3  | cmyk16 cmyk17 cmyk18 cmyk19
  // cmyk8  cmyk9  cmyk10  cmyk11 | cmyk24 cmyk25 cmyk26 cmyk27

  Value *VecOut[4];
  for (int i = 0; i < 4; i++)
    VecOut[i] = Builder.CreateShuffleVector(IntrVec[i / 2], IntrVec[i / 2 + 2],
                                            LowHighMask[i % 2]);

  // cmyk0  cmyk1  cmyk2  cmyk3   | cmyk4  cmyk5  cmyk6  cmyk7
  // cmyk8  cmyk9  cmyk10 cmyk11  | cmyk12 cmyk13 cmyk14 cmyk15
  // cmyk16 cmyk17 cmyk18 cmyk19  | cmyk20 cmyk21 cmyk22 cmyk23
  // cmyk24 cmyk25 cmyk26 cmyk27  | cmyk28 cmyk29 cmyk30 cmyk31

  if (VT == MVT::v16i8) {
    std::copy(VecOut, VecOut + 4, TransposedMatrix.begin());
    return;
  }

  reorderSubVector(VT, TransposedMatrix, VecOut, makeArrayRef(Concat, 16),
                   NumOfElm, 4, Builder);
}
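
// A note on the unpack masks, assuming the usual vpunpcklbw semantics that
// createUnpackShuffleMask models: for v32i8 the low mask interleaves the low
// half of each 128-bit lane of the two sources, e.g. it starts
// {0, 32, 1, 33, ..., 7, 39} for lane 0 and continues at element 16 for
// lane 1. This per-lane behavior is why reorderSubVector is needed afterward.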

//  createShuffleStride returns a shuffle mask of size N.
//  The shuffle pattern is as follows:
//  {0, Stride%(VF/Lane), (2*Stride%(VF/Lane))...(VF*Stride/Lane)%(VF/Lane),
//  (VF/ Lane) ,(VF / Lane)+Stride%(VF/Lane),...,
//  (VF / Lane)+(VF*Stride/Lane)%(VF/Lane)}
//  Where Lane is the # of lanes in a register:
//  VectorSize = 128 => Lane = 1
//  VectorSize = 256 => Lane = 2
//  For example, the shuffle pattern for VF 16 with register size 256
//  (-> lanes = 2) is:
//  {<[0|3|6|1|4|7|2|5]-[8|11|14|9|12|15|10|13]>}
static void createShuffleStride(MVT VT, int Stride,
                                SmallVectorImpl<uint32_t> &Mask) {
  int VectorSize = VT.getSizeInBits();
  int VF = VT.getVectorNumElements();
  int LaneCount = std::max(VectorSize / 128, 1);
  for (int Lane = 0; Lane < LaneCount; Lane++)
    for (int i = 0, LaneSize = VF / LaneCount; i != LaneSize; ++i)
      Mask.push_back((i * Stride) % LaneSize + LaneSize * Lane);
}

//  setGroupSize sets 'SizeInfo' to the size (number of elements) of each group
//  inside a shuffle mask. A mask contains exactly 3 groups, where each group
//  is a monotonically increasing sequence with stride 3.
//  For example, shuffleMask {0,3,6,1,4,7,2,5} => {3,3,2}
static void setGroupSize(MVT VT, SmallVectorImpl<uint32_t> &SizeInfo) {
  int VectorSize = VT.getSizeInBits();
  int VF = VT.getVectorNumElements() / std::max(VectorSize / 128, 1);
  for (int i = 0, FirstGroupElement = 0; i < 3; i++) {
    int GroupSize = std::ceil((VF - FirstGroupElement) / 3.0);
    SizeInfo.push_back(GroupSize);
    FirstGroupElement = (GroupSize * 3 + FirstGroupElement) % VF;
  }
}
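
// Tracing the {3,3,2} example above (VF = 8): the first group is
// ceil(8/3) = 3 and leaves FirstGroupElement = 9 % 8 = 1; the second is
// ceil((8-1)/3) = 3 with FirstGroupElement = (9+1) % 8 = 2; the last is
// ceil((8-2)/3) = 2.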

//  DecodePALIGNRMask returns the shuffle mask of the vpalignr instruction.
//  vpalignr works per lane.
//  Where Lane is the # of lanes in a register:
//  VectorWide = 128 => Lane = 1
//  VectorWide = 256 => Lane = 2
//  For Lane = 1 the shuffle pattern is: {DiffToJump,...,DiffToJump+VF-1}.
//  For Lane = 2 the shuffle pattern is:
//  {DiffToJump,...,VF/2-1,VF,...,DiffToJump+VF-1}.
//  The Imm variable sets the offset amount. The result of the function is
//  stored inside the ShuffleMask vector and is built as described above.
//  AlignDirection is a boolean that indicates the direction of the alignment
//  (false - align to the "right" side while true - align to the "left" side).
static void DecodePALIGNRMask(MVT VT, unsigned Imm,
                              SmallVectorImpl<uint32_t> &ShuffleMask,
                              bool AlignDirection = true, bool Unary = false) {
  unsigned NumElts = VT.getVectorNumElements();
  unsigned NumLanes = std::max((int)VT.getSizeInBits() / 128, 1);
  unsigned NumLaneElts = NumElts / NumLanes;

  Imm = AlignDirection ? Imm : (NumLaneElts - Imm);
  unsigned Offset = Imm * (VT.getScalarSizeInBits() / 8);

  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
    for (unsigned i = 0; i != NumLaneElts; ++i) {
      unsigned Base = i + Offset;
      // If i+Offset is out of this lane then we actually need the other
      // source. If Unary, the other source is the first source.
      if (Base >= NumLaneElts)
        Base = Unary ? Base % NumLaneElts : Base + NumElts - NumLaneElts;
      ShuffleMask.push_back(Base + l);
    }
  }
}
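
// Worked example (hand-evaluating the loop above): for VT = v16i8, Imm = 3,
// AlignDirection = true and Unary = true, there is a single 16-element lane
// and Offset = 3, so the mask is a simple rotate: {3, 4, ..., 15, 0, 1, 2}.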

// concatSubVector - The function rebuilds the data into the expected order. An
// assumption about the shape of the matrix was made so that the deinterleave
// can work with lane-based instructions like 'vpalignr' or 'vpshufd'. This
// function ensures that the data is built in the correct way for those lane
// instructions. Each lane inside the vector is 128 bits long.
//
// The 'InVec' argument contains the data in increasing order. In InVec[0] you
// can find the first 128 bits of data. The number of different lanes inside a
// vector depends on 'VecElems'. In general, the formula is
// VecElems * type / 128. The size of the array 'InVec' depends on, and is
// equal to, 'VecElems'.

// For VecElems = 16
// Invec[0] - |0|      Vec[0] - |0|
// Invec[1] - |1|  =>  Vec[1] - |1|
// Invec[2] - |2|      Vec[2] - |2|

// For VecElems = 32
// Invec[0] - |0|1|      Vec[0] - |0|3|
// Invec[1] - |2|3|  =>  Vec[1] - |1|4|
// Invec[2] - |4|5|      Vec[2] - |2|5|

// For VecElems = 64
// Invec[0] - |0|1|2 |3 |      Vec[0] - |0|3|6|9 |
// Invec[1] - |4|5|6 |7 |  =>  Vec[1] - |1|4|7|10|
// Invec[2] - |8|9|10|11|      Vec[2] - |2|5|8|11|

static void concatSubVector(Value **Vec, ArrayRef<Instruction *> InVec,
                            unsigned VecElems, IRBuilder<> Builder) {
  if (VecElems == 16) {
    for (int i = 0; i < 3; i++)
      Vec[i] = InVec[i];
    return;
  }

  for (unsigned j = 0; j < VecElems / 32; j++)
    for (int i = 0; i < 3; i++)
      Vec[i + j * 3] = Builder.CreateShuffleVector(
          InVec[j * 6 + i], InVec[j * 6 + i + 3], makeArrayRef(Concat, 32));

  if (VecElems == 32)
    return;

  for (int i = 0; i < 3; i++)
    Vec[i] = Builder.CreateShuffleVector(Vec[i], Vec[i + 3], Concat);
}
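
// Following the code above for VecElems = 64: the twelve <16 x i8> loads are
// first concatenated pairwise into six 32-element vectors (InVec[j*6+i] with
// InVec[j*6+i+3]), and those are then concatenated again into the three
// 64-element vectors shown in the diagram.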

void X86InterleavedAccessGroup::deinterleave8bitStride3(
    ArrayRef<Instruction *> InVec, SmallVectorImpl<Value *> &TransposedMatrix,
    unsigned VecElems) {
  // Example: Assuming we start from the following vectors:
  // Matrix[0]= a0 b0 c0 a1 b1 c1 a2 b2
  // Matrix[1]= c2 a3 b3 c3 a4 b4 c4 a5
  // Matrix[2]= b5 c5 a6 b6 c6 a7 b7 c7

  TransposedMatrix.resize(3);
  SmallVector<uint32_t, 32> VPShuf;
  SmallVector<uint32_t, 32> VPAlign[2];
  SmallVector<uint32_t, 32> VPAlign2;
  SmallVector<uint32_t, 32> VPAlign3;
  SmallVector<uint32_t, 3> GroupSize;
  Value *Vec[6], *TempVector[3];

  MVT VT = MVT::getVT(Shuffles[0]->getType());

  createShuffleStride(VT, 3, VPShuf);
  setGroupSize(VT, GroupSize);

  for (int i = 0; i < 2; i++)
    DecodePALIGNRMask(VT, GroupSize[2 - i], VPAlign[i], false);

  DecodePALIGNRMask(VT, GroupSize[2] + GroupSize[1], VPAlign2, true, true);
  DecodePALIGNRMask(VT, GroupSize[1], VPAlign3, true, true);

  concatSubVector(Vec, InVec, VecElems, Builder);
  // Vec[0]= a0 a1 a2 b0 b1 b2 c0 c1
  // Vec[1]= c2 c3 c4 a3 a4 a5 b3 b4
  // Vec[2]= b5 b6 b7 c5 c6 c7 a6 a7

  for (int i = 0; i < 3; i++)
    Vec[i] = Builder.CreateShuffleVector(
        Vec[i], UndefValue::get(Vec[0]->getType()), VPShuf);

  // TempVector[0]= a6 a7 a0 a1 a2 b0 b1 b2
  // TempVector[1]= c0 c1 c2 c3 c4 a3 a4 a5
  // TempVector[2]= b3 b4 b5 b6 b7 c5 c6 c7

  for (int i = 0; i < 3; i++)
    TempVector[i] =
        Builder.CreateShuffleVector(Vec[(i + 2) % 3], Vec[i], VPAlign[0]);

  // Vec[0]= a3 a4 a5 a6 a7 a0 a1 a2
  // Vec[1]= c5 c6 c7 c0 c1 c2 c3 c4
  // Vec[2]= b0 b1 b2 b3 b4 b5 b6 b7

  for (int i = 0; i < 3; i++)
    Vec[i] = Builder.CreateShuffleVector(TempVector[(i + 1) % 3], TempVector[i],
                                         VPAlign[1]);

  // TransposedMatrix[0]= a0 a1 a2 a3 a4 a5 a6 a7
  // TransposedMatrix[1]= b0 b1 b2 b3 b4 b5 b6 b7
  // TransposedMatrix[2]= c0 c1 c2 c3 c4 c5 c6 c7

  Value *TempVec = Builder.CreateShuffleVector(
      Vec[1], UndefValue::get(Vec[1]->getType()), VPAlign3);
  TransposedMatrix[0] = Builder.CreateShuffleVector(
      Vec[0], UndefValue::get(Vec[1]->getType()), VPAlign2);
  TransposedMatrix[1] = VecElems == 8 ? Vec[2] : TempVec;
  TransposedMatrix[2] = VecElems == 8 ? TempVec : Vec[2];
}

// group2Shuffle reorders the shuffle stride back into continuous order.
// For example, for VF16 with Mask1 = {0,3,6,9,12,15,2,5,8,11,14,1,4,7,10,13}
// => MaskResult = {0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5}.
static void group2Shuffle(MVT VT, SmallVectorImpl<uint32_t> &Mask,
                          SmallVectorImpl<uint32_t> &Output) {
  int IndexGroup[3] = {0, 0, 0};
  int Index = 0;
  int VectorWidth = VT.getSizeInBits();
  int VF = VT.getVectorNumElements();
  // Find the index of the different groups.
  int Lane = (VectorWidth / 128 > 0) ? VectorWidth / 128 : 1;
  for (int i = 0; i < 3; i++) {
    IndexGroup[(Index * 3) % (VF / Lane)] = Index;
    Index += Mask[i];
  }
  // According to the index, compute the convert mask.
  for (int i = 0; i < VF / Lane; i++) {
    Output.push_back(IndexGroup[i % 3]);
    IndexGroup[i % 3]++;
  }
}

void X86InterleavedAccessGroup::interleave8bitStride3(
    ArrayRef<Instruction *> InVec, SmallVectorImpl<Value *> &TransposedMatrix,
    unsigned VecElems) {
  // Example: Assuming we start from the following vectors:
  // Matrix[0]= a0 a1 a2 a3 a4 a5 a6 a7
  // Matrix[1]= b0 b1 b2 b3 b4 b5 b6 b7
  // Matrix[2]= c0 c1 c2 c3 c4 c5 c6 c7

  TransposedMatrix.resize(3);
  SmallVector<uint32_t, 3> GroupSize;
  SmallVector<uint32_t, 32> VPShuf;
  SmallVector<uint32_t, 32> VPAlign[3];
  SmallVector<uint32_t, 32> VPAlign2;
  SmallVector<uint32_t, 32> VPAlign3;

  Value *Vec[3], *TempVector[3];
  MVT VT = MVT::getVectorVT(MVT::i8, VecElems);

  setGroupSize(VT, GroupSize);

  for (int i = 0; i < 3; i++)
    DecodePALIGNRMask(VT, GroupSize[i], VPAlign[i]);

  DecodePALIGNRMask(VT, GroupSize[1] + GroupSize[2], VPAlign2, false, true);
  DecodePALIGNRMask(VT, GroupSize[1], VPAlign3, false, true);

  // Vec[0]= a3 a4 a5 a6 a7 a0 a1 a2
  // Vec[1]= c5 c6 c7 c0 c1 c2 c3 c4
  // Vec[2]= b0 b1 b2 b3 b4 b5 b6 b7

  Vec[0] = Builder.CreateShuffleVector(
      InVec[0], UndefValue::get(InVec[0]->getType()), VPAlign2);
  Vec[1] = Builder.CreateShuffleVector(
      InVec[1], UndefValue::get(InVec[1]->getType()), VPAlign3);
  Vec[2] = InVec[2];

  // Vec[0]= a6 a7 a0 a1 a2 b0 b1 b2
  // Vec[1]= c0 c1 c2 c3 c4 a3 a4 a5
  // Vec[2]= b3 b4 b5 b6 b7 c5 c6 c7

  for (int i = 0; i < 3; i++)
    TempVector[i] =
        Builder.CreateShuffleVector(Vec[i], Vec[(i + 2) % 3], VPAlign[1]);

  // Vec[0]= a0 a1 a2 b0 b1 b2 c0 c1
  // Vec[1]= c2 c3 c4 a3 a4 a5 b3 b4
  // Vec[2]= b5 b6 b7 c5 c6 c7 a6 a7

  for (int i = 0; i < 3; i++)
    Vec[i] = Builder.CreateShuffleVector(TempVector[i], TempVector[(i + 1) % 3],
                                         VPAlign[2]);

  // TransposedMatrix[0] = a0 b0 c0 a1 b1 c1 a2 b2
  // TransposedMatrix[1] = c2 a3 b3 c3 a4 b4 c4 a5
  // TransposedMatrix[2] = b5 c5 a6 b6 c6 a7 b7 c7

  unsigned NumOfElm = VT.getVectorNumElements();
  group2Shuffle(VT, GroupSize, VPShuf);
  reorderSubVector(VT, TransposedMatrix, Vec, VPShuf, NumOfElm, 3, Builder);
}

void X86InterleavedAccessGroup::transpose_4x4(
    ArrayRef<Instruction *> Matrix,
    SmallVectorImpl<Value *> &TransposedMatrix) {
  assert(Matrix.size() == 4 && "Invalid matrix size");
  TransposedMatrix.resize(4);

  // dst = src1[0,1],src2[0,1]
  uint32_t IntMask1[] = {0, 1, 4, 5};
  ArrayRef<uint32_t> Mask = makeArrayRef(IntMask1, 4);
  Value *IntrVec1 = Builder.CreateShuffleVector(Matrix[0], Matrix[2], Mask);
  Value *IntrVec2 = Builder.CreateShuffleVector(Matrix[1], Matrix[3], Mask);

  // dst = src1[2,3],src2[2,3]
  uint32_t IntMask2[] = {2, 3, 6, 7};
  Mask = makeArrayRef(IntMask2, 4);
  Value *IntrVec3 = Builder.CreateShuffleVector(Matrix[0], Matrix[2], Mask);
  Value *IntrVec4 = Builder.CreateShuffleVector(Matrix[1], Matrix[3], Mask);

  // dst = src1[0],src2[0],src1[2],src2[2]
  uint32_t IntMask3[] = {0, 4, 2, 6};
  Mask = makeArrayRef(IntMask3, 4);
  TransposedMatrix[0] = Builder.CreateShuffleVector(IntrVec1, IntrVec2, Mask);
  TransposedMatrix[2] = Builder.CreateShuffleVector(IntrVec3, IntrVec4, Mask);

  // dst = src1[1],src2[1],src1[3],src2[3]
  uint32_t IntMask4[] = {1, 5, 3, 7};
  Mask = makeArrayRef(IntMask4, 4);
  TransposedMatrix[1] = Builder.CreateShuffleVector(IntrVec1, IntrVec2, Mask);
  TransposedMatrix[3] = Builder.CreateShuffleVector(IntrVec3, IntrVec4, Mask);
}

// Lowers this interleaved access group into X86-specific
// instructions/intrinsics.
bool X86InterleavedAccessGroup::lowerIntoOptimizedSequence() {
  SmallVector<Instruction *, 4> DecomposedVectors;
  SmallVector<Value *, 4> TransposedVectors;
  VectorType *ShuffleTy = Shuffles[0]->getType();

  if (isa<LoadInst>(Inst)) {
    // Try to generate target-sized register(/instruction).
    decompose(Inst, Factor, ShuffleTy, DecomposedVectors);

    Type *ShuffleEltTy = Inst->getType();
    unsigned NumSubVecElems = ShuffleEltTy->getVectorNumElements() / Factor;
    // Perform matrix-transposition in order to compute interleaved
    // results by generating some sort of (optimized) target-specific
    // instructions.

    switch (NumSubVecElems) {
    default:
      return false;
    case 4:
      transpose_4x4(DecomposedVectors, TransposedVectors);
      break;
    case 8:
    case 16:
    case 32:
    case 64:
      deinterleave8bitStride3(DecomposedVectors, TransposedVectors,
                              NumSubVecElems);
      break;
    }

    // Now replace the unoptimized-interleaved-vectors with the
    // transposed-interleaved vectors.
    for (unsigned i = 0, e = Shuffles.size(); i < e; ++i)
      Shuffles[i]->replaceAllUsesWith(TransposedVectors[Indices[i]]);

    return true;
  }

  Type *ShuffleEltTy = ShuffleTy->getVectorElementType();
  unsigned NumSubVecElems = ShuffleTy->getVectorNumElements() / Factor;

  // Lower the interleaved stores:
  //   1. Decompose the interleaved wide shuffle into individual shuffle
  //      vectors.
  decompose(Shuffles[0], Factor, VectorType::get(ShuffleEltTy, NumSubVecElems),
            DecomposedVectors);

  //   2. Transpose the interleaved-vectors into vectors of contiguous
  //      elements.
  switch (NumSubVecElems) {
  case 4:
    transpose_4x4(DecomposedVectors, TransposedVectors);
    break;
  case 8:
    interleave8bitStride4VF8(DecomposedVectors, TransposedVectors);
    break;
  case 16:
  case 32:
  case 64:
    if (Factor == 4)
      interleave8bitStride4(DecomposedVectors, TransposedVectors,
                            NumSubVecElems);
    if (Factor == 3)
      interleave8bitStride3(DecomposedVectors, TransposedVectors,
                            NumSubVecElems);
    break;
  default:
    return false;
  }

  //   3. Concatenate the contiguous-vectors back into a wide vector.
  Value *WideVec = concatenateVectors(Builder, TransposedVectors);

  //   4. Generate a store instruction for wide-vec.
  StoreInst *SI = cast<StoreInst>(Inst);
  Builder.CreateAlignedStore(WideVec, SI->getPointerOperand(),
                             SI->getAlignment());

  return true;
}

// Lower interleaved load(s) into target specific instructions/
// intrinsics. Lowering sequence varies depending on the vector-types, factor,
// number of shuffles and ISA.
// Currently, lowering is supported for 4x64 bits with Factor = 4 on AVX.
bool X86TargetLowering::lowerInterleavedLoad(
    LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
    ArrayRef<unsigned> Indices, unsigned Factor) const {
  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
         "Invalid interleave factor");
  assert(!Shuffles.empty() && "Empty shufflevector input");
  assert(Shuffles.size() == Indices.size() &&
         "Unmatched number of shufflevectors and indices");

  // Create an interleaved access group.
  IRBuilder<> Builder(LI);
  X86InterleavedAccessGroup Grp(LI, Shuffles, Indices, Factor, Subtarget,
                                Builder);

  return Grp.isSupported() && Grp.lowerIntoOptimizedSequence();
}

bool X86TargetLowering::lowerInterleavedStore(StoreInst *SI,
                                              ShuffleVectorInst *SVI,
                                              unsigned Factor) const {
  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
         "Invalid interleave factor");

  assert(SVI->getType()->getVectorNumElements() % Factor == 0 &&
         "Invalid interleaved store");

  // Holds the indices of SVI that correspond to the starting index of each
  // interleaved shuffle.
  SmallVector<unsigned, 4> Indices;
  auto Mask = SVI->getShuffleMask();
  for (unsigned i = 0; i < Factor; i++)
    Indices.push_back(Mask[i]);

  ArrayRef<ShuffleVectorInst *> Shuffles = makeArrayRef(SVI);

  // Create an interleaved access group.
  IRBuilder<> Builder(SI);
  X86InterleavedAccessGroup Grp(SI, Shuffles, Indices, Factor, Subtarget,
                                Builder);

  return Grp.isSupported() && Grp.lowerIntoOptimizedSequence();
}
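
// For illustration (hypothetical values): a Factor = 3 store of <4 x i32>
// sub-vectors arrives as a shufflevector with the re-interleave mask
// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>, so the loop above records
// Indices = {0, 4, 8} - the starting index of each interleaved sub-vector.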