Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
Line
Count
Source
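
The listing below is the GlobalISel legalizer-rule construction for AArch64, with the execution count recorded for each line during the profiled run. For orientation, the rules are declared through LLVM's LegalizerInfo builder API used throughout the file; the following is a minimal, hypothetical sketch of that pattern (opcode and actions chosen for illustration, not a faithful copy of any rule below):

    // Illustrative sketch only, as it would appear inside the constructor below.
    const LLT s32 = LLT::scalar(32);
    const LLT s64 = LLT::scalar(64);
    getActionDefinitionsBuilder(TargetOpcode::G_ADD)
        .legalFor({s32, s64})      // these scalar types are selectable as-is
        .clampScalar(0, s32, s64)  // resize anything narrower or wider
        .widenScalarToNextPow2(0); // then round odd widths up to a power of 2

Each chained action is consulted in order against a LegalityQuery describing the operand types (and, for memory operations, the MMO descriptors) of the instruction being legalized.
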
1
//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
/// \file
9
/// This file implements the targeting of the MachineLegalizer class for
10
/// AArch64.
11
/// \todo This should be generated by TableGen.
12
//===----------------------------------------------------------------------===//
13
14
#include "AArch64LegalizerInfo.h"
15
#include "AArch64Subtarget.h"
16
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
17
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
18
#include "llvm/CodeGen/GlobalISel/Utils.h"
19
#include "llvm/CodeGen/MachineInstr.h"
20
#include "llvm/CodeGen/MachineRegisterInfo.h"
21
#include "llvm/CodeGen/TargetOpcodes.h"
22
#include "llvm/CodeGen/ValueTypes.h"
23
#include "llvm/IR/DerivedTypes.h"
24
#include "llvm/IR/Type.h"
25
26
#define DEBUG_TYPE "aarch64-legalinfo"
27
28
using namespace llvm;
29
using namespace LegalizeActions;
30
using namespace LegalizeMutations;
31
using namespace LegalityPredicates;
32
33
9.10k
AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
34
9.10k
  using namespace TargetOpcode;
35
9.10k
  const LLT p0 = LLT::pointer(0, 64);
36
9.10k
  const LLT s1 = LLT::scalar(1);
37
9.10k
  const LLT s8 = LLT::scalar(8);
38
9.10k
  const LLT s16 = LLT::scalar(16);
39
9.10k
  const LLT s32 = LLT::scalar(32);
40
9.10k
  const LLT s64 = LLT::scalar(64);
41
9.10k
  const LLT s128 = LLT::scalar(128);
42
9.10k
  const LLT s256 = LLT::scalar(256);
43
9.10k
  const LLT s512 = LLT::scalar(512);
44
9.10k
  const LLT v16s8 = LLT::vector(16, 8);
45
9.10k
  const LLT v8s8 = LLT::vector(8, 8);
46
9.10k
  const LLT v4s8 = LLT::vector(4, 8);
47
9.10k
  const LLT v8s16 = LLT::vector(8, 16);
48
9.10k
  const LLT v4s16 = LLT::vector(4, 16);
49
9.10k
  const LLT v2s16 = LLT::vector(2, 16);
50
9.10k
  const LLT v2s32 = LLT::vector(2, 32);
51
9.10k
  const LLT v4s32 = LLT::vector(4, 32);
52
9.10k
  const LLT v2s64 = LLT::vector(2, 64);
53
9.10k
  const LLT v2p0 = LLT::vector(2, p0);
54
9.10k
55
9.10k
  getActionDefinitionsBuilder(G_IMPLICIT_DEF)
56
9.10k
    .legalFor({p0, s1, s8, s16, s32, s64, v4s32, v2s64})
57
9.10k
    .clampScalar(0, s1, s64)
58
9.10k
    .widenScalarToNextPow2(0, 8)
59
9.10k
    .fewerElementsIf(
60
9.10k
      [=](const LegalityQuery &Query) {
61
308
        return Query.Types[0].isVector() &&
62
308
          (Query.Types[0].getElementType() != s64 ||
63
308
           Query.Types[0].getNumElements() != 2);
64
308
      },
65
9.10k
      [=](const LegalityQuery &Query) {
66
308
        LLT EltTy = Query.Types[0].getElementType();
67
308
        if (EltTy == s64)
68
1
          return std::make_pair(0, LLT::vector(2, 64));
69
307
        return std::make_pair(0, EltTy);
70
307
      });
71
9.10k
72
9.10k
  getActionDefinitionsBuilder(G_PHI)
73
9.10k
      .legalFor({p0, s16, s32, s64, v2s32, v4s32, v2s64})
74
9.10k
      .clampScalar(0, s16, s64)
75
9.10k
      .widenScalarToNextPow2(0);
76
9.10k
77
9.10k
  getActionDefinitionsBuilder(G_BSWAP)
78
9.10k
      .legalFor({s32, s64, v4s32, v2s32, v2s64})
79
9.10k
      .clampScalar(0, s16, s64)
80
9.10k
      .widenScalarToNextPow2(0);
81
9.10k
82
9.10k
  getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
83
9.10k
      .legalFor({s32, s64, v2s32, v4s32, v2s64, v8s16, v16s8})
84
9.10k
      .clampScalar(0, s32, s64)
85
9.10k
      .widenScalarToNextPow2(0)
86
9.10k
      .clampNumElements(0, v2s32, v4s32)
87
9.10k
      .clampNumElements(0, v2s64, v2s64)
88
9.10k
      .moreElementsToNextPow2(0);
89
9.10k
90
9.10k
  getActionDefinitionsBuilder(G_SHL)
91
9.10k
    .legalFor({{s32, s32}, {s64, s64},
92
9.10k
               {v2s32, v2s32}, {v4s32, v4s32}, {v2s64, v2s64}})
93
9.10k
    .clampScalar(1, s32, s64)
94
9.10k
    .clampScalar(0, s32, s64)
95
9.10k
    .widenScalarToNextPow2(0)
96
9.10k
    .clampNumElements(0, v2s32, v4s32)
97
9.10k
    .clampNumElements(0, v2s64, v2s64)
98
9.10k
    .moreElementsToNextPow2(0)
99
9.10k
    .minScalarSameAs(1, 0);
100
9.10k
101
9.10k
  getActionDefinitionsBuilder(G_GEP)
102
9.10k
      .legalFor({{p0, s64}})
103
9.10k
      .clampScalar(1, s64, s64);
104
9.10k
105
9.10k
  getActionDefinitionsBuilder(G_PTR_MASK).legalFor({p0});
106
9.10k
107
9.10k
  getActionDefinitionsBuilder({G_SDIV, G_UDIV})
108
9.10k
      .legalFor({s32, s64})
109
9.10k
      .clampScalar(0, s32, s64)
110
9.10k
      .widenScalarToNextPow2(0)
111
9.10k
      .scalarize(0);
112
9.10k
113
9.10k
  getActionDefinitionsBuilder({G_LSHR, G_ASHR})
114
80.4k
      .customIf([=](const LegalityQuery &Query) {
115
80.4k
        const auto &SrcTy = Query.Types[0];
116
80.4k
        const auto &AmtTy = Query.Types[1];
117
80.4k
        return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
118
80.4k
               AmtTy.getSizeInBits() == 32;
119
80.4k
      })
120
9.10k
      .legalFor(
121
9.10k
          {{s32, s32}, {s32, s64}, {s64, s64}, {v2s32, v2s32}, {v4s32, v4s32}})
122
9.10k
      .clampScalar(1, s32, s64)
123
9.10k
      .clampScalar(0, s32, s64)
124
9.10k
      .minScalarSameAs(1, 0);
125
9.10k
126
9.10k
  getActionDefinitionsBuilder({G_SREM, G_UREM})
127
9.10k
      .lowerFor({s1, s8, s16, s32, s64});
128
9.10k
129
9.10k
  getActionDefinitionsBuilder({G_SMULO, G_UMULO})
130
9.10k
      .lowerFor({{s64, s1}});
131
9.10k
132
9.10k
  getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64});
133
9.10k
134
9.10k
  getActionDefinitionsBuilder({G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO})
135
9.10k
      .legalFor({{s32, s1}, {s64, s1}});
136
9.10k
137
9.10k
  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG})
138
9.10k
    .legalFor({s32, s64, v2s64, v4s32, v2s32});
139
9.10k
140
9.10k
  getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64});
141
9.10k
142
9.10k
  getActionDefinitionsBuilder({G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT,
143
9.10k
                               G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
144
9.10k
                               G_FNEARBYINT})
145
9.10k
      // If we don't have full FP16 support, then scalarize the elements of
146
9.10k
      // vectors containing fp16 types.
147
9.10k
      .fewerElementsIf(
148
9.10k
          [=, &ST](const LegalityQuery &Query) {
149
2.82k
            const auto &Ty = Query.Types[0];
150
2.82k
            return Ty.isVector() && Ty.getElementType() == s16 &&
151
2.82k
                   !ST.hasFullFP16();
152
2.82k
          },
153
9.10k
          [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
154
9.10k
      // If we don't have full FP16 support, then widen s16 to s32 if we
155
9.10k
      // encounter it.
156
9.10k
      .widenScalarIf(
157
9.10k
          [=, &ST](const LegalityQuery &Query) {
158
2.79k
            return Query.Types[0] == s16 && !ST.hasFullFP16();
159
2.79k
          },
160
9.10k
          [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
161
9.10k
      .legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16});
162
9.10k
163
9.10k
  getActionDefinitionsBuilder(
164
9.10k
      {G_FCOS, G_FSIN, G_FLOG10, G_FLOG, G_FLOG2, G_FEXP, G_FEXP2, G_FPOW})
165
9.10k
      // We need a call for these, so we always need to scalarize.
166
9.10k
      .scalarize(0)
167
9.10k
      // Regardless of FP16 support, widen 16-bit elements to 32-bits.
168
9.10k
      .minScalar(0, s32)
169
9.10k
      .libcallFor({s32, s64, v2s32, v4s32, v2s64});
170
9.10k
171
9.10k
  getActionDefinitionsBuilder(G_INSERT)
172
9.10k
      .unsupportedIf([=](const LegalityQuery &Query) {
173
5
        return Query.Types[0].getSizeInBits() <= Query.Types[1].getSizeInBits();
174
5
      })
175
9.10k
      .legalIf([=](const LegalityQuery &Query) {
176
5
        const LLT &Ty0 = Query.Types[0];
177
5
        const LLT &Ty1 = Query.Types[1];
178
5
        if (Ty0 != s32 && Ty0 != s64 && Ty0 != p0)
179
5
          return false;
180
0
        return isPowerOf2_32(Ty1.getSizeInBits()) &&
181
0
               (Ty1.getSizeInBits() == 1 || Ty1.getSizeInBits() >= 8);
182
0
      })
183
9.10k
      .clampScalar(0, s32, s64)
184
9.10k
      .widenScalarToNextPow2(0)
185
9.10k
      .maxScalarIf(typeInSet(0, {s32}), 1, s16)
186
9.10k
      .maxScalarIf(typeInSet(0, {s64}), 1, s32)
187
9.10k
      .widenScalarToNextPow2(1);
188
9.10k
189
9.10k
  getActionDefinitionsBuilder(G_EXTRACT)
190
9.10k
      .unsupportedIf([=](const LegalityQuery &Query) {
191
10
        return Query.Types[0].getSizeInBits() >= Query.Types[1].getSizeInBits();
192
10
      })
193
9.10k
      .legalIf([=](const LegalityQuery &Query) {
194
10
        const LLT &Ty0 = Query.Types[0];
195
10
        const LLT &Ty1 = Query.Types[1];
196
10
        if (Ty1 != s32 && Ty1 != s64 && Ty1 != s128)
197
4
          return false;
198
6
        if (Ty1 == p0)
199
0
          return true;
200
6
        return isPowerOf2_32(Ty0.getSizeInBits()) &&
201
6
               (Ty0.getSizeInBits() == 1 || Ty0.getSizeInBits() >= 8);
202
6
      })
203
9.10k
      .clampScalar(1, s32, s128)
204
9.10k
      .widenScalarToNextPow2(1)
205
9.10k
      .maxScalarIf(typeInSet(1, {s32}), 0, s16)
206
9.10k
      .maxScalarIf(typeInSet(1, {s64}), 0, s32)
207
9.10k
      .widenScalarToNextPow2(0);
208
9.10k
209
9.10k
  getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
210
9.10k
      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
211
9.10k
                                 {s32, p0, 16, 8},
212
9.10k
                                 {s32, p0, 32, 8},
213
9.10k
                                 {s64, p0, 8, 2},
214
9.10k
                                 {s64, p0, 16, 2},
215
9.10k
                                 {s64, p0, 32, 4},
216
9.10k
                                 {s64, p0, 64, 8},
217
9.10k
                                 {p0, p0, 64, 8},
218
9.10k
                                 {v2s32, p0, 64, 8}})
219
9.10k
      .clampScalar(0, s32, s64)
220
9.10k
      .widenScalarToNextPow2(0)
221
9.10k
      // TODO: We could support sum-of-pow2's but the lowering code doesn't know
222
9.10k
      //       how to do that yet.
223
9.10k
      .unsupportedIfMemSizeNotPow2()
224
9.10k
      // Lower anything left over into G_*EXT and G_LOAD
225
9.10k
      .lower();
226
9.10k
227
9.10k
  auto IsPtrVecPred = [=](const LegalityQuery &Query) {
228
688
    const LLT &ValTy = Query.Types[0];
229
688
    if (!ValTy.isVector())
230
0
      return false;
231
688
    const LLT EltTy = ValTy.getElementType();
232
688
    return EltTy.isPointer() && EltTy.getAddressSpace() == 0;
233
688
  };
234
9.10k
235
9.10k
  getActionDefinitionsBuilder(G_LOAD)
236
9.10k
      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
237
9.10k
                                 {s16, p0, 16, 8},
238
9.10k
                                 {s32, p0, 32, 8},
239
9.10k
                                 {s64, p0, 64, 8},
240
9.10k
                                 {p0, p0, 64, 8},
241
9.10k
                                 {s128, p0, 128, 8},
242
9.10k
                                 {v8s8, p0, 64, 8},
243
9.10k
                                 {v16s8, p0, 128, 8},
244
9.10k
                                 {v4s16, p0, 64, 8},
245
9.10k
                                 {v8s16, p0, 128, 8},
246
9.10k
                                 {v2s32, p0, 64, 8},
247
9.10k
                                 {v4s32, p0, 128, 8},
248
9.10k
                                 {v2s64, p0, 128, 8}})
249
9.10k
      // These extends are also legal
250
9.10k
      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
251
9.10k
                                 {s32, p0, 16, 8}})
252
9.10k
      .clampScalar(0, s8, s64)
253
9.10k
      .widenScalarToNextPow2(0)
254
9.10k
      // TODO: We could support sum-of-pow2's but the lowering code doesn't know
255
9.10k
      //       how to do that yet.
256
9.10k
      .unsupportedIfMemSizeNotPow2()
257
9.10k
      // Lower any any-extending loads left into G_ANYEXT and G_LOAD
258
9.10k
      .lowerIf([=](const LegalityQuery &Query) {
259
5.79k
        return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
260
5.79k
      })
261
9.10k
      .clampMaxNumElements(0, s32, 2)
262
9.10k
      .clampMaxNumElements(0, s64, 1)
263
9.10k
      .customIf(IsPtrVecPred);
264
9.10k
265
9.10k
  getActionDefinitionsBuilder(G_STORE)
266
9.10k
      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
267
9.10k
                                 {s16, p0, 16, 8},
268
9.10k
                                 {s32, p0, 32, 8},
269
9.10k
                                 {s64, p0, 64, 8},
270
9.10k
                                 {p0, p0, 64, 8},
271
9.10k
                                 {s128, p0, 128, 8},
272
9.10k
                                 {v16s8, p0, 128, 8},
273
9.10k
                                 {v4s16, p0, 64, 8},
274
9.10k
                                 {v8s16, p0, 128, 8},
275
9.10k
                                 {v2s32, p0, 64, 8},
276
9.10k
                                 {v4s32, p0, 128, 8},
277
9.10k
                                 {v2s64, p0, 128, 8}})
278
9.10k
      .clampScalar(0, s8, s64)
279
9.10k
      .widenScalarToNextPow2(0)
280
9.10k
      // TODO: We could support sum-of-pow2's but the lowering code doesn't know
281
9.10k
      //       how to do that yet.
282
9.10k
      .unsupportedIfMemSizeNotPow2()
283
9.10k
      .lowerIf([=](const LegalityQuery &Query) {
284
709
        return Query.Types[0].isScalar() &&
285
709
               Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
286
709
      })
287
9.10k
      .clampMaxNumElements(0, s32, 2)
288
9.10k
      .clampMaxNumElements(0, s64, 1)
289
9.10k
      .customIf(IsPtrVecPred);
290
9.10k
291
9.10k
  // Constants
292
9.10k
  getActionDefinitionsBuilder(G_CONSTANT)
293
9.10k
    .legalFor({p0, s8, s16, s32, s64})
294
9.10k
      .clampScalar(0, s8, s64)
295
9.10k
      .widenScalarToNextPow2(0);
296
9.10k
  getActionDefinitionsBuilder(G_FCONSTANT)
297
9.10k
      .legalFor({s32, s64})
298
9.10k
      .clampScalar(0, s32, s64);
299
9.10k
300
9.10k
  getActionDefinitionsBuilder(G_ICMP)
301
9.10k
      .legalFor({{s32, s32},
302
9.10k
                 {s32, s64},
303
9.10k
                 {s32, p0},
304
9.10k
                 {v4s32, v4s32},
305
9.10k
                 {v2s32, v2s32},
306
9.10k
                 {v2s64, v2s64},
307
9.10k
                 {v2s64, v2p0},
308
9.10k
                 {v4s16, v4s16},
309
9.10k
                 {v8s16, v8s16},
310
9.10k
                 {v8s8, v8s8},
311
9.10k
                 {v16s8, v16s8}})
312
9.10k
      .clampScalar(0, s32, s32)
313
9.10k
      .clampScalar(1, s32, s64)
314
9.10k
      .minScalarEltSameAsIf(
315
9.10k
          [=](const LegalityQuery &Query) {
316
701
            const LLT &Ty = Query.Types[0];
317
701
            const LLT &SrcTy = Query.Types[1];
318
701
            return Ty.isVector() && !SrcTy.getElementType().isPointer() &&
319
701
                   Ty.getElementType() != SrcTy.getElementType();
320
701
          },
321
9.10k
          0, 1)
322
9.10k
      .minScalarOrEltIf(
323
9.10k
          [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
324
9.10k
          1, s32)
325
9.10k
      .minScalarOrEltIf(
326
9.10k
          [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
327
9.10k
          s64)
328
9.10k
      .widenScalarOrEltToNextPow2(1);
329
9.10k
330
9.10k
  getActionDefinitionsBuilder(G_FCMP)
331
9.10k
      .legalFor({{s32, s32}, {s32, s64}})
332
9.10k
      .clampScalar(0, s32, s32)
333
9.10k
      .clampScalar(1, s32, s64)
334
9.10k
      .widenScalarToNextPow2(1);
335
9.10k
336
9.10k
  // Extensions
337
9.10k
  getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
338
286k
      .legalIf([=](const LegalityQuery &Query) {
339
286k
        unsigned DstSize = Query.Types[0].getSizeInBits();
340
286k
341
286k
        // Make sure that we have something that will fit in a register, and
342
286k
        // make sure it's a power of 2.
343
286k
        if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
344
1
          return false;
345
286k
346
286k
        const LLT &SrcTy = Query.Types[1];
347
286k
348
286k
        // Special case for s1.
349
286k
        if (SrcTy == s1)
350
66
          return true;
351
286k
352
286k
        // Make sure we fit in a register otherwise. Don't bother checking that
353
286k
        // the source type is below 128 bits. We shouldn't be allowing anything
354
286k
        // through which is wider than the destination in the first place.
355
286k
        unsigned SrcSize = SrcTy.getSizeInBits();
356
286k
        if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
357
4
          return false;
358
286k
359
286k
        return true;
360
286k
      });
361
9.10k
362
9.10k
  getActionDefinitionsBuilder(G_TRUNC).alwaysLegal();
363
9.10k
364
9.10k
  // FP conversions
365
9.10k
  getActionDefinitionsBuilder(G_FPTRUNC).legalFor(
366
9.10k
      {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}});
367
9.10k
  getActionDefinitionsBuilder(G_FPEXT).legalFor(
368
9.10k
      {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}});
369
9.10k
370
9.10k
  // Conversions
371
9.10k
  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
372
9.10k
      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
373
9.10k
      .clampScalar(0, s32, s64)
374
9.10k
      .widenScalarToNextPow2(0)
375
9.10k
      .clampScalar(1, s32, s64)
376
9.10k
      .widenScalarToNextPow2(1);
377
9.10k
378
9.10k
  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
379
9.10k
      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
380
9.10k
      .clampScalar(1, s32, s64)
381
9.10k
      .widenScalarToNextPow2(1)
382
9.10k
      .clampScalar(0, s32, s64)
383
9.10k
      .widenScalarToNextPow2(0);
384
9.10k
385
9.10k
  // Control-flow
386
9.10k
  getActionDefinitionsBuilder(G_BRCOND).legalFor({s1, s8, s16, s32});
387
9.10k
  getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
388
9.10k
389
9.10k
  // Select
390
9.10k
  // FIXME: We can probably do a bit better than just scalarizing vector
391
9.10k
  // selects.
392
9.10k
  getActionDefinitionsBuilder(G_SELECT)
393
9.10k
      .legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
394
9.10k
      .clampScalar(0, s32, s64)
395
9.10k
      .widenScalarToNextPow2(0)
396
9.10k
      .scalarize(0);
397
9.10k
398
9.10k
  // Pointer-handling
399
9.10k
  getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
400
9.10k
  getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
401
9.10k
402
9.10k
  getActionDefinitionsBuilder(G_PTRTOINT)
403
9.10k
      .legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0})
404
9.10k
      .maxScalar(0, s64)
405
9.10k
      .widenScalarToNextPow2(0, /*Min*/ 8);
406
9.10k
407
9.10k
  getActionDefinitionsBuilder(G_INTTOPTR)
408
146k
      .unsupportedIf([&](const LegalityQuery &Query) {
409
146k
        return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
410
146k
      })
411
9.10k
      .legalFor({{p0, s64}});
412
9.10k
413
9.10k
  // Casts for 32 and 64-bit width types are just copies.
414
9.10k
  // Same for 128-bit width type, except they are on the FPR bank.
415
9.10k
  getActionDefinitionsBuilder(G_BITCAST)
416
9.10k
      // FIXME: This is wrong since G_BITCAST is not allowed to change the
417
9.10k
      // number of bits but it's what the previous code described and fixing
418
9.10k
      // it breaks tests.
419
9.10k
      .legalForCartesianProduct({s1, s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
420
9.10k
                                 v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,
421
9.10k
                                 v2p0});
422
9.10k
423
9.10k
  getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
424
9.10k
425
9.10k
  // va_list must be a pointer, but most sized types are pretty easy to handle
426
9.10k
  // as the destination.
427
9.10k
  getActionDefinitionsBuilder(G_VAARG)
428
9.10k
      .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
429
9.10k
      .clampScalar(0, s8, s64)
430
9.10k
      .widenScalarToNextPow2(0, /*Min*/ 8);
431
9.10k
432
9.10k
  if (ST.hasLSE()) {
433
54
    getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
434
54
        .lowerIf(all(
435
54
            typeInSet(0, {s8, s16, s32, s64}), typeIs(1, s1), typeIs(2, p0),
436
54
            atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));
437
54
438
54
    getActionDefinitionsBuilder(
439
54
        {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
440
54
         G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
441
54
         G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX, G_ATOMIC_CMPXCHG})
442
54
        .legalIf(all(
443
54
            typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0),
444
54
            atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));
445
54
  }
446
9.10k
447
9.10k
  getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
448
9.10k
449
9.10k
  // Merge/Unmerge
450
18.2k
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
451
18.2k
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
452
18.2k
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
453
18.2k
454
18.2k
    auto notValidElt = [](const LegalityQuery &Query, unsigned TypeIdx) {
455
784
      const LLT &Ty = Query.Types[TypeIdx];
456
784
      if (Ty.isVector()) {
457
377
        const LLT &EltTy = Ty.getElementType();
458
377
        if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
459
162
          return true;
460
215
        if (!isPowerOf2_32(EltTy.getSizeInBits()))
461
0
          return true;
462
622
      }
463
622
      return false;
464
622
    };
465
18.2k
466
18.2k
    // FIXME: This rule is horrible, but specifies the same as what we had
467
18.2k
    // before with the particularly strange definitions removed (e.g.
468
18.2k
    // s8 = G_MERGE_VALUES s32, s32).
469
18.2k
    // Part of the complexity comes from these ops being extremely flexible. For
470
18.2k
    // example, you can build/decompose vectors with it, concatenate vectors,
471
18.2k
    // etc. and in addition to this you can also bitcast with it at the same
472
18.2k
    // time. We've been considering breaking it up into multiple ops to make it
473
18.2k
    // more manageable throughout the backend.
474
18.2k
    getActionDefinitionsBuilder(Op)
475
18.2k
        // Break up vectors with weird elements into scalars
476
18.2k
        .fewerElementsIf(
477
18.2k
            [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
478
18.2k
            scalarize(0))
479
18.2k
        .fewerElementsIf(
480
18.2k
            [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
481
18.2k
            scalarize(1))
482
18.2k
        // Clamp the big scalar to s8-s512 and make it either a power of 2, 192,
483
18.2k
        // or 384.
484
18.2k
        .clampScalar(BigTyIdx, s8, s512)
485
18.2k
        .widenScalarIf(
486
18.2k
            [=](const LegalityQuery &Query) {
487
230
              const LLT &Ty = Query.Types[BigTyIdx];
488
230
              return !isPowerOf2_32(Ty.getSizeInBits()) &&
489
230
                     Ty.getSizeInBits() % 64 != 0;
490
230
            },
491
18.2k
            [=](const LegalityQuery &Query) {
492
0
              // Pick the next power of 2, or a multiple of 64 over 128.
493
0
              // Whichever is smaller.
494
0
              const LLT &Ty = Query.Types[BigTyIdx];
495
0
              unsigned NewSizeInBits = 1
496
0
                                       << Log2_32_Ceil(Ty.getSizeInBits() + 1);
497
0
              if (NewSizeInBits >= 256) {
498
0
                unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
499
0
                if (RoundedTo < NewSizeInBits)
500
0
                  NewSizeInBits = RoundedTo;
501
0
              }
502
0
              return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
503
0
            })
504
18.2k
        // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
505
18.2k
        // worth considering the multiples of 64 since 2*192 and 2*384 are not
506
18.2k
        // valid.
507
18.2k
        .clampScalar(LitTyIdx, s8, s256)
508
18.2k
        .widenScalarToNextPow2(LitTyIdx, /*Min*/ 8)
509
18.2k
        // So at this point, we have s8, s16, s32, s64, s128, s192, s256, s384,
510
18.2k
        // s512, <X x s8>, <X x s16>, <X x s32>, or <X x s64>.
511
18.2k
        // At this point it's simple enough to accept the legal types.
512
18.2k
        .legalIf([=](const LegalityQuery &Query) {
513
227
          const LLT &BigTy = Query.Types[BigTyIdx];
514
227
          const LLT &LitTy = Query.Types[LitTyIdx];
515
227
          if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
516
0
            return false;
517
227
          if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
518
0
            return false;
519
227
          return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0;
520
227
        })
521
18.2k
        // Any vectors left are the wrong size. Scalarize them.
522
18.2k
      .scalarize(0)
523
18.2k
      .scalarize(1);
524
18.2k
  }
525
9.10k
526
9.10k
  getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
527
9.10k
      .unsupportedIf([=](const LegalityQuery &Query) {
528
3.48k
        const LLT &EltTy = Query.Types[1].getElementType();
529
3.48k
        return Query.Types[0] != EltTy;
530
3.48k
      })
531
9.10k
      .minScalar(2, s64)
532
9.10k
      .legalIf([=](const LegalityQuery &Query) {
533
3.48k
        const LLT &VecTy = Query.Types[1];
534
3.48k
        return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
535
3.48k
               VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32;
536
3.48k
      });
537
9.10k
538
9.10k
  getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
539
10.2k
      .legalIf([=](const LegalityQuery &Query) {
540
10.2k
        const LLT &VecTy = Query.Types[0];
541
10.2k
        // TODO: Support s8 and s16
542
10.2k
        return VecTy == v2s32 || VecTy == v4s32 || VecTy == v2s64;
543
10.2k
      });
544
9.10k
545
9.10k
  getActionDefinitionsBuilder(G_BUILD_VECTOR)
546
9.10k
      .legalFor({{v4s16, s16},
547
9.10k
                 {v8s16, s16},
548
9.10k
                 {v2s32, s32},
549
9.10k
                 {v4s32, s32},
550
9.10k
                 {v2p0, p0},
551
9.10k
                 {v2s64, s64}})
552
9.10k
      .clampNumElements(0, v4s32, v4s32)
553
9.10k
      .clampNumElements(0, v2s64, v2s64)
554
9.10k
555
9.10k
      // Deal with larger scalar types, which will be implicitly truncated.
556
9.10k
      .legalIf([=](const LegalityQuery &Query) {
557
308
        return Query.Types[0].getScalarSizeInBits() <
558
308
               Query.Types[1].getSizeInBits();
559
308
      })
560
9.10k
      .minScalarSameAs(1, 0);
561
9.10k
562
9.10k
  getActionDefinitionsBuilder(G_CTLZ).legalForCartesianProduct(
563
9.10k
      {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
564
9.10k
      .scalarize(1);
565
9.10k
566
9.10k
  getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
567
11.0k
      .legalIf([=](const LegalityQuery &Query) {
568
11.0k
        const LLT &DstTy = Query.Types[0];
569
11.0k
        const LLT &SrcTy = Query.Types[1];
570
11.0k
        // For now just support the TBL2 variant which needs the source vectors
571
11.0k
        // to be the same size as the dest.
572
11.0k
        if (DstTy != SrcTy)
573
1.38k
          return false;
574
23.7k
        for (auto &Ty : {v2s32, v4s32, v2s64}) {
575
23.7k
          if (DstTy == Ty)
576
9.59k
            return true;
577
23.7k
        }
578
9.66k
        return false;
579
9.66k
      })
580
9.10k
      // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
581
9.10k
      // just want those lowered into G_BUILD_VECTOR
582
9.10k
      .lowerIf([=](const LegalityQuery &Query) {
583
1.45k
        return !Query.Types[1].isVector();
584
1.45k
      })
585
9.10k
      .clampNumElements(0, v4s32, v4s32)
586
9.10k
      .clampNumElements(0, v2s64, v2s64);
587
9.10k
588
9.10k
  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
589
9.10k
      .legalFor({{v4s32, v2s32}, {v8s16, v4s16}});
590
9.10k
591
9.10k
  getActionDefinitionsBuilder(G_JUMP_TABLE)
592
9.10k
    .legalFor({{p0}, {s64}});
593
9.10k
594
9.10k
  getActionDefinitionsBuilder(G_BRJT).legalIf([=](const LegalityQuery &Query) {
595
2.19k
    return Query.Types[0] == p0 && Query.Types[1] == s64;
596
2.19k
  });
597
9.10k
598
9.10k
  computeTables();
599
9.10k
  verify(*ST.getInstrInfo());
600
9.10k
}
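
A worked reading of the counts above, using only numbers from this report: the constructor body executed 9.10k times, and the merge/unmerge loop header at line 450 shows 18.2k = 2 × 9.10k, i.e. one iteration per opcode per construction. The predicate counts show how often a rule was actually consulted: for example, the G_IMPLICIT_DEF fewerElementsIf predicate at line 61 ran 308 times, and its v2s64 mutation at line 68 fired exactly once.
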
601
602
bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
603
                                          MachineRegisterInfo &MRI,
604
                                          MachineIRBuilder &MIRBuilder,
605
22.1k
                                          GISelChangeObserver &Observer) const {
606
22.1k
  switch (MI.getOpcode()) {
607
22.1k
  default:
608
0
    // No idea what to do.
609
0
    return false;
610
22.1k
  case TargetOpcode::G_VAARG:
611
84
    return legalizeVaArg(MI, MRI, MIRBuilder);
612
22.1k
  case TargetOpcode::G_LOAD:
613
562
  case TargetOpcode::G_STORE:
614
562
    return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
615
21.4k
  case TargetOpcode::G_SHL:
616
21.4k
  case TargetOpcode::G_ASHR:
617
21.4k
  case TargetOpcode::G_LSHR:
618
21.4k
    return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
619
0
  }
620
0
621
0
  llvm_unreachable("expected switch to return");
622
0
}
623
624
bool AArch64LegalizerInfo::legalizeIntrinsic(
625
    MachineInstr &MI, MachineRegisterInfo &MRI,
626
91.9k
    MachineIRBuilder &MIRBuilder) const {
627
91.9k
  switch (MI.getIntrinsicID()) {
628
91.9k
  case Intrinsic::memcpy:
629
34.0k
  case Intrinsic::memset:
630
34.0k
  case Intrinsic::memmove:
631
34.0k
    if (createMemLibcall(MIRBuilder, MRI, MI) ==
632
34.0k
        LegalizerHelper::UnableToLegalize)
633
0
      return false;
634
34.0k
    MI.eraseFromParent();
635
34.0k
    return true;
636
57.8k
  default:
637
57.8k
    break;
638
57.8k
  }
639
57.8k
  return true;
640
57.8k
}
641
642
bool AArch64LegalizerInfo::legalizeShlAshrLshr(
643
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
644
21.4k
    GISelChangeObserver &Observer) const {
645
21.4k
  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
646
21.4k
         MI.getOpcode() == TargetOpcode::G_LSHR ||
647
21.4k
         MI.getOpcode() == TargetOpcode::G_SHL);
648
21.4k
  // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
649
21.4k
  // imported patterns can select it later. Either way, it will be legal.
650
21.4k
  Register AmtReg = MI.getOperand(2).getReg();
651
21.4k
  auto *CstMI = MRI.getVRegDef(AmtReg);
652
21.4k
  assert(CstMI && "expected to find a vreg def");
653
21.4k
  if (CstMI->getOpcode() != TargetOpcode::G_CONSTANT)
654
2.46k
    return true;
655
18.9k
  // Check the shift amount is in range for an immediate form.
656
18.9k
  unsigned Amount = CstMI->getOperand(1).getCImm()->getZExtValue();
657
18.9k
  if (Amount > 31)
658
0
    return true; // This will have to remain a register variant.
659
18.9k
  assert(MRI.getType(AmtReg).getSizeInBits() == 32);
660
18.9k
  MIRBuilder.setInstr(MI);
661
18.9k
  auto ExtCst = MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
662
18.9k
  MI.getOperand(2).setReg(ExtCst.getReg(0));
663
18.9k
  return true;
664
18.9k
}
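
To make the rewrite in legalizeShlAshrLshr concrete, here is a schematic before/after in generic MIR (virtual register numbers are invented for illustration; only the shift-amount operand changes):

    ; before: 32-bit shift whose amount is a G_CONSTANT
    %1:_(s32) = G_CONSTANT i32 5
    %2:_(s32) = G_LSHR %0:_(s32), %1(s32)
    ; after: the amount is zero-extended to s64 so the imported
    ; immediate-shift patterns can select it
    %3:_(s64) = G_ZEXT %1(s32)
    %2:_(s32) = G_LSHR %0:_(s32), %3(s64)
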
665
666
bool AArch64LegalizerInfo::legalizeLoadStore(
667
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
668
562
    GISelChangeObserver &Observer) const {
669
562
  assert(MI.getOpcode() == TargetOpcode::G_STORE ||
670
562
         MI.getOpcode() == TargetOpcode::G_LOAD);
671
562
  // Here we just try to handle vector loads/stores where our value type might
672
562
  // have pointer elements, which the SelectionDAG importer can't handle. To
673
562
  // allow the existing patterns for s64 to fire for p0, we just try to bitcast
674
562
  // the value to use s64 types.
675
562
676
562
  // Custom legalization requires that the instruction, if not deleted, be fully
677
562
  // legalized. In order to allow further legalization of the inst, we create
678
562
  // a new instruction and erase the existing one.
679
562
680
562
  unsigned ValReg = MI.getOperand(0).getReg();
681
562
  const LLT ValTy = MRI.getType(ValReg);
682
562
683
562
  if (!ValTy.isVector() || !ValTy.getElementType().isPointer() ||
684
562
      ValTy.getElementType().getAddressSpace() != 0) {
685
0
    LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
686
0
    return false;
687
0
  }
688
562
689
562
  MIRBuilder.setInstr(MI);
690
562
  unsigned PtrSize = ValTy.getElementType().getSizeInBits();
691
562
  const LLT NewTy = LLT::vector(ValTy.getNumElements(), PtrSize);
692
562
  auto &MMO = **MI.memoperands_begin();
693
562
  if (MI.getOpcode() == TargetOpcode::G_STORE) {
694
561
    auto Bitcast = MIRBuilder.buildBitcast({NewTy}, {ValReg});
695
561
    MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1).getReg(), MMO);
696
561
  } else {
697
1
    unsigned NewReg = MRI.createGenericVirtualRegister(NewTy);
698
1
    auto NewLoad = MIRBuilder.buildLoad(NewReg, MI.getOperand(1).getReg(), MMO);
699
1
    MIRBuilder.buildBitcast({ValReg}, {NewLoad});
700
1
  }
701
562
  MI.eraseFromParent();
702
562
  return true;
703
562
}
704
705
bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
706
                                         MachineRegisterInfo &MRI,
707
84
                                         MachineIRBuilder &MIRBuilder) const {
708
84
  MIRBuilder.setInstr(MI);
709
84
  MachineFunction &MF = MIRBuilder.getMF();
710
84
  unsigned Align = MI.getOperand(2).getImm();
711
84
  Register Dst = MI.getOperand(0).getReg();
712
84
  Register ListPtr = MI.getOperand(1).getReg();
713
84
714
84
  LLT PtrTy = MRI.getType(ListPtr);
715
84
  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
716
84
717
84
  const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
718
84
  Register List = MRI.createGenericVirtualRegister(PtrTy);
719
84
  MIRBuilder.buildLoad(
720
84
      List, ListPtr,
721
84
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
722
84
                               PtrSize, /* Align = */ PtrSize));
723
84
724
84
  Register DstPtr;
725
84
  if (Align > PtrSize) {
726
1
    // Realign the list to the actual required alignment.
727
1
    auto AlignMinus1 = MIRBuilder.buildConstant(IntPtrTy, Align - 1);
728
1
729
1
    auto ListTmp = MIRBuilder.buildGEP(PtrTy, List, AlignMinus1.getReg(0));
730
1
731
1
    DstPtr = MRI.createGenericVirtualRegister(PtrTy);
732
1
    MIRBuilder.buildPtrMask(DstPtr, ListTmp, Log2_64(Align));
733
1
  } else
734
83
    DstPtr = List;
735
84
736
84
  uint64_t ValSize = MRI.getType(Dst).getSizeInBits() / 8;
737
84
  MIRBuilder.buildLoad(
738
84
      Dst, DstPtr,
739
84
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
740
84
                               ValSize, std::max(Align, PtrSize)));
741
84
742
84
  auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrSize));
743
84
744
84
  auto NewList = MIRBuilder.buildGEP(PtrTy, DstPtr, Size.getReg(0));
745
84
746
84
  MIRBuilder.buildStore(
747
84
      NewList, ListPtr,
748
84
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore,
749
84
                               PtrSize, /* Align = */ PtrSize));
750
84
751
84
  MI.eraseFromParent();
752
84
  return true;
753
84
}
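
As a worked example of the va_arg lowering above: p0 is defined earlier in this file as a 64-bit pointer, so PtrSize here is 8. For a G_VAARG producing s32 with the default alignment, ValSize = 32 / 8 = 4 and alignTo(4, 8) = 8, so the value is loaded from the current list pointer and the pointer written back is advanced by 8 bytes; the realignment path, which the report shows executing only once, is taken only when the requested alignment exceeds the 8-byte pointer size.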