Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
/// \file This file implements the LegalizerHelper class to legalize
10
/// individual instructions and the LegalizeMachineIR wrapper pass for the
11
/// primary legalization.
12
//
13
//===----------------------------------------------------------------------===//
14
15
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
16
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
17
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
18
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
19
#include "llvm/CodeGen/MachineRegisterInfo.h"
20
#include "llvm/CodeGen/TargetInstrInfo.h"
21
#include "llvm/CodeGen/TargetLowering.h"
22
#include "llvm/CodeGen/TargetSubtargetInfo.h"
23
#include "llvm/Support/Debug.h"
24
#include "llvm/Support/MathExtras.h"
25
#include "llvm/Support/raw_ostream.h"
26
27
#define DEBUG_TYPE "legalizer"
28
29
using namespace llvm;
30
using namespace LegalizeActions;
31
32
/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
33
///
34
/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
35
/// with any leftover piece as type \p LeftoverTy
36
///
37
/// Returns -1 in the first element of the pair if the breakdown is not
38
/// satisfiable.
39
static std::pair<int, int>
40
323
getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
41
323
  assert(!LeftoverTy.isValid() && "this is an out argument");
42
323
43
323
  unsigned Size = OrigTy.getSizeInBits();
44
323
  unsigned NarrowSize = NarrowTy.getSizeInBits();
45
323
  unsigned NumParts = Size / NarrowSize;
46
323
  unsigned LeftoverSize = Size - NumParts * NarrowSize;
47
323
  assert(Size > NarrowSize);
48
323
49
323
  if (LeftoverSize == 0)
50
310
    return {NumParts, 0};
51
13
52
13
  if (NarrowTy.isVector()) {
53
6
    unsigned EltSize = OrigTy.getScalarSizeInBits();
54
6
    if (LeftoverSize % EltSize != 0)
55
0
      return {-1, -1};
56
6
    LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
57
7
  } else {
58
7
    LeftoverTy = LLT::scalar(LeftoverSize);
59
7
  }
60
13
61
13
  int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
62
13
  return std::make_pair(NumParts, NumLeftover);
63
13
}
64
65
LegalizerHelper::LegalizerHelper(MachineFunction &MF,
66
                                 GISelChangeObserver &Observer,
67
                                 MachineIRBuilder &Builder)
68
    : MIRBuilder(Builder), MRI(MF.getRegInfo()),
69
240k
      LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) {
70
240k
  MIRBuilder.setMF(MF);
71
240k
  MIRBuilder.setChangeObserver(Observer);
72
240k
}
73
74
LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
75
                                 GISelChangeObserver &Observer,
76
                                 MachineIRBuilder &B)
77
23
    : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
78
23
  MIRBuilder.setMF(MF);
79
23
  MIRBuilder.setChangeObserver(Observer);
80
23
}
81
LegalizerHelper::LegalizeResult
82
13.9M
LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
83
13.9M
  LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));
84
13.9M
85
13.9M
  if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
86
13.9M
      
MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS13.9M
)
87
92.1k
    return LI.legalizeIntrinsic(MI, MRI, MIRBuilder) ? 
Legalized92.1k
88
92.1k
                                                     : 
UnableToLegalize4
;
89
13.8M
  auto Step = LI.getAction(MI, MRI);
90
13.8M
  switch (Step.Action) {
91
13.8M
  case Legal:
92
12.4M
    LLVM_DEBUG(dbgs() << ".. Already legal\n");
93
12.4M
    return AlreadyLegal;
94
13.8M
  case Libcall:
95
1.41k
    LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
96
1.41k
    return libcall(MI);
97
13.8M
  case NarrowScalar:
98
580
    LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
99
580
    return narrowScalar(MI, Step.TypeIdx, Step.NewType);
100
13.8M
  case WidenScalar:
101
1.28M
    LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
102
1.28M
    return widenScalar(MI, Step.TypeIdx, Step.NewType);
103
13.8M
  case Lower:
104
9.95k
    LLVM_DEBUG(dbgs() << ".. Lower\n");
105
9.95k
    return lower(MI, Step.TypeIdx, Step.NewType);
106
13.8M
  case FewerElements:
107
2.37k
    LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
108
2.37k
    return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
109
13.8M
  case MoreElements:
110
207
    LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
111
207
    return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
112
13.8M
  case Custom:
113
22.5k
    LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
114
22.5k
    return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
115
22.5k
                                                            : 
UnableToLegalize0
;
116
13.8M
  default:
117
4.45k
    LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
118
4.45k
    return UnableToLegalize;
119
13.8M
  }
120
13.8M
}
121
122
void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
123
3.37k
                                   SmallVectorImpl<Register> &VRegs) {
124
14.3k
  for (int i = 0; i < NumParts; 
++i11.0k
)
125
11.0k
    VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
126
3.37k
  MIRBuilder.buildUnmerge(VRegs, Reg);
127
3.37k
}
128
129
bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
130
                                   LLT MainTy, LLT &LeftoverTy,
131
                                   SmallVectorImpl<Register> &VRegs,
132
601
                                   SmallVectorImpl<Register> &LeftoverRegs) {
133
601
  assert(!LeftoverTy.isValid() && "this is an out argument");
134
601
135
601
  unsigned RegSize = RegTy.getSizeInBits();
136
601
  unsigned MainSize = MainTy.getSizeInBits();
137
601
  unsigned NumParts = RegSize / MainSize;
138
601
  unsigned LeftoverSize = RegSize - NumParts * MainSize;
139
601
140
601
  // Use an unmerge when possible.
141
601
  if (LeftoverSize == 0) {
142
2.03k
    for (unsigned I = 0; I < NumParts; 
++I1.47k
)
143
1.47k
      VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
144
562
    MIRBuilder.buildUnmerge(VRegs, Reg);
145
562
    return true;
146
562
  }
147
39
148
39
  if (MainTy.isVector()) {
149
9
    unsigned EltSize = MainTy.getScalarSizeInBits();
150
9
    if (LeftoverSize % EltSize != 0)
151
0
      return false;
152
9
    LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
153
30
  } else {
154
30
    LeftoverTy = LLT::scalar(LeftoverSize);
155
30
  }
156
39
157
39
  // For irregular sizes, extract the individual parts.
158
88
  
for (unsigned I = 0; 39
I != NumParts;
++I49
) {
159
49
    Register NewReg = MRI.createGenericVirtualRegister(MainTy);
160
49
    VRegs.push_back(NewReg);
161
49
    MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
162
49
  }
163
39
164
78
  for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
165
39
       Offset += LeftoverSize) {
166
39
    Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
167
39
    LeftoverRegs.push_back(NewReg);
168
39
    MIRBuilder.buildExtract(NewReg, Reg, Offset);
169
39
  }
170
39
171
39
  return true;
172
39
}
173
174
void LegalizerHelper::insertParts(Register DstReg,
175
                                  LLT ResultTy, LLT PartTy,
176
                                  ArrayRef<Register> PartRegs,
177
                                  LLT LeftoverTy,
178
428
                                  ArrayRef<Register> LeftoverRegs) {
179
428
  if (!LeftoverTy.isValid()) {
180
404
    assert(LeftoverRegs.empty());
181
404
182
404
    if (!ResultTy.isVector()) {
183
122
      MIRBuilder.buildMerge(DstReg, PartRegs);
184
122
      return;
185
122
    }
186
282
187
282
    if (PartTy.isVector())
188
101
      MIRBuilder.buildConcatVectors(DstReg, PartRegs);
189
181
    else
190
181
      MIRBuilder.buildBuildVector(DstReg, PartRegs);
191
282
    return;
192
282
  }
193
24
194
24
  unsigned PartSize = PartTy.getSizeInBits();
195
24
  unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();
196
24
197
24
  Register CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
198
24
  MIRBuilder.buildUndef(CurResultReg);
199
24
200
24
  unsigned Offset = 0;
201
35
  for (Register PartReg : PartRegs) {
202
35
    Register NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
203
35
    MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
204
35
    CurResultReg = NewResultReg;
205
35
    Offset += PartSize;
206
35
  }
207
24
208
48
  for (unsigned I = 0, E = LeftoverRegs.size(); I != E; 
++I24
) {
209
24
    // Use the original output register for the final insert to avoid a copy.
210
24
    Register NewResultReg = (I + 1 == E) ?
211
24
      DstReg : 
MRI.createGenericVirtualRegister(ResultTy)0
;
212
24
213
24
    MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
214
24
    CurResultReg = NewResultReg;
215
24
    Offset += LeftoverPartSize;
216
24
  }
217
24
}
218
219
1.34k
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
220
1.34k
  switch (Opcode) {
221
1.34k
  case TargetOpcode::G_SDIV:
222
20
    assert((Size == 32 || Size == 64) && "Unsupported size");
223
20
    return Size == 64 ? 
RTLIB::SDIV_I642
:
RTLIB::SDIV_I3218
;
224
1.34k
  case TargetOpcode::G_UDIV:
225
20
    assert((Size == 32 || Size == 64) && "Unsupported size");
226
20
    return Size == 64 ? 
RTLIB::UDIV_I642
:
RTLIB::UDIV_I3218
;
227
1.34k
  case TargetOpcode::G_SREM:
228
11
    assert((Size == 32 || Size == 64) && "Unsupported size");
229
11
    return Size == 64 ? 
RTLIB::SREM_I642
:
RTLIB::SREM_I329
;
230
1.34k
  case TargetOpcode::G_UREM:
231
11
    assert((Size == 32 || Size == 64) && "Unsupported size");
232
11
    return Size == 64 ? 
RTLIB::UREM_I642
:
RTLIB::UREM_I329
;
233
1.34k
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
234
4
    assert(Size == 32 && "Unsupported size");
235
4
    return RTLIB::CTLZ_I32;
236
1.34k
  case TargetOpcode::G_FADD:
237
12
    assert((Size == 32 || Size == 64) && "Unsupported size");
238
12
    return Size == 64 ? 
RTLIB::ADD_F646
:
RTLIB::ADD_F326
;
239
1.34k
  case TargetOpcode::G_FSUB:
240
20
    assert((Size == 32 || Size == 64) && "Unsupported size");
241
20
    return Size == 64 ? 
RTLIB::SUB_F6410
:
RTLIB::SUB_F3210
;
242
1.34k
  case TargetOpcode::G_FMUL:
243
8
    assert((Size == 32 || Size == 64) && "Unsupported size");
244
8
    return Size == 64 ? 
RTLIB::MUL_F644
:
RTLIB::MUL_F324
;
245
1.34k
  case TargetOpcode::G_FDIV:
246
8
    assert((Size == 32 || Size == 64) && "Unsupported size");
247
8
    return Size == 64 ? 
RTLIB::DIV_F644
:
RTLIB::DIV_F324
;
248
1.34k
  case TargetOpcode::G_FEXP:
249
182
    assert((Size == 32 || Size == 64) && "Unsupported size");
250
182
    return Size == 64 ? 
RTLIB::EXP_F64125
:
RTLIB::EXP_F3257
;
251
1.34k
  case TargetOpcode::G_FEXP2:
252
80
    assert((Size == 32 || Size == 64) && "Unsupported size");
253
80
    return Size == 64 ? 
RTLIB::EXP2_F6423
:
RTLIB::EXP2_F3257
;
254
1.34k
  case TargetOpcode::G_FREM:
255
48
    return Size == 64 ? 
RTLIB::REM_F6426
:
RTLIB::REM_F3222
;
256
1.34k
  case TargetOpcode::G_FPOW:
257
147
    return Size == 64 ? 
RTLIB::POW_F6473
:
RTLIB::POW_F3274
;
258
1.34k
  case TargetOpcode::G_FMA:
259
6
    assert((Size == 32 || Size == 64) && "Unsupported size");
260
6
    return Size == 64 ? 
RTLIB::FMA_F643
:
RTLIB::FMA_F323
;
261
1.34k
  case TargetOpcode::G_FSIN:
262
215
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
263
215
    return Size == 128 ? 
RTLIB::SIN_F1280
264
215
                       : Size == 64 ? 
RTLIB::SIN_F64117
:
RTLIB::SIN_F3298
;
265
1.34k
  case TargetOpcode::G_FCOS:
266
256
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
267
256
    return Size == 128 ? 
RTLIB::COS_F1280
268
256
                       : Size == 64 ? 
RTLIB::COS_F64158
:
RTLIB::COS_F3298
;
269
1.34k
  case TargetOpcode::G_FLOG10:
270
102
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
271
102
    return Size == 128 ? 
RTLIB::LOG10_F1280
272
102
                       : Size == 64 ? 
RTLIB::LOG10_F6445
:
RTLIB::LOG10_F3257
;
273
1.34k
  case TargetOpcode::G_FLOG:
274
114
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
275
114
    return Size == 128 ? 
RTLIB::LOG_F1280
276
114
                       : Size == 64 ? 
RTLIB::LOG_F6457
:
RTLIB::LOG_F3257
;
277
1.34k
  case TargetOpcode::G_FLOG2:
278
66
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
279
66
    return Size == 128 ? 
RTLIB::LOG2_F1280
280
66
                       : Size == 64 ? 
RTLIB::LOG2_F649
:
RTLIB::LOG2_F3257
;
281
1.34k
  case TargetOpcode::G_FCEIL:
282
8
    assert((Size == 32 || Size == 64) && "Unsupported size");
283
8
    return Size == 64 ? 
RTLIB::CEIL_F644
:
RTLIB::CEIL_F324
;
284
1.34k
  case TargetOpcode::G_FFLOOR:
285
8
    assert((Size == 32 || Size == 64) && "Unsupported size");
286
8
    return Size == 64 ? 
RTLIB::FLOOR_F644
:
RTLIB::FLOOR_F324
;
287
0
  }
288
0
  llvm_unreachable("Unknown libcall function");
289
0
}
290
291
LegalizerHelper::LegalizeResult
292
llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
293
                    const CallLowering::ArgInfo &Result,
294
1.57k
                    ArrayRef<CallLowering::ArgInfo> Args) {
295
1.57k
  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
296
1.57k
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
297
1.57k
  const char *Name = TLI.getLibcallName(Libcall);
298
1.57k
299
1.57k
  MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
300
1.57k
  if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall),
301
1.57k
                     MachineOperand::CreateES(Name), Result, Args))
302
0
    return LegalizerHelper::UnableToLegalize;
303
1.57k
304
1.57k
  return LegalizerHelper::Legalized;
305
1.57k
}
306
307
// Useful for libcalls where all operands have the same type.
308
static LegalizerHelper::LegalizeResult
309
simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
310
1.34k
              Type *OpType) {
311
1.34k
  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
312
1.34k
313
1.34k
  SmallVector<CallLowering::ArgInfo, 3> Args;
314
3.00k
  for (unsigned i = 1; i < MI.getNumOperands(); 
i++1.66k
)
315
1.66k
    Args.push_back({MI.getOperand(i).getReg(), OpType});
316
1.34k
  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
317
1.34k
                       Args);
318
1.34k
}
319
320
LegalizerHelper::LegalizeResult
321
llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
322
34.0k
                       MachineInstr &MI) {
323
34.0k
  assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
324
34.0k
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
325
34.0k
326
34.0k
  SmallVector<CallLowering::ArgInfo, 3> Args;
327
136k
  for (unsigned i = 1; i < MI.getNumOperands(); 
i++102k
) {
328
102k
    Register Reg = MI.getOperand(i).getReg();
329
102k
330
102k
    // Need derive an IR type for call lowering.
331
102k
    LLT OpLLT = MRI.getType(Reg);
332
102k
    Type *OpTy = nullptr;
333
102k
    if (OpLLT.isPointer())
334
47.3k
      OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
335
54.8k
    else
336
54.8k
      OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
337
102k
    Args.push_back({Reg, OpTy});
338
102k
  }
339
34.0k
340
34.0k
  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
341
34.0k
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
342
34.0k
  Intrinsic::ID ID = MI.getOperand(0).getIntrinsicID();
343
34.0k
  RTLIB::Libcall RTLibcall;
344
34.0k
  switch (ID) {
345
34.0k
  case Intrinsic::memcpy:
346
12.7k
    RTLibcall = RTLIB::MEMCPY;
347
12.7k
    break;
348
34.0k
  case Intrinsic::memset:
349
20.8k
    RTLibcall = RTLIB::MEMSET;
350
20.8k
    break;
351
34.0k
  case Intrinsic::memmove:
352
504
    RTLibcall = RTLIB::MEMMOVE;
353
504
    break;
354
34.0k
  default:
355
0
    return LegalizerHelper::UnableToLegalize;
356
34.0k
  }
357
34.0k
  const char *Name = TLI.getLibcallName(RTLibcall);
358
34.0k
359
34.0k
  MIRBuilder.setInstr(MI);
360
34.0k
  MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
361
34.0k
  if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(RTLibcall),
362
34.0k
                     MachineOperand::CreateES(Name),
363
34.0k
                     CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx)), Args))
364
0
    return LegalizerHelper::UnableToLegalize;
365
34.0k
366
34.0k
  return LegalizerHelper::Legalized;
367
34.0k
}
368
369
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
370
72
                                       Type *FromType) {
371
72
  auto ToMVT = MVT::getVT(ToType);
372
72
  auto FromMVT = MVT::getVT(FromType);
373
72
374
72
  switch (Opcode) {
375
72
  case TargetOpcode::G_FPEXT:
376
4
    return RTLIB::getFPEXT(FromMVT, ToMVT);
377
72
  case TargetOpcode::G_FPTRUNC:
378
4
    return RTLIB::getFPROUND(FromMVT, ToMVT);
379
72
  case TargetOpcode::G_FPTOSI:
380
16
    return RTLIB::getFPTOSINT(FromMVT, ToMVT);
381
72
  case TargetOpcode::G_FPTOUI:
382
16
    return RTLIB::getFPTOUINT(FromMVT, ToMVT);
383
72
  case TargetOpcode::G_SITOFP:
384
16
    return RTLIB::getSINTTOFP(FromMVT, ToMVT);
385
72
  case TargetOpcode::G_UITOFP:
386
16
    return RTLIB::getUINTTOFP(FromMVT, ToMVT);
387
0
  }
388
0
  llvm_unreachable("Unsupported libcall function");
389
0
}
390
391
static LegalizerHelper::LegalizeResult
392
conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
393
72
                  Type *FromType) {
394
72
  RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
395
72
  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
396
72
                       {{MI.getOperand(1).getReg(), FromType}});
397
72
}
398
399
LegalizerHelper::LegalizeResult
400
1.41k
LegalizerHelper::libcall(MachineInstr &MI) {
401
1.41k
  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
402
1.41k
  unsigned Size = LLTy.getSizeInBits();
403
1.41k
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
404
1.41k
405
1.41k
  MIRBuilder.setInstr(MI);
406
1.41k
407
1.41k
  switch (MI.getOpcode()) {
408
1.41k
  default:
409
0
    return UnableToLegalize;
410
1.41k
  case TargetOpcode::G_SDIV:
411
66
  case TargetOpcode::G_UDIV:
412
66
  case TargetOpcode::G_SREM:
413
66
  case TargetOpcode::G_UREM:
414
66
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
415
66
    Type *HLTy = IntegerType::get(Ctx, Size);
416
66
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
417
66
    if (Status != Legalized)
418
0
      return Status;
419
66
    break;
420
66
  }
421
1.28k
  case TargetOpcode::G_FADD:
422
1.28k
  case TargetOpcode::G_FSUB:
423
1.28k
  case TargetOpcode::G_FMUL:
424
1.28k
  case TargetOpcode::G_FDIV:
425
1.28k
  case TargetOpcode::G_FMA:
426
1.28k
  case TargetOpcode::G_FPOW:
427
1.28k
  case TargetOpcode::G_FREM:
428
1.28k
  case TargetOpcode::G_FCOS:
429
1.28k
  case TargetOpcode::G_FSIN:
430
1.28k
  case TargetOpcode::G_FLOG10:
431
1.28k
  case TargetOpcode::G_FLOG:
432
1.28k
  case TargetOpcode::G_FLOG2:
433
1.28k
  case TargetOpcode::G_FEXP:
434
1.28k
  case TargetOpcode::G_FEXP2:
435
1.28k
  case TargetOpcode::G_FCEIL:
436
1.28k
  case TargetOpcode::G_FFLOOR: {
437
1.28k
    if (Size > 64) {
438
0
      LLVM_DEBUG(dbgs() << "Size " << Size << " too large to legalize.\n");
439
0
      return UnableToLegalize;
440
0
    }
441
1.28k
    Type *HLTy = Size == 64 ? 
Type::getDoubleTy(Ctx)668
:
Type::getFloatTy(Ctx)612
;
442
1.28k
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
443
1.28k
    if (Status != Legalized)
444
0
      return Status;
445
1.28k
    break;
446
1.28k
  }
447
1.28k
  case TargetOpcode::G_FPEXT: {
448
4
    // FIXME: Support other floating point types (half, fp128 etc)
449
4
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
450
4
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
451
4
    if (ToSize != 64 || FromSize != 32)
452
0
      return UnableToLegalize;
453
4
    LegalizeResult Status = conversionLibcall(
454
4
        MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx));
455
4
    if (Status != Legalized)
456
0
      return Status;
457
4
    break;
458
4
  }
459
4
  case TargetOpcode::G_FPTRUNC: {
460
4
    // FIXME: Support other floating point types (half, fp128 etc)
461
4
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
462
4
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
463
4
    if (ToSize != 32 || FromSize != 64)
464
0
      return UnableToLegalize;
465
4
    LegalizeResult Status = conversionLibcall(
466
4
        MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx));
467
4
    if (Status != Legalized)
468
0
      return Status;
469
4
    break;
470
4
  }
471
32
  case TargetOpcode::G_FPTOSI:
472
32
  case TargetOpcode::G_FPTOUI: {
473
32
    // FIXME: Support other types
474
32
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
475
32
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
476
32
    if ((ToSize != 32 && 
ToSize != 6416
) || (FromSize != 32 &&
FromSize != 6416
))
477
0
      return UnableToLegalize;
478
32
    LegalizeResult Status = conversionLibcall(
479
32
        MI, MIRBuilder,
480
32
        ToSize == 32 ? 
Type::getInt32Ty(Ctx)16
:
Type::getInt64Ty(Ctx)16
,
481
32
        FromSize == 64 ? 
Type::getDoubleTy(Ctx)16
:
Type::getFloatTy(Ctx)16
);
482
32
    if (Status != Legalized)
483
0
      return Status;
484
32
    break;
485
32
  }
486
32
  case TargetOpcode::G_SITOFP:
487
32
  case TargetOpcode::G_UITOFP: {
488
32
    // FIXME: Support other types
489
32
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
490
32
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
491
32
    if ((FromSize != 32 && 
FromSize != 6416
) || (ToSize != 32 &&
ToSize != 6416
))
492
0
      return UnableToLegalize;
493
32
    LegalizeResult Status = conversionLibcall(
494
32
        MI, MIRBuilder,
495
32
        ToSize == 64 ? 
Type::getDoubleTy(Ctx)16
:
Type::getFloatTy(Ctx)16
,
496
32
        FromSize == 32 ? 
Type::getInt32Ty(Ctx)16
:
Type::getInt64Ty(Ctx)16
);
497
32
    if (Status != Legalized)
498
0
      return Status;
499
32
    break;
500
32
  }
501
1.41k
  }
502
1.41k
503
1.41k
  MI.eraseFromParent();
504
1.41k
  return Legalized;
505
1.41k
}
506
507
LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
508
                                                              unsigned TypeIdx,
509
633
                                                              LLT NarrowTy) {
510
633
  MIRBuilder.setInstr(MI);
511
633
512
633
  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
513
633
  uint64_t NarrowSize = NarrowTy.getSizeInBits();
514
633
515
633
  switch (MI.getOpcode()) {
516
633
  default:
517
2
    return UnableToLegalize;
518
633
  case TargetOpcode::G_IMPLICIT_DEF: {
519
6
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
520
6
    // NarrowSize.
521
6
    if (SizeOp0 % NarrowSize != 0)
522
0
      return UnableToLegalize;
523
6
    int NumParts = SizeOp0 / NarrowSize;
524
6
525
6
    SmallVector<Register, 2> DstRegs;
526
18
    for (int i = 0; i < NumParts; 
++i12
)
527
12
      DstRegs.push_back(
528
12
          MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());
529
6
530
6
    Register DstReg = MI.getOperand(0).getReg();
531
6
    if(MRI.getType(DstReg).isVector())
532
0
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
533
6
    else
534
6
      MIRBuilder.buildMerge(DstReg, DstRegs);
535
6
    MI.eraseFromParent();
536
6
    return Legalized;
537
6
  }
538
30
  case TargetOpcode::G_CONSTANT: {
539
30
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
540
30
    const APInt &Val = MI.getOperand(1).getCImm()->getValue();
541
30
    unsigned TotalSize = Ty.getSizeInBits();
542
30
    unsigned NarrowSize = NarrowTy.getSizeInBits();
543
30
    int NumParts = TotalSize / NarrowSize;
544
30
545
30
    SmallVector<Register, 4> PartRegs;
546
89
    for (int I = 0; I != NumParts; 
++I59
) {
547
59
      unsigned Offset = I * NarrowSize;
548
59
      auto K = MIRBuilder.buildConstant(NarrowTy,
549
59
                                        Val.lshr(Offset).trunc(NarrowSize));
550
59
      PartRegs.push_back(K.getReg(0));
551
59
    }
552
30
553
30
    LLT LeftoverTy;
554
30
    unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
555
30
    SmallVector<Register, 1> LeftoverRegs;
556
30
    if (LeftoverBits != 0) {
557
1
      LeftoverTy = LLT::scalar(LeftoverBits);
558
1
      auto K = MIRBuilder.buildConstant(
559
1
        LeftoverTy,
560
1
        Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
561
1
      LeftoverRegs.push_back(K.getReg(0));
562
1
    }
563
30
564
30
    insertParts(MI.getOperand(0).getReg(),
565
30
                Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
566
30
567
30
    MI.eraseFromParent();
568
30
    return Legalized;
569
6
  }
570
24
  case TargetOpcode::G_ADD: {
571
24
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
572
24
    // NarrowSize.
573
24
    if (SizeOp0 % NarrowSize != 0)
574
2
      return UnableToLegalize;
575
22
    // Expand in terms of carry-setting/consuming G_ADDE instructions.
576
22
    int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
577
22
578
22
    SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
579
22
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
580
22
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
581
22
582
22
    Register CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1));
583
22
    MIRBuilder.buildConstant(CarryIn, 0);
584
22
585
79
    for (int i = 0; i < NumParts; 
++i57
) {
586
57
      Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
587
57
      Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
588
57
589
57
      MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
590
57
                            Src2Regs[i], CarryIn);
591
57
592
57
      DstRegs.push_back(DstReg);
593
57
      CarryIn = CarryOut;
594
57
    }
595
22
    Register DstReg = MI.getOperand(0).getReg();
596
22
    if(MRI.getType(DstReg).isVector())
597
0
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
598
22
    else
599
22
      MIRBuilder.buildMerge(DstReg, DstRegs);
600
22
    MI.eraseFromParent();
601
22
    return Legalized;
602
22
  }
603
22
  case TargetOpcode::G_SUB: {
604
6
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
605
6
    // NarrowSize.
606
6
    if (SizeOp0 % NarrowSize != 0)
607
0
      return UnableToLegalize;
608
6
609
6
    int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
610
6
611
6
    SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
612
6
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
613
6
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
614
6
615
6
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
616
6
    Register BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
617
6
    MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut},
618
6
                          {Src1Regs[0], Src2Regs[0]});
619
6
    DstRegs.push_back(DstReg);
620
6
    Register BorrowIn = BorrowOut;
621
16
    for (int i = 1; i < NumParts; 
++i10
) {
622
10
      DstReg = MRI.createGenericVirtualRegister(NarrowTy);
623
10
      BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
624
10
625
10
      MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut},
626
10
                            {Src1Regs[i], Src2Regs[i], BorrowIn});
627
10
628
10
      DstRegs.push_back(DstReg);
629
10
      BorrowIn = BorrowOut;
630
10
    }
631
6
    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
632
6
    MI.eraseFromParent();
633
6
    return Legalized;
634
6
  }
635
14
  case TargetOpcode::G_MUL:
636
14
  case TargetOpcode::G_UMULH:
637
14
    return narrowScalarMul(MI, NarrowTy);
638
14
  case TargetOpcode::G_EXTRACT:
639
0
    return narrowScalarExtract(MI, TypeIdx, NarrowTy);
640
14
  case TargetOpcode::G_INSERT:
641
5
    return narrowScalarInsert(MI, TypeIdx, NarrowTy);
642
44
  case TargetOpcode::G_LOAD: {
643
44
    const auto &MMO = **MI.memoperands_begin();
644
44
    Register DstReg = MI.getOperand(0).getReg();
645
44
    LLT DstTy = MRI.getType(DstReg);
646
44
    if (DstTy.isVector())
647
0
      return UnableToLegalize;
648
44
649
44
    if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
650
8
      Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
651
8
      auto &MMO = **MI.memoperands_begin();
652
8
      MIRBuilder.buildLoad(TmpReg, MI.getOperand(1).getReg(), MMO);
653
8
      MIRBuilder.buildAnyExt(DstReg, TmpReg);
654
8
      MI.eraseFromParent();
655
8
      return Legalized;
656
8
    }
657
36
658
36
    return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
659
36
  }
660
48
  case TargetOpcode::G_ZEXTLOAD:
661
48
  case TargetOpcode::G_SEXTLOAD: {
662
48
    bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
663
48
    Register DstReg = MI.getOperand(0).getReg();
664
48
    Register PtrReg = MI.getOperand(1).getReg();
665
48
666
48
    Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
667
48
    auto &MMO = **MI.memoperands_begin();
668
48
    if (MMO.getSizeInBits() == NarrowSize) {
669
16
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
670
32
    } else {
671
32
      unsigned ExtLoad = ZExt ? 
TargetOpcode::G_ZEXTLOAD16
672
32
        : 
TargetOpcode::G_SEXTLOAD16
;
673
32
      MIRBuilder.buildInstr(ExtLoad)
674
32
        .addDef(TmpReg)
675
32
        .addUse(PtrReg)
676
32
        .addMemOperand(&MMO);
677
32
    }
678
48
679
48
    if (ZExt)
680
24
      MIRBuilder.buildZExt(DstReg, TmpReg);
681
24
    else
682
24
      MIRBuilder.buildSExt(DstReg, TmpReg);
683
48
684
48
    MI.eraseFromParent();
685
48
    return Legalized;
686
48
  }
687
48
  case TargetOpcode::G_STORE: {
688
46
    const auto &MMO = **MI.memoperands_begin();
689
46
690
46
    Register SrcReg = MI.getOperand(0).getReg();
691
46
    LLT SrcTy = MRI.getType(SrcReg);
692
46
    if (SrcTy.isVector())
693
0
      return UnableToLegalize;
694
46
695
46
    int NumParts = SizeOp0 / NarrowSize;
696
46
    unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
697
46
    unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
698
46
    if (SrcTy.isVector() && 
LeftoverBits != 00
)
699
0
      return UnableToLegalize;
700
46
701
46
    if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
702
10
      Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
703
10
      auto &MMO = **MI.memoperands_begin();
704
10
      MIRBuilder.buildTrunc(TmpReg, SrcReg);
705
10
      MIRBuilder.buildStore(TmpReg, MI.getOperand(1).getReg(), MMO);
706
10
      MI.eraseFromParent();
707
10
      return Legalized;
708
10
    }
709
36
710
36
    return reduceLoadStoreWidth(MI, 0, NarrowTy);
711
36
  }
712
42
  case TargetOpcode::G_SELECT:
713
42
    return narrowScalarSelect(MI, TypeIdx, NarrowTy);
714
36
  case TargetOpcode::G_AND:
715
33
  case TargetOpcode::G_OR:
716
33
  case TargetOpcode::G_XOR: {
717
33
    // Legalize bitwise operation:
718
33
    // A = BinOp<Ty> B, C
719
33
    // into:
720
33
    // B1, ..., BN = G_UNMERGE_VALUES B
721
33
    // C1, ..., CN = G_UNMERGE_VALUES C
722
33
    // A1 = BinOp<Ty/N> B1, C1
723
33
    // ...
724
33
    // AN = BinOp<Ty/N> BN, CN
725
33
    // A = G_MERGE_VALUES A1, ..., AN
726
33
    return narrowScalarBasic(MI, TypeIdx, NarrowTy);
727
33
  }
728
279
  case TargetOpcode::G_SHL:
729
279
  case TargetOpcode::G_LSHR:
730
279
  case TargetOpcode::G_ASHR:
731
279
    return narrowScalarShift(MI, TypeIdx, NarrowTy);
732
279
  case TargetOpcode::G_CTLZ:
733
15
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
734
15
  case TargetOpcode::G_CTTZ:
735
15
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
736
15
  case TargetOpcode::G_CTPOP:
737
15
    if (TypeIdx != 0)
738
0
      return UnableToLegalize; // TODO
739
15
740
15
    Observer.changingInstr(MI);
741
15
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
742
15
    Observer.changedInstr(MI);
743
15
    return Legalized;
744
15
  case TargetOpcode::G_INTTOPTR:
745
1
    if (TypeIdx != 1)
746
0
      return UnableToLegalize;
747
1
748
1
    Observer.changingInstr(MI);
749
1
    narrowScalarSrc(MI, NarrowTy, 1);
750
1
    Observer.changedInstr(MI);
751
1
    return Legalized;
752
1
  case TargetOpcode::G_PTRTOINT:
753
1
    if (TypeIdx != 0)
754
0
      return UnableToLegalize;
755
1
756
1
    Observer.changingInstr(MI);
757
1
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
758
1
    Observer.changedInstr(MI);
759
1
    return Legalized;
760
14
  case TargetOpcode::G_PHI: {
761
14
    unsigned NumParts = SizeOp0 / NarrowSize;
762
14
    SmallVector<Register, 2> DstRegs;
763
14
    SmallVector<SmallVector<Register, 2>, 2> SrcRegs;
764
14
    DstRegs.resize(NumParts);
765
14
    SrcRegs.resize(MI.getNumOperands() / 2);
766
14
    Observer.changingInstr(MI);
767
45
    for (unsigned i = 1; i < MI.getNumOperands(); 
i += 231
) {
768
31
      MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
769
31
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
770
31
      extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
771
31
                   SrcRegs[i / 2]);
772
31
    }
773
14
    MachineBasicBlock &MBB = *MI.getParent();
774
14
    MIRBuilder.setInsertPt(MBB, MI);
775
42
    for (unsigned i = 0; i < NumParts; 
++i28
) {
776
28
      DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
777
28
      MachineInstrBuilder MIB =
778
28
          MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
779
90
      for (unsigned j = 1; j < MI.getNumOperands(); 
j += 262
)
780
62
        MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
781
28
    }
782
14
    MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
783
14
    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
784
14
    Observer.changedInstr(MI);
785
14
    MI.eraseFromParent();
786
14
    return Legalized;
787
1
  }
788
3
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
789
3
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
790
3
    if (TypeIdx != 2)
791
0
      return UnableToLegalize;
792
3
793
3
    int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 
21
:
32
;
794
3
    Observer.changingInstr(MI);
795
3
    narrowScalarSrc(MI, NarrowTy, OpIdx);
796
3
    Observer.changedInstr(MI);
797
3
    return Legalized;
798
3
  }
799
20
  case TargetOpcode::G_ICMP: {
800
20
    uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
801
20
    if (NarrowSize * 2 != SrcSize)
802
0
      return UnableToLegalize;
803
20
804
20
    Observer.changingInstr(MI);
805
20
    Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
806
20
    Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
807
20
    MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2).getReg());
808
20
809
20
    Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
810
20
    Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
811
20
    MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3).getReg());
812
20
813
20
    CmpInst::Predicate Pred =
814
20
        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
815
20
816
20
    if (Pred == CmpInst::ICMP_EQ || 
Pred == CmpInst::ICMP_NE18
) {
817
4
      MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
818
4
      MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
819
4
      MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
820
4
      MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
821
4
      MIRBuilder.buildICmp(Pred, MI.getOperand(0).getReg(), Or, Zero);
822
16
    } else {
823
16
      const LLT s1 = LLT::scalar(1);
824
16
      MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, s1, LHSH, RHSH);
825
16
      MachineInstrBuilder CmpHEQ =
826
16
          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, s1, LHSH, RHSH);
827
16
      MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
828
16
          ICmpInst::getUnsignedPredicate(Pred), s1, LHSL, RHSL);
829
16
      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), CmpHEQ, CmpLU, CmpH);
830
16
    }
831
20
    Observer.changedInstr(MI);
832
20
    MI.eraseFromParent();
833
20
    return Legalized;
834
20
  }
835
633
  }
836
633
}
837
838
/// Make use operand \p OpIdx of \p MI read a \p WideTy value.
///
/// An \p ExtOpcode instruction with a \p WideTy result is built (at the
/// builder's current insertion point) from the register the operand currently
/// refers to, and the operand is then redirected to that instruction's result.
void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &SrcOperand = MI.getOperand(OpIdx);
  auto Extended =
      MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {SrcOperand.getReg()});
  SrcOperand.setReg(Extended.getReg(0));
}
844
845
void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
846
134
                                      unsigned OpIdx) {
847
134
  MachineOperand &MO = MI.getOperand(OpIdx);
848
134
  auto ExtB = MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {NarrowTy},
849
134
                                    {MO.getReg()});
850
134
  MO.setReg(ExtB->getOperand(0).getReg());
851
134
}
852
853
/// Make def operand \p OpIdx of \p MI produce a \p WideTy value.
///
/// The operand is switched to a fresh \p WideTy virtual register, and a
/// \p TruncOpcode instruction is built that defines the operand's original
/// register from that new wide register.
void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned TruncOpcode) {
  MachineOperand &DefOperand = MI.getOperand(OpIdx);
  Register WideDef = MRI.createGenericVirtualRegister(WideTy);

  // Step the insertion point forward by one before building, so the new
  // instruction lands after the instruction the builder currently points at
  // (presumably MI, whose result it consumes -- confirm against callers).
  auto InsertAfter = MIRBuilder.getInsertPt();
  ++InsertAfter;
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), InsertAfter);

  MIRBuilder.buildInstr(TruncOpcode, {DefOperand.getReg()}, {WideDef});
  DefOperand.setReg(WideDef);
}
861
862
/// Make def operand \p OpIdx of \p MI produce a \p NarrowTy value.
///
/// The operand is switched to a fresh \p NarrowTy virtual register, and an
/// \p ExtOpcode instruction is built that defines the operand's original
/// register from that new narrow register.
void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &DefOperand = MI.getOperand(OpIdx);
  Register NarrowDef = MRI.createGenericVirtualRegister(NarrowTy);

  // Step the insertion point forward by one before building, so the new
  // instruction lands after the instruction the builder currently points at
  // (presumably MI, whose result it consumes -- confirm against callers).
  auto InsertAfter = MIRBuilder.getInsertPt();
  ++InsertAfter;
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), InsertAfter);

  MIRBuilder.buildInstr(ExtOpcode, {DefOperand.getReg()}, {NarrowDef});
  DefOperand.setReg(NarrowDef);
}
870
871
void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
872
153
                                            unsigned OpIdx) {
873
153
  MachineOperand &MO = MI.getOperand(OpIdx);
874
153
  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
875
153
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
876
153
  MIRBuilder.buildExtract(MO.getReg(), DstExt, 0);
877
153
  MO.setReg(DstExt);
878
153
}
879
880
void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
881
118
                                            unsigned OpIdx) {
882
118
  MachineOperand &MO = MI.getOperand(OpIdx);
883
118
884
118
  LLT OldTy = MRI.getType(MO.getReg());
885
118
  unsigned OldElts = OldTy.getNumElements();
886
118
  unsigned NewElts = MoreTy.getNumElements();
887
118
888
118
  unsigned NumParts = NewElts / OldElts;
889
118
890
118
  // Use concat_vectors if the result is a multiple of the number of elements.
891
118
  if (NumParts * OldElts == NewElts) {
892
2
    SmallVector<Register, 8> Parts;
893
2
    Parts.push_back(MO.getReg());
894
2
895
2
    Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
896
6
    for (unsigned I = 1; I != NumParts; 
++I4
)
897
4
      Parts.push_back(ImpDef);
898
2
899
2
    auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
900
2
    MO.setReg(Concat.getReg(0));
901
2
    return;
902
2
  }
903
116
904
116
  Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
905
116
  Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
906
116
  MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
907
116
  MO.setReg(MoreReg);
908
116
}
909
910
/// Widen the source type (type index 1) of a scalar G_MERGE_VALUES to
/// \p WideTy.
///
/// Returns UnableToLegalize for any other type index or for a vector
/// destination. When \p WideTy is at least as large as the destination, the
/// sources are zero-extended and packed with shift/or (plus a final truncate
/// if \p WideTy is strictly larger). Otherwise the sources are split into
/// GCD-sized pieces and re-merged into \p WideTy values. \p MI is erased on
/// success.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
                                        LLT WideTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  Register Src1 = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(Src1);
  const int DstSize = DstTy.getSizeInBits();
  const int SrcSize = SrcTy.getSizeInBits();
  const int WideSize = WideTy.getSizeInBits();
  // Number of WideTy values needed to cover the destination, rounded up.
  const int NumMerge = (DstSize + WideSize - 1) / WideSize;

  unsigned NumOps = MI.getNumOperands();
  unsigned NumSrc = MI.getNumOperands() - 1;
  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;

  if (WideSize >= DstSize) {
    // Directly pack the bits in the target type.
    Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);

    for (unsigned I = 2; I != NumOps; ++I) {
      const unsigned Offset = (I - 1) * PartSize;

      Register SrcReg = MI.getOperand(I).getReg();
      assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));

      auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);

      // The last OR can define the destination directly when no truncate is
      // needed afterwards.
      Register NextResult = I + 1 == NumOps && WideSize == DstSize ? DstReg :
        MRI.createGenericVirtualRegister(WideTy);

      auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
      auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
      MIRBuilder.buildOr(NextResult, ResultReg, Shl);
      ResultReg = NextResult;
    }

    if (WideSize > DstSize)
      MIRBuilder.buildTrunc(DstReg, ResultReg);

    MI.eraseFromParent();
    return Legalized;
  }

  // Unmerge the original values to the GCD type, and recombine to the next
  // multiple greater than the original type.
  //
  // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
  // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
  // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
  // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
  // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
  // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
  // %12:_(s12) = G_MERGE_VALUES %10, %11
  //
  // Padding with undef if necessary:
  //
  // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
  // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
  // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
  // %7:_(s2) = G_IMPLICIT_DEF
  // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
  // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
  // %10:_(s12) = G_MERGE_VALUES %8, %9
  // %2:_(s8) = G_TRUNC %10

  const int GCD = greatestCommonDivisor(SrcSize, WideSize);
  LLT GCDTy = LLT::scalar(GCD);

  SmallVector<Register, 8> Parts;
  SmallVector<Register, 8> NewMergeRegs;
  SmallVector<Register, 8> Unmerges;
  LLT WideDstTy = LLT::scalar(NumMerge * WideSize);

  // Decompose the original operands if they don't evenly divide.
  for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
    Register SrcReg = MI.getOperand(I).getReg();
    if (GCD == SrcSize) {
      Unmerges.push_back(SrcReg);
    } else {
      auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
      for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
        Unmerges.push_back(Unmerge.getReg(J));
    }
  }

  // Each WideTy merge below consumes PartsPerGCD GCD-sized pieces, so exactly
  // NumMerge * PartsPerGCD pieces are needed in total.
  const int PartsPerGCD = WideSize / GCD;
  const int NumPieces = NumMerge * PartsPerGCD;

  // Pad with undef to the next size that is a multiple of the requested size.
  // The target is a count of pieces, not a size in bits, so an undef is only
  // created when at least one padding piece is really required.
  if (static_cast<int>(Unmerges.size()) < NumPieces) {
    Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    while (static_cast<int>(Unmerges.size()) < NumPieces)
      Unmerges.push_back(UndefReg);
  }

  // Build merges of each piece.
  ArrayRef<Register> Slicer(Unmerges);
  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
    auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
    NewMergeRegs.push_back(Merge.getReg(0));
  }

  // A truncate may be necessary if the requested type doesn't evenly divide the
  // original result type.
  if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
    MIRBuilder.buildMerge(DstReg, NewMergeRegs);
  } else {
    auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
    MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
  }

  MI.eraseFromParent();
  return Legalized;
}
1029
1030
/// Widen the result type (type index 0) of a scalar G_UNMERGE_VALUES to
/// \p WideTy.
///
/// Returns UnableToLegalize for any other type index, or when either the
/// source or the first result is not a scalar. Otherwise the source operand is
/// replaced by a value of NumResults * WideTy bits and every result is widened
/// in place through widenScalarDst.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
                                          LLT WideTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  // The source is the last operand; all operands before it are results.
  const unsigned NumResults = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumResults).getReg();
  const LLT SrcTy = MRI.getType(SrcReg);
  const LLT ResultTy = MRI.getType(MI.getOperand(0).getReg());
  if (!SrcTy.isScalar() || !ResultTy.isScalar())
    return UnableToLegalize;

  const unsigned WideBits = WideTy.getSizeInBits();
  const LLT PackedTy = LLT::scalar(NumResults * WideBits);
  const unsigned ExtraBits = WideBits - ResultTy.getSizeInBits();

  // Start from the zero-extended source, then for each I in [1, NumResults)
  // OR in the running value shifted left by ExtraBits * I.
  auto Packed = MIRBuilder.buildZExt(PackedTy, SrcReg);
  for (unsigned I = 1; I != NumResults; ++I) {
    auto ShiftAmt = MIRBuilder.buildConstant(PackedTy, ExtraBits * I);
    auto Shifted = MIRBuilder.buildShl(PackedTy, Packed, ShiftAmt);
    Packed = MIRBuilder.buildOr(PackedTy, Packed, Shifted);
  }

  Observer.changingInstr(MI);

  MI.getOperand(NumResults).setReg(Packed.getReg(0));
  for (unsigned I = 0; I != NumResults; ++I)
    widenScalarDst(MI, WideTy, I);

  Observer.changedInstr(MI);

  return Legalized;
}
1069
1070
/// Widen a G_EXTRACT to \p WideTy.
///
/// For type index 0 (the result) with scalar or pointer source, the extract is
/// replaced by a logical shift right by the extract offset followed by a
/// truncate to the original result, and \p MI is erased. For any other type
/// index the source operand is any-extended in place; for a vector source the
/// offset is rescaled and the result widened as well.
///
/// Returns UnableToLegalize for vector operands or a pointer result at type
/// index 0, for a source pointer in a non-integral address space, for a
/// source that is neither scalar nor vector, for a vector source whose
/// element type differs from the result type, and for an offset that is not a
/// multiple of the vector element size.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                    LLT WideTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  LLT DstTy = MRI.getType(DstReg);
  unsigned Offset = MI.getOperand(2).getImm();

  if (TypeIdx == 0) {
    if (SrcTy.isVector() || DstTy.isVector())
      return UnableToLegalize;

    // Reject pointer results before emitting anything, so a failed attempt
    // does not leave a stray G_PTRTOINT behind.
    if (DstTy.isPointer())
      return UnableToLegalize;

    SrcOp Src(SrcReg);
    if (SrcTy.isPointer()) {
      // Extracts from pointers can be handled only if they are really just
      // simple integers.
      const DataLayout &DL = MIRBuilder.getDataLayout();
      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
        return UnableToLegalize;

      LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
      Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
      SrcTy = SrcAsIntTy;
    }

    if (Offset == 0) {
      // Avoid a shift in the degenerate case.
      MIRBuilder.buildTrunc(DstReg,
                            MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
      MI.eraseFromParent();
      return Legalized;
    }

    // Do a shift in the source type, or in WideTy when that is the larger of
    // the two.
    LLT ShiftTy = SrcTy;
    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
      Src = MIRBuilder.buildAnyExt(WideTy, Src);
      ShiftTy = WideTy;
    }

    auto LShr = MIRBuilder.buildLShr(
      ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
    MIRBuilder.buildTrunc(DstReg, LShr);
    MI.eraseFromParent();
    return Legalized;
  }

  if (SrcTy.isScalar()) {
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }

  if (!SrcTy.isVector())
    return UnableToLegalize;

  if (DstTy != SrcTy.getElementType())
    return UnableToLegalize;

  if (Offset % SrcTy.getScalarSizeInBits() != 0)
    return UnableToLegalize;

  Observer.changingInstr(MI);
  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

  // Scale the bit offset by the ratio between the widened and the original
  // source sizes.
  MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
                          Offset);
  widenScalarDst(MI, WideTy.getScalarType(), 0);
  Observer.changedInstr(MI);
  return Legalized;
}
1148
1149
/// Widen type index 0 of a G_INSERT to \p WideTy.
///
/// Operand 1 is any-extended to \p WideTy and the result is widened through
/// widenScalarDst. Any other type index yields UnableToLegalize.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                   LLT WideTy) {
  const bool IsSupportedIdx = TypeIdx == 0;
  if (!IsSupportedIdx)
    return UnableToLegalize;

  Observer.changingInstr(MI);
  {
    // The instruction is mutated in place between the observer notifications.
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
  }
  Observer.changedInstr(MI);

  return Legalized;
}
1160
1161
LegalizerHelper::LegalizeResult
1162
1.28M
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
1163
1.28M
  MIRBuilder.setInstr(MI);
1164
1.28M
1165
1.28M
  switch (MI.getOpcode()) {
1166
1.28M
  default:
1167
0
    return UnableToLegalize;
1168
1.28M
  case TargetOpcode::G_EXTRACT:
1169
36
    return widenScalarExtract(MI, TypeIdx, WideTy);
1170
1.28M
  case TargetOpcode::G_INSERT:
1171
14
    return widenScalarInsert(MI, TypeIdx, WideTy);
1172
1.28M
  case TargetOpcode::G_MERGE_VALUES:
1173
40
    return widenScalarMergeValues(MI, TypeIdx, WideTy);
1174
1.28M
  case TargetOpcode::G_UNMERGE_VALUES:
1175
12
    return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
1176
1.28M
  case TargetOpcode::G_UADDO:
1177
6
  case TargetOpcode::G_USUBO: {
1178
6
    if (TypeIdx == 1)
1179
0
      return UnableToLegalize; // TODO
1180
6
    auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
1181
6
                                         {MI.getOperand(2).getReg()});
1182
6
    auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
1183
6
                                         {MI.getOperand(3).getReg()});
1184
6
    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
1185
6
                          ? 
TargetOpcode::G_ADD3
1186
6
                          : 
TargetOpcode::G_SUB3
;
1187
6
    // Do the arithmetic in the larger type.
1188
6
    auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
1189
6
    LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
1190
6
    APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits());
1191
6
    auto AndOp = MIRBuilder.buildInstr(
1192
6
        TargetOpcode::G_AND, {WideTy},
1193
6
        {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())});
1194
6
    // There is no overflow if the AndOp is the same as NewOp.
1195
6
    MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp,
1196
6
                         AndOp);
1197
6
    // Now trunc the NewOp to the original result.
1198
6
    MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
1199
6
    MI.eraseFromParent();
1200
6
    return Legalized;
1201
6
  }
1202
63
  case TargetOpcode::G_CTTZ:
1203
63
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1204
63
  case TargetOpcode::G_CTLZ:
1205
63
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1206
63
  case TargetOpcode::G_CTPOP: {
1207
63
    if (TypeIdx == 0) {
1208
25
      Observer.changingInstr(MI);
1209
25
      widenScalarDst(MI, WideTy, 0);
1210
25
      Observer.changedInstr(MI);
1211
25
      return Legalized;
1212
25
    }
1213
38
1214
38
    Register SrcReg = MI.getOperand(1).getReg();
1215
38
1216
38
    // First ZEXT the input.
1217
38
    auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
1218
38
    LLT CurTy = MRI.getType(SrcReg);
1219
38
    if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
1220
6
      // The count is the same in the larger type except if the original
1221
6
      // value was zero.  This can be handled by setting the bit just off
1222
6
      // the top of the original type.
1223
6
      auto TopBit =
1224
6
          APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
1225
6
      MIBSrc = MIRBuilder.buildOr(
1226
6
        WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
1227
6
    }
1228
38
1229
38
    // Perform the operation at the larger size.
1230
38
    auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
1231
38
    // This is already the correct result for CTPOP and CTTZs
1232
38
    if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
1233
38
        
MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF29
) {
1234
18
      // The correct result is NewOp - (Difference in widety and current ty).
1235
18
      unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
1236
18
      MIBNewOp = MIRBuilder.buildInstr(
1237
18
          TargetOpcode::G_SUB, {WideTy},
1238
18
          {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
1239
18
    }
1240
38
1241
38
    MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
1242
38
    MI.eraseFromParent();
1243
38
    return Legalized;
1244
38
  }
1245
38
  case TargetOpcode::G_BSWAP: {
1246
5
    Observer.changingInstr(MI);
1247
5
    Register DstReg = MI.getOperand(0).getReg();
1248
5
1249
5
    Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
1250
5
    Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1251
5
    Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
1252
5
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1253
5
1254
5
    MI.getOperand(0).setReg(DstExt);
1255
5
1256
5
    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1257
5
1258
5
    LLT Ty = MRI.getType(DstReg);
1259
5
    unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
1260
5
    MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
1261
5
    MIRBuilder.buildInstr(TargetOpcode::G_LSHR)
1262
5
      .addDef(ShrReg)
1263
5
      .addUse(DstExt)
1264
5
      .addUse(ShiftAmtReg);
1265
5
1266
5
    MIRBuilder.buildTrunc(DstReg, ShrReg);
1267
5
    Observer.changedInstr(MI);
1268
5
    return Legalized;
1269
38
  }
1270
96.3k
  case TargetOpcode::G_ADD:
1271
96.3k
  case TargetOpcode::G_AND:
1272
96.3k
  case TargetOpcode::G_MUL:
1273
96.3k
  case TargetOpcode::G_OR:
1274
96.3k
  case TargetOpcode::G_XOR:
1275
96.3k
  case TargetOpcode::G_SUB:
1276
96.3k
    // Perform operation at larger width (any extension is fine here, high bits
1277
96.3k
    // don't affect the result) and then truncate the result back to the
1278
96.3k
    // original type.
1279
96.3k
    Observer.changingInstr(MI);
1280
96.3k
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1281
96.3k
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1282
96.3k
    widenScalarDst(MI, WideTy);
1283
96.3k
    Observer.changedInstr(MI);
1284
96.3k
    return Legalized;
1285
96.3k
1286
96.3k
  case TargetOpcode::G_SHL:
1287
5.50k
    Observer.changingInstr(MI);
1288
5.50k
1289
5.50k
    if (TypeIdx == 0) {
1290
2.75k
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1291
2.75k
      widenScalarDst(MI, WideTy);
1292
2.75k
    } else {
1293
2.74k
      assert(TypeIdx == 1);
1294
2.74k
      // The "number of bits to shift" operand must preserve its value as an
1295
2.74k
      // unsigned integer:
1296
2.74k
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1297
2.74k
    }
1298
5.50k
1299
5.50k
    Observer.changedInstr(MI);
1300
5.50k
    return Legalized;
1301
96.3k
1302
96.3k
  case TargetOpcode::G_SDIV:
1303
226
  case TargetOpcode::G_SREM:
1304
226
  case TargetOpcode::G_SMIN:
1305
226
  case TargetOpcode::G_SMAX:
1306
226
    Observer.changingInstr(MI);
1307
226
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
1308
226
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1309
226
    widenScalarDst(MI, WideTy);
1310
226
    Observer.changedInstr(MI);
1311
226
    return Legalized;
1312
226
1313
2.12k
  case TargetOpcode::G_ASHR:
1314
2.12k
  case TargetOpcode::G_LSHR:
1315
2.12k
    Observer.changingInstr(MI);
1316
2.12k
1317
2.12k
    if (TypeIdx == 0) {
1318
1.05k
      unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
1319
727
        
TargetOpcode::G_SEXT332
: TargetOpcode::G_ZEXT;
1320
1.05k
1321
1.05k
      widenScalarSrc(MI, WideTy, 1, CvtOp);
1322
1.05k
      widenScalarDst(MI, WideTy);
1323
1.06k
    } else {
1324
1.06k
      assert(TypeIdx == 1);
1325
1.06k
      // The "number of bits to shift" operand must preserve its value as an
1326
1.06k
      // unsigned integer:
1327
1.06k
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1328
1.06k
    }
1329
2.12k
1330
2.12k
    Observer.changedInstr(MI);
1331
2.12k
    return Legalized;
1332
2.12k
  case TargetOpcode::G_UDIV:
1333
294
  case TargetOpcode::G_UREM:
1334
294
  case TargetOpcode::G_UMIN:
1335
294
  case TargetOpcode::G_UMAX:
1336
294
    Observer.changingInstr(MI);
1337
294
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1338
294
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1339
294
    widenScalarDst(MI, WideTy);
1340
294
    Observer.changedInstr(MI);
1341
294
    return Legalized;
1342
294
1343
7.22k
  case TargetOpcode::G_SELECT:
1344
7.22k
    Observer.changingInstr(MI);
1345
7.22k
    if (TypeIdx == 0) {
1346
7.16k
      // Perform operation at larger width (any extension is fine here, high
1347
7.16k
      // bits don't affect the result) and then truncate the result back to the
1348
7.16k
      // original type.
1349
7.16k
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1350
7.16k
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
1351
7.16k
      widenScalarDst(MI, WideTy);
1352
7.16k
    } else {
1353
58
      bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
1354
58
      // Explicit extension is required here since high bits affect the result.
1355
58
      widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
1356
58
    }
1357
7.22k
    Observer.changedInstr(MI);
1358
7.22k
    return Legalized;
1359
294
1360
294
  case TargetOpcode::G_FPTOSI:
1361
289
  case TargetOpcode::G_FPTOUI:
1362
289
    if (TypeIdx != 0)
1363
9
      return UnableToLegalize;
1364
280
    Observer.changingInstr(MI);
1365
280
    widenScalarDst(MI, WideTy);
1366
280
    Observer.changedInstr(MI);
1367
280
    return Legalized;
1368
280
1369
528
  case TargetOpcode::G_SITOFP:
1370
528
    if (TypeIdx != 1)
1371
4
      return UnableToLegalize;
1372
524
    Observer.changingInstr(MI);
1373
524
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
1374
524
    Observer.changedInstr(MI);
1375
524
    return Legalized;
1376
524
1377
524
  case TargetOpcode::G_UITOFP:
1378
502
    if (TypeIdx != 1)
1379
4
      return UnableToLegalize;
1380
498
    Observer.changingInstr(MI);
1381
498
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1382
498
    Observer.changedInstr(MI);
1383
498
    return Legalized;
1384
498
1385
2.60k
  case TargetOpcode::G_LOAD:
1386
2.60k
  case TargetOpcode::G_SEXTLOAD:
1387
2.60k
  case TargetOpcode::G_ZEXTLOAD:
1388
2.60k
    Observer.changingInstr(MI);
1389
2.60k
    widenScalarDst(MI, WideTy);
1390
2.60k
    Observer.changedInstr(MI);
1391
2.60k
    return Legalized;
1392
2.60k
1393
9.30k
  case TargetOpcode::G_STORE: {
1394
9.30k
    if (TypeIdx != 0)
1395
0
      return UnableToLegalize;
1396
9.30k
1397
9.30k
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1398
9.30k
    if (!isPowerOf2_32(Ty.getSizeInBits()))
1399
12
      return UnableToLegalize;
1400
9.28k
1401
9.28k
    Observer.changingInstr(MI);
1402
9.28k
1403
9.28k
    unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
1404
9.28k
      TargetOpcode::G_ZEXT : 
TargetOpcode::G_ANYEXT8
;
1405
9.28k
    widenScalarSrc(MI, WideTy, 0, ExtType);
1406
9.28k
1407
9.28k
    Observer.changedInstr(MI);
1408
9.28k
    return Legalized;
1409
9.28k
  }
1410
24.2k
  case TargetOpcode::G_CONSTANT: {
1411
24.2k
    MachineOperand &SrcMO = MI.getOperand(1);
1412
24.2k
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
1413
24.2k
    const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
1414
24.2k
    Observer.changingInstr(MI);
1415
24.2k
    SrcMO.setCImm(ConstantInt::get(Ctx, Val));
1416
24.2k
1417
24.2k
    widenScalarDst(MI, WideTy);
1418
24.2k
    Observer.changedInstr(MI);
1419
24.2k
    return Legalized;
1420
9.28k
  }
1421
9.28k
  case TargetOpcode::G_FCONSTANT: {
1422
6
    MachineOperand &SrcMO = MI.getOperand(1);
1423
6
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
1424
6
    APFloat Val = SrcMO.getFPImm()->getValueAPF();
1425
6
    bool LosesInfo;
1426
6
    switch (WideTy.getSizeInBits()) {
1427
6
    case 32:
1428
6
      Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
1429
6
                  &LosesInfo);
1430
6
      break;
1431
6
    case 64:
1432
0
      Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
1433
0
                  &LosesInfo);
1434
0
      break;
1435
6
    default:
1436
0
      return UnableToLegalize;
1437
6
    }
1438
6
1439
6
    assert(!LosesInfo && "extend should always be lossless");
1440
6
1441
6
    Observer.changingInstr(MI);
1442
6
    SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
1443
6
1444
6
    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
1445
6
    Observer.changedInstr(MI);
1446
6
    return Legalized;
1447
6
  }
1448
74
  case TargetOpcode::G_IMPLICIT_DEF: {
1449
74
    Observer.changingInstr(MI);
1450
74
    widenScalarDst(MI, WideTy);
1451
74
    Observer.changedInstr(MI);
1452
74
    return Legalized;
1453
6
  }
1454
66
  case TargetOpcode::G_BRCOND:
1455
66
    Observer.changingInstr(MI);
1456
66
    widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
1457
66
    Observer.changedInstr(MI);
1458
66
    return Legalized;
1459
6
1460
16.5k
  case TargetOpcode::G_FCMP:
1461
16.5k
    Observer.changingInstr(MI);
1462
16.5k
    if (TypeIdx == 0)
1463
16.5k
      widenScalarDst(MI, WideTy);
1464
41
    else {
1465
41
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
1466
41
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
1467
41
    }
1468
16.5k
    Observer.changedInstr(MI);
1469
16.5k
    return Legalized;
1470
6
1471
1.10M
  case TargetOpcode::G_ICMP:
1472
1.10M
    Observer.changingInstr(MI);
1473
1.10M
    if (TypeIdx == 0)
1474
982k
      widenScalarDst(MI, WideTy);
1475
127k
    else {
1476
127k
      unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
1477
127k
                               MI.getOperand(1).getPredicate()))
1478
127k
                               ? 
TargetOpcode::G_SEXT13.1k
1479
127k
                               : 
TargetOpcode::G_ZEXT114k
;
1480
127k
      widenScalarSrc(MI, WideTy, 2, ExtOpcode);
1481
127k
      widenScalarSrc(MI, WideTy, 3, ExtOpcode);
1482
127k
    }
1483
1.10M
    Observer.changedInstr(MI);
1484
1.10M
    return Legalized;
1485
6
1486
6
  case TargetOpcode::G_GEP:
1487
5
    assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
1488
5
    Observer.changingInstr(MI);
1489
5
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1490
5
    Observer.changedInstr(MI);
1491
5
    return Legalized;
1492
6
1493
11.9k
  case TargetOpcode::G_PHI: {
1494
11.9k
    assert(TypeIdx == 0 && "Expecting only Idx 0");
1495
11.9k
1496
11.9k
    Observer.changingInstr(MI);
1497
49.2k
    for (unsigned I = 1; I < MI.getNumOperands(); 
I += 237.2k
) {
1498
37.2k
      MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
1499
37.2k
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
1500
37.2k
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
1501
37.2k
    }
1502
11.9k
1503
11.9k
    MachineBasicBlock &MBB = *MI.getParent();
1504
11.9k
    MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
1505
11.9k
    widenScalarDst(MI, WideTy);
1506
11.9k
    Observer.changedInstr(MI);
1507
11.9k
    return Legalized;
1508
6
  }
1509
13
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
1510
13
    if (TypeIdx == 0) {
1511
11
      Register VecReg = MI.getOperand(1).getReg();
1512
11
      LLT VecTy = MRI.getType(VecReg);
1513
11
      Observer.changingInstr(MI);
1514
11
1515
11
      widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
1516
11
                                     WideTy.getSizeInBits()),
1517
11
                     1, TargetOpcode::G_SEXT);
1518
11
1519
11
      widenScalarDst(MI, WideTy, 0);
1520
11
      Observer.changedInstr(MI);
1521
11
      return Legalized;
1522
11
    }
1523
2
1524
2
    if (TypeIdx != 2)
1525
0
      return UnableToLegalize;
1526
2
    Observer.changingInstr(MI);
1527
2
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1528
2
    Observer.changedInstr(MI);
1529
2
    return Legalized;
1530
2
  }
1531
636
  case TargetOpcode::G_FADD:
1532
636
  case TargetOpcode::G_FMUL:
1533
636
  case TargetOpcode::G_FSUB:
1534
636
  case TargetOpcode::G_FMA:
1535
636
  case TargetOpcode::G_FNEG:
1536
636
  case TargetOpcode::G_FABS:
1537
636
  case TargetOpcode::G_FCANONICALIZE:
1538
636
  case TargetOpcode::G_FMINNUM:
1539
636
  case TargetOpcode::G_FMAXNUM:
1540
636
  case TargetOpcode::G_FMINNUM_IEEE:
1541
636
  case TargetOpcode::G_FMAXNUM_IEEE:
1542
636
  case TargetOpcode::G_FMINIMUM:
1543
636
  case TargetOpcode::G_FMAXIMUM:
1544
636
  case TargetOpcode::G_FDIV:
1545
636
  case TargetOpcode::G_FREM:
1546
636
  case TargetOpcode::G_FCEIL:
1547
636
  case TargetOpcode::G_FFLOOR:
1548
636
  case TargetOpcode::G_FCOS:
1549
636
  case TargetOpcode::G_FSIN:
1550
636
  case TargetOpcode::G_FLOG10:
1551
636
  case TargetOpcode::G_FLOG:
1552
636
  case TargetOpcode::G_FLOG2:
1553
636
  case TargetOpcode::G_FRINT:
1554
636
  case TargetOpcode::G_FNEARBYINT:
1555
636
  case TargetOpcode::G_FSQRT:
1556
636
  case TargetOpcode::G_FEXP:
1557
636
  case TargetOpcode::G_FEXP2:
1558
636
  case TargetOpcode::G_FPOW:
1559
636
  case TargetOpcode::G_INTRINSIC_TRUNC:
1560
636
  case TargetOpcode::G_INTRINSIC_ROUND:
1561
636
    assert(TypeIdx == 0);
1562
636
    Observer.changingInstr(MI);
1563
636
1564
1.43k
    for (unsigned I = 1, E = MI.getNumOperands(); I != E; 
++I794
)
1565
794
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
1566
636
1567
636
    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
1568
636
    Observer.changedInstr(MI);
1569
636
    return Legalized;
1570
636
  case TargetOpcode::G_INTTOPTR:
1571
5
    if (TypeIdx != 1)
1572
0
      return UnableToLegalize;
1573
5
1574
5
    Observer.changingInstr(MI);
1575
5
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1576
5
    Observer.changedInstr(MI);
1577
5
    return Legalized;
1578
5
  case TargetOpcode::G_PTRTOINT:
1579
5
    if (TypeIdx != 0)
1580
0
      return UnableToLegalize;
1581
5
1582
5
    Observer.changingInstr(MI);
1583
5
    widenScalarDst(MI, WideTy, 0);
1584
5
    Observer.changedInstr(MI);
1585
5
    return Legalized;
1586
66
  case TargetOpcode::G_BUILD_VECTOR: {
1587
66
    Observer.changingInstr(MI);
1588
66
1589
66
    const LLT WideEltTy = TypeIdx == 1 ? 
WideTy1
:
WideTy.getElementType()65
;
1590
253
    for (int I = 1, E = MI.getNumOperands(); I != E; 
++I187
)
1591
187
      widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
1592
66
1593
66
    // Avoid changing the result vector type if the source element type was
1594
66
    // requested.
1595
66
    if (TypeIdx == 1) {
1596
1
      auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
1597
1
      MI.setDesc(TII.get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
1598
65
    } else {
1599
65
      widenScalarDst(MI, WideTy, 0);
1600
65
    }
1601
66
1602
66
    Observer.changedInstr(MI);
1603
66
    return Legalized;
1604
5
  }
1605
1.28M
  }
1606
1.28M
}
1607
1608
/// Replace \p MI with an equivalent sequence of other generic instructions.
///
/// \p Ty is the type used for the temporaries created by the expansions below;
/// \p TypeIdx is only forwarded to the dedicated lowering helpers.
///
/// \returns Legalized when a replacement sequence was emitted (\p MI is erased
/// by the in-line expansions), or UnableToLegalize when the opcode or type is
/// not handled here.
LegalizerHelper::LegalizeResult
LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  using namespace TargetOpcode;
  MIRBuilder.setInstr(MI);

  switch(MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
    // rem = lhs - (lhs / rhs) * rhs, using the division of matching signedness.
    Register QuotReg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
        .addDef(QuotReg)
        .addUse(MI.getOperand(1).getReg())
        .addUse(MI.getOperand(2).getReg());

    Register ProdReg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
    MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
                        ProdReg);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
    // result.
    Register Res = MI.getOperand(0).getReg();
    Register Overflow = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildMul(Res, LHS, RHS);

    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
                          ? TargetOpcode::G_SMULH
                          : TargetOpcode::G_UMULH;

    Register HiPart = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(Opcode)
      .addDef(HiPart)
      .addUse(LHS)
      .addUse(RHS);

    // For *signed* multiply, overflow is detected by checking:
    // (hi != (lo >> bitwidth-1))
    if (Opcode == TargetOpcode::G_SMULH) {
      Register Shifted = MRI.createGenericVirtualRegister(Ty);
      Register ShiftAmt = MRI.createGenericVirtualRegister(Ty);
      MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
      MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
        .addDef(Shifted)
        .addUse(Res)
        .addUse(ShiftAmt);
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
    } else {
      // For *unsigned* multiply, overflow is a non-zero high half. The zero
      // constant is only materialized here because the signed form does not
      // use it.
      Register Zero = MRI.createGenericVirtualRegister(Ty);
      MIRBuilder.buildConstant(Zero, 0);
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
    }
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FNEG: {
    // TODO: Handle vector types once we are able to
    // represent them.
    if (Ty.isVector())
      return UnableToLegalize;
    Register Res = MI.getOperand(0).getReg();
    Type *ZeroTy;
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    // Pick the IR floating-point type matching the bit width of Ty.
    switch (Ty.getSizeInBits()) {
    case 16:
      ZeroTy = Type::getHalfTy(Ctx);
      break;
    case 32:
      ZeroTy = Type::getFloatTy(Ctx);
      break;
    case 64:
      ZeroTy = Type::getDoubleTy(Ctx);
      break;
    case 128:
      ZeroTy = Type::getFP128Ty(Ctx);
      break;
    default:
      llvm_unreachable("unexpected floating-point type");
    }
    // Emit (G_FSUB zero-for-negation, X), carrying over the flags of MI.
    ConstantFP &ZeroForNegation =
        *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
    auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
    Register SubByReg = MI.getOperand(1).getReg();
    Register ZeroReg = Zero->getOperand(0).getReg();
    MIRBuilder.buildInstr(TargetOpcode::G_FSUB, {Res}, {ZeroReg, SubByReg},
                          MI.getFlags());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FSUB: {
    // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
    // First, check if G_FNEG is marked as Lower. If so, we may
    // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
    if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
      return UnableToLegalize;
    Register Res = MI.getOperand(0).getReg();
    Register LHS = MI.getOperand(1).getReg();
    Register RHS = MI.getOperand(2).getReg();
    Register Neg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
    MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Res}, {LHS, Neg}, MI.getFlags());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    // Split into a plain cmpxchg plus a compare of the loaded value against
    // the expected value to produce the success flag.
    Register OldValRes = MI.getOperand(0).getReg();
    Register SuccessRes = MI.getOperand(1).getReg();
    Register Addr = MI.getOperand(2).getReg();
    Register CmpVal = MI.getOperand(3).getReg();
    Register NewVal = MI.getOperand(4).getReg();
    MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
                                  **MI.memoperands_begin());
    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD: {
    // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
    Register DstReg = MI.getOperand(0).getReg();
    Register PtrReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    auto &MMO = **MI.memoperands_begin();

    if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) {
      // In the case of G_LOAD, this was a non-extending load already and we're
      // about to lower to the same instruction.
      if (MI.getOpcode() == TargetOpcode::G_LOAD)
          return UnableToLegalize;
      MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
      MI.eraseFromParent();
      return Legalized;
    }

    if (DstTy.isScalar()) {
      // Load at the width of the memory access, then extend to the
      // destination type according to the original opcode.
      Register TmpReg =
          MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
      switch (MI.getOpcode()) {
      default:
        llvm_unreachable("Unexpected opcode");
      case TargetOpcode::G_LOAD:
        MIRBuilder.buildAnyExt(DstReg, TmpReg);
        break;
      case TargetOpcode::G_SEXTLOAD:
        MIRBuilder.buildSExt(DstReg, TmpReg);
        break;
      case TargetOpcode::G_ZEXTLOAD:
        MIRBuilder.buildZExt(DstReg, TmpReg);
        break;
      }
      MI.eraseFromParent();
      return Legalized;
    }

    return UnableToLegalize;
  }
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTPOP:
    return lowerBitCount(MI, TypeIdx, Ty);
  case G_UADDO: {
    Register Res = MI.getOperand(0).getReg();
    Register CarryOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();

    // The unsigned sum wrapped iff it is smaller than an addend.
    MIRBuilder.buildAdd(Res, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_UADDE: {
    Register Res = MI.getOperand(0).getReg();
    Register CarryOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();
    Register CarryIn = MI.getOperand(4).getReg();

    Register TmpRes = MRI.createGenericVirtualRegister(Ty);
    Register ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);

    MIRBuilder.buildAdd(TmpRes, LHS, RHS);
    MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
    MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);

    // Without a carry-in the sum wrapped iff Res <u LHS. With a carry-in the
    // sum LHS + RHS + 1 wrapped iff Res <=u LHS: when RHS is all ones the
    // wrapped result equals LHS, which a strict compare alone would miss.
    const LLT CarryTy = MRI.getType(CarryOut);
    Register ResULTLHS = MRI.createGenericVirtualRegister(CarryTy);
    Register ResULELHS = MRI.createGenericVirtualRegister(CarryTy);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, ResULTLHS, Res, LHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULE, ResULELHS, Res, LHS);
    MIRBuilder.buildSelect(CarryOut, CarryIn, ResULELHS, ResULTLHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_USUBO: {
    Register Res = MI.getOperand(0).getReg();
    Register BorrowOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();

    // A borrow is needed iff LHS <u RHS.
    MIRBuilder.buildSub(Res, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_USUBE: {
    Register Res = MI.getOperand(0).getReg();
    Register BorrowOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();
    Register BorrowIn = MI.getOperand(4).getReg();

    Register TmpRes = MRI.createGenericVirtualRegister(Ty);
    Register ZExtBorrowIn = MRI.createGenericVirtualRegister(Ty);
    Register LHS_EQ_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
    Register LHS_ULT_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));

    MIRBuilder.buildSub(TmpRes, LHS, RHS);
    MIRBuilder.buildZExt(ZExtBorrowIn, BorrowIn);
    MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
    // When the operands are equal the borrow-in propagates unchanged;
    // otherwise a borrow is produced iff LHS <u RHS.
    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LHS_EQ_RHS, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, LHS_ULT_RHS, LHS, RHS);
    MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_UITOFP:
    return lowerUITOFP(MI, TypeIdx, Ty);
  case G_SITOFP:
    return lowerSITOFP(MI, TypeIdx, Ty);
  case G_SMIN:
  case G_SMAX:
  case G_UMIN:
  case G_UMAX:
    return lowerMinMax(MI, TypeIdx, Ty);
  case G_FCOPYSIGN:
    return lowerFCopySign(MI, TypeIdx, Ty);
  case G_FMINNUM:
  case G_FMAXNUM:
    return lowerFMinNumMaxNum(MI);
  }
}
1861
1862
/// Split an instruction defining an undefined vector value into pieces of
/// type \p NarrowTy.
///
/// One undef of type \p NarrowTy is built per piece and the pieces are merged
/// back into the original destination register, after which \p MI is erased.
/// \p TypeIdx is not used.
///
/// \returns UnableToLegalize if the destination size is not a multiple of the
/// size of \p NarrowTy, Legalized otherwise.
LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
    MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
  const unsigned PieceBits = NarrowTy.getSizeInBits();
  const Register ResultReg = MI.getOperand(0).getReg();
  const unsigned TotalBits = MRI.getType(ResultReg).getSizeInBits();

  // FIXME: Don't know how to handle the situation where the small vectors
  // aren't all the same size yet.
  if (TotalBits % PieceBits != 0)
    return UnableToLegalize;

  const int PieceCount = TotalBits / PieceBits;

  // Build one undefined value per piece.
  SmallVector<Register, 2> Pieces;
  for (int Piece = 0; Piece != PieceCount; ++Piece) {
    Register UndefReg = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildUndef(UndefReg);
    Pieces.push_back(UndefReg);
  }

  // Vector pieces are concatenated; scalar pieces form a build_vector.
  if (!NarrowTy.isVector())
    MIRBuilder.buildBuildVector(ResultReg, Pieces);
  else
    MIRBuilder.buildConcatVectors(ResultReg, Pieces);

  MI.eraseFromParent();
  return Legalized;
}
1889
1890
/// Split a vector operation whose result and source operands all share one
/// type into operations on \p NarrowTy sized pieces.
///
/// If the destination size is a multiple of the \p NarrowTy size, every source
/// is split into equal pieces, the operation is repeated per piece with the
/// flags of \p MI, and the results are merged into the original destination.
/// If exactly one element is left over, an extract/op/insert sequence is used
/// instead. \p TypeIdx is not used.
///
/// \returns UnableToLegalize if the leftover is anything other than a single
/// element, Legalized otherwise (\p MI is erased).
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx,
                                          LLT NarrowTy) {
  const unsigned Opc = MI.getOpcode();
  const unsigned NumOperands = MI.getNumOperands();
  const unsigned NarrowSize = NarrowTy.getSizeInBits();
  const Register DstReg = MI.getOperand(0).getReg();
  const unsigned Flags = MI.getFlags();
  const LLT DstTy = MRI.getType(DstReg);
  const unsigned Size = DstTy.getSizeInBits();
  const int NumParts = Size / NarrowSize;
  const LLT EltTy = DstTy.getElementType();
  const unsigned EltSize = EltTy.getSizeInBits();
  const unsigned BitsForNumParts = NarrowSize * NumParts;

  // Check if we have any leftovers. If we do, then only handle the case where
  // the leftover is one element.
  if (BitsForNumParts != Size && BitsForNumParts + EltSize != Size)
    return UnableToLegalize;

  if (BitsForNumParts != Size) {
    Register AccumDstReg = MRI.createGenericVirtualRegister(DstTy);
    MIRBuilder.buildUndef(AccumDstReg);

    // Handle the pieces which evenly divide into the requested type with
    // extract/op/insert sequence.
    for (unsigned Offset = 0; Offset < BitsForNumParts; Offset += NarrowSize) {
      SmallVector<SrcOp, 4> SrcOps;
      for (unsigned I = 1; I != NumOperands; ++I) {
        Register PartOpReg = MRI.createGenericVirtualRegister(NarrowTy);
        MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), Offset);
        SrcOps.push_back(PartOpReg);
      }

      Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);

      // Each insert produces a new full-width value that becomes the
      // accumulator for the next piece.
      Register PartInsertReg = MRI.createGenericVirtualRegister(DstTy);
      MIRBuilder.buildInsert(PartInsertReg, AccumDstReg, PartDstReg, Offset);
      AccumDstReg = PartInsertReg;
    }

    // Handle the remaining element sized leftover piece.
    SmallVector<SrcOp, 4> SrcOps;
    for (unsigned I = 1; I != NumOperands; ++I) {
      Register PartOpReg = MRI.createGenericVirtualRegister(EltTy);
      MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(),
                              BitsForNumParts);
      SrcOps.push_back(PartOpReg);
    }

    Register PartDstReg = MRI.createGenericVirtualRegister(EltTy);
    MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
    MIRBuilder.buildInsert(DstReg, AccumDstReg, PartDstReg, BitsForNumParts);
    MI.eraseFromParent();

    return Legalized;
  }

  // Even breakdown: split every source operand up front (in operand order),
  // whatever the operand count, so that no piece is ever left without a
  // defining instruction.
  SmallVector<SmallVector<Register, 2>, 3> SrcParts(NumOperands - 1);
  for (unsigned I = 1; I != NumOperands; ++I)
    extractParts(MI.getOperand(I).getReg(), NarrowTy, NumParts,
                 SrcParts[I - 1]);

  // Repeat the operation once per piece.
  SmallVector<Register, 2> DstRegs;
  for (int Part = 0; Part < NumParts; ++Part) {
    Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy);

    SmallVector<SrcOp, 4> SrcOps;
    for (const SmallVector<Register, 2> &Parts : SrcParts)
      SrcOps.push_back(Parts[Part]);

    MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
    DstRegs.push_back(PartDstReg);
  }

  // Vector pieces are concatenated; scalar pieces form a build_vector.
  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
1982
1983
// Handle splitting vector operations which need to have the same number of
1984
// elements in each type index, but each type index may have a different element
1985
// type.
1986
//
1987
// e.g.  <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
1988
//       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
1989
//       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
1990
//
1991
// Also handles some irregular breakdown cases, e.g.
1992
// e.g.  <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
1993
//       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
1994
//             s64 = G_SHL s64, s32
1995
LegalizerHelper::LegalizeResult
1996
LegalizerHelper::fewerElementsVectorMultiEltType(
1997
163
  MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
1998
163
  if (TypeIdx != 0)
1999
2
    return UnableToLegalize;
2000
161
2001
161
  const LLT NarrowTy0 = NarrowTyArg;
2002
161
  const unsigned NewNumElts =
2003
161
      NarrowTy0.isVector() ? 
NarrowTy0.getNumElements()42
:
1119
;
2004
161
2005
161
  const Register DstReg = MI.getOperand(0).getReg();
2006
161
  LLT DstTy = MRI.getType(DstReg);
2007
161
  LLT LeftoverTy0;
2008
161
2009
161
  // All of the operands need to have the same number of elements, so if we can
2010
161
  // determine a type breakdown for the result type, we can for all of the
2011
161
  // source types.
2012
161
  int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
2013
161
  if (NumParts < 0)
2014
0
    return UnableToLegalize;
2015
161
2016
161
  SmallVector<MachineInstrBuilder, 4> NewInsts;
2017
161
2018
161
  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
2019
161
  SmallVector<Register, 4> PartRegs, LeftoverRegs;
2020
161
2021
468
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; 
++I307
) {
2022
307
    LLT LeftoverTy;
2023
307
    Register SrcReg = MI.getOperand(I).getReg();
2024
307
    LLT SrcTyI = MRI.getType(SrcReg);
2025
307
    LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
2026
307
    LLT LeftoverTyI;
2027
307
2028
307
    // Split this operand into the requested typed registers, and any leftover
2029
307
    // required to reproduce the original type.
2030
307
    if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
2031
307
                      LeftoverRegs))
2032
0
      return UnableToLegalize;
2033
307
2034
307
    if (I == 1) {
2035
161
      // For the first operand, create an instruction for each part and setup
2036
161
      // the result.
2037
371
      for (Register PartReg : PartRegs) {
2038
371
        Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2039
371
        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
2040
371
                               .addDef(PartDstReg)
2041
371
                               .addUse(PartReg));
2042
371
        DstRegs.push_back(PartDstReg);
2043
371
      }
2044
161
2045
161
      for (Register LeftoverReg : LeftoverRegs) {
2046
3
        Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
2047
3
        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
2048
3
                               .addDef(PartDstReg)
2049
3
                               .addUse(LeftoverReg));
2050
3
        LeftoverDstRegs.push_back(PartDstReg);
2051
3
      }
2052
161
    } else {
2053
146
      assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());
2054
146
2055
146
      // Add the newly created operand splits to the existing instructions. The
2056
146
      // odd-sized pieces are ordered after the requested NarrowTyArg sized
2057
146
      // pieces.
2058
146
      unsigned InstCount = 0;
2059
487
      for (unsigned J = 0, JE = PartRegs.size(); J != JE; 
++J341
)
2060
341
        NewInsts[InstCount++].addUse(PartRegs[J]);
2061
149
      for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; 
++J3
)
2062
3
        NewInsts[InstCount++].addUse(LeftoverRegs[J]);
2063
146
    }
2064
307
2065
307
    PartRegs.clear();
2066
307
    LeftoverRegs.clear();
2067
307
  }
2068
161
2069
161
  // Insert the newly built operations and rebuild the result register.
2070
161
  for (auto &MIB : NewInsts)
2071
374
    MIRBuilder.insertInstr(MIB);
2072
161
2073
161
  insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);
2074
161
2075
161
  MI.eraseFromParent();
2076
161
  return Legalized;
2077
161
}
2078
2079
/// Split a single-source vector conversion into conversions on pieces with
/// the element count of \p NarrowTy.
///
/// The source is split into pieces with the same number of elements as
/// \p NarrowTy but the source element size; the conversion is repeated per
/// piece with the flags of \p MI, and the results are merged into the original
/// destination.
///
/// \returns UnableToLegalize if \p TypeIdx is not 0 or if a vector \p NarrowTy
/// does not evenly divide the destination element count; Legalized otherwise
/// (\p MI is erased).
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
                                          LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);

  LLT NarrowTy0 = NarrowTy;
  LLT NarrowTy1;
  unsigned NumParts;

  if (NarrowTy.isVector()) {
    // Uneven breakdown not handled.
    NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
    if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
      return UnableToLegalize;

    // Each source piece must hold as many elements as each result piece, i.e.
    // the element count of NarrowTy (not the number of pieces).
    NarrowTy1 = LLT::vector(NarrowTy.getNumElements(),
                            SrcTy.getElementType().getSizeInBits());
  } else {
    // Fully scalarize: one piece per element.
    NumParts = DstTy.getNumElements();
    NarrowTy1 = SrcTy.getElementType();
  }

  SmallVector<Register, 4> SrcRegs, DstRegs;
  extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);

  for (unsigned I = 0; I < NumParts; ++I) {
    Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    MachineInstr *NewInst = MIRBuilder.buildInstr(MI.getOpcode())
      .addDef(PartDstReg)
      .addUse(SrcRegs[I]);

    NewInst->setFlags(MI.getFlags());
    DstRegs.push_back(PartDstReg);
  }

  // Vector pieces are concatenated; scalar pieces form a build_vector.
  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
2127
2128
/// Split a vector G_ICMP/G_FCMP into compares on narrower pieces.
///
/// \p NarrowTy is the requested type for type index \p TypeIdx: the result
/// type when \p TypeIdx is 0, the source type otherwise. The type for the
/// other index is derived with the same element count. Both sources are split,
/// one compare with the original predicate is built per piece (G_FCMP pieces
/// also take the flags of \p MI), and the results are merged into the original
/// destination.
///
/// \returns UnableToLegalize when vector pieces would not evenly cover the
/// original element count; Legalized otherwise (\p MI is erased).
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(Src0Reg);

  unsigned NumParts;
  LLT NarrowTy0, NarrowTy1;

  // A scalar NarrowTy means one element per piece. getNumElements() must only
  // be queried on NarrowTy when it is a vector.
  const unsigned NewElts =
      NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;

  if (TypeIdx == 0) {
    const unsigned OldElts = DstTy.getNumElements();

    NarrowTy0 = NarrowTy;
    NumParts = OldElts / NewElts;
    NarrowTy1 = NarrowTy.isVector()
                    ? LLT::vector(NewElts, SrcTy.getScalarSizeInBits())
                    : SrcTy.getElementType();
  } else {
    const unsigned OldElts = SrcTy.getNumElements();

    NumParts = OldElts / NewElts;
    NarrowTy0 = NarrowTy.isVector()
                    ? LLT::vector(NewElts, DstTy.getScalarSizeInBits())
                    : DstTy.getElementType();
    NarrowTy1 = NarrowTy;
  }

  // FIXME: Don't know how to handle the situation where the small vectors
  // aren't all the same size yet.
  if (NarrowTy1.isVector() &&
      NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
    return UnableToLegalize;

  CmpInst::Predicate Pred
    = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());

  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);

  for (unsigned I = 0; I < NumParts; ++I) {
    Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    DstRegs.push_back(PartDstReg);

    if (MI.getOpcode() == TargetOpcode::G_ICMP)
      MIRBuilder.buildICmp(Pred, PartDstReg, Src1Regs[I], Src2Regs[I]);
    else {
      MachineInstr *NewCmp
        = MIRBuilder.buildFCmp(Pred, PartDstReg, Src1Regs[I], Src2Regs[I]);
      NewCmp->setFlags(MI.getFlags());
    }
  }

  // Vector pieces are concatenated; scalar pieces form a build_vector.
  if (NarrowTy1.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
2194
2195
/// Break a vector G_SELECT into several narrower selects and reassemble the
/// pieces into the original destination register.
///
/// With \p TypeIdx == 0, \p NarrowTy is the type of each result piece and a
/// vector condition is split to match. With any other \p TypeIdx, \p NarrowTy
/// is the requested condition piece type; only a scalar request is handled, in
/// which case one select is emitted per condition element.
///
/// \returns UnableToLegalize if the result does not divide evenly into
/// \p NarrowTy sized pieces, or if a vector condition piece is requested;
/// Legalized otherwise (and \p MI has been erased).
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
                                           LLT NarrowTy) {
  const Register ResReg = MI.getOperand(0).getReg();
  const Register CondReg = MI.getOperand(1).getReg();
  const LLT ResTy = MRI.getType(ResReg);
  const LLT CondTy = MRI.getType(CondReg);
  const bool HasVectorCond = CondTy.isVector();

  assert(TypeIdx == 0 || CondTy.isVector());

  unsigned NumPieces = 0;
  LLT ValPieceTy, CondPieceTy;

  if (TypeIdx == 0) {
    ValPieceTy = NarrowTy;
    CondPieceTy = CondTy;

    const unsigned TotalBits = ResTy.getSizeInBits();
    const unsigned PieceBits = ValPieceTy.getSizeInBits();
    // FIXME: Don't know how to handle the situation where the small vectors
    // aren't all the same size yet.
    if (TotalBits % PieceBits != 0)
      return UnableToLegalize;

    NumPieces = TotalBits / PieceBits;

    // A vector condition has to be broken down alongside the values: one
    // scalar per piece if the counts line up, otherwise a sub-vector.
    if (HasVectorCond) {
      const unsigned CondElts = CondTy.getNumElements();
      if (CondElts == NumPieces)
        CondPieceTy = CondTy.getElementType();
      else
        CondPieceTy = LLT::vector(CondElts / NumPieces,
                                  CondTy.getScalarSizeInBits());
    }
  } else {
    NumPieces = CondTy.getNumElements();

    // TODO: Handle vector condition pieces (including uneven breakdowns).
    // Every vector request is currently rejected.
    if (NarrowTy.isVector())
      return UnableToLegalize;

    ValPieceTy = ResTy.getElementType();
    CondPieceTy = NarrowTy;
  }

  SmallVector<Register, 2> ResPieces, CondPieces, TruePieces, FalsePieces;
  if (HasVectorCond)
    extractParts(MI.getOperand(1).getReg(), CondPieceTy, NumPieces,
                 CondPieces);

  extractParts(MI.getOperand(2).getReg(), ValPieceTy, NumPieces, TruePieces);
  extractParts(MI.getOperand(3).getReg(), ValPieceTy, NumPieces, FalsePieces);

  for (unsigned Idx = 0; Idx != NumPieces; ++Idx) {
    Register PieceReg = MRI.createGenericVirtualRegister(ValPieceTy);
    // A scalar condition is shared by every piece.
    Register PieceCond = HasVectorCond ? CondPieces[Idx] : CondReg;
    MIRBuilder.buildSelect(PieceReg, PieceCond, TruePieces[Idx],
                           FalsePieces[Idx]);
    ResPieces.push_back(PieceReg);
  }

  // Vector pieces are concatenated; scalar pieces form a build_vector.
  if (ValPieceTy.isVector())
    MIRBuilder.buildConcatVectors(ResReg, ResPieces);
  else
    MIRBuilder.buildBuildVector(ResReg, ResPieces);

  MI.eraseFromParent();
  return Legalized;
}
2266
2267
/// Split a G_PHI into one G_PHI per \p NarrowTy sized piece (plus leftover
/// pieces when the breakdown is uneven).
///
/// The new phis are created at the current insertion point, the original
/// result is rebuilt from their results after the last phi of the block, and
/// each incoming value is split just before the first terminator of its
/// predecessor block.
///
/// \returns UnableToLegalize if no breakdown of the phi type into \p NarrowTy
/// exists or an incoming value cannot be split; Legalized otherwise.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  const Register DstReg = MI.getOperand(0).getReg();
  LLT PhiTy = MRI.getType(DstReg);
  LLT LeftoverTy;

  // All of the operands need to have the same number of elements, so a
  // breakdown of the result type is also a breakdown of every source type.
  const std::pair<int, int> Breakdown =
      getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
  const int NumParts = Breakdown.first;
  const int NumLeftover = Breakdown.second;
  if (NumParts < 0)
    return UnableToLegalize;

  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
  SmallVector<MachineInstrBuilder, 4> NewInsts;

  // Create an operand-less phi defining a fresh register of type Ty and
  // record its result in Results.
  auto CreatePartPhi = [&](LLT Ty, SmallVectorImpl<Register> &Results) {
    Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
    NewInsts.push_back(
        MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(PartDstReg));
    Results.push_back(PartDstReg);
  };

  // Insert the new phis in the result block first: evenly sized pieces, then
  // the leftover pieces.
  for (int Part = 0; Part < NumParts; ++Part)
    CreatePartPhi(NarrowTy, DstRegs);
  for (int Part = 0; Part < NumLeftover; ++Part)
    CreatePartPhi(LeftoverTy, LeftoverDstRegs);

  MachineBasicBlock *MBB = MI.getParent();
  MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
  insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);

  // Insert code to extract the incoming values in each predecessor block.
  // Operands come in (value, block) pairs after the def.
  const unsigned NumOperands = MI.getNumOperands();
  for (unsigned OpIdx = 1; OpIdx != NumOperands; OpIdx += 2) {
    SmallVector<Register, 4> PartRegs, LeftoverRegs;

    Register SrcReg = MI.getOperand(OpIdx).getReg();
    MachineBasicBlock &OpMBB = *MI.getOperand(OpIdx + 1).getMBB();
    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());

    LLT Unused;
    if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
                      LeftoverRegs))
      return UnableToLegalize;

    // Add the newly created operand splits to the existing instructions. The
    // odd-sized pieces are ordered after the requested NarrowTy sized pieces.
    for (int Part = 0; Part < NumParts; ++Part)
      NewInsts[Part].addUse(PartRegs[Part]).addMBB(&OpMBB);
    for (int Part = 0; Part < NumLeftover; ++Part)
      NewInsts[NumParts + Part].addUse(LeftoverRegs[Part]).addMBB(&OpMBB);
  }

  MI.eraseFromParent();
  return Legalized;
}
2333
2334
/// Split a non-atomic G_LOAD / G_STORE into a sequence of \p NarrowTy sized
/// accesses, followed by accesses of a leftover type when the value type is
/// not an exact multiple of \p NarrowTy.
///
/// \returns UnableToLegalize for \p TypeIdx != 0, for accesses whose memory
/// operand has an atomic ordering, and when the value type cannot be broken
/// down; Legalized otherwise (and \p MI has been erased).
LegalizerHelper::LegalizeResult
LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
                                      LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  MachineMemOperand *MMO = *MI.memoperands_begin();

  // This implementation doesn't work for atomics. Give up instead of doing
  // something invalid.
  const bool IsNonAtomic =
      MMO->getOrdering() == AtomicOrdering::NotAtomic &&
      MMO->getFailureOrdering() == AtomicOrdering::NotAtomic;
  if (!IsNonAtomic)
    return UnableToLegalize;

  const bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
  Register ValReg = MI.getOperand(0).getReg();
  Register AddrReg = MI.getOperand(1).getReg();
  LLT ValTy = MRI.getType(ValReg);

  // -1 marks "no breakdown found".
  int NumParts = -1;
  int NumLeftover = -1;
  LLT LeftoverTy;
  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
  if (IsLoad) {
    // Loads only need the piece counts; the registers are created below.
    std::tie(NumParts, NumLeftover) =
        getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
  } else if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
                          NarrowLeftoverRegs)) {
    // Stores split the stored value up front.
    NumParts = NarrowRegs.size();
    NumLeftover = NarrowLeftoverRegs.size();
  }

  if (NumParts == -1)
    return UnableToLegalize;

  const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
  const unsigned TotalSize = ValTy.getSizeInBits();

  // Emit PieceTy sized accesses starting at bit offset BitOffset, stopping
  // after NumParts pieces or once the whole value is covered. For a load the
  // new result registers are appended to PieceRegs; for a store, PieceRegs
  // supplies one PieceTy register per piece. Returns the next bit offset that
  // still needs to be handled.
  auto EmitPieces = [&](LLT PieceTy, SmallVectorImpl<Register> &PieceRegs,
                        unsigned BitOffset) -> unsigned {
    MachineFunction &MF = MIRBuilder.getMF();
    const unsigned PieceBits = PieceTy.getSizeInBits();
    const unsigned PieceBytes = PieceBits / 8;
    const unsigned MaxPieces = NumParts;

    for (unsigned Idx = 0; Idx != MaxPieces && BitOffset < TotalSize;
         ++Idx, BitOffset += PieceBits) {
      const unsigned ByteOffset = BitOffset / 8;

      Register PieceAddr;
      MIRBuilder.materializeGEP(PieceAddr, AddrReg, OffsetTy, ByteOffset);

      MachineMemOperand *PieceMMO =
          MF.getMachineMemOperand(MMO, ByteOffset, PieceBytes);

      if (!IsLoad) {
        MIRBuilder.buildStore(PieceRegs[Idx], PieceAddr, *PieceMMO);
        continue;
      }

      Register Loaded = MRI.createGenericVirtualRegister(PieceTy);
      PieceRegs.push_back(Loaded);
      MIRBuilder.buildLoad(Loaded, PieceAddr, *PieceMMO);
    }

    return BitOffset;
  };

  const unsigned HandledOffset = EmitPieces(NarrowTy, NarrowRegs, 0);

  // Handle the rest of the register if this isn't an even type breakdown.
  if (LeftoverTy.isValid())
    EmitPieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);

  // A load still has to assemble the original value from its pieces.
  if (IsLoad)
    insertParts(ValReg, ValTy, NarrowTy, NarrowRegs, LeftoverTy,
                NarrowLeftoverRegs);

  MI.eraseFromParent();
  return Legalized;
}
2420
2421
/// Entry point for the "fewer elements" legalization action: dispatches \p MI
/// to the opcode-specific splitting routine after pointing the builder at it.
///
/// \returns the result of the selected routine, or UnableToLegalize for
/// opcodes that have no splitting routine here.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  using namespace TargetOpcode;

  MIRBuilder.setInstr(MI);

  const unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  case G_IMPLICIT_DEF:
    return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);

  // Handled by fewerElementsVectorBasic.
  case G_AND:
  case G_OR:
  case G_XOR:
  case G_ADD:
  case G_SUB:
  case G_MUL:
  case G_SMULH:
  case G_UMULH:
  case G_FADD:
  case G_FMUL:
  case G_FSUB:
  case G_FNEG:
  case G_FABS:
  case G_FCANONICALIZE:
  case G_FDIV:
  case G_FREM:
  case G_FMA:
  case G_FPOW:
  case G_FEXP:
  case G_FEXP2:
  case G_FLOG:
  case G_FLOG2:
  case G_FLOG10:
  case G_FNEARBYINT:
  case G_FCEIL:
  case G_FFLOOR:
  case G_FRINT:
  case G_INTRINSIC_ROUND:
  case G_INTRINSIC_TRUNC:
  case G_FCOS:
  case G_FSIN:
  case G_FSQRT:
  case G_BSWAP:
  case G_SDIV:
  case G_SMIN:
  case G_SMAX:
  case G_UMIN:
  case G_UMAX:
  case G_FMINNUM:
  case G_FMAXNUM:
  case G_FMINNUM_IEEE:
  case G_FMAXNUM_IEEE:
  case G_FMINIMUM:
  case G_FMAXIMUM:
    return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);

  // Handled by fewerElementsVectorMultiEltType.
  case G_SHL:
  case G_LSHR:
  case G_ASHR:
  case G_CTLZ:
  case G_CTLZ_ZERO_UNDEF:
  case G_CTTZ:
  case G_CTTZ_ZERO_UNDEF:
  case G_CTPOP:
  case G_FCOPYSIGN:
    return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);

  // Conversions, handled by fewerElementsVectorCasts.
  case G_ZEXT:
  case G_SEXT:
  case G_ANYEXT:
  case G_FPEXT:
  case G_FPTRUNC:
  case G_SITOFP:
  case G_UITOFP:
  case G_FPTOSI:
  case G_FPTOUI:
  case G_INTTOPTR:
  case G_PTRTOINT:
  case G_ADDRSPACE_CAST:
    return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);

  case G_ICMP:
  case G_FCMP:
    return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);

  case G_SELECT:
    return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);

  case G_PHI:
    return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);

  case G_LOAD:
  case G_STORE:
    return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);

  default:
    return UnableToLegalize;
  }
}
2512
2513
/// Expand a scalar shift by the constant \p Amt into operations on the two
/// \p HalfTy halves of the shifted value, then merge the halves into the
/// original destination.
///
/// G_SHL and G_LSHR are handled explicitly; any other opcode is expanded as
/// an arithmetic shift right. Shift-amount constants are created with type
/// \p AmtTy. \p MI is erased.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
                                             const LLT HalfTy, const LLT AmtTy) {
  Register LoIn = MRI.createGenericVirtualRegister(HalfTy);
  Register HiIn = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({LoIn, HiIn}, MI.getOperand(1).getReg());

  const Register DstReg = MI.getOperand(0).getReg();

  // Shifting by zero just forwards the input halves.
  if (Amt.isNullValue()) {
    MIRBuilder.buildMerge(DstReg, {LoIn, HiIn});
    MI.eraseFromParent();
    return Legalized;
  }

  const unsigned HalfBits = HalfTy.getSizeInBits();
  const unsigned FullBits = 2 * HalfBits;

  // The expansion depends on where the amount falls relative to the half and
  // full widths. The predicates are tested in this order below.
  const bool PastFull = Amt.ugt(FullBits);
  const bool PastHalf = Amt.ugt(HalfBits);
  const bool ExactlyHalf = Amt == HalfBits;

  SrcOp Lo(Register(0)), Hi(Register(0));
  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL:
    if (PastFull) {
      auto Zero = MIRBuilder.buildConstant(HalfTy, 0);
      Hi = Zero;
      Lo = Zero;
    } else if (PastHalf) {
      Lo = MIRBuilder.buildConstant(HalfTy, 0);
      auto Excess = MIRBuilder.buildConstant(AmtTy, Amt - HalfBits);
      Hi = MIRBuilder.buildShl(HalfTy, LoIn, Excess);
    } else if (ExactlyHalf) {
      Lo = MIRBuilder.buildConstant(HalfTy, 0);
      Hi = LoIn;
    } else {
      auto LoAmt = MIRBuilder.buildConstant(AmtTy, Amt);
      Lo = MIRBuilder.buildShl(HalfTy, LoIn, LoAmt);
      auto HiAmt = MIRBuilder.buildConstant(AmtTy, Amt);
      auto HiShifted = MIRBuilder.buildShl(HalfTy, HiIn, HiAmt);
      // Bits of the low half that move up into the high half.
      auto CarryAmt = MIRBuilder.buildConstant(AmtTy, -Amt + HalfBits);
      auto Carried = MIRBuilder.buildLShr(HalfTy, LoIn, CarryAmt);
      Hi = MIRBuilder.buildOr(HalfTy, HiShifted, Carried);
    }
    break;

  case TargetOpcode::G_LSHR:
    if (PastFull) {
      auto Zero = MIRBuilder.buildConstant(HalfTy, 0);
      Hi = Zero;
      Lo = Zero;
    } else if (PastHalf) {
      auto Excess = MIRBuilder.buildConstant(AmtTy, Amt - HalfBits);
      Lo = MIRBuilder.buildLShr(HalfTy, HiIn, Excess);
      Hi = MIRBuilder.buildConstant(HalfTy, 0);
    } else if (ExactlyHalf) {
      Lo = HiIn;
      Hi = MIRBuilder.buildConstant(HalfTy, 0);
    } else {
      auto ShiftAmt = MIRBuilder.buildConstant(AmtTy, Amt);
      auto LoShifted = MIRBuilder.buildLShr(HalfTy, LoIn, ShiftAmt);
      // Bits of the high half that move down into the low half.
      auto CarryAmt = MIRBuilder.buildConstant(AmtTy, -Amt + HalfBits);
      auto Carried = MIRBuilder.buildShl(HalfTy, HiIn, CarryAmt);
      Lo = MIRBuilder.buildOr(HalfTy, LoShifted, Carried);
      Hi = MIRBuilder.buildLShr(HalfTy, HiIn, ShiftAmt);
    }
    break;

  default:
    // Arithmetic shift right: vacated bits are copies of the sign bit.
    if (PastFull) {
      auto SignAmt = MIRBuilder.buildConstant(AmtTy, HalfBits - 1);
      auto SignFill = MIRBuilder.buildAShr(HalfTy, HiIn, SignAmt);
      Lo = SignFill;
      Hi = Lo;
    } else if (PastHalf) {
      auto Excess = MIRBuilder.buildConstant(AmtTy, Amt - HalfBits);
      Lo = MIRBuilder.buildAShr(HalfTy, HiIn, Excess);
      auto SignAmt = MIRBuilder.buildConstant(AmtTy, HalfBits - 1);
      Hi = MIRBuilder.buildAShr(HalfTy, HiIn, SignAmt);
    } else if (ExactlyHalf) {
      Lo = HiIn;
      auto SignAmt = MIRBuilder.buildConstant(AmtTy, HalfBits - 1);
      Hi = MIRBuilder.buildAShr(HalfTy, HiIn, SignAmt);
    } else {
      auto ShiftAmt = MIRBuilder.buildConstant(AmtTy, Amt);
      auto LoShifted = MIRBuilder.buildLShr(HalfTy, LoIn, ShiftAmt);
      // Bits of the high half that move down into the low half.
      auto CarryAmt = MIRBuilder.buildConstant(AmtTy, -Amt + HalfBits);
      auto Carried = MIRBuilder.buildShl(HalfTy, HiIn, CarryAmt);
      Lo = MIRBuilder.buildOr(HalfTy, LoShifted, Carried);
      Hi = MIRBuilder.buildAShr(HalfTy, HiIn, ShiftAmt);
    }
    break;
  }

  MIRBuilder.buildMerge(DstReg, {Lo.getReg(), Hi.getReg()});
  MI.eraseFromParent();

  return Legalized;
}
2600
2601
// TODO: Optimize if constant shift amount.
2602
LegalizerHelper::LegalizeResult
2603
LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
2604
279
                                   LLT RequestedTy) {
2605
279
  if (TypeIdx == 1) {
2606
130
    Observer.changingInstr(MI);
2607
130
    narrowScalarSrc(MI, RequestedTy, 2);
2608
130
    Observer.changedInstr(MI);
2609
130
    return Legalized;
2610
130
  }
2611
149
2612
149
  Register DstReg = MI.getOperand(0).getReg();
2613
149
  LLT DstTy = MRI.getType(DstReg);
2614
149
  if (DstTy.isVector())
2615
0
    return UnableToLegalize;
2616
149
2617
149
  Register Amt = MI.getOperand(2).getReg();
2618
149
  LLT ShiftAmtTy = MRI.getType(Amt);
2619
149
  const unsigned DstEltSize = DstTy.getScalarSizeInBits();
2620
149
  if (DstEltSize % 2 != 0)
2621
0
    return UnableToLegalize;
2622
149
2623
149
  // Ignore the input type. We can only go to exactly half the size of the
2624
149
  // input. If that isn't small enough, the resulting pieces will be further
2625
149
  // legalized.
2626
149
  const unsigned NewBitSize = DstEltSize / 2;
2627
149
  const LLT HalfTy = LLT::scalar(NewBitSize);
2628
149
  const LLT CondTy = LLT::scalar(1);
2629
149
2630
149
  if (const MachineInstr *KShiftAmt =
2631
57
          getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
2632
57
    return narrowScalarShiftByConstant(
2633
57
        MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
2634
57
  }
2635
92
2636
92
  // TODO: Expand with known bits.
2637
92
2638
92
  // Handle the fully general expansion by an unknown amount.
2639
92
  auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
2640
92
2641
92
  Register InL = MRI.createGenericVirtualRegister(HalfTy);
2642
92
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
2643
92
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());
2644
92
2645
92
  auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
2646
92
  auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
2647
92
2648
92
  auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
2649
92
  auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
2650
92
  auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
2651
92
2652
92
  Register ResultRegs[2];
2653
92
  switch (MI.getOpcode()) {
2654
92
  case TargetOpcode::G_SHL: {
2655
36
    // Short: ShAmt < NewBitSize
2656
36
    auto LoS = MIRBuilder.buildShl(HalfTy, InH, Amt);
2657
36
2658
36
    auto OrLHS = MIRBuilder.buildShl(HalfTy, InH, Amt);
2659
36
    auto OrRHS = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
2660
36
    auto HiS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
2661
36
2662
36
    // Long: ShAmt >= NewBitSize
2663
36
    auto LoL = MIRBuilder.buildConstant(HalfTy, 0);         // Lo part is zero.
2664
36
    auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
2665
36
2666
36
    auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
2667
36
    auto Hi = MIRBuilder.buildSelect(
2668
36
        HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
2669
36
2670
36
    ResultRegs[0] = Lo.getReg(0);
2671
36
    ResultRegs[1] = Hi.getReg(0);
2672
36
    break;
2673
92
  }
2674
92
  case TargetOpcode::G_LSHR: {
2675
34
    // Short: ShAmt < NewBitSize
2676
34
    auto HiS = MIRBuilder.buildLShr(HalfTy, InH, Amt);
2677
34
2678
34
    auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
2679
34
    auto OrRHS = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
2680
34
    auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
2681
34
2682
34
    // Long: ShAmt >= NewBitSize
2683
34
    auto HiL = MIRBuilder.buildConstant(HalfTy, 0);          // Hi part is zero.
2684
34
    auto LoL = MIRBuilder.buildLShr(HalfTy, InH, AmtExcess); // Lo from Hi part.
2685
34
2686
34
    auto Lo = MIRBuilder.buildSelect(
2687
34
        HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
2688
34
    auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
2689
34
2690
34
    ResultRegs[0] = Lo.getReg(0);
2691
34
    ResultRegs[1] = Hi.getReg(0);
2692
34
    break;
2693
92
  }
2694
92
  case TargetOpcode::G_ASHR: {
2695
22
    // Short: ShAmt < NewBitSize
2696
22
    auto HiS = MIRBuilder.buildAShr(HalfTy, InH, Amt);
2697
22
2698
22
    auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
2699
22
    auto OrRHS = MIRBuilder.buildLShr(HalfTy, InH, AmtLack);
2700
22
    auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
2701
22
2702
22
    // Long: ShAmt >= NewBitSize
2703
22
2704
22
    // Sign of Hi part.
2705
22
    auto HiL = MIRBuilder.buildAShr(
2706
22
        HalfTy, InH, MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1));
2707
22
2708
22
    auto LoL = MIRBuilder.buildAShr(HalfTy, InH, AmtExcess); // Lo from Hi part.
2709
22
2710
22
    auto Lo = MIRBuilder.buildSelect(
2711
22
        HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
2712
22
2713
22
    auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
2714
22
2715
22
    ResultRegs[0] = Lo.getReg(0);
2716
22
    ResultRegs[1] = Hi.getReg(0);
2717
22
    break;
2718
92
  }
2719
92
  default:
2720
0
    llvm_unreachable("not a shift");
2721
92
  }
2722
92
2723
92
  MIRBuilder.buildMerge(DstReg, ResultRegs);
2724
92
  MI.eraseFromParent();
2725
92
  return Legalized;
2726
92
}
2727
2728
/// Widen a G_PHI to \p MoreTy in place. Each incoming value is widened before
/// the first terminator of its predecessor block, and the result is widened
/// via moreElementsVectorDst with the builder positioned just before this
/// block's first non-phi instruction. Only type index 0 is expected.
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
                                       LLT MoreTy) {
  assert(TypeIdx == 0 && "Expecting only Idx 0");

  Observer.changingInstr(MI);

  // Operands after the def come in (value, predecessor block) pairs.
  const unsigned NumOperands = MI.getNumOperands();
  for (unsigned ValIdx = 1; ValIdx != NumOperands; ValIdx += 2) {
    MachineBasicBlock &PredMBB = *MI.getOperand(ValIdx + 1).getMBB();
    MIRBuilder.setInsertPt(PredMBB, PredMBB.getFirstTerminator());
    moreElementsVectorSrc(MI, MoreTy, ValIdx);
  }

  MachineBasicBlock &PhiMBB = *MI.getParent();
  MIRBuilder.setInsertPt(PhiMBB, --PhiMBB.getFirstNonPHI());
  moreElementsVectorDst(MI, MoreTy, 0);

  Observer.changedInstr(MI);
  return Legalized;
}
2746
2747
/// Entry point for the "more elements" legalization action: widens the
/// relevant vector operands of \p MI to \p MoreTy in place, notifying the
/// observer around the change.
///
/// \returns UnableToLegalize for unsupported opcodes, for an unsupported
/// \p TypeIdx on G_EXTRACT / G_INSERT / G_SELECT, and for G_SELECT with a
/// vector condition; Legalized otherwise.
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                    LLT MoreTy) {
  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  case TargetOpcode::G_IMPLICIT_DEF:
    Observer.changingInstr(MI);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;

  // Two-source operations: widen both inputs and the result.
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_EXTRACT:
    // Only the source (type index 1) can be widened.
    if (TypeIdx != 1)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_INSERT:
    // Widen the source at operand 1 together with the result.
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SELECT: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    // A vector condition is not handled here.
    const LLT CondTy = MRI.getType(MI.getOperand(1).getReg());
    if (CondTy.isVector())
      return UnableToLegalize;

    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorSrc(MI, MoreTy, 3);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }

  case TargetOpcode::G_PHI:
    return moreElementsVectorPhi(MI, TypeIdx, MoreTy);

  default:
    return UnableToLegalize;
  }
}
2806
2807
void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
2808
                                        ArrayRef<Register> Src1Regs,
2809
                                        ArrayRef<Register> Src2Regs,
2810
14
                                        LLT NarrowTy) {
2811
14
  MachineIRBuilder &B = MIRBuilder;
2812
14
  unsigned SrcParts = Src1Regs.size();
2813
14
  unsigned DstParts = DstRegs.size();
2814
14
2815
14
  unsigned DstIdx = 0; // Low bits of the result.
2816
14
  Register FactorSum =
2817
14
      B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
2818
14
  DstRegs[DstIdx] = FactorSum;
2819
14
2820
14
  unsigned CarrySumPrevDstIdx;
2821
14
  SmallVector<Register, 4> Factors;
2822
14
2823
34
  for (DstIdx = 1; DstIdx < DstParts; 
DstIdx++20
) {
2824
20
    // Collect low parts of muls for DstIdx.
2825
20
    for (unsigned i = DstIdx + 1 < SrcParts ? 
04
:
DstIdx - SrcParts + 116
;
2826
63
         i <= std::min(DstIdx, SrcParts - 1); 
++i43
) {
2827
43
      MachineInstrBuilder Mul =
2828
43
          B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
2829
43
      Factors.push_back(Mul.getReg(0));
2830
43
    }
2831
20
    // Collect high parts of muls from previous DstIdx.
2832
20
    for (unsigned i = DstIdx < SrcParts ? 
018
:
DstIdx - SrcParts2
;
2833
47
         i <= std::min(DstIdx - 1, SrcParts - 1); 
++i27
) {
2834
27
      MachineInstrBuilder Umulh =
2835
27
          B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
2836
27
      Factors.push_back(Umulh.getReg(0));
2837
27
    }
2838
20
    // Add CarrySum from additons calculated for previous DstIdx.
2839
20
    if (DstIdx != 1) {
2840
6
      Factors.push_back(CarrySumPrevDstIdx);
2841
6
    }
2842
20
2843
20
    Register CarrySum;
2844
20
    // Add all factors and accumulate all carries into CarrySum.
2845
20
    if (DstIdx != DstParts - 1) {
2846
6
      MachineInstrBuilder Uaddo =
2847
6
          B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
2848
6
      FactorSum = Uaddo.getReg(0);
2849
6
      CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
2850
19
      for (unsigned i = 2; i < Factors.size(); 
++i13
) {
2851
13
        MachineInstrBuilder Uaddo =
2852
13
            B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
2853
13
        FactorSum = Uaddo.getReg(0);
2854
13
        MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
2855
13
        CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
2856
13
      }
2857
14
    } else {
2858
14
      // Since value for the next index is not calculated, neither is CarrySum.
2859
14
      FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
2860
37
      for (unsigned i = 2; i < Factors.size(); 
++i23
)
2861
23
        FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
2862
14
    }
2863
20
2864
20
    CarrySumPrevDstIdx = CarrySum;
2865
20
    DstRegs[DstIdx] = FactorSum;
2866
20
    Factors.clear();
2867
20
  }
2868
14
}
2869
2870
/// Narrow a scalar multiply (G_UMULH is treated as a "high" multiply, any
/// other opcode as a plain multiply) by splitting both factors into
/// \p NarrowTy parts, multiplying part-wise via multiplyRegisters and merging
/// the relevant result parts into the destination.
///
/// \returns UnableToLegalize for vector types or when the source/destination
/// width is not a multiple of the \p NarrowTy width; Legalized otherwise.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
  const Register DstReg = MI.getOperand(0).getReg();
  const Register LHSReg = MI.getOperand(1).getReg();
  const Register RHSReg = MI.getOperand(2).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  const unsigned SrcBits = MRI.getType(LHSReg).getSizeInBits();
  const unsigned DstBits = DstTy.getSizeInBits();
  const unsigned PartBits = NarrowTy.getSizeInBits();
  const bool SplitsEvenly =
      DstBits % PartBits == 0 && SrcBits % PartBits == 0;
  if (!SplitsEvenly)
    return UnableToLegalize;

  const unsigned NumDstParts = DstBits / PartBits;
  const unsigned NumSrcParts = SrcBits / PartBits;

  // A high multiply needs the full double-width product computed first.
  const bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
  const unsigned NumProductParts = IsMulHigh ? NumDstParts * 2 : NumDstParts;

  SmallVector<Register, 2> LHSParts, RHSParts, ProductParts;
  extractParts(LHSReg, NarrowTy, NumSrcParts, LHSParts);
  extractParts(RHSReg, NarrowTy, NumSrcParts, RHSParts);
  ProductParts.resize(NumProductParts);
  multiplyRegisters(ProductParts, LHSParts, RHSParts, NarrowTy);

  // Take only high half of registers if this is high mul.
  const unsigned FirstResultPart = IsMulHigh ? NumProductParts / 2 : 0;
  ArrayRef<Register> ResultParts(&ProductParts[FirstResultPart], NumDstParts);
  MIRBuilder.buildMerge(DstReg, ResultParts);
  MI.eraseFromParent();
  return Legalized;
}
2904
2905
/// Narrow the source (type index 1) of a G_EXTRACT: split the source into
/// \p NarrowTy pieces, take from each piece the bits that overlap the
/// extracted range, and combine those segments into the destination (as a
/// build_vector for vector destinations, a merge otherwise).
///
/// \returns UnableToLegalize for any other \p TypeIdx or when the source size
/// is not a multiple of the \p NarrowTy size; Legalized otherwise.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
  // FIXME: add support for when SizeOp1 isn't an exact multiple of
  // NarrowSize.
  if (SizeOp1 % NarrowSize != 0)
    return UnableToLegalize;
  int NumParts = SizeOp1 / NarrowSize;

  SmallVector<Register, 2> SrcPieces, Segments;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcPieces);

  Register DstReg = MI.getOperand(0).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  uint64_t OpStart = MI.getOperand(2).getImm();
  uint64_t OpSize = DstTy.getSizeInBits();
  const uint64_t OpEnd = OpStart + OpSize;

  for (int Piece = 0; Piece < NumParts; ++Piece) {
    unsigned SrcStart = Piece * NarrowSize;

    // No part of the extract uses this subregister, ignore it.
    if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpEnd)
      continue;

    // The entire subregister is extracted, forward the value.
    if (SrcStart == OpStart && NarrowTy == DstTy) {
      Segments.push_back(SrcPieces[Piece]);
      continue;
    }

    // Work out which bits of this piece overlap the extracted range:
    // ExtractOffset is where they start within the piece and SegSize is how
    // many of them there are.
    int64_t ExtractOffset;
    uint64_t SegSize;
    if (OpStart < SrcStart) {
      ExtractOffset = 0;
      SegSize = std::min(NarrowSize, OpEnd - SrcStart);
    } else {
      ExtractOffset = OpStart - SrcStart;
      SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
    }

    Register SegReg = SrcPieces[Piece];
    const bool UsesWholePiece = ExtractOffset == 0 && SegSize == NarrowSize;
    if (!UsesWholePiece) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, SrcPieces[Piece], ExtractOffset);
    }

    Segments.push_back(SegReg);
  }

  if (MRI.getType(DstReg).isVector())
    MIRBuilder.buildBuildVector(DstReg, Segments);
  else
    MIRBuilder.buildMerge(DstReg, Segments);
  MI.eraseFromParent();
  return Legalized;
}
2969
2970
/// Narrow the result (type index 0) of an insert-style instruction into
/// \p NarrowTy sized pieces.
///
/// Operand 1 (the value being inserted into) is split into NumParts pieces of
/// \p NarrowTy. Each piece is then either forwarded unchanged, replaced
/// wholesale by the inserted register (operand 2), or rebuilt with a narrower
/// insert of the overlapping bits. The pieces are finally recombined into the
/// original destination register and \p MI is erased.
///
/// \returns UnableToLegalize if \p TypeIdx is not 0 or the destination size is
/// not an exact multiple of the size of \p NarrowTy; Legalized otherwise.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  // FIXME: add support for when SizeOp0 isn't an exact multiple of
  // NarrowSize.
  if (SizeOp0 % NarrowSize != 0)
    return UnableToLegalize;

  int NumParts = SizeOp0 / NarrowSize;

  SmallVector<Register, 2> SrcRegs, DstRegs;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

  Register OpReg = MI.getOperand(2).getReg();
  uint64_t OpStart = MI.getOperand(3).getImm();
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
  for (int i = 0; i < NumParts; ++i) {
    uint64_t DstStart = i * NarrowSize;

    if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
      // No part of the insert affects this subregister, forward the original.
      DstRegs.push_back(SrcRegs[i]);
      continue;
    } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is defined by this insert, forward the new
      // value.
      DstRegs.push_back(OpReg);
      continue;
    }

    // The inserted value [OpStart, OpStart + OpSize) partially overlaps this
    // piece [DstStart, DstStart + NarrowSize). Work out:
    //   ExtractOffset - where the overlapping bits start within OpReg,
    //   InsertOffset  - where they land within this piece,
    //   SegSize       - how many bits overlap.
    int64_t ExtractOffset, InsertOffset;
    uint64_t SegSize;
    if (OpStart < DstStart) {
      // The inserted value begins below this piece: the overlap starts at the
      // bottom of the piece and runs to the end of the piece or the end of
      // the inserted value, whichever comes first.
      InsertOffset = 0;
      ExtractOffset = DstStart - OpStart;
      SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
    } else {
      // The inserted value begins inside this piece: the overlap starts at the
      // bottom of the inserted value and is bounded by the room left in the
      // piece and by the width of the inserted value itself.
      InsertOffset = OpStart - DstStart;
      ExtractOffset = 0;
      SegSize = std::min(NarrowSize - InsertOffset, OpSize);
    }

    Register SegReg = OpReg;
    if (ExtractOffset != 0 || SegSize != OpSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
    }

    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
    DstRegs.push_back(DstReg);
  }

  assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
  Register DstReg = MI.getOperand(0).getReg();
  if(MRI.getType(DstReg).isVector())
    MIRBuilder.buildBuildVector(DstReg, DstRegs);
  else
    MIRBuilder.buildMerge(DstReg, DstRegs);
  MI.eraseFromParent();
  return Legalized;
}
3044
3045
/// Narrow a three-operand instruction (one def, two sources) on type index 0
/// by re-emitting the same opcode on \p NarrowTy sized pieces of both sources,
/// plus once per leftover piece, and reassembling the results into the
/// original destination. \p MI is erased on success.
///
/// \returns UnableToLegalize if the first source cannot be broken down into
/// \p NarrowTy pieces; Legalized otherwise.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
                                   LLT NarrowTy) {
  Register ResultReg = MI.getOperand(0).getReg();
  LLT WideTy = MRI.getType(ResultReg);

  assert(MI.getNumOperands() == 3 && TypeIdx == 0);

  SmallVector<Register, 4> LHSParts, LHSRemainder;
  SmallVector<Register, 4> RHSParts, RHSRemainder;
  SmallVector<Register, 4> ResultParts, ResultRemainder;

  LLT RemainderTy;
  bool SplitLHS = extractParts(MI.getOperand(1).getReg(), WideTy, NarrowTy,
                               RemainderTy, LHSParts, LHSRemainder);
  if (!SplitLHS)
    return UnableToLegalize;

  // Both sources have the same type, so the second split must succeed in
  // exactly the same way as the first.
  LLT IgnoredTy;
  bool SplitRHS = extractParts(MI.getOperand(2).getReg(), WideTy, NarrowTy,
                               IgnoredTy, RHSParts, RHSRemainder);
  if (!SplitRHS)
    llvm_unreachable("inconsistent extractParts result");

  // Re-issue the operation on each NarrowTy piece.
  unsigned NumMainParts = RHSParts.size();
  for (unsigned Part = 0; Part != NumMainParts; ++Part) {
    Register PartResult =
        MIRBuilder
            .buildInstr(MI.getOpcode(), {NarrowTy},
                        {LHSParts[Part], RHSParts[Part]})
            .getReg(0);
    ResultParts.push_back(PartResult);
  }

  // And again on whatever did not fit into a whole number of NarrowTy pieces.
  unsigned NumRemainderParts = RHSRemainder.size();
  for (unsigned Part = 0; Part != NumRemainderParts; ++Part) {
    Register PartResult =
        MIRBuilder
            .buildInstr(MI.getOpcode(), {RemainderTy},
                        {LHSRemainder[Part], RHSRemainder[Part]})
            .getReg(0);
    ResultRemainder.push_back(PartResult);
  }

  insertParts(ResultReg, WideTy, NarrowTy, ResultParts, RemainderTy,
              ResultRemainder);

  MI.eraseFromParent();
  return Legalized;
}
3085
3086
/// Narrow the value type (type index 0) of a select with a scalar condition by
/// selecting between matching \p NarrowTy pieces (and leftover pieces) of the
/// two value operands under the same condition, then reassembling the
/// destination. \p MI is erased on success.
///
/// \returns UnableToLegalize if \p TypeIdx is not 0, the condition is a
/// vector, or the first value operand cannot be broken down; Legalized
/// otherwise.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register Cond = MI.getOperand(1).getReg();
  // TODO: Handle vselect
  if (MRI.getType(Cond).isVector())
    return UnableToLegalize;

  Register ResultReg = MI.getOperand(0).getReg();
  LLT WideTy = MRI.getType(ResultReg);

  SmallVector<Register, 4> TrueParts, TrueRemainder;
  SmallVector<Register, 4> FalseParts, FalseRemainder;
  SmallVector<Register, 4> ResultParts, ResultRemainder;

  LLT RemainderTy;
  bool SplitTrue = extractParts(MI.getOperand(2).getReg(), WideTy, NarrowTy,
                                RemainderTy, TrueParts, TrueRemainder);
  if (!SplitTrue)
    return UnableToLegalize;

  // Both value operands share the destination type, so the second split has
  // to mirror the first.
  LLT IgnoredTy;
  bool SplitFalse = extractParts(MI.getOperand(3).getReg(), WideTy, NarrowTy,
                                 IgnoredTy, FalseParts, FalseRemainder);
  if (!SplitFalse)
    llvm_unreachable("inconsistent extractParts result");

  // One select per NarrowTy piece, all sharing the original condition.
  unsigned NumMainParts = TrueParts.size();
  for (unsigned Part = 0; Part != NumMainParts; ++Part) {
    Register Picked =
        MIRBuilder
            .buildSelect(NarrowTy, Cond, TrueParts[Part], FalseParts[Part])
            .getReg(0);
    ResultParts.push_back(Picked);
  }

  // One select per leftover piece.
  unsigned NumRemainderParts = TrueRemainder.size();
  for (unsigned Part = 0; Part != NumRemainderParts; ++Part) {
    Register Picked = MIRBuilder
                          .buildSelect(RemainderTy, Cond, TrueRemainder[Part],
                                       FalseRemainder[Part])
                          .getReg(0);
    ResultRemainder.push_back(Picked);
  }

  insertParts(ResultReg, WideTy, NarrowTy, ResultParts, RemainderTy,
              ResultRemainder);

  MI.eraseFromParent();
  return Legalized;
}
3131
3132
/// Lower a bit-counting instruction (G_CTLZ, G_CTLZ_ZERO_UNDEF, G_CTTZ,
/// G_CTTZ_ZERO_UNDEF) into operations the target is more likely to support.
///
/// \p Ty is the type used for every instruction emitted by the expansion.
///
/// \returns UnableToLegalize for any other opcode; Legalized otherwise. In the
/// Legalized case \p MI has either been mutated in place (with the change
/// observer notified) or erased and replaced.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  unsigned Opc = MI.getOpcode();
  auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
  // An operation counts as usable here if the target reports it as Legal,
  // Libcall or Custom.
  auto isSupported = [this](const LegalityQuery &Q) {
    auto QAction = LI.getAction(Q).Action;
    return QAction == Legal || QAction == Libcall || QAction == Custom;
  };
  switch (Opc) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // This trivially expands to CTLZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTLZ: {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned Len = Ty.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty, Ty}})) {
      // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero:
      //   result = (src == 0) ? Len : ctlz_zero_undef(src)
      auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
                                             {Ty}, {SrcReg});
      auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
      auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
      auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                          SrcReg, MIBZero);
      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
                             MIBCtlzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we do this:
    // NewLen = NextPowerOf2(Len);
    // x = x | (x >> 1);
    // x = x | (x >> 2);
    // ...
    // x = x | (x >>16);
    // x = x | (x >>32); // for 64-bit input
    // Up to NewLen/2
    // return Len - popcount(x);
    //
    // Ref: "Hacker's Delight" by Henry Warren
    Register Op = SrcReg;
    unsigned NewLen = PowerOf2Ceil(Len);
    for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
      auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
      auto MIBOp = MIRBuilder.buildInstr(
          TargetOpcode::G_OR, {Ty},
          {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty},
                                     {Op, MIBShiftAmt})});
      Op = MIBOp->getOperand(0).getReg();
    }
    auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op});
    MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
                          {MIRBuilder.buildConstant(Ty, Len), MIBPop});
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    // This trivially expands to CTTZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTTZ: {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned Len = Ty.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty, Ty}})) {
      // If CTTZ_ZERO_UNDEF is supported, emit that and a select for zero:
      //   result = (src == 0) ? Len : cttz_zero_undef(src)
      auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
                                             {Ty}, {SrcReg});
      auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
      auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
      auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                          SrcReg, MIBZero);
      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
                             MIBCttzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we use: { return popcount(~x & (x - 1)); }
    // unless the target has ctlz but not ctpop, in which case we use:
    // { return Len - nlz(~x & (x-1)); }
    // Ref: "Hacker's Delight" by Henry Warren
    auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
    auto MIBNot =
        MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1});
    auto MIBTmp = MIRBuilder.buildInstr(
        TargetOpcode::G_AND, {Ty},
        {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
                                       {SrcReg, MIBCstNeg1})});
    if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) &&
        isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) {
      auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
      MIRBuilder.buildInstr(
          TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
          {MIBCstLen,
           MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})});
      MI.eraseFromParent();
      return Legalized;
    }
    // Reuse MI as the G_CTPOP of the masked value. This rewrites both the
    // opcode and the source operand in place, so bracket the mutation with
    // observer notifications exactly like the *_ZERO_UNDEF cases above.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
    MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg());
    Observer.changedInstr(MI);
    return Legalized;
  }
  }
}
3244
3245
// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
3246
// representation.
3247
LegalizerHelper::LegalizeResult
3248
2
LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
3249
2
  Register Dst = MI.getOperand(0).getReg();
3250
2
  Register Src = MI.getOperand(1).getReg();
3251
2
  const LLT S64 = LLT::scalar(64);
3252
2
  const LLT S32 = LLT::scalar(32);
3253
2
  const LLT S1 = LLT::scalar(1);
3254
2
3255
2
  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
3256
2
3257
2
  // unsigned cul2f(ulong u) {
3258
2
  //   uint lz = clz(u);
3259
2
  //   uint e = (u != 0) ? 127U + 63U - lz : 0;
3260
2
  //   u = (u << lz) & 0x7fffffffffffffffUL;
3261
2
  //   ulong t = u & 0xffffffffffUL;
3262
2
  //   uint v = (e << 23) | (uint)(u >> 40);
3263
2
  //   uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
3264
2
  //   return as_float(v + r);
3265
2
  // }
3266
2
3267
2
  auto Zero32 = MIRBuilder.buildConstant(S32, 0);
3268
2
  auto Zero64 = MIRBuilder.buildConstant(S64, 0);
3269
2
3270
2
  auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
3271
2
3272
2
  auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
3273
2
  auto Sub = MIRBuilder.buildSub(S32, K, LZ);
3274
2
3275
2
  auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
3276
2
  auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
3277
2
3278
2
  auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
3279
2
  auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
3280
2
3281
2
  auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
3282
2
3283
2
  auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
3284
2
  auto T = MIRBuilder.buildAnd(S64, U, Mask1);
3285
2
3286
2
  auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
3287
2
  auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
3288
2
  auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
3289
2
3290
2
  auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
3291
2
  auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
3292
2
  auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
3293
2
  auto One = MIRBuilder.buildConstant(S32, 1);
3294
2
3295
2
  auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
3296
2
  auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
3297
2
  auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
3298
2
  MIRBuilder.buildAdd(Dst, V, R);
3299
2
3300
2
  return Legalized;
3301
2
}
3302
3303
/// Lower an unsigned integer to floating point conversion.
///
/// Only the s64 source / s32 destination combination is handled; it is
/// forwarded to lowerU64ToF32BitOps. Every other combination yields
/// UnableToLegalize.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  const LLT ResultTy = MRI.getType(MI.getOperand(0).getReg());
  const LLT InputTy = MRI.getType(MI.getOperand(1).getReg());

  const bool IsS64ToS32 =
      InputTy == LLT::scalar(64) && ResultTy == LLT::scalar(32);
  if (!IsS64ToS32)
    return UnableToLegalize;

  // TODO: SelectionDAG has several alternative expansions to port which may
  // be more reasonable depending on the available instructions. If a target
  // has sitofp, does not have CTLZ, or can efficiently use f64 as an
  // intermediate type, this is probably worse.
  return lowerU64ToF32BitOps(MI);
}
3323
3324
/// Lower a signed integer to floating point conversion.
///
/// Only the s64 source / s32 destination combination is handled. It is
/// expressed as an unsigned conversion of the magnitude followed by a
/// conditional negate, and \p MI is erased once the replacement has been
/// built.
///
/// \returns Legalized for s64 -> s32; UnableToLegalize for every other type
/// combination (in which case \p MI is left untouched).
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  if (SrcTy != S64)
    return UnableToLegalize;

  if (DstTy == S32) {
    // signed cl2f(long l) {
    //   long s = l >> 63;
    //   float r = cul2f((l + s) ^ s);
    //   return s ? -r : r;
    // }
    Register L = Src;
    auto SignBit = MIRBuilder.buildConstant(S64, 63);
    auto S = MIRBuilder.buildAShr(S64, L, SignBit);

    auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
    auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
    auto R = MIRBuilder.buildUITOFP(S32, Xor);

    auto RNeg = MIRBuilder.buildFNeg(S32, R);
    auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
                                            MIRBuilder.buildConstant(S64, 0));
    MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);

    // Dst is now defined by the select above; drop the original instruction
    // so the register does not end up with two definitions.
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}
3361
3362
40
/// Map an integer min/max opcode to the compare predicate that is true when
/// the first operand is the one to keep: signed/unsigned "less than" for the
/// min opcodes and signed/unsigned "greater than" for the max opcodes.
///
/// Any other opcode is a programming error.
static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
  if (Opc == TargetOpcode::G_SMIN)
    return CmpInst::ICMP_SLT;
  if (Opc == TargetOpcode::G_SMAX)
    return CmpInst::ICMP_SGT;
  if (Opc == TargetOpcode::G_UMIN)
    return CmpInst::ICMP_ULT;
  if (Opc == TargetOpcode::G_UMAX)
    return CmpInst::ICMP_UGT;
  llvm_unreachable("not in integer min/max");
}
3376
3377
/// Lower an integer min/max into a compare of the two operands followed by a
/// select between them, then erase \p MI.
///
/// The compare result type is the destination type with its element size
/// changed to one bit.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  const Register Result = MI.getOperand(0).getReg();
  const Register LHS = MI.getOperand(1).getReg();
  const Register RHS = MI.getOperand(2).getReg();

  // The predicate is chosen so that "LHS pred RHS" means LHS is the answer.
  const CmpInst::Predicate KeepLHS = minMaxToCompare(MI.getOpcode());
  const LLT CondTy = MRI.getType(Result).changeElementSize(1);

  auto PickLHS = MIRBuilder.buildICmp(KeepLHS, CondTy, LHS, RHS);
  MIRBuilder.buildSelect(Result, PickLHS, LHS, RHS);

  MI.eraseFromParent();
  return Legalized;
}
3392
3393
/// Lower a copysign into integer bit operations and erase \p MI.
///
/// The result is (Src0 & ~SignMask) | (SignOf(Src1) & SignMask), where Src0
/// (operand 1) supplies the magnitude and Src1 (operand 2) supplies the sign.
/// When the two sources have different scalar sizes, the sign bit of Src1 is
/// first moved into the sign-bit position of Src0's type by a shift plus an
/// extend or truncate.
///
/// Any flags on \p MI are transferred to the final OR only.
///
/// \returns Legalized.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();

  const LLT Src0Ty = MRI.getType(Src0);
  const LLT Src1Ty = MRI.getType(Src1);

  const int Src0Size = Src0Ty.getScalarSizeInBits();
  const int Src1Size = Src1Ty.getScalarSizeInBits();

  auto SignBitMask = MIRBuilder.buildConstant(
    Src0Ty, APInt::getSignMask(Src0Size));

  auto NotSignBitMask = MIRBuilder.buildConstant(
    Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));

  // Magnitude bits always come from Src0.
  auto And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask);
  MachineInstr *Or;

  if (Src0Ty == Src1Ty) {
    // Same type: the sign bit of Src1 is already in the right position.
    auto And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask);
    Or = MIRBuilder.buildOr(Dst, And0, And1);
  } else if (Src0Size > Src1Size) {
    // Sign source is narrower: widen it and shift its sign bit up.
    auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
    auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
    auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
    auto And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask);
    Or = MIRBuilder.buildOr(Dst, And0, And1);
  } else {
    // Sign source is wider: shift its sign bit down, then narrow.
    auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
    auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
    auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
    auto And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask);
    Or = MIRBuilder.buildOr(Dst, And0, And1);
  }

  // Be careful about setting nsz/nnan/ninf on every instruction, since the
  // constants are a nan and -0.0, but the final result should preserve
  // everything.
  if (unsigned Flags = MI.getFlags())
    Or->setFlags(Flags);

  MI.eraseFromParent();
  return Legalized;
}
3440
3441
/// Lower G_FMINNUM (or, for any other opcode reaching here, the max flavour)
/// to the corresponding *_IEEE opcode and erase \p MI.
///
/// Unless \p MI carries the no-NaNs flag, each source that is not known to be
/// free of signaling NaNs is first passed through a canonicalize. The flags of
/// \p MI are propagated to every instruction built here.
///
/// \returns Legalized.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
  const bool IsMin = MI.getOpcode() == TargetOpcode::G_FMINNUM;
  unsigned IEEEOpc = TargetOpcode::G_FMAXNUM_IEEE;
  if (IsMin)
    IEEEOpc = TargetOpcode::G_FMINNUM_IEEE;

  Register Result = MI.getOperand(0).getReg();
  Register Inputs[2] = {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()};
  LLT ResultTy = MRI.getType(Result);

  if (!MI.getFlag(MachineInstr::FmNoNans)) {
    // Insert canonicalizes if it's possible we need to quiet to get correct
    // sNaN behavior.
    //
    // Note this must be done here, and not as an optimization combine in the
    // absence of a dedicated quiet-snan instruction as we're using an
    // omni-purpose G_FCANONICALIZE.
    for (Register &Input : Inputs) {
      if (isKnownNeverSNaN(Input, MRI))
        continue;
      Input =
          MIRBuilder.buildFCanonicalize(ResultTy, Input, MI.getFlags()).getReg(0);
    }
  }

  // With the inputs quieted where necessary (or NaNs ruled out by the flag),
  // the IEEE variant can stand in for the original instruction directly.
  MIRBuilder.buildInstr(IEEEOpc, {Result}, {Inputs[0], Inputs[1]},
                        MI.getFlags());
  MI.eraseFromParent();
  return Legalized;
}