Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- HexagonISelDAGToDAG.cpp - A dag to dag inst selector for Hexagon --===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// This file defines an instruction selector for the Hexagon target.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "Hexagon.h"
15
#include "HexagonISelLowering.h"
16
#include "HexagonMachineFunctionInfo.h"
17
#include "HexagonTargetMachine.h"
18
#include "llvm/CodeGen/FunctionLoweringInfo.h"
19
#include "llvm/CodeGen/MachineInstrBuilder.h"
20
#include "llvm/CodeGen/SelectionDAGISel.h"
21
#include "llvm/IR/Intrinsics.h"
22
#include "llvm/Support/CommandLine.h"
23
#include "llvm/Support/Debug.h"
24
using namespace llvm;
25
26
#define DEBUG_TYPE "hexagon-isel"
27
28
static
29
cl::opt<bool>
30
EnableAddressRebalancing("isel-rebalance-addr", cl::Hidden, cl::init(true),
31
  cl::desc("Rebalance address calculation trees to improve "
32
          "instruction selection"));
33
34
// Rebalance only if this allows e.g. combining a GA with an offset or
35
// factoring out a shift.
36
static
37
cl::opt<bool>
38
RebalanceOnlyForOptimizations("rebalance-only-opt", cl::Hidden, cl::init(false),
39
  cl::desc("Rebalance address tree only if this allows optimizations"));
40
41
static
42
cl::opt<bool>
43
RebalanceOnlyImbalancedTrees("rebalance-only-imbal", cl::Hidden,
44
  cl::init(false), cl::desc("Rebalance address tree only if it is imbalanced"));
45
46
//===----------------------------------------------------------------------===//
47
// Instruction Selector Implementation
48
//===----------------------------------------------------------------------===//
49
50
//===--------------------------------------------------------------------===//
51
/// HexagonDAGToDAGISel - Hexagon specific code to select Hexagon machine
52
/// instructions for SelectionDAG operations.
53
///
54
namespace {
55
class HexagonDAGToDAGISel : public SelectionDAGISel {
56
  const HexagonSubtarget *HST;
57
  const HexagonInstrInfo *HII;
58
  const HexagonRegisterInfo *HRI;
59
public:
60
  explicit HexagonDAGToDAGISel(HexagonTargetMachine &tm,
61
                               CodeGenOpt::Level OptLevel)
62
      : SelectionDAGISel(tm, OptLevel), HST(nullptr), HII(nullptr),
63
441
        HRI(nullptr) {}
64
65
2.40k
  bool runOnMachineFunction(MachineFunction &MF) override {
66
2.40k
    // Reset the subtarget each time through.
67
2.40k
    HST = &MF.getSubtarget<HexagonSubtarget>();
68
2.40k
    HII = HST->getInstrInfo();
69
2.40k
    HRI = HST->getRegisterInfo();
70
2.40k
    SelectionDAGISel::runOnMachineFunction(MF);
71
2.40k
    return true;
72
2.40k
  }
73
74
8.82k
  bool ComplexPatternFuncMutatesDAG() const override {
75
8.82k
    return true;
76
8.82k
  }
77
  void PreprocessISelDAG() override;
78
  void EmitFunctionEntryCode() override;
79
80
  void Select(SDNode *N) override;
81
82
  // Complex Pattern Selectors.
83
  inline bool SelectAddrGA(SDValue &N, SDValue &R);
84
  inline bool SelectAddrGP(SDValue &N, SDValue &R);
85
  bool SelectGlobalAddress(SDValue &N, SDValue &R, bool UseGP);
86
  bool SelectAddrFI(SDValue &N, SDValue &R);
87
  bool DetectUseSxtw(SDValue &N, SDValue &R);
88
89
0
  StringRef getPassName() const override {
90
0
    return "Hexagon DAG->DAG Pattern Instruction Selection";
91
0
  }
92
93
  // Generate a machine instruction node corresponding to the circ/brev
94
  // load intrinsic.
95
  MachineSDNode *LoadInstrForLoadIntrinsic(SDNode *IntN);
96
  // Given the circ/brev load intrinsic and the already generated machine
97
  // instruction, generate the appropriate store (that is a part of the
98
  // intrinsic's functionality).
99
  SDNode *StoreInstrForLoadIntrinsic(MachineSDNode *LoadN, SDNode *IntN);
100
101
  void SelectFrameIndex(SDNode *N);
102
  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
103
  /// inline asm expressions.
104
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
105
                                    unsigned ConstraintID,
106
                                    std::vector<SDValue> &OutOps) override;
107
  bool tryLoadOfLoadIntrinsic(LoadSDNode *N);
108
  void SelectLoad(SDNode *N);
109
  void SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl);
110
  void SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl);
111
  void SelectStore(SDNode *N);
112
  void SelectSHL(SDNode *N);
113
  void SelectZeroExtend(SDNode *N);
114
  void SelectIntrinsicWChain(SDNode *N);
115
  void SelectIntrinsicWOChain(SDNode *N);
116
  void SelectConstant(SDNode *N);
117
  void SelectConstantFP(SDNode *N);
118
  void SelectBitcast(SDNode *N);
119
120
  // Include the pieces autogenerated from the target description.
121
  #include "HexagonGenDAGISel.inc"
122
123
private:
124
  bool keepsLowBits(const SDValue &Val, unsigned NumBits, SDValue &Src);
125
  bool isOrEquivalentToAdd(const SDNode *N) const;
126
  bool isAlignedMemNode(const MemSDNode *N) const;
127
  bool isSmallStackStore(const StoreSDNode *N) const;
128
  bool isPositiveHalfWord(const SDNode *N) const;
129
130
  // DAG preprocessing functions.
131
  void ppSimplifyOrSelect0(std::vector<SDNode*> &&Nodes);
132
  void ppAddrReorderAddShl(std::vector<SDNode*> &&Nodes);
133
  void ppAddrRewriteAndSrl(std::vector<SDNode*> &&Nodes);
134
  void ppHoistZextI1(std::vector<SDNode*> &&Nodes);
135
136
  SmallDenseMap<SDNode *,int> RootWeights;
137
  SmallDenseMap<SDNode *,int> RootHeights;
138
  SmallDenseMap<const Value *,int> GAUsesInFunction;
139
  int getWeight(SDNode *N);
140
  int getHeight(SDNode *N);
141
  SDValue getMultiplierForSHL(SDNode *N);
142
  SDValue factorOutPowerOf2(SDValue V, unsigned Power);
143
  unsigned getUsesInFunction(const Value *V);
144
  SDValue balanceSubTree(SDNode *N, bool Factorize = false);
145
  void rebalanceAddressTrees();
146
}; // end HexagonDAGToDAGISel
147
}  // end anonymous namespace
148
149
150
/// createHexagonISelDag - This pass converts a legalized DAG into a
151
/// Hexagon-specific DAG, ready for instruction scheduling.
152
///
153
namespace llvm {
154
FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
155
441
                                   CodeGenOpt::Level OptLevel) {
156
441
  return new HexagonDAGToDAGISel(TM, OptLevel);
157
441
}
158
}
159
160
// Intrinsics that return a a predicate.
161
0
static bool doesIntrinsicReturnPredicate(unsigned ID) {
162
0
  switch (ID) {
163
0
    default:
164
0
      return false;
165
0
    case Intrinsic::hexagon_C2_cmpeq:
166
0
    case Intrinsic::hexagon_C2_cmpgt:
167
0
    case Intrinsic::hexagon_C2_cmpgtu:
168
0
    case Intrinsic::hexagon_C2_cmpgtup:
169
0
    case Intrinsic::hexagon_C2_cmpgtp:
170
0
    case Intrinsic::hexagon_C2_cmpeqp:
171
0
    case Intrinsic::hexagon_C2_bitsset:
172
0
    case Intrinsic::hexagon_C2_bitsclr:
173
0
    case Intrinsic::hexagon_C2_cmpeqi:
174
0
    case Intrinsic::hexagon_C2_cmpgti:
175
0
    case Intrinsic::hexagon_C2_cmpgtui:
176
0
    case Intrinsic::hexagon_C2_cmpgei:
177
0
    case Intrinsic::hexagon_C2_cmpgeui:
178
0
    case Intrinsic::hexagon_C2_cmplt:
179
0
    case Intrinsic::hexagon_C2_cmpltu:
180
0
    case Intrinsic::hexagon_C2_bitsclri:
181
0
    case Intrinsic::hexagon_C2_and:
182
0
    case Intrinsic::hexagon_C2_or:
183
0
    case Intrinsic::hexagon_C2_xor:
184
0
    case Intrinsic::hexagon_C2_andn:
185
0
    case Intrinsic::hexagon_C2_not:
186
0
    case Intrinsic::hexagon_C2_orn:
187
0
    case Intrinsic::hexagon_C2_pxfer_map:
188
0
    case Intrinsic::hexagon_C2_any8:
189
0
    case Intrinsic::hexagon_C2_all8:
190
0
    case Intrinsic::hexagon_A2_vcmpbeq:
191
0
    case Intrinsic::hexagon_A2_vcmpbgtu:
192
0
    case Intrinsic::hexagon_A2_vcmpheq:
193
0
    case Intrinsic::hexagon_A2_vcmphgt:
194
0
    case Intrinsic::hexagon_A2_vcmphgtu:
195
0
    case Intrinsic::hexagon_A2_vcmpweq:
196
0
    case Intrinsic::hexagon_A2_vcmpwgt:
197
0
    case Intrinsic::hexagon_A2_vcmpwgtu:
198
0
    case Intrinsic::hexagon_C2_tfrrp:
199
0
    case Intrinsic::hexagon_S2_tstbit_i:
200
0
    case Intrinsic::hexagon_S2_tstbit_r:
201
0
      return true;
202
0
  }
203
0
}
204
205
53
void HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl) {
206
53
  SDValue Chain = LD->getChain();
207
53
  SDValue Base = LD->getBasePtr();
208
53
  SDValue Offset = LD->getOffset();
209
53
  int32_t Inc = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
210
53
  EVT LoadedVT = LD->getMemoryVT();
211
53
  unsigned Opcode = 0;
212
53
213
53
  // Check for zero extended loads. Treat any-extend loads as zero extended
214
53
  // loads.
215
53
  ISD::LoadExtType ExtType = LD->getExtensionType();
216
51
  bool IsZeroExt = (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD);
217
53
  bool IsValidInc = HII->isValidAutoIncImm(LoadedVT, Inc);
218
53
219
53
  assert(LoadedVT.isSimple());
220
53
  switch (LoadedVT.getSimpleVT().SimpleTy) {
221
2
  case MVT::i8:
222
2
    if (IsZeroExt)
223
2
      
Opcode = IsValidInc ? 2
Hexagon::L2_loadrub_pi2
:
Hexagon::L2_loadrub_io0
;
224
2
    else
225
0
      
Opcode = IsValidInc ? 0
Hexagon::L2_loadrb_pi0
:
Hexagon::L2_loadrb_io0
;
226
2
    break;
227
2
  case MVT::i16:
228
2
    if (IsZeroExt)
229
0
      
Opcode = IsValidInc ? 0
Hexagon::L2_loadruh_pi0
:
Hexagon::L2_loadruh_io0
;
230
2
    else
231
2
      
Opcode = IsValidInc ? 2
Hexagon::L2_loadrh_pi2
:
Hexagon::L2_loadrh_io0
;
232
2
    break;
233
21
  case MVT::i32:
234
21
    Opcode = IsValidInc ? 
Hexagon::L2_loadri_pi21
:
Hexagon::L2_loadri_io0
;
235
21
    break;
236
24
  case MVT::i64:
237
24
    Opcode = IsValidInc ? 
Hexagon::L2_loadrd_pi24
:
Hexagon::L2_loadrd_io0
;
238
24
    break;
239
53
  // 64B
240
4
  case MVT::v64i8:
241
4
  case MVT::v32i16:
242
4
  case MVT::v16i32:
243
4
  case MVT::v8i64:
244
4
  case MVT::v128i8:
245
4
  case MVT::v64i16:
246
4
  case MVT::v32i32:
247
4
  case MVT::v16i64:
248
4
    if (
isAlignedMemNode(LD)4
) {
249
2
      if (LD->isNonTemporal())
250
0
        
Opcode = IsValidInc ? 0
Hexagon::V6_vL32b_nt_pi0
:
Hexagon::V6_vL32b_nt_ai0
;
251
2
      else
252
2
        
Opcode = IsValidInc ? 2
Hexagon::V6_vL32b_pi2
:
Hexagon::V6_vL32b_ai0
;
253
4
    } else {
254
2
      Opcode = IsValidInc ? 
Hexagon::V6_vL32Ub_pi2
:
Hexagon::V6_vL32Ub_ai0
;
255
2
    }
256
4
    break;
257
0
  default:
258
0
    llvm_unreachable("Unexpected memory type in indexed load");
259
53
  }
260
53
261
53
  SDValue IncV = CurDAG->getTargetConstant(Inc, dl, MVT::i32);
262
53
  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
263
53
  MemOp[0] = LD->getMemOperand();
264
53
265
53
  auto getExt64 = [this,ExtType] (MachineSDNode *N, const SDLoc &dl)
266
24
        -> MachineSDNode* {
267
24
    if (
ExtType == ISD::ZEXTLOAD || 24
ExtType == ISD::EXTLOAD24
) {
268
0
      SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
269
0
      return CurDAG->getMachineNode(Hexagon::A4_combineir, dl, MVT::i64,
270
0
                                    Zero, SDValue(N, 0));
271
0
    }
272
24
    
if (24
ExtType == ISD::SEXTLOAD24
)
273
0
      return CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64,
274
0
                                    SDValue(N, 0));
275
24
    return N;
276
24
  };
277
53
278
53
  //                  Loaded value   Next address   Chain
279
53
  SDValue From[3] = { SDValue(LD,0), SDValue(LD,1), SDValue(LD,2) };
280
53
  SDValue To[3];
281
53
282
53
  EVT ValueVT = LD->getValueType(0);
283
53
  if (
ValueVT == MVT::i64 && 53
ExtType != ISD::NON_EXTLOAD24
) {
284
0
    // A load extending to i64 will actually produce i32, which will then
285
0
    // need to be extended to i64.
286
0
    assert(LoadedVT.getSizeInBits() <= 32);
287
0
    ValueVT = MVT::i32;
288
0
  }
289
53
290
53
  if (
IsValidInc53
) {
291
53
    MachineSDNode *L = CurDAG->getMachineNode(Opcode, dl, ValueVT,
292
53
                                              MVT::i32, MVT::Other, Base,
293
53
                                              IncV, Chain);
294
53
    L->setMemRefs(MemOp, MemOp+1);
295
53
    To[1] = SDValue(L, 1); // Next address.
296
53
    To[2] = SDValue(L, 2); // Chain.
297
53
    // Handle special case for extension to i64.
298
53
    if (LD->getValueType(0) == MVT::i64)
299
24
      L = getExt64(L, dl);
300
53
    To[0] = SDValue(L, 0); // Loaded (extended) value.
301
0
  } else {
302
0
    SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
303
0
    MachineSDNode *L = CurDAG->getMachineNode(Opcode, dl, ValueVT, MVT::Other,
304
0
                                              Base, Zero, Chain);
305
0
    L->setMemRefs(MemOp, MemOp+1);
306
0
    To[2] = SDValue(L, 1); // Chain.
307
0
    MachineSDNode *A = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
308
0
                                              Base, IncV);
309
0
    To[1] = SDValue(A, 0); // Next address.
310
0
    // Handle special case for extension to i64.
311
0
    if (LD->getValueType(0) == MVT::i64)
312
0
      L = getExt64(L, dl);
313
0
    To[0] = SDValue(L, 0); // Loaded (extended) value.
314
0
  }
315
53
  ReplaceUses(From, To, 3);
316
53
  CurDAG->RemoveDeadNode(LD);
317
53
}
318
319
320
109
MachineSDNode *HexagonDAGToDAGISel::LoadInstrForLoadIntrinsic(SDNode *IntN) {
321
109
  if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
322
0
    return nullptr;
323
109
324
109
  SDLoc dl(IntN);
325
109
  unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue();
326
109
327
109
  static std::map<unsigned,unsigned> LoadPciMap = {
328
109
    { Intrinsic::hexagon_circ_ldb,  Hexagon::L2_loadrb_pci  },
329
109
    { Intrinsic::hexagon_circ_ldub, Hexagon::L2_loadrub_pci },
330
109
    { Intrinsic::hexagon_circ_ldh,  Hexagon::L2_loadrh_pci  },
331
109
    { Intrinsic::hexagon_circ_lduh, Hexagon::L2_loadruh_pci },
332
109
    { Intrinsic::hexagon_circ_ldw,  Hexagon::L2_loadri_pci  },
333
109
    { Intrinsic::hexagon_circ_ldd,  Hexagon::L2_loadrd_pci  },
334
109
  };
335
109
  auto FLC = LoadPciMap.find(IntNo);
336
109
  if (
FLC != LoadPciMap.end()109
) {
337
24
    SDNode *Mod = CurDAG->getMachineNode(Hexagon::A2_tfrrcr, dl, MVT::i32,
338
24
          IntN->getOperand(4));
339
24
    EVT ValTy = (IntNo == Intrinsic::hexagon_circ_ldd) ? 
MVT::i6417
:
MVT::i327
;
340
24
    EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
341
24
    // Operands: { Base, Increment, Modifier, Chain }
342
24
    auto Inc = cast<ConstantSDNode>(IntN->getOperand(5));
343
24
    SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), dl, MVT::i32);
344
24
    MachineSDNode *Res = CurDAG->getMachineNode(FLC->second, dl, RTys,
345
24
          { IntN->getOperand(2), I, SDValue(Mod,0), IntN->getOperand(0) });
346
24
    return Res;
347
24
  }
348
85
349
85
  static std::map<unsigned,unsigned> LoadPbrMap = {
350
85
    { Intrinsic::hexagon_brev_ldb,  Hexagon::L2_loadrb_pbr  },
351
85
    { Intrinsic::hexagon_brev_ldub, Hexagon::L2_loadrub_pbr },
352
85
    { Intrinsic::hexagon_brev_ldh,  Hexagon::L2_loadrh_pbr  },
353
85
    { Intrinsic::hexagon_brev_lduh, Hexagon::L2_loadruh_pbr },
354
85
    { Intrinsic::hexagon_brev_ldw,  Hexagon::L2_loadri_pbr  },
355
85
    { Intrinsic::hexagon_brev_ldd,  Hexagon::L2_loadrd_pbr  },
356
85
  };
357
85
  auto FLB = LoadPbrMap.find(IntNo);
358
85
  if (
FLB != LoadPbrMap.end()85
) {
359
12
    SDNode *Mod = CurDAG->getMachineNode(Hexagon::A2_tfrrcr, dl, MVT::i32,
360
12
            IntN->getOperand(4));
361
12
    EVT ValTy = (IntNo == Intrinsic::hexagon_brev_ldd) ? 
MVT::i642
:
MVT::i3210
;
362
12
    EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
363
12
    // Operands: { Base, Modifier, Chain }
364
12
    MachineSDNode *Res = CurDAG->getMachineNode(FLB->second, dl, RTys,
365
12
          { IntN->getOperand(2), SDValue(Mod,0), IntN->getOperand(0) });
366
12
    return Res;
367
12
  }
368
73
369
73
  return nullptr;
370
73
}
371
372
SDNode *HexagonDAGToDAGISel::StoreInstrForLoadIntrinsic(MachineSDNode *LoadN,
373
36
      SDNode *IntN) {
374
36
  // The "LoadN" is just a machine load instruction. The intrinsic also
375
36
  // involves storing it. Generate an appropriate store to the location
376
36
  // given in the intrinsic's operand(3).
377
36
  uint64_t F = HII->get(LoadN->getMachineOpcode()).TSFlags;
378
36
  unsigned SizeBits = (F >> HexagonII::MemAccessSizePos) &
379
36
                      HexagonII::MemAccesSizeMask;
380
36
  unsigned Size = 1U << (SizeBits-1);
381
36
382
36
  SDLoc dl(IntN);
383
36
  MachinePointerInfo PI;
384
36
  SDValue TS;
385
36
  SDValue Loc = IntN->getOperand(3);
386
36
387
36
  if (Size >= 4)
388
24
    TS = CurDAG->getStore(SDValue(LoadN, 2), dl, SDValue(LoadN, 0), Loc, PI,
389
24
                          Size);
390
36
  else
391
12
    TS = CurDAG->getTruncStore(SDValue(LoadN, 2), dl, SDValue(LoadN, 0), Loc,
392
12
                               PI, MVT::getIntegerVT(Size * 8), Size);
393
36
394
36
  SDNode *StoreN;
395
36
  {
396
36
    HandleSDNode Handle(TS);
397
36
    SelectStore(TS.getNode());
398
36
    StoreN = Handle.getValue().getNode();
399
36
  }
400
36
401
36
  // Load's results are { Loaded value, Updated pointer, Chain }
402
36
  ReplaceUses(SDValue(IntN, 0), SDValue(LoadN, 1));
403
36
  ReplaceUses(SDValue(IntN, 1), SDValue(StoreN, 0));
404
36
  return StoreN;
405
36
}
406
407
1.91k
bool HexagonDAGToDAGISel::tryLoadOfLoadIntrinsic(LoadSDNode *N) {
408
1.91k
  // The intrinsics for load circ/brev perform two operations:
409
1.91k
  // 1. Load a value V from the specified location, using the addressing
410
1.91k
  //    mode corresponding to the intrinsic.
411
1.91k
  // 2. Store V into a specified location. This location is typically a
412
1.91k
  //    local, temporary object.
413
1.91k
  // In many cases, the program using these intrinsics will immediately
414
1.91k
  // load V again from the local object. In those cases, when certain
415
1.91k
  // conditions are met, the last load can be removed.
416
1.91k
  // This function identifies and optimizes this pattern. If the pattern
417
1.91k
  // cannot be optimized, it returns nullptr, which will cause the load
418
1.91k
  // to be selected separately from the intrinsic (which will be handled
419
1.91k
  // in SelectIntrinsicWChain).
420
1.91k
421
1.91k
  SDValue Ch = N->getOperand(0);
422
1.91k
  SDValue Loc = N->getOperand(1);
423
1.91k
424
1.91k
  // Assume that the load and the intrinsic are connected directly with a
425
1.91k
  // chain:
426
1.91k
  //   t1: i32,ch = int.load ..., ..., ..., Loc, ...    // <-- C
427
1.91k
  //   t2: i32,ch = load t1:1, Loc, ...
428
1.91k
  SDNode *C = Ch.getNode();
429
1.91k
430
1.91k
  if (C->getOpcode() != ISD::INTRINSIC_W_CHAIN)
431
1.86k
    return false;
432
42
433
42
  // The second load can only be eliminated if its extension type matches
434
42
  // that of the load instruction corresponding to the intrinsic. The user
435
42
  // can provide an address of an unsigned variable to store the result of
436
42
  // a sign-extending intrinsic into (or the other way around).
437
42
  ISD::LoadExtType IntExt;
438
42
  switch (cast<ConstantSDNode>(C->getOperand(1))->getZExtValue()) {
439
6
    case Intrinsic::hexagon_brev_ldub:
440
6
    case Intrinsic::hexagon_brev_lduh:
441
6
    case Intrinsic::hexagon_circ_ldub:
442
6
    case Intrinsic::hexagon_circ_lduh:
443
6
      IntExt = ISD::ZEXTLOAD;
444
6
      break;
445
30
    case Intrinsic::hexagon_brev_ldw:
446
30
    case Intrinsic::hexagon_brev_ldd:
447
30
    case Intrinsic::hexagon_circ_ldw:
448
30
    case Intrinsic::hexagon_circ_ldd:
449
30
      IntExt = ISD::NON_EXTLOAD;
450
30
      break;
451
6
    default:
452
6
      IntExt = ISD::SEXTLOAD;
453
6
      break;
454
42
  }
455
42
  
if (42
N->getExtensionType() != IntExt42
)
456
0
    return false;
457
42
458
42
  // Make sure the target location for the loaded value in the load intrinsic
459
42
  // is the location from which LD (or N) is loading.
460
42
  
if (42
C->getNumOperands() < 4 || 42
Loc.getNode() != C->getOperand(3).getNode()42
)
461
8
    return false;
462
34
463
34
  
if (MachineSDNode *34
L34
= LoadInstrForLoadIntrinsic(C)) {
464
34
    SDNode *S = StoreInstrForLoadIntrinsic(L, C);
465
34
    SDValue F[] = { SDValue(N,0), SDValue(N,1), SDValue(C,0), SDValue(C,1) };
466
34
    SDValue T[] = { SDValue(L,0), SDValue(S,0), SDValue(L,1), SDValue(S,0) };
467
34
    ReplaceUses(F, T, array_lengthof(T));
468
34
    // This transformation will leave the intrinsic dead. If it remains in
469
34
    // the DAG, the selection code will see it again, but without the load,
470
34
    // and it will generate a store that is normally required for it.
471
34
    CurDAG->RemoveDeadNode(C);
472
34
    return true;
473
34
  }
474
0
475
0
  return false;
476
0
}
477
478
1.96k
void HexagonDAGToDAGISel::SelectLoad(SDNode *N) {
479
1.96k
  SDLoc dl(N);
480
1.96k
  LoadSDNode *LD = cast<LoadSDNode>(N);
481
1.96k
  ISD::MemIndexedMode AM = LD->getAddressingMode();
482
1.96k
483
1.96k
  // Handle indexed loads.
484
1.96k
  if (
AM != ISD::UNINDEXED1.96k
) {
485
53
    SelectIndexedLoad(LD, dl);
486
53
    return;
487
53
  }
488
1.91k
489
1.91k
  // Handle patterns using circ/brev load intrinsics.
490
1.91k
  
if (1.91k
tryLoadOfLoadIntrinsic(LD)1.91k
)
491
34
    return;
492
1.87k
493
1.87k
  SelectCode(LD);
494
1.87k
}
495
496
38
void HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl) {
497
38
  SDValue Chain = ST->getChain();
498
38
  SDValue Base = ST->getBasePtr();
499
38
  SDValue Offset = ST->getOffset();
500
38
  SDValue Value = ST->getValue();
501
38
  // Get the constant value.
502
38
  int32_t Inc = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
503
38
  EVT StoredVT = ST->getMemoryVT();
504
38
  EVT ValueVT = Value.getValueType();
505
38
506
38
  bool IsValidInc = HII->isValidAutoIncImm(StoredVT, Inc);
507
38
  unsigned Opcode = 0;
508
38
509
38
  assert(StoredVT.isSimple());
510
38
  switch (StoredVT.getSimpleVT().SimpleTy) {
511
11
  case MVT::i8:
512
11
    Opcode = IsValidInc ? 
Hexagon::S2_storerb_pi11
:
Hexagon::S2_storerb_io0
;
513
11
    break;
514
2
  case MVT::i16:
515
2
    Opcode = IsValidInc ? 
Hexagon::S2_storerh_pi2
:
Hexagon::S2_storerh_io0
;
516
2
    break;
517
19
  case MVT::i32:
518
19
    Opcode = IsValidInc ? 
Hexagon::S2_storeri_pi19
:
Hexagon::S2_storeri_io0
;
519
19
    break;
520
2
  case MVT::i64:
521
2
    Opcode = IsValidInc ? 
Hexagon::S2_storerd_pi2
:
Hexagon::S2_storerd_io0
;
522
2
    break;
523
4
  case MVT::v64i8:
524
4
  case MVT::v32i16:
525
4
  case MVT::v16i32:
526
4
  case MVT::v8i64:
527
4
  case MVT::v128i8:
528
4
  case MVT::v64i16:
529
4
  case MVT::v32i32:
530
4
  case MVT::v16i64:
531
4
    if (
isAlignedMemNode(ST)4
) {
532
2
      if (ST->isNonTemporal())
533
0
        
Opcode = IsValidInc ? 0
Hexagon::V6_vS32b_nt_pi0
:
Hexagon::V6_vS32b_nt_ai0
;
534
2
      else
535
2
        
Opcode = IsValidInc ? 2
Hexagon::V6_vS32b_pi2
:
Hexagon::V6_vS32b_ai0
;
536
4
    } else {
537
2
      Opcode = IsValidInc ? 
Hexagon::V6_vS32Ub_pi2
:
Hexagon::V6_vS32Ub_ai0
;
538
2
    }
539
4
    break;
540
0
  default:
541
0
    llvm_unreachable("Unexpected memory type in indexed store");
542
38
  }
543
38
544
38
  
if (38
ST->isTruncatingStore() && 38
ValueVT.getSizeInBits() == 6413
) {
545
0
    assert(StoredVT.getSizeInBits() < 64 && "Not a truncating store");
546
0
    Value = CurDAG->getTargetExtractSubreg(Hexagon::isub_lo,
547
0
                                           dl, MVT::i32, Value);
548
0
  }
549
38
550
38
  SDValue IncV = CurDAG->getTargetConstant(Inc, dl, MVT::i32);
551
38
  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
552
38
  MemOp[0] = ST->getMemOperand();
553
38
554
38
  //                  Next address   Chain
555
38
  SDValue From[2] = { SDValue(ST,0), SDValue(ST,1) };
556
38
  SDValue To[2];
557
38
558
38
  if (
IsValidInc38
) {
559
38
    // Build post increment store.
560
38
    SDValue Ops[] = { Base, IncV, Value, Chain };
561
38
    MachineSDNode *S = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Other,
562
38
                                              Ops);
563
38
    S->setMemRefs(MemOp, MemOp + 1);
564
38
    To[0] = SDValue(S, 0);
565
38
    To[1] = SDValue(S, 1);
566
38
  } else {
567
0
    SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
568
0
    SDValue Ops[] = { Base, Zero, Value, Chain };
569
0
    MachineSDNode *S = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
570
0
    S->setMemRefs(MemOp, MemOp + 1);
571
0
    To[1] = SDValue(S, 0);
572
0
    MachineSDNode *A = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
573
0
                                              Base, IncV);
574
0
    To[0] = SDValue(A, 0);
575
0
  }
576
38
577
38
  ReplaceUses(From, To, 2);
578
38
  CurDAG->RemoveDeadNode(ST);
579
38
}
580
581
1.87k
void HexagonDAGToDAGISel::SelectStore(SDNode *N) {
582
1.87k
  SDLoc dl(N);
583
1.87k
  StoreSDNode *ST = cast<StoreSDNode>(N);
584
1.87k
  ISD::MemIndexedMode AM = ST->getAddressingMode();
585
1.87k
586
1.87k
  // Handle indexed stores.
587
1.87k
  if (
AM != ISD::UNINDEXED1.87k
) {
588
38
    SelectIndexedStore(ST, dl);
589
38
    return;
590
38
  }
591
1.83k
592
1.83k
  SelectCode(ST);
593
1.83k
}
594
595
50
void HexagonDAGToDAGISel::SelectSHL(SDNode *N) {
596
50
  SDLoc dl(N);
597
50
  SDValue Shl_0 = N->getOperand(0);
598
50
  SDValue Shl_1 = N->getOperand(1);
599
50
600
50
  auto Default = [this,N] () -> void { SelectCode(N); };
601
50
602
50
  if (
N->getValueType(0) != MVT::i32 || 50
Shl_1.getOpcode() != ISD::Constant49
)
603
24
    return Default();
604
26
605
26
  // RHS is const.
606
26
  int32_t ShlConst = cast<ConstantSDNode>(Shl_1)->getSExtValue();
607
26
608
26
  if (
Shl_0.getOpcode() == ISD::MUL26
) {
609
1
    SDValue Mul_0 = Shl_0.getOperand(0); // Val
610
1
    SDValue Mul_1 = Shl_0.getOperand(1); // Const
611
1
    // RHS of mul is const.
612
1
    if (ConstantSDNode *
C1
= dyn_cast<ConstantSDNode>(Mul_1)) {
613
0
      int32_t ValConst = C->getSExtValue() << ShlConst;
614
0
      if (
isInt<9>(ValConst)0
) {
615
0
        SDValue Val = CurDAG->getTargetConstant(ValConst, dl, MVT::i32);
616
0
        SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl,
617
0
                                                MVT::i32, Mul_0, Val);
618
0
        ReplaceNode(N, Result);
619
0
        return;
620
0
      }
621
1
    }
622
1
    return Default();
623
1
  }
624
25
625
25
  
if (25
Shl_0.getOpcode() == ISD::SUB25
) {
626
1
    SDValue Sub_0 = Shl_0.getOperand(0); // Const 0
627
1
    SDValue Sub_1 = Shl_0.getOperand(1); // Val
628
1
    if (ConstantSDNode *
C11
= dyn_cast<ConstantSDNode>(Sub_0)) {
629
0
      if (
C1->getSExtValue() != 0 || 0
Sub_1.getOpcode() != ISD::SHL0
)
630
0
        return Default();
631
0
      SDValue Shl2_0 = Sub_1.getOperand(0); // Val
632
0
      SDValue Shl2_1 = Sub_1.getOperand(1); // Const
633
0
      if (ConstantSDNode *
C20
= dyn_cast<ConstantSDNode>(Shl2_1)) {
634
0
        int32_t ValConst = 1 << (ShlConst + C2->getSExtValue());
635
0
        if (
isInt<9>(-ValConst)0
) {
636
0
          SDValue Val = CurDAG->getTargetConstant(-ValConst, dl, MVT::i32);
637
0
          SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl,
638
0
                                                  MVT::i32, Shl2_0, Val);
639
0
          ReplaceNode(N, Result);
640
0
          return;
641
0
        }
642
25
      }
643
0
    }
644
1
  }
645
25
646
25
  return Default();
647
25
}
648
649
650
//
651
// If there is an zero_extend followed an intrinsic in DAG (this means - the
652
// result of the intrinsic is predicate); convert the zero_extend to
653
// transfer instruction.
654
//
655
// Zero extend -> transfer is lowered here. Otherwise, zero_extend will be
656
// converted into a MUX as predicate registers defined as 1 bit in the
657
// compiler. Architecture defines them as 8-bit registers.
658
// We want to preserve all the lower 8-bits and, not just 1 LSB bit.
659
//
660
79
void HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
661
79
  SDLoc dl(N);
662
79
663
79
  SDValue Op0 = N->getOperand(0);
664
79
  EVT OpVT = Op0.getValueType();
665
79
  unsigned OpBW = OpVT.getSizeInBits();
666
79
667
79
  // Special handling for zero-extending a vector of booleans.
668
79
  if (
OpVT.isVector() && 79
OpVT.getVectorElementType() == MVT::i15
&&
OpBW <= 640
) {
669
0
    SDNode *Mask = CurDAG->getMachineNode(Hexagon::C2_mask, dl, MVT::i64, Op0);
670
0
    unsigned NE = OpVT.getVectorNumElements();
671
0
    EVT ExVT = N->getValueType(0);
672
0
    unsigned ES = ExVT.getScalarSizeInBits();
673
0
    uint64_t MV = 0, Bit = 1;
674
0
    for (unsigned i = 0; 
i < NE0
;
++i0
) {
675
0
      MV |= Bit;
676
0
      Bit <<= ES;
677
0
    }
678
0
    SDValue Ones = CurDAG->getTargetConstant(MV, dl, MVT::i64);
679
0
    SDNode *OnesReg = CurDAG->getMachineNode(Hexagon::CONST64, dl,
680
0
                                             MVT::i64, Ones);
681
0
    if (
ExVT.getSizeInBits() == 320
) {
682
0
      SDNode *And = CurDAG->getMachineNode(Hexagon::A2_andp, dl, MVT::i64,
683
0
                                           SDValue(Mask,0), SDValue(OnesReg,0));
684
0
      SDValue SubR = CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32);
685
0
      ReplaceNode(N, CurDAG->getMachineNode(Hexagon::EXTRACT_SUBREG, dl, ExVT,
686
0
                                            SDValue(And, 0), SubR));
687
0
      return;
688
0
    }
689
0
    ReplaceNode(N,
690
0
                CurDAG->getMachineNode(Hexagon::A2_andp, dl, ExVT,
691
0
                                       SDValue(Mask, 0), SDValue(OnesReg, 0)));
692
0
    return;
693
0
  }
694
79
695
79
  SDNode *Int = N->getOperand(0).getNode();
696
79
  if (
(Int->getOpcode() == ISD::INTRINSIC_WO_CHAIN)79
) {
697
0
    unsigned ID = cast<ConstantSDNode>(Int->getOperand(0))->getZExtValue();
698
0
    if (
doesIntrinsicReturnPredicate(ID)0
) {
699
0
      // Now we need to differentiate target data types.
700
0
      if (
N->getValueType(0) == MVT::i640
) {
701
0
        // Convert the zero_extend to Rs = Pd followed by A2_combinew(0,Rs).
702
0
        SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
703
0
        SDNode *Result_1 = CurDAG->getMachineNode(Hexagon::C2_tfrpr, dl,
704
0
                                                  MVT::i32, SDValue(Int, 0));
705
0
        SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl,
706
0
                                                  MVT::i32, TargetConst0);
707
0
        SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::A2_combinew, dl,
708
0
                                                  MVT::i64, MVT::Other,
709
0
                                                  SDValue(Result_2, 0),
710
0
                                                  SDValue(Result_1, 0));
711
0
        ReplaceNode(N, Result_3);
712
0
        return;
713
0
      }
714
0
      
if (0
N->getValueType(0) == MVT::i320
) {
715
0
        // Convert the zero_extend to Rs = Pd
716
0
        SDNode* RsPd = CurDAG->getMachineNode(Hexagon::C2_tfrpr, dl,
717
0
                                              MVT::i32, SDValue(Int, 0));
718
0
        ReplaceNode(N, RsPd);
719
0
        return;
720
0
      }
721
0
      
llvm_unreachable0
("Unexpected value type");
722
0
    }
723
0
  }
724
79
  SelectCode(N);
725
79
}
726
727
728
//
729
// Handling intrinsics for circular load and bitreverse load.
730
//
731
75
void HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) {
732
75
  if (MachineSDNode *
L75
= LoadInstrForLoadIntrinsic(N)) {
733
2
    StoreInstrForLoadIntrinsic(L, N);
734
2
    CurDAG->RemoveDeadNode(N);
735
2
    return;
736
2
  }
737
73
  SelectCode(N);
738
73
}
739
740
2.42k
void HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) {
741
2.42k
  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
742
2.42k
  unsigned Bits;
743
2.42k
  switch (IID) {
744
3
  case Intrinsic::hexagon_S2_vsplatrb:
745
3
    Bits = 8;
746
3
    break;
747
2
  case Intrinsic::hexagon_S2_vsplatrh:
748
2
    Bits = 16;
749
2
    break;
750
2.41k
  default:
751
2.41k
    SelectCode(N);
752
2.41k
    return;
753
5
  }
754
5
755
5
  SDValue V = N->getOperand(1);
756
5
  SDValue U;
757
5
  if (
keepsLowBits(V, Bits, U)5
) {
758
1
    SDValue R = CurDAG->getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
759
1
                                N->getOperand(0), U);
760
1
    ReplaceNode(N, R.getNode());
761
1
    SelectCode(R.getNode());
762
1
    return;
763
1
  }
764
4
  SelectCode(N);
765
4
}
766
767
//
768
// Map floating point constant values.
769
//
770
8
void HexagonDAGToDAGISel::SelectConstantFP(SDNode *N) {
771
8
  SDLoc dl(N);
772
8
  ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
773
8
  APInt A = CN->getValueAPF().bitcastToAPInt();
774
8
  if (
N->getValueType(0) == MVT::f328
) {
775
8
    SDValue V = CurDAG->getTargetConstant(A.getZExtValue(), dl, MVT::i32);
776
8
    ReplaceNode(N, CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::f32, V));
777
8
    return;
778
8
  }
779
0
  
if (0
N->getValueType(0) == MVT::f640
) {
780
0
    SDValue V = CurDAG->getTargetConstant(A.getZExtValue(), dl, MVT::i64);
781
0
    ReplaceNode(N, CurDAG->getMachineNode(Hexagon::CONST64, dl, MVT::f64, V));
782
0
    return;
783
0
  }
784
0
785
0
  SelectCode(N);
786
0
}
787
788
//
789
// Map boolean values.
790
//
791
577
void HexagonDAGToDAGISel::SelectConstant(SDNode *N) {
792
577
  if (
N->getValueType(0) == MVT::i1577
) {
793
14
    assert(!(cast<ConstantSDNode>(N)->getZExtValue() >> 1));
794
14
    unsigned Opc = (cast<ConstantSDNode>(N)->getSExtValue() != 0)
795
6
                      ? Hexagon::PS_true
796
8
                      : Hexagon::PS_false;
797
14
    ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i1));
798
14
    return;
799
14
  }
800
563
801
563
  SelectCode(N);
802
563
}
803
804
805
92
void HexagonDAGToDAGISel::SelectFrameIndex(SDNode *N) {
806
92
  MachineFrameInfo &MFI = MF->getFrameInfo();
807
92
  const HexagonFrameLowering *HFI = HST->getFrameLowering();
808
92
  int FX = cast<FrameIndexSDNode>(N)->getIndex();
809
92
  unsigned StkA = HFI->getStackAlignment();
810
92
  unsigned MaxA = MFI.getMaxAlignment();
811
92
  SDValue FI = CurDAG->getTargetFrameIndex(FX, MVT::i32);
812
92
  SDLoc DL(N);
813
92
  SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
814
92
  SDNode *R = nullptr;
815
92
816
92
  // Use PS_fi when:
817
92
  // - the object is fixed, or
818
92
  // - there are no objects with higher-than-default alignment, or
819
92
  // - there are no dynamically allocated objects.
820
92
  // Otherwise, use PS_fia.
821
92
  if (
FX < 0 || 92
MaxA <= StkA92
||
!MFI.hasVarSizedObjects()25
) {
822
91
    R = CurDAG->getMachineNode(Hexagon::PS_fi, DL, MVT::i32, FI, Zero);
823
92
  } else {
824
1
    auto &HMFI = *MF->getInfo<HexagonMachineFunctionInfo>();
825
1
    unsigned AR = HMFI.getStackAlignBaseVReg();
826
1
    SDValue CH = CurDAG->getEntryNode();
827
1
    SDValue Ops[] = { CurDAG->getCopyFromReg(CH, DL, AR, MVT::i32), FI, Zero };
828
1
    R = CurDAG->getMachineNode(Hexagon::PS_fia, DL, MVT::i32, Ops);
829
1
  }
830
92
831
92
  ReplaceNode(N, R);
832
92
}
833
834
835
229
void HexagonDAGToDAGISel::SelectBitcast(SDNode *N) {
836
229
  EVT SVT = N->getOperand(0).getValueType();
837
229
  EVT DVT = N->getValueType(0);
838
229
  if (
!SVT.isVector() || 229
!DVT.isVector()128
||
839
79
      SVT.getVectorElementType() == MVT::i1 ||
840
79
      DVT.getVectorElementType() == MVT::i1 ||
841
229
      
SVT.getSizeInBits() != DVT.getSizeInBits()12
) {
842
217
    SelectCode(N);
843
217
    return;
844
217
  }
845
12
846
12
  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N,0), N->getOperand(0));
847
12
  CurDAG->RemoveDeadNode(N);
848
12
}
849
850
851
46.4k
void HexagonDAGToDAGISel::Select(SDNode *N) {
852
46.4k
  if (N->isMachineOpcode())
853
50
    return N->setNodeId(-1);  // Already selected.
854
46.4k
855
46.4k
  switch (N->getOpcode()) {
856
577
  case ISD::Constant:             return SelectConstant(N);
857
8
  case ISD::ConstantFP:           return SelectConstantFP(N);
858
92
  case ISD::FrameIndex:           return SelectFrameIndex(N);
859
229
  case ISD::BITCAST:              return SelectBitcast(N);
860
50
  case ISD::SHL:                  return SelectSHL(N);
861
1.96k
  case ISD::LOAD:                 return SelectLoad(N);
862
1.83k
  case ISD::STORE:                return SelectStore(N);
863
79
  case ISD::ZERO_EXTEND:          return SelectZeroExtend(N);
864
75
  case ISD::INTRINSIC_W_CHAIN:    return SelectIntrinsicWChain(N);
865
2.42k
  case ISD::INTRINSIC_WO_CHAIN:   return SelectIntrinsicWOChain(N);
866
39.1k
  }
867
39.1k
868
39.1k
  SelectCode(N);
869
39.1k
}
870
871
bool HexagonDAGToDAGISel::
872
SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
873
5
                             std::vector<SDValue> &OutOps) {
874
5
  SDValue Inp = Op, Res;
875
5
876
5
  switch (ConstraintID) {
877
0
  default:
878
0
    return true;
879
5
  case InlineAsm::Constraint_i:
880
5
  case InlineAsm::Constraint_o: // Offsetable.
881
5
  case InlineAsm::Constraint_v: // Not offsetable.
882
5
  case InlineAsm::Constraint_m: // Memory.
883
5
    if (SelectAddrFI(Inp, Res))
884
3
      OutOps.push_back(Res);
885
5
    else
886
2
      OutOps.push_back(Inp);
887
5
    break;
888
5
  }
889
5
890
5
  OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
891
5
  return false;
892
5
}
893
894
895
11
static bool isMemOPCandidate(SDNode *I, SDNode *U) {
896
11
  // I is an operand of U. Check if U is an arithmetic (binary) operation
897
11
  // usable in a memop, where the other operand is a loaded value, and the
898
11
  // result of U is stored in the same location.
899
11
900
11
  if (!U->hasOneUse())
901
5
    return false;
902
6
  unsigned Opc = U->getOpcode();
903
6
  switch (Opc) {
904
5
    case ISD::ADD:
905
5
    case ISD::SUB:
906
5
    case ISD::AND:
907
5
    case ISD::OR:
908
5
      break;
909
1
    default:
910
1
      return false;
911
5
  }
912
5
913
5
  SDValue S0 = U->getOperand(0);
914
5
  SDValue S1 = U->getOperand(1);
915
5
  SDValue SY = (S0.getNode() == I) ? 
S13
:
S02
;
916
5
917
5
  SDNode *UUse = *U->use_begin();
918
5
  if (UUse->getNumValues() != 1)
919
3
    return false;
920
2
921
2
  // Check if one of the inputs to U is a load instruction and the output
922
2
  // is used by a store instruction. If so and they also have the same
923
2
  // base pointer, then don't preoprocess this node sequence as it
924
2
  // can be matched to a memop.
925
2
  SDNode *SYNode = SY.getNode();
926
2
  if (
UUse->getOpcode() == ISD::STORE && 2
SYNode->getOpcode() == ISD::LOAD1
) {
927
0
    SDValue LDBasePtr = cast<MemSDNode>(SYNode)->getBasePtr();
928
0
    SDValue STBasePtr = cast<MemSDNode>(UUse)->getBasePtr();
929
0
    if (LDBasePtr == STBasePtr)
930
0
      return true;
931
2
  }
932
2
  return false;
933
2
}
934
935
936
// Transform: (or (select c x 0) z)  ->  (select c (or x z) z)
937
//            (or (select c 0 y) z)  ->  (select c z (or y z))
938
3.38k
void HexagonDAGToDAGISel::ppSimplifyOrSelect0(std::vector<SDNode*> &&Nodes) {
939
3.38k
  SelectionDAG &DAG = *CurDAG;
940
3.38k
941
55.6k
  for (auto I : Nodes) {
942
55.6k
    if (I->getOpcode() != ISD::OR)
943
54.4k
      continue;
944
1.22k
945
1.22k
    
auto IsZero = [] (const SDValue &V) -> bool 1.22k
{
946
6
      if (ConstantSDNode *SC = dyn_cast<ConstantSDNode>(V.getNode()))
947
3
        return SC->isNullValue();
948
3
      return false;
949
3
    };
950
1.22k
    auto IsSelect0 = [IsZero] (const SDValue &Op) -> bool {
951
1.22k
      if (Op.getOpcode() != ISD::SELECT)
952
1.22k
        return false;
953
3
      
return IsZero(Op.getOperand(1)) || 3
IsZero(Op.getOperand(2))3
;
954
1.22k
    };
955
1.22k
956
1.22k
    SDValue N0 = I->getOperand(0), N1 = I->getOperand(1);
957
1.22k
    EVT VT = I->getValueType(0);
958
1.22k
    bool SelN0 = IsSelect0(N0);
959
1.22k
    SDValue SOp = SelN0 ? 
N00
:
N11.22k
;
960
1.22k
    SDValue VOp = SelN0 ? 
N10
:
N01.22k
;
961
1.22k
962
1.22k
    if (
SOp.getOpcode() == ISD::SELECT && 1.22k
SOp.getNode()->hasOneUse()0
) {
963
0
      SDValue SC = SOp.getOperand(0);
964
0
      SDValue SX = SOp.getOperand(1);
965
0
      SDValue SY = SOp.getOperand(2);
966
0
      SDLoc DLS = SOp;
967
0
      if (
IsZero(SY)0
) {
968
0
        SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SX, VOp);
969
0
        SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, NewOr, VOp);
970
0
        DAG.ReplaceAllUsesWith(I, NewSel.getNode());
971
0
      } else 
if (0
IsZero(SX)0
) {
972
0
        SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SY, VOp);
973
0
        SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, VOp, NewOr);
974
0
        DAG.ReplaceAllUsesWith(I, NewSel.getNode());
975
0
      }
976
0
    }
977
55.6k
  }
978
3.38k
}
979
980
// Transform: (store ch val (add x (add (shl y c) e)))
981
//        to: (store ch val (add x (shl (add y d) c))),
982
// where e = (shl d c) for some integer d.
983
// The purpose of this is to enable generation of loads/stores with
984
// shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift
985
// value c must be 0, 1 or 2.
986
3.38k
void HexagonDAGToDAGISel::ppAddrReorderAddShl(std::vector<SDNode*> &&Nodes) {
987
3.38k
  SelectionDAG &DAG = *CurDAG;
988
3.38k
989
55.6k
  for (auto I : Nodes) {
990
55.6k
    if (I->getOpcode() != ISD::STORE)
991
53.7k
      continue;
992
1.83k
993
1.83k
    // I matched: (store ch val Off)
994
1.83k
    SDValue Off = I->getOperand(2);
995
1.83k
    // Off needs to match: (add x (add (shl y c) (shl d c))))
996
1.83k
    if (Off.getOpcode() != ISD::ADD)
997
1.47k
      continue;
998
367
    // Off matched: (add x T0)
999
367
    SDValue T0 = Off.getOperand(1);
1000
367
    // T0 needs to match: (add T1 T2):
1001
367
    if (T0.getOpcode() != ISD::ADD)
1002
366
      continue;
1003
1
    // T0 matched: (add T1 T2)
1004
1
    SDValue T1 = T0.getOperand(0);
1005
1
    SDValue T2 = T0.getOperand(1);
1006
1
    // T1 needs to match: (shl y c)
1007
1
    if (T1.getOpcode() != ISD::SHL)
1008
0
      continue;
1009
1
    SDValue C = T1.getOperand(1);
1010
1
    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(C.getNode());
1011
1
    if (CN == nullptr)
1012
0
      continue;
1013
1
    unsigned CV = CN->getZExtValue();
1014
1
    if (CV > 2)
1015
0
      continue;
1016
1
    // T2 needs to match e, where e = (shl d c) for some d.
1017
1
    ConstantSDNode *EN = dyn_cast<ConstantSDNode>(T2.getNode());
1018
1
    if (EN == nullptr)
1019
0
      continue;
1020
1
    unsigned EV = EN->getZExtValue();
1021
1
    if (EV % (1 << CV) != 0)
1022
0
      continue;
1023
1
    unsigned DV = EV / (1 << CV);
1024
1
1025
1
    // Replace T0 with: (shl (add y d) c)
1026
1
    SDLoc DL = SDLoc(I);
1027
1
    EVT VT = T0.getValueType();
1028
1
    SDValue D = DAG.getConstant(DV, DL, VT);
1029
1
    // NewAdd = (add y d)
1030
1
    SDValue NewAdd = DAG.getNode(ISD::ADD, DL, VT, T1.getOperand(0), D);
1031
1
    // NewShl = (shl NewAdd c)
1032
1
    SDValue NewShl = DAG.getNode(ISD::SHL, DL, VT, NewAdd, C);
1033
1
    ReplaceNode(T0.getNode(), NewShl.getNode());
1034
1
  }
1035
3.38k
}
1036
1037
// Transform: (load ch (add x (and (srl y c) Mask)))
1038
//        to: (load ch (add x (shl (srl y d) d-c)))
1039
// where
1040
// Mask = 00..0 111..1 0.0
1041
//          |     |     +-- d-c 0s, and d-c is 0, 1 or 2.
1042
//          |     +-------- 1s
1043
//          +-------------- at most c 0s
1044
// Motivating example:
1045
// DAG combiner optimizes (add x (shl (srl y 5) 2))
1046
//                     to (add x (and (srl y 3) 1FFFFFFC))
1047
// which results in a constant-extended and(##...,lsr). This transformation
1048
// undoes this simplification for cases where the shl can be folded into
1049
// an addressing mode.
1050
3.38k
void HexagonDAGToDAGISel::ppAddrRewriteAndSrl(std::vector<SDNode*> &&Nodes) {
1051
3.38k
  SelectionDAG &DAG = *CurDAG;
1052
3.38k
1053
55.6k
  for (SDNode *N : Nodes) {
1054
55.6k
    unsigned Opc = N->getOpcode();
1055
55.6k
    if (
Opc != ISD::LOAD && 55.6k
Opc != ISD::STORE53.4k
)
1056
51.6k
      continue;
1057
3.99k
    
SDValue Addr = Opc == ISD::LOAD ? 3.99k
N->getOperand(1)2.15k
:
N->getOperand(2)1.83k
;
1058
3.99k
    // Addr must match: (add x T0)
1059
3.99k
    if (Addr.getOpcode() != ISD::ADD)
1060
3.18k
      continue;
1061
808
    SDValue T0 = Addr.getOperand(1);
1062
808
    // T0 must match: (and T1 Mask)
1063
808
    if (T0.getOpcode() != ISD::AND)
1064
805
      continue;
1065
3
1066
3
    // We have an AND.
1067
3
    //
1068
3
    // Check the first operand. It must be: (srl y c).
1069
3
    SDValue S = T0.getOperand(0);
1070
3
    if (S.getOpcode() != ISD::SRL)
1071
0
      continue;
1072
3
    ConstantSDNode *SN = dyn_cast<ConstantSDNode>(S.getOperand(1).getNode());
1073
3
    if (SN == nullptr)
1074
0
      continue;
1075
3
    
if (3
SN->getAPIntValue().getBitWidth() != 323
)
1076
0
      continue;
1077
3
    uint32_t CV = SN->getZExtValue();
1078
3
1079
3
    // Check the second operand: the supposed mask.
1080
3
    ConstantSDNode *MN = dyn_cast<ConstantSDNode>(T0.getOperand(1).getNode());
1081
3
    if (MN == nullptr)
1082
0
      continue;
1083
3
    
if (3
MN->getAPIntValue().getBitWidth() != 323
)
1084
0
      continue;
1085
3
    uint32_t Mask = MN->getZExtValue();
1086
3
    // Examine the mask.
1087
3
    uint32_t TZ = countTrailingZeros(Mask);
1088
3
    uint32_t M1 = countTrailingOnes(Mask >> TZ);
1089
3
    uint32_t LZ = countLeadingZeros(Mask);
1090
3
    // Trailing zeros + middle ones + leading zeros must equal the width.
1091
3
    if (TZ + M1 + LZ != 32)
1092
0
      continue;
1093
3
    // The number of trailing zeros will be encoded in the addressing mode.
1094
3
    
if (3
TZ > 23
)
1095
0
      continue;
1096
3
    // The number of leading zeros must be at most c.
1097
3
    
if (3
LZ > CV3
)
1098
0
      continue;
1099
3
1100
3
    // All looks good.
1101
3
    SDValue Y = S.getOperand(0);
1102
3
    EVT VT = Addr.getValueType();
1103
3
    SDLoc dl(S);
1104
3
    // TZ = D-C, so D = TZ+C.
1105
3
    SDValue D = DAG.getConstant(TZ+CV, dl, VT);
1106
3
    SDValue DC = DAG.getConstant(TZ, dl, VT);
1107
3
    SDValue NewSrl = DAG.getNode(ISD::SRL, dl, VT, Y, D);
1108
3
    SDValue NewShl = DAG.getNode(ISD::SHL, dl, VT, NewSrl, DC);
1109
3
    ReplaceNode(T0.getNode(), NewShl.getNode());
1110
3
  }
1111
3.38k
}
1112
1113
// Transform: (op ... (zext i1 c) ...) -> (select c (op ... 0 ...)
1114
//                                                  (op ... 1 ...))
1115
3.38k
void HexagonDAGToDAGISel::ppHoistZextI1(std::vector<SDNode*> &&Nodes) {
1116
3.38k
  SelectionDAG &DAG = *CurDAG;
1117
3.38k
1118
55.6k
  for (SDNode *N : Nodes) {
1119
55.6k
    unsigned Opc = N->getOpcode();
1120
55.6k
    if (Opc != ISD::ZERO_EXTEND)
1121
55.5k
      continue;
1122
102
    SDValue OpI1 = N->getOperand(0);
1123
102
    EVT OpVT = OpI1.getValueType();
1124
102
    if (
!OpVT.isSimple() || 102
OpVT.getSimpleVT() != MVT::i1102
)
1125
44
      continue;
1126
121
    
for (auto I = N->use_begin(), E = N->use_end(); 58
I != E121
;
++I63
) {
1127
63
      SDNode *U = *I;
1128
63
      if (U->getNumValues() != 1)
1129
47
        continue;
1130
16
      EVT UVT = U->getValueType(0);
1131
16
      if (
!UVT.isSimple() || 16
!UVT.isInteger()16
||
UVT.getSimpleVT() == MVT::i112
)
1132
5
        continue;
1133
11
      
if (11
isMemOPCandidate(N, U)11
)
1134
0
        continue;
1135
11
1136
11
      // Potentially simplifiable operation.
1137
11
      unsigned I1N = I.getOperandNo();
1138
11
      SmallVector<SDValue,2> Ops(U->getNumOperands());
1139
39
      for (unsigned i = 0, n = U->getNumOperands(); 
i != n39
;
++i28
)
1140
28
        Ops[i] = U->getOperand(i);
1141
11
      EVT BVT = Ops[I1N].getValueType();
1142
11
1143
11
      SDLoc dl(U);
1144
11
      SDValue C0 = DAG.getConstant(0, dl, BVT);
1145
11
      SDValue C1 = DAG.getConstant(1, dl, BVT);
1146
11
      SDValue If0, If1;
1147
11
1148
11
      if (
isa<MachineSDNode>(U)11
) {
1149
0
        unsigned UseOpc = U->getMachineOpcode();
1150
0
        Ops[I1N] = C0;
1151
0
        If0 = SDValue(DAG.getMachineNode(UseOpc, dl, UVT, Ops), 0);
1152
0
        Ops[I1N] = C1;
1153
0
        If1 = SDValue(DAG.getMachineNode(UseOpc, dl, UVT, Ops), 0);
1154
11
      } else {
1155
11
        unsigned UseOpc = U->getOpcode();
1156
11
        Ops[I1N] = C0;
1157
11
        If0 = DAG.getNode(UseOpc, dl, UVT, Ops);
1158
11
        Ops[I1N] = C1;
1159
11
        If1 = DAG.getNode(UseOpc, dl, UVT, Ops);
1160
11
      }
1161
63
      SDValue Sel = DAG.getNode(ISD::SELECT, dl, UVT, OpI1, If1, If0);
1162
63
      DAG.ReplaceAllUsesWith(U, Sel.getNode());
1163
63
    }
1164
55.6k
  }
1165
3.38k
}
1166
1167
3.38k
void HexagonDAGToDAGISel::PreprocessISelDAG() {
1168
3.38k
  // Repack all nodes before calling each preprocessing function,
1169
3.38k
  // because each of them can modify the set of nodes.
1170
13.5k
  auto getNodes = [this] () -> std::vector<SDNode*> {
1171
13.5k
    std::vector<SDNode*> T;
1172
13.5k
    T.reserve(CurDAG->allnodes_size());
1173
13.5k
    for (SDNode &N : CurDAG->allnodes())
1174
222k
      T.push_back(&N);
1175
13.5k
    return T;
1176
13.5k
  };
1177
3.38k
1178
3.38k
  // Transform: (or (select c x 0) z)  ->  (select c (or x z) z)
1179
3.38k
  //            (or (select c 0 y) z)  ->  (select c z (or y z))
1180
3.38k
  ppSimplifyOrSelect0(getNodes());
1181
3.38k
1182
3.38k
  // Transform: (store ch val (add x (add (shl y c) e)))
1183
3.38k
  //        to: (store ch val (add x (shl (add y d) c))),
1184
3.38k
  // where e = (shl d c) for some integer d.
1185
3.38k
  // The purpose of this is to enable generation of loads/stores with
1186
3.38k
  // shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift
1187
3.38k
  // value c must be 0, 1 or 2.
1188
3.38k
  ppAddrReorderAddShl(getNodes());
1189
3.38k
1190
3.38k
  // Transform: (load ch (add x (and (srl y c) Mask)))
1191
3.38k
  //        to: (load ch (add x (shl (srl y d) d-c)))
1192
3.38k
  // where
1193
3.38k
  // Mask = 00..0 111..1 0.0
1194
3.38k
  //          |     |     +-- d-c 0s, and d-c is 0, 1 or 2.
1195
3.38k
  //          |     +-------- 1s
1196
3.38k
  //          +-------------- at most c 0s
1197
3.38k
  // Motivating example:
1198
3.38k
  // DAG combiner optimizes (add x (shl (srl y 5) 2))
1199
3.38k
  //                     to (add x (and (srl y 3) 1FFFFFFC))
1200
3.38k
  // which results in a constant-extended and(##...,lsr). This transformation
1201
3.38k
  // undoes this simplification for cases where the shl can be folded into
1202
3.38k
  // an addressing mode.
1203
3.38k
  ppAddrRewriteAndSrl(getNodes());
1204
3.38k
1205
3.38k
  // Transform: (op ... (zext i1 c) ...) -> (select c (op ... 0 ...)
1206
3.38k
  //                                                  (op ... 1 ...))
1207
3.38k
  ppHoistZextI1(getNodes());
1208
3.38k
1209
3.38k
  DEBUG_WITH_TYPE("isel", {
1210
3.38k
    dbgs() << "Preprocessed (Hexagon) selection DAG:";
1211
3.38k
    CurDAG->dump();
1212
3.38k
  });
1213
3.38k
1214
3.38k
  if (
EnableAddressRebalancing3.38k
) {
1215
3.38k
    rebalanceAddressTrees();
1216
3.38k
1217
3.38k
    DEBUG_WITH_TYPE("isel", {
1218
3.38k
      dbgs() << "Address tree balanced selection DAG:";
1219
3.38k
      CurDAG->dump();
1220
3.38k
    });
1221
3.38k
  }
1222
3.38k
}
1223
1224
2.40k
void HexagonDAGToDAGISel::EmitFunctionEntryCode() {
1225
2.40k
  auto &HST = static_cast<const HexagonSubtarget&>(MF->getSubtarget());
1226
2.40k
  auto &HFI = *HST.getFrameLowering();
1227
2.40k
  if (!HFI.needsAligna(*MF))
1228
2.40k
    return;
1229
1
1230
1
  MachineFrameInfo &MFI = MF->getFrameInfo();
1231
1
  MachineBasicBlock *EntryBB = &MF->front();
1232
1
  unsigned AR = FuncInfo->CreateReg(MVT::i32);
1233
1
  unsigned MaxA = MFI.getMaxAlignment();
1234
1
  BuildMI(EntryBB, DebugLoc(), HII->get(Hexagon::PS_aligna), AR)
1235
1
      .addImm(MaxA);
1236
1
  MF->getInfo<HexagonMachineFunctionInfo>()->setStackAlignBaseVReg(AR);
1237
1
}
1238
1239
// Match a frame index that can be used in an addressing mode.
1240
2.11k
bool HexagonDAGToDAGISel::SelectAddrFI(SDValue &N, SDValue &R) {
1241
2.11k
  if (N.getOpcode() != ISD::FrameIndex)
1242
724
    return false;
1243
1.39k
  auto &HFI = *HST->getFrameLowering();
1244
1.39k
  MachineFrameInfo &MFI = MF->getFrameInfo();
1245
1.39k
  int FX = cast<FrameIndexSDNode>(N)->getIndex();
1246
1.39k
  if (
!MFI.isFixedObjectIndex(FX) && 1.39k
HFI.needsAligna(*MF)1.38k
)
1247
0
    return false;
1248
1.39k
  R = CurDAG->getTargetFrameIndex(FX, MVT::i32);
1249
1.39k
  return true;
1250
1.39k
}
1251
1252
3.43k
inline bool HexagonDAGToDAGISel::SelectAddrGA(SDValue &N, SDValue &R) {
1253
3.43k
  return SelectGlobalAddress(N, R, false);
1254
3.43k
}
1255
1256
3.17k
inline bool HexagonDAGToDAGISel::SelectAddrGP(SDValue &N, SDValue &R) {
1257
3.17k
  return SelectGlobalAddress(N, R, true);
1258
3.17k
}
1259
1260
bool HexagonDAGToDAGISel::SelectGlobalAddress(SDValue &N, SDValue &R,
1261
6.60k
                                              bool UseGP) {
1262
6.60k
  switch (N.getOpcode()) {
1263
1.20k
  case ISD::ADD: {
1264
1.20k
    SDValue N0 = N.getOperand(0);
1265
1.20k
    SDValue N1 = N.getOperand(1);
1266
1.20k
    unsigned GAOpc = N0.getOpcode();
1267
1.20k
    if (
UseGP && 1.20k
GAOpc != HexagonISD::CONST32_GP589
)
1268
589
      return false;
1269
612
    
if (612
!UseGP && 612
GAOpc != HexagonISD::CONST32612
)
1270
557
      return false;
1271
55
    
if (ConstantSDNode *55
Const55
= dyn_cast<ConstantSDNode>(N1)) {
1272
54
      SDValue Addr = N0.getOperand(0);
1273
54
      if (GlobalAddressSDNode *
GA54
= dyn_cast<GlobalAddressSDNode>(Addr)) {
1274
54
        if (
GA->getOpcode() == ISD::TargetGlobalAddress54
) {
1275
54
          uint64_t NewOff = GA->getOffset() + (uint64_t)Const->getSExtValue();
1276
54
          R = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(Const),
1277
54
                                             N.getValueType(), NewOff);
1278
54
          return true;
1279
54
        }
1280
1
      }
1281
54
    }
1282
1
    break;
1283
1
  }
1284
374
  case HexagonISD::CONST32:
1285
374
    // The operand(0) of CONST32 is TargetGlobalAddress, which is what we
1286
374
    // want in the instruction.
1287
374
    if (!UseGP)
1288
245
      R = N.getOperand(0);
1289
374
    return !UseGP;
1290
60
  case HexagonISD::CONST32_GP:
1291
60
    if (UseGP)
1292
59
      R = N.getOperand(0);
1293
60
    return UseGP;
1294
4.97k
  default:
1295
4.97k
    return false;
1296
1
  }
1297
1
1298
1
  return false;
1299
1
}
1300
1301
110
bool HexagonDAGToDAGISel::DetectUseSxtw(SDValue &N, SDValue &R) {
1302
110
  // This (complex pattern) function is meant to detect a sign-extension
1303
110
  // i32->i64 on a per-operand basis. This would allow writing single
1304
110
  // patterns that would cover a number of combinations of different ways
1305
110
  // a sign-extensions could be written. For example:
1306
110
  //   (mul (DetectUseSxtw x) (DetectUseSxtw y)) -> (M2_dpmpyss_s0 x y)
1307
110
  // could match either one of these:
1308
110
  //   (mul (sext x) (sext_inreg y))
1309
110
  //   (mul (sext-load *p) (sext_inreg y))
1310
110
  //   (mul (sext_inreg x) (sext y))
1311
110
  // etc.
1312
110
  //
1313
110
  // The returned value will have type i64 and its low word will
1314
110
  // contain the value being extended. The high bits are not specified.
1315
110
  // The returned type is i64 because the original type of N was i64,
1316
110
  // but the users of this function should only use the low-word of the
1317
110
  // result, e.g.
1318
110
  //  (mul sxtw:x, sxtw:y) -> (M2_dpmpyss_s0 (LoReg sxtw:x), (LoReg sxtw:y))
1319
110
1320
110
  if (N.getValueType() != MVT::i64)
1321
0
    return false;
1322
110
  EVT SrcVT;
1323
110
  unsigned Opc = N.getOpcode();
1324
110
  switch (Opc) {
1325
19
    case ISD::SIGN_EXTEND:
1326
19
    case ISD::SIGN_EXTEND_INREG: {
1327
19
      // sext_inreg has the source type as a separate operand.
1328
19
      EVT T = Opc == ISD::SIGN_EXTEND
1329
10
                ? N.getOperand(0).getValueType()
1330
9
                : cast<VTSDNode>(N.getOperand(1))->getVT();
1331
19
      if (T.getSizeInBits() != 32)
1332
0
        return false;
1333
19
      R = N.getOperand(0);
1334
19
      break;
1335
19
    }
1336
2
    case ISD::LOAD: {
1337
2
      LoadSDNode *L = cast<LoadSDNode>(N);
1338
2
      if (L->getExtensionType() != ISD::SEXTLOAD)
1339
1
        return false;
1340
1
      // All extending loads extend to i32, so even if the value in
1341
1
      // memory is shorter than 32 bits, it will be i32 after the load.
1342
1
      
if (1
L->getMemoryVT().getSizeInBits() > 321
)
1343
0
        return false;
1344
1
      R = N;
1345
1
      break;
1346
1
    }
1347
89
    default:
1348
89
      return false;
1349
20
  }
1350
20
  EVT RT = R.getValueType();
1351
20
  if (RT == MVT::i64)
1352
10
    return true;
1353
20
  assert(RT == MVT::i32);
1354
10
  // This is only to produce a value of type i64. Do not rely on the
1355
10
  // high bits produced by this.
1356
10
  const SDLoc &dl(N);
1357
10
  SDValue Ops[] = {
1358
10
    CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, dl, MVT::i32),
1359
10
    R, CurDAG->getTargetConstant(Hexagon::isub_hi, dl, MVT::i32),
1360
10
    R, CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32)
1361
10
  };
1362
10
  SDNode *T = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl,
1363
10
                                     MVT::i64, Ops);
1364
10
  R = SDValue(T, 0);
1365
10
  return true;
1366
10
}
1367
1368
bool HexagonDAGToDAGISel::keepsLowBits(const SDValue &Val, unsigned NumBits,
1369
5
      SDValue &Src) {
1370
5
  unsigned Opc = Val.getOpcode();
1371
5
  switch (Opc) {
1372
0
  case ISD::SIGN_EXTEND:
1373
0
  case ISD::ZERO_EXTEND:
1374
0
  case ISD::ANY_EXTEND: {
1375
0
    const SDValue &Op0 = Val.getOperand(0);
1376
0
    EVT T = Op0.getValueType();
1377
0
    if (
T.isInteger() && 0
T.getSizeInBits() == NumBits0
) {
1378
0
      Src = Op0;
1379
0
      return true;
1380
0
    }
1381
0
    break;
1382
0
  }
1383
0
  case ISD::SIGN_EXTEND_INREG:
1384
0
  case ISD::AssertSext:
1385
0
  case ISD::AssertZext:
1386
0
    if (
Val.getOperand(0).getValueType().isInteger()0
) {
1387
0
      VTSDNode *T = cast<VTSDNode>(Val.getOperand(1));
1388
0
      if (
T->getVT().getSizeInBits() == NumBits0
) {
1389
0
        Src = Val.getOperand(0);
1390
0
        return true;
1391
0
      }
1392
0
    }
1393
0
    break;
1394
1
  case ISD::AND: {
1395
1
    // Check if this is an AND with NumBits of lower bits set to 1.
1396
1
    uint64_t Mask = (1 << NumBits) - 1;
1397
1
    if (ConstantSDNode *
C1
= dyn_cast<ConstantSDNode>(Val.getOperand(0))) {
1398
0
      if (
C->getZExtValue() == Mask0
) {
1399
0
        Src = Val.getOperand(1);
1400
0
        return true;
1401
0
      }
1402
1
    }
1403
1
    
if (ConstantSDNode *1
C1
= dyn_cast<ConstantSDNode>(Val.getOperand(1))) {
1404
1
      if (
C->getZExtValue() == Mask1
) {
1405
1
        Src = Val.getOperand(0);
1406
1
        return true;
1407
1
      }
1408
0
    }
1409
0
    break;
1410
0
  }
1411
0
  case ISD::OR:
1412
0
  case ISD::XOR: {
1413
0
    // OR/XOR with the lower NumBits bits set to 0.
1414
0
    uint64_t Mask = (1 << NumBits) - 1;
1415
0
    if (ConstantSDNode *
C0
= dyn_cast<ConstantSDNode>(Val.getOperand(0))) {
1416
0
      if (
(C->getZExtValue() & Mask) == 00
) {
1417
0
        Src = Val.getOperand(1);
1418
0
        return true;
1419
0
      }
1420
0
    }
1421
0
    
if (ConstantSDNode *0
C0
= dyn_cast<ConstantSDNode>(Val.getOperand(1))) {
1422
0
      if (
(C->getZExtValue() & Mask) == 00
) {
1423
0
        Src = Val.getOperand(0);
1424
0
        return true;
1425
0
      }
1426
0
    }
1427
0
  }
1428
4
  default:
1429
4
    break;
1430
4
  }
1431
4
  return false;
1432
4
}
1433
1434
1435
1.36k
bool HexagonDAGToDAGISel::isOrEquivalentToAdd(const SDNode *N) const {
1436
1.36k
  assert(N->getOpcode() == ISD::OR);
1437
1.36k
  auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
1438
1.36k
  assert(C);
1439
1.36k
1440
1.36k
  // Detect when "or" is used to add an offset to a stack object.
1441
1.36k
  if (auto *
FN1.36k
= dyn_cast<FrameIndexSDNode>(N->getOperand(0))) {
1442
1.35k
    MachineFrameInfo &MFI = MF->getFrameInfo();
1443
1.35k
    unsigned A = MFI.getObjectAlignment(FN->getIndex());
1444
1.35k
    assert(isPowerOf2_32(A));
1445
1.35k
    int32_t Off = C->getSExtValue();
1446
1.35k
    // If the alleged offset fits in the zero bits guaranteed by
1447
1.35k
    // the alignment, then this or is really an add.
1448
1.35k
    return (Off >= 0) && (((A-1) & Off) == unsigned(Off));
1449
1.35k
  }
1450
10
  return false;
1451
10
}
1452
1453
4.70k
bool HexagonDAGToDAGISel::isAlignedMemNode(const MemSDNode *N) const {
1454
4.70k
  return N->getAlignment() >= N->getMemoryVT().getStoreSize();
1455
4.70k
}
1456
1457
60
bool HexagonDAGToDAGISel::isSmallStackStore(const StoreSDNode *N) const {
1458
60
  unsigned StackSize = MF->getFrameInfo().estimateStackSize(*MF);
1459
60
  switch (N->getMemoryVT().getStoreSize()) {
1460
10
    case 1:
1461
10
      return StackSize <= 56;   // 1*2^6 - 8
1462
4
    case 2:
1463
4
      return StackSize <= 120;  // 2*2^6 - 8
1464
46
    case 4:
1465
46
      return StackSize <= 248;  // 4*2^6 - 8
1466
0
    default:
1467
0
      return false;
1468
0
  }
1469
0
}
1470
1471
// Return true when the given node fits in a positive half word.
1472
2
bool HexagonDAGToDAGISel::isPositiveHalfWord(const SDNode *N) const {
1473
2
  if (const ConstantSDNode *
CN2
= dyn_cast<const ConstantSDNode>(N)) {
1474
1
    int64_t V = CN->getSExtValue();
1475
0
    return V > 0 && isInt<16>(V);
1476
1
  }
1477
1
  
if (1
N->getOpcode() == ISD::SIGN_EXTEND_INREG1
) {
1478
1
    const VTSDNode *VN = dyn_cast<const VTSDNode>(N->getOperand(1));
1479
1
    return VN->getVT().getSizeInBits() <= 16;
1480
1
  }
1481
0
  return false;
1482
0
}
1483
1484
////////////////////////////////////////////////////////////////////////////////
1485
// Rebalancing of address calculation trees
1486
1487
7.50k
static bool isOpcodeHandled(const SDNode *N) {
1488
7.50k
  switch (N->getOpcode()) {
1489
511
    case ISD::ADD:
1490
511
    case ISD::MUL:
1491
511
      return true;
1492
472
    case ISD::SHL:
1493
472
      // We only handle constant shifts because these can be easily flattened
1494
472
      // into multiplications by 2^Op1.
1495
472
      return isa<ConstantSDNode>(N->getOperand(1).getNode());
1496
6.52k
    default:
1497
6.52k
      return false;
1498
0
  }
1499
0
}
1500
1501
/// \brief Return the weight of an SDNode
1502
1.44k
int HexagonDAGToDAGISel::getWeight(SDNode *N) {
1503
1.44k
  if (!isOpcodeHandled(N))
1504
1.27k
    return 1;
1505
1.44k
  assert(RootWeights.count(N) && "Cannot get weight of unseen root!");
1506
174
  assert(RootWeights[N] != -1 && "Cannot get weight of unvisited root!");
1507
174
  assert(RootWeights[N] != -2 && "Cannot get weight of RAWU'd root!");
1508
174
  return RootWeights[N];
1509
174
}
1510
1511
1.43k
int HexagonDAGToDAGISel::getHeight(SDNode *N) {
1512
1.43k
  if (!isOpcodeHandled(N))
1513
1.27k
    return 0;
1514
1.43k
  assert(RootWeights.count(N) && RootWeights[N] >= 0 &&
1515
169
      "Cannot query height of unvisited/RAUW'd node!");
1516
169
  return RootHeights[N];
1517
169
}
1518
1519
namespace {
1520
struct WeightedLeaf {
1521
  SDValue Value;
1522
  int Weight;
1523
  int InsertionOrder;
1524
1525
71
  WeightedLeaf() : Value(SDValue()) { }
1526
1527
  WeightedLeaf(SDValue Value, int Weight, int InsertionOrder) :
1528
59
    Value(Value), Weight(Weight), InsertionOrder(InsertionOrder) {
1529
59
    assert(Weight >= 0 && "Weight must be >= 0");
1530
59
  }
1531
1532
44
  static bool Compare(const WeightedLeaf &A, const WeightedLeaf &B) {
1533
44
    assert(A.Value.getNode() && B.Value.getNode());
1534
44
    return A.Weight == B.Weight ?
1535
15
            (A.InsertionOrder > B.InsertionOrder) :
1536
29
            (A.Weight > B.Weight);
1537
44
  }
1538
};
1539
1540
/// A specialized priority queue for WeigthedLeaves. It automatically folds
1541
/// constants and allows removal of non-top elements while maintaining the
1542
/// priority order.
1543
class LeafPrioQueue {
1544
  SmallVector<WeightedLeaf, 8> Q;
1545
  bool HaveConst;
1546
  WeightedLeaf ConstElt;
1547
  unsigned Opcode;
1548
1549
public:
1550
0
  bool empty() {
1551
0
    return (!HaveConst && Q.empty());
1552
0
  }
1553
1554
34
  size_t size() {
1555
34
    return Q.size() + HaveConst;
1556
34
  }
1557
1558
12
  bool hasConst() {
1559
12
    return HaveConst;
1560
12
  }
1561
1562
24
  const WeightedLeaf &top() {
1563
24
    if (HaveConst)
1564
0
      return ConstElt;
1565
24
    return Q.front();
1566
24
  }
1567
1568
51
  WeightedLeaf pop() {
1569
51
    if (
HaveConst51
) {
1570
7
      HaveConst = false;
1571
7
      return ConstElt;
1572
7
    }
1573
44
    std::pop_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1574
44
    return Q.pop_back_val();
1575
44
  }
1576
1577
64
  void push(WeightedLeaf L, bool SeparateConst=true) {
1578
64
    if (
!HaveConst && 64
SeparateConst62
&&
isa<ConstantSDNode>(L.Value)55
) {
1579
7
      if (Opcode == ISD::MUL &&
1580
0
          cast<ConstantSDNode>(L.Value)->getSExtValue() == 1)
1581
0
        return;
1582
7
      
if (7
Opcode == ISD::ADD &&
1583
7
          cast<ConstantSDNode>(L.Value)->getSExtValue() == 0)
1584
0
        return;
1585
7
1586
7
      HaveConst = true;
1587
7
      ConstElt = L;
1588
64
    } else {
1589
57
      Q.push_back(L);
1590
57
      std::push_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1591
57
    }
1592
64
  }
1593
1594
  /// Push L to the bottom of the queue regardless of its weight. If L is
1595
  /// constant, it will not be folded with other constants in the queue.
1596
7
  void pushToBottom(WeightedLeaf L) {
1597
7
    L.Weight = 1000;
1598
7
    push(L, false);
1599
7
  }
1600
1601
  /// Search for a SHL(x, [<=MaxAmount]) subtree in the queue, return the one of
1602
  /// lowest weight and remove it from the queue.
1603
  WeightedLeaf findSHL(uint64_t MaxAmount);
1604
1605
  WeightedLeaf findMULbyConst();
1606
1607
  LeafPrioQueue(unsigned Opcode) :
1608
12
    HaveConst(false), Opcode(Opcode) { }
1609
};
1610
} // end anonymous namespace
1611
1612
1
WeightedLeaf LeafPrioQueue::findSHL(uint64_t MaxAmount) {
1613
1
  int ResultPos;
1614
1
  WeightedLeaf Result;
1615
1
1616
2
  for (int Pos = 0, End = Q.size(); 
Pos != End2
;
++Pos1
) {
1617
1
    const WeightedLeaf &L = Q[Pos];
1618
1
    const SDValue &Val = L.Value;
1619
1
    if (Val.getOpcode() != ISD::SHL ||
1620
1
        !isa<ConstantSDNode>(Val.getOperand(1)) ||
1621
1
        Val.getConstantOperandVal(1) > MaxAmount)
1622
0
      continue;
1623
1
    
if (1
!Result.Value.getNode() || 1
Result.Weight > L.Weight0
||
1624
0
        
(Result.Weight == L.Weight && 0
Result.InsertionOrder > L.InsertionOrder0
))
1625
1
    {
1626
1
      Result = L;
1627
1
      ResultPos = Pos;
1628
1
    }
1629
1
  }
1630
1
1631
1
  if (
Result.Value.getNode()1
) {
1632
1
    Q.erase(&Q[ResultPos]);
1633
1
    std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1634
1
  }
1635
1
1636
1
  return Result;
1637
1
}
1638
1639
22
WeightedLeaf LeafPrioQueue::findMULbyConst() {
1640
22
  int ResultPos;
1641
22
  WeightedLeaf Result;
1642
22
1643
55
  for (int Pos = 0, End = Q.size(); 
Pos != End55
;
++Pos33
) {
1644
33
    const WeightedLeaf &L = Q[Pos];
1645
33
    const SDValue &Val = L.Value;
1646
33
    if (Val.getOpcode() != ISD::MUL ||
1647
0
        !isa<ConstantSDNode>(Val.getOperand(1)) ||
1648
0
        Val.getConstantOperandVal(1) > 127)
1649
33
      continue;
1650
0
    
if (0
!Result.Value.getNode() || 0
Result.Weight > L.Weight0
||
1651
0
        
(Result.Weight == L.Weight && 0
Result.InsertionOrder > L.InsertionOrder0
))
1652
0
    {
1653
0
      Result = L;
1654
0
      ResultPos = Pos;
1655
0
    }
1656
33
  }
1657
22
1658
22
  if (
Result.Value.getNode()22
) {
1659
0
    Q.erase(&Q[ResultPos]);
1660
0
    std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1661
0
  }
1662
22
1663
22
  return Result;
1664
22
}
1665
1666
0
SDValue HexagonDAGToDAGISel::getMultiplierForSHL(SDNode *N) {
1667
0
  uint64_t MulFactor = 1ull << N->getConstantOperandVal(1);
1668
0
  return CurDAG->getConstant(MulFactor, SDLoc(N),
1669
0
                             N->getOperand(1).getValueType());
1670
0
}
1671
1672
/// @returns the value x for which 2^x is a factor of Val
1673
4
static unsigned getPowerOf2Factor(SDValue Val) {
1674
4
  if (
Val.getOpcode() == ISD::MUL4
) {
1675
1
    unsigned MaxFactor = 0;
1676
3
    for (int i = 0; 
i < 23
;
++i2
) {
1677
2
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(i));
1678
2
      if (!C)
1679
1
        continue;
1680
1
      const APInt &CInt = C->getAPIntValue();
1681
1
      if (CInt.getBoolValue())
1682
1
        MaxFactor = CInt.countTrailingZeros();
1683
2
    }
1684
1
    return MaxFactor;
1685
1
  }
1686
3
  
if (3
Val.getOpcode() == ISD::SHL3
) {
1687
3
    if (!isa<ConstantSDNode>(Val.getOperand(1).getNode()))
1688
0
      return 0;
1689
3
    return (unsigned) Val.getConstantOperandVal(1);
1690
3
  }
1691
0
1692
0
  return 0;
1693
0
}
1694
1695
/// @returns true if V>>Amount will eliminate V's operation on its child
1696
2
static bool willShiftRightEliminate(SDValue V, unsigned Amount) {
1697
2
  if (
V.getOpcode() == ISD::MUL2
) {
1698
1
    SDValue Ops[] = { V.getOperand(0), V.getOperand(1) };
1699
2
    for (int i = 0; 
i < 22
;
++i1
)
1700
2
      
if (2
isa<ConstantSDNode>(Ops[i].getNode()) &&
1701
2
          
V.getConstantOperandVal(i) % (1ULL << Amount) == 01
) {
1702
1
        uint64_t NewConst = V.getConstantOperandVal(i) >> Amount;
1703
1
        return (NewConst == 1);
1704
1
      }
1705
2
  } else 
if (1
V.getOpcode() == ISD::SHL1
) {
1706
1
    return (Amount == V.getConstantOperandVal(1));
1707
1
  }
1708
0
1709
0
  return false;
1710
0
}
1711
1712
2
SDValue HexagonDAGToDAGISel::factorOutPowerOf2(SDValue V, unsigned Power) {
1713
2
  SDValue Ops[] = { V.getOperand(0), V.getOperand(1) };
1714
2
  if (
V.getOpcode() == ISD::MUL2
) {
1715
2
    for (int i=0; 
i < 22
;
++i1
) {
1716
2
      if (isa<ConstantSDNode>(Ops[i].getNode()) &&
1717
2
          
V.getConstantOperandVal(i) % ((uint64_t)1 << Power) == 01
) {
1718
1
        uint64_t NewConst = V.getConstantOperandVal(i) >> Power;
1719
1
        if (NewConst == 1)
1720
0
          return Ops[!i];
1721
1
        Ops[i] = CurDAG->getConstant(NewConst,
1722
1
                                     SDLoc(V), V.getValueType());
1723
1
        break;
1724
1
      }
1725
2
    }
1726
2
  } else 
if (1
V.getOpcode() == ISD::SHL1
) {
1727
1
    uint64_t ShiftAmount = V.getConstantOperandVal(1);
1728
1
    if (ShiftAmount == Power)
1729
1
      return Ops[0];
1730
0
    Ops[1] = CurDAG->getConstant(ShiftAmount - Power,
1731
0
                                 SDLoc(V), V.getValueType());
1732
0
  }
1733
2
1734
1
  return CurDAG->getNode(V.getOpcode(), SDLoc(V), V.getValueType(), Ops);
1735
2
}
1736
1737
27
static bool isTargetConstant(const SDValue &V) {
1738
27
  return V.getOpcode() == HexagonISD::CONST32 ||
1739
26
         V.getOpcode() == HexagonISD::CONST32_GP;
1740
27
}
1741
1742
0
unsigned HexagonDAGToDAGISel::getUsesInFunction(const Value *V) {
1743
0
  if (GAUsesInFunction.count(V))
1744
0
    return GAUsesInFunction[V];
1745
0
1746
0
  unsigned Result = 0;
1747
0
  const Function *CurF = CurDAG->getMachineFunction().getFunction();
1748
0
  for (const User *U : V->users()) {
1749
0
    if (isa<Instruction>(U) &&
1750
0
        cast<Instruction>(U)->getParent()->getParent() == CurF)
1751
0
      ++Result;
1752
0
  }
1753
0
1754
0
  GAUsesInFunction[V] = Result;
1755
0
1756
0
  return Result;
1757
0
}
1758
1759
/// Note - After calling this, N may be dead. It may have been replaced by a
1760
/// new node, so always use the returned value in place of N.
1761
///
1762
/// @returns The SDValue taking the place of N (which could be N if it is
1763
/// unchanged)
1764
718
SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) {
1765
718
  assert(RootWeights.count(N) && "Cannot balance non-root node.");
1766
718
  assert(RootWeights[N] != -2 && "This node was RAUW'd!");
1767
718
  assert(!TopLevel || N->getOpcode() == ISD::ADD);
1768
718
1769
718
  // Return early if this node was already visited
1770
718
  if (RootWeights[N] != -1)
1771
0
    return SDValue(N, 0);
1772
718
1773
718
  assert(isOpcodeHandled(N));
1774
718
1775
718
  SDValue Op0 = N->getOperand(0);
1776
718
  SDValue Op1 = N->getOperand(1);
1777
718
1778
718
  // Return early if the operands will remain unchanged or are all roots
1779
718
  if (
(!isOpcodeHandled(Op0.getNode()) || 718
RootWeights.count(Op0.getNode())110
) &&
1780
718
      
(!isOpcodeHandled(Op1.getNode()) || 710
RootWeights.count(Op1.getNode())71
)) {
1781
706
    SDNode *Op0N = Op0.getNode();
1782
706
    int Weight;
1783
706
    if (
isOpcodeHandled(Op0N) && 706
RootWeights[Op0N] == -1102
) {
1784
40
      Weight = getWeight(balanceSubTree(Op0N).getNode());
1785
40
      // Weight = calculateWeight(Op0N);
1786
40
    } else
1787
666
      Weight = getWeight(Op0N);
1788
706
1789
706
    SDNode *Op1N = N->getOperand(1).getNode(); // Op1 may have been RAUWd
1790
706
    if (
isOpcodeHandled(Op1N) && 706
RootWeights[Op1N] == -167
) {
1791
60
      Weight += getWeight(balanceSubTree(Op1N).getNode());
1792
60
      // Weight += calculateWeight(Op1N);
1793
60
    } else
1794
646
      Weight += getWeight(Op1N);
1795
706
1796
706
    RootWeights[N] = Weight;
1797
706
    RootHeights[N] = std::max(getHeight(N->getOperand(0).getNode()),
1798
706
                              getHeight(N->getOperand(1).getNode())) + 1;
1799
706
1800
706
    DEBUG(dbgs() << "--> No need to balance root (Weight=" << Weight
1801
706
                 << " Height=" << RootHeights[N] << "): ");
1802
706
    DEBUG(N->dump());
1803
706
1804
706
    return SDValue(N, 0);
1805
706
  }
1806
12
1807
12
  
DEBUG12
(dbgs() << "** Balancing root node: ");
1808
12
  DEBUG(N->dump());
1809
12
1810
12
  unsigned NOpcode = N->getOpcode();
1811
12
1812
12
  LeafPrioQueue Leaves(NOpcode);
1813
12
  SmallVector<SDValue, 4> Worklist;
1814
12
  Worklist.push_back(SDValue(N, 0));
1815
12
1816
12
  // SHL nodes will be converted to MUL nodes
1817
12
  if (NOpcode == ISD::SHL)
1818
0
    NOpcode = ISD::MUL;
1819
12
1820
12
  bool CanFactorize = false;
1821
12
  WeightedLeaf Mul1, Mul2;
1822
12
  unsigned MaxPowerOf2 = 0;
1823
12
  WeightedLeaf GA;
1824
12
1825
12
  // Do not try to factor out a shift if there is already a shift at the tip of
1826
12
  // the tree.
1827
12
  bool HaveTopLevelShift = false;
1828
12
  if (TopLevel &&
1829
12
      
((isOpcodeHandled(Op0.getNode()) && 12
Op0.getOpcode() == ISD::SHL8
&&
1830
0
                        Op0.getConstantOperandVal(1) < 4) ||
1831
12
       
(isOpcodeHandled(Op1.getNode()) && 12
Op1.getOpcode() == ISD::SHL4
&&
1832
0
                        Op1.getConstantOperandVal(1) < 4)))
1833
0
    HaveTopLevelShift = true;
1834
12
1835
12
  // Flatten the subtree into an ordered list of leaves; at the same time
1836
12
  // determine whether the tree is already balanced.
1837
12
  int InsertionOrder = 0;
1838
12
  SmallDenseMap<SDValue, int> NodeHeights;
1839
12
  bool Imbalanced = false;
1840
12
  int CurrentWeight = 0;
1841
96
  while (
!Worklist.empty()96
) {
1842
84
    SDValue Child = Worklist.pop_back_val();
1843
84
1844
84
    if (
Child.getNode() != N && 84
RootWeights.count(Child.getNode())60
) {
1845
9
      // CASE 1: Child is a root note
1846
9
1847
9
      int Weight = RootWeights[Child.getNode()];
1848
9
      if (
Weight == -19
) {
1849
5
        Child = balanceSubTree(Child.getNode());
1850
5
        // calculateWeight(Child.getNode());
1851
5
        Weight = getWeight(Child.getNode());
1852
9
      } else 
if (4
Weight == -24
) {
1853
0
        // Whoops, this node was RAUWd by one of the balanceSubTree calls we
1854
0
        // made. Our worklist isn't up to date anymore.
1855
0
        // Restart the whole process.
1856
0
        DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
1857
4
        return balanceSubTree(N, TopLevel);
1858
4
      }
1859
9
1860
9
      NodeHeights[Child] = 1;
1861
9
      CurrentWeight += Weight;
1862
9
1863
9
      unsigned PowerOf2;
1864
9
      if (
TopLevel && 9
!CanFactorize9
&&
!HaveTopLevelShift9
&&
1865
9
          
(Child.getOpcode() == ISD::MUL || 9
Child.getOpcode() == ISD::SHL8
) &&
1866
9
          
Child.hasOneUse()7
&&
(PowerOf2 = getPowerOf2Factor(Child))4
) {
1867
4
        // Try to identify two factorizable MUL/SHL children greedily. Leave
1868
4
        // them out of the priority queue for now so we can deal with them
1869
4
        // after.
1870
4
        if (
!Mul1.Value.getNode()4
) {
1871
3
          Mul1 = WeightedLeaf(Child, Weight, InsertionOrder++);
1872
3
          MaxPowerOf2 = PowerOf2;
1873
4
        } else {
1874
1
          Mul2 = WeightedLeaf(Child, Weight, InsertionOrder++);
1875
1
          MaxPowerOf2 = std::min(MaxPowerOf2, PowerOf2);
1876
1
1877
1
          // Our addressing modes can only shift by a maximum of 3
1878
1
          if (MaxPowerOf2 > 3)
1879
0
            MaxPowerOf2 = 3;
1880
1
1881
1
          CanFactorize = true;
1882
1
        }
1883
4
      } else
1884
5
        Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++));
1885
84
    } else 
if (75
!isOpcodeHandled(Child.getNode())75
) {
1886
27
      // CASE 2: Child is an unhandled kind of node (e.g. constant)
1887
27
      int Weight = getWeight(Child.getNode());
1888
27
1889
27
      NodeHeights[Child] = getHeight(Child.getNode());
1890
27
      CurrentWeight += Weight;
1891
27
1892
27
      if (
isTargetConstant(Child) && 27
!GA.Value.getNode()1
)
1893
1
        GA = WeightedLeaf(Child, Weight, InsertionOrder++);
1894
27
      else
1895
26
        Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++));
1896
75
    } else {
1897
48
      // CASE 3: Child is a subtree of same opcode
1898
48
      // Visit children first, then flatten.
1899
48
      unsigned ChildOpcode = Child.getOpcode();
1900
48
      assert(ChildOpcode == NOpcode ||
1901
48
             (NOpcode == ISD::MUL && ChildOpcode == ISD::SHL));
1902
48
1903
48
      // Convert SHL to MUL
1904
48
      SDValue Op1;
1905
48
      if (ChildOpcode == ISD::SHL)
1906
0
        Op1 = getMultiplierForSHL(Child.getNode());
1907
48
      else
1908
48
        Op1 = Child->getOperand(1);
1909
48
1910
48
      if (
!NodeHeights.count(Op1) || 48
!NodeHeights.count(Child->getOperand(0))24
) {
1911
24
        assert(!NodeHeights.count(Child) && "Parent visited before children?");
1912
24
        // Visit children first, then re-visit this node
1913
24
        Worklist.push_back(Child);
1914
24
        Worklist.push_back(Op1);
1915
24
        Worklist.push_back(Child->getOperand(0));
1916
48
      } else {
1917
24
        // Back at this node after visiting the children
1918
24
        if (std::abs(NodeHeights[Op1] - NodeHeights[Child->getOperand(0)]) > 1)
1919
6
          Imbalanced = true;
1920
24
1921
24
        NodeHeights[Child] = std::max(NodeHeights[Op1],
1922
24
                                      NodeHeights[Child->getOperand(0)]) + 1;
1923
24
      }
1924
75
    }
1925
84
  }
1926
12
1927
12
  
DEBUG12
(dbgs() << "--> Current height=" << NodeHeights[SDValue(N, 0)]
1928
12
               << " weight=" << CurrentWeight << " imbalanced="
1929
12
               << Imbalanced << "\n");
1930
12
1931
12
  // Transform MUL(x, C * 2^Y) + SHL(z, Y) -> SHL(ADD(MUL(x, C), z), Y)
1932
12
  //  This factors out a shift in order to match memw(a<<Y+b).
1933
12
  if (
CanFactorize && 12
(willShiftRightEliminate(Mul1.Value, MaxPowerOf2) ||
1934
12
                       
willShiftRightEliminate(Mul2.Value, MaxPowerOf2)1
)) {
1935
1
    DEBUG(dbgs() << "--> Found common factor for two MUL children!\n");
1936
1
    int Weight = Mul1.Weight + Mul2.Weight;
1937
1
    int Height = std::max(NodeHeights[Mul1.Value], NodeHeights[Mul2.Value]) + 1;
1938
1
    SDValue Mul1Factored = factorOutPowerOf2(Mul1.Value, MaxPowerOf2);
1939
1
    SDValue Mul2Factored = factorOutPowerOf2(Mul2.Value, MaxPowerOf2);
1940
1
    SDValue Sum = CurDAG->getNode(ISD::ADD, SDLoc(N), Mul1.Value.getValueType(),
1941
1
                                  Mul1Factored, Mul2Factored);
1942
1
    SDValue Const = CurDAG->getConstant(MaxPowerOf2, SDLoc(N),
1943
1
                                        Mul1.Value.getValueType());
1944
1
    SDValue New = CurDAG->getNode(ISD::SHL, SDLoc(N), Mul1.Value.getValueType(),
1945
1
                                  Sum, Const);
1946
1
    NodeHeights[New] = Height;
1947
1
    Leaves.push(WeightedLeaf(New, Weight, Mul1.InsertionOrder));
1948
12
  } else 
if (11
Mul1.Value.getNode()11
) {
1949
2
    // We failed to factorize two MULs, so now the Muls are left outside the
1950
2
    // queue... add them back.
1951
2
    Leaves.push(Mul1);
1952
2
    if (Mul2.Value.getNode())
1953
0
      Leaves.push(Mul2);
1954
11
    CanFactorize = false;
1955
11
  }
1956
12
1957
12
  // Combine GA + Constant -> GA+Offset, but only if GA is not used elsewhere
1958
12
  // and the root node itself is not used more than twice. This reduces the
1959
12
  // amount of additional constant extenders introduced by this optimization.
1960
12
  bool CombinedGA = false;
1961
12
  if (
NOpcode == ISD::ADD && 12
GA.Value.getNode()12
&&
Leaves.hasConst()1
&&
1962
12
      
GA.Value.hasOneUse()0
&&
N->use_size() < 30
) {
1963
0
    GlobalAddressSDNode *GANode =
1964
0
      cast<GlobalAddressSDNode>(GA.Value.getOperand(0));
1965
0
    ConstantSDNode *Offset = cast<ConstantSDNode>(Leaves.top().Value);
1966
0
1967
0
    if (
getUsesInFunction(GANode->getGlobal()) == 1 && 0
Offset->hasOneUse()0
&&
1968
0
        
getTargetLowering()->isOffsetFoldingLegal(GANode)0
) {
1969
0
      DEBUG(dbgs() << "--> Combining GA and offset (" << Offset->getSExtValue()
1970
0
          << "): ");
1971
0
      DEBUG(GANode->dump());
1972
0
1973
0
      SDValue NewTGA =
1974
0
        CurDAG->getTargetGlobalAddress(GANode->getGlobal(), SDLoc(GA.Value),
1975
0
            GANode->getValueType(0),
1976
0
            GANode->getOffset() + (uint64_t)Offset->getSExtValue());
1977
0
      GA.Value = CurDAG->getNode(GA.Value.getOpcode(), SDLoc(GA.Value),
1978
0
          GA.Value.getValueType(), NewTGA);
1979
0
      GA.Weight += Leaves.top().Weight;
1980
0
1981
0
      NodeHeights[GA.Value] = getHeight(GA.Value.getNode());
1982
0
      CombinedGA = true;
1983
0
1984
0
      Leaves.pop(); // Remove the offset constant from the queue
1985
0
    }
1986
0
  }
1987
12
1988
12
  if (
(RebalanceOnlyForOptimizations && 12
!CanFactorize0
&&
!CombinedGA0
) ||
1989
12
      
(RebalanceOnlyImbalancedTrees && 12
!Imbalanced0
)) {
1990
0
    RootWeights[N] = CurrentWeight;
1991
0
    RootHeights[N] = NodeHeights[SDValue(N, 0)];
1992
0
1993
0
    return SDValue(N, 0);
1994
0
  }
1995
12
1996
12
  // Combine GA + SHL(x, C<=31) so we will match Rx=add(#u8,asl(Rx,#U5))
1997
12
  
if (12
NOpcode == ISD::ADD && 12
GA.Value.getNode()12
) {
1998
1
    WeightedLeaf SHL = Leaves.findSHL(31);
1999
1
    if (
SHL.Value.getNode()1
) {
2000
1
      int Height = std::max(NodeHeights[GA.Value], NodeHeights[SHL.Value]) + 1;
2001
1
      GA.Value = CurDAG->getNode(ISD::ADD, SDLoc(GA.Value),
2002
1
                                 GA.Value.getValueType(),
2003
1
                                 GA.Value, SHL.Value);
2004
1
      GA.Weight = SHL.Weight; // Specifically ignore the GA weight here
2005
1
      NodeHeights[GA.Value] = Height;
2006
1
    }
2007
1
  }
2008
12
2009
12
  if (GA.Value.getNode())
2010
1
    Leaves.push(GA);
2011
12
2012
12
  // If this is the top level and we haven't factored out a shift, we should try
2013
12
  // to move a constant to the bottom to match addressing modes like memw(rX+C)
2014
12
  if (
TopLevel && 12
!CanFactorize12
&&
Leaves.hasConst()11
) {
2015
7
    DEBUG(dbgs() << "--> Pushing constant to tip of tree.");
2016
7
    Leaves.pushToBottom(Leaves.pop());
2017
7
  }
2018
12
2019
12
  const DataLayout &DL = CurDAG->getDataLayout();
2020
12
  const TargetLowering &TLI = *getTargetLowering();
2021
12
2022
12
  // Rebuild the tree using Huffman's algorithm
2023
34
  while (
Leaves.size() > 134
) {
2024
22
    WeightedLeaf L0 = Leaves.pop();
2025
22
2026
22
    // See whether we can grab a MUL to form an add(Rx,mpyi(Ry,#u6)),
2027
22
    // otherwise just get the next leaf
2028
22
    WeightedLeaf L1 = Leaves.findMULbyConst();
2029
22
    if (!L1.Value.getNode())
2030
22
      L1 = Leaves.pop();
2031
22
2032
22
    assert(L0.Weight <= L1.Weight && "Priority queue is broken!");
2033
22
2034
22
    SDValue V0 = L0.Value;
2035
22
    int V0Weight = L0.Weight;
2036
22
    SDValue V1 = L1.Value;
2037
22
    int V1Weight = L1.Weight;
2038
22
2039
22
    // Make sure that none of these nodes have been RAUW'd
2040
22
    if (
(RootWeights.count(V0.getNode()) && 22
RootWeights[V0.getNode()] == -22
) ||
2041
22
        
(RootWeights.count(V1.getNode()) && 22
RootWeights[V1.getNode()] == -25
)) {
2042
0
      DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
2043
0
      return balanceSubTree(N, TopLevel);
2044
0
    }
2045
22
2046
22
    ConstantSDNode *V0C = dyn_cast<ConstantSDNode>(V0);
2047
22
    ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(V1);
2048
22
    EVT VT = N->getValueType(0);
2049
22
    SDValue NewNode;
2050
22
2051
22
    if (
V0C && 22
!V1C0
) {
2052
0
      std::swap(V0, V1);
2053
0
      std::swap(V0C, V1C);
2054
0
    }
2055
22
2056
22
    // Calculate height of this node
2057
22
    assert(NodeHeights.count(V0) && NodeHeights.count(V1) &&
2058
22
           "Children must have been visited before re-combining them!");
2059
22
    int Height = std::max(NodeHeights[V0], NodeHeights[V1]) + 1;
2060
22
2061
22
    // Rebuild this node (and restore SHL from MUL if needed)
2062
22
    if (
V1C && 22
NOpcode == ISD::MUL7
&&
V1C->getAPIntValue().isPowerOf2()0
)
2063
0
      NewNode = CurDAG->getNode(
2064
0
          ISD::SHL, SDLoc(V0), VT, V0,
2065
0
          CurDAG->getConstant(
2066
0
              V1C->getAPIntValue().logBase2(), SDLoc(N),
2067
0
              TLI.getScalarShiftAmountTy(DL, V0.getValueType())));
2068
22
    else
2069
22
      NewNode = CurDAG->getNode(NOpcode, SDLoc(N), VT, V0, V1);
2070
22
2071
22
    NodeHeights[NewNode] = Height;
2072
22
2073
22
    int Weight = V0Weight + V1Weight;
2074
22
    Leaves.push(WeightedLeaf(NewNode, Weight, L0.InsertionOrder));
2075
22
2076
22
    DEBUG(dbgs() << "--> Built new node (Weight=" << Weight << ",Height="
2077
22
                 << Height << "):\n");
2078
22
    DEBUG(NewNode.dump());
2079
22
  }
2080
12
2081
12
  assert(Leaves.size() == 1);
2082
12
  SDValue NewRoot = Leaves.top().Value;
2083
12
2084
12
  assert(NodeHeights.count(NewRoot));
2085
12
  int Height = NodeHeights[NewRoot];
2086
12
2087
12
  // Restore SHL if we earlier converted it to a MUL
2088
12
  if (
NewRoot.getOpcode() == ISD::MUL12
) {
2089
0
    ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(NewRoot.getOperand(1));
2090
0
    if (
V1C && 0
V1C->getAPIntValue().isPowerOf2()0
) {
2091
0
      EVT VT = NewRoot.getValueType();
2092
0
      SDValue V0 = NewRoot.getOperand(0);
2093
0
      NewRoot = CurDAG->getNode(
2094
0
          ISD::SHL, SDLoc(NewRoot), VT, V0,
2095
0
          CurDAG->getConstant(
2096
0
              V1C->getAPIntValue().logBase2(), SDLoc(NewRoot),
2097
0
              TLI.getScalarShiftAmountTy(DL, V0.getValueType())));
2098
0
    }
2099
0
  }
2100
12
2101
12
  if (
N != NewRoot.getNode()12
) {
2102
6
    DEBUG(dbgs() << "--> Root is now: ");
2103
6
    DEBUG(NewRoot.dump());
2104
6
2105
6
    // Replace all uses of old root by new root
2106
6
    CurDAG->ReplaceAllUsesWith(N, NewRoot.getNode());
2107
6
    // Mark that we have RAUW'd N
2108
6
    RootWeights[N] = -2;
2109
12
  } else {
2110
6
    DEBUG(dbgs() << "--> Root unchanged.\n");
2111
6
  }
2112
12
2113
12
  RootWeights[NewRoot.getNode()] = Leaves.top().Weight;
2114
12
  RootHeights[NewRoot.getNode()] = Height;
2115
12
2116
12
  return NewRoot;
2117
718
}
2118
2119
3.38k
void HexagonDAGToDAGISel::rebalanceAddressTrees() {
2120
59.0k
  for (auto I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); 
I != E59.0k
;) {
2121
55.6k
    SDNode *N = &*I++;
2122
55.6k
    if (
N->getOpcode() != ISD::LOAD && 55.6k
N->getOpcode() != ISD::STORE53.5k
)
2123
51.6k
      continue;
2124
3.99k
2125
3.99k
    SDValue BasePtr = cast<MemSDNode>(N)->getBasePtr();
2126
3.99k
    if (BasePtr.getOpcode() != ISD::ADD)
2127
3.18k
      continue;
2128
808
2129
808
    // We've already processed this node
2130
808
    
if (808
RootWeights.count(BasePtr.getNode())808
)
2131
195
      continue;
2132
613
2133
613
    
DEBUG613
(dbgs() << "** Rebalancing address calculation in node: ");
2134
613
    DEBUG(N->dump());
2135
613
2136
613
    // FindRoots
2137
613
    SmallVector<SDNode *, 4> Worklist;
2138
613
2139
613
    Worklist.push_back(BasePtr.getOperand(0).getNode());
2140
613
    Worklist.push_back(BasePtr.getOperand(1).getNode());
2141
613
2142
2.29k
    while (
!Worklist.empty()2.29k
) {
2143
1.68k
      SDNode *N = Worklist.pop_back_val();
2144
1.68k
      unsigned Opcode = N->getOpcode();
2145
1.68k
2146
1.68k
      if (!isOpcodeHandled(N))
2147
1.45k
        continue;
2148
230
2149
230
      Worklist.push_back(N->getOperand(0).getNode());
2150
230
      Worklist.push_back(N->getOperand(1).getNode());
2151
230
2152
230
      // Not a root if it has only one use and same opcode as its parent
2153
230
      if (
N->hasOneUse() && 230
Opcode == N->use_begin()->getOpcode()131
)
2154
12
        continue;
2155
218
2156
218
      // This root node has already been processed
2157
218
      
if (218
RootWeights.count(N)218
)
2158
113
        continue;
2159
105
2160
105
      RootWeights[N] = -1;
2161
105
    }
2162
613
2163
613
    // Balance node itself
2164
613
    RootWeights[BasePtr.getNode()] = -1;
2165
613
    SDValue NewBasePtr = balanceSubTree(BasePtr.getNode(), /*TopLevel=*/ true);
2166
613
2167
613
    if (N->getOpcode() == ISD::LOAD)
2168
417
      N = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
2169
417
            NewBasePtr, N->getOperand(2));
2170
613
    else
2171
196
      N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1),
2172
196
            NewBasePtr, N->getOperand(3));
2173
613
2174
613
    DEBUG(dbgs() << "--> Final node: ");
2175
613
    DEBUG(N->dump());
2176
55.6k
  }
2177
3.38k
2178
3.38k
  CurDAG->RemoveDeadNodes();
2179
3.38k
  GAUsesInFunction.clear();
2180
3.38k
  RootHeights.clear();
2181
3.38k
  RootWeights.clear();
2182
3.38k
}
2183