Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp

//===- HexagonSubtarget.cpp - Hexagon Subtarget Information ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the Hexagon specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//

#include "Hexagon.h"
#include "HexagonInstrInfo.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
#include <map>

using namespace llvm;

#define DEBUG_TYPE "hexagon-subtarget"

#define GET_SUBTARGETINFO_CTOR
#define GET_SUBTARGETINFO_TARGET_DESC
#include "HexagonGenSubtargetInfo.inc"

static cl::opt<bool> EnableBSBSched("enable-bsb-sched",
  cl::Hidden, cl::ZeroOrMore, cl::init(true));

static cl::opt<bool> EnableTCLatencySched("enable-tc-latency-sched",
  cl::Hidden, cl::ZeroOrMore, cl::init(false));

static cl::opt<bool> EnableDotCurSched("enable-cur-sched",
  cl::Hidden, cl::ZeroOrMore, cl::init(true),
  cl::desc("Enable the scheduler to generate .cur"));

static cl::opt<bool> DisableHexagonMISched("disable-hexagon-misched",
  cl::Hidden, cl::ZeroOrMore, cl::init(false),
  cl::desc("Disable Hexagon MI Scheduling"));

static cl::opt<bool> EnableSubregLiveness("hexagon-subreg-liveness",
  cl::Hidden, cl::ZeroOrMore, cl::init(true),
  cl::desc("Enable subregister liveness tracking for Hexagon"));

static cl::opt<bool> OverrideLongCalls("hexagon-long-calls",
  cl::Hidden, cl::ZeroOrMore, cl::init(false),
  cl::desc("If present, forces/disables the use of long calls"));

static cl::opt<bool> EnablePredicatedCalls("hexagon-pred-calls",
  cl::Hidden, cl::ZeroOrMore, cl::init(false),
  cl::desc("Consider calls to be predicable"));

static cl::opt<bool> SchedPredsCloser("sched-preds-closer",
  cl::Hidden, cl::ZeroOrMore, cl::init(true));

static cl::opt<bool> SchedRetvalOptimization("sched-retval-optimization",
  cl::Hidden, cl::ZeroOrMore, cl::init(true));

static cl::opt<bool> EnableCheckBankConflict("hexagon-check-bank-conflict",
  cl::Hidden, cl::ZeroOrMore, cl::init(true),
  cl::desc("Enable checking for cache bank conflicts"));

HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU,
                                   StringRef FS, const TargetMachine &TM)
    : HexagonGenSubtargetInfo(TT, CPU, FS), OptLevel(TM.getOptLevel()),
      CPUString(Hexagon_MC::selectHexagonCPU(CPU)),
      InstrInfo(initializeSubtargetDependencies(CPU, FS)),
      RegInfo(getHwMode()), TLInfo(TM, *this),
      InstrItins(getInstrItineraryForCPU(CPUString)) {
  // Beware of the default constructor of InstrItineraryData: it will
  // reset all members to 0.
  assert(InstrItins.Itineraries != nullptr && "InstrItins not initialized");
}

HexagonSubtarget &
HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
  static std::map<StringRef, Hexagon::ArchEnum> CpuTable{
      {"generic", Hexagon::ArchEnum::V60},
      {"hexagonv5", Hexagon::ArchEnum::V5},
      {"hexagonv55", Hexagon::ArchEnum::V55},
      {"hexagonv60", Hexagon::ArchEnum::V60},
      {"hexagonv62", Hexagon::ArchEnum::V62},
      {"hexagonv65", Hexagon::ArchEnum::V65},
      {"hexagonv66", Hexagon::ArchEnum::V66},
  };

  auto FoundIt = CpuTable.find(CPUString);
  if (FoundIt != CpuTable.end())
    HexagonArchVersion = FoundIt->second;
  else
    llvm_unreachable("Unrecognized Hexagon processor version");

  UseHVX128BOps = false;
  UseHVX64BOps = false;
  UseLongCalls = false;

  UseBSBScheduling = hasV60Ops() && EnableBSBSched;

  ParseSubtargetFeatures(CPUString, FS);

  if (OverrideLongCalls.getPosition())
    UseLongCalls = OverrideLongCalls;

  FeatureBitset Features = getFeatureBits();
  if (HexagonDisableDuplex)
    setFeatureBits(Features.set(Hexagon::FeatureDuplex, false));
  setFeatureBits(Hexagon_MC::completeHVXFeatures(Features));

  return *this;
}
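
// UsrOverflowMutation (below) erases output-dependence edges whose register is
// the overflow bit of the user status register (Hexagon::USR_OVF). An edge of
// this kind only records that two instructions both write the sticky overflow
// flag, which is not an ordering the scheduler needs to preserve.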

void HexagonSubtarget::UsrOverflowMutation::apply(ScheduleDAGInstrs *DAG) {
  for (SUnit &SU : DAG->SUnits) {
    if (!SU.isInstr())
      continue;
    SmallVector<SDep, 4> Erase;
    for (auto &D : SU.Preds)
      if (D.getKind() == SDep::Output && D.getReg() == Hexagon::USR_OVF)
        Erase.push_back(D);
    for (auto &E : Erase)
      SU.removePred(E);
  }
}

void HexagonSubtarget::HVXMemLatencyMutation::apply(ScheduleDAGInstrs *DAG) {
  for (SUnit &SU : DAG->SUnits) {
    // Update the latency of chain edges between v60 vector load or store
    // instructions to be 1. These instructions cannot be scheduled in the
    // same packet.
    MachineInstr &MI1 = *SU.getInstr();
    auto *QII = static_cast<const HexagonInstrInfo*>(DAG->TII);
    bool IsStoreMI1 = MI1.mayStore();
    bool IsLoadMI1 = MI1.mayLoad();
    if (!QII->isHVXVec(MI1) || !(IsStoreMI1 || IsLoadMI1))
      continue;
    for (SDep &SI : SU.Succs) {
      if (SI.getKind() != SDep::Order || SI.getLatency() != 0)
        continue;
      MachineInstr &MI2 = *SI.getSUnit()->getInstr();
      if (!QII->isHVXVec(MI2))
        continue;
      if ((IsStoreMI1 && MI2.mayStore()) || (IsLoadMI1 && MI2.mayLoad())) {
        SI.setLatency(1);
        SU.setHeightDirty();
        // Change the dependence in the opposite direction too.
        for (SDep &PI : SI.getSUnit()->Preds) {
          if (PI.getSUnit() != &SU || PI.getKind() != SDep::Order)
            continue;
          PI.setLatency(1);
          SI.getSUnit()->setDepthDirty();
        }
      }
    }
  }
}

// Check if a call and subsequent A2_tfrpi instructions should maintain
// scheduling affinity. We are looking for the TFRI to be consumed in
// the next instruction. This should help reduce the instances of
// double register pairs being allocated and scheduled before a call
// when not used until after the call. This situation is exacerbated
// by the fact that we allocate the pair from the callee saves list,
// leading to excess spills and restores.
bool HexagonSubtarget::CallMutation::shouldTFRICallBind(
      const HexagonInstrInfo &HII, const SUnit &Inst1,
      const SUnit &Inst2) const {
  if (Inst1.getInstr()->getOpcode() != Hexagon::A2_tfrpi)
    return false;

  // TypeXTYPE are 64 bit operations.
  unsigned Type = HII.getType(*Inst2.getInstr());
  return Type == HexagonII::TypeS_2op || Type == HexagonII::TypeS_3op ||
         Type == HexagonII::TypeALU64 || Type == HexagonII::TypeM;
}

void HexagonSubtarget::CallMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
  SUnit* LastSequentialCall = nullptr;
  // Map from virtual register to physical register from the copy.
  DenseMap<unsigned, unsigned> VRegHoldingReg;
  // Map from the physical register to the instruction that uses the virtual
  // register. This is used to create the barrier edge.
  DenseMap<unsigned, SUnit *> LastVRegUse;
  auto &TRI = *DAG->MF.getSubtarget().getRegisterInfo();
  auto &HII = *DAG->MF.getSubtarget<HexagonSubtarget>().getInstrInfo();

  // Currently we only catch the situation when a compare gets scheduled
  // before the preceding call.
  for (unsigned su = 0, e = DAG->SUnits.size(); su != e; ++su) {
    // Remember the call.
    if (DAG->SUnits[su].getInstr()->isCall())
      LastSequentialCall = &DAG->SUnits[su];
    // Look for a compare that defines a predicate.
    else if (DAG->SUnits[su].getInstr()->isCompare() && LastSequentialCall)
      DAG->addEdge(&DAG->SUnits[su], SDep(LastSequentialCall, SDep::Barrier));
    // Look for call and tfri* instructions.
    else if (SchedPredsCloser && LastSequentialCall && su > 1 && su < e-1 &&
             shouldTFRICallBind(HII, DAG->SUnits[su], DAG->SUnits[su+1]))
      DAG->addEdge(&DAG->SUnits[su], SDep(&DAG->SUnits[su-1], SDep::Barrier));
    // Prevent redundant register copies due to reads and writes of physical
    // registers. The original motivation for this was the code generated
    // between two calls, which is caused by both the return value and the
    // argument for the next call being in %r0.
    // Example:
    //   1: <call1>
    //   2: %vreg = COPY %r0
    //   3: <use of %vreg>
    //   4: %r0 = ...
    //   5: <call2>
    // The scheduler would often swap 3 and 4, so an additional register is
    // needed. This code inserts a Barrier dependence between 3 & 4 to prevent
    // this.
    // The code below checks for all the physical registers, not just R0/D0/V0.
    else if (SchedRetvalOptimization) {
      const MachineInstr *MI = DAG->SUnits[su].getInstr();
      if (MI->isCopy() &&
          TargetRegisterInfo::isPhysicalRegister(MI->getOperand(1).getReg())) {
        // %vregX = COPY %r0
        VRegHoldingReg[MI->getOperand(0).getReg()] = MI->getOperand(1).getReg();
        LastVRegUse.erase(MI->getOperand(1).getReg());
      } else {
        for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
          const MachineOperand &MO = MI->getOperand(i);
          if (!MO.isReg())
            continue;
          if (MO.isUse() && !MI->isCopy() &&
              VRegHoldingReg.count(MO.getReg())) {
            // <use of %vregX>
            LastVRegUse[VRegHoldingReg[MO.getReg()]] = &DAG->SUnits[su];
          } else if (MO.isDef() &&
                     TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
            for (MCRegAliasIterator AI(MO.getReg(), &TRI, true); AI.isValid();
                 ++AI) {
              if (LastVRegUse.count(*AI) &&
                  LastVRegUse[*AI] != &DAG->SUnits[su])
                // %r0 = ...
                DAG->addEdge(&DAG->SUnits[su], SDep(LastVRegUse[*AI], SDep::Barrier));
              LastVRegUse.erase(*AI);
            }
          }
        }
      }
    }
  }
}
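
// BankConflictMutation (below) compares pairs of base+immediate-offset loads
// from the same base register and looks at bits 3 and 4 of the two offsets.
// For example, offsets 0 and 32 satisfy ((0 ^ 32) & 0x18) == 0, so the loads
// may land in the same cache bank and an artificial latency-1 edge is added;
// offsets 0 and 8 differ in bit 3, so no edge is added.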

void HexagonSubtarget::BankConflictMutation::apply(ScheduleDAGInstrs *DAG) {
  if (!EnableCheckBankConflict)
    return;

  const auto &HII = static_cast<const HexagonInstrInfo&>(*DAG->TII);

  // Create artificial edges between loads that could likely cause a bank
  // conflict. Since such loads would normally not have any dependency
  // between them, we cannot rely on existing edges.
  for (unsigned i = 0, e = DAG->SUnits.size(); i != e; ++i) {
    SUnit &S0 = DAG->SUnits[i];
    MachineInstr &L0 = *S0.getInstr();
    if (!L0.mayLoad() || L0.mayStore() ||
        HII.getAddrMode(L0) != HexagonII::BaseImmOffset)
      continue;
    int64_t Offset0;
    unsigned Size0;
    MachineOperand *BaseOp0 = HII.getBaseAndOffset(L0, Offset0, Size0);
    // If the access size is longer than the L1 cache line, skip the check.
    if (BaseOp0 == nullptr || !BaseOp0->isReg() || Size0 >= 32)
      continue;
    // Scan only up to 32 instructions ahead (to avoid n^2 complexity).
    for (unsigned j = i+1, m = std::min(i+32, e); j != m; ++j) {
      SUnit &S1 = DAG->SUnits[j];
      MachineInstr &L1 = *S1.getInstr();
      if (!L1.mayLoad() || L1.mayStore() ||
          HII.getAddrMode(L1) != HexagonII::BaseImmOffset)
        continue;
      int64_t Offset1;
      unsigned Size1;
      MachineOperand *BaseOp1 = HII.getBaseAndOffset(L1, Offset1, Size1);
      if (BaseOp1 == nullptr || !BaseOp1->isReg() || Size1 >= 32 ||
          BaseOp0->getReg() != BaseOp1->getReg())
        continue;
      // Check bits 3 and 4 of the offset: if they differ, a bank conflict
      // is unlikely.
      if (((Offset0 ^ Offset1) & 0x18) != 0)
        continue;
      // Bits 3 and 4 are the same, add an artificial edge and set extra
      // latency.
      SDep A(&S0, SDep::Artificial);
      A.setLatency(1);
      S1.addPred(A, true);
    }
  }
}

/// Enable use of alias analysis during code generation (during MI
/// scheduling, DAGCombine, etc.).
bool HexagonSubtarget::useAA() const {
  if (OptLevel != CodeGenOpt::None)
    return true;
  return false;
}
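
// A note on the zero-latency adjustments below: a ".new" operand on Hexagon
// consumes a value produced by another instruction in the same packet, so when
// two instructions can be bundled together (canExecuteInBundle) and form the
// best zero-latency pair, the dependence latency is set to 0 to encourage the
// scheduler to keep them adjacent.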

/// Perform target specific adjustments to the latency of a schedule
/// dependency.
void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst,
                                             SDep &Dep) const {
  MachineInstr *SrcInst = Src->getInstr();
  MachineInstr *DstInst = Dst->getInstr();
  if (!Src->isInstr() || !Dst->isInstr())
    return;

  const HexagonInstrInfo *QII = getInstrInfo();

  // Instructions with .new operands have zero latency.
  SmallSet<SUnit *, 4> ExclSrc;
  SmallSet<SUnit *, 4> ExclDst;
  if (QII->canExecuteInBundle(*SrcInst, *DstInst) &&
      isBestZeroLatency(Src, Dst, QII, ExclSrc, ExclDst)) {
    Dep.setLatency(0);
    return;
  }

  if (!hasV60Ops())
    return;

  // Set the latency for a copy to zero since we hope that it will get removed.
  if (DstInst->isCopy())
    Dep.setLatency(0);

  // If it's a REG_SEQUENCE/COPY, use its destination instruction to determine
  // the correct latency.
  if ((DstInst->isRegSequence() || DstInst->isCopy()) && Dst->NumSuccs == 1) {
    unsigned DReg = DstInst->getOperand(0).getReg();
    MachineInstr *DDst = Dst->Succs[0].getSUnit()->getInstr();
    unsigned UseIdx = -1;
    for (unsigned OpNum = 0; OpNum < DDst->getNumOperands(); OpNum++) {
      const MachineOperand &MO = DDst->getOperand(OpNum);
      if (MO.isReg() && MO.getReg() && MO.isUse() && MO.getReg() == DReg) {
        UseIdx = OpNum;
        break;
      }
    }
    int DLatency = (InstrInfo.getOperandLatency(&InstrItins, *SrcInst,
                                                0, *DDst, UseIdx));
    DLatency = std::max(DLatency, 0);
    Dep.setLatency((unsigned)DLatency);
  }

  // Try to schedule uses near definitions to generate .cur.
  ExclSrc.clear();
  ExclDst.clear();
  if (EnableDotCurSched && QII->isToBeScheduledASAP(*SrcInst, *DstInst) &&
      isBestZeroLatency(Src, Dst, QII, ExclSrc, ExclDst)) {
    Dep.setLatency(0);
    return;
  }

  updateLatency(*SrcInst, *DstInst, Dep);
}

void HexagonSubtarget::getPostRAMutations(
    std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
  Mutations.push_back(llvm::make_unique<UsrOverflowMutation>());
  Mutations.push_back(llvm::make_unique<HVXMemLatencyMutation>());
  Mutations.push_back(llvm::make_unique<BankConflictMutation>());
}

void HexagonSubtarget::getSMSMutations(
    std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
  Mutations.push_back(llvm::make_unique<UsrOverflowMutation>());
  Mutations.push_back(llvm::make_unique<HVXMemLatencyMutation>());
}

// Pin the vtable to this file.
void HexagonSubtarget::anchor() {}

bool HexagonSubtarget::enableMachineScheduler() const {
  if (DisableHexagonMISched.getNumOccurrences())
    return !DisableHexagonMISched;
  return true;
}

bool HexagonSubtarget::usePredicatedCalls() const {
  return EnablePredicatedCalls;
}
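
// In updateLatency below, artificial edges always get latency 1. On V60 and
// later, for HVX vector producers and whenever BSB scheduling is in effect,
// the remaining dependence latencies are halved with rounding up:
// (Lat + 1) >> 1 maps 3 -> 2, 2 -> 1, and 1 -> 1.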

void HexagonSubtarget::updateLatency(MachineInstr &SrcInst,
      MachineInstr &DstInst, SDep &Dep) const {
  if (Dep.isArtificial()) {
    Dep.setLatency(1);
    return;
  }

  if (!hasV60Ops())
    return;

  auto &QII = static_cast<const HexagonInstrInfo&>(*getInstrInfo());

  // BSB scheduling.
  if (QII.isHVXVec(SrcInst) || useBSBScheduling())
    Dep.setLatency((Dep.getLatency() + 1) >> 1);
}
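
/// Recompute the latency of the register-dependence edge from Src to Dst from
/// the itinerary data for the defining and using operands, then mirror the new
/// value on the corresponding predecessor edge of Dst.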
void HexagonSubtarget::restoreLatency(SUnit *Src, SUnit *Dst) const {
  MachineInstr *SrcI = Src->getInstr();
  for (auto &I : Src->Succs) {
    if (!I.isAssignedRegDep() || I.getSUnit() != Dst)
      continue;
    unsigned DepR = I.getReg();
    int DefIdx = -1;
    for (unsigned OpNum = 0; OpNum < SrcI->getNumOperands(); OpNum++) {
      const MachineOperand &MO = SrcI->getOperand(OpNum);
      if (MO.isReg() && MO.isDef() && MO.getReg() == DepR)
        DefIdx = OpNum;
    }
    assert(DefIdx >= 0 && "Def Reg not found in Src MI");
    MachineInstr *DstI = Dst->getInstr();
    SDep T = I;
    for (unsigned OpNum = 0; OpNum < DstI->getNumOperands(); OpNum++) {
      const MachineOperand &MO = DstI->getOperand(OpNum);
      if (MO.isReg() && MO.isUse() && MO.getReg() == DepR) {
        int Latency = (InstrInfo.getOperandLatency(&InstrItins, *SrcI,
                                                   DefIdx, *DstI, OpNum));

        // For some instructions (ex: COPY), we might end up with < 0 latency
        // as they don't have any Itinerary class associated with them.
        Latency = std::max(Latency, 0);

        I.setLatency(Latency);
        updateLatency(*SrcI, *DstI, I);
      }
    }

    // Update the latency of the opposite edge too.
    T.setSUnit(Src);
    auto F = std::find(Dst->Preds.begin(), Dst->Preds.end(), T);
    assert(F != Dst->Preds.end());
    F->setLatency(I.getLatency());
  }
}

/// Change the latency between the two SUnits.
void HexagonSubtarget::changeLatency(SUnit *Src, SUnit *Dst, unsigned Lat)
      const {
  for (auto &I : Src->Succs) {
    if (!I.isAssignedRegDep() || I.getSUnit() != Dst)
      continue;
    SDep T = I;
    I.setLatency(Lat);

    // Update the latency of the opposite edge too.
    T.setSUnit(Src);
    auto F = std::find(Dst->Preds.begin(), Dst->Preds.end(), T);
    assert(F != Dst->Preds.end());
    F->setLatency(Lat);
  }
}

/// If the SUnit has a zero latency edge, return the other SUnit.
static SUnit *getZeroLatency(SUnit *N, SmallVector<SDep, 4> &Deps) {
  for (auto &I : Deps)
    if (I.isAssignedRegDep() && I.getLatency() == 0 &&
        !I.getSUnit()->getInstr()->isPseudo())
      return I.getSUnit();
  return nullptr;
}

// Return true if these are the best two instructions to schedule
// together with a zero latency. Only one dependence should have a zero
// latency. If there are multiple choices, choose the best, and change
// the others, if needed.
bool HexagonSubtarget::isBestZeroLatency(SUnit *Src, SUnit *Dst,
      const HexagonInstrInfo *TII, SmallSet<SUnit*, 4> &ExclSrc,
      SmallSet<SUnit*, 4> &ExclDst) const {
  MachineInstr &SrcInst = *Src->getInstr();
  MachineInstr &DstInst = *Dst->getInstr();

  // Ignore Boundary SU nodes as these have null instructions.
  if (Dst->isBoundaryNode())
    return false;

  if (SrcInst.isPHI() || DstInst.isPHI())
    return false;

  if (!TII->isToBeScheduledASAP(SrcInst, DstInst) &&
      !TII->canExecuteInBundle(SrcInst, DstInst))
    return false;

  // The architecture doesn't allow three dependent instructions in the same
  // packet. So, if the destination has a zero latency successor, then it's
  // not a candidate for a zero latency predecessor.
  if (getZeroLatency(Dst, Dst->Succs) != nullptr)
    return false;

  // Check if the Dst instruction is the best candidate first.
  SUnit *Best = nullptr;
  SUnit *DstBest = nullptr;
  SUnit *SrcBest = getZeroLatency(Dst, Dst->Preds);
  if (SrcBest == nullptr || Src->NodeNum >= SrcBest->NodeNum) {
    // Check that Src doesn't have a better candidate.
    DstBest = getZeroLatency(Src, Src->Succs);
    if (DstBest == nullptr || Dst->NodeNum <= DstBest->NodeNum)
      Best = Dst;
  }
  if (Best != Dst)
    return false;

  // The caller frequently adds the same dependence twice. If so, then
  // return true for this case too.
  if ((Src == SrcBest && Dst == DstBest) ||
      (SrcBest == nullptr && Dst == DstBest) ||
      (Src == SrcBest && Dst == nullptr))
    return true;

  // Reassign the latency for the previous bests, which requires setting
  // the dependence edge in both directions.
  if (SrcBest != nullptr) {
    if (!hasV60Ops())
      changeLatency(SrcBest, Dst, 1);
    else
      restoreLatency(SrcBest, Dst);
  }
  if (DstBest != nullptr) {
    if (!hasV60Ops())
      changeLatency(Src, DstBest, 1);
    else
      restoreLatency(Src, DstBest);
  }

  // Attempt to find another opportunity for zero latency in a different
  // dependence.
  if (SrcBest && DstBest)
    // If there is an edge from SrcBest to DstBest, then try to change that
    // to 0 now.
    changeLatency(SrcBest, DstBest, 0);
  else if (DstBest) {
    // Check if the previous best destination instruction has a new zero
    // latency dependence opportunity.
    ExclSrc.insert(Src);
    for (auto &I : DstBest->Preds)
      if (ExclSrc.count(I.getSUnit()) == 0 &&
          isBestZeroLatency(I.getSUnit(), DstBest, TII, ExclSrc, ExclDst))
        changeLatency(I.getSUnit(), DstBest, 0);
  } else if (SrcBest) {
    // Check if the previous best source instruction has a new zero latency
    // dependence opportunity.
    ExclDst.insert(Dst);
    for (auto &I : SrcBest->Succs)
      if (ExclDst.count(I.getSUnit()) == 0 &&
          isBestZeroLatency(SrcBest, I.getSUnit(), TII, ExclSrc, ExclDst))
        changeLatency(SrcBest, I.getSUnit(), 0);
  }

  return true;
}
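
// The 32-byte L1 line size reported below matches the Size0/Size1 >= 32 cutoff
// used by BankConflictMutation above, which skips accesses that already cover
// a full cache line.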

unsigned HexagonSubtarget::getL1CacheLineSize() const {
  return 32;
}

unsigned HexagonSubtarget::getL1PrefetchDistance() const {
  return 32;
}

bool HexagonSubtarget::enableSubRegLiveness() const {
  return EnableSubregLiveness;
}