Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- WebAssemblyRegStackify.cpp - Register Stackification --------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
///
9
/// \file
10
/// This file implements a register stacking pass.
11
///
12
/// This pass reorders instructions to put register uses and defs in an order
13
/// such that they form single-use expression trees. Registers fitting this form
14
/// are then marked as "stackified", meaning references to them are replaced by
15
/// "push" and "pop" from the value stack.
16
///
17
/// This is primarily a code size optimization, since temporary values on the
18
/// value stack don't need to be named.
19
///
20
//===----------------------------------------------------------------------===//
21
22
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
23
#include "WebAssembly.h"
24
#include "WebAssemblyDebugValueManager.h"
25
#include "WebAssemblyMachineFunctionInfo.h"
26
#include "WebAssemblySubtarget.h"
27
#include "WebAssemblyUtilities.h"
28
#include "llvm/ADT/SmallPtrSet.h"
29
#include "llvm/Analysis/AliasAnalysis.h"
30
#include "llvm/CodeGen/LiveIntervals.h"
31
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
32
#include "llvm/CodeGen/MachineDominators.h"
33
#include "llvm/CodeGen/MachineInstrBuilder.h"
34
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
35
#include "llvm/CodeGen/MachineRegisterInfo.h"
36
#include "llvm/CodeGen/Passes.h"
37
#include "llvm/Support/Debug.h"
38
#include "llvm/Support/raw_ostream.h"
39
using namespace llvm;
40
41
#define DEBUG_TYPE "wasm-reg-stackify"
42
43
namespace {
44
class WebAssemblyRegStackify final : public MachineFunctionPass {
45
4.73k
  StringRef getPassName() const override {
46
4.73k
    return "WebAssembly Register Stackify";
47
4.73k
  }
48
49
415
  void getAnalysisUsage(AnalysisUsage &AU) const override {
50
415
    AU.setPreservesCFG();
51
415
    AU.addRequired<AAResultsWrapperPass>();
52
415
    AU.addRequired<MachineDominatorTree>();
53
415
    AU.addRequired<LiveIntervals>();
54
415
    AU.addPreserved<MachineBlockFrequencyInfo>();
55
415
    AU.addPreserved<SlotIndexes>();
56
415
    AU.addPreserved<LiveIntervals>();
57
415
    AU.addPreservedID(LiveVariablesID);
58
415
    AU.addPreserved<MachineDominatorTree>();
59
415
    MachineFunctionPass::getAnalysisUsage(AU);
60
415
  }
61
62
  bool runOnMachineFunction(MachineFunction &MF) override;
63
64
public:
65
  static char ID; // Pass identification, replacement for typeid
66
415
  WebAssemblyRegStackify() : MachineFunctionPass(ID) {}
67
};
68
} // end anonymous namespace
69
70
char WebAssemblyRegStackify::ID = 0;
71
INITIALIZE_PASS(WebAssemblyRegStackify, DEBUG_TYPE,
72
                "Reorder instructions to use the WebAssembly value stack",
73
                false, false)
74
75
413
FunctionPass *llvm::createWebAssemblyRegStackify() {
76
413
  return new WebAssemblyRegStackify();
77
413
}
78
79
// Decorate the given instruction with implicit operands that enforce the
80
// expression stack ordering constraints for an instruction which is on
81
// the expression stack.
82
33.7k
static void imposeStackOrdering(MachineInstr *MI) {
83
33.7k
  // Write the opaque VALUE_STACK register.
84
33.7k
  if (!MI->definesRegister(WebAssembly::VALUE_STACK))
85
33.7k
    MI->addOperand(MachineOperand::CreateReg(WebAssembly::VALUE_STACK,
86
33.7k
                                             /*isDef=*/true,
87
33.7k
                                             /*isImp=*/true));
88
33.7k
89
33.7k
  // Also read the opaque VALUE_STACK register.
90
33.7k
  if (!MI->readsRegister(WebAssembly::VALUE_STACK))
91
33.7k
    MI->addOperand(MachineOperand::CreateReg(WebAssembly::VALUE_STACK,
92
33.7k
                                             /*isDef=*/false,
93
33.7k
                                             /*isImp=*/true));
94
33.7k
}
95
96
// Convert an IMPLICIT_DEF instruction into an instruction which defines
97
// a constant zero value.
98
static void convertImplicitDefToConstZero(MachineInstr *MI,
99
                                          MachineRegisterInfo &MRI,
100
                                          const TargetInstrInfo *TII,
101
                                          MachineFunction &MF,
102
5
                                          LiveIntervals &LIS) {
103
5
  assert(MI->getOpcode() == TargetOpcode::IMPLICIT_DEF);
104
5
105
5
  const auto *RegClass = MRI.getRegClass(MI->getOperand(0).getReg());
106
5
  if (RegClass == &WebAssembly::I32RegClass) {
107
1
    MI->setDesc(TII->get(WebAssembly::CONST_I32));
108
1
    MI->addOperand(MachineOperand::CreateImm(0));
109
4
  } else if (RegClass == &WebAssembly::I64RegClass) {
110
1
    MI->setDesc(TII->get(WebAssembly::CONST_I64));
111
1
    MI->addOperand(MachineOperand::CreateImm(0));
112
3
  } else if (RegClass == &WebAssembly::F32RegClass) {
113
1
    MI->setDesc(TII->get(WebAssembly::CONST_F32));
114
1
    auto *Val = cast<ConstantFP>(Constant::getNullValue(
115
1
        Type::getFloatTy(MF.getFunction().getContext())));
116
1
    MI->addOperand(MachineOperand::CreateFPImm(Val));
117
2
  } else if (RegClass == &WebAssembly::F64RegClass) {
118
1
    MI->setDesc(TII->get(WebAssembly::CONST_F64));
119
1
    auto *Val = cast<ConstantFP>(Constant::getNullValue(
120
1
        Type::getDoubleTy(MF.getFunction().getContext())));
121
1
    MI->addOperand(MachineOperand::CreateFPImm(Val));
122
1
  } else if (RegClass == &WebAssembly::V128RegClass) {
123
1
    unsigned TempReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
124
1
    MI->setDesc(TII->get(WebAssembly::SPLAT_v4i32));
125
1
    MI->addOperand(MachineOperand::CreateReg(TempReg, false));
126
1
    MachineInstr *Const = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
127
1
                                  TII->get(WebAssembly::CONST_I32), TempReg)
128
1
                              .addImm(0);
129
1
    LIS.InsertMachineInstrInMaps(*Const);
130
1
  } else {
131
0
    llvm_unreachable("Unexpected reg class");
132
0
  }
133
5
}
134
135
// Determine whether a call to the callee referenced by
136
// MI->getOperand(CalleeOpNo) reads memory, writes memory, and/or has side
137
// effects.
138
static void queryCallee(const MachineInstr &MI, unsigned CalleeOpNo, bool &Read,
139
320
                        bool &Write, bool &Effects, bool &StackPointer) {
140
320
  // All calls can use the stack pointer.
141
320
  StackPointer = true;
142
320
143
320
  const MachineOperand &MO = MI.getOperand(CalleeOpNo);
144
320
  if (MO.isGlobal()) {
145
151
    const Constant *GV = MO.getGlobal();
146
151
    if (const auto *GA = dyn_cast<GlobalAlias>(GV))
147
5
      if (!GA->isInterposable())
148
0
        GV = GA->getAliasee();
149
151
150
151
    if (const auto *F = dyn_cast<Function>(GV)) {
151
146
      if (!F->doesNotThrow())
152
138
        Effects = true;
153
146
      if (F->doesNotAccessMemory())
154
4
        return;
155
142
      if (F->onlyReadsMemory()) {
156
2
        Read = true;
157
2
        return;
158
2
      }
159
314
    }
160
151
  }
161
314
162
314
  // Assume the worst.
163
314
  Write = true;
164
314
  Read = true;
165
314
  Effects = true;
166
314
}
167
168
// Determine whether MI reads memory, writes memory, has side effects,
169
// and/or uses the stack pointer value.
170
static void query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
171
30.5k
                  bool &Write, bool &Effects, bool &StackPointer) {
172
30.5k
  assert(!MI.isTerminator());
173
30.5k
174
30.5k
  if (MI.isDebugInstr() || 
MI.isPosition()30.5k
)
175
14
    return;
176
30.5k
177
30.5k
  // Check for loads.
178
30.5k
  if (MI.mayLoad() && 
!MI.isDereferenceableInvariantLoad(&AA)2.31k
)
179
2.31k
    Read = true;
180
30.5k
181
30.5k
  // Check for stores.
182
30.5k
  if (MI.mayStore()) {
183
515
    Write = true;
184
29.9k
  } else if (MI.hasOrderedMemoryRef()) {
185
839
    switch (MI.getOpcode()) {
186
839
    case WebAssembly::DIV_S_I32:
187
184
    case WebAssembly::DIV_S_I64:
188
184
    case WebAssembly::REM_S_I32:
189
184
    case WebAssembly::REM_S_I64:
190
184
    case WebAssembly::DIV_U_I32:
191
184
    case WebAssembly::DIV_U_I64:
192
184
    case WebAssembly::REM_U_I32:
193
184
    case WebAssembly::REM_U_I64:
194
184
    case WebAssembly::I32_TRUNC_S_F32:
195
184
    case WebAssembly::I64_TRUNC_S_F32:
196
184
    case WebAssembly::I32_TRUNC_S_F64:
197
184
    case WebAssembly::I64_TRUNC_S_F64:
198
184
    case WebAssembly::I32_TRUNC_U_F32:
199
184
    case WebAssembly::I64_TRUNC_U_F32:
200
184
    case WebAssembly::I32_TRUNC_U_F64:
201
184
    case WebAssembly::I64_TRUNC_U_F64:
202
184
      // These instruction have hasUnmodeledSideEffects() returning true
203
184
      // because they trap on overflow and invalid so they can't be arbitrarily
204
184
      // moved, however hasOrderedMemoryRef() interprets this plus their lack
205
184
      // of memoperands as having a potential unknown memory reference.
206
184
      break;
207
655
    default:
208
655
      // Record volatile accesses, unless it's a call, as calls are handled
209
655
      // specially below.
210
655
      if (!MI.isCall()) {
211
335
        Write = true;
212
335
        Effects = true;
213
335
      }
214
655
      break;
215
30.5k
    }
216
30.5k
  }
217
30.5k
218
30.5k
  // Check for side effects.
219
30.5k
  if (MI.hasUnmodeledSideEffects()) {
220
256
    switch (MI.getOpcode()) {
221
256
    case WebAssembly::DIV_S_I32:
222
184
    case WebAssembly::DIV_S_I64:
223
184
    case WebAssembly::REM_S_I32:
224
184
    case WebAssembly::REM_S_I64:
225
184
    case WebAssembly::DIV_U_I32:
226
184
    case WebAssembly::DIV_U_I64:
227
184
    case WebAssembly::REM_U_I32:
228
184
    case WebAssembly::REM_U_I64:
229
184
    case WebAssembly::I32_TRUNC_S_F32:
230
184
    case WebAssembly::I64_TRUNC_S_F32:
231
184
    case WebAssembly::I32_TRUNC_S_F64:
232
184
    case WebAssembly::I64_TRUNC_S_F64:
233
184
    case WebAssembly::I32_TRUNC_U_F32:
234
184
    case WebAssembly::I64_TRUNC_U_F32:
235
184
    case WebAssembly::I32_TRUNC_U_F64:
236
184
    case WebAssembly::I64_TRUNC_U_F64:
237
184
      // These instructions have hasUnmodeledSideEffects() returning true
238
184
      // because they trap on overflow and invalid so they can't be arbitrarily
239
184
      // moved, however in the specific case of register stackifying, it is safe
240
184
      // to move them because overflow and invalid are Undefined Behavior.
241
184
      break;
242
184
    default:
243
72
      Effects = true;
244
72
      break;
245
30.5k
    }
246
30.5k
  }
247
30.5k
248
30.5k
  // Check for writes to __stack_pointer global.
249
30.5k
  if (MI.getOpcode() == WebAssembly::GLOBAL_SET_I32 &&
250
30.5k
      
strcmp(MI.getOperand(0).getSymbolName(), "__stack_pointer") == 034
)
251
34
    StackPointer = true;
252
30.5k
253
30.5k
  // Analyze calls.
254
30.5k
  if (MI.isCall()) {
255
320
    unsigned CalleeOpNo = WebAssembly::getCalleeOpNo(MI.getOpcode());
256
320
    queryCallee(MI, CalleeOpNo, Read, Write, Effects, StackPointer);
257
320
  }
258
30.5k
}
259
260
// Test whether Def is safe and profitable to rematerialize.
261
static bool shouldRematerialize(const MachineInstr &Def, AliasAnalysis &AA,
262
6.99k
                                const WebAssemblyInstrInfo *TII) {
263
6.99k
  return Def.isAsCheapAsAMove() && 
TII->isTriviallyReMaterializable(Def, &AA)3.98k
;
264
6.99k
}
265
266
// Identify the definition for this register at this point. This is a
267
// generalization of MachineRegisterInfo::getUniqueVRegDef that uses
268
// LiveIntervals to handle complex cases.
269
static MachineInstr *getVRegDef(unsigned Reg, const MachineInstr *Insert,
270
                                const MachineRegisterInfo &MRI,
271
44.5k
                                const LiveIntervals &LIS) {
272
44.5k
  // Most registers are in SSA form here so we try a quick MRI query first.
273
44.5k
  if (MachineInstr *Def = MRI.getUniqueVRegDef(Reg))
274
44.2k
    return Def;
275
274
276
274
  // MRI doesn't know what the Def is. Try asking LIS.
277
274
  if (const VNInfo *ValNo = LIS.getInterval(Reg).getVNInfoBefore(
278
217
          LIS.getInstructionIndex(*Insert)))
279
217
    return LIS.getInstructionFromIndex(ValNo->def);
280
57
281
57
  return nullptr;
282
57
}
283
284
// Test whether Reg, as defined at Def, has exactly one use. This is a
285
// generalization of MachineRegisterInfo::hasOneUse that uses LiveIntervals
286
// to handle complex cases.
287
static bool hasOneUse(unsigned Reg, MachineInstr *Def, MachineRegisterInfo &MRI,
288
28.0k
                      MachineDominatorTree &MDT, LiveIntervals &LIS) {
289
28.0k
  // Most registers are in SSA form here so we try a quick MRI query first.
290
28.0k
  if (MRI.hasOneUse(Reg))
291
21.9k
    return true;
292
6.10k
293
6.10k
  bool HasOne = false;
294
6.10k
  const LiveInterval &LI = LIS.getInterval(Reg);
295
6.10k
  const VNInfo *DefVNI =
296
6.10k
      LI.getVNInfoAt(LIS.getInstructionIndex(*Def).getRegSlot());
297
6.10k
  assert(DefVNI);
298
6.48k
  for (auto &I : MRI.use_nodbg_operands(Reg)) {
299
6.48k
    const auto &Result = LI.Query(LIS.getInstructionIndex(*I.getParent()));
300
6.48k
    if (Result.valueIn() == DefVNI) {
301
6.44k
      if (!Result.isKill())
302
6.08k
        return false;
303
355
      if (HasOne)
304
2
        return false;
305
353
      HasOne = true;
306
353
    }
307
6.48k
  }
308
6.10k
  
return HasOne13
;
309
6.10k
}
310
311
// Test whether it's safe to move Def to just before Insert.
312
// TODO: Compute memory dependencies in a way that doesn't require always
313
// walking the block.
314
// TODO: Compute memory dependencies in a way that uses AliasAnalysis to be
315
// more precise.
316
static bool isSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
317
28.5k
                         AliasAnalysis &AA, const MachineRegisterInfo &MRI) {
318
28.5k
  assert(Def->getParent() == Insert->getParent());
319
28.5k
320
28.5k
  // 'catch' and 'extract_exception' should be the first instruction of a BB and
321
28.5k
  // cannot move.
322
28.5k
  if (Def->getOpcode() == WebAssembly::CATCH ||
323
28.5k
      Def->getOpcode() == WebAssembly::EXTRACT_EXCEPTION_I32) {
324
29
    const MachineBasicBlock *MBB = Def->getParent();
325
29
    auto NextI = std::next(MachineBasicBlock::const_iterator(Def));
326
29
    for (auto E = MBB->end(); NextI != E && NextI->isDebugInstr(); 
++NextI0
)
327
0
      ;
328
29
    if (NextI != Insert)
329
14
      return false;
330
28.5k
  }
331
28.5k
332
28.5k
  // Check for register dependencies.
333
28.5k
  SmallVector<unsigned, 4> MutableRegisters;
334
108k
  for (const MachineOperand &MO : Def->operands()) {
335
108k
    if (!MO.isReg() || 
MO.isUndef()93.4k
)
336
15.5k
      continue;
337
93.4k
    unsigned Reg = MO.getReg();
338
93.4k
339
93.4k
    // If the register is dead here and at Insert, ignore it.
340
93.4k
    if (MO.isDead() && 
Insert->definesRegister(Reg)26.7k
&&
341
93.4k
        
!Insert->readsRegister(Reg)26.7k
)
342
26.7k
      continue;
343
66.6k
344
66.6k
    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
345
2.22k
      // Ignore ARGUMENTS; it's just used to keep the ARGUMENT_* instructions
346
2.22k
      // from moving down, and we've already checked for that.
347
2.22k
      if (Reg == WebAssembly::ARGUMENTS)
348
1.76k
        continue;
349
466
      // If the physical register is never modified, ignore it.
350
466
      if (!MRI.isPhysRegModified(Reg))
351
466
        continue;
352
0
      // Otherwise, it's a physical register with unknown liveness.
353
0
      return false;
354
0
    }
355
64.4k
356
64.4k
    // If one of the operands isn't in SSA form, it has different values at
357
64.4k
    // different times, and we need to make sure we don't move our use across
358
64.4k
    // a different def.
359
64.4k
    if (!MO.isDef() && 
!MRI.hasOneDef(Reg)35.9k
)
360
194
      MutableRegisters.push_back(Reg);
361
64.4k
  }
362
28.5k
363
28.5k
  bool Read = false, Write = false, Effects = false, StackPointer = false;
364
28.5k
  query(*Def, AA, Read, Write, Effects, StackPointer);
365
28.5k
366
28.5k
  // If the instruction does not access memory and has no side effects, it has
367
28.5k
  // no additional dependencies.
368
28.5k
  bool HasMutableRegisters = !MutableRegisters.empty();
369
28.5k
  if (!Read && 
!Write26.2k
&&
!Effects26.2k
&&
!StackPointer26.2k
&&
!HasMutableRegisters26.2k
)
370
26.1k
    return true;
371
2.40k
372
2.40k
  // Scan through the intervening instructions between Def and Insert.
373
2.40k
  MachineBasicBlock::const_iterator D(Def), I(Insert);
374
4.05k
  for (--I; I != D; 
--I1.65k
) {
375
1.98k
    bool InterveningRead = false;
376
1.98k
    bool InterveningWrite = false;
377
1.98k
    bool InterveningEffects = false;
378
1.98k
    bool InterveningStackPointer = false;
379
1.98k
    query(*I, AA, InterveningRead, InterveningWrite, InterveningEffects,
380
1.98k
          InterveningStackPointer);
381
1.98k
    if (Effects && 
InterveningEffects95
)
382
40
      return false;
383
1.94k
    if (Read && 
InterveningWrite1.77k
)
384
238
      return false;
385
1.70k
    if (Write && 
(81
InterveningRead81
||
InterveningWrite79
))
386
2
      return false;
387
1.70k
    if (StackPointer && 
InterveningStackPointer39
)
388
2
      return false;
389
1.70k
390
1.70k
    for (unsigned Reg : MutableRegisters)
391
200
      for (const MachineOperand &MO : I->operands())
392
607
        if (MO.isReg() && 
MO.isDef()525
&&
MO.getReg() == Reg303
)
393
48
          return false;
394
1.70k
  }
395
2.40k
396
2.40k
  
return true2.07k
;
397
2.40k
}
398
399
/// Test whether OneUse, a use of Reg, dominates all of Reg's other uses.
400
static bool oneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
401
                                     const MachineBasicBlock &MBB,
402
                                     const MachineRegisterInfo &MRI,
403
                                     const MachineDominatorTree &MDT,
404
                                     LiveIntervals &LIS,
405
2.48k
                                     WebAssemblyFunctionInfo &MFI) {
406
2.48k
  const LiveInterval &LI = LIS.getInterval(Reg);
407
2.48k
408
2.48k
  const MachineInstr *OneUseInst = OneUse.getParent();
409
2.48k
  VNInfo *OneUseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*OneUseInst));
410
2.48k
411
4.84k
  for (const MachineOperand &Use : MRI.use_nodbg_operands(Reg)) {
412
4.84k
    if (&Use == &OneUse)
413
749
      continue;
414
4.10k
415
4.10k
    const MachineInstr *UseInst = Use.getParent();
416
4.10k
    VNInfo *UseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*UseInst));
417
4.10k
418
4.10k
    if (UseVNI != OneUseVNI)
419
30
      continue;
420
4.07k
421
4.07k
    if (UseInst == OneUseInst) {
422
2
      // Another use in the same instruction. We need to ensure that the one
423
2
      // selected use happens "before" it.
424
2
      if (&OneUse > &Use)
425
0
        return false;
426
4.06k
    } else {
427
4.06k
      // Test that the use is dominated by the one selected use.
428
4.12k
      while (!MDT.dominates(OneUseInst, UseInst)) {
429
1.99k
        // Actually, dominating is over-conservative. Test that the use would
430
1.99k
        // happen after the one selected use in the stack evaluation order.
431
1.99k
        //
432
1.99k
        // This is needed as a consequence of using implicit local.gets for
433
1.99k
        // uses and implicit local.sets for defs.
434
1.99k
        if (UseInst->getDesc().getNumDefs() == 0)
435
898
          return false;
436
1.09k
        const MachineOperand &MO = UseInst->getOperand(0);
437
1.09k
        if (!MO.isReg())
438
0
          return false;
439
1.09k
        unsigned DefReg = MO.getReg();
440
1.09k
        if (!TargetRegisterInfo::isVirtualRegister(DefReg) ||
441
1.09k
            !MFI.isVRegStackified(DefReg))
442
940
          return false;
443
157
        assert(MRI.hasOneNonDBGUse(DefReg));
444
157
        const MachineOperand &NewUse = *MRI.use_nodbg_begin(DefReg);
445
157
        const MachineInstr *NewUseInst = NewUse.getParent();
446
157
        if (NewUseInst == OneUseInst) {
447
99
          if (&OneUse > &NewUse)
448
0
            return false;
449
99
          break;
450
99
        }
451
58
        UseInst = NewUseInst;
452
58
      }
453
4.06k
    }
454
4.07k
  }
455
2.48k
  
return true642
;
456
2.48k
}
457
458
/// Get the appropriate tee opcode for the given register class.
459
642
static unsigned getTeeOpcode(const TargetRegisterClass *RC) {
460
642
  if (RC == &WebAssembly::I32RegClass)
461
491
    return WebAssembly::TEE_I32;
462
151
  if (RC == &WebAssembly::I64RegClass)
463
69
    return WebAssembly::TEE_I64;
464
82
  if (RC == &WebAssembly::F32RegClass)
465
0
    return WebAssembly::TEE_F32;
466
82
  if (RC == &WebAssembly::F64RegClass)
467
4
    return WebAssembly::TEE_F64;
468
78
  if (RC == &WebAssembly::V128RegClass)
469
78
    return WebAssembly::TEE_V128;
470
0
  llvm_unreachable("Unexpected register class");
471
0
}
472
473
// Shrink LI to its uses, cleaning up LI.
474
4.39k
static void shrinkToUses(LiveInterval &LI, LiveIntervals &LIS) {
475
4.39k
  if (LIS.shrinkToUses(&LI)) {
476
0
    SmallVector<LiveInterval *, 4> SplitLIs;
477
0
    LIS.splitSeparateComponents(LI, SplitLIs);
478
0
  }
479
4.39k
}
480
481
/// A single-use def in the same block with no intervening memory or register
482
/// dependencies; move the def down and nest it with the current instruction.
483
static MachineInstr *moveForSingleUse(unsigned Reg, MachineOperand &Op,
484
                                      MachineInstr *Def, MachineBasicBlock &MBB,
485
                                      MachineInstr *Insert, LiveIntervals &LIS,
486
                                      WebAssemblyFunctionInfo &MFI,
487
21.9k
                                      MachineRegisterInfo &MRI) {
488
21.9k
  LLVM_DEBUG(dbgs() << "Move for single use: "; Def->dump());
489
21.9k
490
21.9k
  WebAssemblyDebugValueManager DefDIs(Def);
491
21.9k
  MBB.splice(Insert, &MBB, Def);
492
21.9k
  DefDIs.move(Insert);
493
21.9k
  LIS.handleMove(*Def);
494
21.9k
495
21.9k
  if (MRI.hasOneDef(Reg) && 
MRI.hasOneUse(Reg)21.9k
) {
496
21.9k
    // No one else is using this register for anything so we can just stackify
497
21.9k
    // it in place.
498
21.9k
    MFI.stackifyVReg(Reg);
499
21.9k
  } else {
500
13
    // The register may have unrelated uses or defs; create a new register for
501
13
    // just our one def and use so that we can stackify it.
502
13
    unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
503
13
    Def->getOperand(0).setReg(NewReg);
504
13
    Op.setReg(NewReg);
505
13
506
13
    // Tell LiveIntervals about the new register.
507
13
    LIS.createAndComputeVirtRegInterval(NewReg);
508
13
509
13
    // Tell LiveIntervals about the changes to the old register.
510
13
    LiveInterval &LI = LIS.getInterval(Reg);
511
13
    LI.removeSegment(LIS.getInstructionIndex(*Def).getRegSlot(),
512
13
                     LIS.getInstructionIndex(*Op.getParent()).getRegSlot(),
513
13
                     /*RemoveDeadValNo=*/true);
514
13
515
13
    MFI.stackifyVReg(NewReg);
516
13
517
13
    DefDIs.updateReg(NewReg);
518
13
519
13
    LLVM_DEBUG(dbgs() << " - Replaced register: "; Def->dump());
520
13
  }
521
21.9k
522
21.9k
  imposeStackOrdering(Def);
523
21.9k
  return Def;
524
21.9k
}
525
526
/// A trivially cloneable instruction; clone it and nest the new copy with the
527
/// current instruction.
528
static MachineInstr *rematerializeCheapDef(
529
    unsigned Reg, MachineOperand &Op, MachineInstr &Def, MachineBasicBlock &MBB,
530
    MachineBasicBlock::instr_iterator Insert, LiveIntervals &LIS,
531
    WebAssemblyFunctionInfo &MFI, MachineRegisterInfo &MRI,
532
3.88k
    const WebAssemblyInstrInfo *TII, const WebAssemblyRegisterInfo *TRI) {
533
3.88k
  LLVM_DEBUG(dbgs() << "Rematerializing cheap def: "; Def.dump());
534
3.88k
  LLVM_DEBUG(dbgs() << " - for use in "; Op.getParent()->dump());
535
3.88k
536
3.88k
  WebAssemblyDebugValueManager DefDIs(&Def);
537
3.88k
538
3.88k
  unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
539
3.88k
  TII->reMaterialize(MBB, Insert, NewReg, 0, Def, *TRI);
540
3.88k
  Op.setReg(NewReg);
541
3.88k
  MachineInstr *Clone = &*std::prev(Insert);
542
3.88k
  LIS.InsertMachineInstrInMaps(*Clone);
543
3.88k
  LIS.createAndComputeVirtRegInterval(NewReg);
544
3.88k
  MFI.stackifyVReg(NewReg);
545
3.88k
  imposeStackOrdering(Clone);
546
3.88k
547
3.88k
  LLVM_DEBUG(dbgs() << " - Cloned to "; Clone->dump());
548
3.88k
549
3.88k
  // Shrink the interval.
550
3.88k
  bool IsDead = MRI.use_empty(Reg);
551
3.88k
  if (!IsDead) {
552
3.75k
    LiveInterval &LI = LIS.getInterval(Reg);
553
3.75k
    shrinkToUses(LI, LIS);
554
3.75k
    IsDead = !LI.liveAt(LIS.getInstructionIndex(Def).getDeadSlot());
555
3.75k
  }
556
3.88k
557
3.88k
  // If that was the last use of the original, delete the original.
558
3.88k
  // Move or clone corresponding DBG_VALUEs to the 'Insert' location.
559
3.88k
  if (IsDead) {
560
134
    LLVM_DEBUG(dbgs() << " - Deleting original\n");
561
134
    SlotIndex Idx = LIS.getInstructionIndex(Def).getRegSlot();
562
134
    LIS.removePhysRegDefAt(WebAssembly::ARGUMENTS, Idx);
563
134
    LIS.removeInterval(Reg);
564
134
    LIS.RemoveMachineInstrFromMaps(Def);
565
134
    Def.eraseFromParent();
566
134
567
134
    DefDIs.move(&*Insert);
568
134
    DefDIs.updateReg(NewReg);
569
3.75k
  } else {
570
3.75k
    DefDIs.clone(&*Insert, NewReg);
571
3.75k
  }
572
3.88k
573
3.88k
  return Clone;
574
3.88k
}
575
576
/// A multiple-use def in the same block with no intervening memory or register
577
/// dependencies; move the def down, nest it with the current instruction, and
578
/// insert a tee to satisfy the rest of the uses. As an illustration, rewrite
579
/// this:
580
///
581
///    Reg = INST ...        // Def
582
///    INST ..., Reg, ...    // Insert
583
///    INST ..., Reg, ...
584
///    INST ..., Reg, ...
585
///
586
/// to this:
587
///
588
///    DefReg = INST ...     // Def (to become the new Insert)
589
///    TeeReg, Reg = TEE_... DefReg
590
///    INST ..., TeeReg, ... // Insert
591
///    INST ..., Reg, ...
592
///    INST ..., Reg, ...
593
///
594
/// with DefReg and TeeReg stackified. This eliminates a local.get from the
595
/// resulting code.
596
static MachineInstr *moveAndTeeForMultiUse(
597
    unsigned Reg, MachineOperand &Op, MachineInstr *Def, MachineBasicBlock &MBB,
598
    MachineInstr *Insert, LiveIntervals &LIS, WebAssemblyFunctionInfo &MFI,
599
642
    MachineRegisterInfo &MRI, const WebAssemblyInstrInfo *TII) {
600
642
  LLVM_DEBUG(dbgs() << "Move and tee for multi-use:"; Def->dump());
601
642
602
642
  WebAssemblyDebugValueManager DefDIs(Def);
603
642
604
642
  // Move Def into place.
605
642
  MBB.splice(Insert, &MBB, Def);
606
642
  LIS.handleMove(*Def);
607
642
608
642
  // Create the Tee and attach the registers.
609
642
  const auto *RegClass = MRI.getRegClass(Reg);
610
642
  unsigned TeeReg = MRI.createVirtualRegister(RegClass);
611
642
  unsigned DefReg = MRI.createVirtualRegister(RegClass);
612
642
  MachineOperand &DefMO = Def->getOperand(0);
613
642
  MachineInstr *Tee = BuildMI(MBB, Insert, Insert->getDebugLoc(),
614
642
                              TII->get(getTeeOpcode(RegClass)), TeeReg)
615
642
                          .addReg(Reg, RegState::Define)
616
642
                          .addReg(DefReg, getUndefRegState(DefMO.isDead()));
617
642
  Op.setReg(TeeReg);
618
642
  DefMO.setReg(DefReg);
619
642
  SlotIndex TeeIdx = LIS.InsertMachineInstrInMaps(*Tee).getRegSlot();
620
642
  SlotIndex DefIdx = LIS.getInstructionIndex(*Def).getRegSlot();
621
642
622
642
  DefDIs.move(Insert);
623
642
624
642
  // Tell LiveIntervals we moved the original vreg def from Def to Tee.
625
642
  LiveInterval &LI = LIS.getInterval(Reg);
626
642
  LiveInterval::iterator I = LI.FindSegmentContaining(DefIdx);
627
642
  VNInfo *ValNo = LI.getVNInfoAt(DefIdx);
628
642
  I->start = TeeIdx;
629
642
  ValNo->def = TeeIdx;
630
642
  shrinkToUses(LI, LIS);
631
642
632
642
  // Finish stackifying the new regs.
633
642
  LIS.createAndComputeVirtRegInterval(TeeReg);
634
642
  LIS.createAndComputeVirtRegInterval(DefReg);
635
642
  MFI.stackifyVReg(DefReg);
636
642
  MFI.stackifyVReg(TeeReg);
637
642
  imposeStackOrdering(Def);
638
642
  imposeStackOrdering(Tee);
639
642
640
642
  DefDIs.clone(Tee, DefReg);
641
642
  DefDIs.clone(Insert, TeeReg);
642
642
643
642
  LLVM_DEBUG(dbgs() << " - Replaced register: "; Def->dump());
644
642
  LLVM_DEBUG(dbgs() << " - Tee instruction: "; Tee->dump());
645
642
  return Def;
646
642
}
647
648
namespace {
649
/// A stack for walking the tree of instructions being built, visiting the
650
/// MachineOperands in DFS order.
651
class TreeWalkerState {
652
  using mop_iterator = MachineInstr::mop_iterator;
653
  using mop_reverse_iterator = std::reverse_iterator<mop_iterator>;
654
  using RangeTy = iterator_range<mop_reverse_iterator>;
655
  SmallVector<RangeTy, 4> Worklist;
656
657
public:
658
21.3k
  explicit TreeWalkerState(MachineInstr *Insert) {
659
21.3k
    const iterator_range<mop_iterator> &Range = Insert->explicit_uses();
660
21.3k
    if (Range.begin() != Range.end())
661
19.6k
      Worklist.push_back(reverse(Range));
662
21.3k
  }
663
664
101k
  bool done() const { return Worklist.empty(); }
665
666
80.6k
  MachineOperand &pop() {
667
80.6k
    RangeTy &Range = Worklist.back();
668
80.6k
    MachineOperand &Op = *Range.begin();
669
80.6k
    Range = drop_begin(Range, 1);
670
80.6k
    if (Range.begin() == Range.end())
671
46.0k
      Worklist.pop_back();
672
80.6k
    assert((Worklist.empty() ||
673
80.6k
            Worklist.back().begin() != Worklist.back().end()) &&
674
80.6k
           "Empty ranges shouldn't remain in the worklist");
675
80.6k
    return Op;
676
80.6k
  }
677
678
  /// Push Instr's operands onto the stack to be visited.
679
26.4k
  void pushOperands(MachineInstr *Instr) {
680
26.4k
    const iterator_range<mop_iterator> &Range(Instr->explicit_uses());
681
26.4k
    if (Range.begin() != Range.end())
682
26.4k
      Worklist.push_back(reverse(Range));
683
26.4k
  }
684
685
  /// Some of Instr's operands are on the top of the stack; remove them and
686
  /// re-insert them starting from the beginning (because we've commuted them).
687
15
  void resetTopOperands(MachineInstr *Instr) {
688
15
    assert(hasRemainingOperands(Instr) &&
689
15
           "Reseting operands should only be done when the instruction has "
690
15
           "an operand still on the stack");
691
15
    Worklist.back() = reverse(Instr->explicit_uses());
692
15
  }
693
694
  /// Test whether Instr has operands remaining to be visited at the top of
695
  /// the stack.
696
434
  bool hasRemainingOperands(const MachineInstr *Instr) const {
697
434
    if (Worklist.empty())
698
212
      return false;
699
222
    const RangeTy &Range = Worklist.back();
700
222
    return Range.begin() != Range.end() && Range.begin()->getParent() == Instr;
701
222
  }
702
703
  /// Test whether the given register is present on the stack, indicating an
704
  /// operand in the tree that we haven't visited yet. Moving a definition of
705
  /// Reg to a point in the tree after that would change its value.
706
  ///
707
  /// This is needed as a consequence of using implicit local.gets for
708
  /// uses and implicit local.sets for defs.
709
28.2k
  bool isOnStack(unsigned Reg) const {
710
28.2k
    for (const RangeTy &Range : Worklist)
711
41.6k
      for (const MachineOperand &MO : Range)
712
76.8k
        if (MO.isReg() && 
MO.getReg() == Reg36.0k
)
713
163
          return true;
714
28.2k
    
return false28.0k
;
715
28.2k
  }
716
};
717
718
/// State to keep track of whether commuting is in flight or whether it's been
719
/// tried for the current instruction and didn't work.
720
class CommutingState {
721
  /// There are effectively three states: the initial state where we haven't
722
  /// started commuting anything and we don't know anything yet, the tentative
723
  /// state where we've commuted the operands of the current instruction and are
724
  /// revisiting it, and the declined state where we've reverted the operands
725
  /// back to their original order and will no longer commute it further.
726
  bool TentativelyCommuting = false;
727
  bool Declined = false;
728
729
  /// During the tentative state, these hold the operand indices of the commuted
730
  /// operands.
731
  unsigned Operand0, Operand1;
732
733
public:
734
  /// Stackification for an operand was not successful due to ordering
735
  /// constraints. If possible, and if we haven't already tried it and declined
736
  /// it, commute Insert's operands and prepare to revisit it.
737
  void maybeCommute(MachineInstr *Insert, TreeWalkerState &TreeWalker,
738
451
                    const WebAssemblyInstrInfo *TII) {
739
451
    if (TentativelyCommuting) {
740
9
      assert(!Declined &&
741
9
             "Don't decline commuting until you've finished trying it");
742
9
      // Commuting didn't help. Revert it.
743
9
      TII->commuteInstruction(*Insert, /*NewMI=*/false, Operand0, Operand1);
744
9
      TentativelyCommuting = false;
745
9
      Declined = true;
746
442
    } else if (!Declined && 
TreeWalker.hasRemainingOperands(Insert)434
) {
747
117
      Operand0 = TargetInstrInfo::CommuteAnyOperandIndex;
748
117
      Operand1 = TargetInstrInfo::CommuteAnyOperandIndex;
749
117
      if (TII->findCommutedOpIndices(*Insert, Operand0, Operand1)) {
750
15
        // Tentatively commute the operands and try again.
751
15
        TII->commuteInstruction(*Insert, /*NewMI=*/false, Operand0, Operand1);
752
15
        TreeWalker.resetTopOperands(Insert);
753
15
        TentativelyCommuting = true;
754
15
        Declined = false;
755
15
      }
756
117
    }
757
451
  }
758
759
  /// Stackification for some operand was successful. Reset to the default
760
  /// state.
761
26.4k
  void reset() {
762
26.4k
    TentativelyCommuting = false;
763
26.4k
    Declined = false;
764
26.4k
  }
765
};
766
} // end anonymous namespace
767
768
4.31k
bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "********** Register Stackifying **********\n"
                       "********** Function: "
                    << MF.getName() << '\n');

  // Per-function state and analyses handed to the helpers below.
  bool Changed = false;
  MachineRegisterInfo &MRI = MF.getRegInfo();
  WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
  const WebAssemblySubtarget &ST = MF.getSubtarget<WebAssemblySubtarget>();
  const auto *TII = ST.getInstrInfo();
  const auto *TRI = ST.getRegisterInfo();
  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  auto &MDT = getAnalysis<MachineDominatorTree>();
  auto &LIS = getAnalysis<LiveIntervals>();

  // Instructions are visited bottom-up within each block. Nothing here looks
  // across block boundaries, so the order in which blocks are visited does
  // not currently matter (that may change in the future).
  for (MachineBasicBlock &MBB : MF) {
    // An explicit iterator loop is required: the instruction list is edited
    // while it is being walked, and the end iterator may change.
    for (auto It = MBB.rbegin(); It != MBB.rend(); ++It) {
      MachineInstr *Insert = &*It;

      // Inline asm is never used as a tree root because there are no
      // constraints for $push inputs; debugging intrinsics are ignored.
      if (Insert->isInlineAsm() || Insert->isDebugValue())
        continue;

      // Operands are walked in reverse, since they will be pulled off the
      // value stack in LIFO order.
      CommutingState Commuting;
      TreeWalkerState TreeWalker(Insert);
      while (!TreeWalker.done()) {
        MachineOperand &Use = TreeWalker.pop();

        // Only explicit virtual register operands are of interest.
        if (!Use.isReg())
          continue;

        unsigned VReg = Use.getReg();
        assert(Use.isUse() && "explicit_uses() should only iterate over uses");
        assert(!Use.isImplicit() &&
               "explicit_uses() should only iterate over explicit operands");
        if (TargetRegisterInfo::isPhysicalRegister(VReg))
          continue;

        // Find the definition that reaches this use at the insertion point.
        MachineInstr *DefMI = getVRegDef(VReg, Insert, MRI, LIS);
        if (!DefMI)
          continue;

        // An INLINE_ASM def is never nested into anything, because there are
        // no constraints for $pop outputs.
        if (DefMI->isInlineAsm())
          continue;

        // Argument instructions stand for live-in registers rather than real
        // instructions.
        if (WebAssembly::isArgument(DefMI->getOpcode()))
          continue;

        // The result register of a catch cannot be stackified yet: the wasm
        // LLVM backend does not support live-in values entering blocks (part
        // of the multi-value proposal). With that support this could become:
        //
        //   catch                           ; push exnref value onto stack
        //   block exnref -> i32
        //   br_on_exn $__cpp_exception      ; pop the exnref value
        //   end_block
        //
        // Until then the catch's dst register has to live in a local so it
        // can be propagated across the 'block' boundary.
        //
        // TODO Fix this once we support the multi-value proposal.
        if (DefMI->getOpcode() == WebAssembly::CATCH)
          continue;

        // Pick a strategy: moving a single-use value is preferred over
        // cloning it, and cloning is preferred over introducing a tee.
        // Moving requires the def to live in the same block as the use; that
        // keeps things simple (LiveIntervals' handleMove function only
        // supports intra-block moves), and catching every sinking opportunity
        // is MachineSink's job anyway.
        const bool DefInThisBlock = DefMI->getParent() == &MBB;
        bool Movable = false;
        if (DefInThisBlock && isSafeToMove(DefMI, Insert, AA, MRI))
          Movable = !TreeWalker.isOnStack(VReg);

        if (Movable && hasOneUse(VReg, DefMI, MRI, MDT, LIS)) {
          Insert =
              moveForSingleUse(VReg, Use, DefMI, MBB, Insert, LIS, MFI, MRI);
        } else if (shouldRematerialize(*DefMI, AA, TII)) {
          Insert =
              rematerializeCheapDef(VReg, Use, *DefMI, MBB,
                                    Insert->getIterator(), LIS, MFI, MRI, TII,
                                    TRI);
        } else if (Movable && oneUseDominatesOtherUses(VReg, Use, MBB, MRI, MDT,
                                                       LIS, MFI)) {
          Insert = moveAndTeeForMultiUse(VReg, Use, DefMI, MBB, Insert, LIS,
                                         MFI, MRI, TII);
        } else {
          // This operand could not be stackified. When ordering constraints
          // were the obstacle, commuting the operands may help.
          if (DefInThisBlock && !Movable)
            Commuting.maybeCommute(Insert, TreeWalker, TII);
          // Move on to the next operand.
          continue;
        }

        // A stackified IMPLICIT_DEF is turned into a constant 0 so that the
        // def is explicit and the push/pop correspondence is maintained.
        if (Insert->getOpcode() == TargetOpcode::IMPLICIT_DEF)
          convertImplicitDefToConstZero(Insert, MRI, TII, MF, LIS);

        // An operand was stackified: queue the defining instruction's own
        // operands so the tree keeps growing deeper.
        Commuting.reset();
        TreeWalker.pushOperands(Insert);
      }

      // When anything was stackified, skip past the tree just built and
      // resume the search for the next tree root from there.
      if (Insert != &*It) {
        imposeStackOrdering(&*It);
        It = MachineBasicBlock::iterator(Insert).getReverse();
        Changed = true;
      }
    }
  }

  // If VALUE_STACK was used anywhere, mark it live-in everywhere so that it
  // never looks like a use-before-def.
  if (Changed) {
    MRI.addLiveIn(WebAssembly::VALUE_STACK);
    for (MachineBasicBlock &MBB : MF)
      MBB.addLiveIn(WebAssembly::VALUE_STACK);
  }

#ifndef NDEBUG
  // Check that pushes and pops occur in LIFO order.
  SmallVector<unsigned, 0> Stack;
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      if (MI.isDebugInstr())
        continue;
      for (MachineOperand &MO : reverse(MI.explicit_operands())) {
        if (!MO.isReg())
          continue;
        unsigned Reg = MO.getReg();
        if (!MFI.isVRegStackified(Reg))
          continue;

        // A stackified def is a push; a stackified use is a pop that has to
        // match the most recent push.
        if (MO.isDef())
          Stack.push_back(Reg);
        else
          assert(Stack.pop_back_val() == Reg &&
                 "Register stack pop should be paired with a push");
      }
    }
    // TODO: Generalize this code to support keeping values on the stack across
    // basic block boundaries.
    assert(Stack.empty() &&
           "Register stack pushes and pops should be balanced");
  }
#endif

  return Changed;
}