Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file contains a printer that converts from our internal representation
10
// of machine-dependent LLVM code to NVPTX assembly language.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "NVPTXAsmPrinter.h"
15
#include "MCTargetDesc/NVPTXBaseInfo.h"
16
#include "MCTargetDesc/NVPTXInstPrinter.h"
17
#include "MCTargetDesc/NVPTXMCAsmInfo.h"
18
#include "MCTargetDesc/NVPTXTargetStreamer.h"
19
#include "NVPTX.h"
20
#include "NVPTXMCExpr.h"
21
#include "NVPTXMachineFunctionInfo.h"
22
#include "NVPTXRegisterInfo.h"
23
#include "NVPTXSubtarget.h"
24
#include "NVPTXTargetMachine.h"
25
#include "NVPTXUtilities.h"
26
#include "TargetInfo/NVPTXTargetInfo.h"
27
#include "cl_common_defines.h"
28
#include "llvm/ADT/APFloat.h"
29
#include "llvm/ADT/APInt.h"
30
#include "llvm/ADT/DenseMap.h"
31
#include "llvm/ADT/DenseSet.h"
32
#include "llvm/ADT/SmallString.h"
33
#include "llvm/ADT/SmallVector.h"
34
#include "llvm/ADT/StringExtras.h"
35
#include "llvm/ADT/StringRef.h"
36
#include "llvm/ADT/Triple.h"
37
#include "llvm/ADT/Twine.h"
38
#include "llvm/Analysis/ConstantFolding.h"
39
#include "llvm/CodeGen/Analysis.h"
40
#include "llvm/CodeGen/MachineBasicBlock.h"
41
#include "llvm/CodeGen/MachineFrameInfo.h"
42
#include "llvm/CodeGen/MachineFunction.h"
43
#include "llvm/CodeGen/MachineInstr.h"
44
#include "llvm/CodeGen/MachineLoopInfo.h"
45
#include "llvm/CodeGen/MachineModuleInfo.h"
46
#include "llvm/CodeGen/MachineOperand.h"
47
#include "llvm/CodeGen/MachineRegisterInfo.h"
48
#include "llvm/CodeGen/TargetLowering.h"
49
#include "llvm/CodeGen/TargetRegisterInfo.h"
50
#include "llvm/CodeGen/ValueTypes.h"
51
#include "llvm/IR/Attributes.h"
52
#include "llvm/IR/BasicBlock.h"
53
#include "llvm/IR/Constant.h"
54
#include "llvm/IR/Constants.h"
55
#include "llvm/IR/DataLayout.h"
56
#include "llvm/IR/DebugInfo.h"
57
#include "llvm/IR/DebugInfoMetadata.h"
58
#include "llvm/IR/DebugLoc.h"
59
#include "llvm/IR/DerivedTypes.h"
60
#include "llvm/IR/Function.h"
61
#include "llvm/IR/GlobalValue.h"
62
#include "llvm/IR/GlobalVariable.h"
63
#include "llvm/IR/Instruction.h"
64
#include "llvm/IR/LLVMContext.h"
65
#include "llvm/IR/Module.h"
66
#include "llvm/IR/Operator.h"
67
#include "llvm/IR/Type.h"
68
#include "llvm/IR/User.h"
69
#include "llvm/MC/MCExpr.h"
70
#include "llvm/MC/MCInst.h"
71
#include "llvm/MC/MCInstrDesc.h"
72
#include "llvm/MC/MCStreamer.h"
73
#include "llvm/MC/MCSymbol.h"
74
#include "llvm/Support/Casting.h"
75
#include "llvm/Support/CommandLine.h"
76
#include "llvm/Support/ErrorHandling.h"
77
#include "llvm/Support/MachineValueType.h"
78
#include "llvm/Support/Path.h"
79
#include "llvm/Support/TargetRegistry.h"
80
#include "llvm/Support/raw_ostream.h"
81
#include "llvm/Target/TargetLoweringObjectFile.h"
82
#include "llvm/Target/TargetMachine.h"
83
#include "llvm/Transforms/Utils/UnrollLoop.h"
84
#include <cassert>
85
#include <cstdint>
86
#include <cstring>
87
#include <new>
88
#include <string>
89
#include <utility>
90
#include <vector>
91
92
using namespace llvm;
93
94
25
// Name prefix of the per-function frame symbol; getFunctionFrameSymbol()
// appends the function number to it.
#define DEPOTNAME "__local_depot"
95
96
/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
97
/// depends.
98
static void
99
DiscoverDependentGlobals(const Value *V,
100
93
                         DenseSet<const GlobalVariable *> &Globals) {
101
93
  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
102
10
    Globals.insert(GV);
103
83
  else {
104
83
    if (const User *U = dyn_cast<User>(V)) {
105
102
      for (unsigned i = 0, e = U->getNumOperands(); i != e; 
++i19
) {
106
19
        DiscoverDependentGlobals(U->getOperand(i), Globals);
107
19
      }
108
83
    }
109
83
  }
110
93
}
111
112
/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
113
/// instances to be emitted, but only after any dependents have been added
114
/// first.s
115
static void
116
VisitGlobalVariableForEmission(const GlobalVariable *GV,
117
                               SmallVectorImpl<const GlobalVariable *> &Order,
118
                               DenseSet<const GlobalVariable *> &Visited,
119
88
                               DenseSet<const GlobalVariable *> &Visiting) {
120
88
  // Have we already visited this one?
121
88
  if (Visited.count(GV))
122
8
    return;
123
80
124
80
  // Do we have a circular dependency?
125
80
  if (!Visiting.insert(GV).second)
126
0
    report_fatal_error("Circular dependency found in global variable set");
127
80
128
80
  // Make sure we visit all dependents first
129
80
  DenseSet<const GlobalVariable *> Others;
130
154
  for (unsigned i = 0, e = GV->getNumOperands(); i != e; 
++i74
)
131
74
    DiscoverDependentGlobals(GV->getOperand(i), Others);
132
80
133
80
  for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(),
134
80
                                                  E = Others.end();
135
88
       I != E; 
++I8
)
136
8
    VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
137
80
138
80
  // Now we can visit ourself
139
80
  Order.push_back(GV);
140
80
  Visited.insert(GV);
141
80
  Visiting.erase(GV);
142
80
}
143
144
13.4k
void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
145
13.4k
  MCInst Inst;
146
13.4k
  lowerToMCInst(MI, Inst);
147
13.4k
  EmitToStreamer(*OutStreamer, Inst);
148
13.4k
}
149
150
// Handle symbol backtracking for targets that do not support image handles
151
bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
152
22.8k
                                           unsigned OpNo, MCOperand &MCOp) {
153
22.8k
  const MachineOperand &MO = MI->getOperand(OpNo);
154
22.8k
  const MCInstrDesc &MCID = MI->getDesc();
155
22.8k
156
22.8k
  if (MCID.TSFlags & NVPTXII::IsTexFlag) {
157
19
    // This is a texture fetch, so operand 4 is a texref and operand 5 is
158
19
    // a samplerref
159
19
    if (OpNo == 4 && 
MO.isImm()3
) {
160
2
      lowerImageHandleSymbol(MO.getImm(), MCOp);
161
2
      return true;
162
2
    }
163
17
    if (OpNo == 5 && 
MO.isImm()3
&&
!(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)1
) {
164
1
      lowerImageHandleSymbol(MO.getImm(), MCOp);
165
1
      return true;
166
1
    }
167
16
168
16
    return false;
169
22.8k
  } else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
170
9
    unsigned VecSize =
171
9
      1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
172
9
173
9
    // For a surface load of vector size N, the Nth operand will be the surfref
174
9
    if (OpNo == VecSize && 
MO.isImm()3
) {
175
2
      lowerImageHandleSymbol(MO.getImm(), MCOp);
176
2
      return true;
177
2
    }
178
7
179
7
    return false;
180
22.8k
  } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
181
9
    // This is a surface store, so operand 0 is a surfref
182
9
    if (OpNo == 0 && 
MO.isImm()3
) {
183
2
      lowerImageHandleSymbol(MO.getImm(), MCOp);
184
2
      return true;
185
2
    }
186
7
187
7
    return false;
188
22.8k
  } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
189
16
    // This is a query, so operand 1 is a surfref/texref
190
16
    if (OpNo == 1 && 
MO.isImm()8
) {
191
4
      lowerImageHandleSymbol(MO.getImm(), MCOp);
192
4
      return true;
193
4
    }
194
12
195
12
    return false;
196
12
  }
197
22.8k
198
22.8k
  return false;
199
22.8k
}
200
201
11
void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
202
11
  // Ewwww
203
11
  LLVMTargetMachine &TM = const_cast<LLVMTargetMachine&>(MF->getTarget());
204
11
  NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
205
11
  const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>();
206
11
  const char *Sym = MFI->getImageHandleSymbol(Index);
207
11
  std::string *SymNamePtr =
208
11
    nvTM.getManagedStrPool()->getManagedString(Sym);
209
11
  MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(StringRef(*SymNamePtr)));
210
11
}
211
212
13.4k
void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
213
13.4k
  OutMI.setOpcode(MI->getOpcode());
214
13.4k
  // Special: Do not mangle symbol operand of CALL_PROTOTYPE
215
13.4k
  if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
216
1
    const MachineOperand &MO = MI->getOperand(0);
217
1
    OutMI.addOperand(GetSymbolRef(
218
1
      OutContext.getOrCreateSymbol(Twine(MO.getSymbolName()))));
219
1
    return;
220
1
  }
221
13.4k
222
13.4k
  const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
223
59.0k
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; 
++i45.6k
) {
224
45.6k
    const MachineOperand &MO = MI->getOperand(i);
225
45.6k
226
45.6k
    MCOperand MCOp;
227
45.6k
    if (!STI.hasImageHandles()) {
228
22.8k
      if (lowerImageHandleOperand(MI, i, MCOp)) {
229
11
        OutMI.addOperand(MCOp);
230
11
        continue;
231
11
      }
232
45.6k
    }
233
45.6k
234
45.6k
    if (lowerOperand(MO, MCOp))
235
45.6k
      OutMI.addOperand(MCOp);
236
45.6k
  }
237
13.4k
}
238
239
bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
240
45.6k
                                   MCOperand &MCOp) {
241
45.6k
  switch (MO.getType()) {
242
45.6k
  
default: 0
llvm_unreachable0
("unknown operand type");
243
45.6k
  case MachineOperand::MO_Register:
244
18.1k
    MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
245
18.1k
    break;
246
45.6k
  case MachineOperand::MO_Immediate:
247
24.3k
    MCOp = MCOperand::createImm(MO.getImm());
248
24.3k
    break;
249
45.6k
  case MachineOperand::MO_MachineBasicBlock:
250
150
    MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(
251
150
        MO.getMBB()->getSymbol(), OutContext));
252
150
    break;
253
45.6k
  case MachineOperand::MO_ExternalSymbol:
254
2.69k
    MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName()));
255
2.69k
    break;
256
45.6k
  case MachineOperand::MO_GlobalAddress:
257
227
    MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));
258
227
    break;
259
45.6k
  case MachineOperand::MO_FPImmediate: {
260
132
    const ConstantFP *Cnt = MO.getFPImm();
261
132
    const APFloat &Val = Cnt->getValueAPF();
262
132
263
132
    switch (Cnt->getType()->getTypeID()) {
264
132
    
default: report_fatal_error("Unsupported FP type")0
;
break0
;
265
132
    case Type::HalfTyID:
266
10
      MCOp = MCOperand::createExpr(
267
10
        NVPTXFloatMCExpr::createConstantFPHalf(Val, OutContext));
268
10
      break;
269
132
    case Type::FloatTyID:
270
102
      MCOp = MCOperand::createExpr(
271
102
        NVPTXFloatMCExpr::createConstantFPSingle(Val, OutContext));
272
102
      break;
273
132
    case Type::DoubleTyID:
274
20
      MCOp = MCOperand::createExpr(
275
20
        NVPTXFloatMCExpr::createConstantFPDouble(Val, OutContext));
276
20
      break;
277
132
    }
278
132
    break;
279
132
  }
280
45.6k
  }
281
45.6k
  return true;
282
45.6k
}
283
284
18.1k
unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
285
18.1k
  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
286
17.9k
    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
287
17.9k
288
17.9k
    DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
289
17.9k
    unsigned RegNum = RegMap[Reg];
290
17.9k
291
17.9k
    // Encode the register class in the upper 4 bits
292
17.9k
    // Must be kept in sync with NVPTXInstPrinter::printRegName
293
17.9k
    unsigned Ret = 0;
294
17.9k
    if (RC == &NVPTX::Int1RegsRegClass) {
295
884
      Ret = (1 << 28);
296
17.0k
    } else if (RC == &NVPTX::Int16RegsRegClass) {
297
1.75k
      Ret = (2 << 28);
298
15.2k
    } else if (RC == &NVPTX::Int32RegsRegClass) {
299
4.68k
      Ret = (3 << 28);
300
10.5k
    } else if (RC == &NVPTX::Int64RegsRegClass) {
301
2.93k
      Ret = (4 << 28);
302
7.64k
    } else if (RC == &NVPTX::Float32RegsRegClass) {
303
3.36k
      Ret = (5 << 28);
304
4.28k
    } else if (RC == &NVPTX::Float64RegsRegClass) {
305
624
      Ret = (6 << 28);
306
3.65k
    } else if (RC == &NVPTX::Float16RegsRegClass) {
307
2.72k
      Ret = (7 << 28);
308
2.72k
    } else 
if (931
RC == &NVPTX::Float16x2RegsRegClass931
) {
309
931
      Ret = (8 << 28);
310
931
    } else {
311
0
      report_fatal_error("Bad register class");
312
0
    }
313
17.9k
314
17.9k
    // Insert the vreg number
315
17.9k
    Ret |= (RegNum & 0x0FFFFFFF);
316
17.9k
    return Ret;
317
17.9k
  } else {
318
207
    // Some special-use registers are actually physical registers.
319
207
    // Encode this as the register class ID of 0 and the real register ID.
320
207
    return Reg & 0x0FFFFFFF;
321
207
  }
322
18.1k
}
323
324
2.93k
/// Wrap \p Symbol in a VK_None symbol-reference expression and return it as
/// an expression operand.
MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
  const MCExpr *Ref =
      MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, OutContext);
  return MCOperand::createExpr(Ref);
}
330
331
1.66k
void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
332
1.66k
  const DataLayout &DL = getDataLayout();
333
1.66k
  const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
334
1.66k
  const TargetLowering *TLI = STI.getTargetLowering();
335
1.66k
336
1.66k
  Type *Ty = F->getReturnType();
337
1.66k
338
1.66k
  bool isABI = (STI.getSmVersion() >= 20);
339
1.66k
340
1.66k
  if (Ty->getTypeID() == Type::VoidTyID)
341
289
    return;
342
1.37k
343
1.37k
  O << " (";
344
1.37k
345
1.37k
  if (isABI) {
346
1.37k
    if (Ty->isFloatingPointTy() || 
(953
Ty->isIntegerTy()953
&&
!Ty->isIntegerTy(128)672
)) {
347
1.08k
      unsigned size = 0;
348
1.08k
      if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
349
664
        size = ITy->getBitWidth();
350
664
      } else {
351
418
        assert(Ty->isFloatingPointTy() && "Floating point type expected here");
352
418
        size = Ty->getPrimitiveSizeInBits();
353
418
      }
354
1.08k
      // PTX ABI requires all scalar return values to be at least 32
355
1.08k
      // bits in size.  fp16 normally uses .b16 as its storage type in
356
1.08k
      // PTX, so its size must be adjusted here, too.
357
1.08k
      if (size < 32)
358
416
        size = 32;
359
1.08k
360
1.08k
      O << ".param .b" << size << " func_retval0";
361
1.08k
    } else 
if (289
isa<PointerType>(Ty)289
) {
362
7
      O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits()
363
7
        << " func_retval0";
364
282
    } else if (Ty->isAggregateType() || 
Ty->isVectorTy()264
||
Ty->isIntegerTy(128)8
) {
365
282
      unsigned totalsz = DL.getTypeAllocSize(Ty);
366
282
      unsigned retAlignment = 0;
367
282
      if (!getAlign(*F, 0, retAlignment))
368
282
        retAlignment = DL.getABITypeAlignment(Ty);
369
282
      O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
370
282
        << "]";
371
282
    } else
372
282
      
llvm_unreachable0
("Unknown return type");
373
1.37k
  } else {
374
0
    SmallVector<EVT, 16> vtparts;
375
0
    ComputeValueVTs(*TLI, DL, Ty, vtparts);
376
0
    unsigned idx = 0;
377
0
    for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
378
0
      unsigned elems = 1;
379
0
      EVT elemtype = vtparts[i];
380
0
      if (vtparts[i].isVector()) {
381
0
        elems = vtparts[i].getVectorNumElements();
382
0
        elemtype = vtparts[i].getVectorElementType();
383
0
      }
384
0
385
0
      for (unsigned j = 0, je = elems; j != je; ++j) {
386
0
        unsigned sz = elemtype.getSizeInBits();
387
0
        if (elemtype.isInteger() && (sz < 32))
388
0
          sz = 32;
389
0
        O << ".reg .b" << sz << " func_retval" << idx;
390
0
        if (j < je - 1)
391
0
          O << ", ";
392
0
        ++idx;
393
0
      }
394
0
      if (i < e - 1)
395
0
        O << ", ";
396
0
    }
397
0
  }
398
1.37k
  O << ") ";
399
1.37k
}
400
401
void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
402
1.58k
                                        raw_ostream &O) {
403
1.58k
  const Function &F = MF.getFunction();
404
1.58k
  printReturnValStr(&F, O);
405
1.58k
}
406
407
// Return true if MBB is the header of a loop marked with
408
// llvm.loop.unroll.disable.
409
// TODO: consider "#pragma unroll 1" which is equivalent to "#pragma nounroll".
410
bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
411
1.83k
    const MachineBasicBlock &MBB) const {
412
1.83k
  MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
413
1.83k
  // We insert .pragma "nounroll" only to the loop header.
414
1.83k
  if (!LI.isLoopHeader(&MBB))
415
1.81k
    return false;
416
23
417
23
  // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
418
23
  // we iterate through each back edge of the loop with header MBB, and check
419
23
  // whether its metadata contains llvm.loop.unroll.disable.
420
69
  
for (auto I = MBB.pred_begin(); 23
I != MBB.pred_end();
++I46
) {
421
47
    const MachineBasicBlock *PMBB = *I;
422
47
    if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
423
24
      // Edges from other loops to MBB are not back edges.
424
24
      continue;
425
24
    }
426
23
    if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
427
23
      if (MDNode *LoopID =
428
1
              PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
429
1
        if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
430
1
          return true;
431
1
      }
432
23
    }
433
23
  }
434
23
  
return false22
;
435
23
}
436
437
1.83k
void NVPTXAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
438
1.83k
  AsmPrinter::EmitBasicBlockStart(MBB);
439
1.83k
  if (isLoopHeaderOfNoUnroll(MBB))
440
1
    OutStreamer->EmitRawText(StringRef("\t.pragma \"nounroll\";\n"));
441
1.83k
}
442
443
1.68k
void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
444
1.68k
  SmallString<128> Str;
445
1.68k
  raw_svector_ostream O(Str);
446
1.68k
447
1.68k
  if (!GlobalsEmitted) {
448
209
    emitGlobals(*MF->getFunction().getParent());
449
209
    GlobalsEmitted = true;
450
209
  }
451
1.68k
452
1.68k
  // Set up
453
1.68k
  MRI = &MF->getRegInfo();
454
1.68k
  F = &MF->getFunction();
455
1.68k
  emitLinkageDirective(F, O);
456
1.68k
  if (isKernelFunction(*F))
457
98
    O << ".entry ";
458
1.58k
  else {
459
1.58k
    O << ".func ";
460
1.58k
    printReturnValStr(*MF, O);
461
1.58k
  }
462
1.68k
463
1.68k
  CurrentFnSym->print(O, MAI);
464
1.68k
465
1.68k
  emitFunctionParamList(*MF, O);
466
1.68k
467
1.68k
  if (isKernelFunction(*F))
468
98
    emitKernelFunctionDirectives(*F, O);
469
1.68k
470
1.68k
  OutStreamer->EmitRawText(O.str());
471
1.68k
472
1.68k
  VRegMapping.clear();
473
1.68k
  // Emit open brace for function body.
474
1.68k
  OutStreamer->EmitRawText(StringRef("{\n"));
475
1.68k
  setAndEmitFunctionVirtualRegisters(*MF);
476
1.68k
  // Emit initial .loc debug directive for correct relocation symbol data.
477
1.68k
  if (MMI && MMI->hasDebugInfo())
478
13
    emitInitialRawDwarfLocDirective(*MF);
479
1.68k
}
480
481
1.68k
/// Run the generic AsmPrinter over \p F, then close the function body.
bool NVPTXAsmPrinter::runOnMachineFunction(MachineFunction &F) {
  const bool Changed = AsmPrinter::runOnMachineFunction(F);

  // The closing brace is emitted here, after the base class has finished,
  // because additional debug labels/data must be emitted after the last basic
  // block and there is no hook that marks the end of function body emission.
  OutStreamer->EmitRawText(StringRef("}\n"));
  return Changed;
}
491
492
1.68k
void NVPTXAsmPrinter::EmitFunctionBodyStart() {
493
1.68k
  SmallString<128> Str;
494
1.68k
  raw_svector_ostream O(Str);
495
1.68k
  emitDemotedVars(&MF->getFunction(), O);
496
1.68k
  OutStreamer->EmitRawText(O.str());
497
1.68k
}
498
499
1.68k
void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
500
1.68k
  VRegMapping.clear();
501
1.68k
}
502
503
1
/// Return the symbol named DEPOTNAME followed by the current function number,
/// creating it if it does not exist yet.
const MCSymbol *NVPTXAsmPrinter::getFunctionFrameSymbol() const {
  SmallString<128> Name;
  raw_svector_ostream OS(Name);
  OS << DEPOTNAME << getFunctionNumber();
  return OutContext.getOrCreateSymbol(Name);
}
508
509
1
void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
510
1
  unsigned RegNo = MI->getOperand(0).getReg();
511
1
  if (TargetRegisterInfo::isVirtualRegister(RegNo)) {
512
1
    OutStreamer->AddComment(Twine("implicit-def: ") +
513
1
                            getVirtualRegisterName(RegNo));
514
1
  } else {
515
0
    const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
516
0
    OutStreamer->AddComment(Twine("implicit-def: ") +
517
0
                            STI.getRegisterInfo()->getName(RegNo));
518
0
  }
519
1
  OutStreamer->AddBlankLine();
520
1
}
521
522
void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
523
98
                                                   raw_ostream &O) const {
524
98
  // If the NVVM IR has some of reqntid* specified, then output
525
98
  // the reqntid directive, and set the unspecified ones to 1.
526
98
  // If none of reqntid* is specified, don't output reqntid directive.
527
98
  unsigned reqntidx, reqntidy, reqntidz;
528
98
  bool specified = false;
529
98
  if (!getReqNTIDx(F, reqntidx))
530
96
    reqntidx = 1;
531
2
  else
532
2
    specified = true;
533
98
  if (!getReqNTIDy(F, reqntidy))
534
96
    reqntidy = 1;
535
2
  else
536
2
    specified = true;
537
98
  if (!getReqNTIDz(F, reqntidz))
538
96
    reqntidz = 1;
539
2
  else
540
2
    specified = true;
541
98
542
98
  if (specified)
543
2
    O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
544
2
      << "\n";
545
98
546
98
  // If the NVVM IR has some of maxntid* specified, then output
547
98
  // the maxntid directive, and set the unspecified ones to 1.
548
98
  // If none of maxntid* is specified, don't output maxntid directive.
549
98
  unsigned maxntidx, maxntidy, maxntidz;
550
98
  specified = false;
551
98
  if (!getMaxNTIDx(F, maxntidx))
552
95
    maxntidx = 1;
553
3
  else
554
3
    specified = true;
555
98
  if (!getMaxNTIDy(F, maxntidy))
556
95
    maxntidy = 1;
557
3
  else
558
3
    specified = true;
559
98
  if (!getMaxNTIDz(F, maxntidz))
560
95
    maxntidz = 1;
561
3
  else
562
3
    specified = true;
563
98
564
98
  if (specified)
565
3
    O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
566
3
      << "\n";
567
98
568
98
  unsigned mincta;
569
98
  if (getMinCTASm(F, mincta))
570
2
    O << ".minnctapersm " << mincta << "\n";
571
98
572
98
  unsigned maxnreg;
573
98
  if (getMaxNReg(F, maxnreg))
574
2
    O << ".maxnreg " << maxnreg << "\n";
575
98
}
576
577
std::string
578
7
NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
579
7
  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
580
7
581
7
  std::string Name;
582
7
  raw_string_ostream NameStr(Name);
583
7
584
7
  VRegRCMap::const_iterator I = VRegMapping.find(RC);
585
7
  assert(I != VRegMapping.end() && "Bad register class");
586
7
  const DenseMap<unsigned, unsigned> &RegMap = I->second;
587
7
588
7
  VRegMap::const_iterator VI = RegMap.find(Reg);
589
7
  assert(VI != RegMap.end() && "Bad virtual register");
590
7
  unsigned MappedVR = VI->second;
591
7
592
7
  NameStr << getNVPTXRegClassStr(RC) << MappedVR;
593
7
594
7
  NameStr.flush();
595
7
  return Name;
596
7
}
597
598
void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
599
6
                                          raw_ostream &O) {
600
6
  O << getVirtualRegisterName(vr);
601
6
}
602
603
73
void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
604
73
  emitLinkageDirective(F, O);
605
73
  if (isKernelFunction(*F))
606
0
    O << ".entry ";
607
73
  else
608
73
    O << ".func ";
609
73
  printReturnValStr(F, O);
610
73
  getSymbol(F)->print(O, MAI);
611
73
  O << "\n";
612
73
  emitFunctionParamList(F, O);
613
73
  O << ";\n";
614
73
}
615
616
4
static bool usedInGlobalVarDef(const Constant *C) {
617
4
  if (!C)
618
0
    return false;
619
4
620
4
  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
621
1
    return GV->getName() != "llvm.used";
622
1
  }
623
3
624
3
  for (const User *U : C->users())
625
3
    if (const Constant *C = dyn_cast<Constant>(U))
626
2
      if (usedInGlobalVarDef(C))
627
0
        return true;
628
3
629
3
  return false;
630
3
}
631
632
48
static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
633
48
  if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
634
8
    if (othergv->getName() == "llvm.used")
635
0
      return true;
636
48
  }
637
48
638
48
  if (const Instruction *instr = dyn_cast<Instruction>(U)) {
639
20
    if (instr->getParent() && instr->getParent()->getParent()) {
640
20
      const Function *curFunc = instr->getParent()->getParent();
641
20
      if (oneFunc && 
(curFunc != oneFunc)12
)
642
5
        return false;
643
15
      oneFunc = curFunc;
644
15
      return true;
645
15
    } else
646
0
      return false;
647
28
  }
648
28
649
28
  for (const User *UU : U->users())
650
40
    if (!usedInOneFunc(UU, oneFunc))
651
11
      return false;
652
28
653
28
  
return true17
;
654
28
}
655
656
/* Find out if a global variable can be demoted to local scope.
657
 * Currently, this is valid for CUDA shared variables, which have local
658
 * scope and global lifetime. So the conditions to check are :
659
 * 1. Is the global variable in shared address space?
660
 * 2. Does it have internal linkage?
661
 * 3. Is the global variable referenced only in one function?
662
 */
663
57
static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
664
57
  if (!gv->hasInternalLinkage())
665
47
    return false;
666
10
  PointerType *Pty = gv->getType();
667
10
  if (Pty->getAddressSpace() != ADDRESS_SPACE_SHARED)
668
2
    return false;
669
8
670
8
  const Function *oneFunc = nullptr;
671
8
672
8
  bool flag = usedInOneFunc(gv, oneFunc);
673
8
  if (!flag)
674
5
    return false;
675
3
  if (!oneFunc)
676
0
    return false;
677
3
  f = oneFunc;
678
3
  return true;
679
3
}
680
681
static bool useFuncSeen(const Constant *C,
682
4
                        DenseMap<const Function *, bool> &seenMap) {
683
4
  for (const User *U : C->users()) {
684
3
    if (const Constant *cu = dyn_cast<Constant>(U)) {
685
2
      if (useFuncSeen(cu, seenMap))
686
0
        return true;
687
1
    } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
688
1
      const BasicBlock *bb = I->getParent();
689
1
      if (!bb)
690
0
        continue;
691
1
      const Function *caller = bb->getParent();
692
1
      if (!caller)
693
0
        continue;
694
1
      if (seenMap.find(caller) != seenMap.end())
695
0
        return true;
696
1
    }
697
3
  }
698
4
  return false;
699
4
}
700
701
251
void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
702
251
  DenseMap<const Function *, bool> seenMap;
703
2.58k
  for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; 
++FI2.33k
) {
704
2.33k
    const Function *F = &*FI;
705
2.33k
706
2.33k
    if (F->getAttributes().hasFnAttribute("nvptx-libcall-callee")) {
707
1
      emitDeclaration(F, O);
708
1
      continue;
709
1
    }
710
2.33k
711
2.33k
    if (F->isDeclaration()) {
712
651
      if (F->use_empty())
713
137
        continue;
714
514
      if (F->getIntrinsicID())
715
446
        continue;
716
68
      emitDeclaration(F, O);
717
68
      continue;
718
68
    }
719
1.68k
    for (const User *U : F->users()) {
720
60
      if (const Constant *C = dyn_cast<Constant>(U)) {
721
2
        if (usedInGlobalVarDef(C)) {
722
0
          // The use is in the initialization of a global variable
723
0
          // that is a function pointer, so print a declaration
724
0
          // for the original function
725
0
          emitDeclaration(F, O);
726
0
          break;
727
0
        }
728
2
        // Emit a declaration of this function if the function that
729
2
        // uses this constant expr has already been seen.
730
2
        if (useFuncSeen(C, seenMap)) {
731
0
          emitDeclaration(F, O);
732
0
          break;
733
0
        }
734
60
      }
735
60
736
60
      if (!isa<Instruction>(U))
737
2
        continue;
738
58
      const Instruction *instr = cast<Instruction>(U);
739
58
      const BasicBlock *bb = instr->getParent();
740
58
      if (!bb)
741
0
        continue;
742
58
      const Function *caller = bb->getParent();
743
58
      if (!caller)
744
0
        continue;
745
58
746
58
      // If a caller has already been seen, then the caller is
747
58
      // appearing in the module before the callee. so print out
748
58
      // a declaration for the callee.
749
58
      if (seenMap.find(caller) != seenMap.end()) {
750
4
        emitDeclaration(F, O);
751
4
        break;
752
4
      }
753
58
    }
754
1.68k
    seenMap[F] = true;
755
1.68k
  }
756
251
}
757
758
513
/// Return true if \p GV is null, if its initializer is not a ConstantArray,
/// or if that array has no operands.
static bool isEmptyXXStructor(GlobalVariable *GV) {
  if (!GV)
    return true;

  if (const auto *Entries = dyn_cast<ConstantArray>(GV->getInitializer()))
    return Entries->getNumOperands() == 0;

  // Not an array; we don't know how to parse.
  return true;
}
764
765
258
/// Module-level setup. Rejects modules NVPTX cannot handle (aliases,
/// non-empty llvm.global_ctors / llvm.global_dtors) with a fatal error, runs
/// the base-class initialization, emits the PTX header and any module-level
/// inline assembly, and resets the GlobalsEmitted flag.
///
/// \returns the result of AsmPrinter::doInitialization.
bool NVPTXAsmPrinter::doInitialization(Module &M) {
  // Construct a default subtarget off of the TargetMachine defaults. The
  // rest of NVPTX isn't friendly to change subtargets per function and
  // so the default TargetMachine will have all of the options.
  const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
  const auto* STI = static_cast<const NVPTXSubtarget*>(NTM.getSubtargetImpl());

  // report_fatal_error does not return, so no error return value is needed
  // after any of the checks below.
  if (M.alias_size())
    report_fatal_error("Module has aliases, which NVPTX does not support.");

  if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors")))
    report_fatal_error(
        "Module has a nontrivial global ctor, which NVPTX does not support.");

  if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors")))
    report_fatal_error(
        "Module has a nontrivial global dtor, which NVPTX does not support.");

  SmallString<128> Str1;
  raw_svector_ostream OS1(Str1);

  // We need to call the parent's one explicitly.
  bool Result = AsmPrinter::doInitialization(M);

  // Emit header before any dwarf directives are emitted below.
  emitHeader(M, OS1, *STI);
  OutStreamer->EmitRawText(OS1.str());

  // Emit module-level inline asm if it exists.
  if (!M.getModuleInlineAsm().empty()) {
    OutStreamer->AddComment("Start of file scope inline assembly");
    OutStreamer->AddBlankLine();
    OutStreamer->EmitRawText(StringRef(M.getModuleInlineAsm()));
    OutStreamer->AddBlankLine();
    OutStreamer->AddComment("End of file scope inline assembly");
    OutStreamer->AddBlankLine();
  }

  // Globals are emitted later, before the first function entry label.
  GlobalsEmitted = false;

  return Result;
}
811
812
251
void NVPTXAsmPrinter::emitGlobals(const Module &M) {
813
251
  SmallString<128> Str2;
814
251
  raw_svector_ostream OS2(Str2);
815
251
816
251
  emitDeclarations(M, OS2);
817
251
818
251
  // As ptxas does not support forward references of globals, we need to first
819
251
  // sort the list of module-level globals in def-use order. We visit each
820
251
  // global variable in order, and ensure that we emit it *after* its dependent
821
251
  // globals. We use a little extra memory maintaining both a set and a list to
822
251
  // have fast searches while maintaining a strict ordering.
823
251
  SmallVector<const GlobalVariable *, 8> Globals;
824
251
  DenseSet<const GlobalVariable *> GVVisited;
825
251
  DenseSet<const GlobalVariable *> GVVisiting;
826
251
827
251
  // Visit each global variable, in order
828
251
  for (const GlobalVariable &I : M.globals())
829
80
    VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
830
251
831
251
  assert(GVVisited.size() == M.getGlobalList().size() &&
832
251
         "Missed a global variable");
833
251
  assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
834
251
835
251
  // Print out module-level global variables in proper order
836
331
  for (unsigned i = 0, e = Globals.size(); i != e; 
++i80
)
837
80
    printModuleLevelGV(Globals[i], OS2);
838
251
839
251
  OS2 << '\n';
840
251
841
251
  OutStreamer->EmitRawText(OS2.str());
842
251
}
843
844
void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
845
255
                                 const NVPTXSubtarget &STI) {
846
255
  O << "//\n";
847
255
  O << "// Generated by LLVM NVPTX Back-End\n";
848
255
  O << "//\n";
849
255
  O << "\n";
850
255
851
255
  unsigned PTXVersion = STI.getPTXVersion();
852
255
  O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
853
255
854
255
  O << ".target ";
855
255
  O << STI.getTargetName();
856
255
857
255
  const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
858
255
  if (NTM.getDrvInterface() == NVPTX::NVCL)
859
4
    O << ", texmode_independent";
860
255
861
255
  bool HasFullDebugInfo = false;
862
255
  for (DICompileUnit *CU : M.debug_compile_units()) {
863
9
    switch(CU->getEmissionKind()) {
864
9
    case DICompileUnit::NoDebug:
865
2
    case DICompileUnit::DebugDirectivesOnly:
866
2
      break;
867
7
    case DICompileUnit::LineTablesOnly:
868
7
    case DICompileUnit::FullDebug:
869
7
      HasFullDebugInfo = true;
870
7
      break;
871
9
    }
872
9
    if (HasFullDebugInfo)
873
7
      break;
874
9
  }
875
255
  if (MMI && MMI->hasDebugInfo() && 
HasFullDebugInfo9
)
876
7
    O << ", debug";
877
255
878
255
  O << "\n";
879
255
880
255
  O << ".address_size ";
881
255
  if (NTM.is64Bit())
882
108
    O << "64";
883
147
  else
884
147
    O << "32";
885
255
  O << "\n";
886
255
887
255
  O << "\n";
888
255
}
889
890
251
/// Module-level teardown for the NVPTX assembly printer.
///
/// Makes sure the globals have been emitted, runs the base-class finalization
/// with the module's global list temporarily emptied, restores the list, and
/// finally closes the last debug section (when debug info is present) and
/// emits any outstanding DWARF .file directives.
///
/// \returns the result of AsmPrinter::doFinalization(M).
bool NVPTXAsmPrinter::doFinalization(Module &M) {
  // Sampled before the base-class finalization runs.
  const bool HasDebugInfo = MMI && MMI->hasDebugInfo();

  // If we did not emit any functions, then the global declarations have not
  // yet been emitted.
  if (!GlobalsEmitted) {
    emitGlobals(M);
    GlobalsEmitted = true;
  }

  // XXX Temporarily remove global variables so that the base-class
  // doFinalization() will not emit them again (global variables are emitted
  // at the beginning).
  //
  // NOTE: Instead of calling the parent's doFinalization, we may clone the
  // parent's doFinalization and customize it here. Currently, we if NVISA out
  // the EmitGlobals() in parent's doFinalization, which is too intrusive.
  // Same for the doInitialization.
  Module::GlobalListType &GlobalList = M.getGlobalList();

  // First, back up the GlobalVariable pointers in their current order.
  SmallVector<GlobalVariable *, 16> SavedGlobals;
  SavedGlobals.reserve(GlobalList.size());
  for (GlobalVariable &GV : GlobalList)
    SavedGlobals.push_back(&GV);

  // Second, empty the list; the same objects are re-inserted below.
  while (!GlobalList.empty())
    GlobalList.remove(GlobalList.begin());

  // Call the parent's doFinalization.
  const bool Ret = AsmPrinter::doFinalization(M);

  // Now restore the global variables in their original order.
  for (GlobalVariable *GV : SavedGlobals)
    GlobalList.insert(GlobalList.end(), GV);

  clearAnnotationCache(&M);

  // Close the last emitted section
  if (HasDebugInfo) {
    static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer())
        ->closeLastSection();
    // Emit empty .debug_loc section for better support of the empty files.
    OutStreamer->EmitRawText("\t.section\t.debug_loc\t{\t}");
  }

  // Output last DWARF .file directives, if any.
  static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer())
      ->outputDwarfFileDirectives();

  return Ret;
}
950
951
// This function emits appropriate linkage directives for
952
// functions and global variables.
953
//
954
// extern function declaration            -> .extern
955
// extern function definition             -> .visible
956
// external global variable with init     -> .visible
957
// external without init                  -> .extern
958
// appending                              -> not allowed, assert.
959
// for any linkage other than
960
// internal, private, linker_private,
961
// linker_private_weak, linker_private_weak_def_auto,
962
// we emit                                -> .weak.
963
964
void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
965
1.75k
                                           raw_ostream &O) {
966
1.75k
  if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
967
1.75k
    if (V->hasExternalLinkage()) {
968
1.74k
      if (isa<GlobalVariable>(V)) {
969
0
        const GlobalVariable *GVar = cast<GlobalVariable>(V);
970
0
        if (GVar) {
971
0
          if (GVar->hasInitializer())
972
0
            O << ".visible ";
973
0
          else
974
0
            O << ".extern ";
975
0
        }
976
1.74k
      } else if (V->isDeclaration())
977
68
        O << ".extern ";
978
1.67k
      else
979
1.67k
        O << ".visible ";
980
1.74k
    } else 
if (13
V->hasAppendingLinkage()13
) {
981
0
      std::string msg;
982
0
      msg.append("Error: ");
983
0
      msg.append("Symbol ");
984
0
      if (V->hasName())
985
0
        msg.append(V->getName());
986
0
      msg.append("has unsupported appending linkage type");
987
0
      llvm_unreachable(msg.c_str());
988
13
    } else if (!V->hasInternalLinkage() &&
989
13
               
!V->hasPrivateLinkage()3
) {
990
2
      O << ".weak ";
991
2
    }
992
1.75k
  }
993
1.75k
}
994
995
void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
996
                                         raw_ostream &O,
997
83
                                         bool processDemoted) {
998
83
  // Skip meta data
999
83
  if (GVar->hasSection()) {
1000
1
    if (GVar->getSection() == "llvm.metadata")
1001
1
      return;
1002
82
  }
1003
82
1004
82
  // Skip LLVM intrinsic global variables
1005
82
  if (GVar->getName().startswith("llvm.") ||
1006
82
      
GVar->getName().startswith("nvvm.")80
)
1007
2
    return;
1008
80
1009
80
  const DataLayout &DL = getDataLayout();
1010
80
1011
80
  // GlobalVariables are always constant pointers themselves.
1012
80
  PointerType *PTy = GVar->getType();
1013
80
  Type *ETy = GVar->getValueType();
1014
80
1015
80
  if (GVar->hasExternalLinkage()) {
1016
46
    if (GVar->hasInitializer())
1017
40
      O << ".visible ";
1018
6
    else
1019
6
      O << ".extern ";
1020
46
  } else 
if (34
GVar->hasLinkOnceLinkage()34
||
GVar->hasWeakLinkage()34
||
1021
34
             GVar->hasAvailableExternallyLinkage() ||
1022
34
             GVar->hasCommonLinkage()) {
1023
1
    O << ".weak ";
1024
1
  }
1025
80
1026
80
  if (isTexture(*GVar)) {
1027
6
    O << ".global .texref " << getTextureName(*GVar) << ";\n";
1028
6
    return;
1029
6
  }
1030
74
1031
74
  if (isSurface(*GVar)) {
1032
8
    O << ".global .surfref " << getSurfaceName(*GVar) << ";\n";
1033
8
    return;
1034
8
  }
1035
66
1036
66
  if (GVar->isDeclaration()) {
1037
6
    // (extern) declarations, no definition or initializer
1038
6
    // Currently the only known declaration is for an automatic __local
1039
6
    // (.shared) promoted to global.
1040
6
    emitPTXGlobalVariable(GVar, O);
1041
6
    O << ";\n";
1042
6
    return;
1043
6
  }
1044
60
1045
60
  if (isSampler(*GVar)) {
1046
0
    O << ".global .samplerref " << getSamplerName(*GVar);
1047
0
1048
0
    const Constant *Initializer = nullptr;
1049
0
    if (GVar->hasInitializer())
1050
0
      Initializer = GVar->getInitializer();
1051
0
    const ConstantInt *CI = nullptr;
1052
0
    if (Initializer)
1053
0
      CI = dyn_cast<ConstantInt>(Initializer);
1054
0
    if (CI) {
1055
0
      unsigned sample = CI->getZExtValue();
1056
0
1057
0
      O << " = { ";
1058
0
1059
0
      for (int i = 0,
1060
0
               addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
1061
0
           i < 3; i++) {
1062
0
        O << "addr_mode_" << i << " = ";
1063
0
        switch (addr) {
1064
0
        case 0:
1065
0
          O << "wrap";
1066
0
          break;
1067
0
        case 1:
1068
0
          O << "clamp_to_border";
1069
0
          break;
1070
0
        case 2:
1071
0
          O << "clamp_to_edge";
1072
0
          break;
1073
0
        case 3:
1074
0
          O << "wrap";
1075
0
          break;
1076
0
        case 4:
1077
0
          O << "mirror";
1078
0
          break;
1079
0
        }
1080
0
        O << ", ";
1081
0
      }
1082
0
      O << "filter_mode = ";
1083
0
      switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
1084
0
      case 0:
1085
0
        O << "nearest";
1086
0
        break;
1087
0
      case 1:
1088
0
        O << "linear";
1089
0
        break;
1090
0
      case 2:
1091
0
        llvm_unreachable("Anisotropic filtering is not supported");
1092
0
      default:
1093
0
        O << "nearest";
1094
0
        break;
1095
0
      }
1096
0
      if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
1097
0
        O << ", force_unnormalized_coords = 1";
1098
0
      }
1099
0
      O << " }";
1100
0
    }
1101
0
1102
0
    O << ";\n";
1103
0
    return;
1104
60
  }
1105
60
1106
60
  if (GVar->hasPrivateLinkage()) {
1107
6
    if (strncmp(GVar->getName().data(), "unrollpragma", 12) == 0)
1108
0
      return;
1109
6
1110
6
    // FIXME - need better way (e.g. Metadata) to avoid generating this global
1111
6
    if (strncmp(GVar->getName().data(), "filename", 8) == 0)
1112
0
      return;
1113
6
    if (GVar->use_empty())
1114
0
      return;
1115
60
  }
1116
60
1117
60
  const Function *demotedFunc = nullptr;
1118
60
  if (!processDemoted && 
canDemoteGlobalVar(GVar, demotedFunc)57
) {
1119
3
    O << "// " << GVar->getName() << " has been demoted\n";
1120
3
    if (localDecls.find(demotedFunc) != localDecls.end())
1121
1
      localDecls[demotedFunc].push_back(GVar);
1122
2
    else {
1123
2
      std::vector<const GlobalVariable *> temp;
1124
2
      temp.push_back(GVar);
1125
2
      localDecls[demotedFunc] = temp;
1126
2
    }
1127
3
    return;
1128
3
  }
1129
57
1130
57
  O << ".";
1131
57
  emitPTXAddressSpace(PTy->getAddressSpace(), O);
1132
57
1133
57
  if (isManaged(*GVar)) {
1134
1
    O << " .attribute(.managed)";
1135
1
  }
1136
57
1137
57
  if (GVar->getAlignment() == 0)
1138
29
    O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
1139
28
  else
1140
28
    O << " .align " << GVar->getAlignment();
1141
57
1142
57
  if (ETy->isFloatingPointTy() || 
ETy->isPointerTy()52
||
1143
57
      
(48
ETy->isIntegerTy()48
&&
ETy->getScalarSizeInBits() <= 6423
)) {
1144
29
    O << " .";
1145
29
    // Special case: ABI requires that we use .u8 for predicates
1146
29
    if (ETy->isIntegerTy(1))
1147
1
      O << "u8";
1148
28
    else
1149
28
      O << getPTXFundamentalTypeStr(ETy, false);
1150
29
    O << " ";
1151
29
    getSymbol(GVar)->print(O, MAI);
1152
29
1153
29
    // Ptx allows variable initilization only for constant and global state
1154
29
    // spaces.
1155
29
    if (GVar->hasInitializer()) {
1156
29
      if ((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1157
29
          
(PTy->getAddressSpace() == ADDRESS_SPACE_CONST)11
) {
1158
21
        const Constant *Initializer = GVar->getInitializer();
1159
21
        // 'undef' is treated as there is no value specified.
1160
21
        if (!Initializer->isNullValue() && 
!isa<UndefValue>(Initializer)13
) {
1161
13
          O << " = ";
1162
13
          printScalarConstant(Initializer, O);
1163
13
        }
1164
21
      } else {
1165
8
        // The frontend adds zero-initializer to device and constant variables
1166
8
        // that don't have an initial value, and UndefValue to shared
1167
8
        // variables, so skip warning for this case.
1168
8
        if (!GVar->getInitializer()->isNullValue() &&
1169
8
            
!isa<UndefValue>(GVar->getInitializer())4
) {
1170
1
          report_fatal_error("initial value of '" + GVar->getName() +
1171
1
                             "' is not allowed in addrspace(" +
1172
1
                             Twine(PTy->getAddressSpace()) + ")");
1173
1
        }
1174
28
      }
1175
29
    }
1176
28
  } else {
1177
28
    unsigned int ElementSize = 0;
1178
28
1179
28
    // Although PTX has direct support for struct type and array type and
1180
28
    // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
1181
28
    // targets that support these high level field accesses. Structs, arrays
1182
28
    // and vectors are lowered into arrays of bytes.
1183
28
    switch (ETy->getTypeID()) {
1184
28
    case Type::IntegerTyID: // Integers larger than 64 bits
1185
28
    case Type::StructTyID:
1186
28
    case Type::ArrayTyID:
1187
28
    case Type::VectorTyID:
1188
28
      ElementSize = DL.getTypeStoreSize(ETy);
1189
28
      // Ptx allows variable initilization only for constant and
1190
28
      // global state spaces.
1191
28
      if (((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1192
28
           
(PTy->getAddressSpace() == ADDRESS_SPACE_CONST)4
) &&
1193
28
          
GVar->hasInitializer()24
) {
1194
24
        const Constant *Initializer = GVar->getInitializer();
1195
24
        if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
1196
21
          AggBuffer aggBuffer(ElementSize, O, *this);
1197
21
          bufferAggregateConstant(Initializer, &aggBuffer);
1198
21
          if (aggBuffer.numSymbols) {
1199
6
            if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) {
1200
2
              O << " .u64 ";
1201
2
              getSymbol(GVar)->print(O, MAI);
1202
2
              O << "[";
1203
2
              O << ElementSize / 8;
1204
4
            } else {
1205
4
              O << " .u32 ";
1206
4
              getSymbol(GVar)->print(O, MAI);
1207
4
              O << "[";
1208
4
              O << ElementSize / 4;
1209
4
            }
1210
6
            O << "]";
1211
15
          } else {
1212
15
            O << " .b8 ";
1213
15
            getSymbol(GVar)->print(O, MAI);
1214
15
            O << "[";
1215
15
            O << ElementSize;
1216
15
            O << "]";
1217
15
          }
1218
21
          O << " = {";
1219
21
          aggBuffer.print();
1220
21
          O << "}";
1221
21
        } else {
1222
3
          O << " .b8 ";
1223
3
          getSymbol(GVar)->print(O, MAI);
1224
3
          if (ElementSize) {
1225
3
            O << "[";
1226
3
            O << ElementSize;
1227
3
            O << "]";
1228
3
          }
1229
3
        }
1230
24
      } else {
1231
4
        O << " .b8 ";
1232
4
        getSymbol(GVar)->print(O, MAI);
1233
4
        if (ElementSize) {
1234
4
          O << "[";
1235
4
          O << ElementSize;
1236
4
          O << "]";
1237
4
        }
1238
4
      }
1239
28
      break;
1240
28
    default:
1241
0
      llvm_unreachable("type not supported yet");
1242
56
    }
1243
56
  }
1244
56
  O << ";\n";
1245
56
}
1246
1247
1.68k
void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
1248
1.68k
  if (localDecls.find(f) == localDecls.end())
1249
1.68k
    return;
1250
2
1251
2
  std::vector<const GlobalVariable *> &gvars = localDecls[f];
1252
2
1253
5
  for (unsigned i = 0, e = gvars.size(); i != e; 
++i3
) {
1254
3
    O << "\t// demoted variable\n\t";
1255
3
    printModuleLevelGV(gvars[i], O, true);
1256
3
  }
1257
2
}
1258
1259
void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
1260
63
                                          raw_ostream &O) const {
1261
63
  switch (AddressSpace) {
1262
63
  case ADDRESS_SPACE_LOCAL:
1263
0
    O << "local";
1264
0
    break;
1265
63
  case ADDRESS_SPACE_GLOBAL:
1266
48
    O << "global";
1267
48
    break;
1268
63
  case ADDRESS_SPACE_CONST:
1269
3
    O << "const";
1270
3
    break;
1271
63
  case ADDRESS_SPACE_SHARED:
1272
12
    O << "shared";
1273
12
    break;
1274
63
  default:
1275
0
    report_fatal_error("Bad address space found while emitting PTX: " +
1276
0
                       llvm::Twine(AddressSpace));
1277
63
    
break0
;
1278
63
  }
1279
63
}
1280
1281
std::string
1282
69
NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
1283
69
  switch (Ty->getTypeID()) {
1284
69
  default:
1285
0
    llvm_unreachable("unexpected type");
1286
69
    
break0
;
1287
69
  case Type::IntegerTyID: {
1288
55
    unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
1289
55
    if (NumBits == 1)
1290
0
      return "pred";
1291
55
    else if (NumBits <= 64) {
1292
55
      std::string name = "u";
1293
55
      return name + utostr(NumBits);
1294
55
    } else {
1295
0
      llvm_unreachable("Integer too large");
1296
0
      break;
1297
0
    }
1298
0
    break;
1299
0
  }
1300
0
  case Type::HalfTyID:
1301
0
    // fp16 is stored as .b16 for compatibility with pre-sm_53 PTX assembly.
1302
0
    return "b16";
1303
8
  case Type::FloatTyID:
1304
8
    return "f32";
1305
0
  case Type::DoubleTyID:
1306
0
    return "f64";
1307
6
  case Type::PointerTyID:
1308
6
    if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit())
1309
2
      if (useB4PTR)
1310
1
        return "b64";
1311
1
      else
1312
1
        return "u64";
1313
4
    else if (useB4PTR)
1314
1
      return "b32";
1315
3
    else
1316
3
      return "u32";
1317
0
  }
1318
0
  llvm_unreachable("unexpected type");
1319
0
  return nullptr;
1320
0
}
1321
1322
void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
1323
6
                                            raw_ostream &O) {
1324
6
  const DataLayout &DL = getDataLayout();
1325
6
1326
6
  // GlobalVariables are always constant pointers themselves.
1327
6
  Type *ETy = GVar->getValueType();
1328
6
1329
6
  O << ".";
1330
6
  emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
1331
6
  if (GVar->getAlignment() == 0)
1332
6
    O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
1333
0
  else
1334
0
    O << " .align " << GVar->getAlignment();
1335
6
1336
6
  // Special case for i128
1337
6
  if (ETy->isIntegerTy(128)) {
1338
0
    O << " .b8 ";
1339
0
    getSymbol(GVar)->print(O, MAI);
1340
0
    O << "[16]";
1341
0
    return;
1342
0
  }
1343
6
1344
6
  if (ETy->isFloatingPointTy() || 
ETy->isIntOrPtrTy()5
) {
1345
3
    O << " .";
1346
3
    O << getPTXFundamentalTypeStr(ETy);
1347
3
    O << " ";
1348
3
    getSymbol(GVar)->print(O, MAI);
1349
3
    return;
1350
3
  }
1351
3
1352
3
  int64_t ElementSize = 0;
1353
3
1354
3
  // Although PTX has direct support for struct type and array type and LLVM IR
1355
3
  // is very similar to PTX, the LLVM CodeGen does not support for targets that
1356
3
  // support these high level field accesses. Structs and arrays are lowered
1357
3
  // into arrays of bytes.
1358
3
  switch (ETy->getTypeID()) {
1359
3
  case Type::StructTyID:
1360
3
  case Type::ArrayTyID:
1361
3
  case Type::VectorTyID:
1362
3
    ElementSize = DL.getTypeStoreSize(ETy);
1363
3
    O << " .b8 ";
1364
3
    getSymbol(GVar)->print(O, MAI);
1365
3
    O << "[";
1366
3
    if (ElementSize) {
1367
3
      O << ElementSize;
1368
3
    }
1369
3
    O << "]";
1370
3
    break;
1371
3
  default:
1372
0
    llvm_unreachable("type not supported yet");
1373
3
  }
1374
3
}
1375
1376
3
static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty) {
1377
3
  if (Ty->isSingleValueType())
1378
3
    return DL.getPrefTypeAlignment(Ty);
1379
0
1380
0
  auto *ATy = dyn_cast<ArrayType>(Ty);
1381
0
  if (ATy)
1382
0
    return getOpenCLAlignment(DL, ATy->getElementType());
1383
0
1384
0
  auto *STy = dyn_cast<StructType>(Ty);
1385
0
  if (STy) {
1386
0
    unsigned int alignStruct = 1;
1387
0
    // Go through each element of the struct and find the
1388
0
    // largest alignment.
1389
0
    for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) {
1390
0
      Type *ETy = STy->getElementType(i);
1391
0
      unsigned int align = getOpenCLAlignment(DL, ETy);
1392
0
      if (align > alignStruct)
1393
0
        alignStruct = align;
1394
0
    }
1395
0
    return alignStruct;
1396
0
  }
1397
0
1398
0
  auto *FTy = dyn_cast<FunctionType>(Ty);
1399
0
  if (FTy)
1400
0
    return DL.getPointerPrefAlignment();
1401
0
  return DL.getPrefTypeAlignment(Ty);
1402
0
}
1403
1404
void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
1405
2.73k
                                     int paramIndex, raw_ostream &O) {
1406
2.73k
  getSymbol(I->getParent())->print(O, MAI);
1407
2.73k
  O << "_param_" << paramIndex;
1408
2.73k
}
1409
1410
1.75k
void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
1411
1.75k
  const DataLayout &DL = getDataLayout();
1412
1.75k
  const AttributeList &PAL = F->getAttributes();
1413
1.75k
  const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
1414
1.75k
  const TargetLowering *TLI = STI.getTargetLowering();
1415
1.75k
  Function::const_arg_iterator I, E;
1416
1.75k
  unsigned paramIndex = 0;
1417
1.75k
  bool first = true;
1418
1.75k
  bool isKernelFunc = isKernelFunction(*F);
1419
1.75k
  bool isABI = (STI.getSmVersion() >= 20);
1420
1.75k
  bool hasImageHandles = STI.hasImageHandles();
1421
1.75k
  MVT thePointerTy = TLI->getPointerTy(DL);
1422
1.75k
1423
1.75k
  if (F->arg_empty()) {
1424
193
    O << "()\n";
1425
193
    return;
1426
193
  }
1427
1.56k
1428
1.56k
  O << "(\n";
1429
1.56k
1430
4.30k
  for (I = F->arg_begin(), E = F->arg_end(); I != E; 
++I, paramIndex++2.74k
) {
1431
2.74k
    Type *Ty = I->getType();
1432
2.74k
1433
2.74k
    if (!first)
1434
1.17k
      O << ",\n";
1435
2.74k
1436
2.74k
    first = false;
1437
2.74k
1438
2.74k
    // Handle image/sampler parameters
1439
2.74k
    if (isKernelFunction(*F)) {
1440
188
      if (isSampler(*I) || 
isImage(*I)186
) {
1441
6
        if (isImage(*I)) {
1442
4
          std::string sname = I->getName();
1443
4
          if (isImageWriteOnly(*I) || 
isImageReadWrite(*I)3
) {
1444
2
            if (hasImageHandles)
1445
0
              O << "\t.param .u64 .ptr .surfref ";
1446
2
            else
1447
2
              O << "\t.param .surfref ";
1448
2
            CurrentFnSym->print(O, MAI);
1449
2
            O << "_param_" << paramIndex;
1450
2
          }
1451
2
          else { // Default image is read_only
1452
2
            if (hasImageHandles)
1453
0
              O << "\t.param .u64 .ptr .texref ";
1454
2
            else
1455
2
              O << "\t.param .texref ";
1456
2
            CurrentFnSym->print(O, MAI);
1457
2
            O << "_param_" << paramIndex;
1458
2
          }
1459
4
        } else {
1460
2
          if (hasImageHandles)
1461
0
            O << "\t.param .u64 .ptr .samplerref ";
1462
2
          else
1463
2
            O << "\t.param .samplerref ";
1464
2
          CurrentFnSym->print(O, MAI);
1465
2
          O << "_param_" << paramIndex;
1466
2
        }
1467
6
        continue;
1468
6
      }
1469
2.73k
    }
1470
2.73k
1471
2.73k
    if (!PAL.hasParamAttribute(paramIndex, Attribute::ByVal)) {
1472
2.73k
      if (Ty->isAggregateType() || 
Ty->isVectorTy()2.71k
||
Ty->isIntegerTy(128)2.31k
) {
1473
431
        // Just print .param .align <a> .b8 .param[size];
1474
431
        // <a> = PAL.getparamalignment
1475
431
        // size = typeallocsize of element type
1476
431
        unsigned align = PAL.getParamAlignment(paramIndex);
1477
431
        if (align == 0)
1478
431
          align = DL.getABITypeAlignment(Ty);
1479
431
1480
431
        unsigned sz = DL.getTypeAllocSize(Ty);
1481
431
        O << "\t.param .align " << align << " .b8 ";
1482
431
        printParamName(I, paramIndex, O);
1483
431
        O << "[" << sz << "]";
1484
431
1485
431
        continue;
1486
431
      }
1487
2.29k
      // Just a scalar
1488
2.29k
      auto *PTy = dyn_cast<PointerType>(Ty);
1489
2.29k
      if (isKernelFunc) {
1490
178
        if (PTy) {
1491
138
          // Special handling for pointer arguments to kernel
1492
138
          O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
1493
138
1494
138
          if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
1495
138
              NVPTX::CUDA) {
1496
3
            Type *ETy = PTy->getElementType();
1497
3
            int addrSpace = PTy->getAddressSpace();
1498
3
            switch (addrSpace) {
1499
3
            default:
1500
3
              O << ".ptr ";
1501
3
              break;
1502
3
            case ADDRESS_SPACE_CONST:
1503
0
              O << ".ptr .const ";
1504
0
              break;
1505
3
            case ADDRESS_SPACE_SHARED:
1506
0
              O << ".ptr .shared ";
1507
0
              break;
1508
3
            case ADDRESS_SPACE_GLOBAL:
1509
0
              O << ".ptr .global ";
1510
0
              break;
1511
3
            }
1512
3
            O << ".align " << (int)getOpenCLAlignment(DL, ETy) << " ";
1513
3
          }
1514
138
          printParamName(I, paramIndex, O);
1515
138
          continue;
1516
40
        }
1517
40
1518
40
        // non-pointer scalar to kernel func
1519
40
        O << "\t.param .";
1520
40
        // Special case: predicate operands become .u8 types
1521
40
        if (Ty->isIntegerTy(1))
1522
2
          O << "u8";
1523
38
        else
1524
38
          O << getPTXFundamentalTypeStr(Ty);
1525
40
        O << " ";
1526
40
        printParamName(I, paramIndex, O);
1527
40
        continue;
1528
40
      }
1529
2.12k
      // Non-kernel function, just print .param .b<size> for ABI
1530
2.12k
      // and .reg .b<size> for non-ABI
1531
2.12k
      unsigned sz = 0;
1532
2.12k
      if (isa<IntegerType>(Ty)) {
1533
874
        sz = cast<IntegerType>(Ty)->getBitWidth();
1534
874
        if (sz < 32)
1535
284
          sz = 32;
1536
1.24k
      } else if (isa<PointerType>(Ty))
1537
507
        sz = thePointerTy.getSizeInBits();
1538
740
      else if (Ty->isHalfTy())
1539
392
        // PTX ABI requires all scalar parameters to be at least 32
1540
392
        // bits in size.  fp16 normally uses .b16 as its storage type
1541
392
        // in PTX, so its size must be adjusted here, too.
1542
392
        sz = 32;
1543
348
      else
1544
348
        sz = Ty->getPrimitiveSizeInBits();
1545
2.12k
      if (isABI)
1546
2.12k
        O << "\t.param .b" << sz << " ";
1547
0
      else
1548
0
        O << "\t.reg .b" << sz << " ";
1549
2.12k
      printParamName(I, paramIndex, O);
1550
2.12k
      continue;
1551
2.12k
    }
1552
8
1553
8
    // param has byVal attribute. So should be a pointer
1554
8
    auto *PTy = dyn_cast<PointerType>(Ty);
1555
8
    assert(PTy && "Param with byval attribute should be a pointer type");
1556
8
    Type *ETy = PTy->getElementType();
1557
8
1558
8
    if (isABI || 
isKernelFunc0
) {
1559
8
      // Just print .param .align <a> .b8 .param[size];
1560
8
      // <a> = PAL.getparamalignment
1561
8
      // size = typeallocsize of element type
1562
8
      unsigned align = PAL.getParamAlignment(paramIndex);
1563
8
      if (align == 0)
1564
7
        align = DL.getABITypeAlignment(ETy);
1565
8
      // Work around a bug in ptxas. When PTX code takes address of
1566
8
      // byval parameter with alignment < 4, ptxas generates code to
1567
8
      // spill argument into memory. Alas on sm_50+ ptxas generates
1568
8
      // SASS code that fails with misaligned access. To work around
1569
8
      // the problem, make sure that we align byval parameters by at
1570
8
      // least 4. Matching change must be made in LowerCall() where we
1571
8
      // prepare parameters for the call.
1572
8
      //
1573
8
      // TODO: this will need to be undone when we get to support multi-TU
1574
8
      // device-side compilation as it breaks ABI compatibility with nvcc.
1575
8
      // Hopefully ptxas bug is fixed by then.
1576
8
      if (!isKernelFunc && 
align < 46
)
1577
2
        align = 4;
1578
8
      unsigned sz = DL.getTypeAllocSize(ETy);
1579
8
      O << "\t.param .align " << align << " .b8 ";
1580
8
      printParamName(I, paramIndex, O);
1581
8
      O << "[" << sz << "]";
1582
8
      continue;
1583
8
    } else {
1584
0
      // Split the ETy into constituent parts and
1585
0
      // print .param .b<size> <name> for each part.
1586
0
      // Further, if a part is vector, print the above for
1587
0
      // each vector element.
1588
0
      SmallVector<EVT, 16> vtparts;
1589
0
      ComputeValueVTs(*TLI, DL, ETy, vtparts);
1590
0
      for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
1591
0
        unsigned elems = 1;
1592
0
        EVT elemtype = vtparts[i];
1593
0
        if (vtparts[i].isVector()) {
1594
0
          elems = vtparts[i].getVectorNumElements();
1595
0
          elemtype = vtparts[i].getVectorElementType();
1596
0
        }
1597
0
1598
0
        for (unsigned j = 0, je = elems; j != je; ++j) {
1599
0
          unsigned sz = elemtype.getSizeInBits();
1600
0
          if (elemtype.isInteger() && (sz < 32))
1601
0
            sz = 32;
1602
0
          O << "\t.reg .b" << sz << " ";
1603
0
          printParamName(I, paramIndex, O);
1604
0
          if (j < je - 1)
1605
0
            O << ",\n";
1606
0
          ++paramIndex;
1607
0
        }
1608
0
        if (i < e - 1)
1609
0
          O << ",\n";
1610
0
      }
1611
0
      --paramIndex;
1612
0
      continue;
1613
0
    }
1614
8
  }
1615
1.56k
1616
1.56k
  O << "\n)\n";
1617
1.56k
}
1618
1619
void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
1620
1.68k
                                            raw_ostream &O) {
1621
1.68k
  const Function &F = MF.getFunction();
1622
1.68k
  emitFunctionParamList(&F, O);
1623
1.68k
}
1624
1625
void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1626
1.68k
    const MachineFunction &MF) {
1627
1.68k
  SmallString<128> Str;
1628
1.68k
  raw_svector_ostream O(Str);
1629
1.68k
1630
1.68k
  // Map the global virtual register number to a register class specific
1631
1.68k
  // virtual register number starting from 1 with that class.
1632
1.68k
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1633
1.68k
  //unsigned numRegClasses = TRI->getNumRegClasses();
1634
1.68k
1635
1.68k
  // Emit the Fake Stack Object
1636
1.68k
  const MachineFrameInfo &MFI = MF.getFrameInfo();
1637
1.68k
  int NumBytes = (int) MFI.getStackSize();
1638
1.68k
  if (NumBytes) {
1639
24
    O << "\t.local .align " << MFI.getMaxAlignment() << " .b8 \t" << DEPOTNAME
1640
24
      << getFunctionNumber() << "[" << NumBytes << "];\n";
1641
24
    if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
1642
10
      O << "\t.reg .b64 \t%SP;\n";
1643
10
      O << "\t.reg .b64 \t%SPL;\n";
1644
14
    } else {
1645
14
      O << "\t.reg .b32 \t%SP;\n";
1646
14
      O << "\t.reg .b32 \t%SPL;\n";
1647
14
    }
1648
24
  }
1649
1.68k
1650
1.68k
  // Go through all virtual registers to establish the mapping between the
1651
1.68k
  // global virtual
1652
1.68k
  // register number and the per class virtual register number.
1653
1.68k
  // We use the per class virtual register number in the ptx output.
1654
1.68k
  unsigned int numVRs = MRI->getNumVirtRegs();
1655
10.6k
  for (unsigned i = 0; i < numVRs; 
i++8.93k
) {
1656
8.93k
    unsigned int vr = TRI->index2VirtReg(i);
1657
8.93k
    const TargetRegisterClass *RC = MRI->getRegClass(vr);
1658
8.93k
    DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1659
8.93k
    int n = regmap.size();
1660
8.93k
    regmap.insert(std::make_pair(vr, n + 1));
1661
8.93k
  }
1662
1.68k
1663
1.68k
  // Emit register declarations
1664
1.68k
  // @TODO: Extract out the real register usage
1665
1.68k
  // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
1666
1.68k
  // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
1667
1.68k
  // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
1668
1.68k
  // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
1669
1.68k
  // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n";
1670
1.68k
  // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
1671
1.68k
  // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n";
1672
1.68k
1673
1.68k
  // Emit declaration of the virtual registers or 'physical' registers for
1674
1.68k
  // each register class
1675
23.5k
  for (unsigned i=0; i< TRI->getNumRegClasses(); 
i++21.9k
) {
1676
21.9k
    const TargetRegisterClass *RC = TRI->getRegClass(i);
1677
21.9k
    DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1678
21.9k
    std::string rcname = getNVPTXRegClassName(RC);
1679
21.9k
    std::string rcStr = getNVPTXRegClassStr(RC);
1680
21.9k
    int n = regmap.size();
1681
21.9k
1682
21.9k
    // Only declare those registers that may be used.
1683
21.9k
    if (n) {
1684
3.32k
       O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
1685
3.32k
         << ">;\n";
1686
3.32k
    }
1687
21.9k
  }
1688
1.68k
1689
1.68k
  OutStreamer->EmitRawText(O.str());
1690
1.68k
}
1691
1692
1
void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
1693
1
  APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
1694
1
  bool ignored;
1695
1
  unsigned int numHex;
1696
1
  const char *lead;
1697
1
1698
1
  if (Fp->getType()->getTypeID() == Type::FloatTyID) {
1699
1
    numHex = 8;
1700
1
    lead = "0f";
1701
1
    APF.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &ignored);
1702
1
  } else 
if (0
Fp->getType()->getTypeID() == Type::DoubleTyID0
) {
1703
0
    numHex = 16;
1704
0
    lead = "0d";
1705
0
    APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &ignored);
1706
0
  } else
1707
0
    llvm_unreachable("unsupported fp type");
1708
1
1709
1
  APInt API = APF.bitcastToAPInt();
1710
1
  O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
1711
1
}
1712
1713
13
void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
1714
13
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1715
8
    O << CI->getValue();
1716
8
    return;
1717
8
  }
1718
5
  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1719
1
    printFPConstant(CFP, O);
1720
1
    return;
1721
1
  }
1722
4
  if (isa<ConstantPointerNull>(CPV)) {
1723
0
    O << "0";
1724
0
    return;
1725
0
  }
1726
4
  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1727
3
    bool IsNonGenericPointer = false;
1728
3
    if (GVar->getType()->getAddressSpace() != 0) {
1729
3
      IsNonGenericPointer = true;
1730
3
    }
1731
3
    if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
1732
0
      O << "generic(";
1733
0
      getSymbol(GVar)->print(O, MAI);
1734
0
      O << ")";
1735
3
    } else {
1736
3
      getSymbol(GVar)->print(O, MAI);
1737
3
    }
1738
3
    return;
1739
3
  }
1740
1
  if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1741
1
    const Value *v = Cexpr->stripPointerCasts();
1742
1
    PointerType *PTy = dyn_cast<PointerType>(Cexpr->getType());
1743
1
    bool IsNonGenericPointer = false;
1744
1
    if (PTy && PTy->getAddressSpace() != 0) {
1745
0
      IsNonGenericPointer = true;
1746
0
    }
1747
1
    if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
1748
1
      if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) {
1749
1
        O << "generic(";
1750
1
        getSymbol(GVar)->print(O, MAI);
1751
1
        O << ")";
1752
1
      } else {
1753
0
        getSymbol(GVar)->print(O, MAI);
1754
0
      }
1755
1
      return;
1756
1
    } else {
1757
0
      lowerConstant(CPV)->print(O, MAI);
1758
0
      return;
1759
0
    }
1760
0
  }
1761
0
  llvm_unreachable("Not scalar type found in printScalarConstant()");
1762
0
}
1763
1764
// These utility functions assure we get the right sequence of bytes for a given
1765
// type even for big-endian machines
1766
74
/// Store the object representation of the integer \p val into \p p in
/// little-endian order, independent of the host's byte order.
///
/// \param p   destination buffer; must have room for at least sizeof(T) bytes.
/// \param val integer value to serialize (signed or unsigned).
///
/// The value is widened to an unsigned 64-bit integer before shifting.
/// Conversion to unsigned is modular, so negative inputs yield their
/// two's-complement bytes, and right-shifting an unsigned value is fully
/// defined (shifting a negative signed value is implementation-defined
/// before C++20).
template <typename T> static void ConvertIntToBytes(unsigned char *p, T val) {
  uint64_t bits = static_cast<uint64_t>(val);
  for (unsigned i = 0; i < sizeof(T); ++i) {
    p[i] = static_cast<unsigned char>(bits);
    bits >>= 8;
  }
}
1773
2
/// Store the 32-bit representation of \p val into \p p in little-endian
/// order, independent of the host's byte order.
///
/// \param p   destination buffer; must have room for at least 4 bytes.
/// \param val value whose bit pattern is serialized.
///
/// The bits are copied out with memcpy rather than by casting `&val` to an
/// integer pointer, which would violate the strict-aliasing rule.
static void ConvertFloatToBytes(unsigned char *p, float val) {
  static_assert(sizeof(float) == sizeof(uint32_t),
                "float is expected to be 32 bits wide");
  uint32_t bits;
  std::memcpy(&bits, &val, sizeof(bits));
  for (unsigned i = 0; i < sizeof(bits); ++i) {
    p[i] = static_cast<unsigned char>(bits);
    bits >>= 8;
  }
}
1780
2
/// Store the 64-bit representation of \p val into \p p in little-endian
/// order, independent of the host's byte order.
///
/// \param p   destination buffer; must have room for at least 8 bytes.
/// \param val value whose bit pattern is serialized.
///
/// The bits are copied out with memcpy rather than by casting `&val` to an
/// integer pointer, which would violate the strict-aliasing rule.
static void ConvertDoubleToBytes(unsigned char *p, double val) {
  static_assert(sizeof(double) == sizeof(uint64_t),
                "double is expected to be 64 bits wide");
  uint64_t bits;
  std::memcpy(&bits, &val, sizeof(bits));
  for (unsigned i = 0; i < sizeof(bits); ++i) {
    p[i] = static_cast<unsigned char>(bits);
    bits >>= 8;
  }
}
1787
1788
void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
1789
96
                                   AggBuffer *aggBuffer) {
1790
96
  const DataLayout &DL = getDataLayout();
1791
96
1792
96
  if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
1793
8
    int s = DL.getTypeAllocSize(CPV->getType());
1794
8
    if (s < Bytes)
1795
0
      s = Bytes;
1796
8
    aggBuffer->addZeros(s);
1797
8
    return;
1798
8
  }
1799
88
1800
88
  unsigned char ptr[8];
1801
88
  switch (CPV->getType()->getTypeID()) {
1802
88
1803
88
  case Type::IntegerTyID: {
1804
70
    Type *ETy = CPV->getType();
1805
70
    if (ETy == Type::getInt8Ty(CPV->getContext())) {
1806
64
      unsigned char c = (unsigned char)cast<ConstantInt>(CPV)->getZExtValue();
1807
64
      ConvertIntToBytes<>(ptr, c);
1808
64
      aggBuffer->addBytes(ptr, 1, Bytes);
1809
64
    } else 
if (6
ETy == Type::getInt16Ty(CPV->getContext())6
) {
1810
2
      short int16 = (short)cast<ConstantInt>(CPV)->getZExtValue();
1811
2
      ConvertIntToBytes<>(ptr, int16);
1812
2
      aggBuffer->addBytes(ptr, 2, Bytes);
1813
4
    } else if (ETy == Type::getInt32Ty(CPV->getContext())) {
1814
2
      if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
1815
2
        int int32 = (int)(constInt->getZExtValue());
1816
2
        ConvertIntToBytes<>(ptr, int32);
1817
2
        aggBuffer->addBytes(ptr, 4, Bytes);
1818
2
        break;
1819
2
      } else 
if (const auto *0
Cexpr0
= dyn_cast<ConstantExpr>(CPV)) {
1820
0
        if (const auto *constInt = dyn_cast_or_null<ConstantInt>(
1821
0
                ConstantFoldConstant(Cexpr, DL))) {
1822
0
          int int32 = (int)(constInt->getZExtValue());
1823
0
          ConvertIntToBytes<>(ptr, int32);
1824
0
          aggBuffer->addBytes(ptr, 4, Bytes);
1825
0
          break;
1826
0
        }
1827
0
        if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1828
0
          Value *v = Cexpr->getOperand(0)->stripPointerCasts();
1829
0
          aggBuffer->addSymbol(v, Cexpr->getOperand(0));
1830
0
          aggBuffer->addZeros(4);
1831
0
          break;
1832
0
        }
1833
0
      }
1834
0
      llvm_unreachable("unsupported integer const type");
1835
2
    } else if (ETy == Type::getInt64Ty(CPV->getContext())) {
1836
2
      if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
1837
2
        long long int64 = (long long)(constInt->getZExtValue());
1838
2
        ConvertIntToBytes<>(ptr, int64);
1839
2
        aggBuffer->addBytes(ptr, 8, Bytes);
1840
2
        break;
1841
2
      } else 
if (const ConstantExpr *0
Cexpr0
= dyn_cast<ConstantExpr>(CPV)) {
1842
0
        if (const auto *constInt = dyn_cast_or_null<ConstantInt>(
1843
0
                ConstantFoldConstant(Cexpr, DL))) {
1844
0
          long long int64 = (long long)(constInt->getZExtValue());
1845
0
          ConvertIntToBytes<>(ptr, int64);
1846
0
          aggBuffer->addBytes(ptr, 8, Bytes);
1847
0
          break;
1848
0
        }
1849
0
        if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1850
0
          Value *v = Cexpr->getOperand(0)->stripPointerCasts();
1851
0
          aggBuffer->addSymbol(v, Cexpr->getOperand(0));
1852
0
          aggBuffer->addZeros(8);
1853
0
          break;
1854
0
        }
1855
0
      }
1856
0
      llvm_unreachable("unsupported integer const type");
1857
0
    } else
1858
2
      llvm_unreachable("unsupported integer const type");
1859
70
    
break66
;
1860
70
  }
1861
70
  case Type::HalfTyID:
1862
8
  case Type::FloatTyID:
1863
8
  case Type::DoubleTyID: {
1864
8
    const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
1865
8
    Type *Ty = CFP->getType();
1866
8
    if (Ty == Type::getHalfTy(CPV->getContext())) {
1867
4
      APInt API = CFP->getValueAPF().bitcastToAPInt();
1868
4
      uint16_t float16 = API.getLoBits(16).getZExtValue();
1869
4
      ConvertIntToBytes<>(ptr, float16);
1870
4
      aggBuffer->addBytes(ptr, 2, Bytes);
1871
4
    } else if (Ty == Type::getFloatTy(CPV->getContext())) {
1872
2
      float float32 = (float) CFP->getValueAPF().convertToFloat();
1873
2
      ConvertFloatToBytes(ptr, float32);
1874
2
      aggBuffer->addBytes(ptr, 4, Bytes);
1875
2
    } else if (Ty == Type::getDoubleTy(CPV->getContext())) {
1876
2
      double float64 = CFP->getValueAPF().convertToDouble();
1877
2
      ConvertDoubleToBytes(ptr, float64);
1878
2
      aggBuffer->addBytes(ptr, 8, Bytes);
1879
2
    } else {
1880
0
      llvm_unreachable("unsupported fp const type");
1881
0
    }
1882
8
    break;
1883
8
  }
1884
8
  case Type::PointerTyID: {
1885
8
    if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1886
6
      aggBuffer->addSymbol(GVar, GVar);
1887
6
    } else 
if (const ConstantExpr *2
Cexpr2
= dyn_cast<ConstantExpr>(CPV)) {
1888
2
      const Value *v = Cexpr->stripPointerCasts();
1889
2
      aggBuffer->addSymbol(v, Cexpr);
1890
2
    }
1891
8
    unsigned int s = DL.getTypeAllocSize(CPV->getType());
1892
8
    aggBuffer->addZeros(s);
1893
8
    break;
1894
8
  }
1895
8
1896
8
  case Type::ArrayTyID:
1897
2
  case Type::VectorTyID:
1898
2
  case Type::StructTyID: {
1899
2
    if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) {
1900
2
      int ElementSize = DL.getTypeAllocSize(CPV->getType());
1901
2
      bufferAggregateConstant(CPV, aggBuffer);
1902
2
      if (Bytes > ElementSize)
1903
0
        aggBuffer->addZeros(Bytes - ElementSize);
1904
2
    } else 
if (0
isa<ConstantAggregateZero>(CPV)0
)
1905
0
      aggBuffer->addZeros(Bytes);
1906
0
    else
1907
0
      llvm_unreachable("Unexpected Constant type");
1908
2
    break;
1909
2
  }
1910
2
1911
2
  default:
1912
0
    llvm_unreachable("unsupported type");
1913
88
  }
1914
88
}
1915
1916
void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
1917
23
                                              AggBuffer *aggBuffer) {
1918
23
  const DataLayout &DL = getDataLayout();
1919
23
  int Bytes;
1920
23
1921
23
  // Integers of arbitrary width
1922
23
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1923
2
    APInt Val = CI->getValue();
1924
34
    for (unsigned I = 0, E = DL.getTypeAllocSize(CPV->getType()); I < E; 
++I32
) {
1925
32
      uint8_t Byte = Val.getLoBits(8).getZExtValue();
1926
32
      aggBuffer->addBytes(&Byte, 1, 1);
1927
32
      Val.lshrInPlace(8);
1928
32
    }
1929
2
    return;
1930
2
  }
1931
21
1932
21
  // Old constants
1933
21
  if (isa<ConstantArray>(CPV) || 
isa<ConstantVector>(CPV)16
) {
1934
5
    if (CPV->getNumOperands())
1935
13
      
for (unsigned i = 0, e = CPV->getNumOperands(); 5
i != e;
++i8
)
1936
8
        bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
1937
5
    return;
1938
5
  }
1939
16
1940
16
  if (const ConstantDataSequential *CDS =
1941
14
          dyn_cast<ConstantDataSequential>(CPV)) {
1942
14
    if (CDS->getNumElements())
1943
98
      
for (unsigned i = 0; 14
i < CDS->getNumElements();
++i84
)
1944
84
        bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
1945
84
                     aggBuffer);
1946
14
    return;
1947
14
  }
1948
2
1949
2
  if (isa<ConstantStruct>(CPV)) {
1950
2
    if (CPV->getNumOperands()) {
1951
2
      StructType *ST = cast<StructType>(CPV->getType());
1952
6
      for (unsigned i = 0, e = CPV->getNumOperands(); i != e; 
++i4
) {
1953
4
        if (i == (e - 1))
1954
2
          Bytes = DL.getStructLayout(ST)->getElementOffset(0) +
1955
2
                  DL.getTypeAllocSize(ST) -
1956
2
                  DL.getStructLayout(ST)->getElementOffset(i);
1957
2
        else
1958
2
          Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) -
1959
2
                  DL.getStructLayout(ST)->getElementOffset(i);
1960
4
        bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
1961
4
      }
1962
2
    }
1963
2
    return;
1964
2
  }
1965
0
  llvm_unreachable("unsupported constant type in printAggregateConstant()");
1966
0
}
1967
1968
/// lowerConstantForGV - Return an MCExpr for the given Constant.  This is mostly
1969
/// a copy from AsmPrinter::lowerConstant, except customized to only handle
1970
/// expressions that are representable in PTX and create
1971
/// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
1972
const MCExpr *
1973
3
NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) {
1974
3
  MCContext &Ctx = OutContext;
1975
3
1976
3
  if (CV->isNullValue() || isa<UndefValue>(CV))
1977
0
    return MCConstantExpr::create(0, Ctx);
1978
3
1979
3
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
1980
0
    return MCConstantExpr::create(CI->getZExtValue(), Ctx);
1981
3
1982
3
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
1983
1
    const MCSymbolRefExpr *Expr =
1984
1
      MCSymbolRefExpr::create(getSymbol(GV), Ctx);
1985
1
    if (ProcessingGeneric) {
1986
1
      return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
1987
1
    } else {
1988
0
      return Expr;
1989
0
    }
1990
2
  }
1991
2
1992
2
  const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
1993
2
  if (!CE) {
1994
0
    llvm_unreachable("Unknown constant value to lower!");
1995
0
  }
1996
2
1997
2
  switch (CE->getOpcode()) {
1998
2
  default:
1999
0
    // If the code isn't optimized, there may be outstanding folding
2000
0
    // opportunities. Attempt to fold the expression using DataLayout as a
2001
0
    // last resort before giving up.
2002
0
    if (Constant *C = ConstantFoldConstant(CE, getDataLayout()))
2003
0
      if (C && C != CE)
2004
0
        return lowerConstantForGV(C, ProcessingGeneric);
2005
0
2006
0
    // Otherwise report the problem to the user.
2007
0
    {
2008
0
      std::string S;
2009
0
      raw_string_ostream OS(S);
2010
0
      OS << "Unsupported expression in static initializer: ";
2011
0
      CE->printAsOperand(OS, /*PrintType=*/false,
2012
0
                     !MF ? nullptr : MF->getFunction().getParent());
2013
0
      report_fatal_error(OS.str());
2014
0
    }
2015
0
2016
1
  case Instruction::AddrSpaceCast: {
2017
1
    // Strip the addrspacecast and pass along the operand
2018
1
    PointerType *DstTy = cast<PointerType>(CE->getType());
2019
1
    if (DstTy->getAddressSpace() == 0) {
2020
1
      return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
2021
1
    }
2022
0
    std::string S;
2023
0
    raw_string_ostream OS(S);
2024
0
    OS << "Unsupported expression in static initializer: ";
2025
0
    CE->printAsOperand(OS, /*PrintType=*/ false,
2026
0
                       !MF ? nullptr : MF->getFunction().getParent());
2027
0
    report_fatal_error(OS.str());
2028
0
  }
2029
0
2030
1
  case Instruction::GetElementPtr: {
2031
1
    const DataLayout &DL = getDataLayout();
2032
1
2033
1
    // Generate a symbolic expression for the byte address
2034
1
    APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
2035
1
    cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
2036
1
2037
1
    const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
2038
1
                                            ProcessingGeneric);
2039
1
    if (!OffsetAI)
2040
0
      return Base;
2041
1
2042
1
    int64_t Offset = OffsetAI.getSExtValue();
2043
1
    return MCBinaryExpr::createAdd(Base, MCConstantExpr::create(Offset, Ctx),
2044
1
                                   Ctx);
2045
1
  }
2046
1
2047
1
  case Instruction::Trunc:
2048
0
    // We emit the value and depend on the assembler to truncate the generated
2049
0
    // expression properly.  This is important for differences between
2050
0
    // blockaddress labels.  Since the two labels are in the same function, it
2051
0
    // is reasonable to treat their delta as a 32-bit value.
2052
0
    LLVM_FALLTHROUGH;
2053
0
  case Instruction::BitCast:
2054
0
    return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2055
0
2056
0
  case Instruction::IntToPtr: {
2057
0
    const DataLayout &DL = getDataLayout();
2058
0
2059
0
    // Handle casts to pointers by changing them into casts to the appropriate
2060
0
    // integer type.  This promotes constant folding and simplifies this code.
2061
0
    Constant *Op = CE->getOperand(0);
2062
0
    Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()),
2063
0
                                      false/*ZExt*/);
2064
0
    return lowerConstantForGV(Op, ProcessingGeneric);
2065
0
  }
2066
0
2067
0
  case Instruction::PtrToInt: {
2068
0
    const DataLayout &DL = getDataLayout();
2069
0
2070
0
    // Support only foldable casts to/from pointers that can be eliminated by
2071
0
    // changing the pointer to the appropriately sized integer type.
2072
0
    Constant *Op = CE->getOperand(0);
2073
0
    Type *Ty = CE->getType();
2074
0
2075
0
    const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);
2076
0
2077
0
    // We can emit the pointer value into this slot if the slot is an
2078
0
    // integer slot equal to the size of the pointer.
2079
0
    if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
2080
0
      return OpExpr;
2081
0
2082
0
    // Otherwise the pointer is smaller than the resultant integer, mask off
2083
0
    // the high bits so we are sure to get a proper truncation if the input is
2084
0
    // a constant expr.
2085
0
    unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
2086
0
    const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
2087
0
    return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
2088
0
  }
2089
0
2090
0
  // The MC library also has a right-shift operator, but it isn't consistently
2091
0
  // signed or unsigned between different targets.
2092
0
  case Instruction::Add: {
2093
0
    const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2094
0
    const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
2095
0
    switch (CE->getOpcode()) {
2096
0
    default: llvm_unreachable("Unknown binary operator constant cast expr");
2097
0
    case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
2098
0
    }
2099
0
  }
2100
2
  }
2101
2
}
2102
2103
// Copy of MCExpr::print customized for NVPTX
2104
3
void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) {
2105
3
  switch (Expr.getKind()) {
2106
3
  case MCExpr::Target:
2107
1
    return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI);
2108
3
  case MCExpr::Constant:
2109
1
    OS << cast<MCConstantExpr>(Expr).getValue();
2110
1
    return;
2111
3
2112
3
  case MCExpr::SymbolRef: {
2113
0
    const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr);
2114
0
    const MCSymbol &Sym = SRE.getSymbol();
2115
0
    Sym.print(OS, MAI);
2116
0
    return;
2117
3
  }
2118
3
2119
3
  case MCExpr::Unary: {
2120
0
    const MCUnaryExpr &UE = cast<MCUnaryExpr>(Expr);
2121
0
    switch (UE.getOpcode()) {
2122
0
    case MCUnaryExpr::LNot:  OS << '!'; break;
2123
0
    case MCUnaryExpr::Minus: OS << '-'; break;
2124
0
    case MCUnaryExpr::Not:   OS << '~'; break;
2125
0
    case MCUnaryExpr::Plus:  OS << '+'; break;
2126
0
    }
2127
0
    printMCExpr(*UE.getSubExpr(), OS);
2128
0
    return;
2129
0
  }
2130
0
2131
1
  case MCExpr::Binary: {
2132
1
    const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr);
2133
1
2134
1
    // Only print parens around the LHS if it is non-trivial.
2135
1
    if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS()) ||
2136
1
        isa<NVPTXGenericMCSymbolRefExpr>(BE.getLHS())) {
2137
1
      printMCExpr(*BE.getLHS(), OS);
2138
1
    } else {
2139
0
      OS << '(';
2140
0
      printMCExpr(*BE.getLHS(), OS);
2141
0
      OS<< ')';
2142
0
    }
2143
1
2144
1
    switch (BE.getOpcode()) {
2145
1
    case MCBinaryExpr::Add:
2146
1
      // Print "X-42" instead of "X+-42".
2147
1
      if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
2148
1
        if (RHSC->getValue() < 0) {
2149
0
          OS << RHSC->getValue();
2150
0
          return;
2151
0
        }
2152
1
      }
2153
1
2154
1
      OS <<  '+';
2155
1
      break;
2156
1
    
default: 0
llvm_unreachable0
("Unhandled binary operator");
2157
1
    }
2158
1
2159
1
    // Only print parens around the LHS if it is non-trivial.
2160
1
    if (isa<MCConstantExpr>(BE.getRHS()) || 
isa<MCSymbolRefExpr>(BE.getRHS())0
) {
2161
1
      printMCExpr(*BE.getRHS(), OS);
2162
1
    } else {
2163
0
      OS << '(';
2164
0
      printMCExpr(*BE.getRHS(), OS);
2165
0
      OS << ')';
2166
0
    }
2167
1
    return;
2168
1
  }
2169
0
  }
2170
0
2171
0
  llvm_unreachable("Invalid expression kind!");
2172
0
}
2173
2174
/// PrintAsmOperand - Print out an operand for an inline asm expression.
2175
///
2176
bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
2177
8
                                      const char *ExtraCode, raw_ostream &O) {
2178
8
  if (ExtraCode && 
ExtraCode[0]2
) {
2179
2
    if (ExtraCode[1] != 0)
2180
0
      return true; // Unknown modifier.
2181
2
2182
2
    switch (ExtraCode[0]) {
2183
2
    default:
2184
2
      // See if this is a generic print operand
2185
2
      return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
2186
2
    case 'r':
2187
0
      break;
2188
6
    }
2189
6
  }
2190
6
2191
6
  printOperand(MI, OpNo, O);
2192
6
2193
6
  return false;
2194
6
}
2195
2196
bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
2197
                                            unsigned OpNo,
2198
                                            const char *ExtraCode,
2199
0
                                            raw_ostream &O) {
2200
0
  if (ExtraCode && ExtraCode[0])
2201
0
    return true; // Unknown modifier
2202
0
2203
0
  O << '[';
2204
0
  printMemOperand(MI, OpNo, O);
2205
0
  O << ']';
2206
0
2207
0
  return false;
2208
0
}
2209
2210
void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
2211
6
                                   raw_ostream &O) {
2212
6
  const MachineOperand &MO = MI->getOperand(opNum);
2213
6
  switch (MO.getType()) {
2214
6
  case MachineOperand::MO_Register:
2215
6
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
2216
0
      if (MO.getReg() == NVPTX::VRDepot)
2217
0
        O << DEPOTNAME << getFunctionNumber();
2218
0
      else
2219
0
        O << NVPTXInstPrinter::getRegisterName(MO.getReg());
2220
6
    } else {
2221
6
      emitVirtualRegister(MO.getReg(), O);
2222
6
    }
2223
6
    break;
2224
6
2225
6
  case MachineOperand::MO_Immediate:
2226
0
    O << MO.getImm();
2227
0
    break;
2228
6
2229
6
  case MachineOperand::MO_FPImmediate:
2230
0
    printFPConstant(MO.getFPImm(), O);
2231
0
    break;
2232
6
2233
6
  case MachineOperand::MO_GlobalAddress:
2234
0
    PrintSymbolOperand(MO, O);
2235
0
    break;
2236
6
2237
6
  case MachineOperand::MO_MachineBasicBlock:
2238
0
    MO.getMBB()->getSymbol()->print(O, MAI);
2239
0
    break;
2240
6
2241
6
  default:
2242
0
    llvm_unreachable("Operand type not supported.");
2243
6
  }
2244
6
}
2245
2246
void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
2247
0
                                      raw_ostream &O, const char *Modifier) {
2248
0
  printOperand(MI, opNum, O);
2249
0
2250
0
  if (Modifier && strcmp(Modifier, "add") == 0) {
2251
0
    O << ", ";
2252
0
    printOperand(MI, opNum + 1, O);
2253
0
  } else {
2254
0
    if (MI->getOperand(opNum + 1).isImm() &&
2255
0
        MI->getOperand(opNum + 1).getImm() == 0)
2256
0
      return; // don't print ',0' or '+0'
2257
0
    O << "+";
2258
0
    printOperand(MI, opNum + 1, O);
2259
0
  }
2260
0
}
2261
2262
// Force static initialization.
2263
78.9k
extern "C" void LLVMInitializeNVPTXAsmPrinter() {
2264
78.9k
  RegisterAsmPrinter<NVPTXAsmPrinter> X(getTheNVPTXTarget32());
2265
78.9k
  RegisterAsmPrinter<NVPTXAsmPrinter> Y(getTheNVPTXTarget64());
2266
78.9k
}