Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
//===-- NVPTXTargetTransformInfo.cpp - NVPTX specific TTI -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "NVPTXTargetTransformInfo.h"
#include "NVPTXUtilities.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Support/Debug.h"
using namespace llvm;

#define DEBUG_TYPE "NVPTXtti"

// Whether the given intrinsic reads threadIdx.x/y/z.
static bool readsThreadIndex(const IntrinsicInst *II) {
  switch (II->getIntrinsicID()) {
    default: return false;
    case Intrinsic::nvvm_read_ptx_sreg_tid_x:
    case Intrinsic::nvvm_read_ptx_sreg_tid_y:
    case Intrinsic::nvvm_read_ptx_sreg_tid_z:
      return true;
  }
}

static bool readsLaneId(const IntrinsicInst *II) {
  return II->getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_laneid;
}

// Whether the given intrinsic is an atomic instruction in PTX.
static bool isNVVMAtomic(const IntrinsicInst *II) {
  switch (II->getIntrinsicID()) {
    default: return false;
    case Intrinsic::nvvm_atomic_load_inc_32:
    case Intrinsic::nvvm_atomic_load_dec_32:

    case Intrinsic::nvvm_atomic_add_gen_f_cta:
    case Intrinsic::nvvm_atomic_add_gen_f_sys:
    case Intrinsic::nvvm_atomic_add_gen_i_cta:
    case Intrinsic::nvvm_atomic_add_gen_i_sys:
    case Intrinsic::nvvm_atomic_and_gen_i_cta:
    case Intrinsic::nvvm_atomic_and_gen_i_sys:
    case Intrinsic::nvvm_atomic_cas_gen_i_cta:
    case Intrinsic::nvvm_atomic_cas_gen_i_sys:
    case Intrinsic::nvvm_atomic_dec_gen_i_cta:
    case Intrinsic::nvvm_atomic_dec_gen_i_sys:
    case Intrinsic::nvvm_atomic_inc_gen_i_cta:
    case Intrinsic::nvvm_atomic_inc_gen_i_sys:
    case Intrinsic::nvvm_atomic_max_gen_i_cta:
    case Intrinsic::nvvm_atomic_max_gen_i_sys:
    case Intrinsic::nvvm_atomic_min_gen_i_cta:
    case Intrinsic::nvvm_atomic_min_gen_i_sys:
    case Intrinsic::nvvm_atomic_or_gen_i_cta:
    case Intrinsic::nvvm_atomic_or_gen_i_sys:
    case Intrinsic::nvvm_atomic_exch_gen_i_cta:
    case Intrinsic::nvvm_atomic_exch_gen_i_sys:
    case Intrinsic::nvvm_atomic_xor_gen_i_cta:
    case Intrinsic::nvvm_atomic_xor_gen_i_sys:
      return true;
  }
}

bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) {
  // Without inter-procedural analysis, we conservatively assume that arguments
  // to __device__ functions are divergent.
  if (const Argument *Arg = dyn_cast<Argument>(V))
    return !isKernelFunction(*Arg->getParent());

  if (const Instruction *I = dyn_cast<Instruction>(V)) {
    // Without pointer analysis, we conservatively assume values loaded from
    // generic or local address space are divergent.
    if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
      unsigned AS = LI->getPointerAddressSpace();
      return AS == ADDRESS_SPACE_GENERIC || AS == ADDRESS_SPACE_LOCAL;
    }
    // Atomic instructions may cause divergence. Atomic instructions are
    // executed sequentially across all threads in a warp. Therefore, an earlier
    // executed thread may see different memory inputs than a later executed
    // thread. For example, suppose *a = 0 initially.
    //
    //   atom.global.add.s32 d, [a], 1
    //
    // returns 0 for the first thread that enters the critical region, and 1 for
    // the second thread.
    if (I->isAtomic())
      return true;
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      // Instructions that read threadIdx are obviously divergent.
      if (readsThreadIndex(II) || readsLaneId(II))
        return true;
      // Handle the NVPTX atomic intrinsics that cannot be represented as an
      // atomic IR instruction.
      if (isNVVMAtomic(II))
        return true;
    }
    // Conservatively consider the return value of function calls as divergent.
    // We could analyze callees with bodies more precisely using
    // inter-procedural analysis.
    if (isa<CallInst>(I))
      return true;
  }

  return false;
}
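
The atomic case above is the subtle one. As a hedged illustration (plain standard C++, not CUDA and not part of this file), the toy loop below models four threads of a warp performing the atomic add from the comment one after another; each thread reads back a different old value, which is why I->isAtomic() is treated as a source of divergence.

// Illustrative sketch only: a toy warp of four threads (real warps have 32)
// executing an atomic read-modify-write sequentially.
#include <cstdio>

int main() {
  int A = 0;                        // suppose *a = 0 initially
  const int WarpSize = 4;           // toy warp size for illustration
  for (int Tid = 0; Tid < WarpSize; ++Tid) {
    int Old = A;                    // what "atom.global.add.s32 d, [a], 1" returns
    A += 1;
    std::printf("thread %d: d = %d\n", Tid, Old); // prints 0, 1, 2, 3 -> divergent
  }
  return 0;
}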

int NVPTXTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
    TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
    TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) {
  // Legalize the type.
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  switch (ISD) {
  default:
    return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                         Opd1PropInfo, Opd2PropInfo);
  case ISD::ADD:
  case ISD::MUL:
  case ISD::XOR:
  case ISD::OR:
  case ISD::AND:
    // The machine code (SASS) simulates an i64 with two i32. Therefore, we
    // estimate that arithmetic operations on i64 are twice as expensive as
    // those on types that can fit into one machine register.
    if (LT.second.SimpleTy == MVT::i64)
      return 2 * LT.first;
    // Delegate other cases to the basic TTI.
    return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                         Opd1PropInfo, Opd2PropInfo);
  }
}
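
A small standalone sketch of the doubling rule, using a hypothetical helper (toyArithmeticCost, not an LLVM API) and assuming the type is already legal, so the role of LT.first is played by NumParts = 1:

// Toy stand-in for the rule above, not the LLVM cost model itself.
// BitWidth is the width of the legalized scalar type; NumParts mimics LT.first
// (how many legal registers the original type was split into).
#include <cstdio>

static int toyArithmeticCost(unsigned BitWidth, int NumParts = 1) {
  if (BitWidth == 64)
    return 2 * NumParts; // i64 is emulated with two i32 operations in SASS
  return NumParts;       // a type fitting one machine register costs one op
}

int main() {
  std::printf("i32 add cost: %d\n", toyArithmeticCost(32)); // 1
  std::printf("i64 add cost: %d\n", toyArithmeticCost(64)); // 2
  return 0;
}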

void NVPTXTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                           TTI::UnrollingPreferences &UP) {
  BaseT::getUnrollingPreferences(L, SE, UP);

  // Enable partial unrolling and runtime unrolling, but reduce the
  // threshold. This partially unrolls small loops, which are often
  // unrolled by the PTX to SASS compiler anyway, and unrolling earlier
  // can be beneficial.
  UP.Partial = UP.Runtime = true;
  UP.PartialThreshold = UP.Threshold / 4;
}
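
To make the last assignment concrete, here is a minimal sketch. The base threshold of 150 is an assumed value for illustration only; the real number is whatever BaseT::getUnrollingPreferences and the unroll pass options leave in UP.Threshold.

// Minimal sketch; 150 is an assumption, not a value defined in this file.
#include <cstdio>

int main() {
  unsigned Threshold = 150;                  // assumed base unroll threshold
  bool Partial = true, Runtime = true;       // UP.Partial = UP.Runtime = true
  unsigned PartialThreshold = Threshold / 4; // UP.PartialThreshold
  std::printf("Partial=%d Runtime=%d PartialThreshold=%u\n",
              Partial, Runtime, PartialThreshold); // PartialThreshold = 37
  return 0;
}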