Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
Line
Count
Source (jump to first uncovered line)
1
//===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
/// \file
10
/// \brief This pass propagates attributes from kernels to the non-entry
11
/// functions. Most of the library functions were not compiled for specific ABI,
12
/// yet will be correctly compiled if proper attributes are propagated from the
13
/// caller.
14
///
15
/// The pass analyzes call graph and propagates ABI target features through the
16
/// call graph.
17
///
18
/// It can run in two modes: as a function or module pass. A function pass
19
/// simply propagates attributes. A module pass clones functions if there are
20
/// callers with different ABI. If a function is cloned all call sites will
21
/// be updated to use a correct clone.
22
///
23
/// A function pass is limited in functionality but can run early in the
24
/// pipeline. A module pass is more powerful but has to run late, so misses
25
/// library folding opportunities.
26
//
27
//===----------------------------------------------------------------------===//
28
29
#include "AMDGPU.h"
30
#include "AMDGPUSubtarget.h"
31
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
32
#include "Utils/AMDGPUBaseInfo.h"
33
#include "llvm/ADT/SmallSet.h"
34
#include "llvm/ADT/SmallVector.h"
35
#include "llvm/IR/Function.h"
36
#include "llvm/IR/Module.h"
37
#include "llvm/Target/TargetMachine.h"
38
#include "llvm/Transforms/Utils/Cloning.h"
39
#include <string>
40
41
#define DEBUG_TYPE "amdgpu-propagate-attributes"
42
43
using namespace llvm;
44
45
namespace llvm {
46
extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
47
}
48
49
namespace {
50
51
class AMDGPUPropagateAttributes {
52
  const FeatureBitset TargetFeatures = {
53
    AMDGPU::FeatureWavefrontSize16,
54
    AMDGPU::FeatureWavefrontSize32,
55
    AMDGPU::FeatureWavefrontSize64
56
  };
57
58
  class Clone{
59
  public:
60
    Clone(FeatureBitset FeatureMask, Function *OrigF, Function *NewF) :
61
2
      FeatureMask(FeatureMask), OrigF(OrigF), NewF(NewF) {}
62
63
    FeatureBitset FeatureMask;
64
    Function *OrigF;
65
    Function *NewF;
66
  };
67
68
  const TargetMachine *TM;
69
70
  // Clone functions as needed or just set attributes.
71
  bool AllowClone;
72
73
  // Option propagation roots.
74
  SmallSet<Function *, 32> Roots;
75
76
  // Clones of functions with their attributes.
77
  SmallVector<Clone, 32> Clones;
78
79
  // Find a clone with required features.
80
  Function *findFunction(const FeatureBitset &FeaturesNeeded,
81
                         Function *OrigF);
82
83
  // Clone function F and set NewFeatures on the clone.
84
  // Clone takes the name of the original function.
85
  Function *cloneWithFeatures(Function &F,
86
                              const FeatureBitset &NewFeatures);
87
88
  // Set new function's features in place.
89
  void setFeatures(Function &F, const FeatureBitset &NewFeatures);
90
91
  std::string getFeatureString(const FeatureBitset &Features) const;
92
93
  // Propagate attributes from Roots.
94
  bool process();
95
96
public:
97
  AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) :
98
25.6k
    TM(TM), AllowClone(AllowClone) {}
99
100
  // Use F as a root and propagate its attributes.
101
  bool process(Function &F);
102
103
  // Propagate attributes starting from kernel functions.
104
  bool process(Module &M);
105
};
106
107
// Allows to propagate attributes early, but no cloning is allowed as it must
108
// be a function pass to run before any optimizations.
109
// TODO: We shall only need one instance of a module pass, but that needs to be
110
// in the linker pipeline which is currently not possible.
111
class AMDGPUPropagateAttributesEarly : public FunctionPass {
112
  const TargetMachine *TM;
113
114
public:
115
  static char ID; // Pass identification
116
117
  AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) :
118
2.89k
    FunctionPass(ID), TM(TM) {
119
2.89k
    initializeAMDGPUPropagateAttributesEarlyPass(
120
2.89k
      *PassRegistry::getPassRegistry());
121
2.89k
  }
122
123
  bool runOnFunction(Function &F) override;
124
};
125
126
// Allows to propagate attributes with cloning but does that late in the
127
// pipeline.
128
class AMDGPUPropagateAttributesLate : public ModulePass {
129
  const TargetMachine *TM;
130
131
public:
132
  static char ID; // Pass identification
133
134
  AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) :
135
97
    ModulePass(ID), TM(TM) {
136
97
    initializeAMDGPUPropagateAttributesLatePass(
137
97
      *PassRegistry::getPassRegistry());
138
97
  }
139
140
  bool runOnModule(Module &M) override;
141
};
142
143
}  // end anonymous namespace.
144
145
char AMDGPUPropagateAttributesEarly::ID = 0;
146
char AMDGPUPropagateAttributesLate::ID = 0;
147
148
INITIALIZE_PASS(AMDGPUPropagateAttributesEarly,
149
                "amdgpu-propagate-attributes-early",
150
                "Early propagate attributes from kernels to functions",
151
                false, false)
152
INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
153
                "amdgpu-propagate-attributes-late",
154
                "Late propagate attributes from kernels to functions",
155
                false, false)
156
157
Function *
158
AMDGPUPropagateAttributes::findFunction(const FeatureBitset &FeaturesNeeded,
159
21
                                        Function *OrigF) {
160
21
  // TODO: search for clone's clones.
161
21
  for (Clone &C : Clones)
162
1
    if (C.OrigF == OrigF && 
FeaturesNeeded == C.FeatureMask0
)
163
0
      return C.NewF;
164
21
165
21
  return nullptr;
166
21
}
167
168
97
bool AMDGPUPropagateAttributes::process(Module &M) {
169
97
  for (auto &F : M.functions())
170
1.28k
    if (AMDGPU::isEntryFunctionCC(F.getCallingConv()))
171
297
      Roots.insert(&F);
172
97
173
97
  return process();
174
97
}
175
176
25.5k
bool AMDGPUPropagateAttributes::process(Function &F) {
177
25.5k
  Roots.insert(&F);
178
25.5k
  return process();
179
25.5k
}
180
181
25.6k
bool AMDGPUPropagateAttributes::process() {
182
25.6k
  bool Changed = false;
183
25.6k
  SmallSet<Function *, 32> NewRoots;
184
25.6k
  SmallSet<Function *, 32> Replaced;
185
25.6k
186
25.6k
  if (Roots.empty())
187
43
    return false;
188
25.6k
  Module &M = *(*Roots.begin())->getParent();
189
25.6k
190
26.0k
  do {
191
26.0k
    Roots.insert(NewRoots.begin(), NewRoots.end());
192
26.0k
    NewRoots.clear();
193
26.0k
194
966k
    for (auto &F : M.functions()) {
195
966k
      if (F.isDeclaration() || 
Roots.count(&F)848k
||
Roots.count(&F)821k
)
196
145k
        continue;
197
821k
198
821k
      const FeatureBitset &CalleeBits =
199
821k
        TM->getSubtargetImpl(F)->getFeatureBits();
200
821k
      SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
201
821k
202
821k
      for (User *U : F.users()) {
203
57.8k
        Instruction *I = dyn_cast<Instruction>(U);
204
57.8k
        if (!I)
205
132
          continue;
206
57.7k
        CallBase *CI = dyn_cast<CallBase>(I);
207
57.7k
        if (!CI)
208
0
          continue;
209
57.7k
        Function *Caller = CI->getCaller();
210
57.7k
        if (!Caller)
211
0
          continue;
212
57.7k
        if (!Roots.count(Caller))
213
56.4k
          continue;
214
1.26k
215
1.26k
        const FeatureBitset &CallerBits =
216
1.26k
          TM->getSubtargetImpl(*Caller)->getFeatureBits() & TargetFeatures;
217
1.26k
218
1.26k
        if (CallerBits == (CalleeBits  & TargetFeatures)) {
219
1.24k
          NewRoots.insert(&F);
220
1.24k
          continue;
221
1.24k
        }
222
21
223
21
        Function *NewF = findFunction(CallerBits, &F);
224
21
        if (!NewF) {
225
21
          FeatureBitset NewFeatures((CalleeBits & ~TargetFeatures) |
226
21
                                    CallerBits);
227
21
          if (!AllowClone) {
228
19
            // This may set different features on different iterations if
229
19
            // there is a contradiction in callers' attributes. In this case
230
19
            // we rely on a second pass running on Module, which is allowed
231
19
            // to clone.
232
19
            setFeatures(F, NewFeatures);
233
19
            NewRoots.insert(&F);
234
19
            Changed = true;
235
19
            break;
236
19
          }
237
2
238
2
          NewF = cloneWithFeatures(F, NewFeatures);
239
2
          Clones.push_back(Clone(CallerBits, &F, NewF));
240
2
          NewRoots.insert(NewF);
241
2
        }
242
21
243
21
        ToReplace.push_back(std::make_pair(CI, NewF));
244
2
        Replaced.insert(&F);
245
2
246
2
        Changed = true;
247
2
      }
248
821k
249
821k
      while (!ToReplace.empty()) {
250
2
        auto R = ToReplace.pop_back_val();
251
2
        R.first->setCalledFunction(R.second);
252
2
      }
253
821k
    }
254
26.0k
  } while (!NewRoots.empty());
255
25.6k
256
25.6k
  for (Function *F : Replaced) {
257
2
    if (F->use_empty())
258
0
      F->eraseFromParent();
259
2
  }
260
25.6k
261
25.6k
  return Changed;
262
25.6k
}
263
264
Function *
265
AMDGPUPropagateAttributes::cloneWithFeatures(Function &F,
266
2
                                             const FeatureBitset &NewFeatures) {
267
2
  LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');
268
2
269
2
  ValueToValueMapTy dummy;
270
2
  Function *NewF = CloneFunction(&F, dummy);
271
2
  setFeatures(*NewF, NewFeatures);
272
2
273
2
  // Swap names. If that is the only clone it will retain the name of now
274
2
  // dead value.
275
2
  if (F.hasName()) {
276
2
    std::string NewName = NewF->getName();
277
2
    NewF->takeName(&F);
278
2
    F.setName(NewName);
279
2
280
2
    // Name has changed, it does not need an external symbol.
281
2
    F.setVisibility(GlobalValue::DefaultVisibility);
282
2
    F.setLinkage(GlobalValue::InternalLinkage);
283
2
  }
284
2
285
2
  return NewF;
286
2
}
287
288
void AMDGPUPropagateAttributes::setFeatures(Function &F,
289
21
                                            const FeatureBitset &NewFeatures) {
290
21
  std::string NewFeatureStr = getFeatureString(NewFeatures);
291
21
292
21
  LLVM_DEBUG(dbgs() << "Set features "
293
21
                    << getFeatureString(NewFeatures & TargetFeatures)
294
21
                    << " on " << F.getName() << '\n');
295
21
296
21
  F.removeFnAttr("target-features");
297
21
  F.addFnAttr("target-features", NewFeatureStr);
298
21
}
299
300
std::string
301
AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
302
21
{
303
21
  std::string Ret;
304
2.24k
  for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) {
305
2.24k
    if (Features[KV.Value])
306
712
      Ret += (StringRef("+") + KV.Key + ",").str();
307
1.53k
    else if (TargetFeatures[KV.Value])
308
42
      Ret += (StringRef("-") + KV.Key + ",").str();
309
2.24k
  }
310
21
  Ret.pop_back(); // Remove last comma.
311
21
  return Ret;
312
21
}
313
314
28.8k
bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) {
315
28.8k
  if (!TM || !AMDGPU::isEntryFunctionCC(F.getCallingConv()))
316
3.31k
    return false;
317
25.5k
318
25.5k
  return AMDGPUPropagateAttributes(TM, false).process(F);
319
25.5k
}
320
321
97
bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
322
97
  if (!TM)
323
0
    return false;
324
97
325
97
  return AMDGPUPropagateAttributes(TM, true).process(M);
326
97
}
327
328
FunctionPass
329
2.89k
*llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) {
330
2.89k
  return new AMDGPUPropagateAttributesEarly(TM);
331
2.89k
}
332
333
ModulePass
334
97
*llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) {
335
97
  return new AMDGPUPropagateAttributesLate(TM);
336
97
}