Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/CodeGen/ExpandReductions.cpp
Line
Count
Source (jump to first uncovered line)
1
//===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This pass implements IR expansion for reduction intrinsics, allowing targets
10
// to enable the experimental intrinsics until just before codegen.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "llvm/CodeGen/ExpandReductions.h"
15
#include "llvm/Analysis/TargetTransformInfo.h"
16
#include "llvm/CodeGen/Passes.h"
17
#include "llvm/IR/Function.h"
18
#include "llvm/IR/IRBuilder.h"
19
#include "llvm/IR/InstIterator.h"
20
#include "llvm/IR/IntrinsicInst.h"
21
#include "llvm/IR/Intrinsics.h"
22
#include "llvm/IR/Module.h"
23
#include "llvm/Pass.h"
24
#include "llvm/Transforms/Utils/LoopUtils.h"
25
26
using namespace llvm;
27
28
namespace {
29
30
4.19k
unsigned getOpcode(Intrinsic::ID ID) {
31
4.19k
  switch (ID) {
32
4.19k
  case Intrinsic::experimental_vector_reduce_v2_fadd:
33
542
    return Instruction::FAdd;
34
4.19k
  case Intrinsic::experimental_vector_reduce_v2_fmul:
35
438
    return Instruction::FMul;
36
4.19k
  case Intrinsic::experimental_vector_reduce_add:
37
309
    return Instruction::Add;
38
4.19k
  case Intrinsic::experimental_vector_reduce_mul:
39
353
    return Instruction::Mul;
40
4.19k
  case Intrinsic::experimental_vector_reduce_and:
41
433
    return Instruction::And;
42
4.19k
  case Intrinsic::experimental_vector_reduce_or:
43
433
    return Instruction::Or;
44
4.19k
  case Intrinsic::experimental_vector_reduce_xor:
45
433
    return Instruction::Xor;
46
4.19k
  case Intrinsic::experimental_vector_reduce_smax:
47
1.06k
  case Intrinsic::experimental_vector_reduce_smin:
48
1.06k
  case Intrinsic::experimental_vector_reduce_umax:
49
1.06k
  case Intrinsic::experimental_vector_reduce_umin:
50
1.06k
    return Instruction::ICmp;
51
1.06k
  case Intrinsic::experimental_vector_reduce_fmax:
52
194
  case Intrinsic::experimental_vector_reduce_fmin:
53
194
    return Instruction::FCmp;
54
194
  default:
55
0
    llvm_unreachable("Unexpected ID");
56
4.19k
  }
57
4.19k
}
58
59
154k
RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) {
60
154k
  switch (ID) {
61
154k
  case Intrinsic::experimental_vector_reduce_smax:
62
265
    return RecurrenceDescriptor::MRK_SIntMax;
63
154k
  case Intrinsic::experimental_vector_reduce_smin:
64
265
    return RecurrenceDescriptor::MRK_SIntMin;
65
154k
  case Intrinsic::experimental_vector_reduce_umax:
66
265
    return RecurrenceDescriptor::MRK_UIntMax;
67
154k
  case Intrinsic::experimental_vector_reduce_umin:
68
265
    return RecurrenceDescriptor::MRK_UIntMin;
69
154k
  case Intrinsic::experimental_vector_reduce_fmax:
70
97
    return RecurrenceDescriptor::MRK_FloatMax;
71
154k
  case Intrinsic::experimental_vector_reduce_fmin:
72
97
    return RecurrenceDescriptor::MRK_FloatMin;
73
154k
  default:
74
153k
    return RecurrenceDescriptor::MRK_Invalid;
75
154k
  }
76
154k
}
77
78
499k
bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
79
499k
  bool Changed = false;
80
499k
  SmallVector<IntrinsicInst *, 4> Worklist;
81
14.9M
  for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; 
++I14.4M
)
82
14.4M
    if (auto II = dyn_cast<IntrinsicInst>(&*I))
83
389k
      Worklist.push_back(II);
84
499k
85
499k
  for (auto *II : Worklist) {
86
389k
    if (!TTI->shouldExpandReduction(II))
87
234k
      continue;
88
154k
89
154k
    FastMathFlags FMF =
90
154k
        isa<FPMathOperator>(II) ? 
II->getFastMathFlags()23.7k
:
FastMathFlags{}131k
;
91
154k
    Intrinsic::ID ID = II->getIntrinsicID();
92
154k
    RecurrenceDescriptor::MinMaxRecurrenceKind MRK = getMRK(ID);
93
154k
94
154k
    Value *Rdx = nullptr;
95
154k
    IRBuilder<> Builder(II);
96
154k
    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
97
154k
    Builder.setFastMathFlags(FMF);
98
154k
    switch (ID) {
99
154k
    case Intrinsic::experimental_vector_reduce_v2_fadd:
100
648
    case Intrinsic::experimental_vector_reduce_v2_fmul: {
101
648
      // FMFs must be attached to the call, otherwise it's an ordered reduction
102
648
      // and it can't be handled by generating a shuffle sequence.
103
648
      Value *Acc = II->getArgOperand(0);
104
648
      Value *Vec = II->getArgOperand(1);
105
648
      if (!FMF.allowReassoc())
106
316
        Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK);
107
332
      else {
108
332
        Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
109
332
        Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID),
110
332
                                  Acc, Rdx, "bin.rdx");
111
332
      }
112
648
    } break;
113
3.21k
    case Intrinsic::experimental_vector_reduce_add:
114
3.21k
    case Intrinsic::experimental_vector_reduce_mul:
115
3.21k
    case Intrinsic::experimental_vector_reduce_and:
116
3.21k
    case Intrinsic::experimental_vector_reduce_or:
117
3.21k
    case Intrinsic::experimental_vector_reduce_xor:
118
3.21k
    case Intrinsic::experimental_vector_reduce_smax:
119
3.21k
    case Intrinsic::experimental_vector_reduce_smin:
120
3.21k
    case Intrinsic::experimental_vector_reduce_umax:
121
3.21k
    case Intrinsic::experimental_vector_reduce_umin:
122
3.21k
    case Intrinsic::experimental_vector_reduce_fmax:
123
3.21k
    case Intrinsic::experimental_vector_reduce_fmin: {
124
3.21k
      Value *Vec = II->getArgOperand(0);
125
3.21k
      Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
126
3.21k
    } break;
127
150k
    default:
128
150k
      continue;
129
3.86k
    }
130
3.86k
    II->replaceAllUsesWith(Rdx);
131
3.86k
    II->eraseFromParent();
132
3.86k
    Changed = true;
133
3.86k
  }
134
499k
  return Changed;
135
499k
}
136
137
class ExpandReductions : public FunctionPass {
138
public:
139
  static char ID;
140
36.3k
  ExpandReductions() : FunctionPass(ID) {
141
36.3k
    initializeExpandReductionsPass(*PassRegistry::getPassRegistry());
142
36.3k
  }
143
144
499k
  bool runOnFunction(Function &F) override {
145
499k
    const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
146
499k
    return expandReductions(F, TTI);
147
499k
  }
148
149
36.1k
  void getAnalysisUsage(AnalysisUsage &AU) const override {
150
36.1k
    AU.addRequired<TargetTransformInfoWrapperPass>();
151
36.1k
    AU.setPreservesCFG();
152
36.1k
  }
153
};
154
}
155
156
char ExpandReductions::ID;
157
49.0k
INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions",
158
49.0k
                      "Expand reduction intrinsics", false, false)
159
49.0k
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
160
49.0k
INITIALIZE_PASS_END(ExpandReductions, "expand-reductions",
161
                    "Expand reduction intrinsics", false, false)
162
163
36.3k
FunctionPass *llvm::createExpandReductionsPass() {
164
36.3k
  return new ExpandReductions();
165
36.3k
}
166
167
PreservedAnalyses ExpandReductionsPass::run(Function &F,
168
0
                                            FunctionAnalysisManager &AM) {
169
0
  const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
170
0
  if (!expandReductions(F, &TTI))
171
0
    return PreservedAnalyses::all();
172
0
  PreservedAnalyses PA;
173
0
  PA.preserveSet<CFGAnalyses>();
174
0
  return PA;
175
0
}