/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/CodeGen/ExpandReductions.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This pass implements IR expansion for reduction intrinsics, allowing targets |
10 | | // to enable the experimental intrinsics until just before codegen. |
11 | | // |
12 | | //===----------------------------------------------------------------------===// |
13 | | |
14 | | #include "llvm/CodeGen/ExpandReductions.h" |
15 | | #include "llvm/Analysis/TargetTransformInfo.h" |
16 | | #include "llvm/CodeGen/Passes.h" |
17 | | #include "llvm/IR/Function.h" |
18 | | #include "llvm/IR/IRBuilder.h" |
19 | | #include "llvm/IR/InstIterator.h" |
20 | | #include "llvm/IR/IntrinsicInst.h" |
21 | | #include "llvm/IR/Intrinsics.h" |
22 | | #include "llvm/IR/Module.h" |
23 | | #include "llvm/Pass.h" |
24 | | #include "llvm/Transforms/Utils/LoopUtils.h" |
25 | | |
26 | | using namespace llvm; |
27 | | |
28 | | namespace { |
29 | | |
30 | 4.19k | unsigned getOpcode(Intrinsic::ID ID) { |
31 | 4.19k | switch (ID) { |
32 | 4.19k | case Intrinsic::experimental_vector_reduce_v2_fadd: |
33 | 542 | return Instruction::FAdd; |
34 | 4.19k | case Intrinsic::experimental_vector_reduce_v2_fmul: |
35 | 438 | return Instruction::FMul; |
36 | 4.19k | case Intrinsic::experimental_vector_reduce_add: |
37 | 309 | return Instruction::Add; |
38 | 4.19k | case Intrinsic::experimental_vector_reduce_mul: |
39 | 353 | return Instruction::Mul; |
40 | 4.19k | case Intrinsic::experimental_vector_reduce_and: |
41 | 433 | return Instruction::And; |
42 | 4.19k | case Intrinsic::experimental_vector_reduce_or: |
43 | 433 | return Instruction::Or; |
44 | 4.19k | case Intrinsic::experimental_vector_reduce_xor: |
45 | 433 | return Instruction::Xor; |
46 | 4.19k | case Intrinsic::experimental_vector_reduce_smax: |
47 | 1.06k | case Intrinsic::experimental_vector_reduce_smin: |
48 | 1.06k | case Intrinsic::experimental_vector_reduce_umax: |
49 | 1.06k | case Intrinsic::experimental_vector_reduce_umin: |
50 | 1.06k | return Instruction::ICmp; |
51 | 1.06k | case Intrinsic::experimental_vector_reduce_fmax: |
52 | 194 | case Intrinsic::experimental_vector_reduce_fmin: |
53 | 194 | return Instruction::FCmp; |
54 | 194 | default: |
55 | 0 | llvm_unreachable("Unexpected ID"); |
56 | 4.19k | } |
57 | 4.19k | } |
58 | | |
59 | 154k | RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) { |
60 | 154k | switch (ID) { |
61 | 154k | case Intrinsic::experimental_vector_reduce_smax: |
62 | 265 | return RecurrenceDescriptor::MRK_SIntMax; |
63 | 154k | case Intrinsic::experimental_vector_reduce_smin: |
64 | 265 | return RecurrenceDescriptor::MRK_SIntMin; |
65 | 154k | case Intrinsic::experimental_vector_reduce_umax: |
66 | 265 | return RecurrenceDescriptor::MRK_UIntMax; |
67 | 154k | case Intrinsic::experimental_vector_reduce_umin: |
68 | 265 | return RecurrenceDescriptor::MRK_UIntMin; |
69 | 154k | case Intrinsic::experimental_vector_reduce_fmax: |
70 | 97 | return RecurrenceDescriptor::MRK_FloatMax; |
71 | 154k | case Intrinsic::experimental_vector_reduce_fmin: |
72 | 97 | return RecurrenceDescriptor::MRK_FloatMin; |
73 | 154k | default: |
74 | 153k | return RecurrenceDescriptor::MRK_Invalid; |
75 | 154k | } |
76 | 154k | } |
77 | | |
78 | 499k | bool expandReductions(Function &F, const TargetTransformInfo *TTI) { |
79 | 499k | bool Changed = false; |
80 | 499k | SmallVector<IntrinsicInst *, 4> Worklist; |
81 | 14.9M | for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I14.4M ) |
82 | 14.4M | if (auto II = dyn_cast<IntrinsicInst>(&*I)) |
83 | 389k | Worklist.push_back(II); |
84 | 499k | |
85 | 499k | for (auto *II : Worklist) { |
86 | 389k | if (!TTI->shouldExpandReduction(II)) |
87 | 234k | continue; |
88 | 154k | |
89 | 154k | FastMathFlags FMF = |
90 | 154k | isa<FPMathOperator>(II) ? II->getFastMathFlags()23.7k : FastMathFlags{}131k ; |
91 | 154k | Intrinsic::ID ID = II->getIntrinsicID(); |
92 | 154k | RecurrenceDescriptor::MinMaxRecurrenceKind MRK = getMRK(ID); |
93 | 154k | |
94 | 154k | Value *Rdx = nullptr; |
95 | 154k | IRBuilder<> Builder(II); |
96 | 154k | IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); |
97 | 154k | Builder.setFastMathFlags(FMF); |
98 | 154k | switch (ID) { |
99 | 154k | case Intrinsic::experimental_vector_reduce_v2_fadd: |
100 | 648 | case Intrinsic::experimental_vector_reduce_v2_fmul: { |
101 | 648 | // FMFs must be attached to the call, otherwise it's an ordered reduction |
102 | 648 | // and it can't be handled by generating a shuffle sequence. |
103 | 648 | Value *Acc = II->getArgOperand(0); |
104 | 648 | Value *Vec = II->getArgOperand(1); |
105 | 648 | if (!FMF.allowReassoc()) |
106 | 316 | Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK); |
107 | 332 | else { |
108 | 332 | Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK); |
109 | 332 | Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID), |
110 | 332 | Acc, Rdx, "bin.rdx"); |
111 | 332 | } |
112 | 648 | } break; |
113 | 3.21k | case Intrinsic::experimental_vector_reduce_add: |
114 | 3.21k | case Intrinsic::experimental_vector_reduce_mul: |
115 | 3.21k | case Intrinsic::experimental_vector_reduce_and: |
116 | 3.21k | case Intrinsic::experimental_vector_reduce_or: |
117 | 3.21k | case Intrinsic::experimental_vector_reduce_xor: |
118 | 3.21k | case Intrinsic::experimental_vector_reduce_smax: |
119 | 3.21k | case Intrinsic::experimental_vector_reduce_smin: |
120 | 3.21k | case Intrinsic::experimental_vector_reduce_umax: |
121 | 3.21k | case Intrinsic::experimental_vector_reduce_umin: |
122 | 3.21k | case Intrinsic::experimental_vector_reduce_fmax: |
123 | 3.21k | case Intrinsic::experimental_vector_reduce_fmin: { |
124 | 3.21k | Value *Vec = II->getArgOperand(0); |
125 | 3.21k | Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK); |
126 | 3.21k | } break; |
127 | 150k | default: |
128 | 150k | continue; |
129 | 3.86k | } |
130 | 3.86k | II->replaceAllUsesWith(Rdx); |
131 | 3.86k | II->eraseFromParent(); |
132 | 3.86k | Changed = true; |
133 | 3.86k | } |
134 | 499k | return Changed; |
135 | 499k | } |
136 | | |
137 | | class ExpandReductions : public FunctionPass { |
138 | | public: |
139 | | static char ID; |
140 | 36.3k | ExpandReductions() : FunctionPass(ID) { |
141 | 36.3k | initializeExpandReductionsPass(*PassRegistry::getPassRegistry()); |
142 | 36.3k | } |
143 | | |
144 | 499k | bool runOnFunction(Function &F) override { |
145 | 499k | const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); |
146 | 499k | return expandReductions(F, TTI); |
147 | 499k | } |
148 | | |
149 | 36.1k | void getAnalysisUsage(AnalysisUsage &AU) const override { |
150 | 36.1k | AU.addRequired<TargetTransformInfoWrapperPass>(); |
151 | 36.1k | AU.setPreservesCFG(); |
152 | 36.1k | } |
153 | | }; |
154 | | } |
155 | | |
156 | | char ExpandReductions::ID; |
157 | 49.0k | INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions", |
158 | 49.0k | "Expand reduction intrinsics", false, false) |
159 | 49.0k | INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) |
160 | 49.0k | INITIALIZE_PASS_END(ExpandReductions, "expand-reductions", |
161 | | "Expand reduction intrinsics", false, false) |
162 | | |
163 | 36.3k | FunctionPass *llvm::createExpandReductionsPass() { |
164 | 36.3k | return new ExpandReductions(); |
165 | 36.3k | } |
166 | | |
167 | | PreservedAnalyses ExpandReductionsPass::run(Function &F, |
168 | 0 | FunctionAnalysisManager &AM) { |
169 | 0 | const auto &TTI = AM.getResult<TargetIRAnalysis>(F); |
170 | 0 | if (!expandReductions(F, &TTI)) |
171 | 0 | return PreservedAnalyses::all(); |
172 | 0 | PreservedAnalyses PA; |
173 | 0 | PA.preserveSet<CFGAnalyses>(); |
174 | 0 | return PA; |
175 | 0 | } |