/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Transforms/Scalar/DivRemPairs.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- DivRemPairs.cpp - Hoist/decompose division and remainder -*- C++ -*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This pass hoists and/or decomposes integer division and remainder |
10 | | // instructions to enable CFG improvements and better codegen. |
11 | | // |
12 | | //===----------------------------------------------------------------------===// |
13 | | |
14 | | #include "llvm/Transforms/Scalar/DivRemPairs.h" |
15 | | #include "llvm/ADT/DenseMap.h" |
16 | | #include "llvm/ADT/MapVector.h" |
17 | | #include "llvm/ADT/Statistic.h" |
18 | | #include "llvm/Analysis/GlobalsModRef.h" |
19 | | #include "llvm/Analysis/TargetTransformInfo.h" |
20 | | #include "llvm/IR/Dominators.h" |
21 | | #include "llvm/IR/Function.h" |
22 | | #include "llvm/Pass.h" |
23 | | #include "llvm/Support/DebugCounter.h" |
24 | | #include "llvm/Transforms/Scalar.h" |
25 | | #include "llvm/Transforms/Utils/BypassSlowDivision.h" |
26 | | using namespace llvm; |
27 | | |
28 | | #define DEBUG_TYPE "div-rem-pairs" |
29 | | STATISTIC(NumPairs, "Number of div/rem pairs"); |
30 | | STATISTIC(NumHoisted, "Number of instructions hoisted"); |
31 | | STATISTIC(NumDecomposed, "Number of instructions decomposed"); |
32 | | DEBUG_COUNTER(DRPCounter, "div-rem-pairs-transform", |
33 | | "Controls transformations in div-rem-pairs pass"); |
34 | | |
35 | | /// Find matching pairs of integer div/rem ops (they have the same numerator, |
36 | | /// denominator, and signedness). If they exist in different basic blocks, bring |
37 | | /// them together by hoisting or replace the common division operation that is |
38 | | /// implicit in the remainder: |
39 | | /// X % Y <--> X - ((X / Y) * Y). |
40 | | /// |
41 | | /// We can largely ignore the normal safety and cost constraints on speculation |
42 | | /// of these ops when we find a matching pair. This is because we are already |
43 | | /// guaranteed that any exceptions and most cost are already incurred by the |
44 | | /// first member of the pair. |
45 | | /// |
46 | | /// Note: This transform could be an oddball enhancement to EarlyCSE, GVN, or |
47 | | /// SimplifyCFG, but it's split off on its own because it's different enough |
48 | | /// that it doesn't quite match the stated objectives of those passes. |
49 | | static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI, |
50 | 279k | const DominatorTree &DT) { |
51 | 279k | bool Changed = false; |
52 | 279k | |
53 | 279k | // Insert all divide and remainder instructions into maps keyed by their |
54 | 279k | // operands and opcode (signed or unsigned). |
55 | 279k | DenseMap<DivRemMapKey, Instruction *> DivMap; |
56 | 279k | // Use a MapVector for RemMap so that instructions are moved/inserted in a |
57 | 279k | // deterministic order. |
58 | 279k | MapVector<DivRemMapKey, Instruction *> RemMap; |
59 | 2.39M | for (auto &BB : F) { |
60 | 13.0M | for (auto &I : BB) { |
61 | 13.0M | if (I.getOpcode() == Instruction::SDiv) |
62 | 11.4k | DivMap[DivRemMapKey(true, I.getOperand(0), I.getOperand(1))] = &I; |
63 | 13.0M | else if (I.getOpcode() == Instruction::UDiv) |
64 | 8.77k | DivMap[DivRemMapKey(false, I.getOperand(0), I.getOperand(1))] = &I; |
65 | 13.0M | else if (I.getOpcode() == Instruction::SRem) |
66 | 2.77k | RemMap[DivRemMapKey(true, I.getOperand(0), I.getOperand(1))] = &I; |
67 | 13.0M | else if (I.getOpcode() == Instruction::URem) |
68 | 3.62k | RemMap[DivRemMapKey(false, I.getOperand(0), I.getOperand(1))] = &I; |
69 | 13.0M | } |
70 | 2.39M | } |
71 | 279k | |
72 | 279k | // We can iterate over either map because we are only looking for matched |
73 | 279k | // pairs. Choose remainders for efficiency because they are usually even more |
74 | 279k | // rare than division. |
75 | 279k | for (auto &RemPair : RemMap) { |
76 | 6.30k | // Find the matching division instruction from the division map. |
77 | 6.30k | Instruction *DivInst = DivMap[RemPair.first]; |
78 | 6.30k | if (!DivInst) |
79 | 4.53k | continue; |
80 | 1.76k | |
81 | 1.76k | // We have a matching pair of div/rem instructions. If one dominates the |
82 | 1.76k | // other, hoist and/or replace one. |
83 | 1.76k | NumPairs++; |
84 | 1.76k | Instruction *RemInst = RemPair.second; |
85 | 1.76k | bool IsSigned = DivInst->getOpcode() == Instruction::SDiv; |
86 | 1.76k | bool HasDivRemOp = TTI.hasDivRemOp(DivInst->getType(), IsSigned); |
87 | 1.76k | |
88 | 1.76k | // If the target supports div+rem and the instructions are in the same block |
89 | 1.76k | // already, there's nothing to do. The backend should handle this. If the |
90 | 1.76k | // target does not support div+rem, then we will decompose the rem. |
91 | 1.76k | if (HasDivRemOp && RemInst->getParent() == DivInst->getParent()420 ) |
92 | 355 | continue; |
93 | 1.41k | |
94 | 1.41k | bool DivDominates = DT.dominates(DivInst, RemInst); |
95 | 1.41k | if (!DivDominates && !DT.dominates(RemInst, DivInst)352 ) |
96 | 106 | continue; |
97 | 1.30k | |
98 | 1.30k | if (!DebugCounter::shouldExecute(DRPCounter)) |
99 | 0 | continue; |
100 | 1.30k | |
101 | 1.30k | if (HasDivRemOp) { |
102 | 22 | // The target has a single div/rem operation. Hoist the lower instruction |
103 | 22 | // to make the matched pair visible to the backend. |
104 | 22 | if (DivDominates) |
105 | 18 | RemInst->moveAfter(DivInst); |
106 | 4 | else |
107 | 4 | DivInst->moveAfter(RemInst); |
108 | 22 | NumHoisted++; |
109 | 1.28k | } else { |
110 | 1.28k | // The target does not have a single div/rem operation. Decompose the |
111 | 1.28k | // remainder calculation as: |
112 | 1.28k | // X % Y --> X - ((X / Y) * Y). |
113 | 1.28k | Value *X = RemInst->getOperand(0); |
114 | 1.28k | Value *Y = RemInst->getOperand(1); |
115 | 1.28k | Instruction *Mul = BinaryOperator::CreateMul(DivInst, Y); |
116 | 1.28k | Instruction *Sub = BinaryOperator::CreateSub(X, Mul); |
117 | 1.28k | |
118 | 1.28k | // If the remainder dominates, then hoist the division up to that block: |
119 | 1.28k | // |
120 | 1.28k | // bb1: |
121 | 1.28k | // %rem = srem %x, %y |
122 | 1.28k | // bb2: |
123 | 1.28k | // %div = sdiv %x, %y |
124 | 1.28k | // --> |
125 | 1.28k | // bb1: |
126 | 1.28k | // %div = sdiv %x, %y |
127 | 1.28k | // %mul = mul %div, %y |
128 | 1.28k | // %rem = sub %x, %mul |
129 | 1.28k | // |
130 | 1.28k | // If the division dominates, it's already in the right place. The mul+sub |
131 | 1.28k | // will be in a different block because we don't assume that they are |
132 | 1.28k | // cheap to speculatively execute: |
133 | 1.28k | // |
134 | 1.28k | // bb1: |
135 | 1.28k | // %div = sdiv %x, %y |
136 | 1.28k | // bb2: |
137 | 1.28k | // %rem = srem %x, %y |
138 | 1.28k | // --> |
139 | 1.28k | // bb1: |
140 | 1.28k | // %div = sdiv %x, %y |
141 | 1.28k | // bb2: |
142 | 1.28k | // %mul = mul %div, %y |
143 | 1.28k | // %rem = sub %x, %mul |
144 | 1.28k | // |
145 | 1.28k | // If the div and rem are in the same block, we do the same transform, |
146 | 1.28k | // but any code movement would be within the same block. |
147 | 1.28k | |
148 | 1.28k | if (!DivDominates) |
149 | 242 | DivInst->moveBefore(RemInst); |
150 | 1.28k | Mul->insertAfter(RemInst); |
151 | 1.28k | Sub->insertAfter(Mul); |
152 | 1.28k | |
153 | 1.28k | // Now kill the explicit remainder. We have replaced it with: |
154 | 1.28k | // (sub X, (mul (div X, Y), Y) |
155 | 1.28k | RemInst->replaceAllUsesWith(Sub); |
156 | 1.28k | RemInst->eraseFromParent(); |
157 | 1.28k | NumDecomposed++; |
158 | 1.28k | } |
159 | 1.30k | Changed = true; |
160 | 1.30k | } |
161 | 279k | |
162 | 279k | return Changed; |
163 | 279k | } |
164 | | |
165 | | // Pass manager boilerplate below here. |
166 | | |
167 | | namespace { |
168 | | struct DivRemPairsLegacyPass : public FunctionPass { |
169 | | static char ID; |
170 | 12.9k | DivRemPairsLegacyPass() : FunctionPass(ID) { |
171 | 12.9k | initializeDivRemPairsLegacyPassPass(*PassRegistry::getPassRegistry()); |
172 | 12.9k | } |
173 | | |
174 | 12.9k | void getAnalysisUsage(AnalysisUsage &AU) const override { |
175 | 12.9k | AU.addRequired<DominatorTreeWrapperPass>(); |
176 | 12.9k | AU.addRequired<TargetTransformInfoWrapperPass>(); |
177 | 12.9k | AU.setPreservesCFG(); |
178 | 12.9k | AU.addPreserved<DominatorTreeWrapperPass>(); |
179 | 12.9k | AU.addPreserved<GlobalsAAWrapperPass>(); |
180 | 12.9k | FunctionPass::getAnalysisUsage(AU); |
181 | 12.9k | } |
182 | | |
183 | 278k | bool runOnFunction(Function &F) override { |
184 | 278k | if (skipFunction(F)) |
185 | 44 | return false; |
186 | 278k | auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); |
187 | 278k | auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
188 | 278k | return optimizeDivRem(F, TTI, DT); |
189 | 278k | } |
190 | | }; |
191 | | } |
192 | | |
193 | | char DivRemPairsLegacyPass::ID = 0; |
194 | 48.6k | INITIALIZE_PASS_BEGIN(DivRemPairsLegacyPass, "div-rem-pairs", |
195 | 48.6k | "Hoist/decompose integer division and remainder", false, |
196 | 48.6k | false) |
197 | 48.6k | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
198 | 48.6k | INITIALIZE_PASS_END(DivRemPairsLegacyPass, "div-rem-pairs", |
199 | | "Hoist/decompose integer division and remainder", false, |
200 | | false) |
201 | 12.9k | FunctionPass *llvm::createDivRemPairsPass() { |
202 | 12.9k | return new DivRemPairsLegacyPass(); |
203 | 12.9k | } |
204 | | |
205 | | PreservedAnalyses DivRemPairsPass::run(Function &F, |
206 | 860 | FunctionAnalysisManager &FAM) { |
207 | 860 | TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F); |
208 | 860 | DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F); |
209 | 860 | if (!optimizeDivRem(F, TTI, DT)) |
210 | 860 | return PreservedAnalyses::all(); |
211 | 0 | // TODO: This pass just hoists/replaces math ops - all analyses are preserved? |
212 | 0 | PreservedAnalyses PA; |
213 | 0 | PA.preserveSet<CFGAnalyses>(); |
214 | 0 | PA.preserve<GlobalsAA>(); |
215 | 0 | return PA; |
216 | 0 | } |