/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Transforms/Scalar/LoadCombine.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- LoadCombine.cpp - Combine Adjacent Loads ---------------------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | /// \file |
10 | | /// This transformation combines adjacent loads. |
11 | | /// |
12 | | //===----------------------------------------------------------------------===// |
13 | | |
14 | | #include "llvm/ADT/DenseMap.h" |
15 | | #include "llvm/ADT/Statistic.h" |
16 | | #include "llvm/Analysis/AliasAnalysis.h" |
17 | | #include "llvm/Analysis/AliasSetTracker.h" |
18 | | #include "llvm/Analysis/GlobalsModRef.h" |
19 | | #include "llvm/Analysis/TargetFolder.h" |
20 | | #include "llvm/IR/DataLayout.h" |
21 | | #include "llvm/IR/Dominators.h" |
22 | | #include "llvm/IR/Function.h" |
23 | | #include "llvm/IR/IRBuilder.h" |
24 | | #include "llvm/IR/Instructions.h" |
25 | | #include "llvm/IR/Module.h" |
26 | | #include "llvm/Pass.h" |
27 | | #include "llvm/Support/Debug.h" |
28 | | #include "llvm/Support/MathExtras.h" |
29 | | #include "llvm/Support/raw_ostream.h" |
30 | | #include "llvm/Transforms/Scalar.h" |
31 | | |
32 | | using namespace llvm; |
33 | | |
34 | | #define DEBUG_TYPE "load-combine" |
35 | | |
36 | | STATISTIC(NumLoadsAnalyzed, "Number of loads analyzed for combining"); |
37 | | STATISTIC(NumLoadsCombined, "Number of loads combined"); |
38 | | |
39 | 0 | #define LDCOMBINE_NAME "Combine Adjacent Loads" |
40 | | |
41 | | namespace { |
42 | | struct PointerOffsetPair { |
43 | | Value *Pointer; |
44 | | APInt Offset; |
45 | | }; |
46 | | |
47 | | struct LoadPOPPair { |
48 | | LoadInst *Load; |
49 | | PointerOffsetPair POP; |
50 | | /// \brief The new load needs to be created before the first load in IR order. |
51 | | unsigned InsertOrder; |
52 | | }; |
53 | | |
54 | | class LoadCombine : public BasicBlockPass { |
55 | | LLVMContext *C; |
56 | | AliasAnalysis *AA; |
57 | | DominatorTree *DT; |
58 | | |
59 | | public: |
60 | 5 | LoadCombine() : BasicBlockPass(ID), C(nullptr), AA(nullptr) { |
61 | 5 | initializeLoadCombinePass(*PassRegistry::getPassRegistry()); |
62 | 5 | } |
63 | | |
64 | | using llvm::Pass::doInitialization; |
65 | | bool doInitialization(Function &) override; |
66 | | bool runOnBasicBlock(BasicBlock &BB) override; |
67 | 5 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
68 | 5 | AU.setPreservesCFG(); |
69 | 5 | AU.addRequired<AAResultsWrapperPass>(); |
70 | 5 | AU.addRequired<DominatorTreeWrapperPass>(); |
71 | 5 | AU.addPreserved<GlobalsAAWrapperPass>(); |
72 | 5 | } |
73 | | |
74 | 0 | StringRef getPassName() const override { return 0 LDCOMBINE_NAME0 ; } |
75 | | static char ID; |
76 | | |
77 | | typedef IRBuilder<TargetFolder> BuilderTy; |
78 | | |
79 | | private: |
80 | | BuilderTy *Builder; |
81 | | |
82 | | PointerOffsetPair getPointerOffsetPair(LoadInst &); |
83 | | bool combineLoads(DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &); |
84 | | bool aggregateLoads(SmallVectorImpl<LoadPOPPair> &); |
85 | | bool combineLoads(SmallVectorImpl<LoadPOPPair> &); |
86 | | }; |
87 | | } |
88 | | |
89 | 15 | bool LoadCombine::doInitialization(Function &F) { |
90 | 15 | DEBUG(dbgs() << "LoadCombine function: " << F.getName() << "\n"); |
91 | 15 | C = &F.getContext(); |
92 | 15 | return true; |
93 | 15 | } |
94 | | |
95 | 38 | PointerOffsetPair LoadCombine::getPointerOffsetPair(LoadInst &LI) { |
96 | 38 | auto &DL = LI.getModule()->getDataLayout(); |
97 | 38 | |
98 | 38 | PointerOffsetPair POP; |
99 | 38 | POP.Pointer = LI.getPointerOperand(); |
100 | 38 | unsigned BitWidth = DL.getPointerSizeInBits(LI.getPointerAddressSpace()); |
101 | 38 | POP.Offset = APInt(BitWidth, 0); |
102 | 38 | |
103 | 75 | while (isa<BitCastInst>(POP.Pointer) || 75 isa<GetElementPtrInst>(POP.Pointer)63 ) {37 |
104 | 37 | if (auto *GEP37 = dyn_cast<GetElementPtrInst>(POP.Pointer)) {25 |
105 | 25 | APInt LastOffset = POP.Offset; |
106 | 25 | if (!GEP->accumulateConstantOffset(DL, POP.Offset)25 ) {0 |
107 | 0 | // Can't handle GEPs with variable indices. |
108 | 0 | POP.Offset = LastOffset; |
109 | 0 | return POP; |
110 | 0 | } |
111 | 25 | POP.Pointer = GEP->getPointerOperand(); |
112 | 12 | } else if (auto *12 BC12 = dyn_cast<BitCastInst>(POP.Pointer)) {12 |
113 | 12 | POP.Pointer = BC->getOperand(0); |
114 | 12 | } |
115 | 37 | } |
116 | 38 | return POP; |
117 | 38 | } |
118 | | |
119 | | bool LoadCombine::combineLoads( |
120 | 19 | DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &LoadMap) { |
121 | 19 | bool Combined = false; |
122 | 16 | for (auto &Loads : LoadMap) { |
123 | 16 | if (Loads.second.size() < 2) |
124 | 6 | continue; |
125 | 10 | std::sort(Loads.second.begin(), Loads.second.end(), |
126 | 39 | [](const LoadPOPPair &A, const LoadPOPPair &B) { |
127 | 39 | return A.POP.Offset.slt(B.POP.Offset); |
128 | 39 | }); |
129 | 10 | if (aggregateLoads(Loads.second)) |
130 | 9 | Combined = true; |
131 | 10 | } |
132 | 19 | return Combined; |
133 | 19 | } |
134 | | |
135 | | /// \brief Try to aggregate loads from a sorted list of loads to be combined. |
136 | | /// |
137 | | /// It is guaranteed that no writes occur between any of the loads. All loads |
138 | | /// have the same base pointer. There are at least two loads. |
139 | 10 | bool LoadCombine::aggregateLoads(SmallVectorImpl<LoadPOPPair> &Loads) { |
140 | 10 | assert(Loads.size() >= 2 && "Insufficient loads!"); |
141 | 10 | LoadInst *BaseLoad = nullptr; |
142 | 10 | SmallVector<LoadPOPPair, 8> AggregateLoads; |
143 | 10 | bool Combined = false; |
144 | 10 | bool ValidPrevOffset = false; |
145 | 10 | APInt PrevOffset; |
146 | 10 | uint64_t PrevSize = 0; |
147 | 32 | for (auto &L : Loads) { |
148 | 32 | if (ValidPrevOffset == false32 ) {10 |
149 | 10 | BaseLoad = L.Load; |
150 | 10 | PrevOffset = L.POP.Offset; |
151 | 10 | PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize( |
152 | 10 | L.Load->getType()); |
153 | 10 | AggregateLoads.push_back(L); |
154 | 10 | ValidPrevOffset = true; |
155 | 10 | continue; |
156 | 10 | } |
157 | 22 | if (22 L.Load->getAlignment() > BaseLoad->getAlignment()22 ) |
158 | 0 | continue; |
159 | 22 | APInt PrevEnd = PrevOffset + PrevSize; |
160 | 22 | if (L.POP.Offset.sgt(PrevEnd)22 ) {1 |
161 | 1 | // No other load will be combinable |
162 | 1 | if (combineLoads(AggregateLoads)) |
163 | 0 | Combined = true; |
164 | 1 | AggregateLoads.clear(); |
165 | 1 | ValidPrevOffset = false; |
166 | 1 | continue; |
167 | 1 | } |
168 | 21 | if (21 L.POP.Offset != PrevEnd21 ) |
169 | 21 | // This load is offset less than the size of the last load. |
170 | 21 | // FIXME: We may want to handle this case. |
171 | 0 | continue; |
172 | 21 | PrevOffset = L.POP.Offset; |
173 | 21 | PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize( |
174 | 21 | L.Load->getType()); |
175 | 21 | AggregateLoads.push_back(L); |
176 | 21 | } |
177 | 10 | if (combineLoads(AggregateLoads)) |
178 | 9 | Combined = true; |
179 | 10 | return Combined; |
180 | 10 | } |
181 | | |
182 | | /// \brief Given a list of combinable load. Combine the maximum number of them. |
183 | 11 | bool LoadCombine::combineLoads(SmallVectorImpl<LoadPOPPair> &Loads) { |
184 | 11 | // Remove loads from the end while the size is not a power of 2. |
185 | 11 | unsigned TotalSize = 0; |
186 | 11 | for (const auto &L : Loads) |
187 | 31 | TotalSize += L.Load->getType()->getPrimitiveSizeInBits(); |
188 | 14 | while (TotalSize != 0 && 14 !isPowerOf2_32(TotalSize)13 ) |
189 | 3 | TotalSize -= Loads.pop_back_val().Load->getType()->getPrimitiveSizeInBits(); |
190 | 11 | if (Loads.size() < 2) |
191 | 2 | return false; |
192 | 11 | |
193 | 9 | DEBUG9 ({9 |
194 | 9 | dbgs() << "***** Combining Loads ******\n"; |
195 | 9 | for (const auto &L : Loads) { |
196 | 9 | dbgs() << L.POP.Offset << ": " << *L.Load << "\n"; |
197 | 9 | } |
198 | 9 | }); |
199 | 9 | |
200 | 9 | // Find first load. This is where we put the new load. |
201 | 9 | LoadPOPPair FirstLP; |
202 | 9 | FirstLP.InsertOrder = -1u; |
203 | 9 | for (const auto &L : Loads) |
204 | 27 | if (27 L.InsertOrder < FirstLP.InsertOrder27 ) |
205 | 10 | FirstLP = L; |
206 | 9 | |
207 | 9 | unsigned AddressSpace = |
208 | 9 | FirstLP.POP.Pointer->getType()->getPointerAddressSpace(); |
209 | 9 | |
210 | 9 | Builder->SetInsertPoint(FirstLP.Load); |
211 | 9 | Value *Ptr = Builder->CreateConstGEP1_64( |
212 | 9 | Builder->CreatePointerCast(Loads[0].POP.Pointer, |
213 | 9 | Builder->getInt8PtrTy(AddressSpace)), |
214 | 9 | Loads[0].POP.Offset.getSExtValue()); |
215 | 9 | LoadInst *NewLoad = new LoadInst( |
216 | 9 | Builder->CreatePointerCast( |
217 | 9 | Ptr, PointerType::get(IntegerType::get(Ptr->getContext(), TotalSize), |
218 | 9 | Ptr->getType()->getPointerAddressSpace())), |
219 | 9 | Twine(Loads[0].Load->getName()) + ".combined", false, |
220 | 9 | Loads[0].Load->getAlignment(), FirstLP.Load); |
221 | 9 | |
222 | 27 | for (const auto &L : Loads) { |
223 | 27 | Builder->SetInsertPoint(L.Load); |
224 | 27 | Value *V = Builder->CreateExtractInteger( |
225 | 27 | L.Load->getModule()->getDataLayout(), NewLoad, |
226 | 27 | cast<IntegerType>(L.Load->getType()), |
227 | 27 | (L.POP.Offset - Loads[0].POP.Offset).getZExtValue(), "combine.extract"); |
228 | 27 | L.Load->replaceAllUsesWith(V); |
229 | 27 | } |
230 | 9 | |
231 | 9 | NumLoadsCombined += Loads.size(); |
232 | 9 | return true; |
233 | 11 | } |
234 | | |
235 | 17 | bool LoadCombine::runOnBasicBlock(BasicBlock &BB) { |
236 | 17 | if (skipBasicBlock(BB)) |
237 | 0 | return false; |
238 | 17 | |
239 | 17 | AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); |
240 | 17 | DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
241 | 17 | |
242 | 17 | // Skip analysing dead blocks (not forward reachable from function entry). |
243 | 17 | if (!DT->isReachableFromEntry(&BB)17 ) {2 |
244 | 2 | DEBUG(dbgs() << "LC: skipping unreachable " << BB.getName() << |
245 | 2 | " in " << BB.getParent()->getName() << "\n"); |
246 | 2 | return false; |
247 | 2 | } |
248 | 17 | |
249 | 15 | IRBuilder<TargetFolder> TheBuilder( |
250 | 15 | BB.getContext(), TargetFolder(BB.getModule()->getDataLayout())); |
251 | 15 | Builder = &TheBuilder; |
252 | 15 | |
253 | 15 | DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> LoadMap; |
254 | 15 | AliasSetTracker AST(*AA); |
255 | 15 | |
256 | 15 | bool Combined = false; |
257 | 15 | unsigned Index = 0; |
258 | 182 | for (auto &I : BB) { |
259 | 182 | if (I.mayThrow() || 182 AST.containsUnknown(&I)182 ) {4 |
260 | 4 | if (combineLoads(LoadMap)) |
261 | 0 | Combined = true; |
262 | 4 | LoadMap.clear(); |
263 | 4 | AST.clear(); |
264 | 4 | continue; |
265 | 4 | } |
266 | 178 | if (178 I.mayWriteToMemory()178 ) {4 |
267 | 4 | AST.add(&I); |
268 | 4 | continue; |
269 | 4 | } |
270 | 174 | LoadInst *LI = dyn_cast<LoadInst>(&I); |
271 | 174 | if (!LI) |
272 | 136 | continue; |
273 | 38 | ++NumLoadsAnalyzed; |
274 | 38 | if (!LI->isSimple() || 38 !LI->getType()->isIntegerTy()38 ) |
275 | 0 | continue; |
276 | 38 | auto POP = getPointerOffsetPair(*LI); |
277 | 38 | if (!POP.Pointer) |
278 | 0 | continue; |
279 | 38 | LoadMap[POP.Pointer].push_back({LI, std::move(POP), Index++}); |
280 | 38 | AST.add(LI); |
281 | 38 | } |
282 | 15 | if (combineLoads(LoadMap)) |
283 | 9 | Combined = true; |
284 | 15 | return Combined; |
285 | 17 | } |
286 | | |
287 | | char LoadCombine::ID = 0; |
288 | | |
289 | 0 | BasicBlockPass *llvm::createLoadCombinePass() { |
290 | 0 | return new LoadCombine(); |
291 | 0 | } |
292 | | |
293 | 23.1k | INITIALIZE_PASS_BEGIN23.1k (LoadCombine, "load-combine", LDCOMBINE_NAME, false, false)23.1k
|
294 | 23.1k | INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) |
295 | 23.1k | INITIALIZE_PASS_END(LoadCombine, "load-combine", LDCOMBINE_NAME, false, false) |