/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- HexagonGenExtract.cpp ----------------------------------------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | |
9 | | #include "llvm/ADT/APInt.h" |
10 | | #include "llvm/ADT/GraphTraits.h" |
11 | | #include "llvm/IR/BasicBlock.h" |
12 | | #include "llvm/IR/CFG.h" |
13 | | #include "llvm/IR/Constants.h" |
14 | | #include "llvm/IR/Dominators.h" |
15 | | #include "llvm/IR/Function.h" |
16 | | #include "llvm/IR/IRBuilder.h" |
17 | | #include "llvm/IR/Instruction.h" |
18 | | #include "llvm/IR/Instructions.h" |
19 | | #include "llvm/IR/Intrinsics.h" |
20 | | #include "llvm/IR/PatternMatch.h" |
21 | | #include "llvm/IR/Type.h" |
22 | | #include "llvm/IR/Value.h" |
23 | | #include "llvm/Pass.h" |
24 | | #include "llvm/Support/CommandLine.h" |
25 | | #include <algorithm> |
26 | | #include <cstdint> |
27 | | #include <iterator> |
28 | | |
29 | | using namespace llvm; |
30 | | |
31 | | static cl::opt<unsigned> ExtractCutoff("extract-cutoff", cl::init(~0U), |
32 | | cl::Hidden, cl::desc("Cutoff for generating \"extract\"" |
33 | | " instructions")); |
34 | | |
35 | | // This prevents generating extract instructions that have the offset of 0. |
36 | | // One of the reasons for "extract" is to put a sequence of bits in a regis- |
37 | | // ter, starting at offset 0 (so that these bits can then be used by an |
38 | | // "insert"). If the bits are already at offset 0, it is better not to gene- |
39 | | // rate "extract", since logical bit operations can be merged into compound |
40 | | // instructions (as opposed to "extract"). |
41 | | static cl::opt<bool> NoSR0("extract-nosr0", cl::init(true), cl::Hidden, |
42 | | cl::desc("No extract instruction with offset 0")); |
43 | | |
44 | | static cl::opt<bool> NeedAnd("extract-needand", cl::init(true), cl::Hidden, |
45 | | cl::desc("Require & in extract patterns")); |
46 | | |
47 | | namespace llvm { |
48 | | |
49 | | void initializeHexagonGenExtractPass(PassRegistry&); |
50 | | FunctionPass *createHexagonGenExtract(); |
51 | | |
52 | | } // end namespace llvm |
53 | | |
54 | | namespace { |
55 | | |
56 | | class HexagonGenExtract : public FunctionPass { |
57 | | public: |
58 | | static char ID; |
59 | | |
60 | 861 | HexagonGenExtract() : FunctionPass(ID) { |
61 | 861 | initializeHexagonGenExtractPass(*PassRegistry::getPassRegistry()); |
62 | 861 | } |
63 | | |
64 | 3.36k | StringRef getPassName() const override { |
65 | 3.36k | return "Hexagon generate \"extract\" instructions"; |
66 | 3.36k | } |
67 | | |
68 | | bool runOnFunction(Function &F) override; |
69 | | |
70 | 854 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
71 | 854 | AU.addRequired<DominatorTreeWrapperPass>(); |
72 | 854 | AU.addPreserved<DominatorTreeWrapperPass>(); |
73 | 854 | FunctionPass::getAnalysisUsage(AU); |
74 | 854 | } |
75 | | |
76 | | private: |
77 | | bool visitBlock(BasicBlock *B); |
78 | | bool convert(Instruction *In); |
79 | | |
80 | | unsigned ExtractCount = 0; |
81 | | DominatorTree *DT; |
82 | | }; |
83 | | |
84 | | } // end anonymous namespace |
85 | | |
86 | | char HexagonGenExtract::ID = 0; |
87 | | |
88 | 861 | INITIALIZE_PASS_BEGIN(HexagonGenExtract, "hextract", "Hexagon generate " |
89 | 861 | "\"extract\" instructions", false, false) |
90 | 861 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
91 | 861 | INITIALIZE_PASS_END(HexagonGenExtract, "hextract", "Hexagon generate " |
92 | | "\"extract\" instructions", false, false) |
93 | | |
94 | 26.1k | bool HexagonGenExtract::convert(Instruction *In) { |
95 | 26.1k | using namespace PatternMatch; |
96 | 26.1k | |
97 | 26.1k | Value *BF = nullptr; |
98 | 26.1k | ConstantInt *CSL = nullptr, *CSR = nullptr, *CM = nullptr; |
99 | 26.1k | BasicBlock *BB = In->getParent(); |
100 | 26.1k | LLVMContext &Ctx = BB->getContext(); |
101 | 26.1k | bool LogicalSR; |
102 | 26.1k | |
103 | 26.1k | // (and (shl (lshr x, #sr), #sl), #m) |
104 | 26.1k | LogicalSR = true; |
105 | 26.1k | bool Match = match(In, m_And(m_Shl(m_LShr(m_Value(BF), m_ConstantInt(CSR)), |
106 | 26.1k | m_ConstantInt(CSL)), |
107 | 26.1k | m_ConstantInt(CM))); |
108 | 26.1k | |
109 | 26.1k | if (!Match) { |
110 | 26.1k | // (and (shl (ashr x, #sr), #sl), #m) |
111 | 26.1k | LogicalSR = false; |
112 | 26.1k | Match = match(In, m_And(m_Shl(m_AShr(m_Value(BF), m_ConstantInt(CSR)), |
113 | 26.1k | m_ConstantInt(CSL)), |
114 | 26.1k | m_ConstantInt(CM))); |
115 | 26.1k | } |
116 | 26.1k | if (!Match) { |
117 | 26.1k | // (and (shl x, #sl), #m) |
118 | 26.1k | LogicalSR = true; |
119 | 26.1k | CSR = ConstantInt::get(Type::getInt32Ty(Ctx), 0); |
120 | 26.1k | Match = match(In, m_And(m_Shl(m_Value(BF), m_ConstantInt(CSL)), |
121 | 26.1k | m_ConstantInt(CM))); |
122 | 26.1k | if (Match && NoSR09 ) |
123 | 9 | return false; |
124 | 26.1k | } |
125 | 26.1k | if (!Match) { |
126 | 26.1k | // (and (lshr x, #sr), #m) |
127 | 26.1k | LogicalSR = true; |
128 | 26.1k | CSL = ConstantInt::get(Type::getInt32Ty(Ctx), 0); |
129 | 26.1k | Match = match(In, m_And(m_LShr(m_Value(BF), m_ConstantInt(CSR)), |
130 | 26.1k | m_ConstantInt(CM))); |
131 | 26.1k | } |
132 | 26.1k | if (!Match) { |
133 | 26.0k | // (and (ashr x, #sr), #m) |
134 | 26.0k | LogicalSR = false; |
135 | 26.0k | CSL = ConstantInt::get(Type::getInt32Ty(Ctx), 0); |
136 | 26.0k | Match = match(In, m_And(m_AShr(m_Value(BF), m_ConstantInt(CSR)), |
137 | 26.0k | m_ConstantInt(CM))); |
138 | 26.0k | } |
139 | 26.1k | if (!Match) { |
140 | 26.0k | CM = nullptr; |
141 | 26.0k | // (shl (lshr x, #sr), #sl) |
142 | 26.0k | LogicalSR = true; |
143 | 26.0k | Match = match(In, m_Shl(m_LShr(m_Value(BF), m_ConstantInt(CSR)), |
144 | 26.0k | m_ConstantInt(CSL))); |
145 | 26.0k | } |
146 | 26.1k | if (!Match) { |
147 | 26.0k | CM = nullptr; |
148 | 26.0k | // (shl (ashr x, #sr), #sl) |
149 | 26.0k | LogicalSR = false; |
150 | 26.0k | Match = match(In, m_Shl(m_AShr(m_Value(BF), m_ConstantInt(CSR)), |
151 | 26.0k | m_ConstantInt(CSL))); |
152 | 26.0k | } |
153 | 26.1k | if (!Match) |
154 | 26.0k | return false; |
155 | 66 | |
156 | 66 | Type *Ty = BF->getType(); |
157 | 66 | if (!Ty->isIntegerTy()) |
158 | 0 | return false; |
159 | 66 | unsigned BW = Ty->getPrimitiveSizeInBits(); |
160 | 66 | if (BW != 32 && BW != 6441 ) |
161 | 0 | return false; |
162 | 66 | |
163 | 66 | uint32_t SR = CSR->getZExtValue(); |
164 | 66 | uint32_t SL = CSL->getZExtValue(); |
165 | 66 | |
166 | 66 | if (!CM) { |
167 | 20 | // If there was no and, and the shift left did not remove all potential |
168 | 20 | // sign bits created by the shift right, then extractu cannot reproduce |
169 | 20 | // this value. |
170 | 20 | if (!LogicalSR && (SR > SL)1 ) |
171 | 0 | return false; |
172 | 20 | APInt A = APInt(BW, ~0ULL).lshr(SR).shl(SL); |
173 | 20 | CM = ConstantInt::get(Ctx, A); |
174 | 20 | } |
175 | 66 | |
176 | 66 | // CM is the shifted-left mask. Shift it back right to remove the zero |
177 | 66 | // bits on least-significant positions. |
178 | 66 | APInt M = CM->getValue().lshr(SL); |
179 | 66 | uint32_t T = M.countTrailingOnes(); |
180 | 66 | |
181 | 66 | // During the shifts some of the bits will be lost. Calculate how many |
182 | 66 | // of the original value will remain after shift right and then left. |
183 | 66 | uint32_t U = BW - std::max(SL, SR); |
184 | 66 | // The width of the extracted field is the minimum of the original bits |
185 | 66 | // that remain after the shifts and the number of contiguous 1s in the mask. |
186 | 66 | uint32_t W = std::min(U, T); |
187 | 66 | if (W == 0) |
188 | 1 | return false; |
189 | 65 | |
190 | 65 | // Check if the extracted bits are contained within the mask that it is |
191 | 65 | // and-ed with. The extract operation will copy these bits, and so the |
192 | 65 | // mask cannot any holes in it that would clear any of the bits of the |
193 | 65 | // extracted field. |
194 | 65 | if (!LogicalSR) { |
195 | 1 | // If the shift right was arithmetic, it could have included some 1 bits. |
196 | 1 | // It is still ok to generate extract, but only if the mask eliminates |
197 | 1 | // those bits (i.e. M does not have any bits set beyond U). |
198 | 1 | APInt C = APInt::getHighBitsSet(BW, BW-U); |
199 | 1 | if (M.intersects(C) || !M.isMask(W)) |
200 | 0 | return false; |
201 | 64 | } else { |
202 | 64 | // Check if M starts with a contiguous sequence of W times 1 bits. Get |
203 | 64 | // the low U bits of M (which eliminates the 0 bits shifted in on the |
204 | 64 | // left), and check if the result is APInt's "mask": |
205 | 64 | if (!M.getLoBits(U).isMask(W)) |
206 | 1 | return false; |
207 | 64 | } |
208 | 64 | |
209 | 64 | IRBuilder<> IRB(In); |
210 | 64 | Intrinsic::ID IntId = (BW == 32) ? Intrinsic::hexagon_S2_extractu24 |
211 | 64 | : Intrinsic::hexagon_S2_extractup40 ; |
212 | 64 | Module *Mod = BB->getParent()->getParent(); |
213 | 64 | Function *ExtF = Intrinsic::getDeclaration(Mod, IntId); |
214 | 64 | Value *NewIn = IRB.CreateCall(ExtF, {BF, IRB.getInt32(W), IRB.getInt32(SR)}); |
215 | 64 | if (SL != 0) |
216 | 23 | NewIn = IRB.CreateShl(NewIn, SL, CSL->getName()); |
217 | 64 | In->replaceAllUsesWith(NewIn); |
218 | 64 | return true; |
219 | 64 | } |
220 | | |
221 | 4.98k | bool HexagonGenExtract::visitBlock(BasicBlock *B) { |
222 | 4.98k | // Depth-first, bottom-up traversal. |
223 | 4.98k | for (auto *DTN : children<DomTreeNode*>(DT->getNode(B))) |
224 | 1.63k | visitBlock(DTN->getBlock()); |
225 | 4.98k | |
226 | 4.98k | // Allow limiting the number of generated extracts for debugging purposes. |
227 | 4.98k | bool HasCutoff = ExtractCutoff.getPosition(); |
228 | 4.98k | unsigned Cutoff = ExtractCutoff; |
229 | 4.98k | |
230 | 4.98k | bool Changed = false; |
231 | 4.98k | BasicBlock::iterator I = std::prev(B->end()), NextI, Begin = B->begin(); |
232 | 26.1k | while (true) { |
233 | 26.1k | if (HasCutoff && (ExtractCount >= Cutoff)0 ) |
234 | 0 | return Changed; |
235 | 26.1k | bool Last = (I == Begin); |
236 | 26.1k | if (!Last) |
237 | 21.1k | NextI = std::prev(I); |
238 | 26.1k | Instruction *In = &*I; |
239 | 26.1k | bool Done = convert(In); |
240 | 26.1k | if (HasCutoff && Done0 ) |
241 | 0 | ExtractCount++; |
242 | 26.1k | Changed |= Done; |
243 | 26.1k | if (Last) |
244 | 4.98k | break; |
245 | 21.1k | I = NextI; |
246 | 21.1k | } |
247 | 4.98k | return Changed; |
248 | 4.98k | } |
249 | | |
250 | 3.36k | bool HexagonGenExtract::runOnFunction(Function &F) { |
251 | 3.36k | if (skipFunction(F)) |
252 | 10 | return false; |
253 | 3.35k | |
254 | 3.35k | DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
255 | 3.35k | bool Changed; |
256 | 3.35k | |
257 | 3.35k | // Traverse the function bottom-up, to see super-expressions before their |
258 | 3.35k | // sub-expressions. |
259 | 3.35k | BasicBlock *Entry = GraphTraits<Function*>::getEntryNode(&F); |
260 | 3.35k | Changed = visitBlock(Entry); |
261 | 3.35k | |
262 | 3.35k | return Changed; |
263 | 3.35k | } |
264 | | |
265 | 861 | FunctionPass *llvm::createHexagonGenExtract() { |
266 | 861 | return new HexagonGenExtract(); |
267 | 861 | } |