/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/include/llvm/Transforms/Utils/FunctionComparator.h

Line | Count | Source |

1 | //===- FunctionComparator.h - Function Comparator ---------------*- C++ -*-===// | |

2 | // | |

3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |

4 | // See https://llvm.org/LICENSE.txt for license information. | |

5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |

6 | // | |

7 | //===----------------------------------------------------------------------===// | |

8 | // | |

9 | // This file defines the FunctionComparator and GlobalNumberState classes which | |

10 | // are used by the MergeFunctions pass for comparing functions. | |

11 | // | |

12 | //===----------------------------------------------------------------------===// | |

13 | ||

14 | #ifndef LLVM_TRANSFORMS_UTILS_FUNCTIONCOMPARATOR_H | |

15 | #define LLVM_TRANSFORMS_UTILS_FUNCTIONCOMPARATOR_H | |

16 | ||

17 | #include "llvm/ADT/DenseMap.h" | |

18 | #include "llvm/ADT/StringRef.h" | |

19 | #include "llvm/IR/Attributes.h" | |

20 | #include "llvm/IR/Instructions.h" | |

21 | #include "llvm/IR/Operator.h" | |

22 | #include "llvm/IR/ValueMap.h" | |

23 | #include "llvm/Support/AtomicOrdering.h" | |

24 | #include "llvm/Support/Casting.h" | |

25 | #include <cstdint> | |

26 | #include <tuple> | |

27 | ||

28 | namespace llvm { | |

29 | ||

30 | class APFloat; | |

31 | class APInt; | |

32 | class BasicBlock; | |

33 | class Constant; | |

34 | class Function; | |

35 | class GlobalValue; | |

36 | class InlineAsm; | |

37 | class Instruction; | |

38 | class MDNode; | |

39 | class Type; | |

40 | class Value; | |

41 | ||

42 | /// GlobalNumberState assigns an integer to each global value in the program, | |

43 | /// which is used by the comparison routine to order references to globals. This | |

44 | /// state must be preserved throughout the pass, because Functions and other | |

45 | /// globals need to maintain their relative order. Globals are assigned a number | |

46 | /// when they are first visited. This order is deterministic, and so the | |

47 | /// assigned numbers are as well. When two functions are merged, neither number | |

48 | /// is updated. If the symbols are weak, this would be incorrect. If they are | |

49 | /// strong, then one will be replaced at all references to the other, and so | |

50 | /// direct callsites will now see one or the other symbol, and no update is | |

51 | /// necessary. Note that if we were guaranteed unique names, we could just | |

52 | /// compare those, but this would not work for stripped bitcodes or for those | |

53 | /// few symbols without a name. | |

54 | class GlobalNumberState { | |

55 | struct Config : ValueMapConfig<GlobalValue *> { | |

56 | enum { FollowRAUW = false }; | |

57 | }; | |

58 | ||

59 | // Each GlobalValue is mapped to an identifier. The Config ensures when RAUW | |

60 | // occurs, the mapping does not change. Tracking changes is unnecessary, and | |

61 | // also problematic for weak symbols (which may be overwritten). | |

62 | using ValueNumberMap = ValueMap<GlobalValue *, uint64_t, Config>; | |

63 | ValueNumberMap GlobalNumbers; | |

64 | ||

65 | // The next unused serial number to assign to a global. | |

66 | uint64_t NextNumber = 0; | |

67 | ||

68 | public: | |

69 | 60 | GlobalNumberState() = default; |

70 | ||

71 | 228 | uint64_t getNumber(GlobalValue* Global) { |

72 | 228 | ValueNumberMap::iterator MapIter; |

73 | 228 | bool Inserted; |

74 | 228 | std::tie(MapIter, Inserted) = GlobalNumbers.insert({Global, NextNumber}); |

75 | 228 | if (Inserted) |

76 | 28 | NextNumber++; |

77 | 228 | return MapIter->second; |

78 | 228 | } |

79 | ||

80 | 14 | void erase(GlobalValue *Global) { |

81 | 14 | GlobalNumbers.erase(Global); |

82 | 14 | } |

83 | ||

84 | 59 | void clear() { |

85 | 59 | GlobalNumbers.clear(); |

86 | 59 | } |

87 | }; | |

88 | ||

89 | /// FunctionComparator - Compares two functions to determine whether or not | |

90 | /// they will generate machine code with the same behaviour. DataLayout is | |

91 | /// used if available. The comparator always fails conservatively (erring on the | |

92 | /// side of claiming that two functions are different). | |

93 | class FunctionComparator { | |

94 | public: | |

95 | FunctionComparator(const Function *F1, const Function *F2, | |

96 | GlobalNumberState* GN) | |

97 | 318 | : FnL(F1), FnR(F2), GlobalNumbers(GN) {} |

98 | ||

99 | /// Test whether the two functions have equivalent behaviour. | |

100 | int compare(); | |

101 | ||

102 | /// Hash a function. Equivalent functions will have the same hash, and unequal | |

103 | /// functions will have different hashes with high probability. | |

104 | using FunctionHash = uint64_t; | |

105 | static FunctionHash functionHash(Function &); | |

106 | ||

107 | protected: | |

108 | /// Start the comparison. | |

109 | 326 | void beginCompare() { |

110 | 326 | sn_mapL.clear(); |

111 | 326 | sn_mapR.clear(); |

112 | 326 | } |

113 | ||

114 | /// Compares the signature and other general attributes of the two functions. | |

115 | int compareSignature() const; | |

116 | ||

117 | /// Test whether two basic blocks have equivalent behaviour. | |

118 | int cmpBasicBlocks(const BasicBlock *BBL, const BasicBlock *BBR) const; | |

119 | ||

120 | /// Constants comparison. | |

121 | /// Its analog to lexicographical comparison between hypothetical numbers | |

122 | /// of next format: | |

123 | /// <bitcastability-trait><raw-bit-contents> | |

124 | /// | |

125 | /// 1. Bitcastability. | |

126 | /// Check whether L's type could be losslessly bitcasted to R's type. | |

127 | /// On this stage method, in case when lossless bitcast is not possible | |

128 | /// method returns -1 or 1, thus also defining which type is greater in | |

129 | /// context of bitcastability. | |

130 | /// Stage 0: If types are equal in terms of cmpTypes, then we can go straight | |

131 | /// to the contents comparison. | |

132 | /// If types differ, remember types comparison result and check | |

133 | /// whether we still can bitcast types. | |

134 | /// Stage 1: Types that satisfies isFirstClassType conditions are always | |

135 | /// greater then others. | |

136 | /// Stage 2: Vector is greater then non-vector. | |

137 | /// If both types are vectors, then vector with greater bitwidth is | |

138 | /// greater. | |

139 | /// If both types are vectors with the same bitwidth, then types | |

140 | /// are bitcastable, and we can skip other stages, and go to contents | |

141 | /// comparison. | |

142 | /// Stage 3: Pointer types are greater than non-pointers. If both types are | |

143 | /// pointers of the same address space - go to contents comparison. | |

144 | /// Different address spaces: pointer with greater address space is | |

145 | /// greater. | |

146 | /// Stage 4: Types are neither vectors, nor pointers. And they differ. | |

147 | /// We don't know how to bitcast them. So, we better don't do it, | |

148 | /// and return types comparison result (so it determines the | |

149 | /// relationship among constants we don't know how to bitcast). | |

150 | /// | |

151 | /// Just for clearance, let's see how the set of constants could look | |

152 | /// on single dimension axis: | |

153 | /// | |

154 | /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors] | |

155 | /// Where: NFCT - Not a FirstClassType | |

156 | /// FCT - FirstClassTyp: | |

157 | /// | |

158 | /// 2. Compare raw contents. | |

159 | /// It ignores types on this stage and only compares bits from L and R. | |

160 | /// Returns 0, if L and R has equivalent contents. | |

161 | /// -1 or 1 if values are different. | |

162 | /// Pretty trivial: | |

163 | /// 2.1. If contents are numbers, compare numbers. | |

164 | /// Ints with greater bitwidth are greater. Ints with same bitwidths | |

165 | /// compared by their contents. | |

166 | /// 2.2. "And so on". Just to avoid discrepancies with comments | |

167 | /// perhaps it would be better to read the implementation itself. | |

168 | /// 3. And again about overall picture. Let's look back at how the ordered set | |

169 | /// of constants will look like: | |

170 | /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors] | |

171 | /// | |

172 | /// Now look, what could be inside [FCT, "others"], for example: | |

173 | /// [FCT, "others"] = | |

174 | /// [ | |

175 | /// [double 0.1], [double 1.23], | |

176 | /// [i32 1], [i32 2], | |

177 | /// { double 1.0 }, ; StructTyID, NumElements = 1 | |

178 | /// { i32 1 }, ; StructTyID, NumElements = 1 | |

179 | /// { double 1, i32 1 }, ; StructTyID, NumElements = 2 | |

180 | /// { i32 1, double 1 } ; StructTyID, NumElements = 2 | |

181 | /// ] | |

182 | /// | |

183 | /// Let's explain the order. Float numbers will be less than integers, just | |

184 | /// because of cmpType terms: FloatTyID < IntegerTyID. | |

185 | /// Floats (with same fltSemantics) are sorted according to their value. | |

186 | /// Then you can see integers, and they are, like a floats, | |

187 | /// could be easy sorted among each others. | |

188 | /// The structures. Structures are grouped at the tail, again because of their | |

189 | /// TypeID: StructTyID > IntegerTyID > FloatTyID. | |

190 | /// Structures with greater number of elements are greater. Structures with | |

191 | /// greater elements going first are greater. | |

192 | /// The same logic with vectors, arrays and other possible complex types. | |

193 | /// | |

194 | /// Bitcastable constants. | |

195 | /// Let's assume, that some constant, belongs to some group of | |

196 | /// "so-called-equal" values with different types, and at the same time | |

197 | /// belongs to another group of constants with equal types | |

198 | /// and "really" equal values. | |

199 | /// | |

200 | /// Now, prove that this is impossible: | |

201 | /// | |

202 | /// If constant A with type TyA is bitcastable to B with type TyB, then: | |

203 | /// 1. All constants with equal types to TyA, are bitcastable to B. Since | |

204 | /// those should be vectors (if TyA is vector), pointers | |

205 | /// (if TyA is pointer), or else (if TyA equal to TyB), those types should | |

206 | /// be equal to TyB. | |

207 | /// 2. All constants with non-equal, but bitcastable types to TyA, are | |

208 | /// bitcastable to B. | |

209 | /// Once again, just because we allow it to vectors and pointers only. | |

210 | /// This statement could be expanded as below: | |

211 | /// 2.1. All vectors with equal bitwidth to vector A, has equal bitwidth to | |

212 | /// vector B, and thus bitcastable to B as well. | |

213 | /// 2.2. All pointers of the same address space, no matter what they point to, | |

214 | /// bitcastable. So if C is pointer, it could be bitcasted to A and to B. | |

215 | /// So any constant equal or bitcastable to A is equal or bitcastable to B. | |

216 | /// QED. | |

217 | /// | |

218 | /// In another words, for pointers and vectors, we ignore top-level type and | |

219 | /// look at their particular properties (bit-width for vectors, and | |

220 | /// address space for pointers). | |

221 | /// If these properties are equal - compare their contents. | |

222 | int cmpConstants(const Constant *L, const Constant *R) const; | |

223 | ||

224 | /// Compares two global values by number. Uses the GlobalNumbersState to | |

225 | /// identify the same gobals across function calls. | |

226 | int cmpGlobalValues(GlobalValue *L, GlobalValue *R) const; | |

227 | ||

228 | /// Assign or look up previously assigned numbers for the two values, and | |

229 | /// return whether the numbers are equal. Numbers are assigned in the order | |

230 | /// visited. | |

231 | /// Comparison order: | |

232 | /// Stage 0: Value that is function itself is always greater then others. | |

233 | /// If left and right values are references to their functions, then | |

234 | /// they are equal. | |

235 | /// Stage 1: Constants are greater than non-constants. | |

236 | /// If both left and right are constants, then the result of | |

237 | /// cmpConstants is used as cmpValues result. | |

238 | /// Stage 2: InlineAsm instances are greater than others. If both left and | |

239 | /// right are InlineAsm instances, InlineAsm* pointers casted to | |

240 | /// integers and compared as numbers. | |

241 | /// Stage 3: For all other cases we compare order we meet these values in | |

242 | /// their functions. If right value was met first during scanning, | |

243 | /// then left value is greater. | |

244 | /// In another words, we compare serial numbers, for more details | |

245 | /// see comments for sn_mapL and sn_mapR. | |

246 | int cmpValues(const Value *L, const Value *R) const; | |

247 | ||

248 | /// Compare two Instructions for equivalence, similar to | |

249 | /// Instruction::isSameOperationAs. | |

250 | /// | |

251 | /// Stages are listed in "most significant stage first" order: | |

252 | /// On each stage below, we do comparison between some left and right | |

253 | /// operation parts. If parts are non-equal, we assign parts comparison | |

254 | /// result to the operation comparison result and exit from method. | |

255 | /// Otherwise we proceed to the next stage. | |

256 | /// Stages: | |

257 | /// 1. Operations opcodes. Compared as numbers. | |

258 | /// 2. Number of operands. | |

259 | /// 3. Operation types. Compared with cmpType method. | |

260 | /// 4. Compare operation subclass optional data as stream of bytes: | |

261 | /// just convert it to integers and call cmpNumbers. | |

262 | /// 5. Compare in operation operand types with cmpType in | |

263 | /// most significant operand first order. | |

264 | /// 6. Last stage. Check operations for some specific attributes. | |

265 | /// For example, for Load it would be: | |

266 | /// 6.1.Load: volatile (as boolean flag) | |

267 | /// 6.2.Load: alignment (as integer numbers) | |

268 | /// 6.3.Load: ordering (as underlying enum class value) | |

269 | /// 6.4.Load: synch-scope (as integer numbers) | |

270 | /// 6.5.Load: range metadata (as integer ranges) | |

271 | /// On this stage its better to see the code, since its not more than 10-15 | |

272 | /// strings for particular instruction, and could change sometimes. | |

273 | /// | |

274 | /// Sets \p needToCmpOperands to true if the operands of the instructions | |

275 | /// still must be compared afterwards. In this case it's already guaranteed | |

276 | /// that both instructions have the same number of operands. | |

277 | int cmpOperations(const Instruction *L, const Instruction *R, | |

278 | bool &needToCmpOperands) const; | |

279 | ||

280 | /// cmpType - compares two types, | |

281 | /// defines total ordering among the types set. | |

282 | /// | |

283 | /// Return values: | |

284 | /// 0 if types are equal, | |

285 | /// -1 if Left is less than Right, | |

286 | /// +1 if Left is greater than Right. | |

287 | /// | |

288 | /// Description: | |

289 | /// Comparison is broken onto stages. Like in lexicographical comparison | |

290 | /// stage coming first has higher priority. | |

291 | /// On each explanation stage keep in mind total ordering properties. | |

292 | /// | |

293 | /// 0. Before comparison we coerce pointer types of 0 address space to | |

294 | /// integer. | |

295 | /// We also don't bother with same type at left and right, so | |

296 | /// just return 0 in this case. | |

297 | /// | |

298 | /// 1. If types are of different kind (different type IDs). | |

299 | /// Return result of type IDs comparison, treating them as numbers. | |

300 | /// 2. If types are integers, check that they have the same width. If they | |

301 | /// are vectors, check that they have the same count and subtype. | |

302 | /// 3. Types have the same ID, so check whether they are one of: | |

303 | /// * Void | |

304 | /// * Float | |

305 | /// * Double | |

306 | /// * X86_FP80 | |

307 | /// * FP128 | |

308 | /// * PPC_FP128 | |

309 | /// * Label | |

310 | /// * Metadata | |

311 | /// We can treat these types as equal whenever their IDs are same. | |

312 | /// 4. If Left and Right are pointers, return result of address space | |

313 | /// comparison (numbers comparison). We can treat pointer types of same | |

314 | /// address space as equal. | |

315 | /// 5. If types are complex. | |

316 | /// Then both Left and Right are to be expanded and their element types will | |

317 | /// be checked with the same way. If we get Res != 0 on some stage, return it. | |

318 | /// Otherwise return 0. | |

319 | /// 6. For all other cases put llvm_unreachable. | |

320 | int cmpTypes(Type *TyL, Type *TyR) const; | |

321 | ||

322 | int cmpNumbers(uint64_t L, uint64_t R) const; | |

323 | int cmpAPInts(const APInt &L, const APInt &R) const; | |

324 | int cmpAPFloats(const APFloat &L, const APFloat &R) const; | |

325 | int cmpMem(StringRef L, StringRef R) const; | |

326 | ||

327 | // The two functions undergoing comparison. | |

328 | const Function *FnL, *FnR; | |

329 | ||

330 | private: | |

331 | int cmpOrderings(AtomicOrdering L, AtomicOrdering R) const; | |

332 | int cmpInlineAsm(const InlineAsm *L, const InlineAsm *R) const; | |

333 | int cmpAttrs(const AttributeList L, const AttributeList R) const; | |

334 | int cmpRangeMetadata(const MDNode *L, const MDNode *R) const; | |

335 | int cmpOperandBundlesSchema(const Instruction *L, const Instruction *R) const; | |

336 | ||

337 | /// Compare two GEPs for equivalent pointer arithmetic. | |

338 | /// Parts to be compared for each comparison stage, | |

339 | /// most significant stage first: | |

340 | /// 1. Address space. As numbers. | |

341 | /// 2. Constant offset, (using GEPOperator::accumulateConstantOffset method). | |

342 | /// 3. Pointer operand type (using cmpType method). | |

343 | /// 4. Number of operands. | |

344 | /// 5. Compare operands, using cmpValues method. | |

345 | int cmpGEPs(const GEPOperator *GEPL, const GEPOperator *GEPR) const; | |

346 | int cmpGEPs(const GetElementPtrInst *GEPL, | |

347 | 54 | const GetElementPtrInst *GEPR) const { |

348 | 54 | return cmpGEPs(cast<GEPOperator>(GEPL), cast<GEPOperator>(GEPR)); |

349 | 54 | } |

350 | ||

351 | /// Assign serial numbers to values from left function, and values from | |

352 | /// right function. | |

353 | /// Explanation: | |

354 | /// Being comparing functions we need to compare values we meet at left and | |

355 | /// right sides. | |

356 | /// Its easy to sort things out for external values. It just should be | |

357 | /// the same value at left and right. | |

358 | /// But for local values (those were introduced inside function body) | |

359 | /// we have to ensure they were introduced at exactly the same place, | |

360 | /// and plays the same role. | |

361 | /// Let's assign serial number to each value when we meet it first time. | |

362 | /// Values that were met at same place will be with same serial numbers. | |

363 | /// In this case it would be good to explain few points about values assigned | |

364 | /// to BBs and other ways of implementation (see below). | |

365 | /// | |

366 | /// 1. Safety of BB reordering. | |

367 | /// It's safe to change the order of BasicBlocks in function. | |

368 | /// Relationship with other functions and serial numbering will not be | |

369 | /// changed in this case. | |

370 | /// As follows from FunctionComparator::compare(), we do CFG walk: we start | |

371 | /// from the entry, and then take each terminator. So it doesn't matter how in | |

372 | /// fact BBs are ordered in function. And since cmpValues are called during | |

373 | /// this walk, the numbering depends only on how BBs located inside the CFG. | |

374 | /// So the answer is - yes. We will get the same numbering. | |

375 | /// | |

376 | /// 2. Impossibility to use dominance properties of values. | |

377 | /// If we compare two instruction operands: first is usage of local | |

378 | /// variable AL from function FL, and second is usage of local variable AR | |

379 | /// from FR, we could compare their origins and check whether they are | |

380 | /// defined at the same place. | |

381 | /// But, we are still not able to compare operands of PHI nodes, since those | |

382 | /// could be operands from further BBs we didn't scan yet. | |

383 | /// So it's impossible to use dominance properties in general. | |

384 | mutable DenseMap<const Value*, int> sn_mapL, sn_mapR; | |

385 | ||

386 | // The global state we will use | |

387 | GlobalNumberState* GlobalNumbers; | |

388 | }; | |

389 | ||

390 | } // end namespace llvm | |

391 | ||

392 | #endif // LLVM_TRANSFORMS_UTILS_FUNCTIONCOMPARATOR_H |