Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- PoisonChecking.cpp - -----------------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// Implements a transform pass which instruments IR such that poison semantics
10
// are made explicit.  That is, it provides a (possibly partial) executable
11
// semantics for every instruction w.r.t. poison as specified in the LLVM
12
// LangRef.  There are obvious parallels to the sanitizer tools, but this pass
13
// is focused purely on the semantics of LLVM IR, not any particular source
14
// language.   If you're looking for something to see if your C/C++ contains
15
// UB, this is not it.  
16
// 
17
// The rewritten semantics of each instruction will include the following
18
// components: 
19
//
20
// 1) The original instruction, unmodified.
21
// 2) A propagation rule which translates dynamic information about the poison
22
//    state of each input to whether the dynamic output of the instruction
23
//    produces poison.
24
// 3) A flag validation rule which validates any poison producing flags on the
25
//    instruction itself (e.g. checks for overflow on nsw).
26
// 4) A check rule which traps (to a handler function) if this instruction must
27
//    execute undefined behavior given the poison state of it's inputs.
28
//
29
// At the moment, the UB detection is done in a best effort manner; that is,
30
// the resulting code may produce a false negative result (not report UB when
31
// it actually exists according to the LangRef spec), but should never produce
32
// a false positive (report UB where it doesn't exist).  The intention is to
33
// eventually support a "strict" mode which never dynamically reports a false
34
// negative at the cost of rejecting some valid inputs to translation.
35
//
36
// Use cases for this pass include:
37
// - Understanding (and testing!) the implications of the definition of poison
38
//   from the LangRef.
39
// - Validating the output of a IR fuzzer to ensure that all programs produced
40
//   are well defined on the specific input used.
41
// - Finding/confirming poison specific miscompiles by checking the poison
42
//   status of an input/IR pair is the same before and after an optimization
43
//   transform. 
44
// - Checking that a bugpoint reduction does not introduce UB which didn't
45
//   exist in the original program being reduced.
46
//
47
// The major sources of inaccuracy are currently:
48
// - Most validation rules not yet implemented for instructions with poison
49
//   relavant flags.  At the moment, only nsw/nuw on add/sub are supported.
50
// - UB which is control dependent on a branch on poison is not yet
51
//   reported. Currently, only data flow dependence is modeled.
52
// - Poison which is propagated through memory is not modeled.  As such,
53
//   storing poison to memory and then reloading it will cause a false negative
54
//   as we consider the reloaded value to not be poisoned.
55
// - Poison propagation across function boundaries is not modeled.  At the
56
//   moment, all arguments and return values are assumed not to be poison.
57
// - Undef is not modeled.  In particular, the optimizer's freedom to pick
58
//   concrete values for undef bits so as to maximize potential for producing
59
//   poison is not modeled.  
60
//
61
//===----------------------------------------------------------------------===//
62
63
#include "llvm/Transforms/Instrumentation/PoisonChecking.h"
64
#include "llvm/ADT/DenseMap.h"
65
#include "llvm/ADT/Statistic.h"
66
#include "llvm/Analysis/MemoryBuiltins.h"
67
#include "llvm/Analysis/ValueTracking.h"
68
#include "llvm/IR/InstVisitor.h"
69
#include "llvm/IR/IntrinsicInst.h"
70
#include "llvm/IR/IRBuilder.h"
71
#include "llvm/IR/PatternMatch.h"
72
#include "llvm/Support/Debug.h"
73
74
using namespace llvm;
75
76
#define DEBUG_TYPE "poison-checking"
77
78
static cl::opt<bool>
79
LocalCheck("poison-checking-function-local",
80
           cl::init(false),
81
           cl::desc("Check that returns are non-poison (for testing)"));
82
83
84
172
static bool isConstantFalse(Value* V) {
85
172
  assert(V->getType()->isIntegerTy(1));
86
172
  if (auto *CI = dyn_cast<ConstantInt>(V))
87
107
    return CI->isZero();
88
65
  return false;
89
65
}
90
91
160
static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) {
92
160
  if (Ops.size() == 0)
93
51
    return B.getFalse();
94
109
  unsigned i = 0;
95
212
  for (; i < Ops.size() && 
isConstantFalse(Ops[i])165
;
i++103
)
{}103
96
109
  if (i == Ops.size())
97
47
    return B.getFalse();
98
62
  Value *Accum = Ops[i++];
99
69
  for (; i < Ops.size(); 
i++7
)
100
7
    if (!isConstantFalse(Ops[i]))
101
3
      Accum = B.CreateOr(Accum, Ops[i]);
102
62
  return Accum;
103
62
}
104
105
static void generatePoisonChecksForBinOp(Instruction &I,
106
36
                                         SmallVector<Value*, 2> &Checks) {
107
36
  assert(isa<BinaryOperator>(I));
108
36
  
109
36
  IRBuilder<> B(&I);
110
36
  Value *LHS = I.getOperand(0);
111
36
  Value *RHS = I.getOperand(1);
112
36
  switch (I.getOpcode()) {
113
36
  default:
114
2
    return;
115
36
  case Instruction::Add: {
116
12
    if (I.hasNoSignedWrap()) {
117
6
      auto *OverflowOp =
118
6
        B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS);
119
6
      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
120
6
    }
121
12
    if (I.hasNoUnsignedWrap()) {
122
6
      auto *OverflowOp =
123
6
        B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS);
124
6
      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
125
6
    }
126
12
    break;
127
36
  }
128
36
  case Instruction::Sub: {
129
4
    if (I.hasNoSignedWrap()) {
130
2
      auto *OverflowOp =
131
2
        B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS);
132
2
      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
133
2
    }
134
4
    if (I.hasNoUnsignedWrap()) {
135
2
      auto *OverflowOp =
136
2
        B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS);
137
2
      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
138
2
    }
139
4
    break;
140
36
  }
141
36
  case Instruction::Mul: {
142
4
    if (I.hasNoSignedWrap()) {
143
2
      auto *OverflowOp =
144
2
        B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS);
145
2
      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
146
2
    }
147
4
    if (I.hasNoUnsignedWrap()) {
148
2
      auto *OverflowOp =
149
2
        B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS);
150
2
      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
151
2
    }
152
4
    break;
153
36
  }
154
36
  case Instruction::UDiv: {
155
3
    if (I.isExact()) {
156
1
      auto *Check =
157
1
        B.CreateICmp(ICmpInst::ICMP_NE, B.CreateURem(LHS, RHS),
158
1
                     ConstantInt::get(LHS->getType(), 0));
159
1
      Checks.push_back(Check);
160
1
    }
161
3
    break;
162
36
  }
163
36
  case Instruction::SDiv: {
164
3
    if (I.isExact()) {
165
1
      auto *Check =
166
1
        B.CreateICmp(ICmpInst::ICMP_NE, B.CreateSRem(LHS, RHS),
167
1
                     ConstantInt::get(LHS->getType(), 0));
168
1
      Checks.push_back(Check);
169
1
    }
170
3
    break;
171
36
  }
172
36
  case Instruction::AShr:
173
8
  case Instruction::LShr:
174
8
  case Instruction::Shl: {
175
8
    Value *ShiftCheck =
176
8
      B.CreateICmp(ICmpInst::ICMP_UGE, RHS,
177
8
                   ConstantInt::get(RHS->getType(),
178
8
                                    LHS->getType()->getScalarSizeInBits()));
179
8
    Checks.push_back(ShiftCheck);
180
8
    break;
181
34
  }
182
34
  };
183
34
}
184
185
80
static Value* generatePoisonChecks(Instruction &I) {
186
80
  IRBuilder<> B(&I);
187
80
  SmallVector<Value*, 2> Checks;
188
80
  if (isa<BinaryOperator>(I) && 
!I.getType()->isVectorTy()36
)
189
36
    generatePoisonChecksForBinOp(I, Checks);
190
80
191
80
  // Handle non-binops seperately
192
80
  switch (I.getOpcode()) {
193
80
  default:
194
78
    break;
195
80
  case Instruction::ExtractElement: {
196
1
    Value *Vec = I.getOperand(0);
197
1
    if (Vec->getType()->getVectorIsScalable())
198
0
      break;
199
1
    Value *Idx = I.getOperand(1);
200
1
    unsigned NumElts = Vec->getType()->getVectorNumElements();
201
1
    Value *Check =
202
1
      B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
203
1
                   ConstantInt::get(Idx->getType(), NumElts));
204
1
    Checks.push_back(Check);
205
1
    break;
206
1
  }
207
1
  case Instruction::InsertElement: {
208
1
    Value *Vec = I.getOperand(0);
209
1
    if (Vec->getType()->getVectorIsScalable())
210
0
      break;
211
1
    Value *Idx = I.getOperand(2);
212
1
    unsigned NumElts = Vec->getType()->getVectorNumElements();
213
1
    Value *Check =
214
1
      B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
215
1
                   ConstantInt::get(Idx->getType(), NumElts));
216
1
    Checks.push_back(Check);
217
1
    break;
218
1
  }
219
80
  };
220
80
  return buildOrChain(B, Checks);
221
80
}
222
223
98
static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) {
224
98
  auto Itr = ValToPoison.find(V);
225
98
  if (Itr != ValToPoison.end())
226
38
    return Itr->second;
227
60
  if (isa<Constant>(V)) {
228
8
    return ConstantInt::getFalse(V->getContext());
229
8
  }
230
52
  // Return false for unknwon values - this implements a non-strict mode where
231
52
  // unhandled IR constructs are simply considered to never produce poison.  At
232
52
  // some point in the future, we probably want a "strict mode" for testing if
233
52
  // nothing else.
234
52
  return ConstantInt::getFalse(V->getContext());
235
52
}
236
237
38
static void CreateAssert(IRBuilder<> &B, Value *Cond) {
238
38
  assert(Cond->getType()->isIntegerTy(1));
239
38
  if (auto *CI = dyn_cast<ConstantInt>(Cond))
240
9
    if (CI->isAllOnesValue())
241
9
      return;
242
29
243
29
  Module *M = B.GetInsertBlock()->getModule();
244
29
  M->getOrInsertFunction("__poison_checker_assert",
245
29
                         Type::getVoidTy(M->getContext()),
246
29
                         Type::getInt1Ty(M->getContext()));
247
29
  Function *TrapFunc = M->getFunction("__poison_checker_assert");
248
29
  B.CreateCall(TrapFunc, Cond);
249
29
}
250
251
38
static void CreateAssertNot(IRBuilder<> &B, Value *Cond) {
252
38
  assert(Cond->getType()->isIntegerTy(1));
253
38
  CreateAssert(B, B.CreateNot(Cond));
254
38
}
255
256
44
static bool rewrite(Function &F) {
257
44
  auto * const Int1Ty = Type::getInt1Ty(F.getContext());
258
44
259
44
  DenseMap<Value *, Value *> ValToPoison;
260
44
261
44
  for (BasicBlock &BB : F)
262
34
    for (auto I = BB.begin(); isa<PHINode>(&*I); 
I++0
) {
263
0
      auto *OldPHI = cast<PHINode>(&*I);
264
0
      auto *NewPHI = PHINode::Create(Int1Ty, 
265
0
                                     OldPHI->getNumIncomingValues());
266
0
      for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++)
267
0
        NewPHI->addIncoming(UndefValue::get(Int1Ty),
268
0
                            OldPHI->getIncomingBlock(i));
269
0
      NewPHI->insertBefore(OldPHI);
270
0
      ValToPoison[OldPHI] = NewPHI;
271
0
    }
272
44
  
273
44
  for (BasicBlock &BB : F)
274
80
    
for (Instruction &I : BB)34
{
275
80
      if (isa<PHINode>(I)) 
continue0
;
276
80
277
80
      IRBuilder<> B(cast<Instruction>(&I));
278
80
      
279
80
      // Note: There are many more sources of documented UB, but this pass only
280
80
      // attempts to find UB triggered by propagation of poison.
281
80
      if (Value *Op = const_cast<Value*>(getGuaranteedNonFullPoisonOp(&I)))
282
12
        CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
283
80
284
80
      if (LocalCheck)
285
52
        if (auto *RI = dyn_cast<ReturnInst>(&I))
286
26
          if (RI->getNumOperands() != 0) {
287
26
            Value *Op = RI->getOperand(0);
288
26
            CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
289
26
          }
290
80
291
80
      SmallVector<Value*, 4> Checks;
292
80
      if (propagatesFullPoison(&I))
293
30
        for (Value *V : I.operands())
294
60
          Checks.push_back(getPoisonFor(ValToPoison, V));
295
80
296
80
      if (auto *Check = generatePoisonChecks(I))
297
80
        Checks.push_back(Check);
298
80
      ValToPoison[&I] = buildOrChain(B, Checks);
299
80
    }
300
44
301
44
  for (BasicBlock &BB : F)
302
34
    for (auto I = BB.begin(); isa<PHINode>(&*I); 
I++0
) {
303
0
      auto *OldPHI = cast<PHINode>(&*I);
304
0
      if (!ValToPoison.count(OldPHI))
305
0
        continue; // skip the newly inserted phis
306
0
      auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]);
307
0
      for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) {
308
0
        auto *OldVal = OldPHI->getIncomingValue(i);
309
0
        NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal));
310
0
      }
311
0
    }
312
44
  return true;
313
44
}
314
315
316
PreservedAnalyses PoisonCheckingPass::run(Module &M,
317
2
                                          ModuleAnalysisManager &AM) {
318
2
  bool Changed = false;
319
2
  for (auto &F : M)
320
44
    Changed |= rewrite(F);
321
2
322
2
  return Changed ? PreservedAnalyses::none() : 
PreservedAnalyses::all()0
;
323
2
}
324
325
PreservedAnalyses PoisonCheckingPass::run(Function &F,
326
0
                                          FunctionAnalysisManager &AM) {
327
0
  return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all();
328
0
}
329
330
331
/* Major TODO Items:
332
   - Control dependent poison UB
333
   - Strict mode - (i.e. must analyze every operand)
334
     - Poison through memory
335
     - Function ABIs
336
     - Full coverage of intrinsics, etc.. (ouch)
337
338
   Instructions w/Unclear Semantics:
339
   - shufflevector - It would seem reasonable for an out of bounds mask element
340
     to produce poison, but the LangRef does not state.  
341
   - and/or - It would seem reasonable for poison to propagate from both
342
     arguments, but LangRef doesn't state and propagatesFullPoison doesn't
343
     include these two.
344
   - all binary ops w/vector operands - The likely interpretation would be that
345
     any element overflowing should produce poison for the entire result, but
346
     the LangRef does not state.
347
   - Floating point binary ops w/fmf flags other than (nnan, noinfs).  It seems
348
     strange that only certian flags should be documented as producing poison.
349
350
   Cases of clear poison semantics not yet implemented:
351
   - Exact flags on ashr/lshr produce poison
352
   - NSW/NUW flags on shl produce poison
353
   - Inbounds flag on getelementptr produce poison
354
   - fptosi/fptoui (out of bounds input) produce poison
355
   - Scalable vector types for insertelement/extractelement
356
   - Floating point binary ops w/fmf nnan/noinfs flags produce poison
357
 */