Coverage Report

Created: 2019-03-24 22:13

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/include/llvm/Bitcode/BitstreamReader.h
Line
Count
Source (jump to first uncovered line)
1
//===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This header defines the BitstreamReader class.  This class can be used to
10
// read an arbitrary bitstream, regardless of its contents.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#ifndef LLVM_BITCODE_BITSTREAMREADER_H
15
#define LLVM_BITCODE_BITSTREAMREADER_H
16
17
#include "llvm/ADT/ArrayRef.h"
18
#include "llvm/ADT/SmallVector.h"
19
#include "llvm/Bitcode/BitCodes.h"
20
#include "llvm/Support/Endian.h"
21
#include "llvm/Support/ErrorHandling.h"
22
#include "llvm/Support/MathExtras.h"
23
#include "llvm/Support/MemoryBuffer.h"
24
#include <algorithm>
25
#include <cassert>
26
#include <climits>
27
#include <cstddef>
28
#include <cstdint>
29
#include <memory>
30
#include <string>
31
#include <utility>
32
#include <vector>
33
34
namespace llvm {
35
36
/// This class maintains the abbreviations read from a block info block.
37
class BitstreamBlockInfo {
38
public:
39
  /// This contains information emitted to BLOCKINFO_BLOCK blocks. These
40
  /// describe abbreviations that all blocks of the specified ID inherit.
41
  struct BlockInfo {
42
    unsigned BlockID;
43
    std::vector<std::shared_ptr<BitCodeAbbrev>> Abbrevs;
44
    std::string Name;
45
    std::vector<std::pair<unsigned, std::string>> RecordNames;
46
  };
47
48
private:
49
  std::vector<BlockInfo> BlockInfoRecords;
50
51
public:
52
  /// If there is block info for the specified ID, return it, otherwise return
53
  /// null.
54
1.11M
  const BlockInfo *getBlockInfo(unsigned BlockID) const {
55
1.11M
    // Common case, the most recent entry matches BlockID.
56
1.11M
    if (!BlockInfoRecords.empty() && 
BlockInfoRecords.back().BlockID == BlockID1.09M
)
57
338k
      return &BlockInfoRecords.back();
58
781k
59
781k
    for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size());
60
1.70M
         i != e; 
++i920k
)
61
1.41M
      if (BlockInfoRecords[i].BlockID == BlockID)
62
499k
        return &BlockInfoRecords[i];
63
781k
    
return nullptr282k
;
64
781k
  }
65
66
25.1k
  BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
67
25.1k
    if (const BlockInfo *BI = getBlockInfo(BlockID))
68
32
      return *const_cast<BlockInfo*>(BI);
69
25.1k
70
25.1k
    // Otherwise, add a new record.
71
25.1k
    BlockInfoRecords.emplace_back();
72
25.1k
    BlockInfoRecords.back().BlockID = BlockID;
73
25.1k
    return BlockInfoRecords.back();
74
25.1k
  }
75
};
76
77
/// This represents a position within a bitstream. There may be multiple
78
/// independent cursors reading within one bitstream, each maintaining their
79
/// own local state.
80
class SimpleBitstreamCursor {
81
  ArrayRef<uint8_t> BitcodeBytes;
82
  size_t NextChar = 0;
83
84
public:
85
  /// This is the current data we have pulled from the stream but have not
86
  /// returned to the client. This is specifically and intentionally defined to
87
  /// follow the word size of the host machine for efficiency. We use word_t in
88
  /// places that are aware of this to make it perfectly explicit what is going
89
  /// on.
90
  using word_t = size_t;
91
92
private:
93
  word_t CurWord = 0;
94
95
  /// This is the number of bits in CurWord that are valid. This is always from
96
  /// [0...bits_of(size_t)-1] inclusive.
97
  unsigned BitsInCurWord = 0;
98
99
public:
100
  static const size_t MaxChunkSize = sizeof(word_t) * 8;
101
102
44.7k
  SimpleBitstreamCursor() = default;
103
  explicit SimpleBitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)
104
19.3k
      : BitcodeBytes(BitcodeBytes) {}
105
  explicit SimpleBitstreamCursor(StringRef BitcodeBytes)
106
      : BitcodeBytes(reinterpret_cast<const uint8_t *>(BitcodeBytes.data()),
107
30.8k
                     BitcodeBytes.size()) {}
108
  explicit SimpleBitstreamCursor(MemoryBufferRef BitcodeBytes)
109
201
      : SimpleBitstreamCursor(BitcodeBytes.getBuffer()) {}
110
111
400k
  bool canSkipToPos(size_t pos) const {
112
400k
    // pos can be skipped to if it is a valid address or one byte past the end.
113
400k
    return pos <= BitcodeBytes.size();
114
400k
  }
115
116
31.3M
  bool AtEndOfStream() {
117
31.3M
    return BitsInCurWord == 0 && 
BitcodeBytes.size() <= NextChar2.57M
;
118
31.3M
  }
119
120
  /// Return the bit # of the bit we are reading.
121
1.82M
  uint64_t GetCurrentBitNo() const {
122
1.82M
    return NextChar*CHAR_BIT - BitsInCurWord;
123
1.82M
  }
124
125
  // Return the byte # of the current bit.
126
35.3k
  uint64_t getCurrentByteNo() const { return GetCurrentBitNo() / 8; }
127
128
34.1k
  ArrayRef<uint8_t> getBitcodeBytes() const { return BitcodeBytes; }
129
130
  /// Reset the stream to the specified bit number.
131
1.97M
  void JumpToBit(uint64_t BitNo) {
132
1.97M
    size_t ByteNo = size_t(BitNo/8) & ~(sizeof(word_t)-1);
133
1.97M
    unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1));
134
1.97M
    assert(canSkipToPos(ByteNo) && "Invalid location");
135
1.97M
136
1.97M
    // Move the cursor to the right word.
137
1.97M
    NextChar = ByteNo;
138
1.97M
    BitsInCurWord = 0;
139
1.97M
140
1.97M
    // Skip over any bits that are already consumed.
141
1.97M
    if (WordBitNo)
142
1.70M
      Read(WordBitNo);
143
1.97M
  }
144
145
  /// Get a pointer into the bitstream at the specified byte offset.
146
239k
  const uint8_t *getPointerToByte(uint64_t ByteNo, uint64_t NumBytes) {
147
239k
    return BitcodeBytes.data() + ByteNo;
148
239k
  }
149
150
  /// Get a pointer into the bitstream at the specified bit offset.
151
  ///
152
  /// The bit offset must be on a byte boundary.
153
239k
  const uint8_t *getPointerToBit(uint64_t BitNo, uint64_t NumBytes) {
154
239k
    assert(!(BitNo % 8) && "Expected bit on byte boundary");
155
239k
    return getPointerToByte(BitNo / 8, NumBytes);
156
239k
  }
157
158
35.8M
  void fillCurWord() {
159
35.8M
    if (NextChar >= BitcodeBytes.size())
160
0
      report_fatal_error("Unexpected end of file");
161
35.8M
162
35.8M
    // Read the next word from the stream.
163
35.8M
    const uint8_t *NextCharPtr = BitcodeBytes.data() + NextChar;
164
35.8M
    unsigned BytesRead;
165
35.8M
    if (BitcodeBytes.size() >= NextChar + sizeof(word_t)) {
166
35.8M
      BytesRead = sizeof(word_t);
167
35.8M
      CurWord =
168
35.8M
          support::endian::read<word_t, support::little, support::unaligned>(
169
35.8M
              NextCharPtr);
170
35.8M
    } else {
171
24.9k
      // Short read.
172
24.9k
      BytesRead = BitcodeBytes.size() - NextChar;
173
24.9k
      CurWord = 0;
174
124k
      for (unsigned B = 0; B != BytesRead; 
++B99.9k
)
175
99.9k
        CurWord |= uint64_t(NextCharPtr[B]) << (B * 8);
176
24.9k
    }
177
35.8M
    NextChar += BytesRead;
178
35.8M
    BitsInCurWord = BytesRead * 8;
179
35.8M
  }
180
181
363M
  word_t Read(unsigned NumBits) {
182
363M
    static const unsigned BitsInWord = MaxChunkSize;
183
363M
184
363M
    assert(NumBits && NumBits <= BitsInWord &&
185
363M
           "Cannot return zero or more than BitsInWord bits!");
186
363M
187
363M
    static const unsigned Mask = sizeof(word_t) > 4 ? 
0x3f0
: 0x1f;
188
363M
189
363M
    // If the field is fully contained by CurWord, return it quickly.
190
363M
    if (BitsInCurWord >= NumBits) {
191
327M
      word_t R = CurWord & (~word_t(0) >> (BitsInWord - NumBits));
192
327M
193
327M
      // Use a mask to avoid undefined behavior.
194
327M
      CurWord >>= (NumBits & Mask);
195
327M
196
327M
      BitsInCurWord -= NumBits;
197
327M
      return R;
198
327M
    }
199
35.8M
200
35.8M
    word_t R = BitsInCurWord ? 
CurWord25.1M
:
010.7M
;
201
35.8M
    unsigned BitsLeft = NumBits - BitsInCurWord;
202
35.8M
203
35.8M
    fillCurWord();
204
35.8M
205
35.8M
    // If we run out of data, abort.
206
35.8M
    if (BitsLeft > BitsInCurWord)
207
0
      report_fatal_error("Unexpected end of file");
208
35.8M
209
35.8M
    word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft));
210
35.8M
211
35.8M
    // Use a mask to avoid undefined behavior.
212
35.8M
    CurWord >>= (BitsLeft & Mask);
213
35.8M
214
35.8M
    BitsInCurWord -= BitsLeft;
215
35.8M
216
35.8M
    R |= R2 << (NumBits - BitsLeft);
217
35.8M
218
35.8M
    return R;
219
35.8M
  }
220
221
34.6M
  uint32_t ReadVBR(unsigned NumBits) {
222
34.6M
    uint32_t Piece = Read(NumBits);
223
34.6M
    if ((Piece & (1U << (NumBits-1))) == 0)
224
31.1M
      return Piece;
225
3.50M
226
3.50M
    uint32_t Result = 0;
227
3.50M
    unsigned NextBit = 0;
228
7.03M
    while (
true7.03M
) {
229
7.03M
      Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit;
230
7.03M
231
7.03M
      if ((Piece & (1U << (NumBits-1))) == 0)
232
3.50M
        return Result;
233
3.52M
234
3.52M
      NextBit += NumBits-1;
235
3.52M
      Piece = Read(NumBits);
236
3.52M
    }
237
3.50M
  }
238
239
  // Read a VBR that may have a value up to 64-bits in size. The chunk size of
240
  // the VBR must still be <= 32 bits though.
241
100M
  uint64_t ReadVBR64(unsigned NumBits) {
242
100M
    uint32_t Piece = Read(NumBits);
243
100M
    if ((Piece & (1U << (NumBits-1))) == 0)
244
59.0M
      return uint64_t(Piece);
245
41.6M
246
41.6M
    uint64_t Result = 0;
247
41.6M
    unsigned NextBit = 0;
248
89.1M
    while (
true89.1M
) {
249
89.1M
      Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit;
250
89.1M
251
89.1M
      if ((Piece & (1U << (NumBits-1))) == 0)
252
41.6M
        return Result;
253
47.5M
254
47.5M
      NextBit += NumBits-1;
255
47.5M
      Piece = Read(NumBits);
256
47.5M
    }
257
41.6M
  }
258
259
2.66M
  void SkipToFourByteBoundary() {
260
2.66M
    // If word_t is 64-bits and if we've read less than 32 bits, just dump
261
2.66M
    // the bits we have up to the next 32-bit boundary.
262
2.66M
    if (sizeof(word_t) > 4 &&
263
2.66M
        BitsInCurWord >= 32) {
264
1.41M
      CurWord >>= BitsInCurWord-32;
265
1.41M
      BitsInCurWord = 32;
266
1.41M
      return;
267
1.41M
    }
268
1.24M
269
1.24M
    BitsInCurWord = 0;
270
1.24M
  }
271
272
  /// Skip to the end of the file.
273
0
  void skipToEnd() { NextChar = BitcodeBytes.size(); }
274
};
275
276
/// When advancing through a bitstream cursor, each advance can discover a few
277
/// different kinds of entries:
278
struct BitstreamEntry {
279
  enum {
280
    Error,    // Malformed bitcode was found.
281
    EndBlock, // We've reached the end of the current block, (or the end of the
282
              // file, which is treated like a series of EndBlock records.
283
    SubBlock, // This is the start of a new subblock of a specific ID.
284
    Record    // This is a record with a specific AbbrevID.
285
  } Kind;
286
287
  unsigned ID;
288
289
7.30k
  static BitstreamEntry getError() {
290
7.30k
    BitstreamEntry E; E.Kind = Error; return E;
291
7.30k
  }
292
293
1.12M
  static BitstreamEntry getEndBlock() {
294
1.12M
    BitstreamEntry E; E.Kind = EndBlock; return E;
295
1.12M
  }
296
297
909k
  static BitstreamEntry getSubBlock(unsigned ID) {
298
909k
    BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E;
299
909k
  }
300
301
27.5M
  static BitstreamEntry getRecord(unsigned AbbrevID) {
302
27.5M
    BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E;
303
27.5M
  }
304
};
305
306
/// This represents a position within a bitcode file, implemented on top of a
307
/// SimpleBitstreamCursor.
308
///
309
/// Unlike iterators, BitstreamCursors are heavy-weight objects that should not
310
/// be passed by value.
311
class BitstreamCursor : SimpleBitstreamCursor {
312
  // This is the declared size of code values used for the current block, in
313
  // bits.
314
  unsigned CurCodeSize = 2;
315
316
  /// Abbrevs installed at in this block.
317
  std::vector<std::shared_ptr<BitCodeAbbrev>> CurAbbrevs;
318
319
  struct Block {
320
    unsigned PrevCodeSize;
321
    std::vector<std::shared_ptr<BitCodeAbbrev>> PrevAbbrevs;
322
323
1.15M
    explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
324
  };
325
326
  /// This tracks the codesize of parent blocks.
327
  SmallVector<Block, 8> BlockScope;
328
329
  BitstreamBlockInfo *BlockInfo = nullptr;
330
331
public:
332
  static const size_t MaxChunkSize = sizeof(word_t) * 8;
333
334
44.7k
  BitstreamCursor() = default;
335
  explicit BitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)
336
19.3k
      : SimpleBitstreamCursor(BitcodeBytes) {}
337
  explicit BitstreamCursor(StringRef BitcodeBytes)
338
16.5k
      : SimpleBitstreamCursor(BitcodeBytes) {}
339
  explicit BitstreamCursor(MemoryBufferRef BitcodeBytes)
340
201
      : SimpleBitstreamCursor(BitcodeBytes) {}
341
342
  using SimpleBitstreamCursor::canSkipToPos;
343
  using SimpleBitstreamCursor::AtEndOfStream;
344
  using SimpleBitstreamCursor::getBitcodeBytes;
345
  using SimpleBitstreamCursor::GetCurrentBitNo;
346
  using SimpleBitstreamCursor::getCurrentByteNo;
347
  using SimpleBitstreamCursor::getPointerToByte;
348
  using SimpleBitstreamCursor::JumpToBit;
349
  using SimpleBitstreamCursor::fillCurWord;
350
  using SimpleBitstreamCursor::Read;
351
  using SimpleBitstreamCursor::ReadVBR;
352
  using SimpleBitstreamCursor::ReadVBR64;
353
354
  /// Return the number of bits used to encode an abbrev #.
355
335k
  unsigned getAbbrevIDWidth() const { return CurCodeSize; }
356
357
  /// Flags that modify the behavior of advance().
358
  enum {
359
    /// If this flag is used, the advance() method does not automatically pop
360
    /// the block scope when the end of a block is reached.
361
    AF_DontPopBlockAtEnd = 1,
362
363
    /// If this flag is used, abbrev entries are returned just like normal
364
    /// records.
365
    AF_DontAutoprocessAbbrevs = 2
366
  };
367
368
  /// Advance the current bitstream, returning the next entry in the stream.
369
29.5M
  BitstreamEntry advance(unsigned Flags = 0) {
370
29.9M
    while (
true29.9M
) {
371
29.9M
      if (AtEndOfStream())
372
7.29k
        return BitstreamEntry::getError();
373
29.9M
374
29.9M
      unsigned Code = ReadCode();
375
29.9M
      if (Code == bitc::END_BLOCK) {
376
1.12M
        // Pop the end of the block unless Flags tells us not to.
377
1.12M
        if (!(Flags & AF_DontPopBlockAtEnd) && 
ReadBlockEnd()1.12M
)
378
6
          return BitstreamEntry::getError();
379
1.12M
        return BitstreamEntry::getEndBlock();
380
1.12M
      }
381
28.7M
382
28.7M
      if (Code == bitc::ENTER_SUBBLOCK)
383
909k
        return BitstreamEntry::getSubBlock(ReadSubBlockID());
384
27.8M
385
27.8M
      if (Code == bitc::DEFINE_ABBREV &&
386
27.8M
          
!(Flags & AF_DontAutoprocessAbbrevs)509k
) {
387
364k
        // We read and accumulate abbrev's, the client can't do anything with
388
364k
        // them anyway.
389
364k
        ReadAbbrevRecord();
390
364k
        continue;
391
364k
      }
392
27.5M
393
27.5M
      return BitstreamEntry::getRecord(Code);
394
27.5M
    }
395
29.5M
  }
396
397
  /// This is a convenience function for clients that don't expect any
398
  /// subblocks. This just skips over them automatically.
399
16.4M
  BitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) {
400
16.4M
    while (
true16.4M
) {
401
16.4M
      // If we found a normal entry, return it.
402
16.4M
      BitstreamEntry Entry = advance(Flags);
403
16.4M
      if (Entry.Kind != BitstreamEntry::SubBlock)
404
16.4M
        return Entry;
405
3.93k
406
3.93k
      // If we found a sub-block, just skip over it and check the next entry.
407
3.93k
      if (SkipBlock())
408
0
        return BitstreamEntry::getError();
409
3.93k
    }
410
16.4M
  }
411
412
30.5M
  unsigned ReadCode() {
413
30.5M
    return Read(CurCodeSize);
414
30.5M
  }
415
416
  // Block header:
417
  //    [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
418
419
  /// Having read the ENTER_SUBBLOCK code, read the BlockID for the block.
420
909k
  unsigned ReadSubBlockID() {
421
909k
    return ReadVBR(bitc::BlockIDWidth);
422
909k
  }
423
424
  /// Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body
425
  /// of this block. If the block record is malformed, return true.
426
136k
  bool SkipBlock() {
427
136k
    // Read and ignore the codelen value.  Since we are skipping this block, we
428
136k
    // don't care what code widths are used inside of it.
429
136k
    ReadVBR(bitc::CodeLenWidth);
430
136k
    SkipToFourByteBoundary();
431
136k
    size_t NumFourBytes = Read(bitc::BlockSizeWidth);
432
136k
433
136k
    // Check that the block wasn't partially defined, and that the offset isn't
434
136k
    // bogus.
435
136k
    size_t SkipTo = GetCurrentBitNo() + NumFourBytes*4*8;
436
136k
    if (AtEndOfStream() || 
!canSkipToPos(SkipTo/8)136k
)
437
6
      return true;
438
136k
439
136k
    JumpToBit(SkipTo);
440
136k
    return false;
441
136k
  }
442
443
  /// Having read the ENTER_SUBBLOCK abbrevid, enter the block, and return true
444
  /// if the block has an error.
445
  bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr);
446
447
1.12M
  bool ReadBlockEnd() {
448
1.12M
    if (BlockScope.empty()) 
return true6
;
449
1.12M
450
1.12M
    // Block tail:
451
1.12M
    //    [END_BLOCK, <align4bytes>]
452
1.12M
    SkipToFourByteBoundary();
453
1.12M
454
1.12M
    popBlockScope();
455
1.12M
    return false;
456
1.12M
  }
457
458
private:
459
1.12M
  void popBlockScope() {
460
1.12M
    CurCodeSize = BlockScope.back().PrevCodeSize;
461
1.12M
462
1.12M
    CurAbbrevs = std::move(BlockScope.back().PrevAbbrevs);
463
1.12M
    BlockScope.pop_back();
464
1.12M
  }
465
466
  //===--------------------------------------------------------------------===//
467
  // Record Processing
468
  //===--------------------------------------------------------------------===//
469
470
public:
471
  /// Return the abbreviation for the specified AbbrevId.
472
17.9M
  const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) {
473
17.9M
    unsigned AbbrevNo = AbbrevID - bitc::FIRST_APPLICATION_ABBREV;
474
17.9M
    if (AbbrevNo >= CurAbbrevs.size())
475
0
      report_fatal_error("Invalid abbrev number");
476
17.9M
    return CurAbbrevs[AbbrevNo].get();
477
17.9M
  }
478
479
  /// Read the current record and discard it, returning the code for the record.
480
  unsigned skipRecord(unsigned AbbrevID);
481
482
  unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals,
483
                      StringRef *Blob = nullptr);
484
485
  //===--------------------------------------------------------------------===//
486
  // Abbrev Processing
487
  //===--------------------------------------------------------------------===//
488
  void ReadAbbrevRecord();
489
490
  /// Read and return a block info block from the bitstream. If an error was
491
  /// encountered, return None.
492
  ///
493
  /// \param ReadBlockInfoNames Whether to read block/record name information in
494
  /// the BlockInfo block. Only llvm-bcanalyzer uses this.
495
  Optional<BitstreamBlockInfo>
496
  ReadBlockInfoBlock(bool ReadBlockInfoNames = false);
497
498
  /// Set the block info to be used by this BitstreamCursor to interpret
499
  /// abbreviated records.
500
8.58k
  void setBlockInfo(BitstreamBlockInfo *BI) { BlockInfo = BI; }
501
};
502
503
} // end llvm namespace
504
505
#endif // LLVM_BITCODE_BITSTREAMREADER_H