Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Support/YAMLParser.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- YAMLParser.cpp - Simple YAML parser --------------------------------===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
//  This file implements a YAML parser.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "llvm/Support/YAMLParser.h"
15
#include "llvm/ADT/AllocatorList.h"
16
#include "llvm/ADT/ArrayRef.h"
17
#include "llvm/ADT/None.h"
18
#include "llvm/ADT/STLExtras.h"
19
#include "llvm/ADT/SmallString.h"
20
#include "llvm/ADT/SmallVector.h"
21
#include "llvm/ADT/StringExtras.h"
22
#include "llvm/ADT/StringRef.h"
23
#include "llvm/ADT/Twine.h"
24
#include "llvm/Support/Compiler.h"
25
#include "llvm/Support/ErrorHandling.h"
26
#include "llvm/Support/MemoryBuffer.h"
27
#include "llvm/Support/SMLoc.h"
28
#include "llvm/Support/SourceMgr.h"
29
#include "llvm/Support/raw_ostream.h"
30
#include <algorithm>
31
#include <cassert>
32
#include <cstddef>
33
#include <cstdint>
34
#include <map>
35
#include <memory>
36
#include <string>
37
#include <system_error>
38
#include <utility>
39
40
using namespace llvm;
41
using namespace yaml;
42
43
/// The Unicode encoding forms that the encoding detection in this file can
/// report for an input stream.
enum UnicodeEncodingForm {
  UEF_UTF32_LE, ///< UTF-32, little-endian byte order.
  UEF_UTF32_BE, ///< UTF-32, big-endian byte order.
  UEF_UTF16_LE, ///< UTF-16, little-endian byte order.
  UEF_UTF16_BE, ///< UTF-16, big-endian byte order.
  UEF_UTF8,     ///< UTF-8 (which includes plain ASCII).
  UEF_Unknown   ///< The input is not in a recognised Unicode encoding.
};
51
52
/// EncodingInfo - Holds the encoding type and length of the byte order mark if
53
///                it exists. Length is in {0, 2, 3, 4}.
54
using EncodingInfo = std::pair<UnicodeEncodingForm, unsigned>;
55
56
/// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode
57
///                      encoding form of \a Input.
58
///
59
/// @param Input A string of length 0 or more.
60
/// @returns An EncodingInfo indicating the Unicode encoding form of the input
61
///          and how long the byte order mark is if one exists.
62
2.53k
static EncodingInfo getUnicodeEncoding(StringRef Input) {
63
2.53k
  if (Input.empty())
64
5
    return std::make_pair(UEF_Unknown, 0);
65
2.52k
66
2.52k
  switch (uint8_t(Input[0])) {
67
0
  case 0x00:
68
0
    if (
Input.size() >= 40
) {
69
0
      if (  Input[1] == 0
70
0
         && uint8_t(Input[2]) == 0xFE
71
0
         && uint8_t(Input[3]) == 0xFF)
72
0
        return std::make_pair(UEF_UTF32_BE, 4);
73
0
      
if (0
Input[1] == 0 && 0
Input[2] == 00
&&
Input[3] != 00
)
74
0
        return std::make_pair(UEF_UTF32_BE, 0);
75
0
    }
76
0
77
0
    
if (0
Input.size() >= 2 && 0
Input[1] != 00
)
78
0
      return std::make_pair(UEF_UTF16_BE, 0);
79
0
    return std::make_pair(UEF_Unknown, 0);
80
0
  case 0xFF:
81
0
    if (  Input.size() >= 4
82
0
       && uint8_t(Input[1]) == 0xFE
83
0
       && Input[2] == 0
84
0
       && Input[3] == 0)
85
0
      return std::make_pair(UEF_UTF32_LE, 4);
86
0
87
0
    
if (0
Input.size() >= 2 && 0
uint8_t(Input[1]) == 0xFE0
)
88
0
      return std::make_pair(UEF_UTF16_LE, 2);
89
0
    return std::make_pair(UEF_Unknown, 0);
90
0
  case 0xFE:
91
0
    if (
Input.size() >= 2 && 0
uint8_t(Input[1]) == 0xFF0
)
92
0
      return std::make_pair(UEF_UTF16_BE, 2);
93
0
    return std::make_pair(UEF_Unknown, 0);
94
3
  case 0xEF:
95
3
    if (  Input.size() >= 3
96
3
       && uint8_t(Input[1]) == 0xBB
97
3
       && uint8_t(Input[2]) == 0xBF)
98
3
      return std::make_pair(UEF_UTF8, 3);
99
0
    return std::make_pair(UEF_Unknown, 0);
100
2.52k
  }
101
2.52k
102
2.52k
  // It could still be utf-32 or utf-16.
103
2.52k
  
if (2.52k
Input.size() >= 4 && 2.52k
Input[1] == 02.51k
&&
Input[2] == 00
&&
Input[3] == 00
)
104
0
    return std::make_pair(UEF_UTF32_LE, 0);
105
2.52k
106
2.52k
  
if (2.52k
Input.size() >= 2 && 2.52k
Input[1] == 02.52k
)
107
0
    return std::make_pair(UEF_UTF16_LE, 0);
108
2.52k
109
2.52k
  return std::make_pair(UEF_UTF8, 0);
110
2.52k
}
111
112
/// Pin the vtables to this file.
113
0
void Node::anchor() {}
114
0
void NullNode::anchor() {}
115
0
void ScalarNode::anchor() {}
116
0
void BlockScalarNode::anchor() {}
117
0
void KeyValueNode::anchor() {}
118
0
void MappingNode::anchor() {}
119
0
void SequenceNode::anchor() {}
120
0
void AliasNode::anchor() {}
121
122
namespace llvm {
123
namespace yaml {
124
125
/// Token - A single YAML token.
126
struct Token {
127
  enum TokenKind {
128
    TK_Error, // Uninitialized token.
129
    TK_StreamStart,
130
    TK_StreamEnd,
131
    TK_VersionDirective,
132
    TK_TagDirective,
133
    TK_DocumentStart,
134
    TK_DocumentEnd,
135
    TK_BlockEntry,
136
    TK_BlockEnd,
137
    TK_BlockSequenceStart,
138
    TK_BlockMappingStart,
139
    TK_FlowEntry,
140
    TK_FlowSequenceStart,
141
    TK_FlowSequenceEnd,
142
    TK_FlowMappingStart,
143
    TK_FlowMappingEnd,
144
    TK_Key,
145
    TK_Value,
146
    TK_Scalar,
147
    TK_BlockScalar,
148
    TK_Alias,
149
    TK_Anchor,
150
    TK_Tag
151
  } Kind = TK_Error;
152
153
  /// A string of length 0 or more whose begin() points to the logical location
154
  /// of the token in the input.
155
  StringRef Range;
156
157
  /// The value of a block scalar node.
158
  std::string Value;
159
160
1.67M
  Token() = default;
161
};
162
163
} // end namespace yaml
164
} // end namespace llvm
165
166
using TokenQueueT = BumpPtrList<Token>;
167
168
namespace {
169
170
/// @brief This struct is used to track simple keys.
171
///
172
/// Simple keys are handled by creating an entry in SimpleKeys for each Token
173
/// which could legally be the start of a simple key. When peekNext is called,
174
/// if the Token To be returned is referenced by a SimpleKey, we continue
175
/// tokenizing until that potential simple key has either been found to not be
176
/// a simple key (we moved on to the next line or went further than 1024 chars).
177
/// Or when we run into a Value, and then insert a Key token (and possibly
178
/// others) before the SimpleKey's Tok.
179
struct SimpleKey {
180
  TokenQueueT::iterator Tok;
181
  unsigned Column;
182
  unsigned Line;
183
  unsigned FlowLevel;
184
  bool IsRequired;
185
186
344k
  bool operator ==(const SimpleKey &Other) {
187
344k
    return Tok == Other.Tok;
188
344k
  }
189
};
190
191
} // end anonymous namespace
192
193
/// @brief The Unicode scalar value of a UTF-8 minimal well-formed code unit
194
///        subsequence and the subsequence's length in code units (uint8_t).
195
///        A length of 0 represents an error.
196
using UTF8Decoded = std::pair<uint32_t, unsigned>;
197
198
66
/// Decode the UTF-8 minimal well-formed code unit subsequence found at the
/// start of \a Range.
///
/// @param Range The code units to decode; only its first 1 to 4 code units are
///              examined.
/// @returns The Unicode scalar value and its length in code units. (0, 0) is
///          returned when \a Range is empty, is too short for the sequence its
///          first code unit announces, or does not start with a well-formed
///          sequence (overlong forms, UTF-16 surrogate halves and values
///          above 0x10FFFF are rejected).
static UTF8Decoded decodeUTF8(StringRef Range) {
  StringRef::iterator Position = Range.begin();
  // Number of code units that can be read starting at Position. Every access
  // to Position[N] below is guarded by Available > N, so neither an empty nor
  // a truncated range is ever read past its end.
  const size_t Available = Range.size();
  if (Available == 0)
    return UTF8Decoded(0, 0);

  // 1 byte: [0x00, 0x7f]
  // Bit pattern: 0xxxxxxx
  if ((*Position & 0x80) == 0)
    return UTF8Decoded(*Position, 1);

  // 2 bytes: [0x80, 0x7ff]
  // Bit pattern: 110xxxxx 10xxxxxx
  if (Available >= 2 &&
      ((*Position & 0xE0) == 0xC0) &&
      ((*(Position + 1) & 0xC0) == 0x80)) {
    uint32_t codepoint = ((*Position & 0x1F) << 6) |
                          (*(Position + 1) & 0x3F);
    // Values below 0x80 would be an overlong encoding.
    if (codepoint >= 0x80)
      return UTF8Decoded(codepoint, 2);
  }

  // 3 bytes: [0x800, 0xffff]
  // Bit pattern: 1110xxxx 10xxxxxx 10xxxxxx
  if (Available >= 3 &&
      ((*Position & 0xF0) == 0xE0) &&
      ((*(Position + 1) & 0xC0) == 0x80) &&
      ((*(Position + 2) & 0xC0) == 0x80)) {
    uint32_t codepoint = ((*Position & 0x0F) << 12) |
                         ((*(Position + 1) & 0x3F) << 6) |
                          (*(Position + 2) & 0x3F);
    // Codepoints between 0xD800 and 0xDFFF are invalid, as
    // they are high / low surrogate halves used by UTF-16.
    if (codepoint >= 0x800 &&
        (codepoint < 0xD800 || codepoint > 0xDFFF))
      return UTF8Decoded(codepoint, 3);
  }

  // 4 bytes: [0x10000, 0x10FFFF]
  // Bit pattern: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  if (Available >= 4 &&
      ((*Position & 0xF8) == 0xF0) &&
      ((*(Position + 1) & 0xC0) == 0x80) &&
      ((*(Position + 2) & 0xC0) == 0x80) &&
      ((*(Position + 3) & 0xC0) == 0x80)) {
    uint32_t codepoint = ((*Position & 0x07) << 18) |
                         ((*(Position + 1) & 0x3F) << 12) |
                         ((*(Position + 2) & 0x3F) << 6) |
                          (*(Position + 3) & 0x3F);
    if (codepoint >= 0x10000 && codepoint <= 0x10FFFF)
      return UTF8Decoded(codepoint, 4);
  }

  // Not a well-formed sequence.
  return UTF8Decoded(0, 0);
}
247
248
namespace llvm {
249
namespace yaml {
250
251
/// @brief Scans YAML tokens from a MemoryBuffer.
252
class Scanner {
253
public:
254
  Scanner(StringRef Input, SourceMgr &SM, bool ShowColors = true,
255
          std::error_code *EC = nullptr);
256
  Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors = true,
257
          std::error_code *EC = nullptr);
258
259
  /// @brief Parse the next token and return it without popping it.
260
  Token &peekNext();
261
262
  /// @brief Parse the next token and pop it from the queue.
263
  Token getNext();
264
265
  void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message,
266
128
                  ArrayRef<SMRange> Ranges = None) {
267
128
    SM.PrintMessage(Loc, Kind, Message, Ranges, /* FixIts= */ None, ShowColors);
268
128
  }
269
270
45
  void setError(const Twine &Message, StringRef::iterator Position) {
271
45
    if (Current >= End)
272
30
      Current = End - 1;
273
45
274
45
    // propagate the error if possible
275
45
    if (EC)
276
4
      *EC = make_error_code(std::errc::invalid_argument);
277
45
278
45
    // Don't print out more errors after the first one we encounter. The rest
279
45
    // are just the result of the first, and have no meaning.
280
45
    if (!Failed)
281
45
      printError(SMLoc::getFromPointer(Current), SourceMgr::DK_Error, Message);
282
45
    Failed = true;
283
45
  }
284
285
1
  void setError(const Twine &Message) {
286
1
    setError(Message, Current);
287
1
  }
288
289
  /// @brief Returns true if an error occurred while parsing.
290
364k
  bool failed() {
291
364k
    return Failed;
292
364k
  }
293
294
private:
295
  void init(MemoryBufferRef Buffer);
296
297
2.53k
  StringRef currentInput() {
298
2.53k
    return StringRef(Current, End - Current);
299
2.53k
  }
300
301
  /// @brief Decode a UTF-8 minimal well-formed code unit subsequence starting
302
  ///        at \a Position.
303
  ///
304
  /// If the UTF-8 code units starting at Position do not form a well-formed
305
  /// code unit subsequence, then the Unicode scalar value is 0, and the length
306
  /// is 0.
307
31
  UTF8Decoded decodeUTF8(StringRef::iterator Position) {
308
31
    return ::decodeUTF8(StringRef(Position, End - Position));
309
31
  }
310
311
  // The following functions are based on the gramar rules in the YAML spec. The
312
  // style of the function names it meant to closely match how they are written
313
  // in the spec. The number within the [] is the number of the grammar rule in
314
  // the spec.
315
  //
316
  // See 4.2 [Production Naming Conventions] for the meaning of the prefixes.
317
  //
318
  // c-
319
  //   A production starting and ending with a special character.
320
  // b-
321
  //   A production matching a single line break.
322
  // nb-
323
  //   A production starting and ending with a non-break character.
324
  // s-
325
  //   A production starting and ending with a white space character.
326
  // ns-
327
  //   A production starting and ending with a non-space character.
328
  // l-
329
  //   A production matching complete line(s).
330
331
  /// @brief Skip a single nb-char[27] starting at Position.
332
  ///
333
  /// A nb-char is 0x9 | [0x20-0x7E] | 0x85 | [0xA0-0xD7FF] | [0xE000-0xFEFE]
334
  ///                  | [0xFF00-0xFFFD] | [0x10000-0x10FFFF]
335
  ///
336
  /// @returns The code unit after the nb-char, or Position if it's not an
337
  ///          nb-char.
338
  StringRef::iterator skip_nb_char(StringRef::iterator Position);
339
340
  /// @brief Skip a single b-break[28] starting at Position.
341
  ///
342
  /// A b-break is 0xD 0xA | 0xD | 0xA
343
  ///
344
  /// @returns The code unit after the b-break, or Position if it's not a
345
  ///          b-break.
346
  StringRef::iterator skip_b_break(StringRef::iterator Position);
347
348
  /// Skip a single s-space[31] starting at Position.
349
  ///
350
  /// An s-space is 0x20
351
  ///
352
  /// @returns The code unit after the s-space, or Position if it's not a
353
  ///          s-space.
354
  StringRef::iterator skip_s_space(StringRef::iterator Position);
355
356
  /// @brief Skip a single s-white[33] starting at Position.
357
  ///
358
  /// A s-white is 0x20 | 0x9
359
  ///
360
  /// @returns The code unit after the s-white, or Position if it's not a
361
  ///          s-white.
362
  StringRef::iterator skip_s_white(StringRef::iterator Position);
363
364
  /// @brief Skip a single ns-char[34] starting at Position.
365
  ///
366
  /// A ns-char is nb-char - s-white
367
  ///
368
  /// @returns The code unit after the ns-char, or Position if it's not a
369
  ///          ns-char.
370
  StringRef::iterator skip_ns_char(StringRef::iterator Position);
371
372
  using SkipWhileFunc = StringRef::iterator (Scanner::*)(StringRef::iterator);
373
374
  /// @brief Skip minimal well-formed code unit subsequences until Func
375
  ///        returns its input.
376
  ///
377
  /// @returns The code unit after the last minimal well-formed code unit
378
  ///          subsequence that Func accepted.
379
  StringRef::iterator skip_while( SkipWhileFunc Func
380
                                , StringRef::iterator Position);
381
382
  /// Skip minimal well-formed code unit subsequences until Func returns its
383
  /// input.
384
  void advanceWhile(SkipWhileFunc Func);
385
386
  /// @brief Scan ns-uri-char[39]s starting at Cur.
387
  ///
388
  /// This updates Cur and Column while scanning.
389
  void scan_ns_uri_char();
390
391
  /// @brief Consume a minimal well-formed code unit subsequence starting at
392
  ///        \a Cur. Return false if it is not the same Unicode scalar value as
393
  ///        \a Expected. This updates \a Column.
394
  bool consume(uint32_t Expected);
395
396
  /// @brief Skip \a Distance UTF-8 code units. Updates \a Cur and \a Column.
397
  void skip(uint32_t Distance);
398
399
  /// @brief Return true if the minimal well-formed code unit subsequence at
400
  ///        Pos is whitespace or a new line
401
  bool isBlankOrBreak(StringRef::iterator Position);
402
403
  /// Consume a single b-break[28] if it's present at the current position.
404
  ///
405
  /// Return false if the code unit at the current position isn't a line break.
406
  bool consumeLineBreakIfPresent();
407
408
  /// @brief If IsSimpleKeyAllowed, create and push_back a new SimpleKey.
409
  void saveSimpleKeyCandidate( TokenQueueT::iterator Tok
410
                             , unsigned AtColumn
411
                             , bool IsRequired);
412
413
  /// @brief Remove simple keys that can no longer be valid simple keys.
414
  ///
415
  /// Invalid simple keys are not on the current line or are further than 1024
416
  /// columns back.
417
  void removeStaleSimpleKeyCandidates();
418
419
  /// @brief Remove all simple keys on FlowLevel \a Level.
420
  void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level);
421
422
  /// @brief Unroll indentation in \a Indents back to \a Col. Creates BlockEnd
423
  ///        tokens if needed.
424
  bool unrollIndent(int ToColumn);
425
426
  /// @brief Increase indent to \a Col. Creates \a Kind token at \a InsertPoint
427
  ///        if needed.
428
  bool rollIndent( int ToColumn
429
                 , Token::TokenKind Kind
430
                 , TokenQueueT::iterator InsertPoint);
431
432
  /// @brief Skip a single-line comment when the comment starts at the current
433
  /// position of the scanner.
434
  void skipComment();
435
436
  /// @brief Skip whitespace and comments until the start of the next token.
437
  void scanToNextToken();
438
439
  /// @brief Must be the first token generated.
440
  bool scanStreamStart();
441
442
  /// @brief Generate tokens needed to close out the stream.
443
  bool scanStreamEnd();
444
445
  /// @brief Scan a %BLAH directive.
446
  bool scanDirective();
447
448
  /// @brief Scan a ... or ---.
449
  bool scanDocumentIndicator(bool IsStart);
450
451
  /// @brief Scan a [ or { and generate the proper flow collection start token.
452
  bool scanFlowCollectionStart(bool IsSequence);
453
454
  /// @brief Scan a ] or } and generate the proper flow collection end token.
455
  bool scanFlowCollectionEnd(bool IsSequence);
456
457
  /// @brief Scan the , that separates entries in a flow collection.
458
  bool scanFlowEntry();
459
460
  /// @brief Scan the - that starts block sequence entries.
461
  bool scanBlockEntry();
462
463
  /// @brief Scan an explicit ? indicating a key.
464
  bool scanKey();
465
466
  /// @brief Scan an explicit : indicating a value.
467
  bool scanValue();
468
469
  /// @brief Scan a quoted scalar.
470
  bool scanFlowScalar(bool IsDoubleQuoted);
471
472
  /// @brief Scan an unquoted scalar.
473
  bool scanPlainScalar();
474
475
  /// @brief Scan an Alias or Anchor starting with * or &.
476
  bool scanAliasOrAnchor(bool IsAlias);
477
478
  /// @brief Scan a block scalar starting with | or >.
479
  bool scanBlockScalar(bool IsLiteral);
480
481
  /// Scan a chomping indicator in a block scalar header.
482
  char scanBlockChompingIndicator();
483
484
  /// Scan an indentation indicator in a block scalar header.
485
  unsigned scanBlockIndentationIndicator();
486
487
  /// Scan a block scalar header.
488
  ///
489
  /// Return false if an error occurred.
490
  bool scanBlockScalarHeader(char &ChompingIndicator, unsigned &IndentIndicator,
491
                             bool &IsDone);
492
493
  /// Look for the indentation level of a block scalar.
494
  ///
495
  /// Return false if an error occurred.
496
  bool findBlockScalarIndent(unsigned &BlockIndent, unsigned BlockExitIndent,
497
                             unsigned &LineBreaks, bool &IsDone);
498
499
  /// Scan the indentation of a text line in a block scalar.
500
  ///
501
  /// Return false if an error occurred.
502
  bool scanBlockScalarIndent(unsigned BlockIndent, unsigned BlockExitIndent,
503
                             bool &IsDone);
504
505
  /// @brief Scan a tag of the form !stuff.
506
  bool scanTag();
507
508
  /// @brief Dispatch to the next scanning function based on \a *Cur.
509
  bool fetchMoreTokens();
510
511
  /// @brief The SourceMgr used for diagnostics and buffer management.
512
  SourceMgr &SM;
513
514
  /// @brief The original input.
515
  MemoryBufferRef InputBuffer;
516
517
  /// @brief The current position of the scanner.
518
  StringRef::iterator Current;
519
520
  /// @brief The end of the input (one past the last character).
521
  StringRef::iterator End;
522
523
  /// @brief Current YAML indentation level in spaces.
524
  int Indent;
525
526
  /// @brief Current column number in Unicode code points.
527
  unsigned Column;
528
529
  /// @brief Current line number.
530
  unsigned Line;
531
532
  /// @brief How deep we are in flow style containers. 0 Means at block level.
533
  unsigned FlowLevel;
534
535
  /// @brief Are we at the start of the stream?
536
  bool IsStartOfStream;
537
538
  /// @brief Can the next token be the start of a simple key?
539
  bool IsSimpleKeyAllowed;
540
541
  /// @brief True if an error has occurred.
542
  bool Failed;
543
544
  /// @brief Should colors be used when printing out the diagnostic messages?
545
  bool ShowColors;
546
547
  /// @brief Queue of tokens. This is required to queue up tokens while looking
548
  ///        for the end of a simple key. And for cases where a single character
549
  ///        can produce multiple tokens (e.g. BlockEnd).
550
  TokenQueueT TokenQueue;
551
552
  /// @brief Indentation levels.
553
  SmallVector<int, 4> Indents;
554
555
  /// @brief Potential simple keys.
556
  SmallVector<SimpleKey, 4> SimpleKeys;
557
558
  std::error_code *EC;
559
};
560
561
} // end namespace yaml
562
} // end namespace llvm
563
564
/// encodeUTF8 - Encode \a UnicodeScalarValue in UTF-8 and append it to result.
565
static void encodeUTF8( uint32_t UnicodeScalarValue
566
10
                      , SmallVectorImpl<char> &Result) {
567
10
  if (
UnicodeScalarValue <= 0x7F10
) {
568
5
    Result.push_back(UnicodeScalarValue & 0x7F);
569
10
  } else 
if (5
UnicodeScalarValue <= 0x7FF5
) {
570
2
    uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6);
571
2
    uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F);
572
2
    Result.push_back(FirstByte);
573
2
    Result.push_back(SecondByte);
574
5
  } else 
if (3
UnicodeScalarValue <= 0xFFFF3
) {
575
3
    uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12);
576
3
    uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
577
3
    uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F);
578
3
    Result.push_back(FirstByte);
579
3
    Result.push_back(SecondByte);
580
3
    Result.push_back(ThirdByte);
581
3
  } else 
if (0
UnicodeScalarValue <= 0x10FFFF0
) {
582
0
    uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18);
583
0
    uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12);
584
0
    uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
585
0
    uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F);
586
0
    Result.push_back(FirstByte);
587
0
    Result.push_back(SecondByte);
588
0
    Result.push_back(ThirdByte);
589
0
    Result.push_back(FourthByte);
590
0
  }
591
10
}
592
593
0
/// Scan \a Input and write one line per token to \a OS: a label naming the
/// token kind followed by the token's source range. Error tokens are written
/// without a label.
///
/// @returns true once the stream-end token has been written, false as soon as
///          an error token has been written.
bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) {
  SourceMgr SM;
  Scanner Lexer(Input, SM);
  for (;;) {
    Token Tok = Lexer.getNext();

    // Label printed in front of the token's range; stays empty for errors.
    const char *Label = "";
    switch (Tok.Kind) {
    case Token::TK_StreamStart:        Label = "Stream-Start: "; break;
    case Token::TK_StreamEnd:          Label = "Stream-End: "; break;
    case Token::TK_VersionDirective:   Label = "Version-Directive: "; break;
    case Token::TK_TagDirective:       Label = "Tag-Directive: "; break;
    case Token::TK_DocumentStart:      Label = "Document-Start: "; break;
    case Token::TK_DocumentEnd:        Label = "Document-End: "; break;
    case Token::TK_BlockEntry:         Label = "Block-Entry: "; break;
    case Token::TK_BlockEnd:           Label = "Block-End: "; break;
    case Token::TK_BlockSequenceStart: Label = "Block-Sequence-Start: "; break;
    case Token::TK_BlockMappingStart:  Label = "Block-Mapping-Start: "; break;
    case Token::TK_FlowEntry:          Label = "Flow-Entry: "; break;
    case Token::TK_FlowSequenceStart:  Label = "Flow-Sequence-Start: "; break;
    case Token::TK_FlowSequenceEnd:    Label = "Flow-Sequence-End: "; break;
    case Token::TK_FlowMappingStart:   Label = "Flow-Mapping-Start: "; break;
    case Token::TK_FlowMappingEnd:     Label = "Flow-Mapping-End: "; break;
    case Token::TK_Key:                Label = "Key: "; break;
    case Token::TK_Value:              Label = "Value: "; break;
    case Token::TK_Scalar:             Label = "Scalar: "; break;
    case Token::TK_BlockScalar:        Label = "Block Scalar: "; break;
    case Token::TK_Alias:              Label = "Alias: "; break;
    case Token::TK_Anchor:             Label = "Anchor: "; break;
    case Token::TK_Tag:                Label = "Tag: "; break;
    case Token::TK_Error:
      break;
    }
    OS << Label << Tok.Range << "\n";

    if (Tok.Kind == Token::TK_StreamEnd)
      return true;
    if (Tok.Kind == Token::TK_Error)
      return false;
  }
}
676
677
0
/// Scan every token of \a Input without producing any output.
///
/// @returns true if the stream-end token was reached, false if an error token
///          was produced first.
bool yaml::scanTokens(StringRef Input) {
  SourceMgr SM;
  Scanner Lexer(Input, SM);
  for (;;) {
    const Token Tok = Lexer.getNext();
    switch (Tok.Kind) {
    case Token::TK_StreamEnd:
      return true;
    case Token::TK_Error:
      return false;
    default:
      // Any other token is simply discarded.
      break;
    }
  }
}
689
690
2.03k
/// Return a copy of \a Input in which backslash, double quote, control
/// characters and multi-byte UTF-8 sequences are replaced by backslash escape
/// sequences. All other bytes are copied unchanged.
///
/// If an ill-formed UTF-8 sequence is met, the UTF-8 encoding of U+FFFD is
/// appended and the text escaped so far is returned; the rest of \a Input is
/// not processed.
std::string yaml::escape(StringRef Input) {
  std::string Out;

  // Appends Prefix followed by Hex, left-padded with '0' to Width digits.
  auto AppendHex = [&Out](const char *Prefix, size_t Width,
                          const std::string &Hex) {
    Out += Prefix + std::string(Width - Hex.size(), '0') + Hex;
  };

  for (StringRef::iterator Cur = Input.begin(), Last = Input.end();
       Cur != Last; ++Cur) {
    const char C = *Cur;

    // Characters that have a dedicated short escape.
    const char *Short = nullptr;
    switch (C) {
    case '\\': Short = "\\\\"; break;
    case '"':  Short = "\\\""; break;
    case 0x00: Short = "\\0"; break;
    case 0x07: Short = "\\a"; break;
    case 0x08: Short = "\\b"; break;
    case 0x09: Short = "\\t"; break;
    case 0x0A: Short = "\\n"; break;
    case 0x0B: Short = "\\v"; break;
    case 0x0C: Short = "\\f"; break;
    case 0x0D: Short = "\\r"; break;
    case 0x1B: Short = "\\e"; break;
    default:
      break;
    }
    if (Short) {
      Out += Short;
      continue;
    }

    // Control characters not handled above.
    if ((unsigned char)C < 0x20) {
      AppendHex("\\x", 2, utohexstr(C));
      continue;
    }

    // Plain single-byte character: copy it through.
    if (!(C & 0x80)) {
      Out.push_back(C);
      continue;
    }

    // UTF-8 multiple code unit subsequence.
    UTF8Decoded Decoded = decodeUTF8(StringRef(Cur, Last - Cur));
    if (Decoded.second == 0) {
      // Found invalid char.
      SmallString<4> Replacement;
      encodeUTF8(0xFFFD, Replacement);
      Out.insert(Out.end(), Replacement.begin(), Replacement.end());
      // FIXME: Error reporting.
      return Out;
    }

    switch (Decoded.first) {
    case 0x85:   Out += "\\N"; break;
    case 0xA0:   Out += "\\_"; break;
    case 0x2028: Out += "\\L"; break;
    case 0x2029: Out += "\\P"; break;
    default: {
      // Pick the shortest numeric escape that can hold the value.
      const std::string Hex = utohexstr(Decoded.first);
      if (Hex.size() <= 2)
        AppendHex("\\x", 2, Hex);
      else if (Hex.size() <= 4)
        AppendHex("\\u", 4, Hex);
      else if (Hex.size() <= 8)
        AppendHex("\\U", 8, Hex);
      break;
    }
    }

    // Step over the remaining code units of the sequence; the loop increment
    // accounts for the last one.
    Cur += Decoded.second - 1;
  }
  return Out;
}
752
753
/// Construct a scanner over \a Input by wrapping it in a MemoryBufferRef named
/// "YAML" and delegating to the MemoryBufferRef constructor.
Scanner::Scanner(StringRef Input, SourceMgr &sm, bool ShowColors,
                 std::error_code *EC)
    : Scanner(MemoryBufferRef(Input, "YAML"), sm, ShowColors, EC) {}
758
759
/// Construct a scanner over \a Buffer. The diagnostic settings are stored and
/// the remaining state is set up by init().
Scanner::Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors,
                 std::error_code *EC)
    : SM(SM_),
      ShowColors(ShowColors),
      EC(EC) {
  init(Buffer);
}
764
765
2.53k
/// Reset the scanner to the beginning of \a Buffer and add a MemoryBuffer
/// created from it to the SourceMgr.
void Scanner::init(MemoryBufferRef Buffer) {
  // Input and cursor.
  InputBuffer = Buffer;
  Current = Buffer.getBufferStart();
  End = Buffer.getBufferEnd();

  // Position tracking. An Indent of -1 is the initial indentation level.
  Indent = -1;
  Column = 0;
  Line = 0;
  FlowLevel = 0;

  // Scanner state flags.
  IsStartOfStream = true;
  IsSimpleKeyAllowed = true;
  Failed = false;

  SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(Buffer), SMLoc());
}
780
781
2.09M
/// Return the token at the front of the queue without removing it, scanning
/// more input first when needed.
///
/// If fetching more tokens fails, the queue is replaced by a single
/// default-constructed (TK_Error) token, which is returned.
Token &Scanner::peekNext() {
  // NeedMore becomes true after the first pass: reaching a second pass means
  // the front token is still a possible simple key, so we must keep parsing
  // until that can be confirmed or ruled out.
  for (bool NeedMore = false;; NeedMore = true) {
    if ((NeedMore || TokenQueue.empty()) && !fetchMoreTokens()) {
      TokenQueue.clear();
      TokenQueue.push_back(Token());
      return TokenQueue.front();
    }
    assert(!TokenQueue.empty() &&
            "fetchMoreTokens lied about getting tokens!");

    removeStaleSimpleKeyCandidates();

    // Only Tok takes part in SimpleKey comparison, so a probe with just that
    // field set is enough for the lookup.
    SimpleKey Probe;
    Probe.Tok = TokenQueue.begin();
    if (!is_contained(SimpleKeys, Probe))
      return TokenQueue.front();
  }
}
806
807
644k
/// Return a copy of the next token and remove it from the queue.
Token Scanner::getNext() {
  Token Front = peekNext();

  // TokenQueue can be empty if there was an error getting the next token.
  const bool HadToken = !TokenQueue.empty();
  if (HadToken)
    TokenQueue.pop_front();

  // There cannot be any referenced Token's if the TokenQueue is empty. So do a
  // quick deallocation of them all.
  if (TokenQueue.empty())
    TokenQueue.resetAlloc();

  return Front;
}
820
821
4.98M
/// Skip one nb-char at \a Position: a tab, a printable 7-bit character, or a
/// well-formed UTF-8 sequence whose scalar value is 0x85 or lies in
/// [0xA0-0xD7FF], [0xE000-0xFFFD] (excluding 0xFEFF) or [0x10000-0x10FFFF].
///
/// @returns The position after the character, or \a Position itself when no
///          nb-char starts there or the end of input has been reached.
StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) {
  if (Position == End)
    return Position;

  // Check 7 bit c-printable - b-char.
  const uint8_t Lead = uint8_t(*Position);
  if (Lead == 0x09 || (Lead >= 0x20 && Lead <= 0x7E))
    return Position + 1;

  // Anything else without the high bit set is not an nb-char.
  if (!(Lead & 0x80))
    return Position;

  // Check for valid UTF-8.
  const UTF8Decoded Decoded = decodeUTF8(Position);
  if (Decoded.second == 0 || Decoded.first == 0xFEFF)
    return Position;

  const uint32_t Scalar = Decoded.first;
  const bool Accepted = Scalar == 0x85
                     || (Scalar >= 0xA0 && Scalar <= 0xD7FF)
                     || (Scalar >= 0xE000 && Scalar <= 0xFFFD)
                     || (Scalar >= 0x10000 && Scalar <= 0x10FFFF);
  return Accepted ? Position + Decoded.second : Position;
}
845
846
739k
/// Skip one line break (CR LF, a lone CR, or LF) at \a Position.
///
/// @returns The position after the break, or \a Position itself when no line
///          break starts there or the end of input has been reached.
StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) {
  if (Position == End)
    return Position;

  switch (*Position) {
  case 0x0D: {
    // A carriage return swallows an immediately following line feed.
    StringRef::iterator Next = Position + 1;
    if (Next != End && *Next == 0x0A)
      return Position + 2;
    return Next;
  }
  case 0x0A:
    return Position + 1;
  default:
    return Position;
  }
}
859
860
98.1k
/// Skip one space character (0x20) at \a Position.
///
/// @returns The position after the space, or \a Position itself when the
///          character there is not a space or the end of input was reached.
StringRef::iterator Scanner::skip_s_space(StringRef::iterator Position) {
  const bool AtSpace = Position != End && *Position == ' ';
  return AtSpace ? Position + 1 : Position;
}
867
868
504k
/// Skip one white space character (space or tab) at \a Position.
///
/// @returns The position after the character, or \a Position itself when the
///          character there is neither or the end of input was reached.
StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) {
  const bool AtWhite =
      Position != End && (*Position == ' ' || *Position == '\t');
  return AtWhite ? Position + 1 : Position;
}
875
876
8.03k
/// Skip one ns-char at \a Position, i.e. an nb-char that is neither a space
/// nor a tab.
///
/// @returns The position after the character, or \a Position itself when no
///          ns-char starts there or the end of input was reached.
StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) {
  // End of input and white space are both rejected before deferring to the
  // nb-char check.
  if (Position == End || *Position == ' ' || *Position == '\t')
    return Position;
  return skip_nb_char(Position);
}
883
884
/// Apply the skip function \a Func repeatedly, starting at \a Position, until
/// it stops making progress (returns the position it was given).
///
/// @returns The position at which \a Func first refused to advance.
StringRef::iterator Scanner::skip_while( SkipWhileFunc Func
                                       , StringRef::iterator Position) {
  StringRef::iterator Next = (this->*Func)(Position);
  while (Next != Position) {
    Position = Next;
    Next = (this->*Func)(Position);
  }
  return Position;
}
894
895
52.5k
/// Advance Current for as long as \p Func keeps consuming input, adding the
/// number of bytes skipped to Column.
void Scanner::advanceWhile(SkipWhileFunc Func) {
  StringRef::iterator Stop = skip_while(Func, Current);
  Column += Stop - Current;
  Current = Stop;
}
900
901
0
/// Returns true if \p C is a hexadecimal digit: '0'-'9', 'a'-'f' or 'A'-'F'.
///
/// Letters beyond 'f'/'F' are rejected; they are not hexadecimal digits and
/// must not be accepted as part of a "%XX" escape.
static bool is_ns_hex_digit(const char C) {
  return    (C >= '0' && C <= '9')
         || (C >= 'a' && C <= 'f')
         || (C >= 'A' && C <= 'F');
}
906
907
36
/// Returns true if \p C is a word character: a decimal digit, an ASCII letter
/// or '-'.
///
/// Decimal digits are included so that URIs containing numbers (for example
/// "tag:yaml.org,2002:str") are scanned in full.
static bool is_ns_word_char(const char C) {
  return    C == '-'
         || (C >= '0' && C <= '9')
         || (C >= 'a' && C <= 'z')
         || (C >= 'A' && C <= 'Z');
}
912
913
3
void Scanner::scan_ns_uri_char() {
914
36
  while (
true36
) {
915
36
    if (Current == End)
916
0
      break;
917
36
    
if (36
( *Current == '%'
918
0
          && Current + 2 < End
919
0
          && is_ns_hex_digit(*(Current + 1))
920
0
          && is_ns_hex_digit(*(Current + 2)))
921
36
        || is_ns_word_char(*Current)
922
10
        || StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]")
923
36
          != StringRef::npos) {
924
33
      ++Current;
925
33
      ++Column;
926
33
    } else
927
3
      break;
928
36
  }
929
3
}
930
931
22
bool Scanner::consume(uint32_t Expected) {
932
22
  if (Expected >= 0x80)
933
0
    report_fatal_error("Not dealing with this yet");
934
22
  
if (22
Current == End22
)
935
0
    return false;
936
22
  
if (22
uint8_t(*Current) >= 0x8022
)
937
0
    report_fatal_error("Not dealing with this yet");
938
22
  
if (22
uint8_t(*Current) == Expected22
) {
939
20
    ++Current;
940
20
    ++Column;
941
20
    return true;
942
20
  }
943
2
  return false;
944
2
}
945
946
1.51M
void Scanner::skip(uint32_t Distance) {
947
1.51M
  Current += Distance;
948
1.51M
  Column += Distance;
949
1.51M
  assert(Current <= End && "Skipped past the end");
950
1.51M
}
951
952
3.26M
/// Returns true if the character at \p Position is a space, tab, carriage
/// return or line feed. The end of the input is neither blank nor break.
bool Scanner::isBlankOrBreak(StringRef::iterator Position) {
  if (Position == End)
    return false;

  switch (*Position) {
  case ' ':
  case '\t':
  case '\r':
  case '\n':
    return true;
  default:
    return false;
  }
}
958
959
52.4k
bool Scanner::consumeLineBreakIfPresent() {
960
52.4k
  auto Next = skip_b_break(Current);
961
52.4k
  if (Next == Current)
962
3
    return false;
963
52.4k
  Column = 0;
964
52.4k
  ++Line;
965
52.4k
  Current = Next;
966
52.4k
  return true;
967
52.4k
}
968
969
/// Record \p Tok as a possible simple key, provided simple keys are allowed
/// at the current position.
///
/// The candidate stores the current Line and FlowLevel together with
/// \p AtColumn and \p IsRequired.
void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok
                                    , unsigned AtColumn
                                    , bool IsRequired) {
  if (!IsSimpleKeyAllowed)
    return;

  SimpleKey Candidate;
  Candidate.Tok = Tok;
  Candidate.Line = Line;
  Candidate.Column = AtColumn;
  Candidate.IsRequired = IsRequired;
  Candidate.FlowLevel = FlowLevel;
  SimpleKeys.push_back(Candidate);
}
982
983
2.78M
void Scanner::removeStaleSimpleKeyCandidates() {
984
2.78M
  for (SmallVectorImpl<SimpleKey>::iterator i = SimpleKeys.begin();
985
3.41M
                                            
i != SimpleKeys.end()3.41M
;) {
986
625k
    if (
i->Line != Line || 625k
i->Column + 1024 < Column614k
) {
987
11.1k
      if (i->IsRequired)
988
0
        setError( "Could not find expected : for simple key"
989
0
                , i->Tok->Range.begin());
990
11.1k
      i = SimpleKeys.erase(i);
991
11.1k
    } else
992
614k
      ++i;
993
625k
  }
994
2.78M
}
995
996
65.4k
void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) {
997
65.4k
  if (
!SimpleKeys.empty() && 65.4k
(SimpleKeys.end() - 1)->FlowLevel == Level38.9k
)
998
12.7k
    SimpleKeys.pop_back();
999
65.4k
}
1000
1001
471k
bool Scanner::unrollIndent(int ToColumn) {
1002
471k
  Token T;
1003
471k
  // Indentation is ignored in flow.
1004
471k
  if (FlowLevel != 0)
1005
138k
    return true;
1006
332k
1007
361k
  
while (332k
Indent > ToColumn361k
) {
1008
28.5k
    T.Kind = Token::TK_BlockEnd;
1009
28.5k
    T.Range = StringRef(Current, 1);
1010
28.5k
    TokenQueue.push_back(T);
1011
28.5k
    Indent = Indents.pop_back_val();
1012
28.5k
  }
1013
471k
1014
471k
  return true;
1015
471k
}
1016
1017
bool Scanner::rollIndent( int ToColumn
1018
                        , Token::TokenKind Kind
1019
148k
                        , TokenQueueT::iterator InsertPoint) {
1020
148k
  if (FlowLevel)
1021
27.8k
    return true;
1022
120k
  
if (120k
Indent < ToColumn120k
) {
1023
28.5k
    Indents.push_back(Indent);
1024
28.5k
    Indent = ToColumn;
1025
28.5k
1026
28.5k
    Token T;
1027
28.5k
    T.Kind = Kind;
1028
28.5k
    T.Range = StringRef(Current, 0);
1029
28.5k
    TokenQueue.insert(InsertPoint, T);
1030
28.5k
  }
1031
148k
  return true;
1032
148k
}
1033
1034
611k
void Scanner::skipComment() {
1035
611k
  if (*Current != '#')
1036
584k
    return;
1037
1.26M
  
while (26.6k
true1.26M
) {
1038
1.26M
    // This may skip more than one byte, thus Column is only incremented
1039
1.26M
    // for code points.
1040
1.26M
    StringRef::iterator I = skip_nb_char(Current);
1041
1.26M
    if (I == Current)
1042
26.6k
      break;
1043
1.24M
    Current = I;
1044
1.24M
    ++Column;
1045
1.24M
  }
1046
611k
}
1047
1048
462k
void Scanner::scanToNextToken() {
1049
608k
  while (
true608k
) {
1050
1.89M
    while (
*Current == ' ' || 1.89M
*Current == '\t'608k
) {
1051
1.28M
      skip(1);
1052
1.28M
    }
1053
608k
1054
608k
    skipComment();
1055
608k
1056
608k
    // Skip EOL.
1057
608k
    StringRef::iterator i = skip_b_break(Current);
1058
608k
    if (i == Current)
1059
462k
      break;
1060
145k
    Current = i;
1061
145k
    ++Line;
1062
145k
    Column = 0;
1063
145k
    // New lines may start a simple key.
1064
145k
    if (!FlowLevel)
1065
142k
      IsSimpleKeyAllowed = true;
1066
608k
  }
1067
462k
}
1068
1069
2.53k
bool Scanner::scanStreamStart() {
1070
2.53k
  IsStartOfStream = false;
1071
2.53k
1072
2.53k
  EncodingInfo EI = getUnicodeEncoding(currentInput());
1073
2.53k
1074
2.53k
  Token T;
1075
2.53k
  T.Kind = Token::TK_StreamStart;
1076
2.53k
  T.Range = StringRef(Current, EI.second);
1077
2.53k
  TokenQueue.push_back(T);
1078
2.53k
  Current += EI.second;
1079
2.53k
  return true;
1080
2.53k
}
1081
1082
1.56k
bool Scanner::scanStreamEnd() {
1083
1.56k
  // Force an ending new line if one isn't present.
1084
1.56k
  if (
Column != 01.56k
) {
1085
365
    Column = 0;
1086
365
    ++Line;
1087
365
  }
1088
1.56k
1089
1.56k
  unrollIndent(-1);
1090
1.56k
  SimpleKeys.clear();
1091
1.56k
  IsSimpleKeyAllowed = false;
1092
1.56k
1093
1.56k
  Token T;
1094
1.56k
  T.Kind = Token::TK_StreamEnd;
1095
1.56k
  T.Range = StringRef(Current, 0);
1096
1.56k
  TokenQueue.push_back(T);
1097
1.56k
  return true;
1098
1.56k
}
1099
1100
19
bool Scanner::scanDirective() {
1101
19
  // Reset the indentation level.
1102
19
  unrollIndent(-1);
1103
19
  SimpleKeys.clear();
1104
19
  IsSimpleKeyAllowed = false;
1105
19
1106
19
  StringRef::iterator Start = Current;
1107
19
  consume('%');
1108
19
  StringRef::iterator NameStart = Current;
1109
19
  Current = skip_while(&Scanner::skip_ns_char, Current);
1110
19
  StringRef Name(NameStart, Current - NameStart);
1111
19
  Current = skip_while(&Scanner::skip_s_white, Current);
1112
19
  
1113
19
  Token T;
1114
19
  if (
Name == "YAML"19
) {
1115
5
    Current = skip_while(&Scanner::skip_ns_char, Current);
1116
5
    T.Kind = Token::TK_VersionDirective;
1117
5
    T.Range = StringRef(Start, Current - Start);
1118
5
    TokenQueue.push_back(T);
1119
5
    return true;
1120
14
  } else 
if(14
Name == "TAG"14
) {
1121
13
    Current = skip_while(&Scanner::skip_ns_char, Current);
1122
13
    Current = skip_while(&Scanner::skip_s_white, Current);
1123
13
    Current = skip_while(&Scanner::skip_ns_char, Current);
1124
13
    T.Kind = Token::TK_TagDirective;
1125
13
    T.Range = StringRef(Start, Current - Start);
1126
13
    TokenQueue.push_back(T);
1127
13
    return true;
1128
13
  }
1129
1
  return false;
1130
1
}
1131
1132
8.15k
bool Scanner::scanDocumentIndicator(bool IsStart) {
1133
8.15k
  unrollIndent(-1);
1134
8.15k
  SimpleKeys.clear();
1135
8.15k
  IsSimpleKeyAllowed = false;
1136
8.15k
1137
8.15k
  Token T;
1138
8.15k
  T.Kind = IsStart ? 
Token::TK_DocumentStart4.13k
:
Token::TK_DocumentEnd4.02k
;
1139
8.15k
  T.Range = StringRef(Current, 3);
1140
8.15k
  skip(3);
1141
8.15k
  TokenQueue.push_back(T);
1142
8.15k
  return true;
1143
8.15k
}
1144
1145
14.0k
bool Scanner::scanFlowCollectionStart(bool IsSequence) {
1146
14.0k
  Token T;
1147
3.58k
  T.Kind = IsSequence ? Token::TK_FlowSequenceStart
1148
10.4k
                      : Token::TK_FlowMappingStart;
1149
14.0k
  T.Range = StringRef(Current, 1);
1150
14.0k
  skip(1);
1151
14.0k
  TokenQueue.push_back(T);
1152
14.0k
1153
14.0k
  // [ and { may begin a simple key.
1154
14.0k
  saveSimpleKeyCandidate(--TokenQueue.end(), Column - 1, false);
1155
14.0k
1156
14.0k
  // And may also be followed by a simple key.
1157
14.0k
  IsSimpleKeyAllowed = true;
1158
14.0k
  ++FlowLevel;
1159
14.0k
  return true;
1160
14.0k
}
1161
1162
13.9k
bool Scanner::scanFlowCollectionEnd(bool IsSequence) {
1163
13.9k
  removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1164
13.9k
  IsSimpleKeyAllowed = false;
1165
13.9k
  Token T;
1166
3.56k
  T.Kind = IsSequence ? Token::TK_FlowSequenceEnd
1167
10.4k
                      : Token::TK_FlowMappingEnd;
1168
13.9k
  T.Range = StringRef(Current, 1);
1169
13.9k
  skip(1);
1170
13.9k
  TokenQueue.push_back(T);
1171
13.9k
  if (FlowLevel)
1172
13.9k
    --FlowLevel;
1173
13.9k
  return true;
1174
13.9k
}
1175
1176
27.6k
bool Scanner::scanFlowEntry() {
1177
27.6k
  removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1178
27.6k
  IsSimpleKeyAllowed = true;
1179
27.6k
  Token T;
1180
27.6k
  T.Kind = Token::TK_FlowEntry;
1181
27.6k
  T.Range = StringRef(Current, 1);
1182
27.6k
  skip(1);
1183
27.6k
  TokenQueue.push_back(T);
1184
27.6k
  return true;
1185
27.6k
}
1186
1187
23.7k
bool Scanner::scanBlockEntry() {
1188
23.7k
  rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end());
1189
23.7k
  removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1190
23.7k
  IsSimpleKeyAllowed = true;
1191
23.7k
  Token T;
1192
23.7k
  T.Kind = Token::TK_BlockEntry;
1193
23.7k
  T.Range = StringRef(Current, 1);
1194
23.7k
  skip(1);
1195
23.7k
  TokenQueue.push_back(T);
1196
23.7k
  return true;
1197
23.7k
}
1198
1199
30
bool Scanner::scanKey() {
1200
30
  if (!FlowLevel)
1201
16
    rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
1202
30
1203
30
  removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1204
30
  IsSimpleKeyAllowed = !FlowLevel;
1205
30
1206
30
  Token T;
1207
30
  T.Kind = Token::TK_Key;
1208
30
  T.Range = StringRef(Current, 1);
1209
30
  skip(1);
1210
30
  TokenQueue.push_back(T);
1211
30
  return true;
1212
30
}
1213
1214
124k
bool Scanner::scanValue() {
1215
124k
  // If the previous token could have been a simple key, insert the key token
1216
124k
  // into the token queue.
1217
124k
  if (
!SimpleKeys.empty()124k
) {
1218
124k
    SimpleKey SK = SimpleKeys.pop_back_val();
1219
124k
    Token T;
1220
124k
    T.Kind = Token::TK_Key;
1221
124k
    T.Range = SK.Tok->Range;
1222
124k
    TokenQueueT::iterator i, e;
1223
300k
    for (i = TokenQueue.begin(), e = TokenQueue.end(); 
i != e300k
;
++i176k
) {
1224
300k
      if (i == SK.Tok)
1225
124k
        break;
1226
300k
    }
1227
124k
    assert(i != e && "SimpleKey not in token queue!");
1228
124k
    i = TokenQueue.insert(i, T);
1229
124k
1230
124k
    // We may also need to add a Block-Mapping-Start token.
1231
124k
    rollIndent(SK.Column, Token::TK_BlockMappingStart, i);
1232
124k
1233
124k
    IsSimpleKeyAllowed = false;
1234
124k
  } else {
1235
23
    if (!FlowLevel)
1236
8
      rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
1237
23
    IsSimpleKeyAllowed = !FlowLevel;
1238
23
  }
1239
124k
1240
124k
  Token T;
1241
124k
  T.Kind = Token::TK_Value;
1242
124k
  T.Range = StringRef(Current, 1);
1243
124k
  skip(1);
1244
124k
  TokenQueue.push_back(T);
1245
124k
  return true;
1246
124k
}
1247
1248
// Forbidding inlining improves performance by roughly 20%.
1249
// FIXME: Remove once llvm optimizes this to the faster version without hints.
1250
LLVM_ATTRIBUTE_NOINLINE static bool
1251
wasEscaped(StringRef::iterator First, StringRef::iterator Position);
1252
1253
// Returns whether a character at 'Position' was escaped with a leading '\'.
1254
// 'First' specifies the position of the first character in the string.
1255
static bool wasEscaped(StringRef::iterator First,
1256
58
                       StringRef::iterator Position) {
1257
58
  assert(Position - 1 >= First);
1258
58
  StringRef::iterator I = Position - 1;
1259
58
  // We calculate the number of consecutive '\'s before the current position
1260
58
  // by iterating backwards through our string.
1261
140
  while (
I >= First && 140
*I == '\\'123
)
--I82
;
1262
58
  // (Position - 1 - I) now contains the number of '\'s before the current
1263
58
  // position. If it is odd, the character at 'Position' was escaped.
1264
58
  return (Position - 1 - I) % 2 == 1;
1265
58
}
1266
1267
8.40k
bool Scanner::scanFlowScalar(bool IsDoubleQuoted) {
1268
8.40k
  StringRef::iterator Start = Current;
1269
8.40k
  unsigned ColStart = Column;
1270
8.40k
  if (
IsDoubleQuoted8.40k
) {
1271
850
    do {
1272
850
      ++Current;
1273
22.1k
      while (
Current != End && 22.1k
*Current != '"'22.0k
)
1274
21.2k
        ++Current;
1275
850
      // Repeat until the previous character was not a '\' or was an escaped
1276
850
      // backslash.
1277
796
    } while (   Current != End
1278
841
             && *(Current - 1) == '\\'
1279
58
             && wasEscaped(Start + 1, Current));
1280
8.40k
  } else {
1281
7.60k
    skip(1);
1282
316k
    while (
true316k
) {
1283
316k
      // Skip a ' followed by another '.
1284
316k
      if (
Current + 1 < End && 316k
*Current == '\''316k
&&
*(Current + 1) == '\''7.60k
) {
1285
5
        skip(2);
1286
5
        continue;
1287
316k
      } else 
if (316k
*Current == '\''316k
)
1288
7.60k
        break;
1289
309k
      StringRef::iterator i = skip_nb_char(Current);
1290
309k
      if (
i == Current309k
) {
1291
13
        i = skip_b_break(Current);
1292
13
        if (i == Current)
1293
1
          break;
1294
12
        Current = i;
1295
12
        Column = 0;
1296
12
        ++Line;
1297
309k
      } else {
1298
309k
        if (i == End)
1299
0
          break;
1300
309k
        Current = i;
1301
309k
        ++Column;
1302
309k
      }
1303
316k
    }
1304
7.60k
  }
1305
8.40k
1306
8.40k
  if (
Current == End8.40k
) {
1307
10
    setError("Expected quote at end of scalar", Current);
1308
10
    return false;
1309
10
  }
1310
8.39k
1311
8.39k
  skip(1); // Skip ending quote.
1312
8.39k
  Token T;
1313
8.39k
  T.Kind = Token::TK_Scalar;
1314
8.39k
  T.Range = StringRef(Start, Current - Start);
1315
8.39k
  TokenQueue.push_back(T);
1316
8.39k
1317
8.39k
  saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
1318
8.39k
1319
8.39k
  IsSimpleKeyAllowed = false;
1320
8.39k
1321
8.39k
  return true;
1322
8.39k
}
1323
1324
236k
bool Scanner::scanPlainScalar() {
1325
236k
  StringRef::iterator Start = Current;
1326
236k
  unsigned ColStart = Column;
1327
236k
  unsigned LeadingBlanks = 0;
1328
236k
  assert(Indent >= -1 && "Indent must be >= -1 !");
1329
236k
  unsigned indent = static_cast<unsigned>(Indent + 1);
1330
247k
  while (
true247k
) {
1331
247k
    if (*Current == '#')
1332
252
      break;
1333
247k
1334
1.90M
    
while (247k
!isBlankOrBreak(Current)1.90M
) {
1335
1.81M
      if (  
FlowLevel && 1.81M
*Current == ':'352k
1336
1.81M
          && 
!(isBlankOrBreak(Current + 1) || 25.8k
*(Current + 1) == ','3
)) {
1337
2
        setError("Found unexpected ':' while scanning a plain scalar", Current);
1338
2
        return false;
1339
2
      }
1340
1.81M
1341
1.81M
      // Check for the end of the plain scalar.
1342
1.81M
      
if ( 1.81M
(*Current == ':' && 1.81M
isBlankOrBreak(Current + 1)122k
)
1343
1.69M
          || (  FlowLevel
1344
327k
          && (StringRef(Current, 1).find_first_of(",:?[]{}")
1345
327k
              != StringRef::npos)))
1346
158k
        break;
1347
1.65M
1348
1.65M
      StringRef::iterator i = skip_nb_char(Current);
1349
1.65M
      if (i == Current)
1350
226
        break;
1351
1.65M
      Current = i;
1352
1.65M
      ++Column;
1353
1.65M
    }
1354
247k
1355
247k
    // Are we at the end?
1356
247k
    
if (247k
!isBlankOrBreak(Current)247k
)
1357
158k
      break;
1358
88.7k
1359
88.7k
    // Eat blanks.
1360
88.7k
    StringRef::iterator Tmp = Current;
1361
590k
    while (
isBlankOrBreak(Tmp)590k
) {
1362
501k
      StringRef::iterator i = skip_s_white(Tmp);
1363
501k
      if (
i != Tmp501k
) {
1364
423k
        if (
LeadingBlanks && 423k
(Column < indent)411k
&&
*Tmp == '\t'411k
) {
1365
0
          setError("Found invalid tab character in indentation", Tmp);
1366
0
          return false;
1367
0
        }
1368
423k
        Tmp = i;
1369
423k
        ++Column;
1370
501k
      } else {
1371
78.0k
        i = skip_b_break(Tmp);
1372
78.0k
        if (!LeadingBlanks)
1373
77.4k
          LeadingBlanks = 1;
1374
78.0k
        Tmp = i;
1375
78.0k
        Column = 0;
1376
78.0k
        ++Line;
1377
78.0k
      }
1378
501k
    }
1379
88.7k
1380
88.7k
    
if (88.7k
!FlowLevel && 88.7k
Column < indent78.4k
)
1381
77.3k
      break;
1382
11.3k
1383
11.3k
    Current = Tmp;
1384
11.3k
  }
1385
236k
  
if (236k
Start == Current236k
) {
1386
1
    setError("Got empty plain scalar", Start);
1387
1
    return false;
1388
1
  }
1389
236k
  Token T;
1390
236k
  T.Kind = Token::TK_Scalar;
1391
236k
  T.Range = StringRef(Start, Current - Start);
1392
236k
  TokenQueue.push_back(T);
1393
236k
1394
236k
  // Plain scalars can be simple keys.
1395
236k
  saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
1396
236k
1397
236k
  IsSimpleKeyAllowed = false;
1398
236k
1399
236k
  return true;
1400
236k
}
1401
1402
29
bool Scanner::scanAliasOrAnchor(bool IsAlias) {
1403
29
  StringRef::iterator Start = Current;
1404
29
  unsigned ColStart = Column;
1405
29
  skip(1);
1406
168
  while(
true168
) {
1407
168
    if (   
*Current == '[' || 168
*Current == ']'168
1408
168
        || 
*Current == '{'168
||
*Current == '}'168
1409
168
        || *Current == ','
1410
164
        || *Current == ':')
1411
6
      break;
1412
162
    StringRef::iterator i = skip_ns_char(Current);
1413
162
    if (i == Current)
1414
23
      break;
1415
139
    Current = i;
1416
139
    ++Column;
1417
139
  }
1418
29
1419
29
  if (
Start == Current29
) {
1420
0
    setError("Got empty alias or anchor", Start);
1421
0
    return false;
1422
0
  }
1423
29
1424
29
  Token T;
1425
29
  T.Kind = IsAlias ? 
Token::TK_Alias16
:
Token::TK_Anchor13
;
1426
29
  T.Range = StringRef(Start, Current - Start);
1427
29
  TokenQueue.push_back(T);
1428
29
1429
29
  // Alias and anchors can be simple keys.
1430
29
  saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
1431
29
1432
29
  IsSimpleKeyAllowed = false;
1433
29
1434
29
  return true;
1435
29
}
1436
1437
5.98k
char Scanner::scanBlockChompingIndicator() {
1438
5.98k
  char Indicator = ' ';
1439
5.98k
  if (
Current != End && 5.98k
(*Current == '+' || 5.98k
*Current == '-'5.97k
)) {
1440
17
    Indicator = *Current;
1441
17
    skip(1);
1442
17
  }
1443
5.98k
  return Indicator;
1444
5.98k
}
1445
1446
/// Get the number of line breaks after chomping.
1447
///
1448
/// Return the number of trailing line breaks to emit, depending on
1449
/// \p ChompingIndicator.
1450
static unsigned getChompedLineBreaks(char ChompingIndicator,
1451
2.99k
                                     unsigned LineBreaks, StringRef Str) {
1452
2.99k
  if (ChompingIndicator == '-') // Strip all line breaks.
1453
11
    return 0;
1454
2.98k
  
if (2.98k
ChompingIndicator == '+'2.98k
) // Keep all line breaks.
1455
6
    return LineBreaks;
1456
2.97k
  // Clip trailing lines.
1457
2.97k
  
return Str.empty() ? 2.97k
06
:
12.96k
;
1458
2.99k
}
1459
1460
2.99k
unsigned Scanner::scanBlockIndentationIndicator() {
1461
2.99k
  unsigned Indent = 0;
1462
2.99k
  if (
Current != End && 2.99k
(*Current >= '1' && 2.99k
*Current <= '9'7
)) {
1463
6
    Indent = unsigned(*Current - '0');
1464
6
    skip(1);
1465
6
  }
1466
2.99k
  return Indent;
1467
2.99k
}
1468
1469
bool Scanner::scanBlockScalarHeader(char &ChompingIndicator,
1470
2.99k
                                    unsigned &IndentIndicator, bool &IsDone) {
1471
2.99k
  auto Start = Current;
1472
2.99k
1473
2.99k
  ChompingIndicator = scanBlockChompingIndicator();
1474
2.99k
  IndentIndicator = scanBlockIndentationIndicator();
1475
2.99k
  // Check for the chomping indicator once again.
1476
2.99k
  if (ChompingIndicator == ' ')
1477
2.98k
    ChompingIndicator = scanBlockChompingIndicator();
1478
2.99k
  Current = skip_while(&Scanner::skip_s_white, Current);
1479
2.99k
  skipComment();
1480
2.99k
1481
2.99k
  if (
Current == End2.99k
) { // EOF, we have an empty scalar.
1482
2
    Token T;
1483
2
    T.Kind = Token::TK_BlockScalar;
1484
2
    T.Range = StringRef(Start, Current - Start);
1485
2
    TokenQueue.push_back(T);
1486
2
    IsDone = true;
1487
2
    return true;
1488
2
  }
1489
2.99k
1490
2.99k
  
if (2.99k
!consumeLineBreakIfPresent()2.99k
) {
1491
3
    setError("Expected a line break after block scalar header", Current);
1492
3
    return false;
1493
3
  }
1494
2.99k
  return true;
1495
2.99k
}
1496
1497
bool Scanner::findBlockScalarIndent(unsigned &BlockIndent,
1498
                                    unsigned BlockExitIndent,
1499
2.98k
                                    unsigned &LineBreaks, bool &IsDone) {
1500
2.98k
  unsigned MaxAllSpaceLineCharacters = 0;
1501
2.98k
  StringRef::iterator LongestAllSpaceLine;
1502
2.98k
1503
3.21k
  while (
true3.21k
) {
1504
3.21k
    advanceWhile(&Scanner::skip_s_space);
1505
3.21k
    if (
skip_nb_char(Current) != Current3.21k
) {
1506
2.98k
      // This line isn't empty, so try and find the indentation.
1507
2.98k
      if (
Column <= BlockExitIndent2.98k
) { // End of the block literal.
1508
7
        IsDone = true;
1509
7
        return true;
1510
7
      }
1511
2.98k
      // We found the block's indentation.
1512
2.98k
      BlockIndent = Column;
1513
2.98k
      if (
MaxAllSpaceLineCharacters > BlockIndent2.98k
) {
1514
1
        setError(
1515
1
            "Leading all-spaces line must be smaller than the block indent",
1516
1
            LongestAllSpaceLine);
1517
1
        return false;
1518
1
      }
1519
2.97k
      return true;
1520
2.97k
    }
1521
227
    
if (227
skip_b_break(Current) != Current &&
1522
227
        
Column > MaxAllSpaceLineCharacters226
) {
1523
10
      // Record the longest all-space line in case it's longer than the
1524
10
      // discovered block indent.
1525
10
      MaxAllSpaceLineCharacters = Column;
1526
10
      LongestAllSpaceLine = Current;
1527
10
    }
1528
227
1529
227
    // Check for EOF.
1530
227
    if (
Current == End227
) {
1531
1
      IsDone = true;
1532
1
      return true;
1533
1
    }
1534
226
1535
226
    
if (226
!consumeLineBreakIfPresent()226
) {
1536
0
      IsDone = true;
1537
0
      return true;
1538
0
    }
1539
226
    ++LineBreaks;
1540
226
  }
1541
0
  return true;
1542
2.98k
}
1543
1544
bool Scanner::scanBlockScalarIndent(unsigned BlockIndent,
1545
52.2k
                                    unsigned BlockExitIndent, bool &IsDone) {
1546
52.2k
  // Skip the indentation.
1547
131k
  while (
Column < BlockIndent131k
) {
1548
88.9k
    auto I = skip_s_space(Current);
1549
88.9k
    if (I == Current)
1550
9.58k
      break;
1551
79.3k
    Current = I;
1552
79.3k
    ++Column;
1553
79.3k
  }
1554
52.2k
1555
52.2k
  if (skip_nb_char(Current) == Current)
1556
7.42k
    return true;
1557
44.8k
1558
44.8k
  
if (44.8k
Column <= BlockExitIndent44.8k
) { // End of the block literal.
1559
2.93k
    IsDone = true;
1560
2.93k
    return true;
1561
2.93k
  }
1562
41.8k
1563
41.8k
  
if (41.8k
Column < BlockIndent41.8k
) {
1564
4
    if (
Current != End && 4
*Current == '#'4
) { // Trailing comment.
1565
2
      IsDone = true;
1566
2
      return true;
1567
2
    }
1568
2
    setError("A text line is less indented than the block scalar", Current);
1569
2
    return false;
1570
2
  }
1571
41.8k
  return true; // A normal text line.
1572
41.8k
}
1573
1574
2.99k
bool Scanner::scanBlockScalar(bool IsLiteral) {
1575
2.99k
  // Eat '|' or '>'
1576
2.99k
  assert(*Current == '|' || *Current == '>');
1577
2.99k
  skip(1);
1578
2.99k
1579
2.99k
  char ChompingIndicator;
1580
2.99k
  unsigned BlockIndent;
1581
2.99k
  bool IsDone = false;
1582
2.99k
  if (!scanBlockScalarHeader(ChompingIndicator, BlockIndent, IsDone))
1583
3
    return false;
1584
2.99k
  
if (2.99k
IsDone2.99k
)
1585
2
    return true;
1586
2.99k
1587
2.99k
  auto Start = Current;
1588
2.99k
  unsigned BlockExitIndent = Indent < 0 ? 
0609
:
(unsigned)Indent2.38k
;
1589
2.99k
  unsigned LineBreaks = 0;
1590
2.99k
  if (
BlockIndent == 02.99k
) {
1591
2.98k
    if (!findBlockScalarIndent(BlockIndent, BlockExitIndent, LineBreaks,
1592
2.98k
                               IsDone))
1593
1
      return false;
1594
2.99k
  }
1595
2.99k
1596
2.99k
  // Scan the block's scalars body.
1597
2.99k
  SmallString<256> Str;
1598
52.2k
  while (
!IsDone52.2k
) {
1599
52.2k
    if (!scanBlockScalarIndent(BlockIndent, BlockExitIndent, IsDone))
1600
2
      return false;
1601
52.2k
    
if (52.2k
IsDone52.2k
)
1602
2.93k
      break;
1603
49.2k
1604
49.2k
    // Parse the current line.
1605
49.2k
    auto LineStart = Current;
1606
49.2k
    advanceWhile(&Scanner::skip_nb_char);
1607
49.2k
    if (
LineStart != Current49.2k
) {
1608
41.8k
      Str.append(LineBreaks, '\n');
1609
41.8k
      Str.append(StringRef(LineStart, Current - LineStart));
1610
41.8k
      LineBreaks = 0;
1611
41.8k
    }
1612
49.2k
1613
49.2k
    // Check for EOF.
1614
49.2k
    if (Current == End)
1615
47
      break;
1616
49.2k
1617
49.2k
    
if (49.2k
!consumeLineBreakIfPresent()49.2k
)
1618
0
      break;
1619
49.2k
    ++LineBreaks;
1620
49.2k
  }
1621
2.99k
1622
2.99k
  
if (2.99k
Current == End && 2.99k
!LineBreaks48
)
1623
2.99k
    // Ensure that there is at least one line break before the end of file.
1624
3
    LineBreaks = 1;
1625
2.99k
  Str.append(getChompedLineBreaks(ChompingIndicator, LineBreaks, Str), '\n');
1626
2.99k
1627
2.99k
  // New lines may start a simple key.
1628
2.99k
  if (!FlowLevel)
1629
2.99k
    IsSimpleKeyAllowed = true;
1630
2.99k
1631
2.99k
  Token T;
1632
2.99k
  T.Kind = Token::TK_BlockScalar;
1633
2.99k
  T.Range = StringRef(Start, Current - Start);
1634
2.99k
  T.Value = Str.str().str();
1635
2.99k
  TokenQueue.push_back(T);
1636
2.99k
  return true;
1637
2.99k
}
1638
1639
1.10k
bool Scanner::scanTag() {
1640
1.10k
  StringRef::iterator Start = Current;
1641
1.10k
  unsigned ColStart = Column;
1642
1.10k
  skip(1); // Eat !.
1643
1.10k
  if (
Current == End || 1.10k
isBlankOrBreak(Current)1.10k
)
;2
// An empty tag.
1644
1.10k
  else 
if (1.10k
*Current == '<'1.10k
) {
1645
3
    skip(1);
1646
3
    scan_ns_uri_char();
1647
3
    if (!consume('>'))
1648
2
      return false;
1649
1.09k
  } else {
1650
1.09k
    // FIXME: Actually parse the c-ns-shorthand-tag rule.
1651
1.09k
    Current = skip_while(&Scanner::skip_ns_char, Current);
1652
1.09k
  }
1653
1.10k
1654
1.10k
  Token T;
1655
1.10k
  T.Kind = Token::TK_Tag;
1656
1.10k
  T.Range = StringRef(Start, Current - Start);
1657
1.10k
  TokenQueue.push_back(T);
1658
1.10k
1659
1.10k
  // Tags can be simple keys.
1660
1.10k
  saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
1661
1.10k
1662
1.10k
  IsSimpleKeyAllowed = false;
1663
1.10k
1664
1.10k
  return true;
1665
1.10k
}
1666
1667
465k
bool Scanner::fetchMoreTokens() {
1668
465k
  if (IsStartOfStream)
1669
2.53k
    return scanStreamStart();
1670
462k
1671
462k
  scanToNextToken();
1672
462k
1673
462k
  if (Current == End)
1674
1.56k
    return scanStreamEnd();
1675
461k
1676
461k
  removeStaleSimpleKeyCandidates();
1677
461k
1678
461k
  unrollIndent(Column);
1679
461k
1680
461k
  if (
Column == 0 && 461k
*Current == '%'27.3k
)
1681
19
    return scanDirective();
1682
461k
1683
461k
  
if (461k
Column == 0 && 461k
Current + 4 <= End27.3k
1684
27.2k
      && *Current == '-'
1685
5.11k
      && *(Current + 1) == '-'
1686
4.13k
      && *(Current + 2) == '-'
1687
4.13k
      && 
(Current + 3 == End || 4.13k
isBlankOrBreak(Current + 3)4.13k
))
1688
4.13k
    return scanDocumentIndicator(true);
1689
457k
1690
457k
  
if (457k
Column == 0 && 457k
Current + 4 <= End23.1k
1691
23.0k
      && *Current == '.'
1692
4.02k
      && *(Current + 1) == '.'
1693
4.02k
      && *(Current + 2) == '.'
1694
4.02k
      && 
(Current + 3 == End || 4.02k
isBlankOrBreak(Current + 3)4.02k
))
1695
4.02k
    return scanDocumentIndicator(false);
1696
453k
1697
453k
  
if (453k
*Current == '['453k
)
1698
3.58k
    return scanFlowCollectionStart(true);
1699
449k
1700
449k
  
if (449k
*Current == '{'449k
)
1701
10.4k
    return scanFlowCollectionStart(false);
1702
439k
1703
439k
  
if (439k
*Current == ']'439k
)
1704
3.56k
    return scanFlowCollectionEnd(true);
1705
435k
1706
435k
  
if (435k
*Current == '}'435k
)
1707
10.4k
    return scanFlowCollectionEnd(false);
1708
425k
1709
425k
  
if (425k
*Current == ','425k
)
1710
27.6k
    return scanFlowEntry();
1711
397k
1712
397k
  
if (397k
*Current == '-' && 397k
isBlankOrBreak(Current + 1)24.0k
)
1713
23.7k
    return scanBlockEntry();
1714
373k
1715
373k
  
if (373k
*Current == '?' && 373k
(FlowLevel || 30
isBlankOrBreak(Current + 1)16
))
1716
30
    return scanKey();
1717
373k
1718
373k
  
if (373k
*Current == ':' && 373k
(FlowLevel || 124k
isBlankOrBreak(Current + 1)96.7k
))
1719
124k
    return scanValue();
1720
249k
1721
249k
  
if (249k
*Current == '*'249k
)
1722
16
    return scanAliasOrAnchor(true);
1723
249k
1724
249k
  
if (249k
*Current == '&'249k
)
1725
13
    return scanAliasOrAnchor(false);
1726
249k
1727
249k
  
if (249k
*Current == '!'249k
)
1728
1.10k
    return scanTag();
1729
247k
1730
247k
  
if (247k
*Current == '|' && 247k
!FlowLevel2.97k
)
1731
2.97k
    return scanBlockScalar(true);
1732
244k
1733
244k
  
if (244k
*Current == '>' && 244k
!FlowLevel20
)
1734
20
    return scanBlockScalar(false);
1735
244k
1736
244k
  
if (244k
*Current == '\''244k
)
1737
7.60k
    return scanFlowScalar(false);
1738
237k
1739
237k
  
if (237k
*Current == '"'237k
)
1740
796
    return scanFlowScalar(true);
1741
236k
1742
236k
  // Get a plain scalar.
1743
236k
  StringRef FirstChar(Current, 1);
1744
236k
  if (!(isBlankOrBreak(Current)
1745
236k
        || FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") != StringRef::npos)
1746
308
      || 
(*Current == '-' && 308
!isBlankOrBreak(Current + 1)306
)
1747
2
      || 
(!FlowLevel && 2
(*Current == '?' || 2
*Current == ':'2
)
1748
2
          && isBlankOrBreak(Current + 1))
1749
2
      || 
(!FlowLevel && 2
*Current == ':'2
1750
1
                      && Current + 2 < End
1751
1
                      && *(Current + 1) == ':'
1752
1
                      && !isBlankOrBreak(Current + 2)))
1753
236k
    return scanPlainScalar();
1754
1
1755
1
  setError("Unrecognized character while tokenizing.");
1756
1
  return false;
1757
1
}
1758
1759
/// Construct a stream over the text \p Input.
///
/// Creates the Scanner for this stream, forwarding \p Input, \p SM,
/// \p ShowColors and \p EC to it. CurrentDoc starts out empty.
Stream::Stream(StringRef Input, SourceMgr &SM, bool ShowColors,
1760
               std::error_code *EC)
1761
2.43k
    : scanner(new Scanner(Input, SM, ShowColors, EC)), CurrentDoc() {}
1762
1763
/// Construct a stream over the buffer \p InputBuffer.
///
/// Creates the Scanner for this stream, forwarding \p InputBuffer, \p SM,
/// \p ShowColors and \p EC to it. CurrentDoc starts out empty.
Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM, bool ShowColors,
1764
               std::error_code *EC)
1765
101
    : scanner(new Scanner(InputBuffer, SM, ShowColors, EC)), CurrentDoc() {}
1766
1767
2.49k
/// Destructor, defaulted here in the implementation file.
Stream::~Stream() = default;
1768
1769
809
/// Returns whatever the underlying scanner reports from its failed().
bool Stream::failed() {
  const bool ScannerFailed = scanner->failed();
  return ScannerFailed;
}
1770
1771
83
/// Emit \p Msg as an error diagnostic through the scanner, located at the
/// start of \p N's source range and highlighting that whole range.
void Stream::printError(Node *N, const Twine &Msg) {
  const auto Range = N->getSourceRange();
  scanner->printError(Range.Start, SourceMgr::DK_Error, Msg, Range);
}
1777
1778
2.53k
/// Start iterating over the documents of this stream.
///
/// A non-null CurrentDoc is treated as a second iteration attempt and is a
/// fatal error. Otherwise the next scanner token is consumed, a new Document
/// is created and an iterator referring to it is returned.
document_iterator Stream::begin() {
  if (CurrentDoc) {
    report_fatal_error("Can only iterate over the stream once");
  }

  // Skip Stream-Start.
  scanner->getNext();

  CurrentDoc.reset(new Document(*this));
  return document_iterator(CurrentDoc);
}
1788
1789
9.21k
/// \returns a default-constructed document_iterator, used as the end marker.
document_iterator Stream::end() {
  document_iterator Sentinel;
  return Sentinel;
}
1792
1793
52
void Stream::skip() {
1794
105
  for (document_iterator i = begin(), e = end(); 
i != e105
;
++i53
)
1795
53
    i->skip();
1796
52
}
1797
1798
/// Initialize the common node state.
///
/// \param Type value stored in TypeID.
/// \param D    reference to the owning document pointer, stored in Doc.
/// \param A    anchor text, stored in Anchor.
/// \param T    tag text, stored in Tag.
///
/// The source range starts out empty, positioned at the beginning of the
/// next token's range.
Node::Node(unsigned int Type, std::unique_ptr<Document> &D, StringRef A,
           StringRef T)
    : Doc(D), TypeID(Type), Anchor(A), Tag(T) {
  const SMLoc Begin = SMLoc::getFromPointer(peekNext().Range.begin());
  SourceRange = SMRange(Begin, Begin);
}
1804
1805
3.52k
std::string Node::getVerbatimTag() const {
1806
3.52k
  StringRef Raw = getRawTag();
1807
3.52k
  if (
!Raw.empty() && 3.52k
Raw != "!"2.32k
) {
1808
2.32k
    std::string Ret;
1809
2.32k
    if (
Raw.find_last_of('!') == 02.32k
) {
1810
2.28k
      Ret = Doc->getTagMap().find("!")->second;
1811
2.28k
      Ret += Raw.substr(1);
1812
2.28k
      return Ret;
1813
37
    } else 
if (37
Raw.startswith("!!")37
) {
1814
31
      Ret = Doc->getTagMap().find("!!")->second;
1815
31
      Ret += Raw.substr(2);
1816
31
      return Ret;
1817
0
    } else {
1818
6
      StringRef TagHandle = Raw.substr(0, Raw.find_last_of('!') + 1);
1819
6
      std::map<StringRef, StringRef>::const_iterator It =
1820
6
          Doc->getTagMap().find(TagHandle);
1821
6
      if (It != Doc->getTagMap().end())
1822
4
        Ret = It->second;
1823
2
      else {
1824
2
        Token T;
1825
2
        T.Kind = Token::TK_Tag;
1826
2
        T.Range = TagHandle;
1827
2
        setError(Twine("Unknown tag handle ") + TagHandle, T);
1828
2
      }
1829
37
      Ret += Raw.substr(Raw.find_last_of('!') + 1);
1830
37
      return Ret;
1831
37
    }
1832
1.20k
  }
1833
1.20k
1834
1.20k
  switch (getType()) {
1835
42
  case NK_Null:
1836
42
    return "tag:yaml.org,2002:null";
1837
861
  case NK_Scalar:
1838
861
  case NK_BlockScalar:
1839
861
    // TODO: Tag resolution.
1840
861
    return "tag:yaml.org,2002:str";
1841
206
  case NK_Mapping:
1842
206
    return "tag:yaml.org,2002:map";
1843
94
  case NK_Sequence:
1844
94
    return "tag:yaml.org,2002:seq";
1845
0
  }
1846
0
1847
0
  return "";
1848
0
}
1849
1850
1.14M
/// Forward to the owning document's peekNext().
Token &Node::peekNext() { return Doc->peekNext(); }
1853
1854
343k
/// Forward to the owning document's getNext().
Token Node::getNext() { return Doc->getNext(); }
1857
1858
285k
/// Forward to the owning document's parseBlockNode().
Node *Node::parseBlockNode() { return Doc->parseBlockNode(); }
1861
1862
125k
/// \returns the owning document's NodeAllocator.
BumpPtrAllocator &Node::getAllocator() { return Doc->NodeAllocator; }
1865
1866
23
void Node::setError(const Twine &Msg, Token &Tok) const {
1867
23
  Doc->setError(Msg, Tok);
1868
23
}
1869
1870
356k
bool Node::failed() const {
1871
356k
  return Doc->failed();
1872
356k
}
1873
1874
243k
/// Get the value of this scalar with quoting and escaping removed.
///
/// \param Storage scratch buffer, used only when the value has to be
///        rewritten (escapes or line breaks in a double-quoted scalar, or a
///        quote character inside a single-quoted scalar).
/// \returns the value; it refers either to the original text or to
///          \p Storage.
StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const {
  // TODO: Handle newlines properly. We need to remove leading whitespace.
  const char Lead = Value[0];

  if (Lead == '"') { // Double quoted.
    // Drop the surrounding "s.
    StringRef Inner = Value.substr(1, Value.size() - 2);
    // Only escapes and line breaks force a rewrite of the value.
    const StringRef::size_type Pos = Inner.find_first_of("\\\r\n");
    return Pos == StringRef::npos ? Inner
                                  : unescapeDoubleQuoted(Inner, Pos, Storage);
  }

  if (Lead == '\'') { // Single quoted.
    // Drop the surrounding 's.
    StringRef Rest = Value.substr(1, Value.size() - 2);
    StringRef::size_type Pos = Rest.find('\'');
    if (Pos == StringRef::npos)
      return Rest;

    // We're going to need Storage.
    Storage.clear();
    Storage.reserve(Rest.size());
    while (Pos != StringRef::npos) {
      // Copy the text before the quote, emit a single quote, then step over
      // two characters of the input.
      Storage.append(Rest.begin(), Rest.begin() + Pos);
      Storage.push_back('\'');
      Rest = Rest.substr(Pos + 2);
      Pos = Rest.find('\'');
    }
    Storage.append(Rest.begin(), Rest.end());
    return StringRef(Storage.begin(), Storage.size());
  }

  // Plain or block.
  return Value.rtrim(' ');
}
1906
1907
/// Expand the escape sequences and line breaks of a double-quoted scalar.
///
/// \param UnquotedValue the scalar text without its surrounding quotes.
/// \param i offset in \p UnquotedValue of the first '\\', '\r' or '\n'.
/// \param Storage buffer that is cleared and then receives the result.
/// \returns a StringRef referring to \p Storage, or "" after reporting an
///          error for an unrecognized escape code or for a backslash that is
///          the last character of the value.
StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue
                                          , StringRef::size_type i
                                          , SmallVectorImpl<char> &Storage)
                                          const {
  // Use Storage to build proper value.
  Storage.clear();
  Storage.reserve(UnquotedValue.size());
  for (; i != StringRef::npos; i = UnquotedValue.find_first_of("\\\r\n")) {
    // Insert all previous chars into Storage.
    StringRef Valid(UnquotedValue.begin(), i);
    Storage.insert(Storage.end(), Valid.begin(), Valid.end());
    // Chop off inserted chars.
    UnquotedValue = UnquotedValue.substr(i);

    assert(!UnquotedValue.empty() && "Can't be empty!");

    // Parse escape or line break.
    switch (UnquotedValue[0]) {
    case '\r':
    case '\n':
      Storage.push_back('\n');
      // A second line-break character directly after the first is consumed
      // together with it.
      if (   UnquotedValue.size() > 1
          && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
        UnquotedValue = UnquotedValue.substr(1);
      UnquotedValue = UnquotedValue.substr(1);
      break;
    default:
      // Only a backslash reaches this point.
      if (UnquotedValue.size() == 1) {
        // The backslash is the last character, so there is no escape code.
        // Leaving the switch here without consuming it would make the loop
        // find the same backslash at offset 0 again and never terminate, so
        // report the error and stop.
        Token T;
        T.Range = StringRef(UnquotedValue.begin(), 1);
        setError("Unrecognized escape code!", T);
        return "";
      }
      UnquotedValue = UnquotedValue.substr(1);
      switch (UnquotedValue[0]) {
      default: {
          Token T;
          T.Range = StringRef(UnquotedValue.begin(), 1);
          setError("Unrecognized escape code!", T);
          return "";
        }
      case '\r':
      case '\n':
        // Remove the new line.
        if (   UnquotedValue.size() > 1
            && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
          UnquotedValue = UnquotedValue.substr(1);
        // If this was just a single byte newline, it will get skipped
        // below.
        break;
      case '0':
        Storage.push_back(0x00);
        break;
      case 'a':
        Storage.push_back(0x07);
        break;
      case 'b':
        Storage.push_back(0x08);
        break;
      case 't':
      case 0x09:
        Storage.push_back(0x09);
        break;
      case 'n':
        Storage.push_back(0x0A);
        break;
      case 'v':
        Storage.push_back(0x0B);
        break;
      case 'f':
        Storage.push_back(0x0C);
        break;
      case 'r':
        Storage.push_back(0x0D);
        break;
      case 'e':
        Storage.push_back(0x1B);
        break;
      case ' ':
        Storage.push_back(0x20);
        break;
      case '"':
        Storage.push_back(0x22);
        break;
      case '/':
        Storage.push_back(0x2F);
        break;
      case '\\':
        Storage.push_back(0x5C);
        break;
      case 'N':
        encodeUTF8(0x85, Storage);
        break;
      case '_':
        encodeUTF8(0xA0, Storage);
        break;
      case 'L':
        encodeUTF8(0x2028, Storage);
        break;
      case 'P':
        encodeUTF8(0x2029, Storage);
        break;
      case 'x': {
          // \xHH: two hex digits follow the 'x'.
          if (UnquotedValue.size() < 3)
            // TODO: Report error.
            break;
          unsigned int UnicodeScalarValue;
          if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue))
            // TODO: Report error.
            UnicodeScalarValue = 0xFFFD;
          encodeUTF8(UnicodeScalarValue, Storage);
          UnquotedValue = UnquotedValue.substr(2);
          break;
        }
      case 'u': {
          // \uHHHH: four hex digits follow the 'u'.
          if (UnquotedValue.size() < 5)
            // TODO: Report error.
            break;
          unsigned int UnicodeScalarValue;
          if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue))
            // TODO: Report error.
            UnicodeScalarValue = 0xFFFD;
          encodeUTF8(UnicodeScalarValue, Storage);
          UnquotedValue = UnquotedValue.substr(4);
          break;
        }
      case 'U': {
          // \UHHHHHHHH: eight hex digits follow the 'U'.
          if (UnquotedValue.size() < 9)
            // TODO: Report error.
            break;
          unsigned int UnicodeScalarValue;
          if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue))
            // TODO: Report error.
            UnicodeScalarValue = 0xFFFD;
          encodeUTF8(UnicodeScalarValue, Storage);
          UnquotedValue = UnquotedValue.substr(8);
          break;
        }
      }
      // Step over the escape code character itself (or the last character
      // consumed by the case above).
      UnquotedValue = UnquotedValue.substr(1);
    }
  }
  // Whatever is left contains no escapes or line breaks.
  Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end());
  return StringRef(Storage.begin(), Storage.size());
}
2049
2050
393k
/// Parse the key of this key/value pair on first use and cache it in Key.
///
/// \returns the cached key; a NullNode when the upcoming tokens show that no
///          key is present; otherwise the result of parseBlockNode().
Node *KeyValueNode::getKey() {
  if (Key)
    return Key;

  // Handle implicit null keys.
  switch (peekNext().Kind) {
  case Token::TK_BlockEnd:
  case Token::TK_Value:
  case Token::TK_Error:
    return Key = new (getAllocator()) NullNode(Doc);
  case Token::TK_Key:
    getNext(); // skip TK_Key.
    break;
  default:
    break;
  }

  // Handle explicit null keys.
  Token &Following = peekNext();
  const bool KeyIsMissing = Following.Kind == Token::TK_BlockEnd ||
                            Following.Kind == Token::TK_Value;
  if (KeyIsMissing)
    return Key = new (getAllocator()) NullNode(Doc);

  // We've got a normal key.
  return Key = parseBlockNode();
}
2074
2075
266k
/// Parse the value of this key/value pair on first use and cache it in Value.
///
/// The key is parsed and skipped first. A NullNode is returned when the key
/// could not be parsed, when the parser has failed, or when the upcoming
/// tokens show that no value is present; a token other than TK_Value after
/// the key is additionally reported as an error. Otherwise the result of
/// parseBlockNode() is returned.
Node *KeyValueNode::getValue() {
  if (Value)
    return Value;

  // getKey() hands back the result of parseBlockNode() for a normal key, and
  // that is nullptr when parsing failed. Do not dereference it in that case;
  // report the problem and fall back to a null value.
  if (Node *KeyNode = getKey()) {
    KeyNode->skip();
  } else {
    setError("Null key in Key Value.", peekNext());
    return Value = new (getAllocator()) NullNode(Doc);
  }

  if (failed())
    return Value = new (getAllocator()) NullNode(Doc);

  // Handle implicit null values.
  {
    Token &t = peekNext();
    if (   t.Kind == Token::TK_BlockEnd
        || t.Kind == Token::TK_FlowMappingEnd
        || t.Kind == Token::TK_Key
        || t.Kind == Token::TK_FlowEntry
        || t.Kind == Token::TK_Error) {
      return Value = new (getAllocator()) NullNode(Doc);
    }

    if (t.Kind != Token::TK_Value) {
      setError("Unexpected token in Key Value.", t);
      return Value = new (getAllocator()) NullNode(Doc);
    }
    getNext(); // skip TK_Value.
  }

  // Handle explicit null values.
  Token &t = peekNext();
  if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Key) {
    return Value = new (getAllocator()) NullNode(Doc);
  }

  // We got a normal value.
  return Value = parseBlockNode();
}
2109
2110
174k
void MappingNode::increment() {
2111
174k
  if (
failed()174k
) {
2112
9
    IsAtEnd = true;
2113
9
    CurrentEntry = nullptr;
2114
9
    return;
2115
9
  }
2116
174k
  
if (174k
CurrentEntry174k
) {
2117
142k
    CurrentEntry->skip();
2118
142k
    if (
Type == MT_Inline142k
) {
2119
12
      IsAtEnd = true;
2120
12
      CurrentEntry = nullptr;
2121
12
      return;
2122
12
    }
2123
174k
  }
2124
174k
  Token T = peekNext();
2125
174k
  if (
T.Kind == Token::TK_Key || 174k
T.Kind == Token::TK_Scalar49.8k
) {
2126
124k
    // KeyValueNode eats the TK_Key. That way it can detect null keys.
2127
124k
    CurrentEntry = new (getAllocator()) KeyValueNode(Doc);
2128
174k
  } else 
if (49.8k
Type == MT_Block49.8k
) {
2129
22.0k
    switch (T.Kind) {
2130
22.0k
    case Token::TK_BlockEnd:
2131
22.0k
      getNext();
2132
22.0k
      IsAtEnd = true;
2133
22.0k
      CurrentEntry = nullptr;
2134
22.0k
      break;
2135
0
    default:
2136
0
      setError("Unexpected token. Expected Key or Block End", T);
2137
0
      LLVM_FALLTHROUGH;
2138
0
    case Token::TK_Error:
2139
0
      IsAtEnd = true;
2140
0
      CurrentEntry = nullptr;
2141
22.0k
    }
2142
49.8k
  } else {
2143
27.8k
    switch (T.Kind) {
2144
17.4k
    case Token::TK_FlowEntry:
2145
17.4k
      // Eat the flow entry and recurse.
2146
17.4k
      getNext();
2147
17.4k
      return increment();
2148
10.4k
    case Token::TK_FlowMappingEnd:
2149
10.4k
      getNext();
2150
10.4k
      LLVM_FALLTHROUGH;
2151
10.4k
    case Token::TK_Error:
2152
10.4k
      // Set this to end iterator.
2153
10.4k
      IsAtEnd = true;
2154
10.4k
      CurrentEntry = nullptr;
2155
10.4k
      break;
2156
7
    default:
2157
7
      setError( "Unexpected token. Expected Key, Flow Entry, or Flow "
2158
7
                "Mapping End."
2159
7
              , T);
2160
7
      IsAtEnd = true;
2161
7
      CurrentEntry = nullptr;
2162
49.8k
    }
2163
49.8k
  }
2164
174k
}
2165
2166
57.2k
void SequenceNode::increment() {
2167
57.2k
  if (
failed()57.2k
) {
2168
6
    IsAtEnd = true;
2169
6
    CurrentEntry = nullptr;
2170
6
    return;
2171
6
  }
2172
57.2k
  
if (57.2k
CurrentEntry57.2k
)
2173
47.1k
    CurrentEntry->skip();
2174
57.2k
  Token T = peekNext();
2175
57.2k
  if (
SeqType == ST_Block57.2k
) {
2176
30.2k
    switch (T.Kind) {
2177
23.7k
    case Token::TK_BlockEntry:
2178
23.7k
      getNext();
2179
23.7k
      CurrentEntry = parseBlockNode();
2180
23.7k
      if (
!CurrentEntry23.7k
) { // An error occurred.
2181
1
        IsAtEnd = true;
2182
1
        CurrentEntry = nullptr;
2183
1
      }
2184
23.7k
      break;
2185
6.51k
    case Token::TK_BlockEnd:
2186
6.51k
      getNext();
2187
6.51k
      IsAtEnd = true;
2188
6.51k
      CurrentEntry = nullptr;
2189
6.51k
      break;
2190
1
    default:
2191
1
      setError( "Unexpected token. Expected Block Entry or Block End."
2192
1
              , T);
2193
1
      LLVM_FALLTHROUGH;
2194
1
    case Token::TK_Error:
2195
1
      IsAtEnd = true;
2196
1
      CurrentEntry = nullptr;
2197
30.2k
    }
2198
57.2k
  } else 
if (26.9k
SeqType == ST_Indentless26.9k
) {
2199
52
    switch (T.Kind) {
2200
35
    case Token::TK_BlockEntry:
2201
35
      getNext();
2202
35
      CurrentEntry = parseBlockNode();
2203
35
      if (
!CurrentEntry35
) { // An error occurred.
2204
0
        IsAtEnd = true;
2205
0
        CurrentEntry = nullptr;
2206
0
      }
2207
35
      break;
2208
17
    default:
2209
17
    case Token::TK_Error:
2210
17
      IsAtEnd = true;
2211
17
      CurrentEntry = nullptr;
2212
52
    }
2213
26.9k
  } else 
if (26.9k
SeqType == ST_Flow26.9k
) {
2214
26.9k
    switch (T.Kind) {
2215
10.1k
    case Token::TK_FlowEntry:
2216
10.1k
      // Eat the flow entry and recurse.
2217
10.1k
      getNext();
2218
10.1k
      WasPreviousTokenFlowEntry = true;
2219
10.1k
      return increment();
2220
3.50k
    case Token::TK_FlowSequenceEnd:
2221
3.50k
      getNext();
2222
3.50k
      LLVM_FALLTHROUGH;
2223
3.50k
    case Token::TK_Error:
2224
3.50k
      // Set this to end iterator.
2225
3.50k
      IsAtEnd = true;
2226
3.50k
      CurrentEntry = nullptr;
2227
3.50k
      break;
2228
7
    case Token::TK_StreamEnd:
2229
7
    case Token::TK_DocumentEnd:
2230
7
    case Token::TK_DocumentStart:
2231
7
      setError("Could not find closing ]!", T);
2232
7
      // Set this to end iterator.
2233
7
      IsAtEnd = true;
2234
7
      CurrentEntry = nullptr;
2235
7
      break;
2236
13.2k
    default:
2237
13.2k
      if (
!WasPreviousTokenFlowEntry13.2k
) {
2238
2
        setError("Expected , between entries!", T);
2239
2
        IsAtEnd = true;
2240
2
        CurrentEntry = nullptr;
2241
2
        break;
2242
2
      }
2243
13.2k
      // Otherwise it must be a flow entry.
2244
13.2k
      CurrentEntry = parseBlockNode();
2245
13.2k
      if (
!CurrentEntry13.2k
) {
2246
0
        IsAtEnd = true;
2247
0
      }
2248
3.50k
      WasPreviousTokenFlowEntry = false;
2249
3.50k
      break;
2250
26.9k
    }
2251
26.9k
  }
2252
57.2k
}
2253
2254
4.93k
/// Begin a new document on stream \p S.
///
/// Installs the two default tag mappings, parses any directives (a
/// TK_DocumentStart token is expected after them) and consumes a
/// TK_DocumentStart token if one comes next.
Document::Document(Stream &S) : stream(S), Root(nullptr) {
  // Tag maps starts with two default mappings.
  TagMap["!"] = "!";
  TagMap["!!"] = "tag:yaml.org,2002:";

  const bool SawDirective = parseDirectives();
  if (SawDirective)
    expectToken(Token::TK_DocumentStart);

  if (peekNext().Kind == Token::TK_DocumentStart)
    getNext();
}
2265
2266
6.99k
bool Document::skip()  {
2267
6.99k
  if (stream.scanner->failed())
2268
45
    return false;
2269
6.95k
  
if (6.95k
!Root6.95k
)
2270
58
    getRoot();
2271
6.95k
  Root->skip();
2272
6.95k
  Token &T = peekNext();
2273
6.95k
  if (T.Kind == Token::TK_StreamEnd)
2274
1.34k
    return false;
2275
5.60k
  
if (5.60k
T.Kind == Token::TK_DocumentEnd5.60k
) {
2276
3.19k
    getNext();
2277
3.19k
    return skip();
2278
3.19k
  }
2279
2.41k
  return true;
2280
2.41k
}
2281
2282
1.45M
/// Forward to the stream scanner's peekNext().
Token &Document::peekNext() { return stream.scanner->peekNext(); }
2285
2286
641k
/// Forward to the stream scanner's getNext().
Token Document::getNext() { return stream.scanner->getNext(); }
2289
2290
25
void Document::setError(const Twine &Message, Token &Location) const {
2291
25
  stream.scanner->setError(Message, Location.Range.begin());
2292
25
}
2293
2294
356k
bool Document::failed() const {
2295
356k
  return stream.scanner->failed();
2296
356k
}
2297
2298
290k
/// Parse the node that starts at the next token.
///
/// Leading anchor and tag properties are consumed first (at most one of
/// each; a duplicate is reported and yields nullptr). An alias token
/// produces an AliasNode immediately. The token that follows the properties
/// then selects which node is allocated from NodeAllocator.
///
/// \returns the new node, or nullptr on TK_Error or a duplicate property.
Node *Document::parseBlockNode() {
  Token T = peekNext();
  // Handle properties.
  Token AnchorInfo;
  Token TagInfo;
  bool ParsingProperties = true;
  while (ParsingProperties) {
    switch (T.Kind) {
    case Token::TK_Alias:
      getNext();
      return new (NodeAllocator)
          AliasNode(stream.CurrentDoc, T.Range.substr(1));
    case Token::TK_Anchor:
      if (AnchorInfo.Kind == Token::TK_Anchor) {
        setError("Already encountered an anchor for this node!", T);
        return nullptr;
      }
      AnchorInfo = getNext(); // Consume TK_Anchor.
      T = peekNext();
      break;
    case Token::TK_Tag:
      if (TagInfo.Kind == Token::TK_Tag) {
        setError("Already encountered a tag for this node!", T);
        return nullptr;
      }
      TagInfo = getNext(); // Consume TK_Tag.
      T = peekNext();
      break;
    default:
      ParsingProperties = false;
      break;
    }
  }

  // The anchor text has its first character dropped; the tag is passed on
  // as scanned.
  const StringRef AnchorName = AnchorInfo.Range.substr(1);
  const StringRef TagText = TagInfo.Range;

  switch (T.Kind) {
  case Token::TK_BlockEntry:
    // We got an unindented BlockEntry sequence. This is not terminated with
    // a BlockEnd.
    // Don't eat the TK_BlockEntry, SequenceNode needs it.
    return new (NodeAllocator) SequenceNode(
        stream.CurrentDoc, AnchorName, TagText, SequenceNode::ST_Indentless);
  case Token::TK_BlockSequenceStart:
    getNext();
    return new (NodeAllocator) SequenceNode(
        stream.CurrentDoc, AnchorName, TagText, SequenceNode::ST_Block);
  case Token::TK_BlockMappingStart:
    getNext();
    return new (NodeAllocator) MappingNode(
        stream.CurrentDoc, AnchorName, TagText, MappingNode::MT_Block);
  case Token::TK_FlowSequenceStart:
    getNext();
    return new (NodeAllocator) SequenceNode(
        stream.CurrentDoc, AnchorName, TagText, SequenceNode::ST_Flow);
  case Token::TK_FlowMappingStart:
    getNext();
    return new (NodeAllocator) MappingNode(
        stream.CurrentDoc, AnchorName, TagText, MappingNode::MT_Flow);
  case Token::TK_Scalar:
    getNext();
    return new (NodeAllocator)
        ScalarNode(stream.CurrentDoc, AnchorName, TagText, T.Range);
  case Token::TK_BlockScalar: {
    getNext();
    // Copy the value including its terminating NUL into the allocator, then
    // drop the NUL from the view.
    StringRef NullTerminatedStr(T.Value.c_str(), T.Value.length() + 1);
    StringRef StrCopy = NullTerminatedStr.copy(NodeAllocator).drop_back();
    return new (NodeAllocator) BlockScalarNode(
        stream.CurrentDoc, AnchorName, TagText, StrCopy, T.Range);
  }
  case Token::TK_Key:
    // Don't eat the TK_Key, KeyValueNode expects it.
    return new (NodeAllocator) MappingNode(
        stream.CurrentDoc, AnchorName, TagText, MappingNode::MT_Inline);
  case Token::TK_DocumentStart:
  case Token::TK_DocumentEnd:
  case Token::TK_StreamEnd:
  default:
    // TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not
    //       !!null null.
    return new (NodeAllocator) NullNode(stream.CurrentDoc);
  case Token::TK_Error:
    return nullptr;
  }
  llvm_unreachable("Control flow shouldn't reach here.");
  return nullptr;
}
2400
2401
4.93k
bool Document::parseDirectives() {
2402
4.93k
  bool isDirective = false;
2403
4.95k
  while (
true4.95k
) {
2404
4.95k
    Token T = peekNext();
2405
4.95k
    if (
T.Kind == Token::TK_TagDirective4.95k
) {
2406
13
      parseTAGDirective();
2407
13
      isDirective = true;
2408
4.95k
    } else 
if (4.93k
T.Kind == Token::TK_VersionDirective4.93k
) {
2409
5
      parseYAMLDirective();
2410
5
      isDirective = true;
2411
5
    } else
2412
4.93k
      break;
2413
4.95k
  }
2414
4.93k
  return isDirective;
2415
4.93k
}
2416
2417
5
void Document::parseYAMLDirective() {
2418
5
  getNext(); // Eat %YAML <version>
2419
5
}
2420
2421
13
void Document::parseTAGDirective() {
2422
13
  Token Tag = getNext(); // %TAG <handle> <prefix>
2423
13
  StringRef T = Tag.Range;
2424
13
  // Strip %TAG
2425
13
  T = T.substr(T.find_first_of(" \t")).ltrim(" \t");
2426
13
  std::size_t HandleEnd = T.find_first_of(" \t");
2427
13
  StringRef TagHandle = T.substr(0, HandleEnd);
2428
13
  StringRef TagPrefix = T.substr(HandleEnd).ltrim(" \t");
2429
13
  TagMap[TagHandle] = TagPrefix;
2430
13
}
2431
2432
13
bool Document::expectToken(int TK) {
2433
13
  Token T = getNext();
2434
13
  if (
T.Kind != TK13
) {
2435
2
    setError("Unexpected token", T);
2436
2
    return false;
2437
2
  }
2438
11
  return true;
2439
11
}