Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Support/YAMLParser.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- YAMLParser.cpp - Simple YAML parser --------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
//  This file implements a YAML parser.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "llvm/Support/YAMLParser.h"
14
#include "llvm/ADT/AllocatorList.h"
15
#include "llvm/ADT/ArrayRef.h"
16
#include "llvm/ADT/None.h"
17
#include "llvm/ADT/STLExtras.h"
18
#include "llvm/ADT/SmallString.h"
19
#include "llvm/ADT/SmallVector.h"
20
#include "llvm/ADT/StringExtras.h"
21
#include "llvm/ADT/StringRef.h"
22
#include "llvm/ADT/Twine.h"
23
#include "llvm/Support/Compiler.h"
24
#include "llvm/Support/ErrorHandling.h"
25
#include "llvm/Support/MemoryBuffer.h"
26
#include "llvm/Support/SMLoc.h"
27
#include "llvm/Support/SourceMgr.h"
28
#include "llvm/Support/Unicode.h"
29
#include "llvm/Support/raw_ostream.h"
30
#include <algorithm>
31
#include <cassert>
32
#include <cstddef>
33
#include <cstdint>
34
#include <map>
35
#include <memory>
36
#include <string>
37
#include <system_error>
38
#include <utility>
39
40
using namespace llvm;
41
using namespace yaml;
42
43
enum UnicodeEncodingForm {
44
  UEF_UTF32_LE, ///< UTF-32 Little Endian
45
  UEF_UTF32_BE, ///< UTF-32 Big Endian
46
  UEF_UTF16_LE, ///< UTF-16 Little Endian
47
  UEF_UTF16_BE, ///< UTF-16 Big Endian
48
  UEF_UTF8,     ///< UTF-8 or ascii.
49
  UEF_Unknown   ///< Not a valid Unicode encoding.
50
};
51
52
/// EncodingInfo - Holds the encoding type and length of the byte order mark if
53
///                it exists. Length is in {0, 2, 3, 4}.
54
using EncodingInfo = std::pair<UnicodeEncodingForm, unsigned>;
55
56
/// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode
57
///                      encoding form of \a Input.
58
///
59
/// @param Input A string of length 0 or more.
60
/// @returns An EncodingInfo indicating the Unicode encoding form of the input
61
///          and how long the byte order mark is if one exists.
62
4.90k
static EncodingInfo getUnicodeEncoding(StringRef Input) {
63
4.90k
  if (Input.empty())
64
6
    return std::make_pair(UEF_Unknown, 0);
65
4.89k
66
4.89k
  switch (uint8_t(Input[0])) {
67
4.89k
  case 0x00:
68
0
    if (Input.size() >= 4) {
69
0
      if (  Input[1] == 0
70
0
         && uint8_t(Input[2]) == 0xFE
71
0
         && uint8_t(Input[3]) == 0xFF)
72
0
        return std::make_pair(UEF_UTF32_BE, 4);
73
0
      if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0)
74
0
        return std::make_pair(UEF_UTF32_BE, 0);
75
0
    }
76
0
77
0
    if (Input.size() >= 2 && Input[1] != 0)
78
0
      return std::make_pair(UEF_UTF16_BE, 0);
79
0
    return std::make_pair(UEF_Unknown, 0);
80
1
  case 0xFF:
81
1
    if (  Input.size() >= 4
82
1
       && 
uint8_t(Input[1]) == 0xFE0
83
1
       && 
Input[2] == 00
84
1
       && 
Input[3] == 00
)
85
0
      return std::make_pair(UEF_UTF32_LE, 4);
86
1
87
1
    if (Input.size() >= 2 && 
uint8_t(Input[1]) == 0xFE0
)
88
0
      return std::make_pair(UEF_UTF16_LE, 2);
89
1
    return std::make_pair(UEF_Unknown, 0);
90
1
  case 0xFE:
91
0
    if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFF)
92
0
      return std::make_pair(UEF_UTF16_BE, 2);
93
0
    return std::make_pair(UEF_Unknown, 0);
94
3
  case 0xEF:
95
3
    if (  Input.size() >= 3
96
3
       && uint8_t(Input[1]) == 0xBB
97
3
       && uint8_t(Input[2]) == 0xBF)
98
3
      return std::make_pair(UEF_UTF8, 3);
99
0
    return std::make_pair(UEF_Unknown, 0);
100
4.89k
  }
101
4.89k
102
4.89k
  // It could still be utf-32 or utf-16.
103
4.89k
  if (Input.size() >= 4 && 
Input[1] == 04.86k
&&
Input[2] == 00
&&
Input[3] == 00
)
104
0
    return std::make_pair(UEF_UTF32_LE, 0);
105
4.89k
106
4.89k
  if (Input.size() >= 2 && 
Input[1] == 04.88k
)
107
0
    return std::make_pair(UEF_UTF16_LE, 0);
108
4.89k
109
4.89k
  return std::make_pair(UEF_UTF8, 0);
110
4.89k
}
111
112
/// Pin the vtables to this file.
113
0
void Node::anchor() {}
114
0
void NullNode::anchor() {}
115
0
void ScalarNode::anchor() {}
116
0
void BlockScalarNode::anchor() {}
117
0
void KeyValueNode::anchor() {}
118
0
void MappingNode::anchor() {}
119
0
void SequenceNode::anchor() {}
120
0
void AliasNode::anchor() {}
121
122
namespace llvm {
123
namespace yaml {
124
125
/// Token - A single YAML token.
126
struct Token {
127
  enum TokenKind {
128
    TK_Error, // Uninitialized token.
129
    TK_StreamStart,
130
    TK_StreamEnd,
131
    TK_VersionDirective,
132
    TK_TagDirective,
133
    TK_DocumentStart,
134
    TK_DocumentEnd,
135
    TK_BlockEntry,
136
    TK_BlockEnd,
137
    TK_BlockSequenceStart,
138
    TK_BlockMappingStart,
139
    TK_FlowEntry,
140
    TK_FlowSequenceStart,
141
    TK_FlowSequenceEnd,
142
    TK_FlowMappingStart,
143
    TK_FlowMappingEnd,
144
    TK_Key,
145
    TK_Value,
146
    TK_Scalar,
147
    TK_BlockScalar,
148
    TK_Alias,
149
    TK_Anchor,
150
    TK_Tag
151
  } Kind = TK_Error;
152
153
  /// A string of length 0 or more whose begin() points to the logical location
154
  /// of the token in the input.
155
  StringRef Range;
156
157
  /// The value of a block scalar node.
158
  std::string Value;
159
160
3.89M
  Token() = default;
161
};
162
163
} // end namespace yaml
164
} // end namespace llvm
165
166
using TokenQueueT = BumpPtrList<Token>;
167
168
namespace {
169
170
/// This struct is used to track simple keys.
171
///
172
/// Simple keys are handled by creating an entry in SimpleKeys for each Token
173
/// which could legally be the start of a simple key. When peekNext is called,
174
/// if the Token To be returned is referenced by a SimpleKey, we continue
175
/// tokenizing until that potential simple key has either been found to not be
176
/// a simple key (we moved on to the next line or went further than 1024 chars).
177
/// Or when we run into a Value, and then insert a Key token (and possibly
178
/// others) before the SimpleKey's Tok.
179
struct SimpleKey {
180
  TokenQueueT::iterator Tok;
181
  unsigned Column;
182
  unsigned Line;
183
  unsigned FlowLevel;
184
  bool IsRequired;
185
186
839k
  bool operator ==(const SimpleKey &Other) {
187
839k
    return Tok == Other.Tok;
188
839k
  }
189
};
190
191
} // end anonymous namespace
192
193
/// The Unicode scalar value of a UTF-8 minimal well-formed code unit
194
///        subsequence and the subsequence's length in code units (uint8_t).
195
///        A length of 0 represents an error.
196
using UTF8Decoded = std::pair<uint32_t, unsigned>;
197
198
85
static UTF8Decoded decodeUTF8(StringRef Range) {
199
85
  StringRef::iterator Position= Range.begin();
200
85
  StringRef::iterator End = Range.end();
201
85
  // 1 byte: [0x00, 0x7f]
202
85
  // Bit pattern: 0xxxxxxx
203
85
  if ((*Position & 0x80) == 0) {
204
0
     return std::make_pair(*Position, 1);
205
0
  }
206
85
  // 2 bytes: [0x80, 0x7ff]
207
85
  // Bit pattern: 110xxxxx 10xxxxxx
208
85
  if (Position + 1 != End &&
209
85
      
((*Position & 0xE0) == 0xC0)84
&&
210
85
      
((*(Position + 1) & 0xC0) == 0x80)57
) {
211
57
    uint32_t codepoint = ((*Position & 0x1F) << 6) |
212
57
                          (*(Position + 1) & 0x3F);
213
57
    if (codepoint >= 0x80)
214
57
      return std::make_pair(codepoint, 2);
215
28
  }
216
28
  // 3 bytes: [0x8000, 0xffff]
217
28
  // Bit pattern: 1110xxxx 10xxxxxx 10xxxxxx
218
28
  if (Position + 2 != End &&
219
28
      ((*Position & 0xF0) == 0xE0) &&
220
28
      
((*(Position + 1) & 0xC0) == 0x80)27
&&
221
28
      
((*(Position + 2) & 0xC0) == 0x80)27
) {
222
27
    uint32_t codepoint = ((*Position & 0x0F) << 12) |
223
27
                         ((*(Position + 1) & 0x3F) << 6) |
224
27
                          (*(Position + 2) & 0x3F);
225
27
    // Codepoints between 0xD800 and 0xDFFF are invalid, as
226
27
    // they are high / low surrogate halves used by UTF-16.
227
27
    if (codepoint >= 0x800 &&
228
27
        (codepoint < 0xD800 || 
codepoint > 0xDFFF1
))
229
27
      return std::make_pair(codepoint, 3);
230
1
  }
231
1
  // 4 bytes: [0x10000, 0x10FFFF]
232
1
  // Bit pattern: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
233
1
  if (Position + 3 != End &&
234
1
      ((*Position & 0xF8) == 0xF0) &&
235
1
      
((*(Position + 1) & 0xC0) == 0x80)0
&&
236
1
      
((*(Position + 2) & 0xC0) == 0x80)0
&&
237
1
      
((*(Position + 3) & 0xC0) == 0x80)0
) {
238
0
    uint32_t codepoint = ((*Position & 0x07) << 18) |
239
0
                         ((*(Position + 1) & 0x3F) << 12) |
240
0
                         ((*(Position + 2) & 0x3F) << 6) |
241
0
                          (*(Position + 3) & 0x3F);
242
0
    if (codepoint >= 0x10000 && codepoint <= 0x10FFFF)
243
0
      return std::make_pair(codepoint, 4);
244
1
  }
245
1
  return std::make_pair(0, 0);
246
1
}
247
248
namespace llvm {
249
namespace yaml {
250
251
/// Scans YAML tokens from a MemoryBuffer.
252
class Scanner {
253
public:
254
  Scanner(StringRef Input, SourceMgr &SM, bool ShowColors = true,
255
          std::error_code *EC = nullptr);
256
  Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors = true,
257
          std::error_code *EC = nullptr);
258
259
  /// Parse the next token and return it without popping it.
260
  Token &peekNext();
261
262
  /// Parse the next token and pop it from the queue.
263
  Token getNext();
264
265
  void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message,
266
175
                  ArrayRef<SMRange> Ranges = None) {
267
175
    SM.PrintMessage(Loc, Kind, Message, Ranges, /* FixIts= */ None, ShowColors);
268
175
  }
269
270
47
  void setError(const Twine &Message, StringRef::iterator Position) {
271
47
    if (Current >= End)
272
31
      Current = End - 1;
273
47
274
47
    // propagate the error if possible
275
47
    if (EC)
276
4
      *EC = make_error_code(std::errc::invalid_argument);
277
47
278
47
    // Don't print out more errors after the first one we encounter. The rest
279
47
    // are just the result of the first, and have no meaning.
280
47
    if (!Failed)
281
47
      printError(SMLoc::getFromPointer(Current), SourceMgr::DK_Error, Message);
282
47
    Failed = true;
283
47
  }
284
285
1
  void setError(const Twine &Message) {
286
1
    setError(Message, Current);
287
1
  }
288
289
  /// Returns true if an error occurred while parsing.
290
866k
  bool failed() {
291
866k
    return Failed;
292
866k
  }
293
294
private:
295
  void init(MemoryBufferRef Buffer);
296
297
4.90k
  StringRef currentInput() {
298
4.90k
    return StringRef(Current, End - Current);
299
4.90k
  }
300
301
  /// Decode a UTF-8 minimal well-formed code unit subsequence starting
302
  ///        at \a Position.
303
  ///
304
  /// If the UTF-8 code units starting at Position do not form a well-formed
305
  /// code unit subsequence, then the Unicode scalar value is 0, and the length
306
  /// is 0.
307
33
  UTF8Decoded decodeUTF8(StringRef::iterator Position) {
308
33
    return ::decodeUTF8(StringRef(Position, End - Position));
309
33
  }
310
311
  // The following functions are based on the gramar rules in the YAML spec. The
312
  // style of the function names it meant to closely match how they are written
313
  // in the spec. The number within the [] is the number of the grammar rule in
314
  // the spec.
315
  //
316
  // See 4.2 [Production Naming Conventions] for the meaning of the prefixes.
317
  //
318
  // c-
319
  //   A production starting and ending with a special character.
320
  // b-
321
  //   A production matching a single line break.
322
  // nb-
323
  //   A production starting and ending with a non-break character.
324
  // s-
325
  //   A production starting and ending with a white space character.
326
  // ns-
327
  //   A production starting and ending with a non-space character.
328
  // l-
329
  //   A production matching complete line(s).
330
331
  /// Skip a single nb-char[27] starting at Position.
332
  ///
333
  /// A nb-char is 0x9 | [0x20-0x7E] | 0x85 | [0xA0-0xD7FF] | [0xE000-0xFEFE]
334
  ///                  | [0xFF00-0xFFFD] | [0x10000-0x10FFFF]
335
  ///
336
  /// @returns The code unit after the nb-char, or Position if it's not an
337
  ///          nb-char.
338
  StringRef::iterator skip_nb_char(StringRef::iterator Position);
339
340
  /// Skip a single b-break[28] starting at Position.
341
  ///
342
  /// A b-break is 0xD 0xA | 0xD | 0xA
343
  ///
344
  /// @returns The code unit after the b-break, or Position if it's not a
345
  ///          b-break.
346
  StringRef::iterator skip_b_break(StringRef::iterator Position);
347
348
  /// Skip a single s-space[31] starting at Position.
349
  ///
350
  /// An s-space is 0x20
351
  ///
352
  /// @returns The code unit after the s-space, or Position if it's not a
353
  ///          s-space.
354
  StringRef::iterator skip_s_space(StringRef::iterator Position);
355
356
  /// Skip a single s-white[33] starting at Position.
357
  ///
358
  /// A s-white is 0x20 | 0x9
359
  ///
360
  /// @returns The code unit after the s-white, or Position if it's not a
361
  ///          s-white.
362
  StringRef::iterator skip_s_white(StringRef::iterator Position);
363
364
  /// Skip a single ns-char[34] starting at Position.
365
  ///
366
  /// A ns-char is nb-char - s-white
367
  ///
368
  /// @returns The code unit after the ns-char, or Position if it's not a
369
  ///          ns-char.
370
  StringRef::iterator skip_ns_char(StringRef::iterator Position);
371
372
  using SkipWhileFunc = StringRef::iterator (Scanner::*)(StringRef::iterator);
373
374
  /// Skip minimal well-formed code unit subsequences until Func
375
  ///        returns its input.
376
  ///
377
  /// @returns The code unit after the last minimal well-formed code unit
378
  ///          subsequence that Func accepted.
379
  StringRef::iterator skip_while( SkipWhileFunc Func
380
                                , StringRef::iterator Position);
381
382
  /// Skip minimal well-formed code unit subsequences until Func returns its
383
  /// input.
384
  void advanceWhile(SkipWhileFunc Func);
385
386
  /// Scan ns-uri-char[39]s starting at Cur.
387
  ///
388
  /// This updates Cur and Column while scanning.
389
  void scan_ns_uri_char();
390
391
  /// Consume a minimal well-formed code unit subsequence starting at
392
  ///        \a Cur. Return false if it is not the same Unicode scalar value as
393
  ///        \a Expected. This updates \a Column.
394
  bool consume(uint32_t Expected);
395
396
  /// Skip \a Distance UTF-8 code units. Updates \a Cur and \a Column.
397
  void skip(uint32_t Distance);
398
399
  /// Return true if the minimal well-formed code unit subsequence at
400
  ///        Pos is whitespace or a new line
401
  bool isBlankOrBreak(StringRef::iterator Position);
402
403
  /// Consume a single b-break[28] if it's present at the current position.
404
  ///
405
  /// Return false if the code unit at the current position isn't a line break.
406
  bool consumeLineBreakIfPresent();
407
408
  /// If IsSimpleKeyAllowed, create and push_back a new SimpleKey.
409
  void saveSimpleKeyCandidate( TokenQueueT::iterator Tok
410
                             , unsigned AtColumn
411
                             , bool IsRequired);
412
413
  /// Remove simple keys that can no longer be valid simple keys.
414
  ///
415
  /// Invalid simple keys are not on the current line or are further than 1024
416
  /// columns back.
417
  void removeStaleSimpleKeyCandidates();
418
419
  /// Remove all simple keys on FlowLevel \a Level.
420
  void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level);
421
422
  /// Unroll indentation in \a Indents back to \a Col. Creates BlockEnd
423
  ///        tokens if needed.
424
  bool unrollIndent(int ToColumn);
425
426
  /// Increase indent to \a Col. Creates \a Kind token at \a InsertPoint
427
  ///        if needed.
428
  bool rollIndent( int ToColumn
429
                 , Token::TokenKind Kind
430
                 , TokenQueueT::iterator InsertPoint);
431
432
  /// Skip a single-line comment when the comment starts at the current
433
  /// position of the scanner.
434
  void skipComment();
435
436
  /// Skip whitespace and comments until the start of the next token.
437
  void scanToNextToken();
438
439
  /// Must be the first token generated.
440
  bool scanStreamStart();
441
442
  /// Generate tokens needed to close out the stream.
443
  bool scanStreamEnd();
444
445
  /// Scan a %BLAH directive.
446
  bool scanDirective();
447
448
  /// Scan a ... or ---.
449
  bool scanDocumentIndicator(bool IsStart);
450
451
  /// Scan a [ or { and generate the proper flow collection start token.
452
  bool scanFlowCollectionStart(bool IsSequence);
453
454
  /// Scan a ] or } and generate the proper flow collection end token.
455
  bool scanFlowCollectionEnd(bool IsSequence);
456
457
  /// Scan the , that separates entries in a flow collection.
458
  bool scanFlowEntry();
459
460
  /// Scan the - that starts block sequence entries.
461
  bool scanBlockEntry();
462
463
  /// Scan an explicit ? indicating a key.
464
  bool scanKey();
465
466
  /// Scan an explicit : indicating a value.
467
  bool scanValue();
468
469
  /// Scan a quoted scalar.
470
  bool scanFlowScalar(bool IsDoubleQuoted);
471
472
  /// Scan an unquoted scalar.
473
  bool scanPlainScalar();
474
475
  /// Scan an Alias or Anchor starting with * or &.
476
  bool scanAliasOrAnchor(bool IsAlias);
477
478
  /// Scan a block scalar starting with | or >.
479
  bool scanBlockScalar(bool IsLiteral);
480
481
  /// Scan a chomping indicator in a block scalar header.
482
  char scanBlockChompingIndicator();
483
484
  /// Scan an indentation indicator in a block scalar header.
485
  unsigned scanBlockIndentationIndicator();
486
487
  /// Scan a block scalar header.
488
  ///
489
  /// Return false if an error occurred.
490
  bool scanBlockScalarHeader(char &ChompingIndicator, unsigned &IndentIndicator,
491
                             bool &IsDone);
492
493
  /// Look for the indentation level of a block scalar.
494
  ///
495
  /// Return false if an error occurred.
496
  bool findBlockScalarIndent(unsigned &BlockIndent, unsigned BlockExitIndent,
497
                             unsigned &LineBreaks, bool &IsDone);
498
499
  /// Scan the indentation of a text line in a block scalar.
500
  ///
501
  /// Return false if an error occurred.
502
  bool scanBlockScalarIndent(unsigned BlockIndent, unsigned BlockExitIndent,
503
                             bool &IsDone);
504
505
  /// Scan a tag of the form !stuff.
506
  bool scanTag();
507
508
  /// Dispatch to the next scanning function based on \a *Cur.
509
  bool fetchMoreTokens();
510
511
  /// The SourceMgr used for diagnostics and buffer management.
512
  SourceMgr &SM;
513
514
  /// The original input.
515
  MemoryBufferRef InputBuffer;
516
517
  /// The current position of the scanner.
518
  StringRef::iterator Current;
519
520
  /// The end of the input (one past the last character).
521
  StringRef::iterator End;
522
523
  /// Current YAML indentation level in spaces.
524
  int Indent;
525
526
  /// Current column number in Unicode code points.
527
  unsigned Column;
528
529
  /// Current line number.
530
  unsigned Line;
531
532
  /// How deep we are in flow style containers. 0 Means at block level.
533
  unsigned FlowLevel;
534
535
  /// Are we at the start of the stream?
536
  bool IsStartOfStream;
537
538
  /// Can the next token be the start of a simple key?
539
  bool IsSimpleKeyAllowed;
540
541
  /// True if an error has occurred.
542
  bool Failed;
543
544
  /// Should colors be used when printing out the diagnostic messages?
545
  bool ShowColors;
546
547
  /// Queue of tokens. This is required to queue up tokens while looking
548
  ///        for the end of a simple key. And for cases where a single character
549
  ///        can produce multiple tokens (e.g. BlockEnd).
550
  TokenQueueT TokenQueue;
551
552
  /// Indentation levels.
553
  SmallVector<int, 4> Indents;
554
555
  /// Potential simple keys.
556
  SmallVector<SimpleKey, 4> SimpleKeys;
557
558
  std::error_code *EC;
559
};
560
561
} // end namespace yaml
562
} // end namespace llvm
563
564
/// encodeUTF8 - Encode \a UnicodeScalarValue in UTF-8 and append it to result.
565
static void encodeUTF8( uint32_t UnicodeScalarValue
566
11
                      , SmallVectorImpl<char> &Result) {
567
11
  if (UnicodeScalarValue <= 0x7F) {
568
6
    Result.push_back(UnicodeScalarValue & 0x7F);
569
6
  } else 
if (5
UnicodeScalarValue <= 0x7FF5
) {
570
2
    uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6);
571
2
    uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F);
572
2
    Result.push_back(FirstByte);
573
2
    Result.push_back(SecondByte);
574
3
  } else if (UnicodeScalarValue <= 0xFFFF) {
575
3
    uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12);
576
3
    uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
577
3
    uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F);
578
3
    Result.push_back(FirstByte);
579
3
    Result.push_back(SecondByte);
580
3
    Result.push_back(ThirdByte);
581
3
  } else 
if (0
UnicodeScalarValue <= 0x10FFFF0
) {
582
0
    uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18);
583
0
    uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12);
584
0
    uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
585
0
    uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F);
586
0
    Result.push_back(FirstByte);
587
0
    Result.push_back(SecondByte);
588
0
    Result.push_back(ThirdByte);
589
0
    Result.push_back(FourthByte);
590
0
  }
591
11
}
592
593
0
bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) {
594
0
  SourceMgr SM;
595
0
  Scanner scanner(Input, SM);
596
0
  while (true) {
597
0
    Token T = scanner.getNext();
598
0
    switch (T.Kind) {
599
0
    case Token::TK_StreamStart:
600
0
      OS << "Stream-Start: ";
601
0
      break;
602
0
    case Token::TK_StreamEnd:
603
0
      OS << "Stream-End: ";
604
0
      break;
605
0
    case Token::TK_VersionDirective:
606
0
      OS << "Version-Directive: ";
607
0
      break;
608
0
    case Token::TK_TagDirective:
609
0
      OS << "Tag-Directive: ";
610
0
      break;
611
0
    case Token::TK_DocumentStart:
612
0
      OS << "Document-Start: ";
613
0
      break;
614
0
    case Token::TK_DocumentEnd:
615
0
      OS << "Document-End: ";
616
0
      break;
617
0
    case Token::TK_BlockEntry:
618
0
      OS << "Block-Entry: ";
619
0
      break;
620
0
    case Token::TK_BlockEnd:
621
0
      OS << "Block-End: ";
622
0
      break;
623
0
    case Token::TK_BlockSequenceStart:
624
0
      OS << "Block-Sequence-Start: ";
625
0
      break;
626
0
    case Token::TK_BlockMappingStart:
627
0
      OS << "Block-Mapping-Start: ";
628
0
      break;
629
0
    case Token::TK_FlowEntry:
630
0
      OS << "Flow-Entry: ";
631
0
      break;
632
0
    case Token::TK_FlowSequenceStart:
633
0
      OS << "Flow-Sequence-Start: ";
634
0
      break;
635
0
    case Token::TK_FlowSequenceEnd:
636
0
      OS << "Flow-Sequence-End: ";
637
0
      break;
638
0
    case Token::TK_FlowMappingStart:
639
0
      OS << "Flow-Mapping-Start: ";
640
0
      break;
641
0
    case Token::TK_FlowMappingEnd:
642
0
      OS << "Flow-Mapping-End: ";
643
0
      break;
644
0
    case Token::TK_Key:
645
0
      OS << "Key: ";
646
0
      break;
647
0
    case Token::TK_Value:
648
0
      OS << "Value: ";
649
0
      break;
650
0
    case Token::TK_Scalar:
651
0
      OS << "Scalar: ";
652
0
      break;
653
0
    case Token::TK_BlockScalar:
654
0
      OS << "Block Scalar: ";
655
0
      break;
656
0
    case Token::TK_Alias:
657
0
      OS << "Alias: ";
658
0
      break;
659
0
    case Token::TK_Anchor:
660
0
      OS << "Anchor: ";
661
0
      break;
662
0
    case Token::TK_Tag:
663
0
      OS << "Tag: ";
664
0
      break;
665
0
    case Token::TK_Error:
666
0
      break;
667
0
    }
668
0
    OS << T.Range << "\n";
669
0
    if (T.Kind == Token::TK_StreamEnd)
670
0
      break;
671
0
    else if (T.Kind == Token::TK_Error)
672
0
      return false;
673
0
  }
674
0
  return true;
675
0
}
676
677
0
bool yaml::scanTokens(StringRef Input) {
678
0
  SourceMgr SM;
679
0
  Scanner scanner(Input, SM);
680
0
  while (true) {
681
0
    Token T = scanner.getNext();
682
0
    if (T.Kind == Token::TK_StreamEnd)
683
0
      break;
684
0
    else if (T.Kind == Token::TK_Error)
685
0
      return false;
686
0
  }
687
0
  return true;
688
0
}
689
690
2.16k
std::string yaml::escape(StringRef Input, bool EscapePrintable) {
691
2.16k
  std::string EscapedInput;
692
91.1k
  for (StringRef::iterator i = Input.begin(), e = Input.end(); i != e; 
++i89.0k
) {
693
89.0k
    if (*i == '\\')
694
7
      EscapedInput += "\\\\";
695
89.0k
    else if (*i == '"')
696
16
      EscapedInput += "\\\"";
697
89.0k
    else if (*i == 0)
698
4
      EscapedInput += "\\0";
699
89.0k
    else if (*i == 0x07)
700
1
      EscapedInput += "\\a";
701
89.0k
    else if (*i == 0x08)
702
2
      EscapedInput += "\\b";
703
89.0k
    else if (*i == 0x09)
704
30
      EscapedInput += "\\t";
705
88.9k
    else if (*i == 0x0A)
706
209
      EscapedInput += "\\n";
707
88.7k
    else if (*i == 0x0B)
708
1
      EscapedInput += "\\v";
709
88.7k
    else if (*i == 0x0C)
710
1
      EscapedInput += "\\f";
711
88.7k
    else if (*i == 0x0D)
712
2
      EscapedInput += "\\r";
713
88.7k
    else if (*i == 0x1B)
714
1
      EscapedInput += "\\e";
715
88.7k
    else if ((unsigned char)*i < 0x20) { // Control characters not handled above.
716
6
      std::string HexStr = utohexstr(*i);
717
6
      EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
718
88.7k
    } else if (*i & 0x80) { // UTF-8 multiple code unit subsequence.
719
52
      UTF8Decoded UnicodeScalarValue
720
52
        = decodeUTF8(StringRef(i, Input.end() - i));
721
52
      if (UnicodeScalarValue.second == 0) {
722
0
        // Found invalid char.
723
0
        SmallString<4> Val;
724
0
        encodeUTF8(0xFFFD, Val);
725
0
        EscapedInput.insert(EscapedInput.end(), Val.begin(), Val.end());
726
0
        // FIXME: Error reporting.
727
0
        return EscapedInput;
728
0
      }
729
52
      if (UnicodeScalarValue.first == 0x85)
730
19
        EscapedInput += "\\N";
731
33
      else if (UnicodeScalarValue.first == 0xA0)
732
1
        EscapedInput += "\\_";
733
32
      else if (UnicodeScalarValue.first == 0x2028)
734
7
        EscapedInput += "\\L";
735
25
      else if (UnicodeScalarValue.first == 0x2029)
736
4
        EscapedInput += "\\P";
737
21
      else if (!EscapePrintable &&
738
21
               
sys::unicode::isPrintable(UnicodeScalarValue.first)17
)
739
16
        EscapedInput += StringRef(i, UnicodeScalarValue.second);
740
5
      else {
741
5
        std::string HexStr = utohexstr(UnicodeScalarValue.first);
742
5
        if (HexStr.size() <= 2)
743
0
          EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
744
5
        else if (HexStr.size() <= 4)
745
5
          EscapedInput += "\\u" + std::string(4 - HexStr.size(), '0') + HexStr;
746
0
        else if (HexStr.size() <= 8)
747
0
          EscapedInput += "\\U" + std::string(8 - HexStr.size(), '0') + HexStr;
748
5
      }
749
52
      i += UnicodeScalarValue.second - 1;
750
52
    } else
751
88.7k
      EscapedInput.push_back(*i);
752
89.0k
  }
753
2.16k
  return EscapedInput;
754
2.16k
}
755
756
Scanner::Scanner(StringRef Input, SourceMgr &sm, bool ShowColors,
757
                 std::error_code *EC)
758
4.78k
    : SM(sm), ShowColors(ShowColors), EC(EC) {
759
4.78k
  init(MemoryBufferRef(Input, "YAML"));
760
4.78k
}
761
762
Scanner::Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors,
763
                 std::error_code *EC)
764
120
    : SM(SM_), ShowColors(ShowColors), EC(EC) {
765
120
  init(Buffer);
766
120
}
767
768
4.90k
void Scanner::init(MemoryBufferRef Buffer) {
769
4.90k
  InputBuffer = Buffer;
770
4.90k
  Current = InputBuffer.getBufferStart();
771
4.90k
  End = InputBuffer.getBufferEnd();
772
4.90k
  Indent = -1;
773
4.90k
  Column = 0;
774
4.90k
  Line = 0;
775
4.90k
  FlowLevel = 0;
776
4.90k
  IsStartOfStream = true;
777
4.90k
  IsSimpleKeyAllowed = true;
778
4.90k
  Failed = false;
779
4.90k
  std::unique_ptr<MemoryBuffer> InputBufferOwner =
780
4.90k
      MemoryBuffer::getMemBuffer(Buffer);
781
4.90k
  SM.AddNewSourceBuffer(std::move(InputBufferOwner), SMLoc());
782
4.90k
}
783
784
4.85M
Token &Scanner::peekNext() {
785
4.85M
  // If the current token is a possible simple key, keep parsing until we
786
4.85M
  // can confirm.
787
4.85M
  bool NeedMore = false;
788
5.36M
  while (true) {
789
5.36M
    if (TokenQueue.empty() || 
NeedMore4.77M
) {
790
1.10M
      if (!fetchMoreTokens()) {
791
26
        TokenQueue.clear();
792
26
        TokenQueue.push_back(Token());
793
26
        return TokenQueue.front();
794
26
      }
795
5.36M
    }
796
5.36M
    assert(!TokenQueue.empty() &&
797
5.36M
            "fetchMoreTokens lied about getting tokens!");
798
5.36M
799
5.36M
    removeStaleSimpleKeyCandidates();
800
5.36M
    SimpleKey SK;
801
5.36M
    SK.Tok = TokenQueue.begin();
802
5.36M
    if (!is_contained(SimpleKeys, SK))
803
4.85M
      break;
804
512k
    else
805
512k
      NeedMore = true;
806
5.36M
  }
807
4.85M
  
return TokenQueue.front()4.85M
;
808
4.85M
}
809
810
1.49M
Token Scanner::getNext() {
811
1.49M
  Token Ret = peekNext();
812
1.49M
  // TokenQueue can be empty if there was an error getting the next token.
813
1.49M
  if (!TokenQueue.empty())
814
1.49M
    TokenQueue.pop_front();
815
1.49M
816
1.49M
  // There cannot be any referenced Token's if the TokenQueue is empty. So do a
817
1.49M
  // quick deallocation of them all.
818
1.49M
  if (TokenQueue.empty())
819
589k
    TokenQueue.resetAlloc();
820
1.49M
821
1.49M
  return Ret;
822
1.49M
}
823
824
19.7M
StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) {
825
19.7M
  if (Position == End)
826
489
    return Position;
827
19.7M
  // Check 7 bit c-printable - b-char.
828
19.7M
  if (   *Position == 0x09
829
19.7M
      || 
(19.7M
*Position >= 0x2019.7M
&&
*Position <= 0x7E19.3M
))
830
19.3M
    return Position + 1;
831
379k
832
379k
  // Check for valid UTF-8.
833
379k
  if (uint8_t(*Position) & 0x80) {
834
33
    UTF8Decoded u8d = decodeUTF8(Position);
835
33
    if (   u8d.second != 0
836
33
        && 
u8d.first != 0xFEFF32
837
33
        && 
( 31
u8d.first == 0x8531
838
31
          || 
( 11
u8d.first >= 0xA011
839
11
            && u8d.first <= 0xD7FF)
840
31
          || 
( 0
u8d.first >= 0xE0000
841
0
            && u8d.first <= 0xFFFD)
842
31
          || 
( 0
u8d.first >= 0x100000
843
0
            && u8d.first <= 0x10FFFF)))
844
31
      return Position + u8d.second;
845
379k
  }
846
379k
  return Position;
847
379k
}
848
849
1.93M
StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) {
850
1.93M
  if (Position == End)
851
3.55k
    return Position;
852
1.93M
  if (*Position == 0x0D) {
853
641
    if (Position + 1 != End && 
*(Position + 1) == 0x0A640
)
854
622
      return Position + 2;
855
19
    return Position + 1;
856
19
  }
857
1.93M
858
1.93M
  if (*Position == 0x0A)
859
831k
    return Position + 1;
860
1.09M
  return Position;
861
1.09M
}
862
863
574k
StringRef::iterator Scanner::skip_s_space(StringRef::iterator Position) {
864
574k
  if (Position == End)
865
108
    return Position;
866
574k
  if (*Position == ' ')
867
526k
    return Position + 1;
868
47.6k
  return Position;
869
47.6k
}
870
871
725k
StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) {
872
725k
  if (Position == End)
873
1
    return Position;
874
725k
  if (*Position == ' ' || 
*Position == '\t'152k
)
875
572k
    return Position + 1;
876
152k
  return Position;
877
152k
}
878
879
19.4k
StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) {
880
19.4k
  if (Position == End)
881
0
    return Position;
882
19.4k
  if (*Position == ' ' || 
*Position == '\t'19.3k
)
883
112
    return Position;
884
19.3k
  return skip_nb_char(Position);
885
19.3k
}
886
887
StringRef::iterator Scanner::skip_while( SkipWhileFunc Func
888
314k
                                       , StringRef::iterator Position) {
889
12.2M
  while (true) {
890
12.2M
    StringRef::iterator i = (this->*Func)(Position);
891
12.2M
    if (i == Position)
892
314k
      break;
893
11.9M
    Position = i;
894
11.9M
  }
895
314k
  return Position;
896
314k
}
897
898
298k
void Scanner::advanceWhile(SkipWhileFunc Func) {
899
298k
  auto Final = skip_while(Func, Current);
900
298k
  Column += Final - Current;
901
298k
  Current = Final;
902
298k
}
903
904
0
static bool is_ns_hex_digit(const char C) {
905
0
  return    (C >= '0' && C <= '9')
906
0
         || (C >= 'a' && C <= 'z')
907
0
         || (C >= 'A' && C <= 'Z');
908
0
}
909
910
36
static bool is_ns_word_char(const char C) {
911
36
  return    C == '-'
912
36
         || (C >= 'a' && 
C <= 'z'26
)
913
36
         || 
(10
C >= 'A'10
&&
C <= 'Z'0
);
914
36
}
915
916
3
void Scanner::scan_ns_uri_char() {
917
36
  while (true) {
918
36
    if (Current == End)
919
0
      break;
920
36
    if ((   *Current == '%'
921
36
          && 
Current + 2 < End0
922
36
          && 
is_ns_hex_digit(*(Current + 1))0
923
36
          && 
is_ns_hex_digit(*(Current + 2))0
)
924
36
        || is_ns_word_char(*Current)
925
36
        || StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]")
926
33
          != StringRef::npos) {
927
33
      ++Current;
928
33
      ++Column;
929
33
    } else
930
3
      break;
931
36
  }
932
3
}
933
934
22
bool Scanner::consume(uint32_t Expected) {
935
22
  if (Expected >= 0x80)
936
0
    report_fatal_error("Not dealing with this yet");
937
22
  if (Current == End)
938
0
    return false;
939
22
  if (uint8_t(*Current) >= 0x80)
940
0
    report_fatal_error("Not dealing with this yet");
941
22
  if (uint8_t(*Current) == Expected) {
942
20
    ++Current;
943
20
    ++Column;
944
20
    return true;
945
20
  }
946
2
  return false;
947
2
}
948
949
2.81M
void Scanner::skip(uint32_t Distance) {
950
2.81M
  Current += Distance;
951
2.81M
  Column += Distance;
952
2.81M
  assert(Current <= End && "Skipped past the end");
953
2.81M
}
954
955
6.18M
bool Scanner::isBlankOrBreak(StringRef::iterator Position) {
956
6.18M
  if (Position == End)
957
921
    return false;
958
6.18M
  return *Position == ' ' || 
*Position == '\t'5.07M
||
*Position == '\r'5.07M
||
959
6.18M
         
*Position == '\n'5.07M
;
960
6.18M
}
961
962
298k
bool Scanner::consumeLineBreakIfPresent() {
963
298k
  auto Next = skip_b_break(Current);
964
298k
  if (Next == Current)
965
3
    return false;
966
298k
  Column = 0;
967
298k
  ++Line;
968
298k
  Current = Next;
969
298k
  return true;
970
298k
}
971
972
void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok
973
                                    , unsigned AtColumn
974
593k
                                    , bool IsRequired) {
975
593k
  if (IsSimpleKeyAllowed) {
976
341k
    SimpleKey SK;
977
341k
    SK.Tok = Tok;
978
341k
    SK.Line = Line;
979
341k
    SK.Column = AtColumn;
980
341k
    SK.IsRequired = IsRequired;
981
341k
    SK.FlowLevel = FlowLevel;
982
341k
    SimpleKeys.push_back(SK);
983
341k
  }
984
593k
}
985
986
6.46M
void Scanner::removeStaleSimpleKeyCandidates() {
987
6.46M
  for (SmallVectorImpl<SimpleKey>::iterator i = SimpleKeys.begin();
988
7.93M
                                            i != SimpleKeys.end();) {
989
1.46M
    if (i->Line != Line || 
i->Column + 1024 < Column1.43M
) {
990
37.5k
      if (i->IsRequired)
991
0
        setError( "Could not find expected : for simple key"
992
0
                , i->Tok->Range.begin());
993
37.5k
      i = SimpleKeys.erase(i);
994
37.5k
    } else
995
1.43M
      ++i;
996
1.46M
  }
997
6.46M
}
998
999
179k
void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) {
1000
179k
  if (!SimpleKeys.empty() && 
(SimpleKeys.end() - 1)->FlowLevel == Level77.5k
)
1001
20.6k
    SimpleKeys.pop_back();
1002
179k
}
1003
1004
1.13M
bool Scanner::unrollIndent(int ToColumn) {
1005
1.13M
  Token T;
1006
1.13M
  // Indentation is ignored in flow.
1007
1.13M
  if (FlowLevel != 0)
1008
469k
    return true;
1009
662k
1010
719k
  
while (662k
Indent > ToColumn) {
1011
56.4k
    T.Kind = Token::TK_BlockEnd;
1012
56.4k
    T.Range = StringRef(Current, 1);
1013
56.4k
    TokenQueue.push_back(T);
1014
56.4k
    Indent = Indents.pop_back_val();
1015
56.4k
  }
1016
662k
1017
662k
  return true;
1018
662k
}
1019
1020
bool Scanner::rollIndent( int ToColumn
1021
                        , Token::TokenKind Kind
1022
327k
                        , TokenQueueT::iterator InsertPoint) {
1023
327k
  if (FlowLevel)
1024
99.8k
    return true;
1025
227k
  if (Indent < ToColumn) {
1026
56.5k
    Indents.push_back(Indent);
1027
56.5k
    Indent = ToColumn;
1028
56.5k
1029
56.5k
    Token T;
1030
56.5k
    T.Kind = Kind;
1031
56.5k
    T.Range = StringRef(Current, 0);
1032
56.5k
    TokenQueue.insert(InsertPoint, T);
1033
56.5k
  }
1034
227k
  return true;
1035
227k
}
1036
1037
1.50M
void Scanner::skipComment() {
1038
1.50M
  if (*Current != '#')
1039
1.44M
    return;
1040
3.39M
  
while (65.7k
true) {
1041
3.39M
    // This may skip more than one byte, thus Column is only incremented
1042
3.39M
    // for code points.
1043
3.39M
    StringRef::iterator I = skip_nb_char(Current);
1044
3.39M
    if (I == Current)
1045
65.7k
      break;
1046
3.33M
    Current = I;
1047
3.33M
    ++Column;
1048
3.33M
  }
1049
65.7k
}
1050
1051
1.10M
void Scanner::scanToNextToken() {
1052
1.49M
  while (true) {
1053
3.63M
    while (*Current == ' ' || 
*Current == '\t'1.49M
) {
1054
2.14M
      skip(1);
1055
2.14M
    }
1056
1.49M
1057
1.49M
    skipComment();
1058
1.49M
1059
1.49M
    // Skip EOL.
1060
1.49M
    StringRef::iterator i = skip_b_break(Current);
1061
1.49M
    if (i == Current)
1062
1.10M
      break;
1063
392k
    Current = i;
1064
392k
    ++Line;
1065
392k
    Column = 0;
1066
392k
    // New lines may start a simple key.
1067
392k
    if (!FlowLevel)
1068
310k
      IsSimpleKeyAllowed = true;
1069
392k
  }
1070
1.10M
}
1071
1072
4.90k
bool Scanner::scanStreamStart() {
1073
4.90k
  IsStartOfStream = false;
1074
4.90k
1075
4.90k
  EncodingInfo EI = getUnicodeEncoding(currentInput());
1076
4.90k
1077
4.90k
  Token T;
1078
4.90k
  T.Kind = Token::TK_StreamStart;
1079
4.90k
  T.Range = StringRef(Current, EI.second);
1080
4.90k
  TokenQueue.push_back(T);
1081
4.90k
  Current += EI.second;
1082
4.90k
  return true;
1083
4.90k
}
1084
1085
3.55k
bool Scanner::scanStreamEnd() {
1086
3.55k
  // Force an ending new line if one isn't present.
1087
3.55k
  if (Column != 0) {
1088
431
    Column = 0;
1089
431
    ++Line;
1090
431
  }
1091
3.55k
1092
3.55k
  unrollIndent(-1);
1093
3.55k
  SimpleKeys.clear();
1094
3.55k
  IsSimpleKeyAllowed = false;
1095
3.55k
1096
3.55k
  Token T;
1097
3.55k
  T.Kind = Token::TK_StreamEnd;
1098
3.55k
  T.Range = StringRef(Current, 0);
1099
3.55k
  TokenQueue.push_back(T);
1100
3.55k
  return true;
1101
3.55k
}
1102
1103
19
bool Scanner::scanDirective() {
1104
19
  // Reset the indentation level.
1105
19
  unrollIndent(-1);
1106
19
  SimpleKeys.clear();
1107
19
  IsSimpleKeyAllowed = false;
1108
19
1109
19
  StringRef::iterator Start = Current;
1110
19
  consume('%');
1111
19
  StringRef::iterator NameStart = Current;
1112
19
  Current = skip_while(&Scanner::skip_ns_char, Current);
1113
19
  StringRef Name(NameStart, Current - NameStart);
1114
19
  Current = skip_while(&Scanner::skip_s_white, Current);
1115
19
1116
19
  Token T;
1117
19
  if (Name == "YAML") {
1118
5
    Current = skip_while(&Scanner::skip_ns_char, Current);
1119
5
    T.Kind = Token::TK_VersionDirective;
1120
5
    T.Range = StringRef(Start, Current - Start);
1121
5
    TokenQueue.push_back(T);
1122
5
    return true;
1123
14
  } else if(Name == "TAG") {
1124
13
    Current = skip_while(&Scanner::skip_ns_char, Current);
1125
13
    Current = skip_while(&Scanner::skip_s_white, Current);
1126
13
    Current = skip_while(&Scanner::skip_ns_char, Current);
1127
13
    T.Kind = Token::TK_TagDirective;
1128
13
    T.Range = StringRef(Start, Current - Start);
1129
13
    TokenQueue.push_back(T);
1130
13
    return true;
1131
13
  }
1132
1
  return false;
1133
1
}
1134
1135
29.8k
bool Scanner::scanDocumentIndicator(bool IsStart) {
1136
29.8k
  unrollIndent(-1);
1137
29.8k
  SimpleKeys.clear();
1138
29.8k
  IsSimpleKeyAllowed = false;
1139
29.8k
1140
29.8k
  Token T;
1141
29.8k
  T.Kind = IsStart ? 
Token::TK_DocumentStart15.5k
:
Token::TK_DocumentEnd14.2k
;
1142
29.8k
  T.Range = StringRef(Current, 3);
1143
29.8k
  skip(3);
1144
29.8k
  TokenQueue.push_back(T);
1145
29.8k
  return true;
1146
29.8k
}
1147
1148
43.1k
bool Scanner::scanFlowCollectionStart(bool IsSequence) {
1149
43.1k
  Token T;
1150
43.1k
  T.Kind = IsSequence ? 
Token::TK_FlowSequenceStart6.66k
1151
43.1k
                      : 
Token::TK_FlowMappingStart36.5k
;
1152
43.1k
  T.Range = StringRef(Current, 1);
1153
43.1k
  skip(1);
1154
43.1k
  TokenQueue.push_back(T);
1155
43.1k
1156
43.1k
  // [ and { may begin a simple key.
1157
43.1k
  saveSimpleKeyCandidate(--TokenQueue.end(), Column - 1, false);
1158
43.1k
1159
43.1k
  // And may also be followed by a simple key.
1160
43.1k
  IsSimpleKeyAllowed = true;
1161
43.1k
  ++FlowLevel;
1162
43.1k
  return true;
1163
43.1k
}
1164
1165
43.0k
bool Scanner::scanFlowCollectionEnd(bool IsSequence) {
1166
43.0k
  removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1167
43.0k
  IsSimpleKeyAllowed = false;
1168
43.0k
  Token T;
1169
43.0k
  T.Kind = IsSequence ? 
Token::TK_FlowSequenceEnd6.63k
1170
43.0k
                      : 
Token::TK_FlowMappingEnd36.4k
;
1171
43.0k
  T.Range = StringRef(Current, 1);
1172
43.0k
  skip(1);
1173
43.0k
  TokenQueue.push_back(T);
1174
43.0k
  if (FlowLevel)
1175
43.0k
    --FlowLevel;
1176
43.0k
  return true;
1177
43.0k
}
1178
1179
92.5k
bool Scanner::scanFlowEntry() {
1180
92.5k
  removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1181
92.5k
  IsSimpleKeyAllowed = true;
1182
92.5k
  Token T;
1183
92.5k
  T.Kind = Token::TK_FlowEntry;
1184
92.5k
  T.Range = StringRef(Current, 1);
1185
92.5k
  skip(1);
1186
92.5k
  TokenQueue.push_back(T);
1187
92.5k
  return true;
1188
92.5k
}
1189
1190
43.9k
bool Scanner::scanBlockEntry() {
1191
43.9k
  rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end());
1192
43.9k
  removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1193
43.9k
  IsSimpleKeyAllowed = true;
1194
43.9k
  Token T;
1195
43.9k
  T.Kind = Token::TK_BlockEntry;
1196
43.9k
  T.Range = StringRef(Current, 1);
1197
43.9k
  skip(1);
1198
43.9k
  TokenQueue.push_back(T);
1199
43.9k
  return true;
1200
43.9k
}
1201
1202
31
bool Scanner::scanKey() {
1203
31
  if (!FlowLevel)
1204
17
    rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
1205
31
1206
31
  removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1207
31
  IsSimpleKeyAllowed = !FlowLevel;
1208
31
1209
31
  Token T;
1210
31
  T.Kind = Token::TK_Key;
1211
31
  T.Range = StringRef(Current, 1);
1212
31
  skip(1);
1213
31
  TokenQueue.push_back(T);
1214
31
  return true;
1215
31
}
1216
1217
283k
bool Scanner::scanValue() {
1218
283k
  // If the previous token could have been a simple key, insert the key token
1219
283k
  // into the token queue.
1220
283k
  if (!SimpleKeys.empty()) {
1221
283k
    SimpleKey SK = SimpleKeys.pop_back_val();
1222
283k
    Token T;
1223
283k
    T.Kind = Token::TK_Key;
1224
283k
    T.Range = SK.Tok->Range;
1225
283k
    TokenQueueT::iterator i, e;
1226
625k
    for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e; 
++i341k
) {
1227
625k
      if (i == SK.Tok)
1228
283k
        break;
1229
625k
    }
1230
283k
    assert(i != e && "SimpleKey not in token queue!");
1231
283k
    i = TokenQueue.insert(i, T);
1232
283k
1233
283k
    // We may also need to add a Block-Mapping-Start token.
1234
283k
    rollIndent(SK.Column, Token::TK_BlockMappingStart, i);
1235
283k
1236
283k
    IsSimpleKeyAllowed = false;
1237
283k
  } else {
1238
23
    if (!FlowLevel)
1239
8
      rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
1240
23
    IsSimpleKeyAllowed = !FlowLevel;
1241
23
  }
1242
283k
1243
283k
  Token T;
1244
283k
  T.Kind = Token::TK_Value;
1245
283k
  T.Range = StringRef(Current, 1);
1246
283k
  skip(1);
1247
283k
  TokenQueue.push_back(T);
1248
283k
  return true;
1249
283k
}
1250
1251
// Forbidding inlining improves performance by roughly 20%.
1252
// FIXME: Remove once llvm optimizes this to the faster version without hints.
1253
LLVM_ATTRIBUTE_NOINLINE static bool
1254
wasEscaped(StringRef::iterator First, StringRef::iterator Position);
1255
1256
// Returns whether a character at 'Position' was escaped with a leading '\'.
1257
// 'First' specifies the position of the first character in the string.
1258
static bool wasEscaped(StringRef::iterator First,
1259
64
                       StringRef::iterator Position) {
1260
64
  assert(Position - 1 >= First);
1261
64
  StringRef::iterator I = Position - 1;
1262
64
  // We calculate the number of consecutive '\'s before the current position
1263
64
  // by iterating backwards through our string.
1264
164
  while (I >= First && 
*I == '\\'147
)
--I100
;
1265
64
  // (Position - 1 - I) now contains the number of '\'s before the current
1266
64
  // position. If it is odd, the character at 'Position' was escaped.
1267
64
  return (Position - 1 - I) % 2 == 1;
1268
64
}
1269
1270
99.7k
bool Scanner::scanFlowScalar(bool IsDoubleQuoted) {
1271
99.7k
  StringRef::iterator Start = Current;
1272
99.7k
  unsigned ColStart = Column;
1273
99.7k
  if (IsDoubleQuoted) {
1274
78.8k
    do {
1275
78.8k
      ++Current;
1276
21.9M
      while (Current != End && 
*Current != '"'21.9M
)
1277
21.8M
        ++Current;
1278
78.8k
      // Repeat until the previous character was not a '\' or was an escaped
1279
78.8k
      // backslash.
1280
78.8k
    } while (   Current != End
1281
78.8k
             && 
*(Current - 1) == '\\'78.8k
1282
78.8k
             && 
wasEscaped(Start + 1, Current)64
);
1283
78.7k
  } else {
1284
20.9k
    skip(1);
1285
419k
    while (true) {
1286
419k
      // Skip a ' followed by another '.
1287
419k
      if (Current + 1 < End && 
*Current == '\''419k
&&
*(Current + 1) == '\''20.9k
) {
1288
9
        skip(2);
1289
9
        continue;
1290
419k
      } else if (*Current == '\'')
1291
20.9k
        break;
1292
398k
      StringRef::iterator i = skip_nb_char(Current);
1293
398k
      if (i == Current) {
1294
15
        i = skip_b_break(Current);
1295
15
        if (i == Current)
1296
1
          break;
1297
14
        Current = i;
1298
14
        Column = 0;
1299
14
        ++Line;
1300
398k
      } else {
1301
398k
        if (i == End)
1302
0
          break;
1303
398k
        Current = i;
1304
398k
        ++Column;
1305
398k
      }
1306
398k
    }
1307
20.9k
  }
1308
99.7k
1309
99.7k
  if (Current == End) {
1310
11
    setError("Expected quote at end of scalar", Current);
1311
11
    return false;
1312
11
  }
1313
99.7k
1314
99.7k
  skip(1); // Skip ending quote.
1315
99.7k
  Token T;
1316
99.7k
  T.Kind = Token::TK_Scalar;
1317
99.7k
  T.Range = StringRef(Start, Current - Start);
1318
99.7k
  TokenQueue.push_back(T);
1319
99.7k
1320
99.7k
  saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
1321
99.7k
1322
99.7k
  IsSimpleKeyAllowed = false;
1323
99.7k
1324
99.7k
  return true;
1325
99.7k
}
1326
1327
446k
bool Scanner::scanPlainScalar() {
1328
446k
  StringRef::iterator Start = Current;
1329
446k
  unsigned ColStart = Column;
1330
446k
  unsigned LeadingBlanks = 0;
1331
446k
  assert(Indent >= -1 && "Indent must be >= -1 !");
1332
446k
  unsigned indent = static_cast<unsigned>(Indent + 1);
1333
467k
  while (true) {
1334
467k
    if (*Current == '#')
1335
324
      break;
1336
467k
1337
3.83M
    
while (467k
!isBlankOrBreak(Current)) {
1338
3.67M
      if (  FlowLevel && 
*Current == ':'774k
1339
3.67M
          && 
!(58.6k
isBlankOrBreak(Current + 1)58.6k
||
*(Current + 1) == ','3
)) {
1340
2
        setError("Found unexpected ':' while scanning a plain scalar", Current);
1341
2
        return false;
1342
2
      }
1343
3.67M
1344
3.67M
      // Check for the end of the plain scalar.
1345
3.67M
      if (  (*Current == ':' && 
isBlankOrBreak(Current + 1)242k
)
1346
3.67M
          || 
( 3.43M
FlowLevel3.43M
1347
3.43M
          && (StringRef(Current, 1).find_first_of(",:?[]{}")
1348
716k
              != StringRef::npos)))
1349
310k
        break;
1350
3.36M
1351
3.36M
      StringRef::iterator i = skip_nb_char(Current);
1352
3.36M
      if (i == Current)
1353
261
        break;
1354
3.36M
      Current = i;
1355
3.36M
      ++Column;
1356
3.36M
    }
1357
467k
1358
467k
    // Are we at the end?
1359
467k
    
if (467k
!isBlankOrBreak(Current)467k
)
1360
310k
      break;
1361
156k
1362
156k
    // Eat blanks.
1363
156k
    StringRef::iterator Tmp = Current;
1364
869k
    while (isBlankOrBreak(Tmp)) {
1365
712k
      StringRef::iterator i = skip_s_white(Tmp);
1366
712k
      if (i != Tmp) {
1367
572k
        if (LeadingBlanks && 
(Column < indent)551k
&&
*Tmp == '\t'551k
) {
1368
0
          setError("Found invalid tab character in indentation", Tmp);
1369
0
          return false;
1370
0
        }
1371
572k
        Tmp = i;
1372
572k
        ++Column;
1373
572k
      } else {
1374
140k
        i = skip_b_break(Tmp);
1375
140k
        if (!LeadingBlanks)
1376
135k
          LeadingBlanks = 1;
1377
140k
        Tmp = i;
1378
140k
        Column = 0;
1379
140k
        ++Line;
1380
140k
      }
1381
712k
    }
1382
156k
1383
156k
    if (!FlowLevel && 
Column < indent136k
)
1384
135k
      break;
1385
20.5k
1386
20.5k
    Current = Tmp;
1387
20.5k
  }
1388
446k
  
if (446k
Start == Current446k
) {
1389
3
    setError("Got empty plain scalar", Start);
1390
3
    return false;
1391
3
  }
1392
446k
  Token T;
1393
446k
  T.Kind = Token::TK_Scalar;
1394
446k
  T.Range = StringRef(Start, Current - Start);
1395
446k
  TokenQueue.push_back(T);
1396
446k
1397
446k
  // Plain scalars can be simple keys.
1398
446k
  saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
1399
446k
1400
446k
  IsSimpleKeyAllowed = false;
1401
446k
1402
446k
  return true;
1403
446k
}
1404
1405
29
bool Scanner::scanAliasOrAnchor(bool IsAlias) {
1406
29
  StringRef::iterator Start = Current;
1407
29
  unsigned ColStart = Column;
1408
29
  skip(1);
1409
168
  while(true) {
1410
168
    if (   *Current == '[' || *Current == ']'
1411
168
        || *Current == '{' || *Current == '}'
1412
168
        || *Current == ','
1413
168
        || 
*Current == ':'164
)
1414
6
      break;
1415
162
    StringRef::iterator i = skip_ns_char(Current);
1416
162
    if (i == Current)
1417
23
      break;
1418
139
    Current = i;
1419
139
    ++Column;
1420
139
  }
1421
29
1422
29
  if (Start == Current) {
1423
0
    setError("Got empty alias or anchor", Start);
1424
0
    return false;
1425
0
  }
1426
29
1427
29
  Token T;
1428
29
  T.Kind = IsAlias ? 
Token::TK_Alias16
:
Token::TK_Anchor13
;
1429
29
  T.Range = StringRef(Start, Current - Start);
1430
29
  TokenQueue.push_back(T);
1431
29
1432
29
  // Alias and anchors can be simple keys.
1433
29
  saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
1434
29
1435
29
  IsSimpleKeyAllowed = false;
1436
29
1437
29
  return true;
1438
29
}
1439
1440
24.9k
char Scanner::scanBlockChompingIndicator() {
1441
24.9k
  char Indicator = ' ';
1442
24.9k
  if (Current != End && (*Current == '+' || 
*Current == '-'24.9k
)) {
1443
17
    Indicator = *Current;
1444
17
    skip(1);
1445
17
  }
1446
24.9k
  return Indicator;
1447
24.9k
}
1448
1449
/// Get the number of line breaks after chomping.
1450
///
1451
/// Return the number of trailing line breaks to emit, depending on
1452
/// \p ChompingIndicator.
1453
static unsigned getChompedLineBreaks(char ChompingIndicator,
1454
12.4k
                                     unsigned LineBreaks, StringRef Str) {
1455
12.4k
  if (ChompingIndicator == '-') // Strip all line breaks.
1456
11
    return 0;
1457
12.4k
  if (ChompingIndicator == '+') // Keep all line breaks.
1458
6
    return LineBreaks;
1459
12.4k
  // Clip trailing lines.
1460
12.4k
  return Str.empty() ? 
07
:
112.4k
;
1461
12.4k
}
1462
1463
12.5k
unsigned Scanner::scanBlockIndentationIndicator() {
1464
12.5k
  unsigned Indent = 0;
1465
12.5k
  if (Current != End && (*Current >= '1' && 
*Current <= '9'7
)) {
1466
6
    Indent = unsigned(*Current - '0');
1467
6
    skip(1);
1468
6
  }
1469
12.5k
  return Indent;
1470
12.5k
}
1471
1472
bool Scanner::scanBlockScalarHeader(char &ChompingIndicator,
1473
12.5k
                                    unsigned &IndentIndicator, bool &IsDone) {
1474
12.5k
  auto Start = Current;
1475
12.5k
1476
12.5k
  ChompingIndicator = scanBlockChompingIndicator();
1477
12.5k
  IndentIndicator = scanBlockIndentationIndicator();
1478
12.5k
  // Check for the chomping indicator once again.
1479
12.5k
  if (ChompingIndicator == ' ')
1480
12.4k
    ChompingIndicator = scanBlockChompingIndicator();
1481
12.5k
  Current = skip_while(&Scanner::skip_s_white, Current);
1482
12.5k
  skipComment();
1483
12.5k
1484
12.5k
  if (Current == End) { // EOF, we have an empty scalar.
1485
2
    Token T;
1486
2
    T.Kind = Token::TK_BlockScalar;
1487
2
    T.Range = StringRef(Start, Current - Start);
1488
2
    TokenQueue.push_back(T);
1489
2
    IsDone = true;
1490
2
    return true;
1491
2
  }
1492
12.4k
1493
12.4k
  if (!consumeLineBreakIfPresent()) {
1494
3
    setError("Expected a line break after block scalar header", Current);
1495
3
    return false;
1496
3
  }
1497
12.4k
  return true;
1498
12.4k
}
1499
1500
bool Scanner::findBlockScalarIndent(unsigned &BlockIndent,
1501
                                    unsigned BlockExitIndent,
1502
12.4k
                                    unsigned &LineBreaks, bool &IsDone) {
1503
12.4k
  unsigned MaxAllSpaceLineCharacters = 0;
1504
12.4k
  StringRef::iterator LongestAllSpaceLine;
1505
12.4k
1506
12.9k
  while (true) {
1507
12.9k
    advanceWhile(&Scanner::skip_s_space);
1508
12.9k
    if (skip_nb_char(Current) != Current) {
1509
12.4k
      // This line isn't empty, so try and find the indentation.
1510
12.4k
      if (Column <= BlockExitIndent) { // End of the block literal.
1511
8
        IsDone = true;
1512
8
        return true;
1513
8
      }
1514
12.4k
      // We found the block's indentation.
1515
12.4k
      BlockIndent = Column;
1516
12.4k
      if (MaxAllSpaceLineCharacters > BlockIndent) {
1517
1
        setError(
1518
1
            "Leading all-spaces line must be smaller than the block indent",
1519
1
            LongestAllSpaceLine);
1520
1
        return false;
1521
1
      }
1522
12.4k
      return true;
1523
12.4k
    }
1524
423
    if (skip_b_break(Current) != Current &&
1525
423
        
Column > MaxAllSpaceLineCharacters422
) {
1526
16
      // Record the longest all-space line in case it's longer than the
1527
16
      // discovered block indent.
1528
16
      MaxAllSpaceLineCharacters = Column;
1529
16
      LongestAllSpaceLine = Current;
1530
16
    }
1531
423
1532
423
    // Check for EOF.
1533
423
    if (Current == End) {
1534
1
      IsDone = true;
1535
1
      return true;
1536
1
    }
1537
422
1538
422
    if (!consumeLineBreakIfPresent()) {
1539
0
      IsDone = true;
1540
0
      return true;
1541
0
    }
1542
422
    ++LineBreaks;
1543
422
  }
1544
12.4k
  
return true0
;
1545
12.4k
}
1546
1547
bool Scanner::scanBlockScalarIndent(unsigned BlockIndent,
1548
298k
                                    unsigned BlockExitIndent, bool &IsDone) {
1549
298k
  // Skip the indentation.
1550
800k
  while (Column < BlockIndent) {
1551
536k
    auto I = skip_s_space(Current);
1552
536k
    if (I == Current)
1553
34.8k
      break;
1554
501k
    Current = I;
1555
501k
    ++Column;
1556
501k
  }
1557
298k
1558
298k
  if (skip_nb_char(Current) == Current)
1559
24.6k
    return true;
1560
273k
1561
273k
  if (Column <= BlockExitIndent) { // End of the block literal.
1562
12.3k
    IsDone = true;
1563
12.3k
    return true;
1564
12.3k
  }
1565
261k
1566
261k
  if (Column < BlockIndent) {
1567
4
    if (Current != End && *Current == '#') { // Trailing comment.
1568
2
      IsDone = true;
1569
2
      return true;
1570
2
    }
1571
2
    setError("A text line is less indented than the block scalar", Current);
1572
2
    return false;
1573
2
  }
1574
261k
  return true; // A normal text line.
1575
261k
}
1576
1577
12.5k
bool Scanner::scanBlockScalar(bool IsLiteral) {
1578
12.5k
  // Eat '|' or '>'
1579
12.5k
  assert(*Current == '|' || *Current == '>');
1580
12.5k
  skip(1);
1581
12.5k
1582
12.5k
  char ChompingIndicator;
1583
12.5k
  unsigned BlockIndent;
1584
12.5k
  bool IsDone = false;
1585
12.5k
  if (!scanBlockScalarHeader(ChompingIndicator, BlockIndent, IsDone))
1586
3
    return false;
1587
12.4k
  if (IsDone)
1588
2
    return true;
1589
12.4k
1590
12.4k
  auto Start = Current;
1591
12.4k
  unsigned BlockExitIndent = Indent < 0 ? 
01.14k
:
(unsigned)Indent11.3k
;
1592
12.4k
  unsigned LineBreaks = 0;
1593
12.4k
  if (BlockIndent == 0) {
1594
12.4k
    if (!findBlockScalarIndent(BlockIndent, BlockExitIndent, LineBreaks,
1595
12.4k
                               IsDone))
1596
1
      return false;
1597
12.4k
  }
1598
12.4k
1599
12.4k
  // Scan the block's scalars body.
1600
12.4k
  SmallString<256> Str;
1601
298k
  while (!IsDone) {
1602
298k
    if (!scanBlockScalarIndent(BlockIndent, BlockExitIndent, IsDone))
1603
2
      return false;
1604
298k
    if (IsDone)
1605
12.3k
      break;
1606
285k
1607
285k
    // Parse the current line.
1608
285k
    auto LineStart = Current;
1609
285k
    advanceWhile(&Scanner::skip_nb_char);
1610
285k
    if (LineStart != Current) {
1611
261k
      Str.append(LineBreaks, '\n');
1612
261k
      Str.append(StringRef(LineStart, Current - LineStart));
1613
261k
      LineBreaks = 0;
1614
261k
    }
1615
285k
1616
285k
    // Check for EOF.
1617
285k
    if (Current == End)
1618
109
      break;
1619
285k
1620
285k
    if (!consumeLineBreakIfPresent())
1621
0
      break;
1622
285k
    ++LineBreaks;
1623
285k
  }
1624
12.4k
1625
12.4k
  
if (12.4k
Current == End12.4k
&&
!LineBreaks110
)
1626
3
    // Ensure that there is at least one line break before the end of file.
1627
3
    LineBreaks = 1;
1628
12.4k
  Str.append(getChompedLineBreaks(ChompingIndicator, LineBreaks, Str), '\n');
1629
12.4k
1630
12.4k
  // New lines may start a simple key.
1631
12.4k
  if (!FlowLevel)
1632
12.4k
    IsSimpleKeyAllowed = true;
1633
12.4k
1634
12.4k
  Token T;
1635
12.4k
  T.Kind = Token::TK_BlockScalar;
1636
12.4k
  T.Range = StringRef(Start, Current - Start);
1637
12.4k
  T.Value = Str.str().str();
1638
12.4k
  TokenQueue.push_back(T);
1639
12.4k
  return true;
1640
12.4k
}
1641
1642
3.51k
bool Scanner::scanTag() {
1643
3.51k
  StringRef::iterator Start = Current;
1644
3.51k
  unsigned ColStart = Column;
1645
3.51k
  skip(1); // Eat !.
1646
3.51k
  if (Current == End || isBlankOrBreak(Current))
;2
// An empty tag.
1647
3.51k
  else if (*Current == '<') {
1648
3
    skip(1);
1649
3
    scan_ns_uri_char();
1650
3
    if (!consume('>'))
1651
2
      return false;
1652
3.50k
  } else {
1653
3.50k
    // FIXME: Actually parse the c-ns-shorthand-tag rule.
1654
3.50k
    Current = skip_while(&Scanner::skip_ns_char, Current);
1655
3.50k
  }
1656
3.51k
1657
3.51k
  Token T;
1658
3.51k
  T.Kind = Token::TK_Tag;
1659
3.51k
  T.Range = StringRef(Start, Current - Start);
1660
3.51k
  TokenQueue.push_back(T);
1661
3.51k
1662
3.51k
  // Tags can be simple keys.
1663
3.51k
  saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
1664
3.51k
1665
3.51k
  IsSimpleKeyAllowed = false;
1666
3.51k
1667
3.51k
  return true;
1668
3.51k
}
1669
1670
1.10M
bool Scanner::fetchMoreTokens() {
1671
1.10M
  if (IsStartOfStream)
1672
4.90k
    return scanStreamStart();
1673
1.10M
1674
1.10M
  scanToNextToken();
1675
1.10M
1676
1.10M
  if (Current == End)
1677
3.55k
    return scanStreamEnd();
1678
1.09M
1679
1.09M
  removeStaleSimpleKeyCandidates();
1680
1.09M
1681
1.09M
  unrollIndent(Column);
1682
1.09M
1683
1.09M
  if (Column == 0 && 
*Current == '%'120k
)
1684
19
    return scanDirective();
1685
1.09M
1686
1.09M
  if (Column == 0 && 
Current + 4 <= End120k
1687
1.09M
      && 
*Current == '-'120k
1688
1.09M
      && 
*(Current + 1) == '-'16.6k
1689
1.09M
      && 
*(Current + 2) == '-'15.5k
1690
1.09M
      && 
(15.5k
Current + 3 == End15.5k
||
isBlankOrBreak(Current + 3)15.5k
))
1691
15.5k
    return scanDocumentIndicator(true);
1692
1.08M
1693
1.08M
  if (Column == 0 && 
Current + 4 <= End105k
1694
1.08M
      && 
*Current == '.'105k
1695
1.08M
      && 
*(Current + 1) == '.'14.2k
1696
1.08M
      && 
*(Current + 2) == '.'14.2k
1697
1.08M
      && 
(14.2k
Current + 3 == End14.2k
||
isBlankOrBreak(Current + 3)14.2k
))
1698
14.2k
    return scanDocumentIndicator(false);
1699
1.06M
1700
1.06M
  if (*Current == '[')
1701
6.66k
    return scanFlowCollectionStart(true);
1702
1.06M
1703
1.06M
  if (*Current == '{')
1704
36.5k
    return scanFlowCollectionStart(false);
1705
1.02M
1706
1.02M
  if (*Current == ']')
1707
6.63k
    return scanFlowCollectionEnd(true);
1708
1.01M
1709
1.01M
  if (*Current == '}')
1710
36.4k
    return scanFlowCollectionEnd(false);
1711
982k
1712
982k
  if (*Current == ',')
1713
92.5k
    return scanFlowEntry();
1714
890k
1715
890k
  if (*Current == '-' && 
isBlankOrBreak(Current + 1)44.9k
)
1716
43.9k
    return scanBlockEntry();
1717
846k
1718
846k
  if (*Current == '?' && 
(31
FlowLevel31
||
isBlankOrBreak(Current + 1)17
))
1719
31
    return scanKey();
1720
846k
1721
846k
  if (*Current == ':' && 
(283k
FlowLevel283k
||
isBlankOrBreak(Current + 1)183k
))
1722
283k
    return scanValue();
1723
562k
1724
562k
  if (*Current == '*')
1725
16
    return scanAliasOrAnchor(true);
1726
562k
1727
562k
  if (*Current == '&')
1728
13
    return scanAliasOrAnchor(false);
1729
562k
1730
562k
  if (*Current == '!')
1731
3.51k
    return scanTag();
1732
559k
1733
559k
  if (*Current == '|' && 
!FlowLevel12.4k
)
1734
12.4k
    return scanBlockScalar(true);
1735
546k
1736
546k
  if (*Current == '>' && 
!FlowLevel20
)
1737
20
    return scanBlockScalar(false);
1738
546k
1739
546k
  if (*Current == '\'')
1740
20.9k
    return scanFlowScalar(false);
1741
525k
1742
525k
  if (*Current == '"')
1743
78.7k
    return scanFlowScalar(true);
1744
446k
1745
446k
  // Get a plain scalar.
1746
446k
  StringRef FirstChar(Current, 1);
1747
446k
  if (!(isBlankOrBreak(Current)
1748
446k
        || FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") != StringRef::npos)
1749
446k
      || 
(1.03k
*Current == '-'1.03k
&&
!isBlankOrBreak(Current + 1)1.03k
)
1750
446k
      || 
(2
!FlowLevel2
&&
(2
*Current == '?'2
||
*Current == ':'2
)
1751
2
          && 
isBlankOrBreak(Current + 1)1
)
1752
446k
      || 
(2
!FlowLevel2
&&
*Current == ':'2
1753
2
                      && 
Current + 2 < End1
1754
2
                      && 
*(Current + 1) == ':'1
1755
2
                      && 
!isBlankOrBreak(Current + 2)1
))
1756
446k
    return scanPlainScalar();
1757
1
1758
1
  setError("Unrecognized character while tokenizing.");
1759
1
  return false;
1760
1
}
1761
1762
Stream::Stream(StringRef Input, SourceMgr &SM, bool ShowColors,
1763
               std::error_code *EC)
1764
4.78k
    : scanner(new Scanner(Input, SM, ShowColors, EC)), CurrentDoc() {}
1765
1766
Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM, bool ShowColors,
1767
               std::error_code *EC)
1768
120
    : scanner(new Scanner(InputBuffer, SM, ShowColors, EC)), CurrentDoc() {}
1769
1770
4.79k
Stream::~Stream() = default;
1771
1772
935
bool Stream::failed() { return scanner->failed(); }
1773
1774
128
void Stream::printError(Node *N, const Twine &Msg) {
1775
128
  scanner->printError( N->getSourceRange().Start
1776
128
                     , SourceMgr::DK_Error
1777
128
                     , Msg
1778
128
                     , N->getSourceRange());
1779
128
}
1780
1781
4.90k
document_iterator Stream::begin() {
1782
4.90k
  if (CurrentDoc)
1783
0
    report_fatal_error("Can only iterate over the stream once");
1784
4.90k
1785
4.90k
  // Skip Stream-Start.
1786
4.90k
  scanner->getNext();
1787
4.90k
1788
4.90k
  CurrentDoc.reset(new Document(*this));
1789
4.90k
  return document_iterator(CurrentDoc);
1790
4.90k
}
1791
1792
32.1k
document_iterator Stream::end() {
1793
32.1k
  return document_iterator();
1794
32.1k
}
1795
1796
53
void Stream::skip() {
1797
107
  for (document_iterator i = begin(), e = end(); i != e; 
++i54
)
1798
54
    i->skip();
1799
53
}
1800
1801
Node::Node(unsigned int Type, std::unique_ptr<Document> &D, StringRef A,
1802
           StringRef T)
1803
945k
    : Doc(D), TypeID(Type), Anchor(A), Tag(T) {
1804
945k
  SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin());
1805
945k
  SourceRange = SMRange(Start, Start);
1806
945k
}
1807
1808
13.4k
std::string Node::getVerbatimTag() const {
1809
13.4k
  StringRef Raw = getRawTag();
1810
13.4k
  if (!Raw.empty() && 
Raw != "!"5.58k
) {
1811
5.58k
    std::string Ret;
1812
5.58k
    if (Raw.find_last_of('!') == 0) {
1813
5.55k
      Ret = Doc->getTagMap().find("!")->second;
1814
5.55k
      Ret += Raw.substr(1);
1815
5.55k
      return Ret;
1816
5.55k
    } else 
if (37
Raw.startswith("!!")37
) {
1817
31
      Ret = Doc->getTagMap().find("!!")->second;
1818
31
      Ret += Raw.substr(2);
1819
31
      return Ret;
1820
31
    } else {
1821
6
      StringRef TagHandle = Raw.substr(0, Raw.find_last_of('!') + 1);
1822
6
      std::map<StringRef, StringRef>::const_iterator It =
1823
6
          Doc->getTagMap().find(TagHandle);
1824
6
      if (It != Doc->getTagMap().end())
1825
4
        Ret = It->second;
1826
2
      else {
1827
2
        Token T;
1828
2
        T.Kind = Token::TK_Tag;
1829
2
        T.Range = TagHandle;
1830
2
        setError(Twine("Unknown tag handle ") + TagHandle, T);
1831
2
      }
1832
6
      Ret += Raw.substr(Raw.find_last_of('!') + 1);
1833
6
      return Ret;
1834
6
    }
1835
7.89k
  }
1836
7.89k
1837
7.89k
  switch (getType()) {
1838
7.89k
  case NK_Null:
1839
42
    return "tag:yaml.org,2002:null";
1840
7.89k
  case NK_Scalar:
1841
7.48k
  case NK_BlockScalar:
1842
7.48k
    // TODO: Tag resolution.
1843
7.48k
    return "tag:yaml.org,2002:str";
1844
7.48k
  case NK_Mapping:
1845
276
    return "tag:yaml.org,2002:map";
1846
7.48k
  case NK_Sequence:
1847
94
    return "tag:yaml.org,2002:seq";
1848
0
  }
1849
0
1850
0
  return "";
1851
0
}
1852
1853
2.63M
Token &Node::peekNext() {
1854
2.63M
  return Doc->peekNext();
1855
2.63M
}
1856
1857
802k
Token Node::getNext() {
1858
802k
  return Doc->getNext();
1859
802k
}
1860
1861
642k
Node *Node::parseBlockNode() {
1862
642k
  return Doc->parseBlockNode();
1863
642k
}
1864
1865
286k
BumpPtrAllocator &Node::getAllocator() {
1866
286k
  return Doc->NodeAllocator;
1867
286k
}
1868
1869
22
void Node::setError(const Twine &Msg, Token &Tok) const {
1870
22
  Doc->setError(Msg, Tok);
1871
22
}
1872
1873
837k
bool Node::failed() const {
1874
837k
  return Doc->failed();
1875
837k
}
1876
1877
492k
StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const {
1878
492k
  // TODO: Handle newlines properly. We need to remove leading whitespace.
1879
492k
  if (Value[0] == '"') { // Double quoted.
1880
52.8k
    // Pull off the leading and trailing "s.
1881
52.8k
    StringRef UnquotedValue = Value.substr(1, Value.size() - 2);
1882
52.8k
    // Search for characters that would require unescaping the value.
1883
52.8k
    StringRef::size_type i = UnquotedValue.find_first_of("\\\r\n");
1884
52.8k
    if (i != StringRef::npos)
1885
38
      return unescapeDoubleQuoted(UnquotedValue, i, Storage);
1886
52.8k
    return UnquotedValue;
1887
439k
  } else if (Value[0] == '\'') { // Single quoted.
1888
20.5k
    // Pull off the leading and trailing 's.
1889
20.5k
    StringRef UnquotedValue = Value.substr(1, Value.size() - 2);
1890
20.5k
    StringRef::size_type i = UnquotedValue.find('\'');
1891
20.5k
    if (i != StringRef::npos) {
1892
8
      // We're going to need Storage.
1893
8
      Storage.clear();
1894
8
      Storage.reserve(UnquotedValue.size());
1895
17
      for (; i != StringRef::npos; 
i = UnquotedValue.find('\'')9
) {
1896
9
        StringRef Valid(UnquotedValue.begin(), i);
1897
9
        Storage.insert(Storage.end(), Valid.begin(), Valid.end());
1898
9
        Storage.push_back('\'');
1899
9
        UnquotedValue = UnquotedValue.substr(i + 2);
1900
9
      }
1901
8
      Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end());
1902
8
      return StringRef(Storage.begin(), Storage.size());
1903
8
    }
1904
20.4k
    return UnquotedValue;
1905
20.4k
  }
1906
418k
  // Plain or block.
1907
418k
  return Value.rtrim(' ');
1908
418k
}
1909
1910
StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue
1911
                                          , StringRef::size_type i
1912
                                          , SmallVectorImpl<char> &Storage)
1913
38
                                          const {
1914
38
  // Use Storage to build proper value.
1915
38
  Storage.clear();
1916
38
  Storage.reserve(UnquotedValue.size());
1917
153
  for (; i != StringRef::npos; 
i = UnquotedValue.find_first_of("\\\r\n")115
) {
1918
116
    // Insert all previous chars into Storage.
1919
116
    StringRef Valid(UnquotedValue.begin(), i);
1920
116
    Storage.insert(Storage.end(), Valid.begin(), Valid.end());
1921
116
    // Chop off inserted chars.
1922
116
    UnquotedValue = UnquotedValue.substr(i);
1923
116
1924
116
    assert(!UnquotedValue.empty() && "Can't be empty!");
1925
116
1926
116
    // Parse escape or line break.
1927
116
    switch (UnquotedValue[0]) {
1928
116
    case '\r':
1929
23
    case '\n':
1930
23
      Storage.push_back('\n');
1931
23
      if (   UnquotedValue.size() > 1
1932
23
          && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
1933
3
        UnquotedValue = UnquotedValue.substr(1);
1934
23
      UnquotedValue = UnquotedValue.substr(1);
1935
23
      break;
1936
93
    default:
1937
93
      if (UnquotedValue.size() == 1)
1938
0
        // TODO: Report error.
1939
0
        break;
1940
93
      UnquotedValue = UnquotedValue.substr(1);
1941
93
      switch (UnquotedValue[0]) {
1942
93
      default: {
1943
1
          Token T;
1944
1
          T.Range = StringRef(UnquotedValue.begin(), 1);
1945
1
          setError("Unrecognized escape code!", T);
1946
1
          return "";
1947
93
        }
1948
93
      case '\r':
1949
5
      case '\n':
1950
5
        // Remove the new line.
1951
5
        if (   UnquotedValue.size() > 1
1952
5
            && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
1953
0
          UnquotedValue = UnquotedValue.substr(1);
1954
5
        // If this was just a single byte newline, it will get skipped
1955
5
        // below.
1956
5
        break;
1957
5
      case '0':
1958
4
        Storage.push_back(0x00);
1959
4
        break;
1960
5
      case 'a':
1961
1
        Storage.push_back(0x07);
1962
1
        break;
1963
5
      case 'b':
1964
2
        Storage.push_back(0x08);
1965
2
        break;
1966
5
      case 't':
1967
3
      case 0x09:
1968
3
        Storage.push_back(0x09);
1969
3
        break;
1970
5
      case 'n':
1971
5
        Storage.push_back(0x0A);
1972
5
        break;
1973
3
      case 'v':
1974
1
        Storage.push_back(0x0B);
1975
1
        break;
1976
3
      case 'f':
1977
1
        Storage.push_back(0x0C);
1978
1
        break;
1979
3
      case 'r':
1980
2
        Storage.push_back(0x0D);
1981
2
        break;
1982
3
      case 'e':
1983
1
        Storage.push_back(0x1B);
1984
1
        break;
1985
3
      case ' ':
1986
3
        Storage.push_back(0x20);
1987
3
        break;
1988
42
      case '"':
1989
42
        Storage.push_back(0x22);
1990
42
        break;
1991
3
      case '/':
1992
0
        Storage.push_back(0x2F);
1993
0
        break;
1994
11
      case '\\':
1995
11
        Storage.push_back(0x5C);
1996
11
        break;
1997
3
      case 'N':
1998
1
        encodeUTF8(0x85, Storage);
1999
1
        break;
2000
3
      case '_':
2001
1
        encodeUTF8(0xA0, Storage);
2002
1
        break;
2003
3
      case 'L':
2004
1
        encodeUTF8(0x2028, Storage);
2005
1
        break;
2006
3
      case 'P':
2007
1
        encodeUTF8(0x2029, Storage);
2008
1
        break;
2009
4
      case 'x': {
2010
4
          if (UnquotedValue.size() < 3)
2011
0
            // TODO: Report error.
2012
0
            break;
2013
4
          unsigned int UnicodeScalarValue;
2014
4
          if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue))
2015
0
            // TODO: Report error.
2016
0
            UnicodeScalarValue = 0xFFFD;
2017
4
          encodeUTF8(UnicodeScalarValue, Storage);
2018
4
          UnquotedValue = UnquotedValue.substr(2);
2019
4
          break;
2020
4
        }
2021
4
      case 'u': {
2022
2
          if (UnquotedValue.size() < 5)
2023
0
            // TODO: Report error.
2024
0
            break;
2025
2
          unsigned int UnicodeScalarValue;
2026
2
          if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue))
2027
0
            // TODO: Report error.
2028
0
            UnicodeScalarValue = 0xFFFD;
2029
2
          encodeUTF8(UnicodeScalarValue, Storage);
2030
2
          UnquotedValue = UnquotedValue.substr(4);
2031
2
          break;
2032
2
        }
2033
2
      case 'U': {
2034
1
          if (UnquotedValue.size() < 9)
2035
0
            // TODO: Report error.
2036
0
            break;
2037
1
          unsigned int UnicodeScalarValue;
2038
1
          if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue))
2039
0
            // TODO: Report error.
2040
0
            UnicodeScalarValue = 0xFFFD;
2041
1
          encodeUTF8(UnicodeScalarValue, Storage);
2042
1
          UnquotedValue = UnquotedValue.substr(8);
2043
1
          break;
2044
1
        }
2045
92
      }
2046
92
      UnquotedValue = UnquotedValue.substr(1);
2047
116
    }
2048
116
  }
2049
38
  Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end());
2050
37
  return StringRef(Storage.begin(), Storage.size());
2051
38
}
2052
2053
916k
Node *KeyValueNode::getKey() {
2054
916k
  if (Key)
2055
632k
    return Key;
2056
283k
  // Handle implicit null keys.
2057
283k
  {
2058
283k
    Token &t = peekNext();
2059
283k
    if (   t.Kind == Token::TK_BlockEnd
2060
283k
        || t.Kind == Token::TK_Value
2061
283k
        || t.Kind == Token::TK_Error) {
2062
0
      return Key = new (getAllocator()) NullNode(Doc);
2063
0
    }
2064
283k
    if (t.Kind == Token::TK_Key)
2065
283k
      getNext(); // skip TK_Key.
2066
283k
  }
2067
283k
2068
283k
  // Handle explicit null keys.
2069
283k
  Token &t = peekNext();
2070
283k
  if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Value) {
2071
3
    return Key = new (getAllocator()) NullNode(Doc);
2072
3
  }
2073
283k
2074
283k
  // We've got a normal key.
2075
283k
  return Key = parseBlockNode();
2076
283k
}
2077
2078
631k
Node *KeyValueNode::getValue() {
2079
631k
  if (Value)
2080
347k
    return Value;
2081
283k
  getKey()->skip();
2082
283k
  if (failed())
2083
1
    return Value = new (getAllocator()) NullNode(Doc);
2084
283k
2085
283k
  // Handle implicit null values.
2086
283k
  {
2087
283k
    Token &t = peekNext();
2088
283k
    if (   t.Kind == Token::TK_BlockEnd
2089
283k
        || 
t.Kind == Token::TK_FlowMappingEnd283k
2090
283k
        || 
t.Kind == Token::TK_Key283k
2091
283k
        || 
t.Kind == Token::TK_FlowEntry283k
2092
283k
        || 
t.Kind == Token::TK_Error283k
) {
2093
20
      return Value = new (getAllocator()) NullNode(Doc);
2094
20
    }
2095
283k
2096
283k
    if (t.Kind != Token::TK_Value) {
2097
3
      setError("Unexpected token in Key Value.", t);
2098
3
      return Value = new (getAllocator()) NullNode(Doc);
2099
3
    }
2100
283k
    getNext(); // skip TK_Value.
2101
283k
  }
2102
283k
2103
283k
  // Handle explicit null values.
2104
283k
  Token &t = peekNext();
2105
283k
  if (t.Kind == Token::TK_BlockEnd || 
t.Kind == Token::TK_Key283k
) {
2106
2.70k
    return Value = new (getAllocator()) NullNode(Doc);
2107
2.70k
  }
2108
280k
2109
280k
  // We got a normal value.
2110
280k
  return Value = parseBlockNode();
2111
280k
}
2112
2113
428k
void MappingNode::increment() {
2114
428k
  if (failed()) {
2115
10
    IsAtEnd = true;
2116
10
    CurrentEntry = nullptr;
2117
10
    return;
2118
10
  }
2119
428k
  if (CurrentEntry) {
2120
347k
    CurrentEntry->skip();
2121
347k
    if (Type == MT_Inline) {
2122
12
      IsAtEnd = true;
2123
12
      CurrentEntry = nullptr;
2124
12
      return;
2125
12
    }
2126
428k
  }
2127
428k
  Token T = peekNext();
2128
428k
  if (T.Kind == Token::TK_Key || 
T.Kind == Token::TK_Scalar144k
) {
2129
283k
    // KeyValueNode eats the TK_Key. That way it can detect null keys.
2130
283k
    CurrentEntry = new (getAllocator()) KeyValueNode(Doc);
2131
283k
  } else 
if (144k
Type == MT_Block144k
) {
2132
44.6k
    switch (T.Kind) {
2133
44.6k
    case Token::TK_BlockEnd:
2134
44.6k
      getNext();
2135
44.6k
      IsAtEnd = true;
2136
44.6k
      CurrentEntry = nullptr;
2137
44.6k
      break;
2138
44.6k
    default:
2139
0
      setError("Unexpected token. Expected Key or Block End", T);
2140
0
      LLVM_FALLTHROUGH;
2141
0
    case Token::TK_Error:
2142
0
      IsAtEnd = true;
2143
0
      CurrentEntry = nullptr;
2144
44.6k
    }
2145
100k
  } else {
2146
100k
    switch (T.Kind) {
2147
100k
    case Token::TK_FlowEntry:
2148
63.8k
      // Eat the flow entry and recurse.
2149
63.8k
      getNext();
2150
63.8k
      return increment();
2151
100k
    case Token::TK_FlowMappingEnd:
2152
36.4k
      getNext();
2153
36.4k
      LLVM_FALLTHROUGH;
2154
36.4k
    case Token::TK_Error:
2155
36.4k
      // Set this to end iterator.
2156
36.4k
      IsAtEnd = true;
2157
36.4k
      CurrentEntry = nullptr;
2158
36.4k
      break;
2159
36.4k
    default:
2160
7
      setError( "Unexpected token. Expected Key, Flow Entry, or Flow "
2161
7
                "Mapping End."
2162
7
              , T);
2163
7
      IsAtEnd = true;
2164
7
      CurrentEntry = nullptr;
2165
100k
    }
2166
100k
  }
2167
428k
}
2168
2169
125k
void SequenceNode::increment() {
2170
125k
  if (failed()) {
2171
6
    IsAtEnd = true;
2172
6
    CurrentEntry = nullptr;
2173
6
    return;
2174
6
  }
2175
125k
  if (CurrentEntry)
2176
106k
    CurrentEntry->skip();
2177
125k
  Token T = peekNext();
2178
125k
  if (SeqType == ST_Block) {
2179
55.5k
    switch (T.Kind) {
2180
55.5k
    case Token::TK_BlockEntry:
2181
43.8k
      getNext();
2182
43.8k
      CurrentEntry = parseBlockNode();
2183
43.8k
      if (!CurrentEntry) { // An error occurred.
2184
1
        IsAtEnd = true;
2185
1
        CurrentEntry = nullptr;
2186
1
      }
2187
43.8k
      break;
2188
55.5k
    case Token::TK_BlockEnd:
2189
11.7k
      getNext();
2190
11.7k
      IsAtEnd = true;
2191
11.7k
      CurrentEntry = nullptr;
2192
11.7k
      break;
2193
55.5k
    default:
2194
0
      setError( "Unexpected token. Expected Block Entry or Block End."
2195
0
              , T);
2196
0
      LLVM_FALLTHROUGH;
2197
0
    case Token::TK_Error:
2198
0
      IsAtEnd = true;
2199
0
      CurrentEntry = nullptr;
2200
55.5k
    }
2201
69.6k
  } else if (SeqType == ST_Indentless) {
2202
154
    switch (T.Kind) {
2203
154
    case Token::TK_BlockEntry:
2204
89
      getNext();
2205
89
      CurrentEntry = parseBlockNode();
2206
89
      if (!CurrentEntry) { // An error occurred.
2207
0
        IsAtEnd = true;
2208
0
        CurrentEntry = nullptr;
2209
0
      }
2210
89
      break;
2211
154
    default:
2212
65
    case Token::TK_Error:
2213
65
      IsAtEnd = true;
2214
65
      CurrentEntry = nullptr;
2215
154
    }
2216
69.4k
  } else if (SeqType == ST_Flow) {
2217
69.4k
    switch (T.Kind) {
2218
69.4k
    case Token::TK_FlowEntry:
2219
28.6k
      // Eat the flow entry and recurse.
2220
28.6k
      getNext();
2221
28.6k
      WasPreviousTokenFlowEntry = true;
2222
28.6k
      return increment();
2223
69.4k
    case Token::TK_FlowSequenceEnd:
2224
6.58k
      getNext();
2225
6.58k
      LLVM_FALLTHROUGH;
2226
6.58k
    case Token::TK_Error:
2227
6.58k
      // Set this to end iterator.
2228
6.58k
      IsAtEnd = true;
2229
6.58k
      CurrentEntry = nullptr;
2230
6.58k
      break;
2231
6.58k
    case Token::TK_StreamEnd:
2232
7
    case Token::TK_DocumentEnd:
2233
7
    case Token::TK_DocumentStart:
2234
7
      setError("Could not find closing ]!", T);
2235
7
      // Set this to end iterator.
2236
7
      IsAtEnd = true;
2237
7
      CurrentEntry = nullptr;
2238
7
      break;
2239
34.1k
    default:
2240
34.1k
      if (!WasPreviousTokenFlowEntry) {
2241
2
        setError("Expected , between entries!", T);
2242
2
        IsAtEnd = true;
2243
2
        CurrentEntry = nullptr;
2244
2
        break;
2245
2
      }
2246
34.1k
      // Otherwise it must be a flow entry.
2247
34.1k
      CurrentEntry = parseBlockNode();
2248
34.1k
      if (!CurrentEntry) {
2249
0
        IsAtEnd = true;
2250
0
      }
2251
34.1k
      WasPreviousTokenFlowEntry = false;
2252
34.1k
      break;
2253
69.4k
    }
2254
69.4k
  }
2255
125k
}
2256
2257
16.6k
Document::Document(Stream &S) : stream(S), Root(nullptr) {
2258
16.6k
  // Tag maps starts with two default mappings.
2259
16.6k
  TagMap["!"] = "!";
2260
16.6k
  TagMap["!!"] = "tag:yaml.org,2002:";
2261
16.6k
2262
16.6k
  if (parseDirectives())
2263
13
    expectToken(Token::TK_DocumentStart);
2264
16.6k
  Token &T = peekNext();
2265
16.6k
  if (T.Kind == Token::TK_DocumentStart)
2266
15.3k
    getNext();
2267
16.6k
}
2268
2269
28.0k
bool Document::skip()  {
2270
28.0k
  if (stream.scanner->failed())
2271
46
    return false;
2272
27.9k
  if (!Root)
2273
1.32k
    getRoot();
2274
27.9k
  Root->skip();
2275
27.9k
  Token &T = peekNext();
2276
27.9k
  if (T.Kind == Token::TK_StreamEnd)
2277
2.88k
    return false;
2278
25.0k
  if (T.Kind == Token::TK_DocumentEnd) {
2279
13.2k
    getNext();
2280
13.2k
    return skip();
2281
13.2k
  }
2282
11.8k
  return true;
2283
11.8k
}
2284
2285
3.35M
Token &Document::peekNext() {
2286
3.35M
  return stream.scanner->peekNext();
2287
3.35M
}
2288
2289
1.49M
Token Document::getNext() {
2290
1.49M
  return stream.scanner->getNext();
2291
1.49M
}
2292
2293
24
void Document::setError(const Twine &Message, Token &Location) const {
2294
24
  stream.scanner->setError(Message, Location.Range.begin());
2295
24
}
2296
2297
837k
bool Document::failed() const {
2298
837k
  return stream.scanner->failed();
2299
837k
}
2300
2301
659k
Node *Document::parseBlockNode() {
2302
659k
  Token T = peekNext();
2303
659k
  // Handle properties.
2304
659k
  Token AnchorInfo;
2305
659k
  Token TagInfo;
2306
662k
parse_property:
2307
662k
  switch (T.Kind) {
2308
662k
  case Token::TK_Alias:
2309
16
    getNext();
2310
16
    return new (NodeAllocator) AliasNode(stream.CurrentDoc, T.Range.substr(1));
2311
662k
  case Token::TK_Anchor:
2312
13
    if (AnchorInfo.Kind == Token::TK_Anchor) {
2313
0
      setError("Already encountered an anchor for this node!", T);
2314
0
      return nullptr;
2315
0
    }
2316
13
    AnchorInfo = getNext(); // Consume TK_Anchor.
2317
13
    T = peekNext();
2318
13
    goto parse_property;
2319
3.51k
  case Token::TK_Tag:
2320
3.51k
    if (TagInfo.Kind == Token::TK_Tag) {
2321
0
      setError("Already encountered a tag for this node!", T);
2322
0
      return nullptr;
2323
0
    }
2324
3.51k
    TagInfo = getNext(); // Consume TK_Tag.
2325
3.51k
    T = peekNext();
2326
3.51k
    goto parse_property;
2327
658k
  default:
2328
658k
    break;
2329
658k
  }
2330
658k
2331
658k
  switch (T.Kind) {
2332
658k
  case Token::TK_BlockEntry:
2333
65
    // We got an unindented BlockEntry sequence. This is not terminated with
2334
65
    // a BlockEnd.
2335
65
    // Don't eat the TK_BlockEntry, SequenceNode needs it.
2336
65
    return new (NodeAllocator) SequenceNode( stream.CurrentDoc
2337
65
                                           , AnchorInfo.Range.substr(1)
2338
65
                                           , TagInfo.Range
2339
65
                                           , SequenceNode::ST_Indentless);
2340
658k
  case Token::TK_BlockSequenceStart:
2341
11.7k
    getNext();
2342
11.7k
    return new (NodeAllocator)
2343
11.7k
      SequenceNode( stream.CurrentDoc
2344
11.7k
                  , AnchorInfo.Range.substr(1)
2345
11.7k
                  , TagInfo.Range
2346
11.7k
                  , SequenceNode::ST_Block);
2347
658k
  case Token::TK_BlockMappingStart:
2348
44.7k
    getNext();
2349
44.7k
    return new (NodeAllocator)
2350
44.7k
      MappingNode( stream.CurrentDoc
2351
44.7k
                 , AnchorInfo.Range.substr(1)
2352
44.7k
                 , TagInfo.Range
2353
44.7k
                 , MappingNode::MT_Block);
2354
658k
  case Token::TK_FlowSequenceStart:
2355
6.65k
    getNext();
2356
6.65k
    return new (NodeAllocator)
2357
6.65k
      SequenceNode( stream.CurrentDoc
2358
6.65k
                  , AnchorInfo.Range.substr(1)
2359
6.65k
                  , TagInfo.Range
2360
6.65k
                  , SequenceNode::ST_Flow);
2361
658k
  case Token::TK_FlowMappingStart:
2362
36.5k
    getNext();
2363
36.5k
    return new (NodeAllocator)
2364
36.5k
      MappingNode( stream.CurrentDoc
2365
36.5k
                 , AnchorInfo.Range.substr(1)
2366
36.5k
                 , TagInfo.Range
2367
36.5k
                 , MappingNode::MT_Flow);
2368
658k
  case Token::TK_Scalar:
2369
546k
    getNext();
2370
546k
    return new (NodeAllocator)
2371
546k
      ScalarNode( stream.CurrentDoc
2372
546k
                , AnchorInfo.Range.substr(1)
2373
546k
                , TagInfo.Range
2374
546k
                , T.Range);
2375
658k
  case Token::TK_BlockScalar: {
2376
12.4k
    getNext();
2377
12.4k
    StringRef NullTerminatedStr(T.Value.c_str(), T.Value.length() + 1);
2378
12.4k
    StringRef StrCopy = NullTerminatedStr.copy(NodeAllocator).drop_back();
2379
12.4k
    return new (NodeAllocator)
2380
12.4k
        BlockScalarNode(stream.CurrentDoc, AnchorInfo.Range.substr(1),
2381
12.4k
                        TagInfo.Range, StrCopy, T.Range);
2382
658k
  }
2383
658k
  case Token::TK_Key:
2384
13
    // Don't eat the TK_Key, KeyValueNode expects it.
2385
13
    return new (NodeAllocator)
2386
13
      MappingNode( stream.CurrentDoc
2387
13
                 , AnchorInfo.Range.substr(1)
2388
13
                 , TagInfo.Range
2389
13
                 , MappingNode::MT_Inline);
2390
658k
  case Token::TK_DocumentStart:
2391
190
  case Token::TK_DocumentEnd:
2392
190
  case Token::TK_StreamEnd:
2393
199
  default:
2394
199
    // TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not
2395
199
    //       !!null null.
2396
199
    return new (NodeAllocator) NullNode(stream.CurrentDoc);
2397
190
  case Token::TK_Error:
2398
15
    return nullptr;
2399
0
  }
2400
0
  llvm_unreachable("Control flow shouldn't reach here.");
2401
0
  return nullptr;
2402
0
}
2403
2404
16.6k
bool Document::parseDirectives() {
2405
16.6k
  bool isDirective = false;
2406
16.7k
  while (true) {
2407
16.7k
    Token T = peekNext();
2408
16.7k
    if (T.Kind == Token::TK_TagDirective) {
2409
13
      parseTAGDirective();
2410
13
      isDirective = true;
2411
16.6k
    } else if (T.Kind == Token::TK_VersionDirective) {
2412
5
      parseYAMLDirective();
2413
5
      isDirective = true;
2414
5
    } else
2415
16.6k
      break;
2416
16.7k
  }
2417
16.6k
  return isDirective;
2418
16.6k
}
2419
2420
5
void Document::parseYAMLDirective() {
2421
5
  getNext(); // Eat %YAML <version>
2422
5
}
2423
2424
13
void Document::parseTAGDirective() {
2425
13
  Token Tag = getNext(); // %TAG <handle> <prefix>
2426
13
  StringRef T = Tag.Range;
2427
13
  // Strip %TAG
2428
13
  T = T.substr(T.find_first_of(" \t")).ltrim(" \t");
2429
13
  std::size_t HandleEnd = T.find_first_of(" \t");
2430
13
  StringRef TagHandle = T.substr(0, HandleEnd);
2431
13
  StringRef TagPrefix = T.substr(HandleEnd).ltrim(" \t");
2432
13
  TagMap[TagHandle] = TagPrefix;
2433
13
}
2434
2435
13
bool Document::expectToken(int TK) {
2436
13
  Token T = getNext();
2437
13
  if (T.Kind != TK) {
2438
2
    setError("Unexpected token", T);
2439
2
    return false;
2440
2
  }
2441
11
  return true;
2442
11
}