/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Support/YAMLParser.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- YAMLParser.cpp - Simple YAML parser --------------------------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file implements a YAML parser. |
10 | | // |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #include "llvm/Support/YAMLParser.h" |
14 | | #include "llvm/ADT/AllocatorList.h" |
15 | | #include "llvm/ADT/ArrayRef.h" |
16 | | #include "llvm/ADT/None.h" |
17 | | #include "llvm/ADT/STLExtras.h" |
18 | | #include "llvm/ADT/SmallString.h" |
19 | | #include "llvm/ADT/SmallVector.h" |
20 | | #include "llvm/ADT/StringExtras.h" |
21 | | #include "llvm/ADT/StringRef.h" |
22 | | #include "llvm/ADT/Twine.h" |
23 | | #include "llvm/Support/Compiler.h" |
24 | | #include "llvm/Support/ErrorHandling.h" |
25 | | #include "llvm/Support/MemoryBuffer.h" |
26 | | #include "llvm/Support/SMLoc.h" |
27 | | #include "llvm/Support/SourceMgr.h" |
28 | | #include "llvm/Support/Unicode.h" |
29 | | #include "llvm/Support/raw_ostream.h" |
30 | | #include <algorithm> |
31 | | #include <cassert> |
32 | | #include <cstddef> |
33 | | #include <cstdint> |
34 | | #include <map> |
35 | | #include <memory> |
36 | | #include <string> |
37 | | #include <system_error> |
38 | | #include <utility> |
39 | | |
40 | | using namespace llvm; |
41 | | using namespace yaml; |
42 | | |
/// The Unicode encoding forms that can be reported for an input buffer.
enum UnicodeEncodingForm {
  UEF_UTF32_LE, ///< UTF-32, little-endian byte order.
  UEF_UTF32_BE, ///< UTF-32, big-endian byte order.
  UEF_UTF16_LE, ///< UTF-16, little-endian byte order.
  UEF_UTF16_BE, ///< UTF-16, big-endian byte order.
  UEF_UTF8,     ///< UTF-8 (which also covers plain ASCII).
  UEF_Unknown   ///< The input is not in a valid Unicode encoding.
};
51 | | |
52 | | /// EncodingInfo - Holds the encoding type and length of the byte order mark if |
53 | | /// it exists. Length is in {0, 2, 3, 4}. |
54 | | using EncodingInfo = std::pair<UnicodeEncodingForm, unsigned>; |
55 | | |
56 | | /// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode |
57 | | /// encoding form of \a Input. |
58 | | /// |
59 | | /// @param Input A string of length 0 or more. |
60 | | /// @returns An EncodingInfo indicating the Unicode encoding form of the input |
61 | | /// and how long the byte order mark is if one exists. |
62 | 4.90k | static EncodingInfo getUnicodeEncoding(StringRef Input) { |
63 | 4.90k | if (Input.empty()) |
64 | 6 | return std::make_pair(UEF_Unknown, 0); |
65 | 4.89k | |
66 | 4.89k | switch (uint8_t(Input[0])) { |
67 | 4.89k | case 0x00: |
68 | 0 | if (Input.size() >= 4) { |
69 | 0 | if ( Input[1] == 0 |
70 | 0 | && uint8_t(Input[2]) == 0xFE |
71 | 0 | && uint8_t(Input[3]) == 0xFF) |
72 | 0 | return std::make_pair(UEF_UTF32_BE, 4); |
73 | 0 | if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0) |
74 | 0 | return std::make_pair(UEF_UTF32_BE, 0); |
75 | 0 | } |
76 | 0 | |
77 | 0 | if (Input.size() >= 2 && Input[1] != 0) |
78 | 0 | return std::make_pair(UEF_UTF16_BE, 0); |
79 | 0 | return std::make_pair(UEF_Unknown, 0); |
80 | 1 | case 0xFF: |
81 | 1 | if ( Input.size() >= 4 |
82 | 1 | && uint8_t(Input[1]) == 0xFE0 |
83 | 1 | && Input[2] == 00 |
84 | 1 | && Input[3] == 00 ) |
85 | 0 | return std::make_pair(UEF_UTF32_LE, 4); |
86 | 1 | |
87 | 1 | if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFE0 ) |
88 | 0 | return std::make_pair(UEF_UTF16_LE, 2); |
89 | 1 | return std::make_pair(UEF_Unknown, 0); |
90 | 1 | case 0xFE: |
91 | 0 | if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFF) |
92 | 0 | return std::make_pair(UEF_UTF16_BE, 2); |
93 | 0 | return std::make_pair(UEF_Unknown, 0); |
94 | 3 | case 0xEF: |
95 | 3 | if ( Input.size() >= 3 |
96 | 3 | && uint8_t(Input[1]) == 0xBB |
97 | 3 | && uint8_t(Input[2]) == 0xBF) |
98 | 3 | return std::make_pair(UEF_UTF8, 3); |
99 | 0 | return std::make_pair(UEF_Unknown, 0); |
100 | 4.89k | } |
101 | 4.89k | |
102 | 4.89k | // It could still be utf-32 or utf-16. |
103 | 4.89k | if (Input.size() >= 4 && Input[1] == 04.86k && Input[2] == 00 && Input[3] == 00 ) |
104 | 0 | return std::make_pair(UEF_UTF32_LE, 0); |
105 | 4.89k | |
106 | 4.89k | if (Input.size() >= 2 && Input[1] == 04.88k ) |
107 | 0 | return std::make_pair(UEF_UTF16_LE, 0); |
108 | 4.89k | |
109 | 4.89k | return std::make_pair(UEF_UTF8, 0); |
110 | 4.89k | } |
111 | | |
112 | | /// Pin the vtables to this file. |
113 | 0 | void Node::anchor() {} |
114 | 0 | void NullNode::anchor() {} |
115 | 0 | void ScalarNode::anchor() {} |
116 | 0 | void BlockScalarNode::anchor() {} |
117 | 0 | void KeyValueNode::anchor() {} |
118 | 0 | void MappingNode::anchor() {} |
119 | 0 | void SequenceNode::anchor() {} |
120 | 0 | void AliasNode::anchor() {} |
121 | | |
122 | | namespace llvm { |
123 | | namespace yaml { |
124 | | |
125 | | /// Token - A single YAML token. |
126 | | struct Token { |
127 | | enum TokenKind { |
128 | | TK_Error, // Uninitialized token. |
129 | | TK_StreamStart, |
130 | | TK_StreamEnd, |
131 | | TK_VersionDirective, |
132 | | TK_TagDirective, |
133 | | TK_DocumentStart, |
134 | | TK_DocumentEnd, |
135 | | TK_BlockEntry, |
136 | | TK_BlockEnd, |
137 | | TK_BlockSequenceStart, |
138 | | TK_BlockMappingStart, |
139 | | TK_FlowEntry, |
140 | | TK_FlowSequenceStart, |
141 | | TK_FlowSequenceEnd, |
142 | | TK_FlowMappingStart, |
143 | | TK_FlowMappingEnd, |
144 | | TK_Key, |
145 | | TK_Value, |
146 | | TK_Scalar, |
147 | | TK_BlockScalar, |
148 | | TK_Alias, |
149 | | TK_Anchor, |
150 | | TK_Tag |
151 | | } Kind = TK_Error; |
152 | | |
153 | | /// A string of length 0 or more whose begin() points to the logical location |
154 | | /// of the token in the input. |
155 | | StringRef Range; |
156 | | |
157 | | /// The value of a block scalar node. |
158 | | std::string Value; |
159 | | |
160 | 3.89M | Token() = default; |
161 | | }; |
162 | | |
163 | | } // end namespace yaml |
164 | | } // end namespace llvm |
165 | | |
166 | | using TokenQueueT = BumpPtrList<Token>; |
167 | | |
168 | | namespace { |
169 | | |
170 | | /// This struct is used to track simple keys. |
171 | | /// |
172 | | /// Simple keys are handled by creating an entry in SimpleKeys for each Token |
173 | | /// which could legally be the start of a simple key. When peekNext is called, |
174 | | /// if the Token To be returned is referenced by a SimpleKey, we continue |
175 | | /// tokenizing until that potential simple key has either been found to not be |
176 | | /// a simple key (we moved on to the next line or went further than 1024 chars). |
177 | | /// Or when we run into a Value, and then insert a Key token (and possibly |
178 | | /// others) before the SimpleKey's Tok. |
179 | | struct SimpleKey { |
180 | | TokenQueueT::iterator Tok; |
181 | | unsigned Column; |
182 | | unsigned Line; |
183 | | unsigned FlowLevel; |
184 | | bool IsRequired; |
185 | | |
186 | 839k | bool operator ==(const SimpleKey &Other) { |
187 | 839k | return Tok == Other.Tok; |
188 | 839k | } |
189 | | }; |
190 | | |
191 | | } // end anonymous namespace |
192 | | |
193 | | /// The Unicode scalar value of a UTF-8 minimal well-formed code unit |
194 | | /// subsequence and the subsequence's length in code units (uint8_t). |
195 | | /// A length of 0 represents an error. |
196 | | using UTF8Decoded = std::pair<uint32_t, unsigned>; |
197 | | |
198 | 85 | static UTF8Decoded decodeUTF8(StringRef Range) { |
199 | 85 | StringRef::iterator Position= Range.begin(); |
200 | 85 | StringRef::iterator End = Range.end(); |
201 | 85 | // 1 byte: [0x00, 0x7f] |
202 | 85 | // Bit pattern: 0xxxxxxx |
203 | 85 | if ((*Position & 0x80) == 0) { |
204 | 0 | return std::make_pair(*Position, 1); |
205 | 0 | } |
206 | 85 | // 2 bytes: [0x80, 0x7ff] |
207 | 85 | // Bit pattern: 110xxxxx 10xxxxxx |
208 | 85 | if (Position + 1 != End && |
209 | 85 | ((*Position & 0xE0) == 0xC0)84 && |
210 | 85 | ((*(Position + 1) & 0xC0) == 0x80)57 ) { |
211 | 57 | uint32_t codepoint = ((*Position & 0x1F) << 6) | |
212 | 57 | (*(Position + 1) & 0x3F); |
213 | 57 | if (codepoint >= 0x80) |
214 | 57 | return std::make_pair(codepoint, 2); |
215 | 28 | } |
216 | 28 | // 3 bytes: [0x8000, 0xffff] |
217 | 28 | // Bit pattern: 1110xxxx 10xxxxxx 10xxxxxx |
218 | 28 | if (Position + 2 != End && |
219 | 28 | ((*Position & 0xF0) == 0xE0) && |
220 | 28 | ((*(Position + 1) & 0xC0) == 0x80)27 && |
221 | 28 | ((*(Position + 2) & 0xC0) == 0x80)27 ) { |
222 | 27 | uint32_t codepoint = ((*Position & 0x0F) << 12) | |
223 | 27 | ((*(Position + 1) & 0x3F) << 6) | |
224 | 27 | (*(Position + 2) & 0x3F); |
225 | 27 | // Codepoints between 0xD800 and 0xDFFF are invalid, as |
226 | 27 | // they are high / low surrogate halves used by UTF-16. |
227 | 27 | if (codepoint >= 0x800 && |
228 | 27 | (codepoint < 0xD800 || codepoint > 0xDFFF1 )) |
229 | 27 | return std::make_pair(codepoint, 3); |
230 | 1 | } |
231 | 1 | // 4 bytes: [0x10000, 0x10FFFF] |
232 | 1 | // Bit pattern: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx |
233 | 1 | if (Position + 3 != End && |
234 | 1 | ((*Position & 0xF8) == 0xF0) && |
235 | 1 | ((*(Position + 1) & 0xC0) == 0x80)0 && |
236 | 1 | ((*(Position + 2) & 0xC0) == 0x80)0 && |
237 | 1 | ((*(Position + 3) & 0xC0) == 0x80)0 ) { |
238 | 0 | uint32_t codepoint = ((*Position & 0x07) << 18) | |
239 | 0 | ((*(Position + 1) & 0x3F) << 12) | |
240 | 0 | ((*(Position + 2) & 0x3F) << 6) | |
241 | 0 | (*(Position + 3) & 0x3F); |
242 | 0 | if (codepoint >= 0x10000 && codepoint <= 0x10FFFF) |
243 | 0 | return std::make_pair(codepoint, 4); |
244 | 1 | } |
245 | 1 | return std::make_pair(0, 0); |
246 | 1 | } |
247 | | |
248 | | namespace llvm { |
249 | | namespace yaml { |
250 | | |
251 | | /// Scans YAML tokens from a MemoryBuffer. |
252 | | class Scanner { |
253 | | public: |
254 | | Scanner(StringRef Input, SourceMgr &SM, bool ShowColors = true, |
255 | | std::error_code *EC = nullptr); |
256 | | Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors = true, |
257 | | std::error_code *EC = nullptr); |
258 | | |
259 | | /// Parse the next token and return it without popping it. |
260 | | Token &peekNext(); |
261 | | |
262 | | /// Parse the next token and pop it from the queue. |
263 | | Token getNext(); |
264 | | |
265 | | void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message, |
266 | 175 | ArrayRef<SMRange> Ranges = None) { |
267 | 175 | SM.PrintMessage(Loc, Kind, Message, Ranges, /* FixIts= */ None, ShowColors); |
268 | 175 | } |
269 | | |
270 | 47 | void setError(const Twine &Message, StringRef::iterator Position) { |
271 | 47 | if (Current >= End) |
272 | 31 | Current = End - 1; |
273 | 47 | |
274 | 47 | // propagate the error if possible |
275 | 47 | if (EC) |
276 | 4 | *EC = make_error_code(std::errc::invalid_argument); |
277 | 47 | |
278 | 47 | // Don't print out more errors after the first one we encounter. The rest |
279 | 47 | // are just the result of the first, and have no meaning. |
280 | 47 | if (!Failed) |
281 | 47 | printError(SMLoc::getFromPointer(Current), SourceMgr::DK_Error, Message); |
282 | 47 | Failed = true; |
283 | 47 | } |
284 | | |
285 | 1 | void setError(const Twine &Message) { |
286 | 1 | setError(Message, Current); |
287 | 1 | } |
288 | | |
289 | | /// Returns true if an error occurred while parsing. |
290 | 866k | bool failed() { |
291 | 866k | return Failed; |
292 | 866k | } |
293 | | |
294 | | private: |
295 | | void init(MemoryBufferRef Buffer); |
296 | | |
297 | 4.90k | StringRef currentInput() { |
298 | 4.90k | return StringRef(Current, End - Current); |
299 | 4.90k | } |
300 | | |
301 | | /// Decode a UTF-8 minimal well-formed code unit subsequence starting |
302 | | /// at \a Position. |
303 | | /// |
304 | | /// If the UTF-8 code units starting at Position do not form a well-formed |
305 | | /// code unit subsequence, then the Unicode scalar value is 0, and the length |
306 | | /// is 0. |
307 | 33 | UTF8Decoded decodeUTF8(StringRef::iterator Position) { |
308 | 33 | return ::decodeUTF8(StringRef(Position, End - Position)); |
309 | 33 | } |
310 | | |
311 | | // The following functions are based on the gramar rules in the YAML spec. The |
312 | | // style of the function names it meant to closely match how they are written |
313 | | // in the spec. The number within the [] is the number of the grammar rule in |
314 | | // the spec. |
315 | | // |
316 | | // See 4.2 [Production Naming Conventions] for the meaning of the prefixes. |
317 | | // |
318 | | // c- |
319 | | // A production starting and ending with a special character. |
320 | | // b- |
321 | | // A production matching a single line break. |
322 | | // nb- |
323 | | // A production starting and ending with a non-break character. |
324 | | // s- |
325 | | // A production starting and ending with a white space character. |
326 | | // ns- |
327 | | // A production starting and ending with a non-space character. |
328 | | // l- |
329 | | // A production matching complete line(s). |
330 | | |
331 | | /// Skip a single nb-char[27] starting at Position. |
332 | | /// |
333 | | /// A nb-char is 0x9 | [0x20-0x7E] | 0x85 | [0xA0-0xD7FF] | [0xE000-0xFEFE] |
334 | | /// | [0xFF00-0xFFFD] | [0x10000-0x10FFFF] |
335 | | /// |
336 | | /// @returns The code unit after the nb-char, or Position if it's not an |
337 | | /// nb-char. |
338 | | StringRef::iterator skip_nb_char(StringRef::iterator Position); |
339 | | |
340 | | /// Skip a single b-break[28] starting at Position. |
341 | | /// |
342 | | /// A b-break is 0xD 0xA | 0xD | 0xA |
343 | | /// |
344 | | /// @returns The code unit after the b-break, or Position if it's not a |
345 | | /// b-break. |
346 | | StringRef::iterator skip_b_break(StringRef::iterator Position); |
347 | | |
348 | | /// Skip a single s-space[31] starting at Position. |
349 | | /// |
350 | | /// An s-space is 0x20 |
351 | | /// |
352 | | /// @returns The code unit after the s-space, or Position if it's not a |
353 | | /// s-space. |
354 | | StringRef::iterator skip_s_space(StringRef::iterator Position); |
355 | | |
356 | | /// Skip a single s-white[33] starting at Position. |
357 | | /// |
358 | | /// A s-white is 0x20 | 0x9 |
359 | | /// |
360 | | /// @returns The code unit after the s-white, or Position if it's not a |
361 | | /// s-white. |
362 | | StringRef::iterator skip_s_white(StringRef::iterator Position); |
363 | | |
364 | | /// Skip a single ns-char[34] starting at Position. |
365 | | /// |
366 | | /// A ns-char is nb-char - s-white |
367 | | /// |
368 | | /// @returns The code unit after the ns-char, or Position if it's not a |
369 | | /// ns-char. |
370 | | StringRef::iterator skip_ns_char(StringRef::iterator Position); |
371 | | |
372 | | using SkipWhileFunc = StringRef::iterator (Scanner::*)(StringRef::iterator); |
373 | | |
374 | | /// Skip minimal well-formed code unit subsequences until Func |
375 | | /// returns its input. |
376 | | /// |
377 | | /// @returns The code unit after the last minimal well-formed code unit |
378 | | /// subsequence that Func accepted. |
379 | | StringRef::iterator skip_while( SkipWhileFunc Func |
380 | | , StringRef::iterator Position); |
381 | | |
382 | | /// Skip minimal well-formed code unit subsequences until Func returns its |
383 | | /// input. |
384 | | void advanceWhile(SkipWhileFunc Func); |
385 | | |
386 | | /// Scan ns-uri-char[39]s starting at Cur. |
387 | | /// |
388 | | /// This updates Cur and Column while scanning. |
389 | | void scan_ns_uri_char(); |
390 | | |
391 | | /// Consume a minimal well-formed code unit subsequence starting at |
392 | | /// \a Cur. Return false if it is not the same Unicode scalar value as |
393 | | /// \a Expected. This updates \a Column. |
394 | | bool consume(uint32_t Expected); |
395 | | |
396 | | /// Skip \a Distance UTF-8 code units. Updates \a Cur and \a Column. |
397 | | void skip(uint32_t Distance); |
398 | | |
399 | | /// Return true if the minimal well-formed code unit subsequence at |
400 | | /// Pos is whitespace or a new line |
401 | | bool isBlankOrBreak(StringRef::iterator Position); |
402 | | |
403 | | /// Consume a single b-break[28] if it's present at the current position. |
404 | | /// |
405 | | /// Return false if the code unit at the current position isn't a line break. |
406 | | bool consumeLineBreakIfPresent(); |
407 | | |
408 | | /// If IsSimpleKeyAllowed, create and push_back a new SimpleKey. |
409 | | void saveSimpleKeyCandidate( TokenQueueT::iterator Tok |
410 | | , unsigned AtColumn |
411 | | , bool IsRequired); |
412 | | |
413 | | /// Remove simple keys that can no longer be valid simple keys. |
414 | | /// |
415 | | /// Invalid simple keys are not on the current line or are further than 1024 |
416 | | /// columns back. |
417 | | void removeStaleSimpleKeyCandidates(); |
418 | | |
419 | | /// Remove all simple keys on FlowLevel \a Level. |
420 | | void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level); |
421 | | |
422 | | /// Unroll indentation in \a Indents back to \a Col. Creates BlockEnd |
423 | | /// tokens if needed. |
424 | | bool unrollIndent(int ToColumn); |
425 | | |
426 | | /// Increase indent to \a Col. Creates \a Kind token at \a InsertPoint |
427 | | /// if needed. |
428 | | bool rollIndent( int ToColumn |
429 | | , Token::TokenKind Kind |
430 | | , TokenQueueT::iterator InsertPoint); |
431 | | |
432 | | /// Skip a single-line comment when the comment starts at the current |
433 | | /// position of the scanner. |
434 | | void skipComment(); |
435 | | |
436 | | /// Skip whitespace and comments until the start of the next token. |
437 | | void scanToNextToken(); |
438 | | |
439 | | /// Must be the first token generated. |
440 | | bool scanStreamStart(); |
441 | | |
442 | | /// Generate tokens needed to close out the stream. |
443 | | bool scanStreamEnd(); |
444 | | |
445 | | /// Scan a %BLAH directive. |
446 | | bool scanDirective(); |
447 | | |
448 | | /// Scan a ... or ---. |
449 | | bool scanDocumentIndicator(bool IsStart); |
450 | | |
451 | | /// Scan a [ or { and generate the proper flow collection start token. |
452 | | bool scanFlowCollectionStart(bool IsSequence); |
453 | | |
454 | | /// Scan a ] or } and generate the proper flow collection end token. |
455 | | bool scanFlowCollectionEnd(bool IsSequence); |
456 | | |
457 | | /// Scan the , that separates entries in a flow collection. |
458 | | bool scanFlowEntry(); |
459 | | |
460 | | /// Scan the - that starts block sequence entries. |
461 | | bool scanBlockEntry(); |
462 | | |
463 | | /// Scan an explicit ? indicating a key. |
464 | | bool scanKey(); |
465 | | |
466 | | /// Scan an explicit : indicating a value. |
467 | | bool scanValue(); |
468 | | |
469 | | /// Scan a quoted scalar. |
470 | | bool scanFlowScalar(bool IsDoubleQuoted); |
471 | | |
472 | | /// Scan an unquoted scalar. |
473 | | bool scanPlainScalar(); |
474 | | |
475 | | /// Scan an Alias or Anchor starting with * or &. |
476 | | bool scanAliasOrAnchor(bool IsAlias); |
477 | | |
478 | | /// Scan a block scalar starting with | or >. |
479 | | bool scanBlockScalar(bool IsLiteral); |
480 | | |
481 | | /// Scan a chomping indicator in a block scalar header. |
482 | | char scanBlockChompingIndicator(); |
483 | | |
484 | | /// Scan an indentation indicator in a block scalar header. |
485 | | unsigned scanBlockIndentationIndicator(); |
486 | | |
487 | | /// Scan a block scalar header. |
488 | | /// |
489 | | /// Return false if an error occurred. |
490 | | bool scanBlockScalarHeader(char &ChompingIndicator, unsigned &IndentIndicator, |
491 | | bool &IsDone); |
492 | | |
493 | | /// Look for the indentation level of a block scalar. |
494 | | /// |
495 | | /// Return false if an error occurred. |
496 | | bool findBlockScalarIndent(unsigned &BlockIndent, unsigned BlockExitIndent, |
497 | | unsigned &LineBreaks, bool &IsDone); |
498 | | |
499 | | /// Scan the indentation of a text line in a block scalar. |
500 | | /// |
501 | | /// Return false if an error occurred. |
502 | | bool scanBlockScalarIndent(unsigned BlockIndent, unsigned BlockExitIndent, |
503 | | bool &IsDone); |
504 | | |
505 | | /// Scan a tag of the form !stuff. |
506 | | bool scanTag(); |
507 | | |
508 | | /// Dispatch to the next scanning function based on \a *Cur. |
509 | | bool fetchMoreTokens(); |
510 | | |
511 | | /// The SourceMgr used for diagnostics and buffer management. |
512 | | SourceMgr &SM; |
513 | | |
514 | | /// The original input. |
515 | | MemoryBufferRef InputBuffer; |
516 | | |
517 | | /// The current position of the scanner. |
518 | | StringRef::iterator Current; |
519 | | |
520 | | /// The end of the input (one past the last character). |
521 | | StringRef::iterator End; |
522 | | |
523 | | /// Current YAML indentation level in spaces. |
524 | | int Indent; |
525 | | |
526 | | /// Current column number in Unicode code points. |
527 | | unsigned Column; |
528 | | |
529 | | /// Current line number. |
530 | | unsigned Line; |
531 | | |
532 | | /// How deep we are in flow style containers. 0 Means at block level. |
533 | | unsigned FlowLevel; |
534 | | |
535 | | /// Are we at the start of the stream? |
536 | | bool IsStartOfStream; |
537 | | |
538 | | /// Can the next token be the start of a simple key? |
539 | | bool IsSimpleKeyAllowed; |
540 | | |
541 | | /// True if an error has occurred. |
542 | | bool Failed; |
543 | | |
544 | | /// Should colors be used when printing out the diagnostic messages? |
545 | | bool ShowColors; |
546 | | |
547 | | /// Queue of tokens. This is required to queue up tokens while looking |
548 | | /// for the end of a simple key. And for cases where a single character |
549 | | /// can produce multiple tokens (e.g. BlockEnd). |
550 | | TokenQueueT TokenQueue; |
551 | | |
552 | | /// Indentation levels. |
553 | | SmallVector<int, 4> Indents; |
554 | | |
555 | | /// Potential simple keys. |
556 | | SmallVector<SimpleKey, 4> SimpleKeys; |
557 | | |
558 | | std::error_code *EC; |
559 | | }; |
560 | | |
561 | | } // end namespace yaml |
562 | | } // end namespace llvm |
563 | | |
564 | | /// encodeUTF8 - Encode \a UnicodeScalarValue in UTF-8 and append it to result. |
565 | | static void encodeUTF8( uint32_t UnicodeScalarValue |
566 | 11 | , SmallVectorImpl<char> &Result) { |
567 | 11 | if (UnicodeScalarValue <= 0x7F) { |
568 | 6 | Result.push_back(UnicodeScalarValue & 0x7F); |
569 | 6 | } else if (5 UnicodeScalarValue <= 0x7FF5 ) { |
570 | 2 | uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6); |
571 | 2 | uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F); |
572 | 2 | Result.push_back(FirstByte); |
573 | 2 | Result.push_back(SecondByte); |
574 | 3 | } else if (UnicodeScalarValue <= 0xFFFF) { |
575 | 3 | uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12); |
576 | 3 | uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6); |
577 | 3 | uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F); |
578 | 3 | Result.push_back(FirstByte); |
579 | 3 | Result.push_back(SecondByte); |
580 | 3 | Result.push_back(ThirdByte); |
581 | 3 | } else if (0 UnicodeScalarValue <= 0x10FFFF0 ) { |
582 | 0 | uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18); |
583 | 0 | uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12); |
584 | 0 | uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6); |
585 | 0 | uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F); |
586 | 0 | Result.push_back(FirstByte); |
587 | 0 | Result.push_back(SecondByte); |
588 | 0 | Result.push_back(ThirdByte); |
589 | 0 | Result.push_back(FourthByte); |
590 | 0 | } |
591 | 11 | } |
592 | | |
593 | 0 | bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) { |
594 | 0 | SourceMgr SM; |
595 | 0 | Scanner scanner(Input, SM); |
596 | 0 | while (true) { |
597 | 0 | Token T = scanner.getNext(); |
598 | 0 | switch (T.Kind) { |
599 | 0 | case Token::TK_StreamStart: |
600 | 0 | OS << "Stream-Start: "; |
601 | 0 | break; |
602 | 0 | case Token::TK_StreamEnd: |
603 | 0 | OS << "Stream-End: "; |
604 | 0 | break; |
605 | 0 | case Token::TK_VersionDirective: |
606 | 0 | OS << "Version-Directive: "; |
607 | 0 | break; |
608 | 0 | case Token::TK_TagDirective: |
609 | 0 | OS << "Tag-Directive: "; |
610 | 0 | break; |
611 | 0 | case Token::TK_DocumentStart: |
612 | 0 | OS << "Document-Start: "; |
613 | 0 | break; |
614 | 0 | case Token::TK_DocumentEnd: |
615 | 0 | OS << "Document-End: "; |
616 | 0 | break; |
617 | 0 | case Token::TK_BlockEntry: |
618 | 0 | OS << "Block-Entry: "; |
619 | 0 | break; |
620 | 0 | case Token::TK_BlockEnd: |
621 | 0 | OS << "Block-End: "; |
622 | 0 | break; |
623 | 0 | case Token::TK_BlockSequenceStart: |
624 | 0 | OS << "Block-Sequence-Start: "; |
625 | 0 | break; |
626 | 0 | case Token::TK_BlockMappingStart: |
627 | 0 | OS << "Block-Mapping-Start: "; |
628 | 0 | break; |
629 | 0 | case Token::TK_FlowEntry: |
630 | 0 | OS << "Flow-Entry: "; |
631 | 0 | break; |
632 | 0 | case Token::TK_FlowSequenceStart: |
633 | 0 | OS << "Flow-Sequence-Start: "; |
634 | 0 | break; |
635 | 0 | case Token::TK_FlowSequenceEnd: |
636 | 0 | OS << "Flow-Sequence-End: "; |
637 | 0 | break; |
638 | 0 | case Token::TK_FlowMappingStart: |
639 | 0 | OS << "Flow-Mapping-Start: "; |
640 | 0 | break; |
641 | 0 | case Token::TK_FlowMappingEnd: |
642 | 0 | OS << "Flow-Mapping-End: "; |
643 | 0 | break; |
644 | 0 | case Token::TK_Key: |
645 | 0 | OS << "Key: "; |
646 | 0 | break; |
647 | 0 | case Token::TK_Value: |
648 | 0 | OS << "Value: "; |
649 | 0 | break; |
650 | 0 | case Token::TK_Scalar: |
651 | 0 | OS << "Scalar: "; |
652 | 0 | break; |
653 | 0 | case Token::TK_BlockScalar: |
654 | 0 | OS << "Block Scalar: "; |
655 | 0 | break; |
656 | 0 | case Token::TK_Alias: |
657 | 0 | OS << "Alias: "; |
658 | 0 | break; |
659 | 0 | case Token::TK_Anchor: |
660 | 0 | OS << "Anchor: "; |
661 | 0 | break; |
662 | 0 | case Token::TK_Tag: |
663 | 0 | OS << "Tag: "; |
664 | 0 | break; |
665 | 0 | case Token::TK_Error: |
666 | 0 | break; |
667 | 0 | } |
668 | 0 | OS << T.Range << "\n"; |
669 | 0 | if (T.Kind == Token::TK_StreamEnd) |
670 | 0 | break; |
671 | 0 | else if (T.Kind == Token::TK_Error) |
672 | 0 | return false; |
673 | 0 | } |
674 | 0 | return true; |
675 | 0 | } |
676 | | |
677 | 0 | bool yaml::scanTokens(StringRef Input) { |
678 | 0 | SourceMgr SM; |
679 | 0 | Scanner scanner(Input, SM); |
680 | 0 | while (true) { |
681 | 0 | Token T = scanner.getNext(); |
682 | 0 | if (T.Kind == Token::TK_StreamEnd) |
683 | 0 | break; |
684 | 0 | else if (T.Kind == Token::TK_Error) |
685 | 0 | return false; |
686 | 0 | } |
687 | 0 | return true; |
688 | 0 | } |
689 | | |
690 | 2.16k | std::string yaml::escape(StringRef Input, bool EscapePrintable) { |
691 | 2.16k | std::string EscapedInput; |
692 | 91.1k | for (StringRef::iterator i = Input.begin(), e = Input.end(); i != e; ++i89.0k ) { |
693 | 89.0k | if (*i == '\\') |
694 | 7 | EscapedInput += "\\\\"; |
695 | 89.0k | else if (*i == '"') |
696 | 16 | EscapedInput += "\\\""; |
697 | 89.0k | else if (*i == 0) |
698 | 4 | EscapedInput += "\\0"; |
699 | 89.0k | else if (*i == 0x07) |
700 | 1 | EscapedInput += "\\a"; |
701 | 89.0k | else if (*i == 0x08) |
702 | 2 | EscapedInput += "\\b"; |
703 | 89.0k | else if (*i == 0x09) |
704 | 30 | EscapedInput += "\\t"; |
705 | 88.9k | else if (*i == 0x0A) |
706 | 209 | EscapedInput += "\\n"; |
707 | 88.7k | else if (*i == 0x0B) |
708 | 1 | EscapedInput += "\\v"; |
709 | 88.7k | else if (*i == 0x0C) |
710 | 1 | EscapedInput += "\\f"; |
711 | 88.7k | else if (*i == 0x0D) |
712 | 2 | EscapedInput += "\\r"; |
713 | 88.7k | else if (*i == 0x1B) |
714 | 1 | EscapedInput += "\\e"; |
715 | 88.7k | else if ((unsigned char)*i < 0x20) { // Control characters not handled above. |
716 | 6 | std::string HexStr = utohexstr(*i); |
717 | 6 | EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr; |
718 | 88.7k | } else if (*i & 0x80) { // UTF-8 multiple code unit subsequence. |
719 | 52 | UTF8Decoded UnicodeScalarValue |
720 | 52 | = decodeUTF8(StringRef(i, Input.end() - i)); |
721 | 52 | if (UnicodeScalarValue.second == 0) { |
722 | 0 | // Found invalid char. |
723 | 0 | SmallString<4> Val; |
724 | 0 | encodeUTF8(0xFFFD, Val); |
725 | 0 | EscapedInput.insert(EscapedInput.end(), Val.begin(), Val.end()); |
726 | 0 | // FIXME: Error reporting. |
727 | 0 | return EscapedInput; |
728 | 0 | } |
729 | 52 | if (UnicodeScalarValue.first == 0x85) |
730 | 19 | EscapedInput += "\\N"; |
731 | 33 | else if (UnicodeScalarValue.first == 0xA0) |
732 | 1 | EscapedInput += "\\_"; |
733 | 32 | else if (UnicodeScalarValue.first == 0x2028) |
734 | 7 | EscapedInput += "\\L"; |
735 | 25 | else if (UnicodeScalarValue.first == 0x2029) |
736 | 4 | EscapedInput += "\\P"; |
737 | 21 | else if (!EscapePrintable && |
738 | 21 | sys::unicode::isPrintable(UnicodeScalarValue.first)17 ) |
739 | 16 | EscapedInput += StringRef(i, UnicodeScalarValue.second); |
740 | 5 | else { |
741 | 5 | std::string HexStr = utohexstr(UnicodeScalarValue.first); |
742 | 5 | if (HexStr.size() <= 2) |
743 | 0 | EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr; |
744 | 5 | else if (HexStr.size() <= 4) |
745 | 5 | EscapedInput += "\\u" + std::string(4 - HexStr.size(), '0') + HexStr; |
746 | 0 | else if (HexStr.size() <= 8) |
747 | 0 | EscapedInput += "\\U" + std::string(8 - HexStr.size(), '0') + HexStr; |
748 | 5 | } |
749 | 52 | i += UnicodeScalarValue.second - 1; |
750 | 52 | } else |
751 | 88.7k | EscapedInput.push_back(*i); |
752 | 89.0k | } |
753 | 2.16k | return EscapedInput; |
754 | 2.16k | } |
755 | | |
756 | | Scanner::Scanner(StringRef Input, SourceMgr &sm, bool ShowColors, |
757 | | std::error_code *EC) |
758 | 4.78k | : SM(sm), ShowColors(ShowColors), EC(EC) { |
759 | 4.78k | init(MemoryBufferRef(Input, "YAML")); |
760 | 4.78k | } |
761 | | |
762 | | Scanner::Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors, |
763 | | std::error_code *EC) |
764 | 120 | : SM(SM_), ShowColors(ShowColors), EC(EC) { |
765 | 120 | init(Buffer); |
766 | 120 | } |
767 | | |
768 | 4.90k | void Scanner::init(MemoryBufferRef Buffer) { |
769 | 4.90k | InputBuffer = Buffer; |
770 | 4.90k | Current = InputBuffer.getBufferStart(); |
771 | 4.90k | End = InputBuffer.getBufferEnd(); |
772 | 4.90k | Indent = -1; |
773 | 4.90k | Column = 0; |
774 | 4.90k | Line = 0; |
775 | 4.90k | FlowLevel = 0; |
776 | 4.90k | IsStartOfStream = true; |
777 | 4.90k | IsSimpleKeyAllowed = true; |
778 | 4.90k | Failed = false; |
779 | 4.90k | std::unique_ptr<MemoryBuffer> InputBufferOwner = |
780 | 4.90k | MemoryBuffer::getMemBuffer(Buffer); |
781 | 4.90k | SM.AddNewSourceBuffer(std::move(InputBufferOwner), SMLoc()); |
782 | 4.90k | } |
783 | | |
784 | 4.85M | Token &Scanner::peekNext() { |
785 | 4.85M | // If the current token is a possible simple key, keep parsing until we |
786 | 4.85M | // can confirm. |
787 | 4.85M | bool NeedMore = false; |
788 | 5.36M | while (true) { |
789 | 5.36M | if (TokenQueue.empty() || NeedMore4.77M ) { |
790 | 1.10M | if (!fetchMoreTokens()) { |
791 | 26 | TokenQueue.clear(); |
792 | 26 | TokenQueue.push_back(Token()); |
793 | 26 | return TokenQueue.front(); |
794 | 26 | } |
795 | 5.36M | } |
796 | 5.36M | assert(!TokenQueue.empty() && |
797 | 5.36M | "fetchMoreTokens lied about getting tokens!"); |
798 | 5.36M | |
799 | 5.36M | removeStaleSimpleKeyCandidates(); |
800 | 5.36M | SimpleKey SK; |
801 | 5.36M | SK.Tok = TokenQueue.begin(); |
802 | 5.36M | if (!is_contained(SimpleKeys, SK)) |
803 | 4.85M | break; |
804 | 512k | else |
805 | 512k | NeedMore = true; |
806 | 5.36M | } |
807 | 4.85M | return TokenQueue.front()4.85M ; |
808 | 4.85M | } |
809 | | |
810 | 1.49M | Token Scanner::getNext() { |
811 | 1.49M | Token Ret = peekNext(); |
812 | 1.49M | // TokenQueue can be empty if there was an error getting the next token. |
813 | 1.49M | if (!TokenQueue.empty()) |
814 | 1.49M | TokenQueue.pop_front(); |
815 | 1.49M | |
816 | 1.49M | // There cannot be any referenced Token's if the TokenQueue is empty. So do a |
817 | 1.49M | // quick deallocation of them all. |
818 | 1.49M | if (TokenQueue.empty()) |
819 | 589k | TokenQueue.resetAlloc(); |
820 | 1.49M | |
821 | 1.49M | return Ret; |
822 | 1.49M | } |
823 | | |
824 | 19.7M | StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) { |
825 | 19.7M | if (Position == End) |
826 | 489 | return Position; |
827 | 19.7M | // Check 7 bit c-printable - b-char. |
828 | 19.7M | if ( *Position == 0x09 |
829 | 19.7M | || (19.7M *Position >= 0x2019.7M && *Position <= 0x7E19.3M )) |
830 | 19.3M | return Position + 1; |
831 | 379k | |
832 | 379k | // Check for valid UTF-8. |
833 | 379k | if (uint8_t(*Position) & 0x80) { |
834 | 33 | UTF8Decoded u8d = decodeUTF8(Position); |
835 | 33 | if ( u8d.second != 0 |
836 | 33 | && u8d.first != 0xFEFF32 |
837 | 33 | && ( 31 u8d.first == 0x8531 |
838 | 31 | || ( 11 u8d.first >= 0xA011 |
839 | 11 | && u8d.first <= 0xD7FF) |
840 | 31 | || ( 0 u8d.first >= 0xE0000 |
841 | 0 | && u8d.first <= 0xFFFD) |
842 | 31 | || ( 0 u8d.first >= 0x100000 |
843 | 0 | && u8d.first <= 0x10FFFF))) |
844 | 31 | return Position + u8d.second; |
845 | 379k | } |
846 | 379k | return Position; |
847 | 379k | } |
848 | | |
849 | 1.93M | StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) { |
850 | 1.93M | if (Position == End) |
851 | 3.55k | return Position; |
852 | 1.93M | if (*Position == 0x0D) { |
853 | 641 | if (Position + 1 != End && *(Position + 1) == 0x0A640 ) |
854 | 622 | return Position + 2; |
855 | 19 | return Position + 1; |
856 | 19 | } |
857 | 1.93M | |
858 | 1.93M | if (*Position == 0x0A) |
859 | 831k | return Position + 1; |
860 | 1.09M | return Position; |
861 | 1.09M | } |
862 | | |
863 | 574k | StringRef::iterator Scanner::skip_s_space(StringRef::iterator Position) { |
864 | 574k | if (Position == End) |
865 | 108 | return Position; |
866 | 574k | if (*Position == ' ') |
867 | 526k | return Position + 1; |
868 | 47.6k | return Position; |
869 | 47.6k | } |
870 | | |
871 | 725k | StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) { |
872 | 725k | if (Position == End) |
873 | 1 | return Position; |
874 | 725k | if (*Position == ' ' || *Position == '\t'152k ) |
875 | 572k | return Position + 1; |
876 | 152k | return Position; |
877 | 152k | } |
878 | | |
879 | 19.4k | StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) { |
880 | 19.4k | if (Position == End) |
881 | 0 | return Position; |
882 | 19.4k | if (*Position == ' ' || *Position == '\t'19.3k ) |
883 | 112 | return Position; |
884 | 19.3k | return skip_nb_char(Position); |
885 | 19.3k | } |
886 | | |
887 | | StringRef::iterator Scanner::skip_while( SkipWhileFunc Func |
888 | 314k | , StringRef::iterator Position) { |
889 | 12.2M | while (true) { |
890 | 12.2M | StringRef::iterator i = (this->*Func)(Position); |
891 | 12.2M | if (i == Position) |
892 | 314k | break; |
893 | 11.9M | Position = i; |
894 | 11.9M | } |
895 | 314k | return Position; |
896 | 314k | } |
897 | | |
898 | 298k | void Scanner::advanceWhile(SkipWhileFunc Func) { |
899 | 298k | auto Final = skip_while(Func, Current); |
900 | 298k | Column += Final - Current; |
901 | 298k | Current = Final; |
902 | 298k | } |
903 | | |
/// Return true if \p C is a hexadecimal digit: '0'-'9', 'a'-'f' or 'A'-'F'.
///
/// Letters beyond 'f'/'F' are not hexadecimal digits and are rejected.
static bool is_ns_hex_digit(const char C) {
  return (C >= '0' && C <= '9')
      || (C >= 'a' && C <= 'f')
      || (C >= 'A' && C <= 'F');
}
909 | | |
/// Return true if \p C is a word character: a decimal digit, an ASCII
/// letter, or '-'.
///
/// Digits are included so that URIs such as "tag:yaml.org,2002:str" are
/// accepted in full.
static bool is_ns_word_char(const char C) {
  return C == '-'
      || (C >= '0' && C <= '9')
      || (C >= 'a' && C <= 'z')
      || (C >= 'A' && C <= 'Z');
}
915 | | |
916 | 3 | void Scanner::scan_ns_uri_char() { |
917 | 36 | while (true) { |
918 | 36 | if (Current == End) |
919 | 0 | break; |
920 | 36 | if (( *Current == '%' |
921 | 36 | && Current + 2 < End0 |
922 | 36 | && is_ns_hex_digit(*(Current + 1))0 |
923 | 36 | && is_ns_hex_digit(*(Current + 2))0 ) |
924 | 36 | || is_ns_word_char(*Current) |
925 | 36 | || StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]") |
926 | 33 | != StringRef::npos) { |
927 | 33 | ++Current; |
928 | 33 | ++Column; |
929 | 33 | } else |
930 | 3 | break; |
931 | 36 | } |
932 | 3 | } |
933 | | |
934 | 22 | bool Scanner::consume(uint32_t Expected) { |
935 | 22 | if (Expected >= 0x80) |
936 | 0 | report_fatal_error("Not dealing with this yet"); |
937 | 22 | if (Current == End) |
938 | 0 | return false; |
939 | 22 | if (uint8_t(*Current) >= 0x80) |
940 | 0 | report_fatal_error("Not dealing with this yet"); |
941 | 22 | if (uint8_t(*Current) == Expected) { |
942 | 20 | ++Current; |
943 | 20 | ++Column; |
944 | 20 | return true; |
945 | 20 | } |
946 | 2 | return false; |
947 | 2 | } |
948 | | |
949 | 2.81M | void Scanner::skip(uint32_t Distance) { |
950 | 2.81M | Current += Distance; |
951 | 2.81M | Column += Distance; |
952 | 2.81M | assert(Current <= End && "Skipped past the end"); |
953 | 2.81M | } |
954 | | |
955 | 6.18M | bool Scanner::isBlankOrBreak(StringRef::iterator Position) { |
956 | 6.18M | if (Position == End) |
957 | 921 | return false; |
958 | 6.18M | return *Position == ' ' || *Position == '\t'5.07M || *Position == '\r'5.07M || |
959 | 6.18M | *Position == '\n'5.07M ; |
960 | 6.18M | } |
961 | | |
962 | 298k | bool Scanner::consumeLineBreakIfPresent() { |
963 | 298k | auto Next = skip_b_break(Current); |
964 | 298k | if (Next == Current) |
965 | 3 | return false; |
966 | 298k | Column = 0; |
967 | 298k | ++Line; |
968 | 298k | Current = Next; |
969 | 298k | return true; |
970 | 298k | } |
971 | | |
972 | | void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok |
973 | | , unsigned AtColumn |
974 | 593k | , bool IsRequired) { |
975 | 593k | if (IsSimpleKeyAllowed) { |
976 | 341k | SimpleKey SK; |
977 | 341k | SK.Tok = Tok; |
978 | 341k | SK.Line = Line; |
979 | 341k | SK.Column = AtColumn; |
980 | 341k | SK.IsRequired = IsRequired; |
981 | 341k | SK.FlowLevel = FlowLevel; |
982 | 341k | SimpleKeys.push_back(SK); |
983 | 341k | } |
984 | 593k | } |
985 | | |
986 | 6.46M | void Scanner::removeStaleSimpleKeyCandidates() { |
987 | 6.46M | for (SmallVectorImpl<SimpleKey>::iterator i = SimpleKeys.begin(); |
988 | 7.93M | i != SimpleKeys.end();) { |
989 | 1.46M | if (i->Line != Line || i->Column + 1024 < Column1.43M ) { |
990 | 37.5k | if (i->IsRequired) |
991 | 0 | setError( "Could not find expected : for simple key" |
992 | 0 | , i->Tok->Range.begin()); |
993 | 37.5k | i = SimpleKeys.erase(i); |
994 | 37.5k | } else |
995 | 1.43M | ++i; |
996 | 1.46M | } |
997 | 6.46M | } |
998 | | |
999 | 179k | void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) { |
1000 | 179k | if (!SimpleKeys.empty() && (SimpleKeys.end() - 1)->FlowLevel == Level77.5k ) |
1001 | 20.6k | SimpleKeys.pop_back(); |
1002 | 179k | } |
1003 | | |
1004 | 1.13M | bool Scanner::unrollIndent(int ToColumn) { |
1005 | 1.13M | Token T; |
1006 | 1.13M | // Indentation is ignored in flow. |
1007 | 1.13M | if (FlowLevel != 0) |
1008 | 469k | return true; |
1009 | 662k | |
1010 | 719k | while (662k Indent > ToColumn) { |
1011 | 56.4k | T.Kind = Token::TK_BlockEnd; |
1012 | 56.4k | T.Range = StringRef(Current, 1); |
1013 | 56.4k | TokenQueue.push_back(T); |
1014 | 56.4k | Indent = Indents.pop_back_val(); |
1015 | 56.4k | } |
1016 | 662k | |
1017 | 662k | return true; |
1018 | 662k | } |
1019 | | |
1020 | | bool Scanner::rollIndent( int ToColumn |
1021 | | , Token::TokenKind Kind |
1022 | 327k | , TokenQueueT::iterator InsertPoint) { |
1023 | 327k | if (FlowLevel) |
1024 | 99.8k | return true; |
1025 | 227k | if (Indent < ToColumn) { |
1026 | 56.5k | Indents.push_back(Indent); |
1027 | 56.5k | Indent = ToColumn; |
1028 | 56.5k | |
1029 | 56.5k | Token T; |
1030 | 56.5k | T.Kind = Kind; |
1031 | 56.5k | T.Range = StringRef(Current, 0); |
1032 | 56.5k | TokenQueue.insert(InsertPoint, T); |
1033 | 56.5k | } |
1034 | 227k | return true; |
1035 | 227k | } |
1036 | | |
1037 | 1.50M | void Scanner::skipComment() { |
1038 | 1.50M | if (*Current != '#') |
1039 | 1.44M | return; |
1040 | 3.39M | while (65.7k true) { |
1041 | 3.39M | // This may skip more than one byte, thus Column is only incremented |
1042 | 3.39M | // for code points. |
1043 | 3.39M | StringRef::iterator I = skip_nb_char(Current); |
1044 | 3.39M | if (I == Current) |
1045 | 65.7k | break; |
1046 | 3.33M | Current = I; |
1047 | 3.33M | ++Column; |
1048 | 3.33M | } |
1049 | 65.7k | } |
1050 | | |
1051 | 1.10M | void Scanner::scanToNextToken() { |
1052 | 1.49M | while (true) { |
1053 | 3.63M | while (*Current == ' ' || *Current == '\t'1.49M ) { |
1054 | 2.14M | skip(1); |
1055 | 2.14M | } |
1056 | 1.49M | |
1057 | 1.49M | skipComment(); |
1058 | 1.49M | |
1059 | 1.49M | // Skip EOL. |
1060 | 1.49M | StringRef::iterator i = skip_b_break(Current); |
1061 | 1.49M | if (i == Current) |
1062 | 1.10M | break; |
1063 | 392k | Current = i; |
1064 | 392k | ++Line; |
1065 | 392k | Column = 0; |
1066 | 392k | // New lines may start a simple key. |
1067 | 392k | if (!FlowLevel) |
1068 | 310k | IsSimpleKeyAllowed = true; |
1069 | 392k | } |
1070 | 1.10M | } |
1071 | | |
1072 | 4.90k | bool Scanner::scanStreamStart() { |
1073 | 4.90k | IsStartOfStream = false; |
1074 | 4.90k | |
1075 | 4.90k | EncodingInfo EI = getUnicodeEncoding(currentInput()); |
1076 | 4.90k | |
1077 | 4.90k | Token T; |
1078 | 4.90k | T.Kind = Token::TK_StreamStart; |
1079 | 4.90k | T.Range = StringRef(Current, EI.second); |
1080 | 4.90k | TokenQueue.push_back(T); |
1081 | 4.90k | Current += EI.second; |
1082 | 4.90k | return true; |
1083 | 4.90k | } |
1084 | | |
1085 | 3.55k | bool Scanner::scanStreamEnd() { |
1086 | 3.55k | // Force an ending new line if one isn't present. |
1087 | 3.55k | if (Column != 0) { |
1088 | 431 | Column = 0; |
1089 | 431 | ++Line; |
1090 | 431 | } |
1091 | 3.55k | |
1092 | 3.55k | unrollIndent(-1); |
1093 | 3.55k | SimpleKeys.clear(); |
1094 | 3.55k | IsSimpleKeyAllowed = false; |
1095 | 3.55k | |
1096 | 3.55k | Token T; |
1097 | 3.55k | T.Kind = Token::TK_StreamEnd; |
1098 | 3.55k | T.Range = StringRef(Current, 0); |
1099 | 3.55k | TokenQueue.push_back(T); |
1100 | 3.55k | return true; |
1101 | 3.55k | } |
1102 | | |
1103 | 19 | bool Scanner::scanDirective() { |
1104 | 19 | // Reset the indentation level. |
1105 | 19 | unrollIndent(-1); |
1106 | 19 | SimpleKeys.clear(); |
1107 | 19 | IsSimpleKeyAllowed = false; |
1108 | 19 | |
1109 | 19 | StringRef::iterator Start = Current; |
1110 | 19 | consume('%'); |
1111 | 19 | StringRef::iterator NameStart = Current; |
1112 | 19 | Current = skip_while(&Scanner::skip_ns_char, Current); |
1113 | 19 | StringRef Name(NameStart, Current - NameStart); |
1114 | 19 | Current = skip_while(&Scanner::skip_s_white, Current); |
1115 | 19 | |
1116 | 19 | Token T; |
1117 | 19 | if (Name == "YAML") { |
1118 | 5 | Current = skip_while(&Scanner::skip_ns_char, Current); |
1119 | 5 | T.Kind = Token::TK_VersionDirective; |
1120 | 5 | T.Range = StringRef(Start, Current - Start); |
1121 | 5 | TokenQueue.push_back(T); |
1122 | 5 | return true; |
1123 | 14 | } else if(Name == "TAG") { |
1124 | 13 | Current = skip_while(&Scanner::skip_ns_char, Current); |
1125 | 13 | Current = skip_while(&Scanner::skip_s_white, Current); |
1126 | 13 | Current = skip_while(&Scanner::skip_ns_char, Current); |
1127 | 13 | T.Kind = Token::TK_TagDirective; |
1128 | 13 | T.Range = StringRef(Start, Current - Start); |
1129 | 13 | TokenQueue.push_back(T); |
1130 | 13 | return true; |
1131 | 13 | } |
1132 | 1 | return false; |
1133 | 1 | } |
1134 | | |
1135 | 29.8k | bool Scanner::scanDocumentIndicator(bool IsStart) { |
1136 | 29.8k | unrollIndent(-1); |
1137 | 29.8k | SimpleKeys.clear(); |
1138 | 29.8k | IsSimpleKeyAllowed = false; |
1139 | 29.8k | |
1140 | 29.8k | Token T; |
1141 | 29.8k | T.Kind = IsStart ? Token::TK_DocumentStart15.5k : Token::TK_DocumentEnd14.2k ; |
1142 | 29.8k | T.Range = StringRef(Current, 3); |
1143 | 29.8k | skip(3); |
1144 | 29.8k | TokenQueue.push_back(T); |
1145 | 29.8k | return true; |
1146 | 29.8k | } |
1147 | | |
1148 | 43.1k | bool Scanner::scanFlowCollectionStart(bool IsSequence) { |
1149 | 43.1k | Token T; |
1150 | 43.1k | T.Kind = IsSequence ? Token::TK_FlowSequenceStart6.66k |
1151 | 43.1k | : Token::TK_FlowMappingStart36.5k ; |
1152 | 43.1k | T.Range = StringRef(Current, 1); |
1153 | 43.1k | skip(1); |
1154 | 43.1k | TokenQueue.push_back(T); |
1155 | 43.1k | |
1156 | 43.1k | // [ and { may begin a simple key. |
1157 | 43.1k | saveSimpleKeyCandidate(--TokenQueue.end(), Column - 1, false); |
1158 | 43.1k | |
1159 | 43.1k | // And may also be followed by a simple key. |
1160 | 43.1k | IsSimpleKeyAllowed = true; |
1161 | 43.1k | ++FlowLevel; |
1162 | 43.1k | return true; |
1163 | 43.1k | } |
1164 | | |
1165 | 43.0k | bool Scanner::scanFlowCollectionEnd(bool IsSequence) { |
1166 | 43.0k | removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); |
1167 | 43.0k | IsSimpleKeyAllowed = false; |
1168 | 43.0k | Token T; |
1169 | 43.0k | T.Kind = IsSequence ? Token::TK_FlowSequenceEnd6.63k |
1170 | 43.0k | : Token::TK_FlowMappingEnd36.4k ; |
1171 | 43.0k | T.Range = StringRef(Current, 1); |
1172 | 43.0k | skip(1); |
1173 | 43.0k | TokenQueue.push_back(T); |
1174 | 43.0k | if (FlowLevel) |
1175 | 43.0k | --FlowLevel; |
1176 | 43.0k | return true; |
1177 | 43.0k | } |
1178 | | |
1179 | 92.5k | bool Scanner::scanFlowEntry() { |
1180 | 92.5k | removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); |
1181 | 92.5k | IsSimpleKeyAllowed = true; |
1182 | 92.5k | Token T; |
1183 | 92.5k | T.Kind = Token::TK_FlowEntry; |
1184 | 92.5k | T.Range = StringRef(Current, 1); |
1185 | 92.5k | skip(1); |
1186 | 92.5k | TokenQueue.push_back(T); |
1187 | 92.5k | return true; |
1188 | 92.5k | } |
1189 | | |
1190 | 43.9k | bool Scanner::scanBlockEntry() { |
1191 | 43.9k | rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end()); |
1192 | 43.9k | removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); |
1193 | 43.9k | IsSimpleKeyAllowed = true; |
1194 | 43.9k | Token T; |
1195 | 43.9k | T.Kind = Token::TK_BlockEntry; |
1196 | 43.9k | T.Range = StringRef(Current, 1); |
1197 | 43.9k | skip(1); |
1198 | 43.9k | TokenQueue.push_back(T); |
1199 | 43.9k | return true; |
1200 | 43.9k | } |
1201 | | |
1202 | 31 | bool Scanner::scanKey() { |
1203 | 31 | if (!FlowLevel) |
1204 | 17 | rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end()); |
1205 | 31 | |
1206 | 31 | removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); |
1207 | 31 | IsSimpleKeyAllowed = !FlowLevel; |
1208 | 31 | |
1209 | 31 | Token T; |
1210 | 31 | T.Kind = Token::TK_Key; |
1211 | 31 | T.Range = StringRef(Current, 1); |
1212 | 31 | skip(1); |
1213 | 31 | TokenQueue.push_back(T); |
1214 | 31 | return true; |
1215 | 31 | } |
1216 | | |
1217 | 283k | bool Scanner::scanValue() { |
1218 | 283k | // If the previous token could have been a simple key, insert the key token |
1219 | 283k | // into the token queue. |
1220 | 283k | if (!SimpleKeys.empty()) { |
1221 | 283k | SimpleKey SK = SimpleKeys.pop_back_val(); |
1222 | 283k | Token T; |
1223 | 283k | T.Kind = Token::TK_Key; |
1224 | 283k | T.Range = SK.Tok->Range; |
1225 | 283k | TokenQueueT::iterator i, e; |
1226 | 625k | for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e; ++i341k ) { |
1227 | 625k | if (i == SK.Tok) |
1228 | 283k | break; |
1229 | 625k | } |
1230 | 283k | assert(i != e && "SimpleKey not in token queue!"); |
1231 | 283k | i = TokenQueue.insert(i, T); |
1232 | 283k | |
1233 | 283k | // We may also need to add a Block-Mapping-Start token. |
1234 | 283k | rollIndent(SK.Column, Token::TK_BlockMappingStart, i); |
1235 | 283k | |
1236 | 283k | IsSimpleKeyAllowed = false; |
1237 | 283k | } else { |
1238 | 23 | if (!FlowLevel) |
1239 | 8 | rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end()); |
1240 | 23 | IsSimpleKeyAllowed = !FlowLevel; |
1241 | 23 | } |
1242 | 283k | |
1243 | 283k | Token T; |
1244 | 283k | T.Kind = Token::TK_Value; |
1245 | 283k | T.Range = StringRef(Current, 1); |
1246 | 283k | skip(1); |
1247 | 283k | TokenQueue.push_back(T); |
1248 | 283k | return true; |
1249 | 283k | } |
1250 | | |
1251 | | // Forbidding inlining improves performance by roughly 20%. |
1252 | | // FIXME: Remove once llvm optimizes this to the faster version without hints. |
1253 | | LLVM_ATTRIBUTE_NOINLINE static bool |
1254 | | wasEscaped(StringRef::iterator First, StringRef::iterator Position); |
1255 | | |
1256 | | // Returns whether a character at 'Position' was escaped with a leading '\'. |
1257 | | // 'First' specifies the position of the first character in the string. |
1258 | | static bool wasEscaped(StringRef::iterator First, |
1259 | 64 | StringRef::iterator Position) { |
1260 | 64 | assert(Position - 1 >= First); |
1261 | 64 | StringRef::iterator I = Position - 1; |
1262 | 64 | // We calculate the number of consecutive '\'s before the current position |
1263 | 64 | // by iterating backwards through our string. |
1264 | 164 | while (I >= First && *I == '\\'147 ) --I100 ; |
1265 | 64 | // (Position - 1 - I) now contains the number of '\'s before the current |
1266 | 64 | // position. If it is odd, the character at 'Position' was escaped. |
1267 | 64 | return (Position - 1 - I) % 2 == 1; |
1268 | 64 | } |
1269 | | |
1270 | 99.7k | bool Scanner::scanFlowScalar(bool IsDoubleQuoted) { |
1271 | 99.7k | StringRef::iterator Start = Current; |
1272 | 99.7k | unsigned ColStart = Column; |
1273 | 99.7k | if (IsDoubleQuoted) { |
1274 | 78.8k | do { |
1275 | 78.8k | ++Current; |
1276 | 21.9M | while (Current != End && *Current != '"'21.9M ) |
1277 | 21.8M | ++Current; |
1278 | 78.8k | // Repeat until the previous character was not a '\' or was an escaped |
1279 | 78.8k | // backslash. |
1280 | 78.8k | } while ( Current != End |
1281 | 78.8k | && *(Current - 1) == '\\'78.8k |
1282 | 78.8k | && wasEscaped(Start + 1, Current)64 ); |
1283 | 78.7k | } else { |
1284 | 20.9k | skip(1); |
1285 | 419k | while (true) { |
1286 | 419k | // Skip a ' followed by another '. |
1287 | 419k | if (Current + 1 < End && *Current == '\''419k && *(Current + 1) == '\''20.9k ) { |
1288 | 9 | skip(2); |
1289 | 9 | continue; |
1290 | 419k | } else if (*Current == '\'') |
1291 | 20.9k | break; |
1292 | 398k | StringRef::iterator i = skip_nb_char(Current); |
1293 | 398k | if (i == Current) { |
1294 | 15 | i = skip_b_break(Current); |
1295 | 15 | if (i == Current) |
1296 | 1 | break; |
1297 | 14 | Current = i; |
1298 | 14 | Column = 0; |
1299 | 14 | ++Line; |
1300 | 398k | } else { |
1301 | 398k | if (i == End) |
1302 | 0 | break; |
1303 | 398k | Current = i; |
1304 | 398k | ++Column; |
1305 | 398k | } |
1306 | 398k | } |
1307 | 20.9k | } |
1308 | 99.7k | |
1309 | 99.7k | if (Current == End) { |
1310 | 11 | setError("Expected quote at end of scalar", Current); |
1311 | 11 | return false; |
1312 | 11 | } |
1313 | 99.7k | |
1314 | 99.7k | skip(1); // Skip ending quote. |
1315 | 99.7k | Token T; |
1316 | 99.7k | T.Kind = Token::TK_Scalar; |
1317 | 99.7k | T.Range = StringRef(Start, Current - Start); |
1318 | 99.7k | TokenQueue.push_back(T); |
1319 | 99.7k | |
1320 | 99.7k | saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); |
1321 | 99.7k | |
1322 | 99.7k | IsSimpleKeyAllowed = false; |
1323 | 99.7k | |
1324 | 99.7k | return true; |
1325 | 99.7k | } |
1326 | | |
1327 | 446k | bool Scanner::scanPlainScalar() { |
1328 | 446k | StringRef::iterator Start = Current; |
1329 | 446k | unsigned ColStart = Column; |
1330 | 446k | unsigned LeadingBlanks = 0; |
1331 | 446k | assert(Indent >= -1 && "Indent must be >= -1 !"); |
1332 | 446k | unsigned indent = static_cast<unsigned>(Indent + 1); |
1333 | 467k | while (true) { |
1334 | 467k | if (*Current == '#') |
1335 | 324 | break; |
1336 | 467k | |
1337 | 3.83M | while (467k !isBlankOrBreak(Current)) { |
1338 | 3.67M | if ( FlowLevel && *Current == ':'774k |
1339 | 3.67M | && !(58.6k isBlankOrBreak(Current + 1)58.6k || *(Current + 1) == ','3 )) { |
1340 | 2 | setError("Found unexpected ':' while scanning a plain scalar", Current); |
1341 | 2 | return false; |
1342 | 2 | } |
1343 | 3.67M | |
1344 | 3.67M | // Check for the end of the plain scalar. |
1345 | 3.67M | if ( (*Current == ':' && isBlankOrBreak(Current + 1)242k ) |
1346 | 3.67M | || ( 3.43M FlowLevel3.43M |
1347 | 3.43M | && (StringRef(Current, 1).find_first_of(",:?[]{}") |
1348 | 716k | != StringRef::npos))) |
1349 | 310k | break; |
1350 | 3.36M | |
1351 | 3.36M | StringRef::iterator i = skip_nb_char(Current); |
1352 | 3.36M | if (i == Current) |
1353 | 261 | break; |
1354 | 3.36M | Current = i; |
1355 | 3.36M | ++Column; |
1356 | 3.36M | } |
1357 | 467k | |
1358 | 467k | // Are we at the end? |
1359 | 467k | if (467k !isBlankOrBreak(Current)467k ) |
1360 | 310k | break; |
1361 | 156k | |
1362 | 156k | // Eat blanks. |
1363 | 156k | StringRef::iterator Tmp = Current; |
1364 | 869k | while (isBlankOrBreak(Tmp)) { |
1365 | 712k | StringRef::iterator i = skip_s_white(Tmp); |
1366 | 712k | if (i != Tmp) { |
1367 | 572k | if (LeadingBlanks && (Column < indent)551k && *Tmp == '\t'551k ) { |
1368 | 0 | setError("Found invalid tab character in indentation", Tmp); |
1369 | 0 | return false; |
1370 | 0 | } |
1371 | 572k | Tmp = i; |
1372 | 572k | ++Column; |
1373 | 572k | } else { |
1374 | 140k | i = skip_b_break(Tmp); |
1375 | 140k | if (!LeadingBlanks) |
1376 | 135k | LeadingBlanks = 1; |
1377 | 140k | Tmp = i; |
1378 | 140k | Column = 0; |
1379 | 140k | ++Line; |
1380 | 140k | } |
1381 | 712k | } |
1382 | 156k | |
1383 | 156k | if (!FlowLevel && Column < indent136k ) |
1384 | 135k | break; |
1385 | 20.5k | |
1386 | 20.5k | Current = Tmp; |
1387 | 20.5k | } |
1388 | 446k | if (446k Start == Current446k ) { |
1389 | 3 | setError("Got empty plain scalar", Start); |
1390 | 3 | return false; |
1391 | 3 | } |
1392 | 446k | Token T; |
1393 | 446k | T.Kind = Token::TK_Scalar; |
1394 | 446k | T.Range = StringRef(Start, Current - Start); |
1395 | 446k | TokenQueue.push_back(T); |
1396 | 446k | |
1397 | 446k | // Plain scalars can be simple keys. |
1398 | 446k | saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); |
1399 | 446k | |
1400 | 446k | IsSimpleKeyAllowed = false; |
1401 | 446k | |
1402 | 446k | return true; |
1403 | 446k | } |
1404 | | |
1405 | 29 | bool Scanner::scanAliasOrAnchor(bool IsAlias) { |
1406 | 29 | StringRef::iterator Start = Current; |
1407 | 29 | unsigned ColStart = Column; |
1408 | 29 | skip(1); |
1409 | 168 | while(true) { |
1410 | 168 | if ( *Current == '[' || *Current == ']' |
1411 | 168 | || *Current == '{' || *Current == '}' |
1412 | 168 | || *Current == ',' |
1413 | 168 | || *Current == ':'164 ) |
1414 | 6 | break; |
1415 | 162 | StringRef::iterator i = skip_ns_char(Current); |
1416 | 162 | if (i == Current) |
1417 | 23 | break; |
1418 | 139 | Current = i; |
1419 | 139 | ++Column; |
1420 | 139 | } |
1421 | 29 | |
1422 | 29 | if (Start == Current) { |
1423 | 0 | setError("Got empty alias or anchor", Start); |
1424 | 0 | return false; |
1425 | 0 | } |
1426 | 29 | |
1427 | 29 | Token T; |
1428 | 29 | T.Kind = IsAlias ? Token::TK_Alias16 : Token::TK_Anchor13 ; |
1429 | 29 | T.Range = StringRef(Start, Current - Start); |
1430 | 29 | TokenQueue.push_back(T); |
1431 | 29 | |
1432 | 29 | // Alias and anchors can be simple keys. |
1433 | 29 | saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); |
1434 | 29 | |
1435 | 29 | IsSimpleKeyAllowed = false; |
1436 | 29 | |
1437 | 29 | return true; |
1438 | 29 | } |
1439 | | |
1440 | 24.9k | char Scanner::scanBlockChompingIndicator() { |
1441 | 24.9k | char Indicator = ' '; |
1442 | 24.9k | if (Current != End && (*Current == '+' || *Current == '-'24.9k )) { |
1443 | 17 | Indicator = *Current; |
1444 | 17 | skip(1); |
1445 | 17 | } |
1446 | 24.9k | return Indicator; |
1447 | 24.9k | } |
1448 | | |
1449 | | /// Get the number of line breaks after chomping. |
1450 | | /// |
1451 | | /// Return the number of trailing line breaks to emit, depending on |
1452 | | /// \p ChompingIndicator. |
1453 | | static unsigned getChompedLineBreaks(char ChompingIndicator, |
1454 | 12.4k | unsigned LineBreaks, StringRef Str) { |
1455 | 12.4k | if (ChompingIndicator == '-') // Strip all line breaks. |
1456 | 11 | return 0; |
1457 | 12.4k | if (ChompingIndicator == '+') // Keep all line breaks. |
1458 | 6 | return LineBreaks; |
1459 | 12.4k | // Clip trailing lines. |
1460 | 12.4k | return Str.empty() ? 07 : 112.4k ; |
1461 | 12.4k | } |
1462 | | |
1463 | 12.5k | unsigned Scanner::scanBlockIndentationIndicator() { |
1464 | 12.5k | unsigned Indent = 0; |
1465 | 12.5k | if (Current != End && (*Current >= '1' && *Current <= '9'7 )) { |
1466 | 6 | Indent = unsigned(*Current - '0'); |
1467 | 6 | skip(1); |
1468 | 6 | } |
1469 | 12.5k | return Indent; |
1470 | 12.5k | } |
1471 | | |
1472 | | bool Scanner::scanBlockScalarHeader(char &ChompingIndicator, |
1473 | 12.5k | unsigned &IndentIndicator, bool &IsDone) { |
1474 | 12.5k | auto Start = Current; |
1475 | 12.5k | |
1476 | 12.5k | ChompingIndicator = scanBlockChompingIndicator(); |
1477 | 12.5k | IndentIndicator = scanBlockIndentationIndicator(); |
1478 | 12.5k | // Check for the chomping indicator once again. |
1479 | 12.5k | if (ChompingIndicator == ' ') |
1480 | 12.4k | ChompingIndicator = scanBlockChompingIndicator(); |
1481 | 12.5k | Current = skip_while(&Scanner::skip_s_white, Current); |
1482 | 12.5k | skipComment(); |
1483 | 12.5k | |
1484 | 12.5k | if (Current == End) { // EOF, we have an empty scalar. |
1485 | 2 | Token T; |
1486 | 2 | T.Kind = Token::TK_BlockScalar; |
1487 | 2 | T.Range = StringRef(Start, Current - Start); |
1488 | 2 | TokenQueue.push_back(T); |
1489 | 2 | IsDone = true; |
1490 | 2 | return true; |
1491 | 2 | } |
1492 | 12.4k | |
1493 | 12.4k | if (!consumeLineBreakIfPresent()) { |
1494 | 3 | setError("Expected a line break after block scalar header", Current); |
1495 | 3 | return false; |
1496 | 3 | } |
1497 | 12.4k | return true; |
1498 | 12.4k | } |
1499 | | |
1500 | | bool Scanner::findBlockScalarIndent(unsigned &BlockIndent, |
1501 | | unsigned BlockExitIndent, |
1502 | 12.4k | unsigned &LineBreaks, bool &IsDone) { |
1503 | 12.4k | unsigned MaxAllSpaceLineCharacters = 0; |
1504 | 12.4k | StringRef::iterator LongestAllSpaceLine; |
1505 | 12.4k | |
1506 | 12.9k | while (true) { |
1507 | 12.9k | advanceWhile(&Scanner::skip_s_space); |
1508 | 12.9k | if (skip_nb_char(Current) != Current) { |
1509 | 12.4k | // This line isn't empty, so try and find the indentation. |
1510 | 12.4k | if (Column <= BlockExitIndent) { // End of the block literal. |
1511 | 8 | IsDone = true; |
1512 | 8 | return true; |
1513 | 8 | } |
1514 | 12.4k | // We found the block's indentation. |
1515 | 12.4k | BlockIndent = Column; |
1516 | 12.4k | if (MaxAllSpaceLineCharacters > BlockIndent) { |
1517 | 1 | setError( |
1518 | 1 | "Leading all-spaces line must be smaller than the block indent", |
1519 | 1 | LongestAllSpaceLine); |
1520 | 1 | return false; |
1521 | 1 | } |
1522 | 12.4k | return true; |
1523 | 12.4k | } |
1524 | 423 | if (skip_b_break(Current) != Current && |
1525 | 423 | Column > MaxAllSpaceLineCharacters422 ) { |
1526 | 16 | // Record the longest all-space line in case it's longer than the |
1527 | 16 | // discovered block indent. |
1528 | 16 | MaxAllSpaceLineCharacters = Column; |
1529 | 16 | LongestAllSpaceLine = Current; |
1530 | 16 | } |
1531 | 423 | |
1532 | 423 | // Check for EOF. |
1533 | 423 | if (Current == End) { |
1534 | 1 | IsDone = true; |
1535 | 1 | return true; |
1536 | 1 | } |
1537 | 422 | |
1538 | 422 | if (!consumeLineBreakIfPresent()) { |
1539 | 0 | IsDone = true; |
1540 | 0 | return true; |
1541 | 0 | } |
1542 | 422 | ++LineBreaks; |
1543 | 422 | } |
1544 | 12.4k | return true0 ; |
1545 | 12.4k | } |
1546 | | |
1547 | | bool Scanner::scanBlockScalarIndent(unsigned BlockIndent, |
1548 | 298k | unsigned BlockExitIndent, bool &IsDone) { |
1549 | 298k | // Skip the indentation. |
1550 | 800k | while (Column < BlockIndent) { |
1551 | 536k | auto I = skip_s_space(Current); |
1552 | 536k | if (I == Current) |
1553 | 34.8k | break; |
1554 | 501k | Current = I; |
1555 | 501k | ++Column; |
1556 | 501k | } |
1557 | 298k | |
1558 | 298k | if (skip_nb_char(Current) == Current) |
1559 | 24.6k | return true; |
1560 | 273k | |
1561 | 273k | if (Column <= BlockExitIndent) { // End of the block literal. |
1562 | 12.3k | IsDone = true; |
1563 | 12.3k | return true; |
1564 | 12.3k | } |
1565 | 261k | |
1566 | 261k | if (Column < BlockIndent) { |
1567 | 4 | if (Current != End && *Current == '#') { // Trailing comment. |
1568 | 2 | IsDone = true; |
1569 | 2 | return true; |
1570 | 2 | } |
1571 | 2 | setError("A text line is less indented than the block scalar", Current); |
1572 | 2 | return false; |
1573 | 2 | } |
1574 | 261k | return true; // A normal text line. |
1575 | 261k | } |
1576 | | |
1577 | 12.5k | bool Scanner::scanBlockScalar(bool IsLiteral) { |
1578 | 12.5k | // Eat '|' or '>' |
1579 | 12.5k | assert(*Current == '|' || *Current == '>'); |
1580 | 12.5k | skip(1); |
1581 | 12.5k | |
1582 | 12.5k | char ChompingIndicator; |
1583 | 12.5k | unsigned BlockIndent; |
1584 | 12.5k | bool IsDone = false; |
1585 | 12.5k | if (!scanBlockScalarHeader(ChompingIndicator, BlockIndent, IsDone)) |
1586 | 3 | return false; |
1587 | 12.4k | if (IsDone) |
1588 | 2 | return true; |
1589 | 12.4k | |
1590 | 12.4k | auto Start = Current; |
1591 | 12.4k | unsigned BlockExitIndent = Indent < 0 ? 01.14k : (unsigned)Indent11.3k ; |
1592 | 12.4k | unsigned LineBreaks = 0; |
1593 | 12.4k | if (BlockIndent == 0) { |
1594 | 12.4k | if (!findBlockScalarIndent(BlockIndent, BlockExitIndent, LineBreaks, |
1595 | 12.4k | IsDone)) |
1596 | 1 | return false; |
1597 | 12.4k | } |
1598 | 12.4k | |
1599 | 12.4k | // Scan the block's scalars body. |
1600 | 12.4k | SmallString<256> Str; |
1601 | 298k | while (!IsDone) { |
1602 | 298k | if (!scanBlockScalarIndent(BlockIndent, BlockExitIndent, IsDone)) |
1603 | 2 | return false; |
1604 | 298k | if (IsDone) |
1605 | 12.3k | break; |
1606 | 285k | |
1607 | 285k | // Parse the current line. |
1608 | 285k | auto LineStart = Current; |
1609 | 285k | advanceWhile(&Scanner::skip_nb_char); |
1610 | 285k | if (LineStart != Current) { |
1611 | 261k | Str.append(LineBreaks, '\n'); |
1612 | 261k | Str.append(StringRef(LineStart, Current - LineStart)); |
1613 | 261k | LineBreaks = 0; |
1614 | 261k | } |
1615 | 285k | |
1616 | 285k | // Check for EOF. |
1617 | 285k | if (Current == End) |
1618 | 109 | break; |
1619 | 285k | |
1620 | 285k | if (!consumeLineBreakIfPresent()) |
1621 | 0 | break; |
1622 | 285k | ++LineBreaks; |
1623 | 285k | } |
1624 | 12.4k | |
1625 | 12.4k | if (12.4k Current == End12.4k && !LineBreaks110 ) |
1626 | 3 | // Ensure that there is at least one line break before the end of file. |
1627 | 3 | LineBreaks = 1; |
1628 | 12.4k | Str.append(getChompedLineBreaks(ChompingIndicator, LineBreaks, Str), '\n'); |
1629 | 12.4k | |
1630 | 12.4k | // New lines may start a simple key. |
1631 | 12.4k | if (!FlowLevel) |
1632 | 12.4k | IsSimpleKeyAllowed = true; |
1633 | 12.4k | |
1634 | 12.4k | Token T; |
1635 | 12.4k | T.Kind = Token::TK_BlockScalar; |
1636 | 12.4k | T.Range = StringRef(Start, Current - Start); |
1637 | 12.4k | T.Value = Str.str().str(); |
1638 | 12.4k | TokenQueue.push_back(T); |
1639 | 12.4k | return true; |
1640 | 12.4k | } |
1641 | | |
1642 | 3.51k | bool Scanner::scanTag() { |
1643 | 3.51k | StringRef::iterator Start = Current; |
1644 | 3.51k | unsigned ColStart = Column; |
1645 | 3.51k | skip(1); // Eat !. |
1646 | 3.51k | if (Current == End || isBlankOrBreak(Current));2 // An empty tag. |
1647 | 3.51k | else if (*Current == '<') { |
1648 | 3 | skip(1); |
1649 | 3 | scan_ns_uri_char(); |
1650 | 3 | if (!consume('>')) |
1651 | 2 | return false; |
1652 | 3.50k | } else { |
1653 | 3.50k | // FIXME: Actually parse the c-ns-shorthand-tag rule. |
1654 | 3.50k | Current = skip_while(&Scanner::skip_ns_char, Current); |
1655 | 3.50k | } |
1656 | 3.51k | |
1657 | 3.51k | Token T; |
1658 | 3.51k | T.Kind = Token::TK_Tag; |
1659 | 3.51k | T.Range = StringRef(Start, Current - Start); |
1660 | 3.51k | TokenQueue.push_back(T); |
1661 | 3.51k | |
1662 | 3.51k | // Tags can be simple keys. |
1663 | 3.51k | saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); |
1664 | 3.51k | |
1665 | 3.51k | IsSimpleKeyAllowed = false; |
1666 | 3.51k | |
1667 | 3.51k | return true; |
1668 | 3.51k | } |
1669 | | |
1670 | 1.10M | bool Scanner::fetchMoreTokens() { |
1671 | 1.10M | if (IsStartOfStream) |
1672 | 4.90k | return scanStreamStart(); |
1673 | 1.10M | |
1674 | 1.10M | scanToNextToken(); |
1675 | 1.10M | |
1676 | 1.10M | if (Current == End) |
1677 | 3.55k | return scanStreamEnd(); |
1678 | 1.09M | |
1679 | 1.09M | removeStaleSimpleKeyCandidates(); |
1680 | 1.09M | |
1681 | 1.09M | unrollIndent(Column); |
1682 | 1.09M | |
1683 | 1.09M | if (Column == 0 && *Current == '%'120k ) |
1684 | 19 | return scanDirective(); |
1685 | 1.09M | |
1686 | 1.09M | if (Column == 0 && Current + 4 <= End120k |
1687 | 1.09M | && *Current == '-'120k |
1688 | 1.09M | && *(Current + 1) == '-'16.6k |
1689 | 1.09M | && *(Current + 2) == '-'15.5k |
1690 | 1.09M | && (15.5k Current + 3 == End15.5k || isBlankOrBreak(Current + 3)15.5k )) |
1691 | 15.5k | return scanDocumentIndicator(true); |
1692 | 1.08M | |
1693 | 1.08M | if (Column == 0 && Current + 4 <= End105k |
1694 | 1.08M | && *Current == '.'105k |
1695 | 1.08M | && *(Current + 1) == '.'14.2k |
1696 | 1.08M | && *(Current + 2) == '.'14.2k |
1697 | 1.08M | && (14.2k Current + 3 == End14.2k || isBlankOrBreak(Current + 3)14.2k )) |
1698 | 14.2k | return scanDocumentIndicator(false); |
1699 | 1.06M | |
1700 | 1.06M | if (*Current == '[') |
1701 | 6.66k | return scanFlowCollectionStart(true); |
1702 | 1.06M | |
1703 | 1.06M | if (*Current == '{') |
1704 | 36.5k | return scanFlowCollectionStart(false); |
1705 | 1.02M | |
1706 | 1.02M | if (*Current == ']') |
1707 | 6.63k | return scanFlowCollectionEnd(true); |
1708 | 1.01M | |
1709 | 1.01M | if (*Current == '}') |
1710 | 36.4k | return scanFlowCollectionEnd(false); |
1711 | 982k | |
1712 | 982k | if (*Current == ',') |
1713 | 92.5k | return scanFlowEntry(); |
1714 | 890k | |
1715 | 890k | if (*Current == '-' && isBlankOrBreak(Current + 1)44.9k ) |
1716 | 43.9k | return scanBlockEntry(); |
1717 | 846k | |
1718 | 846k | if (*Current == '?' && (31 FlowLevel31 || isBlankOrBreak(Current + 1)17 )) |
1719 | 31 | return scanKey(); |
1720 | 846k | |
1721 | 846k | if (*Current == ':' && (283k FlowLevel283k || isBlankOrBreak(Current + 1)183k )) |
1722 | 283k | return scanValue(); |
1723 | 562k | |
1724 | 562k | if (*Current == '*') |
1725 | 16 | return scanAliasOrAnchor(true); |
1726 | 562k | |
1727 | 562k | if (*Current == '&') |
1728 | 13 | return scanAliasOrAnchor(false); |
1729 | 562k | |
1730 | 562k | if (*Current == '!') |
1731 | 3.51k | return scanTag(); |
1732 | 559k | |
1733 | 559k | if (*Current == '|' && !FlowLevel12.4k ) |
1734 | 12.4k | return scanBlockScalar(true); |
1735 | 546k | |
1736 | 546k | if (*Current == '>' && !FlowLevel20 ) |
1737 | 20 | return scanBlockScalar(false); |
1738 | 546k | |
1739 | 546k | if (*Current == '\'') |
1740 | 20.9k | return scanFlowScalar(false); |
1741 | 525k | |
1742 | 525k | if (*Current == '"') |
1743 | 78.7k | return scanFlowScalar(true); |
1744 | 446k | |
1745 | 446k | // Get a plain scalar. |
1746 | 446k | StringRef FirstChar(Current, 1); |
1747 | 446k | if (!(isBlankOrBreak(Current) |
1748 | 446k | || FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") != StringRef::npos) |
1749 | 446k | || (1.03k *Current == '-'1.03k && !isBlankOrBreak(Current + 1)1.03k ) |
1750 | 446k | || (2 !FlowLevel2 && (2 *Current == '?'2 || *Current == ':'2 ) |
1751 | 2 | && isBlankOrBreak(Current + 1)1 ) |
1752 | 446k | || (2 !FlowLevel2 && *Current == ':'2 |
1753 | 2 | && Current + 2 < End1 |
1754 | 2 | && *(Current + 1) == ':'1 |
1755 | 2 | && !isBlankOrBreak(Current + 2)1 )) |
1756 | 446k | return scanPlainScalar(); |
1757 | 1 | |
1758 | 1 | setError("Unrecognized character while tokenizing."); |
1759 | 1 | return false; |
1760 | 1 | } |
1761 | | |
1762 | | Stream::Stream(StringRef Input, SourceMgr &SM, bool ShowColors, |
1763 | | std::error_code *EC) |
1764 | 4.78k | : scanner(new Scanner(Input, SM, ShowColors, EC)), CurrentDoc() {} |
1765 | | |
1766 | | Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM, bool ShowColors, |
1767 | | std::error_code *EC) |
1768 | 120 | : scanner(new Scanner(InputBuffer, SM, ShowColors, EC)), CurrentDoc() {} |
1769 | | |
1770 | 4.79k | Stream::~Stream() = default; |
1771 | | |
1772 | 935 | bool Stream::failed() { return scanner->failed(); } |
1773 | | |
1774 | 128 | void Stream::printError(Node *N, const Twine &Msg) { |
1775 | 128 | scanner->printError( N->getSourceRange().Start |
1776 | 128 | , SourceMgr::DK_Error |
1777 | 128 | , Msg |
1778 | 128 | , N->getSourceRange()); |
1779 | 128 | } |
1780 | | |
1781 | 4.90k | document_iterator Stream::begin() { |
1782 | 4.90k | if (CurrentDoc) |
1783 | 0 | report_fatal_error("Can only iterate over the stream once"); |
1784 | 4.90k | |
1785 | 4.90k | // Skip Stream-Start. |
1786 | 4.90k | scanner->getNext(); |
1787 | 4.90k | |
1788 | 4.90k | CurrentDoc.reset(new Document(*this)); |
1789 | 4.90k | return document_iterator(CurrentDoc); |
1790 | 4.90k | } |
1791 | | |
1792 | 32.1k | document_iterator Stream::end() { |
1793 | 32.1k | return document_iterator(); |
1794 | 32.1k | } |
1795 | | |
1796 | 53 | void Stream::skip() { |
1797 | 107 | for (document_iterator i = begin(), e = end(); i != e; ++i54 ) |
1798 | 54 | i->skip(); |
1799 | 53 | } |
1800 | | |
1801 | | Node::Node(unsigned int Type, std::unique_ptr<Document> &D, StringRef A, |
1802 | | StringRef T) |
1803 | 945k | : Doc(D), TypeID(Type), Anchor(A), Tag(T) { |
1804 | 945k | SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin()); |
1805 | 945k | SourceRange = SMRange(Start, Start); |
1806 | 945k | } |
1807 | | |
1808 | 13.4k | std::string Node::getVerbatimTag() const { |
1809 | 13.4k | StringRef Raw = getRawTag(); |
1810 | 13.4k | if (!Raw.empty() && Raw != "!"5.58k ) { |
1811 | 5.58k | std::string Ret; |
1812 | 5.58k | if (Raw.find_last_of('!') == 0) { |
1813 | 5.55k | Ret = Doc->getTagMap().find("!")->second; |
1814 | 5.55k | Ret += Raw.substr(1); |
1815 | 5.55k | return Ret; |
1816 | 5.55k | } else if (37 Raw.startswith("!!")37 ) { |
1817 | 31 | Ret = Doc->getTagMap().find("!!")->second; |
1818 | 31 | Ret += Raw.substr(2); |
1819 | 31 | return Ret; |
1820 | 31 | } else { |
1821 | 6 | StringRef TagHandle = Raw.substr(0, Raw.find_last_of('!') + 1); |
1822 | 6 | std::map<StringRef, StringRef>::const_iterator It = |
1823 | 6 | Doc->getTagMap().find(TagHandle); |
1824 | 6 | if (It != Doc->getTagMap().end()) |
1825 | 4 | Ret = It->second; |
1826 | 2 | else { |
1827 | 2 | Token T; |
1828 | 2 | T.Kind = Token::TK_Tag; |
1829 | 2 | T.Range = TagHandle; |
1830 | 2 | setError(Twine("Unknown tag handle ") + TagHandle, T); |
1831 | 2 | } |
1832 | 6 | Ret += Raw.substr(Raw.find_last_of('!') + 1); |
1833 | 6 | return Ret; |
1834 | 6 | } |
1835 | 7.89k | } |
1836 | 7.89k | |
1837 | 7.89k | switch (getType()) { |
1838 | 7.89k | case NK_Null: |
1839 | 42 | return "tag:yaml.org,2002:null"; |
1840 | 7.89k | case NK_Scalar: |
1841 | 7.48k | case NK_BlockScalar: |
1842 | 7.48k | // TODO: Tag resolution. |
1843 | 7.48k | return "tag:yaml.org,2002:str"; |
1844 | 7.48k | case NK_Mapping: |
1845 | 276 | return "tag:yaml.org,2002:map"; |
1846 | 7.48k | case NK_Sequence: |
1847 | 94 | return "tag:yaml.org,2002:seq"; |
1848 | 0 | } |
1849 | 0 | |
1850 | 0 | return ""; |
1851 | 0 | } |
1852 | | |
1853 | 2.63M | Token &Node::peekNext() { |
1854 | 2.63M | return Doc->peekNext(); |
1855 | 2.63M | } |
1856 | | |
1857 | 802k | Token Node::getNext() { |
1858 | 802k | return Doc->getNext(); |
1859 | 802k | } |
1860 | | |
1861 | 642k | Node *Node::parseBlockNode() { |
1862 | 642k | return Doc->parseBlockNode(); |
1863 | 642k | } |
1864 | | |
1865 | 286k | BumpPtrAllocator &Node::getAllocator() { |
1866 | 286k | return Doc->NodeAllocator; |
1867 | 286k | } |
1868 | | |
1869 | 22 | void Node::setError(const Twine &Msg, Token &Tok) const { |
1870 | 22 | Doc->setError(Msg, Tok); |
1871 | 22 | } |
1872 | | |
1873 | 837k | bool Node::failed() const { |
1874 | 837k | return Doc->failed(); |
1875 | 837k | } |
1876 | | |
1877 | 492k | StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const { |
1878 | 492k | // TODO: Handle newlines properly. We need to remove leading whitespace. |
1879 | 492k | if (Value[0] == '"') { // Double quoted. |
1880 | 52.8k | // Pull off the leading and trailing "s. |
1881 | 52.8k | StringRef UnquotedValue = Value.substr(1, Value.size() - 2); |
1882 | 52.8k | // Search for characters that would require unescaping the value. |
1883 | 52.8k | StringRef::size_type i = UnquotedValue.find_first_of("\\\r\n"); |
1884 | 52.8k | if (i != StringRef::npos) |
1885 | 38 | return unescapeDoubleQuoted(UnquotedValue, i, Storage); |
1886 | 52.8k | return UnquotedValue; |
1887 | 439k | } else if (Value[0] == '\'') { // Single quoted. |
1888 | 20.5k | // Pull off the leading and trailing 's. |
1889 | 20.5k | StringRef UnquotedValue = Value.substr(1, Value.size() - 2); |
1890 | 20.5k | StringRef::size_type i = UnquotedValue.find('\''); |
1891 | 20.5k | if (i != StringRef::npos) { |
1892 | 8 | // We're going to need Storage. |
1893 | 8 | Storage.clear(); |
1894 | 8 | Storage.reserve(UnquotedValue.size()); |
1895 | 17 | for (; i != StringRef::npos; i = UnquotedValue.find('\'')9 ) { |
1896 | 9 | StringRef Valid(UnquotedValue.begin(), i); |
1897 | 9 | Storage.insert(Storage.end(), Valid.begin(), Valid.end()); |
1898 | 9 | Storage.push_back('\''); |
1899 | 9 | UnquotedValue = UnquotedValue.substr(i + 2); |
1900 | 9 | } |
1901 | 8 | Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end()); |
1902 | 8 | return StringRef(Storage.begin(), Storage.size()); |
1903 | 8 | } |
1904 | 20.4k | return UnquotedValue; |
1905 | 20.4k | } |
1906 | 418k | // Plain or block. |
1907 | 418k | return Value.rtrim(' '); |
1908 | 418k | } |
1909 | | |
1910 | | StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue |
1911 | | , StringRef::size_type i |
1912 | | , SmallVectorImpl<char> &Storage) |
1913 | 38 | const { |
1914 | 38 | // Use Storage to build proper value. |
1915 | 38 | Storage.clear(); |
1916 | 38 | Storage.reserve(UnquotedValue.size()); |
1917 | 153 | for (; i != StringRef::npos; i = UnquotedValue.find_first_of("\\\r\n")115 ) { |
1918 | 116 | // Insert all previous chars into Storage. |
1919 | 116 | StringRef Valid(UnquotedValue.begin(), i); |
1920 | 116 | Storage.insert(Storage.end(), Valid.begin(), Valid.end()); |
1921 | 116 | // Chop off inserted chars. |
1922 | 116 | UnquotedValue = UnquotedValue.substr(i); |
1923 | 116 | |
1924 | 116 | assert(!UnquotedValue.empty() && "Can't be empty!"); |
1925 | 116 | |
1926 | 116 | // Parse escape or line break. |
1927 | 116 | switch (UnquotedValue[0]) { |
1928 | 116 | case '\r': |
1929 | 23 | case '\n': |
1930 | 23 | Storage.push_back('\n'); |
1931 | 23 | if ( UnquotedValue.size() > 1 |
1932 | 23 | && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n')) |
1933 | 3 | UnquotedValue = UnquotedValue.substr(1); |
1934 | 23 | UnquotedValue = UnquotedValue.substr(1); |
1935 | 23 | break; |
1936 | 93 | default: |
1937 | 93 | if (UnquotedValue.size() == 1) |
1938 | 0 | // TODO: Report error. |
1939 | 0 | break; |
1940 | 93 | UnquotedValue = UnquotedValue.substr(1); |
1941 | 93 | switch (UnquotedValue[0]) { |
1942 | 93 | default: { |
1943 | 1 | Token T; |
1944 | 1 | T.Range = StringRef(UnquotedValue.begin(), 1); |
1945 | 1 | setError("Unrecognized escape code!", T); |
1946 | 1 | return ""; |
1947 | 93 | } |
1948 | 93 | case '\r': |
1949 | 5 | case '\n': |
1950 | 5 | // Remove the new line. |
1951 | 5 | if ( UnquotedValue.size() > 1 |
1952 | 5 | && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n')) |
1953 | 0 | UnquotedValue = UnquotedValue.substr(1); |
1954 | 5 | // If this was just a single byte newline, it will get skipped |
1955 | 5 | // below. |
1956 | 5 | break; |
1957 | 5 | case '0': |
1958 | 4 | Storage.push_back(0x00); |
1959 | 4 | break; |
1960 | 5 | case 'a': |
1961 | 1 | Storage.push_back(0x07); |
1962 | 1 | break; |
1963 | 5 | case 'b': |
1964 | 2 | Storage.push_back(0x08); |
1965 | 2 | break; |
1966 | 5 | case 't': |
1967 | 3 | case 0x09: |
1968 | 3 | Storage.push_back(0x09); |
1969 | 3 | break; |
1970 | 5 | case 'n': |
1971 | 5 | Storage.push_back(0x0A); |
1972 | 5 | break; |
1973 | 3 | case 'v': |
1974 | 1 | Storage.push_back(0x0B); |
1975 | 1 | break; |
1976 | 3 | case 'f': |
1977 | 1 | Storage.push_back(0x0C); |
1978 | 1 | break; |
1979 | 3 | case 'r': |
1980 | 2 | Storage.push_back(0x0D); |
1981 | 2 | break; |
1982 | 3 | case 'e': |
1983 | 1 | Storage.push_back(0x1B); |
1984 | 1 | break; |
1985 | 3 | case ' ': |
1986 | 3 | Storage.push_back(0x20); |
1987 | 3 | break; |
1988 | 42 | case '"': |
1989 | 42 | Storage.push_back(0x22); |
1990 | 42 | break; |
1991 | 3 | case '/': |
1992 | 0 | Storage.push_back(0x2F); |
1993 | 0 | break; |
1994 | 11 | case '\\': |
1995 | 11 | Storage.push_back(0x5C); |
1996 | 11 | break; |
1997 | 3 | case 'N': |
1998 | 1 | encodeUTF8(0x85, Storage); |
1999 | 1 | break; |
2000 | 3 | case '_': |
2001 | 1 | encodeUTF8(0xA0, Storage); |
2002 | 1 | break; |
2003 | 3 | case 'L': |
2004 | 1 | encodeUTF8(0x2028, Storage); |
2005 | 1 | break; |
2006 | 3 | case 'P': |
2007 | 1 | encodeUTF8(0x2029, Storage); |
2008 | 1 | break; |
2009 | 4 | case 'x': { |
2010 | 4 | if (UnquotedValue.size() < 3) |
2011 | 0 | // TODO: Report error. |
2012 | 0 | break; |
2013 | 4 | unsigned int UnicodeScalarValue; |
2014 | 4 | if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue)) |
2015 | 0 | // TODO: Report error. |
2016 | 0 | UnicodeScalarValue = 0xFFFD; |
2017 | 4 | encodeUTF8(UnicodeScalarValue, Storage); |
2018 | 4 | UnquotedValue = UnquotedValue.substr(2); |
2019 | 4 | break; |
2020 | 4 | } |
2021 | 4 | case 'u': { |
2022 | 2 | if (UnquotedValue.size() < 5) |
2023 | 0 | // TODO: Report error. |
2024 | 0 | break; |
2025 | 2 | unsigned int UnicodeScalarValue; |
2026 | 2 | if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue)) |
2027 | 0 | // TODO: Report error. |
2028 | 0 | UnicodeScalarValue = 0xFFFD; |
2029 | 2 | encodeUTF8(UnicodeScalarValue, Storage); |
2030 | 2 | UnquotedValue = UnquotedValue.substr(4); |
2031 | 2 | break; |
2032 | 2 | } |
2033 | 2 | case 'U': { |
2034 | 1 | if (UnquotedValue.size() < 9) |
2035 | 0 | // TODO: Report error. |
2036 | 0 | break; |
2037 | 1 | unsigned int UnicodeScalarValue; |
2038 | 1 | if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue)) |
2039 | 0 | // TODO: Report error. |
2040 | 0 | UnicodeScalarValue = 0xFFFD; |
2041 | 1 | encodeUTF8(UnicodeScalarValue, Storage); |
2042 | 1 | UnquotedValue = UnquotedValue.substr(8); |
2043 | 1 | break; |
2044 | 1 | } |
2045 | 92 | } |
2046 | 92 | UnquotedValue = UnquotedValue.substr(1); |
2047 | 116 | } |
2048 | 116 | } |
2049 | 38 | Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end()); |
2050 | 37 | return StringRef(Storage.begin(), Storage.size()); |
2051 | 38 | } |
2052 | | |
2053 | 916k | Node *KeyValueNode::getKey() { |
2054 | 916k | if (Key) |
2055 | 632k | return Key; |
2056 | 283k | // Handle implicit null keys. |
2057 | 283k | { |
2058 | 283k | Token &t = peekNext(); |
2059 | 283k | if ( t.Kind == Token::TK_BlockEnd |
2060 | 283k | || t.Kind == Token::TK_Value |
2061 | 283k | || t.Kind == Token::TK_Error) { |
2062 | 0 | return Key = new (getAllocator()) NullNode(Doc); |
2063 | 0 | } |
2064 | 283k | if (t.Kind == Token::TK_Key) |
2065 | 283k | getNext(); // skip TK_Key. |
2066 | 283k | } |
2067 | 283k | |
2068 | 283k | // Handle explicit null keys. |
2069 | 283k | Token &t = peekNext(); |
2070 | 283k | if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Value) { |
2071 | 3 | return Key = new (getAllocator()) NullNode(Doc); |
2072 | 3 | } |
2073 | 283k | |
2074 | 283k | // We've got a normal key. |
2075 | 283k | return Key = parseBlockNode(); |
2076 | 283k | } |
2077 | | |
2078 | 631k | Node *KeyValueNode::getValue() { |
2079 | 631k | if (Value) |
2080 | 347k | return Value; |
2081 | 283k | getKey()->skip(); |
2082 | 283k | if (failed()) |
2083 | 1 | return Value = new (getAllocator()) NullNode(Doc); |
2084 | 283k | |
2085 | 283k | // Handle implicit null values. |
2086 | 283k | { |
2087 | 283k | Token &t = peekNext(); |
2088 | 283k | if ( t.Kind == Token::TK_BlockEnd |
2089 | 283k | || t.Kind == Token::TK_FlowMappingEnd283k |
2090 | 283k | || t.Kind == Token::TK_Key283k |
2091 | 283k | || t.Kind == Token::TK_FlowEntry283k |
2092 | 283k | || t.Kind == Token::TK_Error283k ) { |
2093 | 20 | return Value = new (getAllocator()) NullNode(Doc); |
2094 | 20 | } |
2095 | 283k | |
2096 | 283k | if (t.Kind != Token::TK_Value) { |
2097 | 3 | setError("Unexpected token in Key Value.", t); |
2098 | 3 | return Value = new (getAllocator()) NullNode(Doc); |
2099 | 3 | } |
2100 | 283k | getNext(); // skip TK_Value. |
2101 | 283k | } |
2102 | 283k | |
2103 | 283k | // Handle explicit null values. |
2104 | 283k | Token &t = peekNext(); |
2105 | 283k | if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Key283k ) { |
2106 | 2.70k | return Value = new (getAllocator()) NullNode(Doc); |
2107 | 2.70k | } |
2108 | 280k | |
2109 | 280k | // We got a normal value. |
2110 | 280k | return Value = parseBlockNode(); |
2111 | 280k | } |
2112 | | |
2113 | 428k | void MappingNode::increment() { |
2114 | 428k | if (failed()) { |
2115 | 10 | IsAtEnd = true; |
2116 | 10 | CurrentEntry = nullptr; |
2117 | 10 | return; |
2118 | 10 | } |
2119 | 428k | if (CurrentEntry) { |
2120 | 347k | CurrentEntry->skip(); |
2121 | 347k | if (Type == MT_Inline) { |
2122 | 12 | IsAtEnd = true; |
2123 | 12 | CurrentEntry = nullptr; |
2124 | 12 | return; |
2125 | 12 | } |
2126 | 428k | } |
2127 | 428k | Token T = peekNext(); |
2128 | 428k | if (T.Kind == Token::TK_Key || T.Kind == Token::TK_Scalar144k ) { |
2129 | 283k | // KeyValueNode eats the TK_Key. That way it can detect null keys. |
2130 | 283k | CurrentEntry = new (getAllocator()) KeyValueNode(Doc); |
2131 | 283k | } else if (144k Type == MT_Block144k ) { |
2132 | 44.6k | switch (T.Kind) { |
2133 | 44.6k | case Token::TK_BlockEnd: |
2134 | 44.6k | getNext(); |
2135 | 44.6k | IsAtEnd = true; |
2136 | 44.6k | CurrentEntry = nullptr; |
2137 | 44.6k | break; |
2138 | 44.6k | default: |
2139 | 0 | setError("Unexpected token. Expected Key or Block End", T); |
2140 | 0 | LLVM_FALLTHROUGH; |
2141 | 0 | case Token::TK_Error: |
2142 | 0 | IsAtEnd = true; |
2143 | 0 | CurrentEntry = nullptr; |
2144 | 44.6k | } |
2145 | 100k | } else { |
2146 | 100k | switch (T.Kind) { |
2147 | 100k | case Token::TK_FlowEntry: |
2148 | 63.8k | // Eat the flow entry and recurse. |
2149 | 63.8k | getNext(); |
2150 | 63.8k | return increment(); |
2151 | 100k | case Token::TK_FlowMappingEnd: |
2152 | 36.4k | getNext(); |
2153 | 36.4k | LLVM_FALLTHROUGH; |
2154 | 36.4k | case Token::TK_Error: |
2155 | 36.4k | // Set this to end iterator. |
2156 | 36.4k | IsAtEnd = true; |
2157 | 36.4k | CurrentEntry = nullptr; |
2158 | 36.4k | break; |
2159 | 36.4k | default: |
2160 | 7 | setError( "Unexpected token. Expected Key, Flow Entry, or Flow " |
2161 | 7 | "Mapping End." |
2162 | 7 | , T); |
2163 | 7 | IsAtEnd = true; |
2164 | 7 | CurrentEntry = nullptr; |
2165 | 100k | } |
2166 | 100k | } |
2167 | 428k | } |
2168 | | |
2169 | 125k | void SequenceNode::increment() { |
2170 | 125k | if (failed()) { |
2171 | 6 | IsAtEnd = true; |
2172 | 6 | CurrentEntry = nullptr; |
2173 | 6 | return; |
2174 | 6 | } |
2175 | 125k | if (CurrentEntry) |
2176 | 106k | CurrentEntry->skip(); |
2177 | 125k | Token T = peekNext(); |
2178 | 125k | if (SeqType == ST_Block) { |
2179 | 55.5k | switch (T.Kind) { |
2180 | 55.5k | case Token::TK_BlockEntry: |
2181 | 43.8k | getNext(); |
2182 | 43.8k | CurrentEntry = parseBlockNode(); |
2183 | 43.8k | if (!CurrentEntry) { // An error occurred. |
2184 | 1 | IsAtEnd = true; |
2185 | 1 | CurrentEntry = nullptr; |
2186 | 1 | } |
2187 | 43.8k | break; |
2188 | 55.5k | case Token::TK_BlockEnd: |
2189 | 11.7k | getNext(); |
2190 | 11.7k | IsAtEnd = true; |
2191 | 11.7k | CurrentEntry = nullptr; |
2192 | 11.7k | break; |
2193 | 55.5k | default: |
2194 | 0 | setError( "Unexpected token. Expected Block Entry or Block End." |
2195 | 0 | , T); |
2196 | 0 | LLVM_FALLTHROUGH; |
2197 | 0 | case Token::TK_Error: |
2198 | 0 | IsAtEnd = true; |
2199 | 0 | CurrentEntry = nullptr; |
2200 | 55.5k | } |
2201 | 69.6k | } else if (SeqType == ST_Indentless) { |
2202 | 154 | switch (T.Kind) { |
2203 | 154 | case Token::TK_BlockEntry: |
2204 | 89 | getNext(); |
2205 | 89 | CurrentEntry = parseBlockNode(); |
2206 | 89 | if (!CurrentEntry) { // An error occurred. |
2207 | 0 | IsAtEnd = true; |
2208 | 0 | CurrentEntry = nullptr; |
2209 | 0 | } |
2210 | 89 | break; |
2211 | 154 | default: |
2212 | 65 | case Token::TK_Error: |
2213 | 65 | IsAtEnd = true; |
2214 | 65 | CurrentEntry = nullptr; |
2215 | 154 | } |
2216 | 69.4k | } else if (SeqType == ST_Flow) { |
2217 | 69.4k | switch (T.Kind) { |
2218 | 69.4k | case Token::TK_FlowEntry: |
2219 | 28.6k | // Eat the flow entry and recurse. |
2220 | 28.6k | getNext(); |
2221 | 28.6k | WasPreviousTokenFlowEntry = true; |
2222 | 28.6k | return increment(); |
2223 | 69.4k | case Token::TK_FlowSequenceEnd: |
2224 | 6.58k | getNext(); |
2225 | 6.58k | LLVM_FALLTHROUGH; |
2226 | 6.58k | case Token::TK_Error: |
2227 | 6.58k | // Set this to end iterator. |
2228 | 6.58k | IsAtEnd = true; |
2229 | 6.58k | CurrentEntry = nullptr; |
2230 | 6.58k | break; |
2231 | 6.58k | case Token::TK_StreamEnd: |
2232 | 7 | case Token::TK_DocumentEnd: |
2233 | 7 | case Token::TK_DocumentStart: |
2234 | 7 | setError("Could not find closing ]!", T); |
2235 | 7 | // Set this to end iterator. |
2236 | 7 | IsAtEnd = true; |
2237 | 7 | CurrentEntry = nullptr; |
2238 | 7 | break; |
2239 | 34.1k | default: |
2240 | 34.1k | if (!WasPreviousTokenFlowEntry) { |
2241 | 2 | setError("Expected , between entries!", T); |
2242 | 2 | IsAtEnd = true; |
2243 | 2 | CurrentEntry = nullptr; |
2244 | 2 | break; |
2245 | 2 | } |
2246 | 34.1k | // Otherwise it must be a flow entry. |
2247 | 34.1k | CurrentEntry = parseBlockNode(); |
2248 | 34.1k | if (!CurrentEntry) { |
2249 | 0 | IsAtEnd = true; |
2250 | 0 | } |
2251 | 34.1k | WasPreviousTokenFlowEntry = false; |
2252 | 34.1k | break; |
2253 | 69.4k | } |
2254 | 69.4k | } |
2255 | 125k | } |
2256 | | |
2257 | 16.6k | Document::Document(Stream &S) : stream(S), Root(nullptr) { |
2258 | 16.6k | // Tag maps starts with two default mappings. |
2259 | 16.6k | TagMap["!"] = "!"; |
2260 | 16.6k | TagMap["!!"] = "tag:yaml.org,2002:"; |
2261 | 16.6k | |
2262 | 16.6k | if (parseDirectives()) |
2263 | 13 | expectToken(Token::TK_DocumentStart); |
2264 | 16.6k | Token &T = peekNext(); |
2265 | 16.6k | if (T.Kind == Token::TK_DocumentStart) |
2266 | 15.3k | getNext(); |
2267 | 16.6k | } |
2268 | | |
2269 | 28.0k | bool Document::skip() { |
2270 | 28.0k | if (stream.scanner->failed()) |
2271 | 46 | return false; |
2272 | 27.9k | if (!Root) |
2273 | 1.32k | getRoot(); |
2274 | 27.9k | Root->skip(); |
2275 | 27.9k | Token &T = peekNext(); |
2276 | 27.9k | if (T.Kind == Token::TK_StreamEnd) |
2277 | 2.88k | return false; |
2278 | 25.0k | if (T.Kind == Token::TK_DocumentEnd) { |
2279 | 13.2k | getNext(); |
2280 | 13.2k | return skip(); |
2281 | 13.2k | } |
2282 | 11.8k | return true; |
2283 | 11.8k | } |
2284 | | |
2285 | 3.35M | Token &Document::peekNext() { |
2286 | 3.35M | return stream.scanner->peekNext(); |
2287 | 3.35M | } |
2288 | | |
2289 | 1.49M | Token Document::getNext() { |
2290 | 1.49M | return stream.scanner->getNext(); |
2291 | 1.49M | } |
2292 | | |
2293 | 24 | void Document::setError(const Twine &Message, Token &Location) const { |
2294 | 24 | stream.scanner->setError(Message, Location.Range.begin()); |
2295 | 24 | } |
2296 | | |
2297 | 837k | bool Document::failed() const { |
2298 | 837k | return stream.scanner->failed(); |
2299 | 837k | } |
2300 | | |
2301 | 659k | Node *Document::parseBlockNode() { |
2302 | 659k | Token T = peekNext(); |
2303 | 659k | // Handle properties. |
2304 | 659k | Token AnchorInfo; |
2305 | 659k | Token TagInfo; |
2306 | 662k | parse_property: |
2307 | 662k | switch (T.Kind) { |
2308 | 662k | case Token::TK_Alias: |
2309 | 16 | getNext(); |
2310 | 16 | return new (NodeAllocator) AliasNode(stream.CurrentDoc, T.Range.substr(1)); |
2311 | 662k | case Token::TK_Anchor: |
2312 | 13 | if (AnchorInfo.Kind == Token::TK_Anchor) { |
2313 | 0 | setError("Already encountered an anchor for this node!", T); |
2314 | 0 | return nullptr; |
2315 | 0 | } |
2316 | 13 | AnchorInfo = getNext(); // Consume TK_Anchor. |
2317 | 13 | T = peekNext(); |
2318 | 13 | goto parse_property; |
2319 | 3.51k | case Token::TK_Tag: |
2320 | 3.51k | if (TagInfo.Kind == Token::TK_Tag) { |
2321 | 0 | setError("Already encountered a tag for this node!", T); |
2322 | 0 | return nullptr; |
2323 | 0 | } |
2324 | 3.51k | TagInfo = getNext(); // Consume TK_Tag. |
2325 | 3.51k | T = peekNext(); |
2326 | 3.51k | goto parse_property; |
2327 | 658k | default: |
2328 | 658k | break; |
2329 | 658k | } |
2330 | 658k | |
2331 | 658k | switch (T.Kind) { |
2332 | 658k | case Token::TK_BlockEntry: |
2333 | 65 | // We got an unindented BlockEntry sequence. This is not terminated with |
2334 | 65 | // a BlockEnd. |
2335 | 65 | // Don't eat the TK_BlockEntry, SequenceNode needs it. |
2336 | 65 | return new (NodeAllocator) SequenceNode( stream.CurrentDoc |
2337 | 65 | , AnchorInfo.Range.substr(1) |
2338 | 65 | , TagInfo.Range |
2339 | 65 | , SequenceNode::ST_Indentless); |
2340 | 658k | case Token::TK_BlockSequenceStart: |
2341 | 11.7k | getNext(); |
2342 | 11.7k | return new (NodeAllocator) |
2343 | 11.7k | SequenceNode( stream.CurrentDoc |
2344 | 11.7k | , AnchorInfo.Range.substr(1) |
2345 | 11.7k | , TagInfo.Range |
2346 | 11.7k | , SequenceNode::ST_Block); |
2347 | 658k | case Token::TK_BlockMappingStart: |
2348 | 44.7k | getNext(); |
2349 | 44.7k | return new (NodeAllocator) |
2350 | 44.7k | MappingNode( stream.CurrentDoc |
2351 | 44.7k | , AnchorInfo.Range.substr(1) |
2352 | 44.7k | , TagInfo.Range |
2353 | 44.7k | , MappingNode::MT_Block); |
2354 | 658k | case Token::TK_FlowSequenceStart: |
2355 | 6.65k | getNext(); |
2356 | 6.65k | return new (NodeAllocator) |
2357 | 6.65k | SequenceNode( stream.CurrentDoc |
2358 | 6.65k | , AnchorInfo.Range.substr(1) |
2359 | 6.65k | , TagInfo.Range |
2360 | 6.65k | , SequenceNode::ST_Flow); |
2361 | 658k | case Token::TK_FlowMappingStart: |
2362 | 36.5k | getNext(); |
2363 | 36.5k | return new (NodeAllocator) |
2364 | 36.5k | MappingNode( stream.CurrentDoc |
2365 | 36.5k | , AnchorInfo.Range.substr(1) |
2366 | 36.5k | , TagInfo.Range |
2367 | 36.5k | , MappingNode::MT_Flow); |
2368 | 658k | case Token::TK_Scalar: |
2369 | 546k | getNext(); |
2370 | 546k | return new (NodeAllocator) |
2371 | 546k | ScalarNode( stream.CurrentDoc |
2372 | 546k | , AnchorInfo.Range.substr(1) |
2373 | 546k | , TagInfo.Range |
2374 | 546k | , T.Range); |
2375 | 658k | case Token::TK_BlockScalar: { |
2376 | 12.4k | getNext(); |
2377 | 12.4k | StringRef NullTerminatedStr(T.Value.c_str(), T.Value.length() + 1); |
2378 | 12.4k | StringRef StrCopy = NullTerminatedStr.copy(NodeAllocator).drop_back(); |
2379 | 12.4k | return new (NodeAllocator) |
2380 | 12.4k | BlockScalarNode(stream.CurrentDoc, AnchorInfo.Range.substr(1), |
2381 | 12.4k | TagInfo.Range, StrCopy, T.Range); |
2382 | 658k | } |
2383 | 658k | case Token::TK_Key: |
2384 | 13 | // Don't eat the TK_Key, KeyValueNode expects it. |
2385 | 13 | return new (NodeAllocator) |
2386 | 13 | MappingNode( stream.CurrentDoc |
2387 | 13 | , AnchorInfo.Range.substr(1) |
2388 | 13 | , TagInfo.Range |
2389 | 13 | , MappingNode::MT_Inline); |
2390 | 658k | case Token::TK_DocumentStart: |
2391 | 190 | case Token::TK_DocumentEnd: |
2392 | 190 | case Token::TK_StreamEnd: |
2393 | 199 | default: |
2394 | 199 | // TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not |
2395 | 199 | // !!null null. |
2396 | 199 | return new (NodeAllocator) NullNode(stream.CurrentDoc); |
2397 | 190 | case Token::TK_Error: |
2398 | 15 | return nullptr; |
2399 | 0 | } |
2400 | 0 | llvm_unreachable("Control flow shouldn't reach here."); |
2401 | 0 | return nullptr; |
2402 | 0 | } |
2403 | | |
2404 | 16.6k | bool Document::parseDirectives() { |
2405 | 16.6k | bool isDirective = false; |
2406 | 16.7k | while (true) { |
2407 | 16.7k | Token T = peekNext(); |
2408 | 16.7k | if (T.Kind == Token::TK_TagDirective) { |
2409 | 13 | parseTAGDirective(); |
2410 | 13 | isDirective = true; |
2411 | 16.6k | } else if (T.Kind == Token::TK_VersionDirective) { |
2412 | 5 | parseYAMLDirective(); |
2413 | 5 | isDirective = true; |
2414 | 5 | } else |
2415 | 16.6k | break; |
2416 | 16.7k | } |
2417 | 16.6k | return isDirective; |
2418 | 16.6k | } |
2419 | | |
2420 | 5 | void Document::parseYAMLDirective() { |
2421 | 5 | getNext(); // Eat %YAML <version> |
2422 | 5 | } |
2423 | | |
2424 | 13 | void Document::parseTAGDirective() { |
2425 | 13 | Token Tag = getNext(); // %TAG <handle> <prefix> |
2426 | 13 | StringRef T = Tag.Range; |
2427 | 13 | // Strip %TAG |
2428 | 13 | T = T.substr(T.find_first_of(" \t")).ltrim(" \t"); |
2429 | 13 | std::size_t HandleEnd = T.find_first_of(" \t"); |
2430 | 13 | StringRef TagHandle = T.substr(0, HandleEnd); |
2431 | 13 | StringRef TagPrefix = T.substr(HandleEnd).ltrim(" \t"); |
2432 | 13 | TagMap[TagHandle] = TagPrefix; |
2433 | 13 | } |
2434 | | |
2435 | 13 | bool Document::expectToken(int TK) { |
2436 | 13 | Token T = getNext(); |
2437 | 13 | if (T.Kind != TK) { |
2438 | 2 | setError("Unexpected token", T); |
2439 | 2 | return false; |
2440 | 2 | } |
2441 | 11 | return true; |
2442 | 11 | } |