/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Support/YAMLParser.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- YAMLParser.cpp - Simple YAML parser --------------------------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This file implements a YAML parser. |
11 | | // |
12 | | //===----------------------------------------------------------------------===// |
13 | | |
14 | | #include "llvm/Support/YAMLParser.h" |
15 | | #include "llvm/ADT/AllocatorList.h" |
16 | | #include "llvm/ADT/ArrayRef.h" |
17 | | #include "llvm/ADT/None.h" |
18 | | #include "llvm/ADT/STLExtras.h" |
19 | | #include "llvm/ADT/SmallString.h" |
20 | | #include "llvm/ADT/SmallVector.h" |
21 | | #include "llvm/ADT/StringExtras.h" |
22 | | #include "llvm/ADT/StringRef.h" |
23 | | #include "llvm/ADT/Twine.h" |
24 | | #include "llvm/Support/Compiler.h" |
25 | | #include "llvm/Support/ErrorHandling.h" |
26 | | #include "llvm/Support/MemoryBuffer.h" |
27 | | #include "llvm/Support/SMLoc.h" |
28 | | #include "llvm/Support/SourceMgr.h" |
29 | | #include "llvm/Support/raw_ostream.h" |
30 | | #include <algorithm> |
31 | | #include <cassert> |
32 | | #include <cstddef> |
33 | | #include <cstdint> |
34 | | #include <map> |
35 | | #include <memory> |
36 | | #include <string> |
37 | | #include <system_error> |
38 | | #include <utility> |
39 | | |
40 | | using namespace llvm; |
41 | | using namespace yaml; |
42 | | |
43 | | enum UnicodeEncodingForm { |
44 | | UEF_UTF32_LE, ///< UTF-32 Little Endian |
45 | | UEF_UTF32_BE, ///< UTF-32 Big Endian |
46 | | UEF_UTF16_LE, ///< UTF-16 Little Endian |
47 | | UEF_UTF16_BE, ///< UTF-16 Big Endian |
48 | | UEF_UTF8, ///< UTF-8 or ascii. |
49 | | UEF_Unknown ///< Not a valid Unicode encoding. |
50 | | }; |
51 | | |
52 | | /// EncodingInfo - Holds the encoding type and length of the byte order mark if |
53 | | /// it exists. Length is in {0, 2, 3, 4}. |
54 | | using EncodingInfo = std::pair<UnicodeEncodingForm, unsigned>; |
55 | | |
56 | | /// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode |
57 | | /// encoding form of \a Input. |
58 | | /// |
59 | | /// @param Input A string of length 0 or more. |
60 | | /// @returns An EncodingInfo indicating the Unicode encoding form of the input |
61 | | /// and how long the byte order mark is if one exists. |
62 | 2.53k | static EncodingInfo getUnicodeEncoding(StringRef Input) { |
63 | 2.53k | if (Input.empty()) |
64 | 5 | return std::make_pair(UEF_Unknown, 0); |
65 | 2.52k | |
66 | 2.52k | switch (uint8_t(Input[0])) { |
67 | 0 | case 0x00: |
68 | 0 | if (Input.size() >= 40 ) { |
69 | 0 | if ( Input[1] == 0 |
70 | 0 | && uint8_t(Input[2]) == 0xFE |
71 | 0 | && uint8_t(Input[3]) == 0xFF) |
72 | 0 | return std::make_pair(UEF_UTF32_BE, 4); |
73 | 0 | if (0 Input[1] == 0 && 0 Input[2] == 00 && Input[3] != 00 ) |
74 | 0 | return std::make_pair(UEF_UTF32_BE, 0); |
75 | 0 | } |
76 | 0 | |
77 | 0 | if (0 Input.size() >= 2 && 0 Input[1] != 00 ) |
78 | 0 | return std::make_pair(UEF_UTF16_BE, 0); |
79 | 0 | return std::make_pair(UEF_Unknown, 0); |
80 | 0 | case 0xFF: |
81 | 0 | if ( Input.size() >= 4 |
82 | 0 | && uint8_t(Input[1]) == 0xFE |
83 | 0 | && Input[2] == 0 |
84 | 0 | && Input[3] == 0) |
85 | 0 | return std::make_pair(UEF_UTF32_LE, 4); |
86 | 0 | |
87 | 0 | if (0 Input.size() >= 2 && 0 uint8_t(Input[1]) == 0xFE0 ) |
88 | 0 | return std::make_pair(UEF_UTF16_LE, 2); |
89 | 0 | return std::make_pair(UEF_Unknown, 0); |
90 | 0 | case 0xFE: |
91 | 0 | if (Input.size() >= 2 && 0 uint8_t(Input[1]) == 0xFF0 ) |
92 | 0 | return std::make_pair(UEF_UTF16_BE, 2); |
93 | 0 | return std::make_pair(UEF_Unknown, 0); |
94 | 3 | case 0xEF: |
95 | 3 | if ( Input.size() >= 3 |
96 | 3 | && uint8_t(Input[1]) == 0xBB |
97 | 3 | && uint8_t(Input[2]) == 0xBF) |
98 | 3 | return std::make_pair(UEF_UTF8, 3); |
99 | 0 | return std::make_pair(UEF_Unknown, 0); |
100 | 2.52k | } |
101 | 2.52k | |
102 | 2.52k | // It could still be utf-32 or utf-16. |
103 | 2.52k | if (2.52k Input.size() >= 4 && 2.52k Input[1] == 02.51k && Input[2] == 00 && Input[3] == 00 ) |
104 | 0 | return std::make_pair(UEF_UTF32_LE, 0); |
105 | 2.52k | |
106 | 2.52k | if (2.52k Input.size() >= 2 && 2.52k Input[1] == 02.52k ) |
107 | 0 | return std::make_pair(UEF_UTF16_LE, 0); |
108 | 2.52k | |
109 | 2.52k | return std::make_pair(UEF_UTF8, 0); |
110 | 2.52k | } |
111 | | |
112 | | /// Pin the vtables to this file. |
113 | 0 | void Node::anchor() {} |
114 | 0 | void NullNode::anchor() {} |
115 | 0 | void ScalarNode::anchor() {} |
116 | 0 | void BlockScalarNode::anchor() {} |
117 | 0 | void KeyValueNode::anchor() {} |
118 | 0 | void MappingNode::anchor() {} |
119 | 0 | void SequenceNode::anchor() {} |
120 | 0 | void AliasNode::anchor() {} |
121 | | |
122 | | namespace llvm { |
123 | | namespace yaml { |
124 | | |
125 | | /// Token - A single YAML token. |
126 | | struct Token { |
127 | | enum TokenKind { |
128 | | TK_Error, // Uninitialized token. |
129 | | TK_StreamStart, |
130 | | TK_StreamEnd, |
131 | | TK_VersionDirective, |
132 | | TK_TagDirective, |
133 | | TK_DocumentStart, |
134 | | TK_DocumentEnd, |
135 | | TK_BlockEntry, |
136 | | TK_BlockEnd, |
137 | | TK_BlockSequenceStart, |
138 | | TK_BlockMappingStart, |
139 | | TK_FlowEntry, |
140 | | TK_FlowSequenceStart, |
141 | | TK_FlowSequenceEnd, |
142 | | TK_FlowMappingStart, |
143 | | TK_FlowMappingEnd, |
144 | | TK_Key, |
145 | | TK_Value, |
146 | | TK_Scalar, |
147 | | TK_BlockScalar, |
148 | | TK_Alias, |
149 | | TK_Anchor, |
150 | | TK_Tag |
151 | | } Kind = TK_Error; |
152 | | |
153 | | /// A string of length 0 or more whose begin() points to the logical location |
154 | | /// of the token in the input. |
155 | | StringRef Range; |
156 | | |
157 | | /// The value of a block scalar node. |
158 | | std::string Value; |
159 | | |
160 | 1.67M | Token() = default; |
161 | | }; |
162 | | |
163 | | } // end namespace yaml |
164 | | } // end namespace llvm |
165 | | |
166 | | using TokenQueueT = BumpPtrList<Token>; |
167 | | |
168 | | namespace { |
169 | | |
170 | | /// @brief This struct is used to track simple keys. |
171 | | /// |
172 | | /// Simple keys are handled by creating an entry in SimpleKeys for each Token |
173 | | /// which could legally be the start of a simple key. When peekNext is called, |
174 | | /// if the Token to be returned is referenced by a SimpleKey, we continue |
175 | | /// tokenizing until that potential simple key has either been found not to be |
176 | | /// a simple key (we moved on to the next line or went further than 1024 chars), |
177 | | /// or we run into a Value, at which point we insert a Key token (and possibly |
178 | | /// others) before the SimpleKey's Tok. |
179 | | struct SimpleKey { |
180 | | TokenQueueT::iterator Tok; |
181 | | unsigned Column; |
182 | | unsigned Line; |
183 | | unsigned FlowLevel; |
184 | | bool IsRequired; |
185 | | |
186 | 344k | bool operator ==(const SimpleKey &Other) { |
187 | 344k | return Tok == Other.Tok; |
188 | 344k | } |
189 | | }; |
190 | | |
191 | | } // end anonymous namespace |
192 | | |
193 | | /// @brief The Unicode scalar value of a UTF-8 minimal well-formed code unit |
194 | | /// subsequence and the subsequence's length in code units (uint8_t). |
195 | | /// A length of 0 represents an error. |
196 | | using UTF8Decoded = std::pair<uint32_t, unsigned>; |
197 | | |
198 | 66 | static UTF8Decoded decodeUTF8(StringRef Range) { |
199 | 66 | StringRef::iterator Position= Range.begin(); |
200 | 66 | StringRef::iterator End = Range.end(); |
201 | 66 | // 1 byte: [0x00, 0x7f] |
202 | 66 | // Bit pattern: 0xxxxxxx |
203 | 66 | if ((*Position & 0x80) == 066 ) { |
204 | 0 | return std::make_pair(*Position, 1); |
205 | 0 | } |
206 | 66 | // 2 bytes: [0x80, 0x7ff] |
207 | 66 | // Bit pattern: 110xxxxx 10xxxxxx |
208 | 66 | if (66 Position + 1 != End && |
209 | 66 | ((*Position & 0xE0) == 0xC0) && |
210 | 66 | ((*(Position + 1) & 0xC0) == 0x80)40 ) { |
211 | 40 | uint32_t codepoint = ((*Position & 0x1F) << 6) | |
212 | 40 | (*(Position + 1) & 0x3F); |
213 | 40 | if (codepoint >= 0x80) |
214 | 40 | return std::make_pair(codepoint, 2); |
215 | 26 | } |
216 | 26 | // 3 bytes: [0x800, 0xffff] |
217 | 26 | // Bit pattern: 1110xxxx 10xxxxxx 10xxxxxx |
218 | 26 | if (26 Position + 2 != End && |
219 | 26 | ((*Position & 0xF0) == 0xE0) && |
220 | 26 | ((*(Position + 1) & 0xC0) == 0x80) && |
221 | 26 | ((*(Position + 2) & 0xC0) == 0x80)26 ) { |
222 | 26 | uint32_t codepoint = ((*Position & 0x0F) << 12) | |
223 | 26 | ((*(Position + 1) & 0x3F) << 6) | |
224 | 26 | (*(Position + 2) & 0x3F); |
225 | 26 | // Codepoints between 0xD800 and 0xDFFF are invalid, as |
226 | 26 | // they are high / low surrogate halves used by UTF-16. |
227 | 26 | if (codepoint >= 0x800 && |
228 | 26 | (codepoint < 0xD800 || 26 codepoint > 0xDFFF1 )) |
229 | 26 | return std::make_pair(codepoint, 3); |
230 | 0 | } |
231 | 0 | // 4 bytes: [0x10000, 0x10FFFF] |
232 | 0 | // Bit pattern: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx |
233 | 0 | if (0 Position + 3 != End && |
234 | 0 | ((*Position & 0xF8) == 0xF0) && |
235 | 0 | ((*(Position + 1) & 0xC0) == 0x80) && |
236 | 0 | ((*(Position + 2) & 0xC0) == 0x80) && |
237 | 0 | ((*(Position + 3) & 0xC0) == 0x80)0 ) { |
238 | 0 | uint32_t codepoint = ((*Position & 0x07) << 18) | |
239 | 0 | ((*(Position + 1) & 0x3F) << 12) | |
240 | 0 | ((*(Position + 2) & 0x3F) << 6) | |
241 | 0 | (*(Position + 3) & 0x3F); |
242 | 0 | if (codepoint >= 0x10000 && 0 codepoint <= 0x10FFFF0 ) |
243 | 0 | return std::make_pair(codepoint, 4); |
244 | 0 | } |
245 | 0 | return std::make_pair(0, 0); |
246 | 0 | } |
247 | | |
248 | | namespace llvm { |
249 | | namespace yaml { |
250 | | |
251 | | /// @brief Scans YAML tokens from a MemoryBuffer. |
252 | | class Scanner { |
253 | | public: |
254 | | Scanner(StringRef Input, SourceMgr &SM, bool ShowColors = true, |
255 | | std::error_code *EC = nullptr); |
256 | | Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors = true, |
257 | | std::error_code *EC = nullptr); |
258 | | |
259 | | /// @brief Parse the next token and return it without popping it. |
260 | | Token &peekNext(); |
261 | | |
262 | | /// @brief Parse the next token and pop it from the queue. |
263 | | Token getNext(); |
264 | | |
265 | | void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message, |
266 | 128 | ArrayRef<SMRange> Ranges = None) { |
267 | 128 | SM.PrintMessage(Loc, Kind, Message, Ranges, /* FixIts= */ None, ShowColors); |
268 | 128 | } |
269 | | |
270 | 45 | void setError(const Twine &Message, StringRef::iterator Position) { |
271 | 45 | if (Current >= End) |
272 | 30 | Current = End - 1; |
273 | 45 | |
274 | 45 | // propagate the error if possible |
275 | 45 | if (EC) |
276 | 4 | *EC = make_error_code(std::errc::invalid_argument); |
277 | 45 | |
278 | 45 | // Don't print out more errors after the first one we encounter. The rest |
279 | 45 | // are just the result of the first, and have no meaning. |
280 | 45 | if (!Failed) |
281 | 45 | printError(SMLoc::getFromPointer(Current), SourceMgr::DK_Error, Message); |
282 | 45 | Failed = true; |
283 | 45 | } |
284 | | |
285 | 1 | void setError(const Twine &Message) { |
286 | 1 | setError(Message, Current); |
287 | 1 | } |
288 | | |
289 | | /// @brief Returns true if an error occurred while parsing. |
290 | 364k | bool failed() { |
291 | 364k | return Failed; |
292 | 364k | } |
293 | | |
294 | | private: |
295 | | void init(MemoryBufferRef Buffer); |
296 | | |
297 | 2.53k | StringRef currentInput() { |
298 | 2.53k | return StringRef(Current, End - Current); |
299 | 2.53k | } |
300 | | |
301 | | /// @brief Decode a UTF-8 minimal well-formed code unit subsequence starting |
302 | | /// at \a Position. |
303 | | /// |
304 | | /// If the UTF-8 code units starting at Position do not form a well-formed |
305 | | /// code unit subsequence, then the Unicode scalar value is 0, and the length |
306 | | /// is 0. |
307 | 31 | UTF8Decoded decodeUTF8(StringRef::iterator Position) { |
308 | 31 | return ::decodeUTF8(StringRef(Position, End - Position)); |
309 | 31 | } |
310 | | |
311 | | // The following functions are based on the grammar rules in the YAML spec. The |
312 | | // style of the function names is meant to closely match how they are written |
313 | | // in the spec. The number within the [] is the number of the grammar rule in |
314 | | // the spec. |
315 | | // |
316 | | // See 4.2 [Production Naming Conventions] for the meaning of the prefixes. |
317 | | // |
318 | | // c- |
319 | | // A production starting and ending with a special character. |
320 | | // b- |
321 | | // A production matching a single line break. |
322 | | // nb- |
323 | | // A production starting and ending with a non-break character. |
324 | | // s- |
325 | | // A production starting and ending with a white space character. |
326 | | // ns- |
327 | | // A production starting and ending with a non-space character. |
328 | | // l- |
329 | | // A production matching complete line(s). |
330 | | |
331 | | /// @brief Skip a single nb-char[27] starting at Position. |
332 | | /// |
333 | | /// A nb-char is 0x9 | [0x20-0x7E] | 0x85 | [0xA0-0xD7FF] | [0xE000-0xFEFE] |
334 | | /// | [0xFF00-0xFFFD] | [0x10000-0x10FFFF] |
335 | | /// |
336 | | /// @returns The code unit after the nb-char, or Position if it's not an |
337 | | /// nb-char. |
338 | | StringRef::iterator skip_nb_char(StringRef::iterator Position); |
339 | | |
340 | | /// @brief Skip a single b-break[28] starting at Position. |
341 | | /// |
342 | | /// A b-break is 0xD 0xA | 0xD | 0xA |
343 | | /// |
344 | | /// @returns The code unit after the b-break, or Position if it's not a |
345 | | /// b-break. |
346 | | StringRef::iterator skip_b_break(StringRef::iterator Position); |
347 | | |
348 | | /// Skip a single s-space[31] starting at Position. |
349 | | /// |
350 | | /// An s-space is 0x20 |
351 | | /// |
352 | | /// @returns The code unit after the s-space, or Position if it's not a |
353 | | /// s-space. |
354 | | StringRef::iterator skip_s_space(StringRef::iterator Position); |
355 | | |
356 | | /// @brief Skip a single s-white[33] starting at Position. |
357 | | /// |
358 | | /// A s-white is 0x20 | 0x9 |
359 | | /// |
360 | | /// @returns The code unit after the s-white, or Position if it's not a |
361 | | /// s-white. |
362 | | StringRef::iterator skip_s_white(StringRef::iterator Position); |
363 | | |
364 | | /// @brief Skip a single ns-char[34] starting at Position. |
365 | | /// |
366 | | /// A ns-char is nb-char - s-white |
367 | | /// |
368 | | /// @returns The code unit after the ns-char, or Position if it's not a |
369 | | /// ns-char. |
370 | | StringRef::iterator skip_ns_char(StringRef::iterator Position); |
371 | | |
372 | | using SkipWhileFunc = StringRef::iterator (Scanner::*)(StringRef::iterator); |
373 | | |
374 | | /// @brief Skip minimal well-formed code unit subsequences until Func |
375 | | /// returns its input. |
376 | | /// |
377 | | /// @returns The code unit after the last minimal well-formed code unit |
378 | | /// subsequence that Func accepted. |
379 | | StringRef::iterator skip_while( SkipWhileFunc Func |
380 | | , StringRef::iterator Position); |
381 | | |
382 | | /// Skip minimal well-formed code unit subsequences until Func returns its |
383 | | /// input. |
384 | | void advanceWhile(SkipWhileFunc Func); |
385 | | |
386 | | /// @brief Scan ns-uri-char[39]s starting at Current. |
387 | | /// |
388 | | /// This updates Current and Column while scanning. |
389 | | void scan_ns_uri_char(); |
390 | | |
391 | | /// @brief Consume a minimal well-formed code unit subsequence starting at |
392 | | /// \a Current. Return false if it is not the same Unicode scalar value as |
393 | | /// \a Expected. This updates \a Column. |
394 | | bool consume(uint32_t Expected); |
395 | | |
396 | | /// @brief Skip \a Distance UTF-8 code units. Updates \a Current and \a Column. |
397 | | void skip(uint32_t Distance); |
398 | | |
399 | | /// @brief Return true if the minimal well-formed code unit subsequence at |
400 | | /// \a Position is whitespace or a new line. |
401 | | bool isBlankOrBreak(StringRef::iterator Position); |
402 | | |
403 | | /// Consume a single b-break[28] if it's present at the current position. |
404 | | /// |
405 | | /// Return false if the code unit at the current position isn't a line break. |
406 | | bool consumeLineBreakIfPresent(); |
407 | | |
408 | | /// @brief If IsSimpleKeyAllowed, create and push_back a new SimpleKey. |
409 | | void saveSimpleKeyCandidate( TokenQueueT::iterator Tok |
410 | | , unsigned AtColumn |
411 | | , bool IsRequired); |
412 | | |
413 | | /// @brief Remove simple keys that can no longer be valid simple keys. |
414 | | /// |
415 | | /// Invalid simple keys are not on the current line or are further than 1024 |
416 | | /// columns back. |
417 | | void removeStaleSimpleKeyCandidates(); |
418 | | |
419 | | /// @brief Remove all simple keys on FlowLevel \a Level. |
420 | | void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level); |
421 | | |
422 | | /// @brief Unroll indentation in \a Indents back to \a ToColumn. Creates |
423 | | /// BlockEnd tokens if needed. |
424 | | bool unrollIndent(int ToColumn); |
425 | | |
426 | | /// @brief Increase indent to \a ToColumn. Creates \a Kind token at |
427 | | /// \a InsertPoint if needed. |
428 | | bool rollIndent( int ToColumn |
429 | | , Token::TokenKind Kind |
430 | | , TokenQueueT::iterator InsertPoint); |
431 | | |
432 | | /// @brief Skip a single-line comment when the comment starts at the current |
433 | | /// position of the scanner. |
434 | | void skipComment(); |
435 | | |
436 | | /// @brief Skip whitespace and comments until the start of the next token. |
437 | | void scanToNextToken(); |
438 | | |
439 | | /// @brief Must be the first token generated. |
440 | | bool scanStreamStart(); |
441 | | |
442 | | /// @brief Generate tokens needed to close out the stream. |
443 | | bool scanStreamEnd(); |
444 | | |
445 | | /// @brief Scan a %BLAH directive. |
446 | | bool scanDirective(); |
447 | | |
448 | | /// @brief Scan a ... or ---. |
449 | | bool scanDocumentIndicator(bool IsStart); |
450 | | |
451 | | /// @brief Scan a [ or { and generate the proper flow collection start token. |
452 | | bool scanFlowCollectionStart(bool IsSequence); |
453 | | |
454 | | /// @brief Scan a ] or } and generate the proper flow collection end token. |
455 | | bool scanFlowCollectionEnd(bool IsSequence); |
456 | | |
457 | | /// @brief Scan the , that separates entries in a flow collection. |
458 | | bool scanFlowEntry(); |
459 | | |
460 | | /// @brief Scan the - that starts block sequence entries. |
461 | | bool scanBlockEntry(); |
462 | | |
463 | | /// @brief Scan an explicit ? indicating a key. |
464 | | bool scanKey(); |
465 | | |
466 | | /// @brief Scan an explicit : indicating a value. |
467 | | bool scanValue(); |
468 | | |
469 | | /// @brief Scan a quoted scalar. |
470 | | bool scanFlowScalar(bool IsDoubleQuoted); |
471 | | |
472 | | /// @brief Scan an unquoted scalar. |
473 | | bool scanPlainScalar(); |
474 | | |
475 | | /// @brief Scan an Alias or Anchor starting with * or &. |
476 | | bool scanAliasOrAnchor(bool IsAlias); |
477 | | |
478 | | /// @brief Scan a block scalar starting with | or >. |
479 | | bool scanBlockScalar(bool IsLiteral); |
480 | | |
481 | | /// Scan a chomping indicator in a block scalar header. |
482 | | char scanBlockChompingIndicator(); |
483 | | |
484 | | /// Scan an indentation indicator in a block scalar header. |
485 | | unsigned scanBlockIndentationIndicator(); |
486 | | |
487 | | /// Scan a block scalar header. |
488 | | /// |
489 | | /// Return false if an error occurred. |
490 | | bool scanBlockScalarHeader(char &ChompingIndicator, unsigned &IndentIndicator, |
491 | | bool &IsDone); |
492 | | |
493 | | /// Look for the indentation level of a block scalar. |
494 | | /// |
495 | | /// Return false if an error occurred. |
496 | | bool findBlockScalarIndent(unsigned &BlockIndent, unsigned BlockExitIndent, |
497 | | unsigned &LineBreaks, bool &IsDone); |
498 | | |
499 | | /// Scan the indentation of a text line in a block scalar. |
500 | | /// |
501 | | /// Return false if an error occurred. |
502 | | bool scanBlockScalarIndent(unsigned BlockIndent, unsigned BlockExitIndent, |
503 | | bool &IsDone); |
504 | | |
505 | | /// @brief Scan a tag of the form !stuff. |
506 | | bool scanTag(); |
507 | | |
508 | | /// @brief Dispatch to the next scanning function based on \a *Current. |
509 | | bool fetchMoreTokens(); |
510 | | |
511 | | /// @brief The SourceMgr used for diagnostics and buffer management. |
512 | | SourceMgr &SM; |
513 | | |
514 | | /// @brief The original input. |
515 | | MemoryBufferRef InputBuffer; |
516 | | |
517 | | /// @brief The current position of the scanner. |
518 | | StringRef::iterator Current; |
519 | | |
520 | | /// @brief The end of the input (one past the last character). |
521 | | StringRef::iterator End; |
522 | | |
523 | | /// @brief Current YAML indentation level in spaces. |
524 | | int Indent; |
525 | | |
526 | | /// @brief Current column number in Unicode code points. |
527 | | unsigned Column; |
528 | | |
529 | | /// @brief Current line number. |
530 | | unsigned Line; |
531 | | |
532 | | /// @brief How deep we are in flow style containers. 0 means at block level. |
533 | | unsigned FlowLevel; |
534 | | |
535 | | /// @brief Are we at the start of the stream? |
536 | | bool IsStartOfStream; |
537 | | |
538 | | /// @brief Can the next token be the start of a simple key? |
539 | | bool IsSimpleKeyAllowed; |
540 | | |
541 | | /// @brief True if an error has occurred. |
542 | | bool Failed; |
543 | | |
544 | | /// @brief Should colors be used when printing out the diagnostic messages? |
545 | | bool ShowColors; |
546 | | |
547 | | /// @brief Queue of tokens. This is required to queue up tokens while looking |
548 | | /// for the end of a simple key. And for cases where a single character |
549 | | /// can produce multiple tokens (e.g. BlockEnd). |
550 | | TokenQueueT TokenQueue; |
551 | | |
552 | | /// @brief Indentation levels. |
553 | | SmallVector<int, 4> Indents; |
554 | | |
555 | | /// @brief Potential simple keys. |
556 | | SmallVector<SimpleKey, 4> SimpleKeys; |
557 | | |
558 | | std::error_code *EC; |
559 | | }; |
560 | | |
561 | | } // end namespace yaml |
562 | | } // end namespace llvm |
563 | | |
564 | | /// encodeUTF8 - Encode \a UnicodeScalarValue in UTF-8 and append it to result. |
565 | | static void encodeUTF8( uint32_t UnicodeScalarValue |
566 | 10 | , SmallVectorImpl<char> &Result) { |
567 | 10 | if (UnicodeScalarValue <= 0x7F10 ) { |
568 | 5 | Result.push_back(UnicodeScalarValue & 0x7F); |
569 | 10 | } else if (5 UnicodeScalarValue <= 0x7FF5 ) { |
570 | 2 | uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6); |
571 | 2 | uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F); |
572 | 2 | Result.push_back(FirstByte); |
573 | 2 | Result.push_back(SecondByte); |
574 | 5 | } else if (3 UnicodeScalarValue <= 0xFFFF3 ) { |
575 | 3 | uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12); |
576 | 3 | uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6); |
577 | 3 | uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F); |
578 | 3 | Result.push_back(FirstByte); |
579 | 3 | Result.push_back(SecondByte); |
580 | 3 | Result.push_back(ThirdByte); |
581 | 3 | } else if (0 UnicodeScalarValue <= 0x10FFFF0 ) { |
582 | 0 | uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18); |
583 | 0 | uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12); |
584 | 0 | uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6); |
585 | 0 | uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F); |
586 | 0 | Result.push_back(FirstByte); |
587 | 0 | Result.push_back(SecondByte); |
588 | 0 | Result.push_back(ThirdByte); |
589 | 0 | Result.push_back(FourthByte); |
590 | 0 | } |
591 | 10 | } |
592 | | |
593 | 0 | bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) { |
594 | 0 | SourceMgr SM; |
595 | 0 | Scanner scanner(Input, SM); |
596 | 0 | while (true0 ) { |
597 | 0 | Token T = scanner.getNext(); |
598 | 0 | switch (T.Kind) { |
599 | 0 | case Token::TK_StreamStart: |
600 | 0 | OS << "Stream-Start: "; |
601 | 0 | break; |
602 | 0 | case Token::TK_StreamEnd: |
603 | 0 | OS << "Stream-End: "; |
604 | 0 | break; |
605 | 0 | case Token::TK_VersionDirective: |
606 | 0 | OS << "Version-Directive: "; |
607 | 0 | break; |
608 | 0 | case Token::TK_TagDirective: |
609 | 0 | OS << "Tag-Directive: "; |
610 | 0 | break; |
611 | 0 | case Token::TK_DocumentStart: |
612 | 0 | OS << "Document-Start: "; |
613 | 0 | break; |
614 | 0 | case Token::TK_DocumentEnd: |
615 | 0 | OS << "Document-End: "; |
616 | 0 | break; |
617 | 0 | case Token::TK_BlockEntry: |
618 | 0 | OS << "Block-Entry: "; |
619 | 0 | break; |
620 | 0 | case Token::TK_BlockEnd: |
621 | 0 | OS << "Block-End: "; |
622 | 0 | break; |
623 | 0 | case Token::TK_BlockSequenceStart: |
624 | 0 | OS << "Block-Sequence-Start: "; |
625 | 0 | break; |
626 | 0 | case Token::TK_BlockMappingStart: |
627 | 0 | OS << "Block-Mapping-Start: "; |
628 | 0 | break; |
629 | 0 | case Token::TK_FlowEntry: |
630 | 0 | OS << "Flow-Entry: "; |
631 | 0 | break; |
632 | 0 | case Token::TK_FlowSequenceStart: |
633 | 0 | OS << "Flow-Sequence-Start: "; |
634 | 0 | break; |
635 | 0 | case Token::TK_FlowSequenceEnd: |
636 | 0 | OS << "Flow-Sequence-End: "; |
637 | 0 | break; |
638 | 0 | case Token::TK_FlowMappingStart: |
639 | 0 | OS << "Flow-Mapping-Start: "; |
640 | 0 | break; |
641 | 0 | case Token::TK_FlowMappingEnd: |
642 | 0 | OS << "Flow-Mapping-End: "; |
643 | 0 | break; |
644 | 0 | case Token::TK_Key: |
645 | 0 | OS << "Key: "; |
646 | 0 | break; |
647 | 0 | case Token::TK_Value: |
648 | 0 | OS << "Value: "; |
649 | 0 | break; |
650 | 0 | case Token::TK_Scalar: |
651 | 0 | OS << "Scalar: "; |
652 | 0 | break; |
653 | 0 | case Token::TK_BlockScalar: |
654 | 0 | OS << "Block Scalar: "; |
655 | 0 | break; |
656 | 0 | case Token::TK_Alias: |
657 | 0 | OS << "Alias: "; |
658 | 0 | break; |
659 | 0 | case Token::TK_Anchor: |
660 | 0 | OS << "Anchor: "; |
661 | 0 | break; |
662 | 0 | case Token::TK_Tag: |
663 | 0 | OS << "Tag: "; |
664 | 0 | break; |
665 | 0 | case Token::TK_Error: |
666 | 0 | break; |
667 | 0 | } |
668 | 0 | OS << T.Range << "\n"; |
669 | 0 | if (T.Kind == Token::TK_StreamEnd) |
670 | 0 | break; |
671 | 0 | else if (0 T.Kind == Token::TK_Error0 ) |
672 | 0 | return false; |
673 | 0 | } |
674 | 0 | return true; |
675 | 0 | } |
676 | | |
677 | 0 | bool yaml::scanTokens(StringRef Input) { |
678 | 0 | SourceMgr SM; |
679 | 0 | Scanner scanner(Input, SM); |
680 | 0 | while (true0 ) { |
681 | 0 | Token T = scanner.getNext(); |
682 | 0 | if (T.Kind == Token::TK_StreamEnd) |
683 | 0 | break; |
684 | 0 | else if (0 T.Kind == Token::TK_Error0 ) |
685 | 0 | return false; |
686 | 0 | } |
687 | 0 | return true; |
688 | 0 | } |
689 | | |
690 | 2.03k | std::string yaml::escape(StringRef Input) { |
691 | 2.03k | std::string EscapedInput; |
692 | 87.5k | for (StringRef::iterator i = Input.begin(), e = Input.end(); i != e87.5k ; ++i85.4k ) { |
693 | 85.4k | if (*i == '\\') |
694 | 7 | EscapedInput += "\\\\"; |
695 | 85.4k | else if (85.4k *i == '"'85.4k ) |
696 | 15 | EscapedInput += "\\\""; |
697 | 85.4k | else if (85.4k *i == 085.4k ) |
698 | 1 | EscapedInput += "\\0"; |
699 | 85.4k | else if (85.4k *i == 0x0785.4k ) |
700 | 1 | EscapedInput += "\\a"; |
701 | 85.4k | else if (85.4k *i == 0x0885.4k ) |
702 | 2 | EscapedInput += "\\b"; |
703 | 85.4k | else if (85.4k *i == 0x0985.4k ) |
704 | 30 | EscapedInput += "\\t"; |
705 | 85.4k | else if (85.4k *i == 0x0A85.4k ) |
706 | 209 | EscapedInput += "\\n"; |
707 | 85.2k | else if (85.2k *i == 0x0B85.2k ) |
708 | 1 | EscapedInput += "\\v"; |
709 | 85.2k | else if (85.2k *i == 0x0C85.2k ) |
710 | 1 | EscapedInput += "\\f"; |
711 | 85.2k | else if (85.2k *i == 0x0D85.2k ) |
712 | 2 | EscapedInput += "\\r"; |
713 | 85.2k | else if (85.2k *i == 0x1B85.2k ) |
714 | 1 | EscapedInput += "\\e"; |
715 | 85.2k | else if (85.2k (unsigned char)*i < 0x2085.2k ) { // Control characters not handled above. |
716 | 2 | std::string HexStr = utohexstr(*i); |
717 | 2 | EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr; |
718 | 85.2k | } else if (85.2k *i & 0x8085.2k ) { // UTF-8 multiple code unit subsequence. |
719 | 35 | UTF8Decoded UnicodeScalarValue |
720 | 35 | = decodeUTF8(StringRef(i, Input.end() - i)); |
721 | 35 | if (UnicodeScalarValue.second == 035 ) { |
722 | 0 | // Found invalid char. |
723 | 0 | SmallString<4> Val; |
724 | 0 | encodeUTF8(0xFFFD, Val); |
725 | 0 | EscapedInput.insert(EscapedInput.end(), Val.begin(), Val.end()); |
726 | 0 | // FIXME: Error reporting. |
727 | 0 | return EscapedInput; |
728 | 0 | } |
729 | 35 | if (35 UnicodeScalarValue.first == 0x8535 ) |
730 | 19 | EscapedInput += "\\N"; |
731 | 16 | else if (16 UnicodeScalarValue.first == 0xA016 ) |
732 | 1 | EscapedInput += "\\_"; |
733 | 15 | else if (15 UnicodeScalarValue.first == 0x202815 ) |
734 | 7 | EscapedInput += "\\L"; |
735 | 8 | else if (8 UnicodeScalarValue.first == 0x20298 ) |
736 | 4 | EscapedInput += "\\P"; |
737 | 4 | else { |
738 | 4 | std::string HexStr = utohexstr(UnicodeScalarValue.first); |
739 | 4 | if (HexStr.size() <= 2) |
740 | 0 | EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr; |
741 | 4 | else if (4 HexStr.size() <= 44 ) |
742 | 4 | EscapedInput += "\\u" + std::string(4 - HexStr.size(), '0') + HexStr; |
743 | 0 | else if (0 HexStr.size() <= 80 ) |
744 | 0 | EscapedInput += "\\U" + std::string(8 - HexStr.size(), '0') + HexStr; |
745 | 16 | } |
746 | 35 | i += UnicodeScalarValue.second - 1; |
747 | 35 | } else |
748 | 85.1k | EscapedInput.push_back(*i); |
749 | 85.4k | } |
750 | 2.03k | return EscapedInput; |
751 | 2.03k | } |
752 | | |
753 | | Scanner::Scanner(StringRef Input, SourceMgr &sm, bool ShowColors, |
754 | | std::error_code *EC) |
755 | 2.43k | : SM(sm), ShowColors(ShowColors), EC(EC) { |
756 | 2.43k | init(MemoryBufferRef(Input, "YAML")); |
757 | 2.43k | } |
758 | | |
759 | | Scanner::Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors, |
760 | | std::error_code *EC) |
761 | 101 | : SM(SM_), ShowColors(ShowColors), EC(EC) { |
762 | 101 | init(Buffer); |
763 | 101 | } |
764 | | |
765 | 2.53k | void Scanner::init(MemoryBufferRef Buffer) { |
766 | 2.53k | InputBuffer = Buffer; |
767 | 2.53k | Current = InputBuffer.getBufferStart(); |
768 | 2.53k | End = InputBuffer.getBufferEnd(); |
769 | 2.53k | Indent = -1; |
770 | 2.53k | Column = 0; |
771 | 2.53k | Line = 0; |
772 | 2.53k | FlowLevel = 0; |
773 | 2.53k | IsStartOfStream = true; |
774 | 2.53k | IsSimpleKeyAllowed = true; |
775 | 2.53k | Failed = false; |
776 | 2.53k | std::unique_ptr<MemoryBuffer> InputBufferOwner = |
777 | 2.53k | MemoryBuffer::getMemBuffer(Buffer); |
778 | 2.53k | SM.AddNewSourceBuffer(std::move(InputBufferOwner), SMLoc()); |
779 | 2.53k | } |
780 | | |
781 | 2.09M | Token &Scanner::peekNext() { |
782 | 2.09M | // If the current token is a possible simple key, keep parsing until we |
783 | 2.09M | // can confirm. |
784 | 2.09M | bool NeedMore = false; |
785 | 2.32M | while (true2.32M ) { |
786 | 2.32M | if (TokenQueue.empty() || 2.32M NeedMore2.08M ) { |
787 | 465k | if (!fetchMoreTokens()465k ) { |
788 | 23 | TokenQueue.clear(); |
789 | 23 | TokenQueue.push_back(Token()); |
790 | 23 | return TokenQueue.front(); |
791 | 23 | } |
792 | 2.32M | } |
793 | 2.32M | assert(!TokenQueue.empty() && |
794 | 2.32M | "fetchMoreTokens lied about getting tokens!"); |
795 | 2.32M | |
796 | 2.32M | removeStaleSimpleKeyCandidates(); |
797 | 2.32M | SimpleKey SK; |
798 | 2.32M | SK.Tok = TokenQueue.begin(); |
799 | 2.32M | if (!is_contained(SimpleKeys, SK)) |
800 | 2.09M | break; |
801 | 2.32M | else |
802 | 227k | NeedMore = true; |
803 | 2.32M | } |
804 | 2.09M | return TokenQueue.front(); |
805 | 2.09M | } |
806 | | |
807 | 644k | Token Scanner::getNext() { |
808 | 644k | Token Ret = peekNext(); |
809 | 644k | // TokenQueue can be empty if there was an error getting the next token. |
810 | 644k | if (!TokenQueue.empty()) |
811 | 644k | TokenQueue.pop_front(); |
812 | 644k | |
813 | 644k | // There cannot be any referenced Token's if the TokenQueue is empty. So do a |
814 | 644k | // quick deallocation of them all. |
815 | 644k | if (TokenQueue.empty()) |
816 | 235k | TokenQueue.resetAlloc(); |
817 | 644k | |
818 | 644k | return Ret; |
819 | 644k | } |
820 | | |
821 | 4.98M | StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) { |
822 | 4.98M | if (Position == End) |
823 | 332 | return Position; |
824 | 4.98M | // Check 7 bit c-printable - b-char. |
825 | 4.98M | if ( 4.98M *Position == 0x09 |
826 | 4.98M | || (*Position >= 0x20 && 4.98M *Position <= 0x7E4.89M )) |
827 | 4.89M | return Position + 1; |
828 | 84.6k | |
829 | 84.6k | // Check for valid UTF-8. |
830 | 84.6k | if (84.6k uint8_t(*Position) & 0x8084.6k ) { |
831 | 31 | UTF8Decoded u8d = decodeUTF8(Position); |
832 | 31 | if ( u8d.second != 0 |
833 | 31 | && u8d.first != 0xFEFF |
834 | 30 | && ( u8d.first == 0x85 |
835 | 10 | || ( u8d.first >= 0xA0 |
836 | 10 | && u8d.first <= 0xD7FF) |
837 | 0 | || ( u8d.first >= 0xE000 |
838 | 0 | && u8d.first <= 0xFFFD) |
839 | 0 | || ( u8d.first >= 0x10000 |
840 | 0 | && u8d.first <= 0x10FFFF))) |
841 | 30 | return Position + u8d.second; |
842 | 84.5k | } |
843 | 84.5k | return Position; |
844 | 84.5k | } |
845 | | |
846 | 739k | StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) { |
847 | 739k | if (Position == End) |
848 | 1.57k | return Position; |
849 | 737k | if (737k *Position == 0x0D737k ) { |
850 | 540 | if (Position + 1 != End && 540 *(Position + 1) == 0x0A539 ) |
851 | 522 | return Position + 2; |
852 | 18 | return Position + 1; |
853 | 18 | } |
854 | 737k | |
855 | 737k | if (737k *Position == 0x0A737k ) |
856 | 275k | return Position + 1; |
857 | 461k | return Position; |
858 | 461k | } |
859 | | |
860 | 98.1k | StringRef::iterator Scanner::skip_s_space(StringRef::iterator Position) { |
861 | 98.1k | if (Position == End) |
862 | 46 | return Position; |
863 | 98.1k | if (98.1k *Position == ' '98.1k ) |
864 | 85.3k | return Position + 1; |
865 | 12.7k | return Position; |
866 | 12.7k | } |
867 | | |
868 | 504k | StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) { |
869 | 504k | if (Position == End) |
870 | 1 | return Position; |
871 | 504k | if (504k *Position == ' ' || 504k *Position == '\t'81.0k ) |
872 | 423k | return Position + 1; |
873 | 81.0k | return Position; |
874 | 81.0k | } |
875 | | |
876 | 8.03k | StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) { |
877 | 8.03k | if (Position == End) |
878 | 0 | return Position; |
879 | 8.03k | if (8.03k *Position == ' ' || 8.03k *Position == '\t'7.92k ) |
880 | 104 | return Position; |
881 | 7.92k | return skip_nb_char(Position); |
882 | 7.92k | } |
883 | | |
884 | | StringRef::iterator Scanner::skip_while( SkipWhileFunc Func |
885 | 56.6k | , StringRef::iterator Position) { |
886 | 1.70M | while (true1.70M ) { |
887 | 1.70M | StringRef::iterator i = (this->*Func)(Position); |
888 | 1.70M | if (i == Position) |
889 | 56.6k | break; |
890 | 1.64M | Position = i; |
891 | 1.64M | } |
892 | 56.6k | return Position; |
893 | 56.6k | } |
894 | | |
895 | 52.5k | void Scanner::advanceWhile(SkipWhileFunc Func) { |
896 | 52.5k | auto Final = skip_while(Func, Current); |
897 | 52.5k | Column += Final - Current; |
898 | 52.5k | Current = Final; |
899 | 52.5k | } |
900 | | |
/// Returns true if \p C is a hexadecimal digit: 0-9, a-f or A-F.
///
/// The letter ranges stop at 'f'/'F'; accepting the whole alphabet would let
/// sequences such as "%zz" pass as percent escapes.
static bool is_ns_hex_digit(const char C) {
  return    (C >= '0' && C <= '9')
         || (C >= 'a' && C <= 'f')
         || (C >= 'A' && C <= 'F');
}
906 | | |
/// Returns true if \p C is a word character: a decimal digit, an ASCII letter
/// or '-'.
///
/// Digits are part of the set so that URIs such as "tag:yaml.org,2002:str"
/// are not cut short at the first digit.
static bool is_ns_word_char(const char C) {
  return    C == '-'
         || (C >= '0' && C <= '9')
         || (C >= 'a' && C <= 'z')
         || (C >= 'A' && C <= 'Z');
}
912 | | |
913 | 3 | void Scanner::scan_ns_uri_char() { |
914 | 36 | while (true36 ) { |
915 | 36 | if (Current == End) |
916 | 0 | break; |
917 | 36 | if (36 ( *Current == '%' |
918 | 0 | && Current + 2 < End |
919 | 0 | && is_ns_hex_digit(*(Current + 1)) |
920 | 0 | && is_ns_hex_digit(*(Current + 2))) |
921 | 36 | || is_ns_word_char(*Current) |
922 | 10 | || StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]") |
923 | 36 | != StringRef::npos) { |
924 | 33 | ++Current; |
925 | 33 | ++Column; |
926 | 33 | } else |
927 | 3 | break; |
928 | 36 | } |
929 | 3 | } |
930 | | |
931 | 22 | bool Scanner::consume(uint32_t Expected) { |
932 | 22 | if (Expected >= 0x80) |
933 | 0 | report_fatal_error("Not dealing with this yet"); |
934 | 22 | if (22 Current == End22 ) |
935 | 0 | return false; |
936 | 22 | if (22 uint8_t(*Current) >= 0x8022 ) |
937 | 0 | report_fatal_error("Not dealing with this yet"); |
938 | 22 | if (22 uint8_t(*Current) == Expected22 ) { |
939 | 20 | ++Current; |
940 | 20 | ++Column; |
941 | 20 | return true; |
942 | 20 | } |
943 | 2 | return false; |
944 | 2 | } |
945 | | |
946 | 1.51M | void Scanner::skip(uint32_t Distance) { |
947 | 1.51M | Current += Distance; |
948 | 1.51M | Column += Distance; |
949 | 1.51M | assert(Current <= End && "Skipped past the end"); |
950 | 1.51M | } |
951 | | |
952 | 3.26M | bool Scanner::isBlankOrBreak(StringRef::iterator Position) { |
953 | 3.26M | if (Position == End) |
954 | 634 | return false; |
955 | 3.26M | return *Position == ' ' || 3.26M *Position == '\t'2.55M || *Position == '\r'2.55M || |
956 | 2.55M | *Position == '\n'; |
957 | 3.26M | } |
958 | | |
959 | 52.4k | bool Scanner::consumeLineBreakIfPresent() { |
960 | 52.4k | auto Next = skip_b_break(Current); |
961 | 52.4k | if (Next == Current) |
962 | 3 | return false; |
963 | 52.4k | Column = 0; |
964 | 52.4k | ++Line; |
965 | 52.4k | Current = Next; |
966 | 52.4k | return true; |
967 | 52.4k | } |
968 | | |
969 | | void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok |
970 | | , unsigned AtColumn |
971 | 260k | , bool IsRequired) { |
972 | 260k | if (IsSimpleKeyAllowed260k ) { |
973 | 148k | SimpleKey SK; |
974 | 148k | SK.Tok = Tok; |
975 | 148k | SK.Line = Line; |
976 | 148k | SK.Column = AtColumn; |
977 | 148k | SK.IsRequired = IsRequired; |
978 | 148k | SK.FlowLevel = FlowLevel; |
979 | 148k | SimpleKeys.push_back(SK); |
980 | 148k | } |
981 | 260k | } |
982 | | |
983 | 2.78M | void Scanner::removeStaleSimpleKeyCandidates() { |
984 | 2.78M | for (SmallVectorImpl<SimpleKey>::iterator i = SimpleKeys.begin(); |
985 | 3.41M | i != SimpleKeys.end()3.41M ;) { |
986 | 625k | if (i->Line != Line || 625k i->Column + 1024 < Column614k ) { |
987 | 11.1k | if (i->IsRequired) |
988 | 0 | setError( "Could not find expected : for simple key" |
989 | 0 | , i->Tok->Range.begin()); |
990 | 11.1k | i = SimpleKeys.erase(i); |
991 | 11.1k | } else |
992 | 614k | ++i; |
993 | 625k | } |
994 | 2.78M | } |
995 | | |
996 | 65.4k | void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) { |
997 | 65.4k | if (!SimpleKeys.empty() && 65.4k (SimpleKeys.end() - 1)->FlowLevel == Level38.9k ) |
998 | 12.7k | SimpleKeys.pop_back(); |
999 | 65.4k | } |
1000 | | |
1001 | 471k | bool Scanner::unrollIndent(int ToColumn) { |
1002 | 471k | Token T; |
1003 | 471k | // Indentation is ignored in flow. |
1004 | 471k | if (FlowLevel != 0) |
1005 | 138k | return true; |
1006 | 332k | |
1007 | 361k | while (332k Indent > ToColumn361k ) { |
1008 | 28.5k | T.Kind = Token::TK_BlockEnd; |
1009 | 28.5k | T.Range = StringRef(Current, 1); |
1010 | 28.5k | TokenQueue.push_back(T); |
1011 | 28.5k | Indent = Indents.pop_back_val(); |
1012 | 28.5k | } |
1013 | 471k | |
1014 | 471k | return true; |
1015 | 471k | } |
1016 | | |
1017 | | bool Scanner::rollIndent( int ToColumn |
1018 | | , Token::TokenKind Kind |
1019 | 148k | , TokenQueueT::iterator InsertPoint) { |
1020 | 148k | if (FlowLevel) |
1021 | 27.8k | return true; |
1022 | 120k | if (120k Indent < ToColumn120k ) { |
1023 | 28.5k | Indents.push_back(Indent); |
1024 | 28.5k | Indent = ToColumn; |
1025 | 28.5k | |
1026 | 28.5k | Token T; |
1027 | 28.5k | T.Kind = Kind; |
1028 | 28.5k | T.Range = StringRef(Current, 0); |
1029 | 28.5k | TokenQueue.insert(InsertPoint, T); |
1030 | 28.5k | } |
1031 | 148k | return true; |
1032 | 148k | } |
1033 | | |
1034 | 611k | void Scanner::skipComment() { |
1035 | 611k | if (*Current != '#') |
1036 | 584k | return; |
1037 | 1.26M | while (26.6k true1.26M ) { |
1038 | 1.26M | // This may skip more than one byte, thus Column is only incremented |
1039 | 1.26M | // for code points. |
1040 | 1.26M | StringRef::iterator I = skip_nb_char(Current); |
1041 | 1.26M | if (I == Current) |
1042 | 26.6k | break; |
1043 | 1.24M | Current = I; |
1044 | 1.24M | ++Column; |
1045 | 1.24M | } |
1046 | 611k | } |
1047 | | |
1048 | 462k | void Scanner::scanToNextToken() { |
1049 | 608k | while (true608k ) { |
1050 | 1.89M | while (*Current == ' ' || 1.89M *Current == '\t'608k ) { |
1051 | 1.28M | skip(1); |
1052 | 1.28M | } |
1053 | 608k | |
1054 | 608k | skipComment(); |
1055 | 608k | |
1056 | 608k | // Skip EOL. |
1057 | 608k | StringRef::iterator i = skip_b_break(Current); |
1058 | 608k | if (i == Current) |
1059 | 462k | break; |
1060 | 145k | Current = i; |
1061 | 145k | ++Line; |
1062 | 145k | Column = 0; |
1063 | 145k | // New lines may start a simple key. |
1064 | 145k | if (!FlowLevel) |
1065 | 142k | IsSimpleKeyAllowed = true; |
1066 | 608k | } |
1067 | 462k | } |
1068 | | |
1069 | 2.53k | bool Scanner::scanStreamStart() { |
1070 | 2.53k | IsStartOfStream = false; |
1071 | 2.53k | |
1072 | 2.53k | EncodingInfo EI = getUnicodeEncoding(currentInput()); |
1073 | 2.53k | |
1074 | 2.53k | Token T; |
1075 | 2.53k | T.Kind = Token::TK_StreamStart; |
1076 | 2.53k | T.Range = StringRef(Current, EI.second); |
1077 | 2.53k | TokenQueue.push_back(T); |
1078 | 2.53k | Current += EI.second; |
1079 | 2.53k | return true; |
1080 | 2.53k | } |
1081 | | |
1082 | 1.56k | bool Scanner::scanStreamEnd() { |
1083 | 1.56k | // Force an ending new line if one isn't present. |
1084 | 1.56k | if (Column != 01.56k ) { |
1085 | 365 | Column = 0; |
1086 | 365 | ++Line; |
1087 | 365 | } |
1088 | 1.56k | |
1089 | 1.56k | unrollIndent(-1); |
1090 | 1.56k | SimpleKeys.clear(); |
1091 | 1.56k | IsSimpleKeyAllowed = false; |
1092 | 1.56k | |
1093 | 1.56k | Token T; |
1094 | 1.56k | T.Kind = Token::TK_StreamEnd; |
1095 | 1.56k | T.Range = StringRef(Current, 0); |
1096 | 1.56k | TokenQueue.push_back(T); |
1097 | 1.56k | return true; |
1098 | 1.56k | } |
1099 | | |
1100 | 19 | bool Scanner::scanDirective() { |
1101 | 19 | // Reset the indentation level. |
1102 | 19 | unrollIndent(-1); |
1103 | 19 | SimpleKeys.clear(); |
1104 | 19 | IsSimpleKeyAllowed = false; |
1105 | 19 | |
1106 | 19 | StringRef::iterator Start = Current; |
1107 | 19 | consume('%'); |
1108 | 19 | StringRef::iterator NameStart = Current; |
1109 | 19 | Current = skip_while(&Scanner::skip_ns_char, Current); |
1110 | 19 | StringRef Name(NameStart, Current - NameStart); |
1111 | 19 | Current = skip_while(&Scanner::skip_s_white, Current); |
1112 | 19 | |
1113 | 19 | Token T; |
1114 | 19 | if (Name == "YAML"19 ) { |
1115 | 5 | Current = skip_while(&Scanner::skip_ns_char, Current); |
1116 | 5 | T.Kind = Token::TK_VersionDirective; |
1117 | 5 | T.Range = StringRef(Start, Current - Start); |
1118 | 5 | TokenQueue.push_back(T); |
1119 | 5 | return true; |
1120 | 14 | } else if(14 Name == "TAG"14 ) { |
1121 | 13 | Current = skip_while(&Scanner::skip_ns_char, Current); |
1122 | 13 | Current = skip_while(&Scanner::skip_s_white, Current); |
1123 | 13 | Current = skip_while(&Scanner::skip_ns_char, Current); |
1124 | 13 | T.Kind = Token::TK_TagDirective; |
1125 | 13 | T.Range = StringRef(Start, Current - Start); |
1126 | 13 | TokenQueue.push_back(T); |
1127 | 13 | return true; |
1128 | 13 | } |
1129 | 1 | return false; |
1130 | 1 | } |
1131 | | |
1132 | 8.15k | bool Scanner::scanDocumentIndicator(bool IsStart) { |
1133 | 8.15k | unrollIndent(-1); |
1134 | 8.15k | SimpleKeys.clear(); |
1135 | 8.15k | IsSimpleKeyAllowed = false; |
1136 | 8.15k | |
1137 | 8.15k | Token T; |
1138 | 8.15k | T.Kind = IsStart ? Token::TK_DocumentStart4.13k : Token::TK_DocumentEnd4.02k ; |
1139 | 8.15k | T.Range = StringRef(Current, 3); |
1140 | 8.15k | skip(3); |
1141 | 8.15k | TokenQueue.push_back(T); |
1142 | 8.15k | return true; |
1143 | 8.15k | } |
1144 | | |
1145 | 14.0k | bool Scanner::scanFlowCollectionStart(bool IsSequence) { |
1146 | 14.0k | Token T; |
1147 | 3.58k | T.Kind = IsSequence ? Token::TK_FlowSequenceStart |
1148 | 10.4k | : Token::TK_FlowMappingStart; |
1149 | 14.0k | T.Range = StringRef(Current, 1); |
1150 | 14.0k | skip(1); |
1151 | 14.0k | TokenQueue.push_back(T); |
1152 | 14.0k | |
1153 | 14.0k | // [ and { may begin a simple key. |
1154 | 14.0k | saveSimpleKeyCandidate(--TokenQueue.end(), Column - 1, false); |
1155 | 14.0k | |
1156 | 14.0k | // And may also be followed by a simple key. |
1157 | 14.0k | IsSimpleKeyAllowed = true; |
1158 | 14.0k | ++FlowLevel; |
1159 | 14.0k | return true; |
1160 | 14.0k | } |
1161 | | |
1162 | 13.9k | bool Scanner::scanFlowCollectionEnd(bool IsSequence) { |
1163 | 13.9k | removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); |
1164 | 13.9k | IsSimpleKeyAllowed = false; |
1165 | 13.9k | Token T; |
1166 | 3.56k | T.Kind = IsSequence ? Token::TK_FlowSequenceEnd |
1167 | 10.4k | : Token::TK_FlowMappingEnd; |
1168 | 13.9k | T.Range = StringRef(Current, 1); |
1169 | 13.9k | skip(1); |
1170 | 13.9k | TokenQueue.push_back(T); |
1171 | 13.9k | if (FlowLevel) |
1172 | 13.9k | --FlowLevel; |
1173 | 13.9k | return true; |
1174 | 13.9k | } |
1175 | | |
1176 | 27.6k | bool Scanner::scanFlowEntry() { |
1177 | 27.6k | removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); |
1178 | 27.6k | IsSimpleKeyAllowed = true; |
1179 | 27.6k | Token T; |
1180 | 27.6k | T.Kind = Token::TK_FlowEntry; |
1181 | 27.6k | T.Range = StringRef(Current, 1); |
1182 | 27.6k | skip(1); |
1183 | 27.6k | TokenQueue.push_back(T); |
1184 | 27.6k | return true; |
1185 | 27.6k | } |
1186 | | |
1187 | 23.7k | bool Scanner::scanBlockEntry() { |
1188 | 23.7k | rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end()); |
1189 | 23.7k | removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); |
1190 | 23.7k | IsSimpleKeyAllowed = true; |
1191 | 23.7k | Token T; |
1192 | 23.7k | T.Kind = Token::TK_BlockEntry; |
1193 | 23.7k | T.Range = StringRef(Current, 1); |
1194 | 23.7k | skip(1); |
1195 | 23.7k | TokenQueue.push_back(T); |
1196 | 23.7k | return true; |
1197 | 23.7k | } |
1198 | | |
1199 | 30 | bool Scanner::scanKey() { |
1200 | 30 | if (!FlowLevel) |
1201 | 16 | rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end()); |
1202 | 30 | |
1203 | 30 | removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); |
1204 | 30 | IsSimpleKeyAllowed = !FlowLevel; |
1205 | 30 | |
1206 | 30 | Token T; |
1207 | 30 | T.Kind = Token::TK_Key; |
1208 | 30 | T.Range = StringRef(Current, 1); |
1209 | 30 | skip(1); |
1210 | 30 | TokenQueue.push_back(T); |
1211 | 30 | return true; |
1212 | 30 | } |
1213 | | |
1214 | 124k | bool Scanner::scanValue() { |
1215 | 124k | // If the previous token could have been a simple key, insert the key token |
1216 | 124k | // into the token queue. |
1217 | 124k | if (!SimpleKeys.empty()124k ) { |
1218 | 124k | SimpleKey SK = SimpleKeys.pop_back_val(); |
1219 | 124k | Token T; |
1220 | 124k | T.Kind = Token::TK_Key; |
1221 | 124k | T.Range = SK.Tok->Range; |
1222 | 124k | TokenQueueT::iterator i, e; |
1223 | 300k | for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e300k ; ++i176k ) { |
1224 | 300k | if (i == SK.Tok) |
1225 | 124k | break; |
1226 | 300k | } |
1227 | 124k | assert(i != e && "SimpleKey not in token queue!"); |
1228 | 124k | i = TokenQueue.insert(i, T); |
1229 | 124k | |
1230 | 124k | // We may also need to add a Block-Mapping-Start token. |
1231 | 124k | rollIndent(SK.Column, Token::TK_BlockMappingStart, i); |
1232 | 124k | |
1233 | 124k | IsSimpleKeyAllowed = false; |
1234 | 124k | } else { |
1235 | 23 | if (!FlowLevel) |
1236 | 8 | rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end()); |
1237 | 23 | IsSimpleKeyAllowed = !FlowLevel; |
1238 | 23 | } |
1239 | 124k | |
1240 | 124k | Token T; |
1241 | 124k | T.Kind = Token::TK_Value; |
1242 | 124k | T.Range = StringRef(Current, 1); |
1243 | 124k | skip(1); |
1244 | 124k | TokenQueue.push_back(T); |
1245 | 124k | return true; |
1246 | 124k | } |
1247 | | |
1248 | | // Forbidding inlining improves performance by roughly 20%. |
1249 | | // FIXME: Remove once llvm optimizes this to the faster version without hints. |
1250 | | LLVM_ATTRIBUTE_NOINLINE static bool |
1251 | | wasEscaped(StringRef::iterator First, StringRef::iterator Position); |
1252 | | |
1253 | | // Returns whether a character at 'Position' was escaped with a leading '\'. |
1254 | | // 'First' specifies the position of the first character in the string. |
1255 | | static bool wasEscaped(StringRef::iterator First, |
1256 | 58 | StringRef::iterator Position) { |
1257 | 58 | assert(Position - 1 >= First); |
1258 | 58 | StringRef::iterator I = Position - 1; |
1259 | 58 | // We calculate the number of consecutive '\'s before the current position |
1260 | 58 | // by iterating backwards through our string. |
1261 | 140 | while (I >= First && 140 *I == '\\'123 ) --I82 ; |
1262 | 58 | // (Position - 1 - I) now contains the number of '\'s before the current |
1263 | 58 | // position. If it is odd, the character at 'Position' was escaped. |
1264 | 58 | return (Position - 1 - I) % 2 == 1; |
1265 | 58 | } |
1266 | | |
1267 | 8.40k | bool Scanner::scanFlowScalar(bool IsDoubleQuoted) { |
1268 | 8.40k | StringRef::iterator Start = Current; |
1269 | 8.40k | unsigned ColStart = Column; |
1270 | 8.40k | if (IsDoubleQuoted8.40k ) { |
1271 | 850 | do { |
1272 | 850 | ++Current; |
1273 | 22.1k | while (Current != End && 22.1k *Current != '"'22.0k ) |
1274 | 21.2k | ++Current; |
1275 | 850 | // Repeat until the previous character was not a '\' or was an escaped |
1276 | 850 | // backslash. |
1277 | 796 | } while ( Current != End |
1278 | 841 | && *(Current - 1) == '\\' |
1279 | 58 | && wasEscaped(Start + 1, Current)); |
1280 | 8.40k | } else { |
1281 | 7.60k | skip(1); |
1282 | 316k | while (true316k ) { |
1283 | 316k | // Skip a ' followed by another '. |
1284 | 316k | if (Current + 1 < End && 316k *Current == '\''316k && *(Current + 1) == '\''7.60k ) { |
1285 | 5 | skip(2); |
1286 | 5 | continue; |
1287 | 316k | } else if (316k *Current == '\''316k ) |
1288 | 7.60k | break; |
1289 | 309k | StringRef::iterator i = skip_nb_char(Current); |
1290 | 309k | if (i == Current309k ) { |
1291 | 13 | i = skip_b_break(Current); |
1292 | 13 | if (i == Current) |
1293 | 1 | break; |
1294 | 12 | Current = i; |
1295 | 12 | Column = 0; |
1296 | 12 | ++Line; |
1297 | 309k | } else { |
1298 | 309k | if (i == End) |
1299 | 0 | break; |
1300 | 309k | Current = i; |
1301 | 309k | ++Column; |
1302 | 309k | } |
1303 | 316k | } |
1304 | 7.60k | } |
1305 | 8.40k | |
1306 | 8.40k | if (Current == End8.40k ) { |
1307 | 10 | setError("Expected quote at end of scalar", Current); |
1308 | 10 | return false; |
1309 | 10 | } |
1310 | 8.39k | |
1311 | 8.39k | skip(1); // Skip ending quote. |
1312 | 8.39k | Token T; |
1313 | 8.39k | T.Kind = Token::TK_Scalar; |
1314 | 8.39k | T.Range = StringRef(Start, Current - Start); |
1315 | 8.39k | TokenQueue.push_back(T); |
1316 | 8.39k | |
1317 | 8.39k | saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); |
1318 | 8.39k | |
1319 | 8.39k | IsSimpleKeyAllowed = false; |
1320 | 8.39k | |
1321 | 8.39k | return true; |
1322 | 8.39k | } |
1323 | | |
1324 | 236k | bool Scanner::scanPlainScalar() { |
1325 | 236k | StringRef::iterator Start = Current; |
1326 | 236k | unsigned ColStart = Column; |
1327 | 236k | unsigned LeadingBlanks = 0; |
1328 | 236k | assert(Indent >= -1 && "Indent must be >= -1 !"); |
1329 | 236k | unsigned indent = static_cast<unsigned>(Indent + 1); |
1330 | 247k | while (true247k ) { |
1331 | 247k | if (*Current == '#') |
1332 | 252 | break; |
1333 | 247k | |
1334 | 1.90M | while (247k !isBlankOrBreak(Current)1.90M ) { |
1335 | 1.81M | if ( FlowLevel && 1.81M *Current == ':'352k |
1336 | 1.81M | && !(isBlankOrBreak(Current + 1) || 25.8k *(Current + 1) == ','3 )) { |
1337 | 2 | setError("Found unexpected ':' while scanning a plain scalar", Current); |
1338 | 2 | return false; |
1339 | 2 | } |
1340 | 1.81M | |
1341 | 1.81M | // Check for the end of the plain scalar. |
1342 | 1.81M | if ( 1.81M (*Current == ':' && 1.81M isBlankOrBreak(Current + 1)122k ) |
1343 | 1.69M | || ( FlowLevel |
1344 | 327k | && (StringRef(Current, 1).find_first_of(",:?[]{}") |
1345 | 327k | != StringRef::npos))) |
1346 | 158k | break; |
1347 | 1.65M | |
1348 | 1.65M | StringRef::iterator i = skip_nb_char(Current); |
1349 | 1.65M | if (i == Current) |
1350 | 226 | break; |
1351 | 1.65M | Current = i; |
1352 | 1.65M | ++Column; |
1353 | 1.65M | } |
1354 | 247k | |
1355 | 247k | // Are we at the end? |
1356 | 247k | if (247k !isBlankOrBreak(Current)247k ) |
1357 | 158k | break; |
1358 | 88.7k | |
1359 | 88.7k | // Eat blanks. |
1360 | 88.7k | StringRef::iterator Tmp = Current; |
1361 | 590k | while (isBlankOrBreak(Tmp)590k ) { |
1362 | 501k | StringRef::iterator i = skip_s_white(Tmp); |
1363 | 501k | if (i != Tmp501k ) { |
1364 | 423k | if (LeadingBlanks && 423k (Column < indent)411k && *Tmp == '\t'411k ) { |
1365 | 0 | setError("Found invalid tab character in indentation", Tmp); |
1366 | 0 | return false; |
1367 | 0 | } |
1368 | 423k | Tmp = i; |
1369 | 423k | ++Column; |
1370 | 501k | } else { |
1371 | 78.0k | i = skip_b_break(Tmp); |
1372 | 78.0k | if (!LeadingBlanks) |
1373 | 77.4k | LeadingBlanks = 1; |
1374 | 78.0k | Tmp = i; |
1375 | 78.0k | Column = 0; |
1376 | 78.0k | ++Line; |
1377 | 78.0k | } |
1378 | 501k | } |
1379 | 88.7k | |
1380 | 88.7k | if (88.7k !FlowLevel && 88.7k Column < indent78.4k ) |
1381 | 77.3k | break; |
1382 | 11.3k | |
1383 | 11.3k | Current = Tmp; |
1384 | 11.3k | } |
1385 | 236k | if (236k Start == Current236k ) { |
1386 | 1 | setError("Got empty plain scalar", Start); |
1387 | 1 | return false; |
1388 | 1 | } |
1389 | 236k | Token T; |
1390 | 236k | T.Kind = Token::TK_Scalar; |
1391 | 236k | T.Range = StringRef(Start, Current - Start); |
1392 | 236k | TokenQueue.push_back(T); |
1393 | 236k | |
1394 | 236k | // Plain scalars can be simple keys. |
1395 | 236k | saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); |
1396 | 236k | |
1397 | 236k | IsSimpleKeyAllowed = false; |
1398 | 236k | |
1399 | 236k | return true; |
1400 | 236k | } |
1401 | | |
1402 | 29 | bool Scanner::scanAliasOrAnchor(bool IsAlias) { |
1403 | 29 | StringRef::iterator Start = Current; |
1404 | 29 | unsigned ColStart = Column; |
1405 | 29 | skip(1); |
1406 | 168 | while(true168 ) { |
1407 | 168 | if ( *Current == '[' || 168 *Current == ']'168 |
1408 | 168 | || *Current == '{'168 || *Current == '}'168 |
1409 | 168 | || *Current == ',' |
1410 | 164 | || *Current == ':') |
1411 | 6 | break; |
1412 | 162 | StringRef::iterator i = skip_ns_char(Current); |
1413 | 162 | if (i == Current) |
1414 | 23 | break; |
1415 | 139 | Current = i; |
1416 | 139 | ++Column; |
1417 | 139 | } |
1418 | 29 | |
1419 | 29 | if (Start == Current29 ) { |
1420 | 0 | setError("Got empty alias or anchor", Start); |
1421 | 0 | return false; |
1422 | 0 | } |
1423 | 29 | |
1424 | 29 | Token T; |
1425 | 29 | T.Kind = IsAlias ? Token::TK_Alias16 : Token::TK_Anchor13 ; |
1426 | 29 | T.Range = StringRef(Start, Current - Start); |
1427 | 29 | TokenQueue.push_back(T); |
1428 | 29 | |
1429 | 29 | // Alias and anchors can be simple keys. |
1430 | 29 | saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); |
1431 | 29 | |
1432 | 29 | IsSimpleKeyAllowed = false; |
1433 | 29 | |
1434 | 29 | return true; |
1435 | 29 | } |
1436 | | |
1437 | 5.98k | char Scanner::scanBlockChompingIndicator() { |
1438 | 5.98k | char Indicator = ' '; |
1439 | 5.98k | if (Current != End && 5.98k (*Current == '+' || 5.98k *Current == '-'5.97k )) { |
1440 | 17 | Indicator = *Current; |
1441 | 17 | skip(1); |
1442 | 17 | } |
1443 | 5.98k | return Indicator; |
1444 | 5.98k | } |
1445 | | |
1446 | | /// Get the number of line breaks after chomping. |
1447 | | /// |
1448 | | /// Return the number of trailing line breaks to emit, depending on |
1449 | | /// \p ChompingIndicator. |
1450 | | static unsigned getChompedLineBreaks(char ChompingIndicator, |
1451 | 2.99k | unsigned LineBreaks, StringRef Str) { |
1452 | 2.99k | if (ChompingIndicator == '-') // Strip all line breaks. |
1453 | 11 | return 0; |
1454 | 2.98k | if (2.98k ChompingIndicator == '+'2.98k ) // Keep all line breaks. |
1455 | 6 | return LineBreaks; |
1456 | 2.97k | // Clip trailing lines. |
1457 | 2.97k | return Str.empty() ? 2.97k 06 : 12.96k ; |
1458 | 2.99k | } |
1459 | | |
1460 | 2.99k | unsigned Scanner::scanBlockIndentationIndicator() { |
1461 | 2.99k | unsigned Indent = 0; |
1462 | 2.99k | if (Current != End && 2.99k (*Current >= '1' && 2.99k *Current <= '9'7 )) { |
1463 | 6 | Indent = unsigned(*Current - '0'); |
1464 | 6 | skip(1); |
1465 | 6 | } |
1466 | 2.99k | return Indent; |
1467 | 2.99k | } |
1468 | | |
1469 | | bool Scanner::scanBlockScalarHeader(char &ChompingIndicator, |
1470 | 2.99k | unsigned &IndentIndicator, bool &IsDone) { |
1471 | 2.99k | auto Start = Current; |
1472 | 2.99k | |
1473 | 2.99k | ChompingIndicator = scanBlockChompingIndicator(); |
1474 | 2.99k | IndentIndicator = scanBlockIndentationIndicator(); |
1475 | 2.99k | // Check for the chomping indicator once again. |
1476 | 2.99k | if (ChompingIndicator == ' ') |
1477 | 2.98k | ChompingIndicator = scanBlockChompingIndicator(); |
1478 | 2.99k | Current = skip_while(&Scanner::skip_s_white, Current); |
1479 | 2.99k | skipComment(); |
1480 | 2.99k | |
1481 | 2.99k | if (Current == End2.99k ) { // EOF, we have an empty scalar. |
1482 | 2 | Token T; |
1483 | 2 | T.Kind = Token::TK_BlockScalar; |
1484 | 2 | T.Range = StringRef(Start, Current - Start); |
1485 | 2 | TokenQueue.push_back(T); |
1486 | 2 | IsDone = true; |
1487 | 2 | return true; |
1488 | 2 | } |
1489 | 2.99k | |
1490 | 2.99k | if (2.99k !consumeLineBreakIfPresent()2.99k ) { |
1491 | 3 | setError("Expected a line break after block scalar header", Current); |
1492 | 3 | return false; |
1493 | 3 | } |
1494 | 2.99k | return true; |
1495 | 2.99k | } |
1496 | | |
1497 | | bool Scanner::findBlockScalarIndent(unsigned &BlockIndent, |
1498 | | unsigned BlockExitIndent, |
1499 | 2.98k | unsigned &LineBreaks, bool &IsDone) { |
1500 | 2.98k | unsigned MaxAllSpaceLineCharacters = 0; |
1501 | 2.98k | StringRef::iterator LongestAllSpaceLine; |
1502 | 2.98k | |
1503 | 3.21k | while (true3.21k ) { |
1504 | 3.21k | advanceWhile(&Scanner::skip_s_space); |
1505 | 3.21k | if (skip_nb_char(Current) != Current3.21k ) { |
1506 | 2.98k | // This line isn't empty, so try and find the indentation. |
1507 | 2.98k | if (Column <= BlockExitIndent2.98k ) { // End of the block literal. |
1508 | 7 | IsDone = true; |
1509 | 7 | return true; |
1510 | 7 | } |
1511 | 2.98k | // We found the block's indentation. |
1512 | 2.98k | BlockIndent = Column; |
1513 | 2.98k | if (MaxAllSpaceLineCharacters > BlockIndent2.98k ) { |
1514 | 1 | setError( |
1515 | 1 | "Leading all-spaces line must be smaller than the block indent", |
1516 | 1 | LongestAllSpaceLine); |
1517 | 1 | return false; |
1518 | 1 | } |
1519 | 2.97k | return true; |
1520 | 2.97k | } |
1521 | 227 | if (227 skip_b_break(Current) != Current && |
1522 | 227 | Column > MaxAllSpaceLineCharacters226 ) { |
1523 | 10 | // Record the longest all-space line in case it's longer than the |
1524 | 10 | // discovered block indent. |
1525 | 10 | MaxAllSpaceLineCharacters = Column; |
1526 | 10 | LongestAllSpaceLine = Current; |
1527 | 10 | } |
1528 | 227 | |
1529 | 227 | // Check for EOF. |
1530 | 227 | if (Current == End227 ) { |
1531 | 1 | IsDone = true; |
1532 | 1 | return true; |
1533 | 1 | } |
1534 | 226 | |
1535 | 226 | if (226 !consumeLineBreakIfPresent()226 ) { |
1536 | 0 | IsDone = true; |
1537 | 0 | return true; |
1538 | 0 | } |
1539 | 226 | ++LineBreaks; |
1540 | 226 | } |
1541 | 0 | return true; |
1542 | 2.98k | } |
1543 | | |
1544 | | bool Scanner::scanBlockScalarIndent(unsigned BlockIndent, |
1545 | 52.2k | unsigned BlockExitIndent, bool &IsDone) { |
1546 | 52.2k | // Skip the indentation. |
1547 | 131k | while (Column < BlockIndent131k ) { |
1548 | 88.9k | auto I = skip_s_space(Current); |
1549 | 88.9k | if (I == Current) |
1550 | 9.58k | break; |
1551 | 79.3k | Current = I; |
1552 | 79.3k | ++Column; |
1553 | 79.3k | } |
1554 | 52.2k | |
1555 | 52.2k | if (skip_nb_char(Current) == Current) |
1556 | 7.42k | return true; |
1557 | 44.8k | |
1558 | 44.8k | if (44.8k Column <= BlockExitIndent44.8k ) { // End of the block literal. |
1559 | 2.93k | IsDone = true; |
1560 | 2.93k | return true; |
1561 | 2.93k | } |
1562 | 41.8k | |
1563 | 41.8k | if (41.8k Column < BlockIndent41.8k ) { |
1564 | 4 | if (Current != End && 4 *Current == '#'4 ) { // Trailing comment. |
1565 | 2 | IsDone = true; |
1566 | 2 | return true; |
1567 | 2 | } |
1568 | 2 | setError("A text line is less indented than the block scalar", Current); |
1569 | 2 | return false; |
1570 | 2 | } |
1571 | 41.8k | return true; // A normal text line. |
1572 | 41.8k | } |
1573 | | |
1574 | 2.99k | bool Scanner::scanBlockScalar(bool IsLiteral) { |
1575 | 2.99k | // Eat '|' or '>' |
1576 | 2.99k | assert(*Current == '|' || *Current == '>'); |
1577 | 2.99k | skip(1); |
1578 | 2.99k | |
1579 | 2.99k | char ChompingIndicator; |
1580 | 2.99k | unsigned BlockIndent; |
1581 | 2.99k | bool IsDone = false; |
1582 | 2.99k | if (!scanBlockScalarHeader(ChompingIndicator, BlockIndent, IsDone)) |
1583 | 3 | return false; |
1584 | 2.99k | if (2.99k IsDone2.99k ) |
1585 | 2 | return true; |
1586 | 2.99k | |
1587 | 2.99k | auto Start = Current; |
1588 | 2.99k | unsigned BlockExitIndent = Indent < 0 ? 0609 : (unsigned)Indent2.38k ; |
1589 | 2.99k | unsigned LineBreaks = 0; |
1590 | 2.99k | if (BlockIndent == 02.99k ) { |
1591 | 2.98k | if (!findBlockScalarIndent(BlockIndent, BlockExitIndent, LineBreaks, |
1592 | 2.98k | IsDone)) |
1593 | 1 | return false; |
1594 | 2.99k | } |
1595 | 2.99k | |
1596 | 2.99k | // Scan the block's scalars body. |
1597 | 2.99k | SmallString<256> Str; |
1598 | 52.2k | while (!IsDone52.2k ) { |
1599 | 52.2k | if (!scanBlockScalarIndent(BlockIndent, BlockExitIndent, IsDone)) |
1600 | 2 | return false; |
1601 | 52.2k | if (52.2k IsDone52.2k ) |
1602 | 2.93k | break; |
1603 | 49.2k | |
1604 | 49.2k | // Parse the current line. |
1605 | 49.2k | auto LineStart = Current; |
1606 | 49.2k | advanceWhile(&Scanner::skip_nb_char); |
1607 | 49.2k | if (LineStart != Current49.2k ) { |
1608 | 41.8k | Str.append(LineBreaks, '\n'); |
1609 | 41.8k | Str.append(StringRef(LineStart, Current - LineStart)); |
1610 | 41.8k | LineBreaks = 0; |
1611 | 41.8k | } |
1612 | 49.2k | |
1613 | 49.2k | // Check for EOF. |
1614 | 49.2k | if (Current == End) |
1615 | 47 | break; |
1616 | 49.2k | |
1617 | 49.2k | if (49.2k !consumeLineBreakIfPresent()49.2k ) |
1618 | 0 | break; |
1619 | 49.2k | ++LineBreaks; |
1620 | 49.2k | } |
1621 | 2.99k | |
1622 | 2.99k | if (2.99k Current == End && 2.99k !LineBreaks48 ) |
1623 | 2.99k | // Ensure that there is at least one line break before the end of file. |
1624 | 3 | LineBreaks = 1; |
1625 | 2.99k | Str.append(getChompedLineBreaks(ChompingIndicator, LineBreaks, Str), '\n'); |
1626 | 2.99k | |
1627 | 2.99k | // New lines may start a simple key. |
1628 | 2.99k | if (!FlowLevel) |
1629 | 2.99k | IsSimpleKeyAllowed = true; |
1630 | 2.99k | |
1631 | 2.99k | Token T; |
1632 | 2.99k | T.Kind = Token::TK_BlockScalar; |
1633 | 2.99k | T.Range = StringRef(Start, Current - Start); |
1634 | 2.99k | T.Value = Str.str().str(); |
1635 | 2.99k | TokenQueue.push_back(T); |
1636 | 2.99k | return true; |
1637 | 2.99k | } |
1638 | | |
1639 | 1.10k | bool Scanner::scanTag() { |
1640 | 1.10k | StringRef::iterator Start = Current; |
1641 | 1.10k | unsigned ColStart = Column; |
1642 | 1.10k | skip(1); // Eat !. |
1643 | 1.10k | if (Current == End || 1.10k isBlankOrBreak(Current)1.10k );2 // An empty tag. |
1644 | 1.10k | else if (1.10k *Current == '<'1.10k ) { |
1645 | 3 | skip(1); |
1646 | 3 | scan_ns_uri_char(); |
1647 | 3 | if (!consume('>')) |
1648 | 2 | return false; |
1649 | 1.09k | } else { |
1650 | 1.09k | // FIXME: Actually parse the c-ns-shorthand-tag rule. |
1651 | 1.09k | Current = skip_while(&Scanner::skip_ns_char, Current); |
1652 | 1.09k | } |
1653 | 1.10k | |
1654 | 1.10k | Token T; |
1655 | 1.10k | T.Kind = Token::TK_Tag; |
1656 | 1.10k | T.Range = StringRef(Start, Current - Start); |
1657 | 1.10k | TokenQueue.push_back(T); |
1658 | 1.10k | |
1659 | 1.10k | // Tags can be simple keys. |
1660 | 1.10k | saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); |
1661 | 1.10k | |
1662 | 1.10k | IsSimpleKeyAllowed = false; |
1663 | 1.10k | |
1664 | 1.10k | return true; |
1665 | 1.10k | } |
1666 | | |
1667 | 465k | bool Scanner::fetchMoreTokens() { |
1668 | 465k | if (IsStartOfStream) |
1669 | 2.53k | return scanStreamStart(); |
1670 | 462k | |
1671 | 462k | scanToNextToken(); |
1672 | 462k | |
1673 | 462k | if (Current == End) |
1674 | 1.56k | return scanStreamEnd(); |
1675 | 461k | |
1676 | 461k | removeStaleSimpleKeyCandidates(); |
1677 | 461k | |
1678 | 461k | unrollIndent(Column); |
1679 | 461k | |
1680 | 461k | if (Column == 0 && 461k *Current == '%'27.3k ) |
1681 | 19 | return scanDirective(); |
1682 | 461k | |
1683 | 461k | if (461k Column == 0 && 461k Current + 4 <= End27.3k |
1684 | 27.2k | && *Current == '-' |
1685 | 5.11k | && *(Current + 1) == '-' |
1686 | 4.13k | && *(Current + 2) == '-' |
1687 | 4.13k | && (Current + 3 == End || 4.13k isBlankOrBreak(Current + 3)4.13k )) |
1688 | 4.13k | return scanDocumentIndicator(true); |
1689 | 457k | |
1690 | 457k | if (457k Column == 0 && 457k Current + 4 <= End23.1k |
1691 | 23.0k | && *Current == '.' |
1692 | 4.02k | && *(Current + 1) == '.' |
1693 | 4.02k | && *(Current + 2) == '.' |
1694 | 4.02k | && (Current + 3 == End || 4.02k isBlankOrBreak(Current + 3)4.02k )) |
1695 | 4.02k | return scanDocumentIndicator(false); |
1696 | 453k | |
1697 | 453k | if (453k *Current == '['453k ) |
1698 | 3.58k | return scanFlowCollectionStart(true); |
1699 | 449k | |
1700 | 449k | if (449k *Current == '{'449k ) |
1701 | 10.4k | return scanFlowCollectionStart(false); |
1702 | 439k | |
1703 | 439k | if (439k *Current == ']'439k ) |
1704 | 3.56k | return scanFlowCollectionEnd(true); |
1705 | 435k | |
1706 | 435k | if (435k *Current == '}'435k ) |
1707 | 10.4k | return scanFlowCollectionEnd(false); |
1708 | 425k | |
1709 | 425k | if (425k *Current == ','425k ) |
1710 | 27.6k | return scanFlowEntry(); |
1711 | 397k | |
1712 | 397k | if (397k *Current == '-' && 397k isBlankOrBreak(Current + 1)24.0k ) |
1713 | 23.7k | return scanBlockEntry(); |
1714 | 373k | |
1715 | 373k | if (373k *Current == '?' && 373k (FlowLevel || 30 isBlankOrBreak(Current + 1)16 )) |
1716 | 30 | return scanKey(); |
1717 | 373k | |
1718 | 373k | if (373k *Current == ':' && 373k (FlowLevel || 124k isBlankOrBreak(Current + 1)96.7k )) |
1719 | 124k | return scanValue(); |
1720 | 249k | |
1721 | 249k | if (249k *Current == '*'249k ) |
1722 | 16 | return scanAliasOrAnchor(true); |
1723 | 249k | |
1724 | 249k | if (249k *Current == '&'249k ) |
1725 | 13 | return scanAliasOrAnchor(false); |
1726 | 249k | |
1727 | 249k | if (249k *Current == '!'249k ) |
1728 | 1.10k | return scanTag(); |
1729 | 247k | |
1730 | 247k | if (247k *Current == '|' && 247k !FlowLevel2.97k ) |
1731 | 2.97k | return scanBlockScalar(true); |
1732 | 244k | |
1733 | 244k | if (244k *Current == '>' && 244k !FlowLevel20 ) |
1734 | 20 | return scanBlockScalar(false); |
1735 | 244k | |
1736 | 244k | if (244k *Current == '\''244k ) |
1737 | 7.60k | return scanFlowScalar(false); |
1738 | 237k | |
1739 | 237k | if (237k *Current == '"'237k ) |
1740 | 796 | return scanFlowScalar(true); |
1741 | 236k | |
1742 | 236k | // Get a plain scalar. |
1743 | 236k | StringRef FirstChar(Current, 1); |
1744 | 236k | if (!(isBlankOrBreak(Current) |
1745 | 236k | || FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") != StringRef::npos) |
1746 | 308 | || (*Current == '-' && 308 !isBlankOrBreak(Current + 1)306 ) |
1747 | 2 | || (!FlowLevel && 2 (*Current == '?' || 2 *Current == ':'2 ) |
1748 | 2 | && isBlankOrBreak(Current + 1)) |
1749 | 2 | || (!FlowLevel && 2 *Current == ':'2 |
1750 | 1 | && Current + 2 < End |
1751 | 1 | && *(Current + 1) == ':' |
1752 | 1 | && !isBlankOrBreak(Current + 2))) |
1753 | 236k | return scanPlainScalar(); |
1754 | 1 | |
1755 | 1 | setError("Unrecognized character while tokenizing."); |
1756 | 1 | return false; |
1757 | 1 | } |
1758 | | |
1759 | | Stream::Stream(StringRef Input, SourceMgr &SM, bool ShowColors, |
1760 | | std::error_code *EC) |
1761 | 2.43k | : scanner(new Scanner(Input, SM, ShowColors, EC)), CurrentDoc() {} |
1762 | | |
1763 | | Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM, bool ShowColors, |
1764 | | std::error_code *EC) |
1765 | 101 | : scanner(new Scanner(InputBuffer, SM, ShowColors, EC)), CurrentDoc() {} |
1766 | | |
1767 | 2.49k | Stream::~Stream() = default; |
1768 | | |
1769 | 809 | bool Stream::failed() { return scanner->failed(); } |
1770 | | |
1771 | 83 | void Stream::printError(Node *N, const Twine &Msg) { |
1772 | 83 | scanner->printError( N->getSourceRange().Start |
1773 | 83 | , SourceMgr::DK_Error |
1774 | 83 | , Msg |
1775 | 83 | , N->getSourceRange()); |
1776 | 83 | } |
1777 | | |
1778 | 2.53k | document_iterator Stream::begin() { |
1779 | 2.53k | if (CurrentDoc) |
1780 | 0 | report_fatal_error("Can only iterate over the stream once"); |
1781 | 2.53k | |
1782 | 2.53k | // Skip Stream-Start. |
1783 | 2.53k | scanner->getNext(); |
1784 | 2.53k | |
1785 | 2.53k | CurrentDoc.reset(new Document(*this)); |
1786 | 2.53k | return document_iterator(CurrentDoc); |
1787 | 2.53k | } |
1788 | | |
1789 | 9.21k | document_iterator Stream::end() { |
1790 | 9.21k | return document_iterator(); |
1791 | 9.21k | } |
1792 | | |
1793 | 52 | void Stream::skip() { |
1794 | 105 | for (document_iterator i = begin(), e = end(); i != e105 ; ++i53 ) |
1795 | 53 | i->skip(); |
1796 | 52 | } |
1797 | | |
1798 | | Node::Node(unsigned int Type, std::unique_ptr<Document> &D, StringRef A, |
1799 | | StringRef T) |
1800 | 415k | : Doc(D), TypeID(Type), Anchor(A), Tag(T) { |
1801 | 415k | SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin()); |
1802 | 415k | SourceRange = SMRange(Start, Start); |
1803 | 415k | } |
1804 | | |
1805 | 3.52k | std::string Node::getVerbatimTag() const { |
1806 | 3.52k | StringRef Raw = getRawTag(); |
1807 | 3.52k | if (!Raw.empty() && 3.52k Raw != "!"2.32k ) { |
1808 | 2.32k | std::string Ret; |
1809 | 2.32k | if (Raw.find_last_of('!') == 02.32k ) { |
1810 | 2.28k | Ret = Doc->getTagMap().find("!")->second; |
1811 | 2.28k | Ret += Raw.substr(1); |
1812 | 2.28k | return Ret; |
1813 | 37 | } else if (37 Raw.startswith("!!")37 ) { |
1814 | 31 | Ret = Doc->getTagMap().find("!!")->second; |
1815 | 31 | Ret += Raw.substr(2); |
1816 | 31 | return Ret; |
1817 | 0 | } else { |
1818 | 6 | StringRef TagHandle = Raw.substr(0, Raw.find_last_of('!') + 1); |
1819 | 6 | std::map<StringRef, StringRef>::const_iterator It = |
1820 | 6 | Doc->getTagMap().find(TagHandle); |
1821 | 6 | if (It != Doc->getTagMap().end()) |
1822 | 4 | Ret = It->second; |
1823 | 2 | else { |
1824 | 2 | Token T; |
1825 | 2 | T.Kind = Token::TK_Tag; |
1826 | 2 | T.Range = TagHandle; |
1827 | 2 | setError(Twine("Unknown tag handle ") + TagHandle, T); |
1828 | 2 | } |
1829 | 37 | Ret += Raw.substr(Raw.find_last_of('!') + 1); |
1830 | 37 | return Ret; |
1831 | 37 | } |
1832 | 1.20k | } |
1833 | 1.20k | |
1834 | 1.20k | switch (getType()) { |
1835 | 42 | case NK_Null: |
1836 | 42 | return "tag:yaml.org,2002:null"; |
1837 | 861 | case NK_Scalar: |
1838 | 861 | case NK_BlockScalar: |
1839 | 861 | // TODO: Tag resolution. |
1840 | 861 | return "tag:yaml.org,2002:str"; |
1841 | 206 | case NK_Mapping: |
1842 | 206 | return "tag:yaml.org,2002:map"; |
1843 | 94 | case NK_Sequence: |
1844 | 94 | return "tag:yaml.org,2002:seq"; |
1845 | 0 | } |
1846 | 0 |
|
1847 | 0 | return ""; |
1848 | 0 | } |
1849 | | |
1850 | 1.14M | Token &Node::peekNext() { |
1851 | 1.14M | return Doc->peekNext(); |
1852 | 1.14M | } |
1853 | | |
1854 | 343k | Token Node::getNext() { |
1855 | 343k | return Doc->getNext(); |
1856 | 343k | } |
1857 | | |
1858 | 285k | Node *Node::parseBlockNode() { |
1859 | 285k | return Doc->parseBlockNode(); |
1860 | 285k | } |
1861 | | |
1862 | 125k | BumpPtrAllocator &Node::getAllocator() { |
1863 | 125k | return Doc->NodeAllocator; |
1864 | 125k | } |
1865 | | |
1866 | 23 | void Node::setError(const Twine &Msg, Token &Tok) const { |
1867 | 23 | Doc->setError(Msg, Tok); |
1868 | 23 | } |
1869 | | |
1870 | 356k | bool Node::failed() const { |
1871 | 356k | return Doc->failed(); |
1872 | 356k | } |
1873 | | |
1874 | 243k | StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const { |
1875 | 243k | // TODO: Handle newlines properly. We need to remove leading whitespace. |
1876 | 243k | if (Value[0] == '"'243k ) { // Double quoted. |
1877 | 742 | // Pull off the leading and trailing "s. |
1878 | 742 | StringRef UnquotedValue = Value.substr(1, Value.size() - 2); |
1879 | 742 | // Search for characters that would require unescaping the value. |
1880 | 742 | StringRef::size_type i = UnquotedValue.find_first_of("\\\r\n"); |
1881 | 742 | if (i != StringRef::npos) |
1882 | 36 | return unescapeDoubleQuoted(UnquotedValue, i, Storage); |
1883 | 706 | return UnquotedValue; |
1884 | 242k | } else if (242k Value[0] == '\''242k ) { // Single quoted. |
1885 | 7.25k | // Pull off the leading and trailing 's. |
1886 | 7.25k | StringRef UnquotedValue = Value.substr(1, Value.size() - 2); |
1887 | 7.25k | StringRef::size_type i = UnquotedValue.find('\''); |
1888 | 7.25k | if (i != StringRef::npos7.25k ) { |
1889 | 4 | // We're going to need Storage. |
1890 | 4 | Storage.clear(); |
1891 | 4 | Storage.reserve(UnquotedValue.size()); |
1892 | 9 | for (; i != StringRef::npos9 ; i = UnquotedValue.find('\'')5 ) { |
1893 | 5 | StringRef Valid(UnquotedValue.begin(), i); |
1894 | 5 | Storage.insert(Storage.end(), Valid.begin(), Valid.end()); |
1895 | 5 | Storage.push_back('\''); |
1896 | 5 | UnquotedValue = UnquotedValue.substr(i + 2); |
1897 | 5 | } |
1898 | 4 | Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end()); |
1899 | 4 | return StringRef(Storage.begin(), Storage.size()); |
1900 | 4 | } |
1901 | 7.25k | return UnquotedValue; |
1902 | 7.25k | } |
1903 | 235k | // Plain or block. |
1904 | 235k | return Value.rtrim(' '); |
1905 | 235k | } |
1906 | | |
1907 | | StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue |
1908 | | , StringRef::size_type i |
1909 | | , SmallVectorImpl<char> &Storage) |
1910 | 36 | const { |
1911 | 36 | // Use Storage to build proper value. |
1912 | 36 | Storage.clear(); |
1913 | 36 | Storage.reserve(UnquotedValue.size()); |
1914 | 147 | for (; i != StringRef::npos147 ; i = UnquotedValue.find_first_of("\\\r\n")111 ) { |
1915 | 112 | // Insert all previous chars into Storage. |
1916 | 112 | StringRef Valid(UnquotedValue.begin(), i); |
1917 | 112 | Storage.insert(Storage.end(), Valid.begin(), Valid.end()); |
1918 | 112 | // Chop off inserted chars. |
1919 | 112 | UnquotedValue = UnquotedValue.substr(i); |
1920 | 112 | |
1921 | 112 | assert(!UnquotedValue.empty() && "Can't be empty!"); |
1922 | 112 | |
1923 | 112 | // Parse escape or line break. |
1924 | 112 | switch (UnquotedValue[0]) { |
1925 | 23 | case '\r': |
1926 | 23 | case '\n': |
1927 | 23 | Storage.push_back('\n'); |
1928 | 23 | if ( UnquotedValue.size() > 1 |
1929 | 23 | && (UnquotedValue[1] == '\r' || 23 UnquotedValue[1] == '\n'23 )) |
1930 | 3 | UnquotedValue = UnquotedValue.substr(1); |
1931 | 23 | UnquotedValue = UnquotedValue.substr(1); |
1932 | 23 | break; |
1933 | 89 | default: |
1934 | 89 | if (UnquotedValue.size() == 1) |
1935 | 89 | // TODO: Report error. |
1936 | 0 | break; |
1937 | 89 | UnquotedValue = UnquotedValue.substr(1); |
1938 | 89 | switch (UnquotedValue[0]) { |
1939 | 1 | default: { |
1940 | 1 | Token T; |
1941 | 1 | T.Range = StringRef(UnquotedValue.begin(), 1); |
1942 | 1 | setError("Unrecognized escape code!", T); |
1943 | 1 | return ""; |
1944 | 89 | } |
1945 | 5 | case '\r': |
1946 | 5 | case '\n': |
1947 | 5 | // Remove the new line. |
1948 | 5 | if ( UnquotedValue.size() > 1 |
1949 | 5 | && (UnquotedValue[1] == '\r' || 5 UnquotedValue[1] == '\n'5 )) |
1950 | 0 | UnquotedValue = UnquotedValue.substr(1); |
1951 | 5 | // If this was just a single byte newline, it will get skipped |
1952 | 5 | // below. |
1953 | 5 | break; |
1954 | 1 | case '0': |
1955 | 1 | Storage.push_back(0x00); |
1956 | 1 | break; |
1957 | 1 | case 'a': |
1958 | 1 | Storage.push_back(0x07); |
1959 | 1 | break; |
1960 | 2 | case 'b': |
1961 | 2 | Storage.push_back(0x08); |
1962 | 2 | break; |
1963 | 3 | case 't': |
1964 | 3 | case 0x09: |
1965 | 3 | Storage.push_back(0x09); |
1966 | 3 | break; |
1967 | 5 | case 'n': |
1968 | 5 | Storage.push_back(0x0A); |
1969 | 5 | break; |
1970 | 1 | case 'v': |
1971 | 1 | Storage.push_back(0x0B); |
1972 | 1 | break; |
1973 | 1 | case 'f': |
1974 | 1 | Storage.push_back(0x0C); |
1975 | 1 | break; |
1976 | 2 | case 'r': |
1977 | 2 | Storage.push_back(0x0D); |
1978 | 2 | break; |
1979 | 1 | case 'e': |
1980 | 1 | Storage.push_back(0x1B); |
1981 | 1 | break; |
1982 | 3 | case ' ': |
1983 | 3 | Storage.push_back(0x20); |
1984 | 3 | break; |
1985 | 42 | case '"': |
1986 | 42 | Storage.push_back(0x22); |
1987 | 42 | break; |
1988 | 0 | case '/': |
1989 | 0 | Storage.push_back(0x2F); |
1990 | 0 | break; |
1991 | 11 | case '\\': |
1992 | 11 | Storage.push_back(0x5C); |
1993 | 11 | break; |
1994 | 1 | case 'N': |
1995 | 1 | encodeUTF8(0x85, Storage); |
1996 | 1 | break; |
1997 | 1 | case '_': |
1998 | 1 | encodeUTF8(0xA0, Storage); |
1999 | 1 | break; |
2000 | 1 | case 'L': |
2001 | 1 | encodeUTF8(0x2028, Storage); |
2002 | 1 | break; |
2003 | 1 | case 'P': |
2004 | 1 | encodeUTF8(0x2029, Storage); |
2005 | 1 | break; |
2006 | 3 | case 'x': { |
2007 | 3 | if (UnquotedValue.size() < 3) |
2008 | 3 | // TODO: Report error. |
2009 | 0 | break; |
2010 | 3 | unsigned int UnicodeScalarValue; |
2011 | 3 | if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue)) |
2012 | 3 | // TODO: Report error. |
2013 | 0 | UnicodeScalarValue = 0xFFFD; |
2014 | 3 | encodeUTF8(UnicodeScalarValue, Storage); |
2015 | 3 | UnquotedValue = UnquotedValue.substr(2); |
2016 | 3 | break; |
2017 | 3 | } |
2018 | 2 | case 'u': { |
2019 | 2 | if (UnquotedValue.size() < 5) |
2020 | 2 | // TODO: Report error. |
2021 | 0 | break; |
2022 | 2 | unsigned int UnicodeScalarValue; |
2023 | 2 | if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue)) |
2024 | 2 | // TODO: Report error. |
2025 | 0 | UnicodeScalarValue = 0xFFFD; |
2026 | 2 | encodeUTF8(UnicodeScalarValue, Storage); |
2027 | 2 | UnquotedValue = UnquotedValue.substr(4); |
2028 | 2 | break; |
2029 | 2 | } |
2030 | 1 | case 'U': { |
2031 | 1 | if (UnquotedValue.size() < 9) |
2032 | 1 | // TODO: Report error. |
2033 | 0 | break; |
2034 | 1 | unsigned int UnicodeScalarValue; |
2035 | 1 | if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue)) |
2036 | 1 | // TODO: Report error. |
2037 | 0 | UnicodeScalarValue = 0xFFFD; |
2038 | 5 | encodeUTF8(UnicodeScalarValue, Storage); |
2039 | 5 | UnquotedValue = UnquotedValue.substr(8); |
2040 | 5 | break; |
2041 | 5 | } |
2042 | 88 | } |
2043 | 88 | UnquotedValue = UnquotedValue.substr(1); |
2044 | 112 | } |
2045 | 112 | } |
2046 | 35 | Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end()); |
2047 | 35 | return StringRef(Storage.begin(), Storage.size()); |
2048 | 36 | } |
2049 | | |
2050 | 393k | Node *KeyValueNode::getKey() { |
2051 | 393k | if (Key) |
2052 | 268k | return Key; |
2053 | 124k | // Handle implicit null keys. |
2054 | 124k | { |
2055 | 124k | Token &t = peekNext(); |
2056 | 124k | if ( t.Kind == Token::TK_BlockEnd |
2057 | 124k | || t.Kind == Token::TK_Value |
2058 | 124k | || t.Kind == Token::TK_Error124k ) { |
2059 | 0 | return Key = new (getAllocator()) NullNode(Doc); |
2060 | 0 | } |
2061 | 124k | if (124k t.Kind == Token::TK_Key124k ) |
2062 | 124k | getNext(); // skip TK_Key. |
2063 | 124k | } |
2064 | 124k | |
2065 | 124k | // Handle explicit null keys. |
2066 | 124k | Token &t = peekNext(); |
2067 | 124k | if (t.Kind == Token::TK_BlockEnd || 124k t.Kind == Token::TK_Value124k ) { |
2068 | 3 | return Key = new (getAllocator()) NullNode(Doc); |
2069 | 3 | } |
2070 | 124k | |
2071 | 124k | // We've got a normal key. |
2072 | 124k | return Key = parseBlockNode(); |
2073 | 124k | } |
2074 | | |
2075 | 266k | Node *KeyValueNode::getValue() { |
2076 | 266k | if (Value) |
2077 | 141k | return Value; |
2078 | 124k | getKey()->skip(); |
2079 | 124k | if (failed()) |
2080 | 1 | return Value = new (getAllocator()) NullNode(Doc); |
2081 | 124k | |
2082 | 124k | // Handle implicit null values. |
2083 | 124k | { |
2084 | 124k | Token &t = peekNext(); |
2085 | 124k | if ( t.Kind == Token::TK_BlockEnd |
2086 | 124k | || t.Kind == Token::TK_FlowMappingEnd |
2087 | 124k | || t.Kind == Token::TK_Key |
2088 | 124k | || t.Kind == Token::TK_FlowEntry |
2089 | 124k | || t.Kind == Token::TK_Error124k ) { |
2090 | 20 | return Value = new (getAllocator()) NullNode(Doc); |
2091 | 20 | } |
2092 | 124k | |
2093 | 124k | if (124k t.Kind != Token::TK_Value124k ) { |
2094 | 3 | setError("Unexpected token in Key Value.", t); |
2095 | 3 | return Value = new (getAllocator()) NullNode(Doc); |
2096 | 3 | } |
2097 | 124k | getNext(); // skip TK_Value. |
2098 | 124k | } |
2099 | 124k | |
2100 | 124k | // Handle explicit null values. |
2101 | 124k | Token &t = peekNext(); |
2102 | 124k | if (t.Kind == Token::TK_BlockEnd || 124k t.Kind == Token::TK_Key124k ) { |
2103 | 659 | return Value = new (getAllocator()) NullNode(Doc); |
2104 | 659 | } |
2105 | 123k | |
2106 | 123k | // We got a normal value. |
2107 | 123k | return Value = parseBlockNode(); |
2108 | 123k | } |
2109 | | |
2110 | 174k | void MappingNode::increment() { |
2111 | 174k | if (failed()174k ) { |
2112 | 9 | IsAtEnd = true; |
2113 | 9 | CurrentEntry = nullptr; |
2114 | 9 | return; |
2115 | 9 | } |
2116 | 174k | if (174k CurrentEntry174k ) { |
2117 | 142k | CurrentEntry->skip(); |
2118 | 142k | if (Type == MT_Inline142k ) { |
2119 | 12 | IsAtEnd = true; |
2120 | 12 | CurrentEntry = nullptr; |
2121 | 12 | return; |
2122 | 12 | } |
2123 | 174k | } |
2124 | 174k | Token T = peekNext(); |
2125 | 174k | if (T.Kind == Token::TK_Key || 174k T.Kind == Token::TK_Scalar49.8k ) { |
2126 | 124k | // KeyValueNode eats the TK_Key. That way it can detect null keys. |
2127 | 124k | CurrentEntry = new (getAllocator()) KeyValueNode(Doc); |
2128 | 174k | } else if (49.8k Type == MT_Block49.8k ) { |
2129 | 22.0k | switch (T.Kind) { |
2130 | 22.0k | case Token::TK_BlockEnd: |
2131 | 22.0k | getNext(); |
2132 | 22.0k | IsAtEnd = true; |
2133 | 22.0k | CurrentEntry = nullptr; |
2134 | 22.0k | break; |
2135 | 0 | default: |
2136 | 0 | setError("Unexpected token. Expected Key or Block End", T); |
2137 | 0 | LLVM_FALLTHROUGH; |
2138 | 0 | case Token::TK_Error: |
2139 | 0 | IsAtEnd = true; |
2140 | 0 | CurrentEntry = nullptr; |
2141 | 22.0k | } |
2142 | 49.8k | } else { |
2143 | 27.8k | switch (T.Kind) { |
2144 | 17.4k | case Token::TK_FlowEntry: |
2145 | 17.4k | // Eat the flow entry and recurse. |
2146 | 17.4k | getNext(); |
2147 | 17.4k | return increment(); |
2148 | 10.4k | case Token::TK_FlowMappingEnd: |
2149 | 10.4k | getNext(); |
2150 | 10.4k | LLVM_FALLTHROUGH; |
2151 | 10.4k | case Token::TK_Error: |
2152 | 10.4k | // Set this to end iterator. |
2153 | 10.4k | IsAtEnd = true; |
2154 | 10.4k | CurrentEntry = nullptr; |
2155 | 10.4k | break; |
2156 | 7 | default: |
2157 | 7 | setError( "Unexpected token. Expected Key, Flow Entry, or Flow " |
2158 | 7 | "Mapping End." |
2159 | 7 | , T); |
2160 | 7 | IsAtEnd = true; |
2161 | 7 | CurrentEntry = nullptr; |
2162 | 49.8k | } |
2163 | 49.8k | } |
2164 | 174k | } |
2165 | | |
2166 | 57.2k | void SequenceNode::increment() { |
2167 | 57.2k | if (failed()57.2k ) { |
2168 | 6 | IsAtEnd = true; |
2169 | 6 | CurrentEntry = nullptr; |
2170 | 6 | return; |
2171 | 6 | } |
2172 | 57.2k | if (57.2k CurrentEntry57.2k ) |
2173 | 47.1k | CurrentEntry->skip(); |
2174 | 57.2k | Token T = peekNext(); |
2175 | 57.2k | if (SeqType == ST_Block57.2k ) { |
2176 | 30.2k | switch (T.Kind) { |
2177 | 23.7k | case Token::TK_BlockEntry: |
2178 | 23.7k | getNext(); |
2179 | 23.7k | CurrentEntry = parseBlockNode(); |
2180 | 23.7k | if (!CurrentEntry23.7k ) { // An error occurred. |
2181 | 1 | IsAtEnd = true; |
2182 | 1 | CurrentEntry = nullptr; |
2183 | 1 | } |
2184 | 23.7k | break; |
2185 | 6.51k | case Token::TK_BlockEnd: |
2186 | 6.51k | getNext(); |
2187 | 6.51k | IsAtEnd = true; |
2188 | 6.51k | CurrentEntry = nullptr; |
2189 | 6.51k | break; |
2190 | 1 | default: |
2191 | 1 | setError( "Unexpected token. Expected Block Entry or Block End." |
2192 | 1 | , T); |
2193 | 1 | LLVM_FALLTHROUGH; |
2194 | 1 | case Token::TK_Error: |
2195 | 1 | IsAtEnd = true; |
2196 | 1 | CurrentEntry = nullptr; |
2197 | 30.2k | } |
2198 | 57.2k | } else if (26.9k SeqType == ST_Indentless26.9k ) { |
2199 | 52 | switch (T.Kind) { |
2200 | 35 | case Token::TK_BlockEntry: |
2201 | 35 | getNext(); |
2202 | 35 | CurrentEntry = parseBlockNode(); |
2203 | 35 | if (!CurrentEntry35 ) { // An error occurred. |
2204 | 0 | IsAtEnd = true; |
2205 | 0 | CurrentEntry = nullptr; |
2206 | 0 | } |
2207 | 35 | break; |
2208 | 17 | default: |
2209 | 17 | case Token::TK_Error: |
2210 | 17 | IsAtEnd = true; |
2211 | 17 | CurrentEntry = nullptr; |
2212 | 52 | } |
2213 | 26.9k | } else if (26.9k SeqType == ST_Flow26.9k ) { |
2214 | 26.9k | switch (T.Kind) { |
2215 | 10.1k | case Token::TK_FlowEntry: |
2216 | 10.1k | // Eat the flow entry and recurse. |
2217 | 10.1k | getNext(); |
2218 | 10.1k | WasPreviousTokenFlowEntry = true; |
2219 | 10.1k | return increment(); |
2220 | 3.50k | case Token::TK_FlowSequenceEnd: |
2221 | 3.50k | getNext(); |
2222 | 3.50k | LLVM_FALLTHROUGH; |
2223 | 3.50k | case Token::TK_Error: |
2224 | 3.50k | // Set this to end iterator. |
2225 | 3.50k | IsAtEnd = true; |
2226 | 3.50k | CurrentEntry = nullptr; |
2227 | 3.50k | break; |
2228 | 7 | case Token::TK_StreamEnd: |
2229 | 7 | case Token::TK_DocumentEnd: |
2230 | 7 | case Token::TK_DocumentStart: |
2231 | 7 | setError("Could not find closing ]!", T); |
2232 | 7 | // Set this to end iterator. |
2233 | 7 | IsAtEnd = true; |
2234 | 7 | CurrentEntry = nullptr; |
2235 | 7 | break; |
2236 | 13.2k | default: |
2237 | 13.2k | if (!WasPreviousTokenFlowEntry13.2k ) { |
2238 | 2 | setError("Expected , between entries!", T); |
2239 | 2 | IsAtEnd = true; |
2240 | 2 | CurrentEntry = nullptr; |
2241 | 2 | break; |
2242 | 2 | } |
2243 | 13.2k | // Otherwise it must be a flow entry. |
2244 | 13.2k | CurrentEntry = parseBlockNode(); |
2245 | 13.2k | if (!CurrentEntry13.2k ) { |
2246 | 0 | IsAtEnd = true; |
2247 | 0 | } |
2248 | 3.50k | WasPreviousTokenFlowEntry = false; |
2249 | 3.50k | break; |
2250 | 26.9k | } |
2251 | 26.9k | } |
2252 | 57.2k | } |
2253 | | |
2254 | 4.93k | Document::Document(Stream &S) : stream(S), Root(nullptr) { |
2255 | 4.93k | // Tag maps starts with two default mappings. |
2256 | 4.93k | TagMap["!"] = "!"; |
2257 | 4.93k | TagMap["!!"] = "tag:yaml.org,2002:"; |
2258 | 4.93k | |
2259 | 4.93k | if (parseDirectives()) |
2260 | 13 | expectToken(Token::TK_DocumentStart); |
2261 | 4.93k | Token &T = peekNext(); |
2262 | 4.93k | if (T.Kind == Token::TK_DocumentStart) |
2263 | 4.09k | getNext(); |
2264 | 4.93k | } |
2265 | | |
2266 | 6.99k | bool Document::skip() { |
2267 | 6.99k | if (stream.scanner->failed()) |
2268 | 45 | return false; |
2269 | 6.95k | if (6.95k !Root6.95k ) |
2270 | 58 | getRoot(); |
2271 | 6.95k | Root->skip(); |
2272 | 6.95k | Token &T = peekNext(); |
2273 | 6.95k | if (T.Kind == Token::TK_StreamEnd) |
2274 | 1.34k | return false; |
2275 | 5.60k | if (5.60k T.Kind == Token::TK_DocumentEnd5.60k ) { |
2276 | 3.19k | getNext(); |
2277 | 3.19k | return skip(); |
2278 | 3.19k | } |
2279 | 2.41k | return true; |
2280 | 2.41k | } |
2281 | | |
2282 | 1.45M | Token &Document::peekNext() { |
2283 | 1.45M | return stream.scanner->peekNext(); |
2284 | 1.45M | } |
2285 | | |
2286 | 641k | Token Document::getNext() { |
2287 | 641k | return stream.scanner->getNext(); |
2288 | 641k | } |
2289 | | |
2290 | 25 | void Document::setError(const Twine &Message, Token &Location) const { |
2291 | 25 | stream.scanner->setError(Message, Location.Range.begin()); |
2292 | 25 | } |
2293 | | |
2294 | 356k | bool Document::failed() const { |
2295 | 356k | return stream.scanner->failed(); |
2296 | 356k | } |
2297 | | |
2298 | 290k | Node *Document::parseBlockNode() { |
2299 | 290k | Token T = peekNext(); |
2300 | 290k | // Handle properties. |
2301 | 290k | Token AnchorInfo; |
2302 | 290k | Token TagInfo; |
2303 | 291k | parse_property: |
2304 | 291k | switch (T.Kind) { |
2305 | 16 | case Token::TK_Alias: |
2306 | 16 | getNext(); |
2307 | 16 | return new (NodeAllocator) AliasNode(stream.CurrentDoc, T.Range.substr(1)); |
2308 | 13 | case Token::TK_Anchor: |
2309 | 13 | if (AnchorInfo.Kind == Token::TK_Anchor13 ) { |
2310 | 0 | setError("Already encountered an anchor for this node!", T); |
2311 | 0 | return nullptr; |
2312 | 0 | } |
2313 | 13 | AnchorInfo = getNext(); // Consume TK_Anchor. |
2314 | 13 | T = peekNext(); |
2315 | 13 | goto parse_property; |
2316 | 1.10k | case Token::TK_Tag: |
2317 | 1.10k | if (TagInfo.Kind == Token::TK_Tag1.10k ) { |
2318 | 0 | setError("Already encountered a tag for this node!", T); |
2319 | 0 | return nullptr; |
2320 | 0 | } |
2321 | 1.10k | TagInfo = getNext(); // Consume TK_Tag. |
2322 | 1.10k | T = peekNext(); |
2323 | 1.10k | goto parse_property; |
2324 | 290k | default: |
2325 | 290k | break; |
2326 | 290k | } |
2327 | 290k | |
2328 | 290k | switch (T.Kind) { |
2329 | 17 | case Token::TK_BlockEntry: |
2330 | 17 | // We got an unindented BlockEntry sequence. This is not terminated with |
2331 | 17 | // a BlockEnd. |
2332 | 17 | // Don't eat the TK_BlockEntry, SequenceNode needs it. |
2333 | 17 | return new (NodeAllocator) SequenceNode( stream.CurrentDoc |
2334 | 17 | , AnchorInfo.Range.substr(1) |
2335 | 17 | , TagInfo.Range |
2336 | 17 | , SequenceNode::ST_Indentless); |
2337 | 6.51k | case Token::TK_BlockSequenceStart: |
2338 | 6.51k | getNext(); |
2339 | 6.51k | return new (NodeAllocator) |
2340 | 6.51k | SequenceNode( stream.CurrentDoc |
2341 | 6.51k | , AnchorInfo.Range.substr(1) |
2342 | 6.51k | , TagInfo.Range |
2343 | 6.51k | , SequenceNode::ST_Block); |
2344 | 22.0k | case Token::TK_BlockMappingStart: |
2345 | 22.0k | getNext(); |
2346 | 22.0k | return new (NodeAllocator) |
2347 | 22.0k | MappingNode( stream.CurrentDoc |
2348 | 22.0k | , AnchorInfo.Range.substr(1) |
2349 | 22.0k | , TagInfo.Range |
2350 | 22.0k | , MappingNode::MT_Block); |
2351 | 3.57k | case Token::TK_FlowSequenceStart: |
2352 | 3.57k | getNext(); |
2353 | 3.57k | return new (NodeAllocator) |
2354 | 3.57k | SequenceNode( stream.CurrentDoc |
2355 | 3.57k | , AnchorInfo.Range.substr(1) |
2356 | 3.57k | , TagInfo.Range |
2357 | 3.57k | , SequenceNode::ST_Flow); |
2358 | 10.4k | case Token::TK_FlowMappingStart: |
2359 | 10.4k | getNext(); |
2360 | 10.4k | return new (NodeAllocator) |
2361 | 10.4k | MappingNode( stream.CurrentDoc |
2362 | 10.4k | , AnchorInfo.Range.substr(1) |
2363 | 10.4k | , TagInfo.Range |
2364 | 10.4k | , MappingNode::MT_Flow); |
2365 | 244k | case Token::TK_Scalar: |
2366 | 244k | getNext(); |
2367 | 244k | return new (NodeAllocator) |
2368 | 244k | ScalarNode( stream.CurrentDoc |
2369 | 244k | , AnchorInfo.Range.substr(1) |
2370 | 244k | , TagInfo.Range |
2371 | 244k | , T.Range); |
2372 | 2.99k | case Token::TK_BlockScalar: { |
2373 | 2.99k | getNext(); |
2374 | 2.99k | StringRef NullTerminatedStr(T.Value.c_str(), T.Value.length() + 1); |
2375 | 2.99k | StringRef StrCopy = NullTerminatedStr.copy(NodeAllocator).drop_back(); |
2376 | 2.99k | return new (NodeAllocator) |
2377 | 2.99k | BlockScalarNode(stream.CurrentDoc, AnchorInfo.Range.substr(1), |
2378 | 2.99k | TagInfo.Range, StrCopy, T.Range); |
2379 | 290k | } |
2380 | 13 | case Token::TK_Key: |
2381 | 13 | // Don't eat the TK_Key, KeyValueNode expects it. |
2382 | 13 | return new (NodeAllocator) |
2383 | 13 | MappingNode( stream.CurrentDoc |
2384 | 13 | , AnchorInfo.Range.substr(1) |
2385 | 13 | , TagInfo.Range |
2386 | 13 | , MappingNode::MT_Inline); |
2387 | 56 | case Token::TK_DocumentStart: |
2388 | 56 | case Token::TK_DocumentEnd: |
2389 | 56 | case Token::TK_StreamEnd: |
2390 | 65 | default: |
2391 | 65 | // TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not |
2392 | 65 | // !!null null. |
2393 | 65 | return new (NodeAllocator) NullNode(stream.CurrentDoc); |
2394 | 13 | case Token::TK_Error: |
2395 | 13 | return nullptr; |
2396 | 0 | } |
2397 | 0 | llvm_unreachable0 ("Control flow shouldn't reach here."); |
2398 | 0 | return nullptr; |
2399 | 290k | } |
2400 | | |
2401 | 4.93k | bool Document::parseDirectives() { |
2402 | 4.93k | bool isDirective = false; |
2403 | 4.95k | while (true4.95k ) { |
2404 | 4.95k | Token T = peekNext(); |
2405 | 4.95k | if (T.Kind == Token::TK_TagDirective4.95k ) { |
2406 | 13 | parseTAGDirective(); |
2407 | 13 | isDirective = true; |
2408 | 4.95k | } else if (4.93k T.Kind == Token::TK_VersionDirective4.93k ) { |
2409 | 5 | parseYAMLDirective(); |
2410 | 5 | isDirective = true; |
2411 | 5 | } else |
2412 | 4.93k | break; |
2413 | 4.95k | } |
2414 | 4.93k | return isDirective; |
2415 | 4.93k | } |
2416 | | |
2417 | 5 | void Document::parseYAMLDirective() { |
2418 | 5 | getNext(); // Eat %YAML <version> |
2419 | 5 | } |
2420 | | |
2421 | 13 | void Document::parseTAGDirective() { |
2422 | 13 | Token Tag = getNext(); // %TAG <handle> <prefix> |
2423 | 13 | StringRef T = Tag.Range; |
2424 | 13 | // Strip %TAG |
2425 | 13 | T = T.substr(T.find_first_of(" \t")).ltrim(" \t"); |
2426 | 13 | std::size_t HandleEnd = T.find_first_of(" \t"); |
2427 | 13 | StringRef TagHandle = T.substr(0, HandleEnd); |
2428 | 13 | StringRef TagPrefix = T.substr(HandleEnd).ltrim(" \t"); |
2429 | 13 | TagMap[TagHandle] = TagPrefix; |
2430 | 13 | } |
2431 | | |
2432 | 13 | bool Document::expectToken(int TK) { |
2433 | 13 | Token T = getNext(); |
2434 | 13 | if (T.Kind != TK13 ) { |
2435 | 2 | setError("Unexpected token", T); |
2436 | 2 | return false; |
2437 | 2 | } |
2438 | 11 | return true; |
2439 | 11 | } |