/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/Format/SortJavaScriptImports.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===--- SortJavaScriptImports.cpp - Sort ES6 Imports -----------*- C++ -*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | /// |
9 | | /// \file |
10 | | /// This file implements a sort operation for JavaScript ES6 imports. |
11 | | /// |
12 | | //===----------------------------------------------------------------------===// |
13 | | |
14 | | #include "SortJavaScriptImports.h" |
15 | | #include "TokenAnalyzer.h" |
16 | | #include "TokenAnnotator.h" |
17 | | #include "clang/Basic/Diagnostic.h" |
18 | | #include "clang/Basic/DiagnosticOptions.h" |
19 | | #include "clang/Basic/LLVM.h" |
20 | | #include "clang/Basic/SourceLocation.h" |
21 | | #include "clang/Basic/SourceManager.h" |
22 | | #include "clang/Format/Format.h" |
23 | | #include "llvm/ADT/STLExtras.h" |
24 | | #include "llvm/ADT/SmallVector.h" |
25 | | #include "llvm/Support/Debug.h" |
26 | | #include <algorithm> |
27 | | #include <string> |
28 | | |
29 | | #define DEBUG_TYPE "format-formatter" |
30 | | |
31 | | namespace clang { |
32 | | namespace format { |
33 | | |
34 | | class FormatTokenLexer; |
35 | | |
36 | | using clang::format::FormatStyle; |
37 | | |
38 | | // An imported symbol in a JavaScript ES6 import/export, possibly aliased. |
39 | | struct JsImportedSymbol { |
40 | | StringRef Symbol; |
41 | | StringRef Alias; |
42 | | SourceRange Range; |
43 | | |
44 | 54 | bool operator==(const JsImportedSymbol &RHS) const { |
45 | | // Ignore Range for comparison, it is only used to stitch code together, |
46 | | // but imports at different code locations are still conceptually the same. |
47 | 54 | return Symbol == RHS.Symbol && Alias == RHS.Alias49 ; |
48 | 54 | } |
49 | | }; |
50 | | |
51 | | // An ES6 module reference. |
52 | | // |
53 | | // ES6 implements a module system, where individual modules (~= source files) |
54 | | // can reference other modules, either importing symbols from them, or exporting |
55 | | // symbols from them: |
56 | | // import {foo} from 'foo'; |
57 | | // export {foo}; |
58 | | // export {bar} from 'bar'; |
59 | | // |
60 | | // `export`s with URLs are syntactic sugar for an import of the symbol from the |
61 | | // URL, followed by an export of the symbol, allowing this code to treat both |
62 | | // statements more or less identically, with the exception being that `export`s |
63 | | // are sorted last. |
64 | | // |
65 | | // imports and exports support individual symbols, but also a wildcard syntax: |
66 | | // import * as prefix from 'foo'; |
67 | | // export * from 'bar'; |
68 | | // |
69 | | // This struct represents both exports and imports to build up the information |
70 | | // required for sorting module references. |
71 | | struct JsModuleReference { |
72 | | bool IsExport = false; |
73 | | // Module references are sorted into these categories, in order. |
74 | | enum ReferenceCategory { |
75 | | SIDE_EFFECT, // "import 'something';" |
76 | | ABSOLUTE, // from 'something' |
77 | | RELATIVE_PARENT, // from '../*' |
78 | | RELATIVE, // from './*' |
79 | | }; |
80 | | ReferenceCategory Category = ReferenceCategory::SIDE_EFFECT; |
81 | | // The URL imported, e.g. `import .. from 'url';`. Empty for `export {a, b};`. |
82 | | StringRef URL; |
83 | | // Prefix from "import * as prefix". Empty for symbol imports and `export *`. |
84 | | // Implies an empty names list. |
85 | | StringRef Prefix; |
86 | | // Symbols from `import {SymbolA, SymbolB, ...} from ...;`. |
87 | | SmallVector<JsImportedSymbol, 1> Symbols; |
88 | | // Textual position of the import/export, including preceding and trailing |
89 | | // comments. |
90 | | SourceRange Range; |
91 | | }; |
92 | | |
93 | 54 | bool operator<(const JsModuleReference &LHS, const JsModuleReference &RHS) { |
94 | 54 | if (LHS.IsExport != RHS.IsExport) |
95 | 6 | return LHS.IsExport < RHS.IsExport; |
96 | 48 | if (LHS.Category != RHS.Category) |
97 | 19 | return LHS.Category < RHS.Category; |
98 | 29 | if (LHS.Category == JsModuleReference::ReferenceCategory::SIDE_EFFECT) |
99 | | // Side effect imports might be ordering sensitive. Consider them equal so |
100 | | // that they maintain their relative order in the stable sort below. |
101 | | // This retains transitivity because LHS.Category == RHS.Category here. |
102 | 1 | return false; |
103 | | // Empty URLs sort *last* (for export {...};). |
104 | 28 | if (LHS.URL.empty() != RHS.URL.empty()) |
105 | 1 | return LHS.URL.empty() < RHS.URL.empty(); |
106 | 27 | if (int Res = LHS.URL.compare_lower(RHS.URL)) |
107 | 26 | return Res < 0; |
108 | | // '*' imports (with prefix) sort before {a, b, ...} imports. |
109 | 1 | if (LHS.Prefix.empty() != RHS.Prefix.empty()) |
110 | 1 | return LHS.Prefix.empty() < RHS.Prefix.empty(); |
111 | 0 | if (LHS.Prefix != RHS.Prefix) |
112 | 0 | return LHS.Prefix > RHS.Prefix; |
113 | 0 | return false; |
114 | 0 | } |
115 | | |
116 | | // JavaScriptImportSorter sorts JavaScript ES6 imports and exports. It is |
117 | | // implemented as a TokenAnalyzer because ES6 imports have substantial syntactic |
118 | | // structure, making it messy to sort them using regular expressions. |
119 | | class JavaScriptImportSorter : public TokenAnalyzer { |
120 | | public: |
121 | | JavaScriptImportSorter(const Environment &Env, const FormatStyle &Style) |
122 | | : TokenAnalyzer(Env, Style), |
123 | 26 | FileContents(Env.getSourceManager().getBufferData(Env.getFileID())) {} |
124 | | |
125 | | std::pair<tooling::Replacements, unsigned> |
126 | | analyze(TokenAnnotator &Annotator, |
127 | | SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, |
128 | 26 | FormatTokenLexer &Tokens) override { |
129 | 26 | tooling::Replacements Result; |
130 | 26 | AffectedRangeMgr.computeAffectedLines(AnnotatedLines); |
131 | | |
132 | 26 | const AdditionalKeywords &Keywords = Tokens.getKeywords(); |
133 | 26 | SmallVector<JsModuleReference, 16> References; |
134 | 26 | AnnotatedLine *FirstNonImportLine; |
135 | 26 | std::tie(References, FirstNonImportLine) = |
136 | 26 | parseModuleReferences(Keywords, AnnotatedLines); |
137 | | |
138 | 26 | if (References.empty()) |
139 | 2 | return {Result, 0}; |
140 | | |
141 | 24 | SmallVector<unsigned, 16> Indices; |
142 | 81 | for (unsigned i = 0, e = References.size(); i != e; ++i57 ) |
143 | 57 | Indices.push_back(i); |
144 | 54 | llvm::stable_sort(Indices, [&](unsigned LHSI, unsigned RHSI) { |
145 | 54 | return References[LHSI] < References[RHSI]; |
146 | 54 | }); |
147 | 24 | bool ReferencesInOrder = llvm::is_sorted(Indices); |
148 | | |
149 | 24 | std::string ReferencesText; |
150 | 24 | bool SymbolsInOrder = true; |
151 | 81 | for (unsigned i = 0, e = Indices.size(); i != e; ++i57 ) { |
152 | 57 | JsModuleReference Reference = References[Indices[i]]; |
153 | 57 | if (appendReference(ReferencesText, Reference)) |
154 | 5 | SymbolsInOrder = false; |
155 | 57 | if (i + 1 < e) { |
156 | | // Insert breaks between imports and exports. |
157 | 33 | ReferencesText += "\n"; |
158 | | // Separate imports groups with two line breaks, but keep all exports |
159 | | // in a single group. |
160 | 33 | if (!Reference.IsExport && |
161 | 30 | (Reference.IsExport != References[Indices[i + 1]].IsExport || |
162 | 28 | Reference.Category != References[Indices[i + 1]].Category)) |
163 | 7 | ReferencesText += "\n"; |
164 | 33 | } |
165 | 57 | } |
166 | | |
167 | 24 | if (ReferencesInOrder && SymbolsInOrder7 ) |
168 | 2 | return {Result, 0}; |
169 | | |
170 | 22 | SourceRange InsertionPoint = References[0].Range; |
171 | 22 | InsertionPoint.setEnd(References[References.size() - 1].Range.getEnd()); |
172 | | |
173 | | // The loop above might collapse previously existing line breaks between |
174 | | // import blocks, and thus shrink the file. SortIncludes must not shrink |
175 | | // overall source length as there is currently no re-calculation of ranges |
176 | | // after applying source sorting. |
177 | | // This loop just backfills trailing spaces after the imports, which are |
178 | | // harmless and will be stripped by the subsequent formatting pass. |
179 | | // FIXME: A better long term fix is to re-calculate Ranges after sorting. |
180 | 22 | unsigned PreviousSize = getSourceText(InsertionPoint).size(); |
181 | 23 | while (ReferencesText.size() < PreviousSize) { |
182 | 1 | ReferencesText += " "; |
183 | 1 | } |
184 | | |
185 | | // Separate references from the main code body of the file. |
186 | 22 | if (FirstNonImportLine && FirstNonImportLine->First->NewlinesBefore < 2) |
187 | 17 | ReferencesText += "\n"; |
188 | | |
189 | 22 | LLVM_DEBUG(llvm::dbgs() << "Replacing imports:\n" |
190 | 22 | << getSourceText(InsertionPoint) << "\nwith:\n" |
191 | 22 | << ReferencesText << "\n"); |
192 | 22 | auto Err = Result.add(tooling::Replacement( |
193 | 22 | Env.getSourceManager(), CharSourceRange::getCharRange(InsertionPoint), |
194 | 22 | ReferencesText)); |
195 | | // FIXME: better error handling. For now, just print error message and skip |
196 | | // the replacement for the release version. |
197 | 22 | if (Err) { |
198 | 0 | llvm::errs() << llvm::toString(std::move(Err)) << "\n"; |
199 | 0 | assert(false); |
200 | 0 | } |
201 | | |
202 | 22 | return {Result, 0}; |
203 | 22 | } |
204 | | |
205 | | private: |
206 | | FormatToken *Current; |
207 | | FormatToken *LineEnd; |
208 | | |
209 | | FormatToken invalidToken; |
210 | | |
211 | | StringRef FileContents; |
212 | | |
213 | 415 | void skipComments() { Current = skipComments(Current); } |
214 | | |
215 | 415 | FormatToken *skipComments(FormatToken *Tok) { |
216 | 420 | while (Tok && Tok->is(tok::comment)417 ) |
217 | 5 | Tok = Tok->Next; |
218 | 415 | return Tok; |
219 | 415 | } |
220 | | |
221 | 326 | void nextToken() { |
222 | 326 | Current = Current->Next; |
223 | 326 | skipComments(); |
224 | 326 | if (!Current || Current == LineEnd->Next) { |
225 | | // Set the current token to an invalid token, so that further parsing on |
226 | | // this line fails. |
227 | 0 | invalidToken.Tok.setKind(tok::unknown); |
228 | 0 | Current = &invalidToken; |
229 | 0 | } |
230 | 326 | } |
231 | | |
232 | 87 | StringRef getSourceText(SourceRange Range) { |
233 | 87 | return getSourceText(Range.getBegin(), Range.getEnd()); |
234 | 87 | } |
235 | | |
236 | 97 | StringRef getSourceText(SourceLocation Begin, SourceLocation End) { |
237 | 97 | const SourceManager &SM = Env.getSourceManager(); |
238 | 97 | return FileContents.substr(SM.getFileOffset(Begin), |
239 | 97 | SM.getFileOffset(End) - SM.getFileOffset(Begin)); |
240 | 97 | } |
241 | | |
242 | | // Appends ``Reference`` to ``Buffer``, returning true if text within the |
243 | | // ``Reference`` changed (e.g. symbol order). |
244 | 57 | bool appendReference(std::string &Buffer, JsModuleReference &Reference) { |
245 | | // Sort the individual symbols within the import. |
246 | | // E.g. `import {b, a} from 'x';` -> `import {a, b} from 'x';` |
247 | 57 | SmallVector<JsImportedSymbol, 1> Symbols = Reference.Symbols; |
248 | 57 | llvm::stable_sort( |
249 | 14 | Symbols, [&](const JsImportedSymbol &LHS, const JsImportedSymbol &RHS) { |
250 | 14 | return LHS.Symbol.compare_lower(RHS.Symbol) < 0; |
251 | 14 | }); |
252 | 57 | if (Symbols == Reference.Symbols) { |
253 | | // No change in symbol order. |
254 | 52 | StringRef ReferenceStmt = getSourceText(Reference.Range); |
255 | 52 | Buffer += ReferenceStmt; |
256 | 52 | return false; |
257 | 52 | } |
258 | | // Stitch together the module reference start... |
259 | 5 | SourceLocation SymbolsStart = Reference.Symbols.front().Range.getBegin(); |
260 | 5 | SourceLocation SymbolsEnd = Reference.Symbols.back().Range.getEnd(); |
261 | 5 | Buffer += getSourceText(Reference.Range.getBegin(), SymbolsStart); |
262 | | // ... then the references in order ... |
263 | 18 | for (auto I = Symbols.begin(), E = Symbols.end(); I != E; ++I13 ) { |
264 | 13 | if (I != Symbols.begin()) |
265 | 8 | Buffer += ","; |
266 | 13 | Buffer += getSourceText(I->Range); |
267 | 13 | } |
268 | | // ... followed by the module reference end. |
269 | 5 | Buffer += getSourceText(SymbolsEnd, Reference.Range.getEnd()); |
270 | 5 | return true; |
271 | 5 | } |
272 | | |
273 | | // Parses module references in the given lines. Returns the module references, |
274 | | // and a pointer to the first "main code" line if that is adjacent to the |
275 | | // affected lines of module references, nullptr otherwise. |
276 | | std::pair<SmallVector<JsModuleReference, 16>, AnnotatedLine *> |
277 | | parseModuleReferences(const AdditionalKeywords &Keywords, |
278 | 26 | SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { |
279 | 26 | SmallVector<JsModuleReference, 16> References; |
280 | 26 | SourceLocation Start; |
281 | 26 | AnnotatedLine *FirstNonImportLine = nullptr; |
282 | 26 | bool AnyImportAffected = false; |
283 | 89 | for (auto Line : AnnotatedLines) { |
284 | 89 | Current = Line->First; |
285 | 89 | LineEnd = Line->Last; |
286 | 89 | skipComments(); |
287 | 89 | if (Start.isInvalid() || References.empty()3 ) |
288 | | // After the first file level comment, consider line comments to be part |
289 | | // of the import that immediately follows them by using the previously |
290 | | // set Start. |
291 | 87 | Start = Line->First->Tok.getLocation(); |
292 | 89 | if (!Current) { |
293 | | // Only comments on this line. Could be the first non-import line. |
294 | 3 | FirstNonImportLine = Line; |
295 | 3 | continue; |
296 | 3 | } |
297 | 86 | JsModuleReference Reference; |
298 | 86 | Reference.Range.setBegin(Start); |
299 | 86 | if (!parseModuleReference(Keywords, Reference)) { |
300 | 26 | if (!FirstNonImportLine) |
301 | 25 | FirstNonImportLine = Line; // if no comment before. |
302 | 26 | break; |
303 | 26 | } |
304 | 60 | FirstNonImportLine = nullptr; |
305 | 60 | AnyImportAffected = AnyImportAffected || Line->Affected27 ; |
306 | 60 | Reference.Range.setEnd(LineEnd->Tok.getEndLoc()); |
307 | 60 | LLVM_DEBUG({ |
308 | 60 | llvm::dbgs() << "JsModuleReference: {" |
309 | 60 | << "is_export: " << Reference.IsExport |
310 | 60 | << ", cat: " << Reference.Category |
311 | 60 | << ", url: " << Reference.URL |
312 | 60 | << ", prefix: " << Reference.Prefix; |
313 | 60 | for (size_t i = 0; i < Reference.Symbols.size(); ++i) |
314 | 60 | llvm::dbgs() << ", " << Reference.Symbols[i].Symbol << " as " |
315 | 60 | << Reference.Symbols[i].Alias; |
316 | 60 | llvm::dbgs() << ", text: " << getSourceText(Reference.Range); |
317 | 60 | llvm::dbgs() << "}\n"; |
318 | 60 | }); |
319 | 60 | References.push_back(Reference); |
320 | 60 | Start = SourceLocation(); |
321 | 60 | } |
322 | | // Sort imports if any import line was affected. |
323 | 26 | if (!AnyImportAffected) |
324 | 2 | References.clear(); |
325 | 26 | return std::make_pair(References, FirstNonImportLine); |
326 | 26 | } |
327 | | |
328 | | // Parses a JavaScript/ECMAScript 6 module reference. |
329 | | // See http://www.ecma-international.org/ecma-262/6.0/#sec-scripts-and-modules |
330 | | // for grammar EBNF (production ModuleItem). |
331 | | bool parseModuleReference(const AdditionalKeywords &Keywords, |
332 | 86 | JsModuleReference &Reference) { |
333 | 86 | if (!Current || !Current->isOneOf(Keywords.kw_import, tok::kw_export)) |
334 | 26 | return false; |
335 | 60 | Reference.IsExport = Current->is(tok::kw_export); |
336 | | |
337 | 60 | nextToken(); |
338 | 60 | if (Current->isStringLiteral() && !Reference.IsExport2 ) { |
339 | | // "import 'side-effect';" |
340 | 2 | Reference.Category = JsModuleReference::ReferenceCategory::SIDE_EFFECT; |
341 | 2 | Reference.URL = |
342 | 2 | Current->TokenText.substr(1, Current->TokenText.size() - 2); |
343 | 2 | return true; |
344 | 2 | } |
345 | | |
346 | 58 | if (!parseModuleBindings(Keywords, Reference)) |
347 | 0 | return false; |
348 | | |
349 | 58 | if (Current->is(Keywords.kw_from)) { |
350 | | // imports have a 'from' clause, exports might not. |
351 | 57 | nextToken(); |
352 | 57 | if (!Current->isStringLiteral()) |
353 | 0 | return false; |
354 | | // URL = TokenText without the quotes. |
355 | 57 | Reference.URL = |
356 | 57 | Current->TokenText.substr(1, Current->TokenText.size() - 2); |
357 | 57 | if (Reference.URL.startswith("..")) |
358 | 3 | Reference.Category = |
359 | 3 | JsModuleReference::ReferenceCategory::RELATIVE_PARENT; |
360 | 54 | else if (Reference.URL.startswith(".")) |
361 | 5 | Reference.Category = JsModuleReference::ReferenceCategory::RELATIVE; |
362 | 49 | else |
363 | 49 | Reference.Category = JsModuleReference::ReferenceCategory::ABSOLUTE; |
364 | 1 | } else { |
365 | | // w/o URL groups with "empty". |
366 | 1 | Reference.Category = JsModuleReference::ReferenceCategory::RELATIVE; |
367 | 1 | } |
368 | 58 | return true; |
369 | 58 | } |
370 | | |
371 | | bool parseModuleBindings(const AdditionalKeywords &Keywords, |
372 | 58 | JsModuleReference &Reference) { |
373 | 58 | if (parseStarBinding(Keywords, Reference)) |
374 | 2 | return true; |
375 | 56 | return parseNamedBindings(Keywords, Reference); |
376 | 56 | } |
377 | | |
378 | | bool parseStarBinding(const AdditionalKeywords &Keywords, |
379 | 58 | JsModuleReference &Reference) { |
380 | | // * as prefix from '...'; |
381 | 58 | if (Current->isNot(tok::star)) |
382 | 56 | return false; |
383 | 2 | nextToken(); |
384 | 2 | if (Current->isNot(Keywords.kw_as)) |
385 | 0 | return false; |
386 | 2 | nextToken(); |
387 | 2 | if (Current->isNot(tok::identifier)) |
388 | 0 | return false; |
389 | 2 | Reference.Prefix = Current->TokenText; |
390 | 2 | nextToken(); |
391 | 2 | return true; |
392 | 2 | } |
393 | | |
394 | | bool parseNamedBindings(const AdditionalKeywords &Keywords, |
395 | 56 | JsModuleReference &Reference) { |
396 | 56 | if (Current->is(tok::identifier)) { |
397 | 4 | nextToken(); |
398 | 4 | if (Current->is(Keywords.kw_from)) |
399 | 2 | return true; |
400 | 2 | if (Current->isNot(tok::comma)) |
401 | 0 | return false; |
402 | 2 | nextToken(); // eat comma. |
403 | 2 | } |
404 | 54 | if (Current->isNot(tok::l_brace)) |
405 | 0 | return false; |
406 | | |
407 | | // {sym as alias, sym2 as ...} from '...'; |
408 | 119 | while (54 Current->isNot(tok::r_brace)) { |
409 | 66 | nextToken(); |
410 | 66 | if (Current->is(tok::r_brace)) |
411 | 1 | break; |
412 | 65 | if (!Current->isOneOf(tok::identifier, tok::kw_default)) |
413 | 0 | return false; |
414 | | |
415 | 65 | JsImportedSymbol Symbol; |
416 | 65 | Symbol.Symbol = Current->TokenText; |
417 | | // Make sure to include any preceding comments. |
418 | 65 | Symbol.Range.setBegin( |
419 | 65 | Current->getPreviousNonComment()->Next->WhitespaceRange.getBegin()); |
420 | 65 | nextToken(); |
421 | | |
422 | 65 | if (Current->is(Keywords.kw_as)) { |
423 | 6 | nextToken(); |
424 | 6 | if (!Current->isOneOf(tok::identifier, tok::kw_default)) |
425 | 0 | return false; |
426 | 6 | Symbol.Alias = Current->TokenText; |
427 | 6 | nextToken(); |
428 | 6 | } |
429 | 65 | Symbol.Range.setEnd(Current->Tok.getLocation()); |
430 | 65 | Reference.Symbols.push_back(Symbol); |
431 | | |
432 | 65 | if (!Current->isOneOf(tok::r_brace, tok::comma)) |
433 | 0 | return false; |
434 | 65 | } |
435 | 54 | nextToken(); // consume r_brace |
436 | 54 | return true; |
437 | 54 | } |
438 | | }; |
439 | | |
440 | | tooling::Replacements sortJavaScriptImports(const FormatStyle &Style, |
441 | | StringRef Code, |
442 | | ArrayRef<tooling::Range> Ranges, |
443 | 26 | StringRef FileName) { |
444 | | // FIXME: Cursor support. |
445 | 26 | return JavaScriptImportSorter(Environment(Code, FileName, Ranges), Style) |
446 | 26 | .process() |
447 | 26 | .first; |
448 | 26 | } |
449 | | |
450 | | } // end namespace format |
451 | | } // end namespace clang |