1519fc96cSDimitry Andric //===--- SourceCode.cpp - Source code manipulation routines -----*- C++ -*-===//
2519fc96cSDimitry Andric //
3519fc96cSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4519fc96cSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5519fc96cSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6519fc96cSDimitry Andric //
7519fc96cSDimitry Andric //===----------------------------------------------------------------------===//
8519fc96cSDimitry Andric //
9519fc96cSDimitry Andric // This file provides functions that simplify extraction of source code.
10519fc96cSDimitry Andric //
11519fc96cSDimitry Andric //===----------------------------------------------------------------------===//
12519fc96cSDimitry Andric #include "clang/Tooling/Transformer/SourceCode.h"
13cfca06d7SDimitry Andric #include "clang/AST/ASTContext.h"
14cfca06d7SDimitry Andric #include "clang/AST/Attr.h"
15cfca06d7SDimitry Andric #include "clang/AST/Comment.h"
16cfca06d7SDimitry Andric #include "clang/AST/Decl.h"
17cfca06d7SDimitry Andric #include "clang/AST/DeclCXX.h"
18cfca06d7SDimitry Andric #include "clang/AST/DeclTemplate.h"
19cfca06d7SDimitry Andric #include "clang/AST/Expr.h"
20cfca06d7SDimitry Andric #include "clang/Basic/SourceManager.h"
21519fc96cSDimitry Andric #include "clang/Lex/Lexer.h"
22cfca06d7SDimitry Andric #include "llvm/Support/Errc.h"
23cfca06d7SDimitry Andric #include "llvm/Support/Error.h"
24cfca06d7SDimitry Andric #include <set>
25519fc96cSDimitry Andric
26519fc96cSDimitry Andric using namespace clang;
27519fc96cSDimitry Andric
28cfca06d7SDimitry Andric using llvm::errc;
29cfca06d7SDimitry Andric using llvm::StringError;
30cfca06d7SDimitry Andric
getText(CharSourceRange Range,const ASTContext & Context)31519fc96cSDimitry Andric StringRef clang::tooling::getText(CharSourceRange Range,
32519fc96cSDimitry Andric const ASTContext &Context) {
33519fc96cSDimitry Andric return Lexer::getSourceText(Range, Context.getSourceManager(),
34519fc96cSDimitry Andric Context.getLangOpts());
35519fc96cSDimitry Andric }
36519fc96cSDimitry Andric
maybeExtendRange(CharSourceRange Range,tok::TokenKind Next,ASTContext & Context)37519fc96cSDimitry Andric CharSourceRange clang::tooling::maybeExtendRange(CharSourceRange Range,
38519fc96cSDimitry Andric tok::TokenKind Next,
39519fc96cSDimitry Andric ASTContext &Context) {
40cfca06d7SDimitry Andric CharSourceRange R = Lexer::getAsCharRange(Range, Context.getSourceManager(),
41cfca06d7SDimitry Andric Context.getLangOpts());
42cfca06d7SDimitry Andric if (R.isInvalid())
43519fc96cSDimitry Andric return Range;
44cfca06d7SDimitry Andric Token Tok;
45cfca06d7SDimitry Andric bool Err =
46cfca06d7SDimitry Andric Lexer::getRawToken(R.getEnd(), Tok, Context.getSourceManager(),
47cfca06d7SDimitry Andric Context.getLangOpts(), /*IgnoreWhiteSpace=*/true);
48cfca06d7SDimitry Andric if (Err || !Tok.is(Next))
49cfca06d7SDimitry Andric return Range;
50cfca06d7SDimitry Andric return CharSourceRange::getTokenRange(Range.getBegin(), Tok.getLocation());
51cfca06d7SDimitry Andric }
52cfca06d7SDimitry Andric
validateRange(const CharSourceRange & Range,const SourceManager & SM,bool AllowSystemHeaders)53b1c73532SDimitry Andric llvm::Error clang::tooling::validateRange(const CharSourceRange &Range,
54e3b55780SDimitry Andric const SourceManager &SM,
55e3b55780SDimitry Andric bool AllowSystemHeaders) {
56cfca06d7SDimitry Andric if (Range.isInvalid())
57cfca06d7SDimitry Andric return llvm::make_error<StringError>(errc::invalid_argument,
58cfca06d7SDimitry Andric "Invalid range");
59cfca06d7SDimitry Andric
60cfca06d7SDimitry Andric if (Range.getBegin().isMacroID() || Range.getEnd().isMacroID())
61cfca06d7SDimitry Andric return llvm::make_error<StringError>(
62cfca06d7SDimitry Andric errc::invalid_argument, "Range starts or ends in a macro expansion");
63cfca06d7SDimitry Andric
64e3b55780SDimitry Andric if (!AllowSystemHeaders) {
65cfca06d7SDimitry Andric if (SM.isInSystemHeader(Range.getBegin()) ||
66cfca06d7SDimitry Andric SM.isInSystemHeader(Range.getEnd()))
67cfca06d7SDimitry Andric return llvm::make_error<StringError>(errc::invalid_argument,
68cfca06d7SDimitry Andric "Range is in system header");
69e3b55780SDimitry Andric }
70cfca06d7SDimitry Andric
71cfca06d7SDimitry Andric std::pair<FileID, unsigned> BeginInfo = SM.getDecomposedLoc(Range.getBegin());
72cfca06d7SDimitry Andric std::pair<FileID, unsigned> EndInfo = SM.getDecomposedLoc(Range.getEnd());
73cfca06d7SDimitry Andric if (BeginInfo.first != EndInfo.first)
74cfca06d7SDimitry Andric return llvm::make_error<StringError>(
75cfca06d7SDimitry Andric errc::invalid_argument, "Range begins and ends in different files");
76cfca06d7SDimitry Andric
77cfca06d7SDimitry Andric if (BeginInfo.second > EndInfo.second)
78e3b55780SDimitry Andric return llvm::make_error<StringError>(errc::invalid_argument,
79e3b55780SDimitry Andric "Range's begin is past its end");
80cfca06d7SDimitry Andric
81cfca06d7SDimitry Andric return llvm::Error::success();
82519fc96cSDimitry Andric }
83519fc96cSDimitry Andric
validateEditRange(const CharSourceRange & Range,const SourceManager & SM)84e3b55780SDimitry Andric llvm::Error clang::tooling::validateEditRange(const CharSourceRange &Range,
85e3b55780SDimitry Andric const SourceManager &SM) {
86e3b55780SDimitry Andric return validateRange(Range, SM, /*AllowSystemHeaders=*/false);
87e3b55780SDimitry Andric }
88e3b55780SDimitry Andric
spelledInMacroDefinition(SourceLocation Loc,const SourceManager & SM)89e3b55780SDimitry Andric static bool spelledInMacroDefinition(SourceLocation Loc,
90e3b55780SDimitry Andric const SourceManager &SM) {
91e3b55780SDimitry Andric while (Loc.isMacroID()) {
92e3b55780SDimitry Andric const auto &Expansion = SM.getSLocEntry(SM.getFileID(Loc)).getExpansion();
93e3b55780SDimitry Andric if (Expansion.isMacroArgExpansion()) {
94e3b55780SDimitry Andric // Check the spelling location of the macro arg, in case the arg itself is
95e3b55780SDimitry Andric // in a macro expansion.
96e3b55780SDimitry Andric Loc = Expansion.getSpellingLoc();
97e3b55780SDimitry Andric } else {
98e3b55780SDimitry Andric return true;
99e3b55780SDimitry Andric }
100e3b55780SDimitry Andric }
101e3b55780SDimitry Andric return false;
102e3b55780SDimitry Andric }
103e3b55780SDimitry Andric
104ac9a064cSDimitry Andric // Returns the expansion char-range of `Loc` if `Loc` is a split token. For
105ac9a064cSDimitry Andric // example, `>>` in nested templates needs the first `>` to be split, otherwise
106ac9a064cSDimitry Andric // the `SourceLocation` of the token would lex as `>>` instead of `>`.
107ac9a064cSDimitry Andric static std::optional<CharSourceRange>
getExpansionForSplitToken(SourceLocation Loc,const SourceManager & SM,const LangOptions & LangOpts)108ac9a064cSDimitry Andric getExpansionForSplitToken(SourceLocation Loc, const SourceManager &SM,
109ac9a064cSDimitry Andric const LangOptions &LangOpts) {
110ac9a064cSDimitry Andric if (Loc.isMacroID()) {
111ac9a064cSDimitry Andric bool Invalid = false;
112ac9a064cSDimitry Andric auto &SLoc = SM.getSLocEntry(SM.getFileID(Loc), &Invalid);
113ac9a064cSDimitry Andric if (Invalid)
114ac9a064cSDimitry Andric return std::nullopt;
115ac9a064cSDimitry Andric if (auto &Expansion = SLoc.getExpansion();
116ac9a064cSDimitry Andric !Expansion.isExpansionTokenRange()) {
117ac9a064cSDimitry Andric // A char-range expansion is only used where a token-range would be
118ac9a064cSDimitry Andric // incorrect, and so identifies this as a split token (and importantly,
119ac9a064cSDimitry Andric // not as a macro).
120ac9a064cSDimitry Andric return Expansion.getExpansionLocRange();
121ac9a064cSDimitry Andric }
122ac9a064cSDimitry Andric }
123ac9a064cSDimitry Andric return std::nullopt;
124ac9a064cSDimitry Andric }
125ac9a064cSDimitry Andric
126ac9a064cSDimitry Andric // If `Range` covers a split token, returns the expansion range, otherwise
127ac9a064cSDimitry Andric // returns `Range`.
getRangeForSplitTokens(CharSourceRange Range,const SourceManager & SM,const LangOptions & LangOpts)128ac9a064cSDimitry Andric static CharSourceRange getRangeForSplitTokens(CharSourceRange Range,
129ac9a064cSDimitry Andric const SourceManager &SM,
130ac9a064cSDimitry Andric const LangOptions &LangOpts) {
131ac9a064cSDimitry Andric if (Range.isTokenRange()) {
132ac9a064cSDimitry Andric auto BeginToken = getExpansionForSplitToken(Range.getBegin(), SM, LangOpts);
133ac9a064cSDimitry Andric auto EndToken = getExpansionForSplitToken(Range.getEnd(), SM, LangOpts);
134ac9a064cSDimitry Andric if (EndToken) {
135ac9a064cSDimitry Andric SourceLocation BeginLoc =
136ac9a064cSDimitry Andric BeginToken ? BeginToken->getBegin() : Range.getBegin();
137ac9a064cSDimitry Andric // We can't use the expansion location with a token-range, because that
138ac9a064cSDimitry Andric // will incorrectly lex the end token, so use a char-range that ends at
139ac9a064cSDimitry Andric // the split.
140ac9a064cSDimitry Andric return CharSourceRange::getCharRange(BeginLoc, EndToken->getEnd());
141ac9a064cSDimitry Andric } else if (BeginToken) {
142ac9a064cSDimitry Andric // Since the end token is not split, the whole range covers the split, so
143ac9a064cSDimitry Andric // the only adjustment we make is to use the expansion location of the
144ac9a064cSDimitry Andric // begin token.
145ac9a064cSDimitry Andric return CharSourceRange::getTokenRange(BeginToken->getBegin(),
146ac9a064cSDimitry Andric Range.getEnd());
147ac9a064cSDimitry Andric }
148ac9a064cSDimitry Andric }
149ac9a064cSDimitry Andric return Range;
150ac9a064cSDimitry Andric }
151ac9a064cSDimitry Andric
// Maps `EditRange` to a candidate file char-range (not yet validated).
//
// With `IncludeMacroExpansion`, this defers to `Lexer::makeFileCharRange`.
// Without it, split tokens are adjusted first; if either endpoint is then
// spelled inside a macro definition, a default-constructed range is returned.
// Otherwise the result is the char-range between the endpoints' spelling
// locations, with the end moved past the last token for token-ranges.
static CharSourceRange getRange(const CharSourceRange &EditRange,
                                const SourceManager &SM,
                                const LangOptions &LangOpts,
                                bool IncludeMacroExpansion) {
  if (IncludeMacroExpansion)
    return Lexer::makeFileCharRange(EditRange, SM, LangOpts);

  const CharSourceRange Adjusted =
      getRangeForSplitTokens(EditRange, SM, LangOpts);
  const bool BeginInMacroDef =
      spelledInMacroDefinition(Adjusted.getBegin(), SM);
  if (BeginInMacroDef || spelledInMacroDefinition(Adjusted.getEnd(), SM))
    return {};

  const SourceLocation SpelledBegin = SM.getSpellingLoc(Adjusted.getBegin());
  SourceLocation SpelledEnd = SM.getSpellingLoc(Adjusted.getEnd());
  if (Adjusted.isTokenRange())
    SpelledEnd = Lexer::getLocForEndOfToken(SpelledEnd, 0, SM, LangOpts);
  return CharSourceRange::getCharRange(SpelledBegin, SpelledEnd);
}
173e3b55780SDimitry Andric
getFileRangeForEdit(const CharSourceRange & EditRange,const SourceManager & SM,const LangOptions & LangOpts,bool IncludeMacroExpansion)174e3b55780SDimitry Andric std::optional<CharSourceRange> clang::tooling::getFileRangeForEdit(
175e3b55780SDimitry Andric const CharSourceRange &EditRange, const SourceManager &SM,
176e3b55780SDimitry Andric const LangOptions &LangOpts, bool IncludeMacroExpansion) {
177e3b55780SDimitry Andric CharSourceRange Range =
178e3b55780SDimitry Andric getRange(EditRange, SM, LangOpts, IncludeMacroExpansion);
179cfca06d7SDimitry Andric bool IsInvalid = llvm::errorToBool(validateEditRange(Range, SM));
180cfca06d7SDimitry Andric if (IsInvalid)
181e3b55780SDimitry Andric return std::nullopt;
182519fc96cSDimitry Andric return Range;
183e3b55780SDimitry Andric }
184cfca06d7SDimitry Andric
getFileRange(const CharSourceRange & EditRange,const SourceManager & SM,const LangOptions & LangOpts,bool IncludeMacroExpansion)185e3b55780SDimitry Andric std::optional<CharSourceRange> clang::tooling::getFileRange(
186e3b55780SDimitry Andric const CharSourceRange &EditRange, const SourceManager &SM,
187e3b55780SDimitry Andric const LangOptions &LangOpts, bool IncludeMacroExpansion) {
188e3b55780SDimitry Andric CharSourceRange Range =
189e3b55780SDimitry Andric getRange(EditRange, SM, LangOpts, IncludeMacroExpansion);
190e3b55780SDimitry Andric bool IsInvalid =
191e3b55780SDimitry Andric llvm::errorToBool(validateRange(Range, SM, /*AllowSystemHeaders=*/true));
192e3b55780SDimitry Andric if (IsInvalid)
193e3b55780SDimitry Andric return std::nullopt;
194e3b55780SDimitry Andric return Range;
195cfca06d7SDimitry Andric }
196cfca06d7SDimitry Andric
startsWithNewline(const SourceManager & SM,const Token & Tok)197cfca06d7SDimitry Andric static bool startsWithNewline(const SourceManager &SM, const Token &Tok) {
198cfca06d7SDimitry Andric return isVerticalWhitespace(SM.getCharacterData(Tok.getLocation())[0]);
199cfca06d7SDimitry Andric }
200cfca06d7SDimitry Andric
contains(const std::set<tok::TokenKind> & Terminators,const Token & Tok)201cfca06d7SDimitry Andric static bool contains(const std::set<tok::TokenKind> &Terminators,
202cfca06d7SDimitry Andric const Token &Tok) {
203cfca06d7SDimitry Andric return Terminators.count(Tok.getKind()) > 0;
204cfca06d7SDimitry Andric }
205cfca06d7SDimitry Andric
206cfca06d7SDimitry Andric // Returns the exclusive, *file* end location of the entity whose last token is
207cfca06d7SDimitry Andric // at location 'EntityLast'. That is, it returns the location one past the last
208cfca06d7SDimitry Andric // relevant character.
209cfca06d7SDimitry Andric //
210cfca06d7SDimitry Andric // Associated tokens include comments, horizontal whitespace and 'Terminators'
211cfca06d7SDimitry Andric // -- optional tokens, which, if any are found, will be included; if
212cfca06d7SDimitry Andric // 'Terminators' is empty, we will not include any extra tokens beyond comments
213cfca06d7SDimitry Andric // and horizontal whitespace.
214cfca06d7SDimitry Andric static SourceLocation
getEntityEndLoc(const SourceManager & SM,SourceLocation EntityLast,const std::set<tok::TokenKind> & Terminators,const LangOptions & LangOpts)215cfca06d7SDimitry Andric getEntityEndLoc(const SourceManager &SM, SourceLocation EntityLast,
216cfca06d7SDimitry Andric const std::set<tok::TokenKind> &Terminators,
217cfca06d7SDimitry Andric const LangOptions &LangOpts) {
218cfca06d7SDimitry Andric assert(EntityLast.isValid() && "Invalid end location found.");
219cfca06d7SDimitry Andric
220cfca06d7SDimitry Andric // We remember the last location of a non-horizontal-whitespace token we have
221cfca06d7SDimitry Andric // lexed; this is the location up to which we will want to delete.
222cfca06d7SDimitry Andric // FIXME: Support using the spelling loc here for cases where we want to
223cfca06d7SDimitry Andric // analyze the macro text.
224cfca06d7SDimitry Andric
225cfca06d7SDimitry Andric CharSourceRange ExpansionRange = SM.getExpansionRange(EntityLast);
226cfca06d7SDimitry Andric // FIXME: Should check isTokenRange(), for the (rare) case that
227cfca06d7SDimitry Andric // `ExpansionRange` is a character range.
228cfca06d7SDimitry Andric std::unique_ptr<Lexer> Lexer = [&]() {
229cfca06d7SDimitry Andric bool Invalid = false;
230cfca06d7SDimitry Andric auto FileOffset = SM.getDecomposedLoc(ExpansionRange.getEnd());
231cfca06d7SDimitry Andric llvm::StringRef File = SM.getBufferData(FileOffset.first, &Invalid);
232cfca06d7SDimitry Andric assert(!Invalid && "Cannot get file/offset");
233cfca06d7SDimitry Andric return std::make_unique<clang::Lexer>(
234cfca06d7SDimitry Andric SM.getLocForStartOfFile(FileOffset.first), LangOpts, File.begin(),
235cfca06d7SDimitry Andric File.data() + FileOffset.second, File.end());
236cfca06d7SDimitry Andric }();
237cfca06d7SDimitry Andric
238cfca06d7SDimitry Andric // Tell Lexer to return whitespace as pseudo-tokens (kind is tok::unknown).
239cfca06d7SDimitry Andric Lexer->SetKeepWhitespaceMode(true);
240cfca06d7SDimitry Andric
241cfca06d7SDimitry Andric // Generally, the code we want to include looks like this ([] are optional),
242cfca06d7SDimitry Andric // If Terminators is empty:
243cfca06d7SDimitry Andric // [ <comment> ] [ <newline> ]
244cfca06d7SDimitry Andric // Otherwise:
245cfca06d7SDimitry Andric // ... <terminator> [ <comment> ] [ <newline> ]
246cfca06d7SDimitry Andric
247cfca06d7SDimitry Andric Token Tok;
248cfca06d7SDimitry Andric bool Terminated = false;
249cfca06d7SDimitry Andric
250cfca06d7SDimitry Andric // First, lex to the current token (which is the last token of the range that
251cfca06d7SDimitry Andric // is definitely associated with the decl). Then, we process the first token
252cfca06d7SDimitry Andric // separately from the rest based on conditions that hold specifically for
253cfca06d7SDimitry Andric // that first token.
254cfca06d7SDimitry Andric //
255cfca06d7SDimitry Andric // We do not search for a terminator if none is required or we've already
256cfca06d7SDimitry Andric // encountered it. Otherwise, if the original `EntityLast` location was in a
257cfca06d7SDimitry Andric // macro expansion, we don't have visibility into the text, so we assume we've
258cfca06d7SDimitry Andric // already terminated. However, we note this assumption with
259cfca06d7SDimitry Andric // `TerminatedByMacro`, because we'll want to handle it somewhat differently
260cfca06d7SDimitry Andric // for the terminators semicolon and comma. These terminators can be safely
261cfca06d7SDimitry Andric // associated with the entity when they appear after the macro -- extra
262cfca06d7SDimitry Andric // semicolons have no effect on the program and a well-formed program won't
263cfca06d7SDimitry Andric // have multiple commas in a row, so we're guaranteed that there is only one.
264cfca06d7SDimitry Andric //
265cfca06d7SDimitry Andric // FIXME: This handling of macros is more conservative than necessary. When
266cfca06d7SDimitry Andric // the end of the expansion coincides with the end of the node, we can still
267cfca06d7SDimitry Andric // safely analyze the code. But, it is more complicated, because we need to
268cfca06d7SDimitry Andric // start by lexing the spelling loc for the first token and then switch to the
269cfca06d7SDimitry Andric // expansion loc.
270cfca06d7SDimitry Andric bool TerminatedByMacro = false;
271cfca06d7SDimitry Andric Lexer->LexFromRawLexer(Tok);
272cfca06d7SDimitry Andric if (Terminators.empty() || contains(Terminators, Tok))
273cfca06d7SDimitry Andric Terminated = true;
274cfca06d7SDimitry Andric else if (EntityLast.isMacroID()) {
275cfca06d7SDimitry Andric Terminated = true;
276cfca06d7SDimitry Andric TerminatedByMacro = true;
277cfca06d7SDimitry Andric }
278cfca06d7SDimitry Andric
279cfca06d7SDimitry Andric // We save the most recent candidate for the exclusive end location.
280cfca06d7SDimitry Andric SourceLocation End = Tok.getEndLoc();
281cfca06d7SDimitry Andric
282cfca06d7SDimitry Andric while (!Terminated) {
283cfca06d7SDimitry Andric // Lex the next token we want to possibly expand the range with.
284cfca06d7SDimitry Andric Lexer->LexFromRawLexer(Tok);
285cfca06d7SDimitry Andric
286cfca06d7SDimitry Andric switch (Tok.getKind()) {
287cfca06d7SDimitry Andric case tok::eof:
288cfca06d7SDimitry Andric // Unexpected separators.
289cfca06d7SDimitry Andric case tok::l_brace:
290cfca06d7SDimitry Andric case tok::r_brace:
291cfca06d7SDimitry Andric case tok::comma:
292cfca06d7SDimitry Andric return End;
293cfca06d7SDimitry Andric // Whitespace pseudo-tokens.
294cfca06d7SDimitry Andric case tok::unknown:
295cfca06d7SDimitry Andric if (startsWithNewline(SM, Tok))
296cfca06d7SDimitry Andric // Include at least until the end of the line.
297cfca06d7SDimitry Andric End = Tok.getEndLoc();
298cfca06d7SDimitry Andric break;
299cfca06d7SDimitry Andric default:
300cfca06d7SDimitry Andric if (contains(Terminators, Tok))
301cfca06d7SDimitry Andric Terminated = true;
302cfca06d7SDimitry Andric End = Tok.getEndLoc();
303cfca06d7SDimitry Andric break;
304cfca06d7SDimitry Andric }
305cfca06d7SDimitry Andric }
306cfca06d7SDimitry Andric
307cfca06d7SDimitry Andric do {
308cfca06d7SDimitry Andric // Lex the next token we want to possibly expand the range with.
309cfca06d7SDimitry Andric Lexer->LexFromRawLexer(Tok);
310cfca06d7SDimitry Andric
311cfca06d7SDimitry Andric switch (Tok.getKind()) {
312cfca06d7SDimitry Andric case tok::unknown:
313cfca06d7SDimitry Andric if (startsWithNewline(SM, Tok))
314cfca06d7SDimitry Andric // We're done, but include this newline.
315cfca06d7SDimitry Andric return Tok.getEndLoc();
316cfca06d7SDimitry Andric break;
317cfca06d7SDimitry Andric case tok::comment:
318cfca06d7SDimitry Andric // Include any comments we find on the way.
319cfca06d7SDimitry Andric End = Tok.getEndLoc();
320cfca06d7SDimitry Andric break;
321cfca06d7SDimitry Andric case tok::semi:
322cfca06d7SDimitry Andric case tok::comma:
323cfca06d7SDimitry Andric if (TerminatedByMacro && contains(Terminators, Tok)) {
324cfca06d7SDimitry Andric End = Tok.getEndLoc();
325cfca06d7SDimitry Andric // We've found a real terminator.
326cfca06d7SDimitry Andric TerminatedByMacro = false;
327cfca06d7SDimitry Andric break;
328cfca06d7SDimitry Andric }
329cfca06d7SDimitry Andric // Found an unrelated token; stop and don't include it.
330cfca06d7SDimitry Andric return End;
331cfca06d7SDimitry Andric default:
332cfca06d7SDimitry Andric // Found an unrelated token; stop and don't include it.
333cfca06d7SDimitry Andric return End;
334cfca06d7SDimitry Andric }
335cfca06d7SDimitry Andric } while (true);
336cfca06d7SDimitry Andric }
337cfca06d7SDimitry Andric
338cfca06d7SDimitry Andric // Returns the expected terminator tokens for the given declaration.
339cfca06d7SDimitry Andric //
340cfca06d7SDimitry Andric // If we do not know the correct terminator token, returns an empty set.
341cfca06d7SDimitry Andric //
342cfca06d7SDimitry Andric // There are cases where we have more than one possible terminator (for example,
343cfca06d7SDimitry Andric // we find either a comma or a semicolon after a VarDecl).
getTerminators(const Decl & D)344cfca06d7SDimitry Andric static std::set<tok::TokenKind> getTerminators(const Decl &D) {
345cfca06d7SDimitry Andric if (llvm::isa<RecordDecl>(D) || llvm::isa<UsingDecl>(D))
346cfca06d7SDimitry Andric return {tok::semi};
347cfca06d7SDimitry Andric
348cfca06d7SDimitry Andric if (llvm::isa<FunctionDecl>(D) || llvm::isa<LinkageSpecDecl>(D))
349cfca06d7SDimitry Andric return {tok::r_brace, tok::semi};
350cfca06d7SDimitry Andric
351cfca06d7SDimitry Andric if (llvm::isa<VarDecl>(D) || llvm::isa<FieldDecl>(D))
352cfca06d7SDimitry Andric return {tok::comma, tok::semi};
353cfca06d7SDimitry Andric
354cfca06d7SDimitry Andric return {};
355cfca06d7SDimitry Andric }
356cfca06d7SDimitry Andric
357cfca06d7SDimitry Andric // Starting from `Loc`, skips whitespace up to, and including, a single
358cfca06d7SDimitry Andric // newline. Returns the (exclusive) end of any skipped whitespace (that is, the
359cfca06d7SDimitry Andric // location immediately after the whitespace).
skipWhitespaceAndNewline(const SourceManager & SM,SourceLocation Loc,const LangOptions & LangOpts)360cfca06d7SDimitry Andric static SourceLocation skipWhitespaceAndNewline(const SourceManager &SM,
361cfca06d7SDimitry Andric SourceLocation Loc,
362cfca06d7SDimitry Andric const LangOptions &LangOpts) {
363cfca06d7SDimitry Andric const char *LocChars = SM.getCharacterData(Loc);
364cfca06d7SDimitry Andric int i = 0;
365cfca06d7SDimitry Andric while (isHorizontalWhitespace(LocChars[i]))
366cfca06d7SDimitry Andric ++i;
367cfca06d7SDimitry Andric if (isVerticalWhitespace(LocChars[i]))
368cfca06d7SDimitry Andric ++i;
369cfca06d7SDimitry Andric return Loc.getLocWithOffset(i);
370cfca06d7SDimitry Andric }
371cfca06d7SDimitry Andric
372cfca06d7SDimitry Andric // Is `Loc` separated from any following decl by something meaningful (e.g. an
373cfca06d7SDimitry Andric // empty line, a comment), ignoring horizontal whitespace? Since this is a
374cfca06d7SDimitry Andric // heuristic, we return false when in doubt. `Loc` cannot be the first location
375cfca06d7SDimitry Andric // in the file.
atOrBeforeSeparation(const SourceManager & SM,SourceLocation Loc,const LangOptions & LangOpts)376cfca06d7SDimitry Andric static bool atOrBeforeSeparation(const SourceManager &SM, SourceLocation Loc,
377cfca06d7SDimitry Andric const LangOptions &LangOpts) {
378cfca06d7SDimitry Andric // If the preceding character is a newline, we'll check for an empty line as a
379cfca06d7SDimitry Andric // separator. However, we can't identify an empty line using tokens, so we
380cfca06d7SDimitry Andric // analyse the characters. If we try to use tokens, we'll just end up with a
381cfca06d7SDimitry Andric // whitespace token, whose characters we'd have to analyse anyhow.
382cfca06d7SDimitry Andric bool Invalid = false;
383cfca06d7SDimitry Andric const char *LocChars =
384cfca06d7SDimitry Andric SM.getCharacterData(Loc.getLocWithOffset(-1), &Invalid);
385cfca06d7SDimitry Andric assert(!Invalid &&
386cfca06d7SDimitry Andric "Loc must be a valid character and not the first of the source file.");
387cfca06d7SDimitry Andric if (isVerticalWhitespace(LocChars[0])) {
388cfca06d7SDimitry Andric for (int i = 1; isWhitespace(LocChars[i]); ++i)
389cfca06d7SDimitry Andric if (isVerticalWhitespace(LocChars[i]))
390cfca06d7SDimitry Andric return true;
391cfca06d7SDimitry Andric }
392cfca06d7SDimitry Andric // We didn't find an empty line, so lex the next token, skipping past any
393cfca06d7SDimitry Andric // whitespace we just scanned.
394cfca06d7SDimitry Andric Token Tok;
395cfca06d7SDimitry Andric bool Failed = Lexer::getRawToken(Loc, Tok, SM, LangOpts,
396cfca06d7SDimitry Andric /*IgnoreWhiteSpace=*/true);
397cfca06d7SDimitry Andric if (Failed)
398cfca06d7SDimitry Andric // Any text that confuses the lexer seems fair to consider a separation.
399cfca06d7SDimitry Andric return true;
400cfca06d7SDimitry Andric
401cfca06d7SDimitry Andric switch (Tok.getKind()) {
402cfca06d7SDimitry Andric case tok::comment:
403cfca06d7SDimitry Andric case tok::l_brace:
404cfca06d7SDimitry Andric case tok::r_brace:
405cfca06d7SDimitry Andric case tok::eof:
406cfca06d7SDimitry Andric return true;
407cfca06d7SDimitry Andric default:
408cfca06d7SDimitry Andric return false;
409cfca06d7SDimitry Andric }
410cfca06d7SDimitry Andric }
411cfca06d7SDimitry Andric
getAssociatedRange(const Decl & Decl,ASTContext & Context)412cfca06d7SDimitry Andric CharSourceRange tooling::getAssociatedRange(const Decl &Decl,
413cfca06d7SDimitry Andric ASTContext &Context) {
414cfca06d7SDimitry Andric const SourceManager &SM = Context.getSourceManager();
415cfca06d7SDimitry Andric const LangOptions &LangOpts = Context.getLangOpts();
416cfca06d7SDimitry Andric CharSourceRange Range = CharSourceRange::getTokenRange(Decl.getSourceRange());
417cfca06d7SDimitry Andric
418cfca06d7SDimitry Andric // First, expand to the start of the template<> declaration if necessary.
419cfca06d7SDimitry Andric if (const auto *Record = llvm::dyn_cast<CXXRecordDecl>(&Decl)) {
420cfca06d7SDimitry Andric if (const auto *T = Record->getDescribedClassTemplate())
421cfca06d7SDimitry Andric if (SM.isBeforeInTranslationUnit(T->getBeginLoc(), Range.getBegin()))
422cfca06d7SDimitry Andric Range.setBegin(T->getBeginLoc());
423cfca06d7SDimitry Andric } else if (const auto *F = llvm::dyn_cast<FunctionDecl>(&Decl)) {
424cfca06d7SDimitry Andric if (const auto *T = F->getDescribedFunctionTemplate())
425cfca06d7SDimitry Andric if (SM.isBeforeInTranslationUnit(T->getBeginLoc(), Range.getBegin()))
426cfca06d7SDimitry Andric Range.setBegin(T->getBeginLoc());
427cfca06d7SDimitry Andric }
428cfca06d7SDimitry Andric
429cfca06d7SDimitry Andric // Next, expand the end location past trailing comments to include a potential
430cfca06d7SDimitry Andric // newline at the end of the decl's line.
431cfca06d7SDimitry Andric Range.setEnd(
432cfca06d7SDimitry Andric getEntityEndLoc(SM, Decl.getEndLoc(), getTerminators(Decl), LangOpts));
433cfca06d7SDimitry Andric Range.setTokenRange(false);
434cfca06d7SDimitry Andric
435cfca06d7SDimitry Andric // Expand to include preceeding associated comments. We ignore any comments
436cfca06d7SDimitry Andric // that are not preceeding the decl, since we've already skipped trailing
437cfca06d7SDimitry Andric // comments with getEntityEndLoc.
438cfca06d7SDimitry Andric if (const RawComment *Comment =
439cfca06d7SDimitry Andric Decl.getASTContext().getRawCommentForDeclNoCache(&Decl))
440cfca06d7SDimitry Andric // Only include a preceding comment if:
441cfca06d7SDimitry Andric // * it is *not* separate from the declaration (not including any newline
442cfca06d7SDimitry Andric // that immediately follows the comment),
443cfca06d7SDimitry Andric // * the decl *is* separate from any following entity (so, there are no
444cfca06d7SDimitry Andric // other entities the comment could refer to), and
445cfca06d7SDimitry Andric // * it is not a IfThisThenThat lint check.
446cfca06d7SDimitry Andric if (SM.isBeforeInTranslationUnit(Comment->getBeginLoc(),
447cfca06d7SDimitry Andric Range.getBegin()) &&
448cfca06d7SDimitry Andric !atOrBeforeSeparation(
449cfca06d7SDimitry Andric SM, skipWhitespaceAndNewline(SM, Comment->getEndLoc(), LangOpts),
450cfca06d7SDimitry Andric LangOpts) &&
451cfca06d7SDimitry Andric atOrBeforeSeparation(SM, Range.getEnd(), LangOpts)) {
452cfca06d7SDimitry Andric const StringRef CommentText = Comment->getRawText(SM);
453cfca06d7SDimitry Andric if (!CommentText.contains("LINT.IfChange") &&
454cfca06d7SDimitry Andric !CommentText.contains("LINT.ThenChange"))
455cfca06d7SDimitry Andric Range.setBegin(Comment->getBeginLoc());
456cfca06d7SDimitry Andric }
457cfca06d7SDimitry Andric // Add leading attributes.
458cfca06d7SDimitry Andric for (auto *Attr : Decl.attrs()) {
459cfca06d7SDimitry Andric if (Attr->getLocation().isInvalid() ||
460cfca06d7SDimitry Andric !SM.isBeforeInTranslationUnit(Attr->getLocation(), Range.getBegin()))
461cfca06d7SDimitry Andric continue;
462cfca06d7SDimitry Andric Range.setBegin(Attr->getLocation());
463cfca06d7SDimitry Andric
464cfca06d7SDimitry Andric // Extend to the left '[[' or '__attribute((' if we saw the attribute,
465cfca06d7SDimitry Andric // unless it is not a valid location.
466cfca06d7SDimitry Andric bool Invalid;
467cfca06d7SDimitry Andric StringRef Source =
468cfca06d7SDimitry Andric SM.getBufferData(SM.getFileID(Range.getBegin()), &Invalid);
469cfca06d7SDimitry Andric if (Invalid)
470cfca06d7SDimitry Andric continue;
471cfca06d7SDimitry Andric llvm::StringRef BeforeAttr =
472cfca06d7SDimitry Andric Source.substr(0, SM.getFileOffset(Range.getBegin()));
473cfca06d7SDimitry Andric llvm::StringRef BeforeAttrStripped = BeforeAttr.rtrim();
474cfca06d7SDimitry Andric
475cfca06d7SDimitry Andric for (llvm::StringRef Prefix : {"[[", "__attribute__(("}) {
476cfca06d7SDimitry Andric // Handle whitespace between attribute prefix and attribute value.
477312c0ed1SDimitry Andric if (BeforeAttrStripped.ends_with(Prefix)) {
478cfca06d7SDimitry Andric // Move start to start position of prefix, which is
479cfca06d7SDimitry Andric // length(BeforeAttr) - length(BeforeAttrStripped) + length(Prefix)
480cfca06d7SDimitry Andric // positions to the left.
481cfca06d7SDimitry Andric Range.setBegin(Range.getBegin().getLocWithOffset(static_cast<int>(
482cfca06d7SDimitry Andric -BeforeAttr.size() + BeforeAttrStripped.size() - Prefix.size())));
483cfca06d7SDimitry Andric break;
484cfca06d7SDimitry Andric // If we didn't see '[[' or '__attribute' it's probably coming from a
485cfca06d7SDimitry Andric // macro expansion which is already handled by makeFileCharRange(),
486cfca06d7SDimitry Andric // below.
487cfca06d7SDimitry Andric }
488cfca06d7SDimitry Andric }
489cfca06d7SDimitry Andric }
490cfca06d7SDimitry Andric
491cfca06d7SDimitry Andric // Range.getEnd() is already fully un-expanded by getEntityEndLoc. But,
492cfca06d7SDimitry Andric // Range.getBegin() may be inside an expansion.
493cfca06d7SDimitry Andric return Lexer::makeFileCharRange(Range, SM, LangOpts);
494519fc96cSDimitry Andric }
495