//===--- BreakableToken.cpp - Format C++ code -----------------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// /// /// \file /// \brief Contains implementation of BreakableToken class and classes derived /// from it. /// //===----------------------------------------------------------------------===// #include "BreakableToken.h" #include "clang/Basic/CharInfo.h" #include "clang/Format/Format.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" #include <algorithm> #define DEBUG_TYPE "format-token-breaker" namespace clang { namespace format { static const char *const Blanks = " \t\v\f\r"; static bool IsBlank(char C) { switch (C) { case ' ': case '\t': case '\v': case '\f': case '\r': return true; default: return false; } } static BreakableToken::Split getCommentSplit(StringRef Text, unsigned ContentStartColumn, unsigned ColumnLimit, unsigned TabWidth, encoding::Encoding Encoding) { if (ColumnLimit <= ContentStartColumn + 1) return BreakableToken::Split(StringRef::npos, 0); unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1; unsigned MaxSplitBytes = 0; for (unsigned NumChars = 0; NumChars < MaxSplit && MaxSplitBytes < Text.size();) { unsigned BytesInChar = encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding); NumChars += encoding::columnWidthWithTabs(Text.substr(MaxSplitBytes, BytesInChar), ContentStartColumn, TabWidth, Encoding); MaxSplitBytes += BytesInChar; } StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes); if (SpaceOffset == StringRef::npos || // Don't break at leading whitespace. Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) { // Make sure that we don't break at leading whitespace that // reaches past MaxSplit. StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks); if (FirstNonWhitespace == StringRef::npos) // If the comment is only whitespace, we cannot split. return BreakableToken::Split(StringRef::npos, 0); SpaceOffset = Text.find_first_of( Blanks, std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace)); } if (SpaceOffset != StringRef::npos && SpaceOffset != 0) { StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks); StringRef AfterCut = Text.substr(SpaceOffset).ltrim(Blanks); return BreakableToken::Split(BeforeCut.size(), AfterCut.begin() - BeforeCut.end()); } return BreakableToken::Split(StringRef::npos, 0); } static BreakableToken::Split getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit, unsigned TabWidth, encoding::Encoding Encoding) { // FIXME: Reduce unit test case. if (Text.empty()) return BreakableToken::Split(StringRef::npos, 0); if (ColumnLimit <= UsedColumns) return BreakableToken::Split(StringRef::npos, 0); unsigned MaxSplit = ColumnLimit - UsedColumns; StringRef::size_type SpaceOffset = 0; StringRef::size_type SlashOffset = 0; StringRef::size_type WordStartOffset = 0; StringRef::size_type SplitPoint = 0; for (unsigned Chars = 0;;) { unsigned Advance; if (Text[0] == '\\') { Advance = encoding::getEscapeSequenceLength(Text); Chars += Advance; } else { Advance = encoding::getCodePointNumBytes(Text[0], Encoding); Chars += encoding::columnWidthWithTabs( Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding); } if (Chars > MaxSplit || Text.size() <= Advance) break; if (IsBlank(Text[0])) SpaceOffset = SplitPoint; if (Text[0] == '/') SlashOffset = SplitPoint; if (Advance == 1 && !isAlphanumeric(Text[0])) WordStartOffset = SplitPoint; SplitPoint += Advance; Text = Text.substr(Advance); } if (SpaceOffset != 0) return BreakableToken::Split(SpaceOffset + 1, 0); if (SlashOffset != 0) return BreakableToken::Split(SlashOffset + 1, 0); if (WordStartOffset != 0) return BreakableToken::Split(WordStartOffset + 1, 0); if (SplitPoint != 0) return BreakableToken::Split(SplitPoint, 0); return BreakableToken::Split(StringRef::npos, 0); } unsigned BreakableSingleLineToken::getLineCount() const { return 1; } unsigned BreakableSingleLineToken::getLineLengthAfterSplit( unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const { return StartColumn + Prefix.size() + Postfix.size() + encoding::columnWidthWithTabs(Line.substr(Offset, Length), StartColumn + Prefix.size(), Style.TabWidth, Encoding); } BreakableSingleLineToken::BreakableSingleLineToken( const FormatToken &Tok, unsigned IndentLevel, unsigned StartColumn, StringRef Prefix, StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style) : BreakableToken(Tok, IndentLevel, InPPDirective, Encoding, Style), StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) { assert(Tok.TokenText.endswith(Postfix)); Line = Tok.TokenText.substr( Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size()); } BreakableStringLiteral::BreakableStringLiteral( const FormatToken &Tok, unsigned IndentLevel, unsigned StartColumn, StringRef Prefix, StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style) : BreakableSingleLineToken(Tok, IndentLevel, StartColumn, Prefix, Postfix, InPPDirective, Encoding, Style) {} BreakableToken::Split BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit) const { return getStringSplit(Line.substr(TailOffset), StartColumn + Prefix.size() + Postfix.size(), ColumnLimit, Style.TabWidth, Encoding); } void BreakableStringLiteral::insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) { unsigned LeadingSpaces = StartColumn; // The '@' of an ObjC string literal (@"Test") does not become part of the // string token. // FIXME: It might be a cleaner solution to merge the tokens as a // precomputation step. if (Prefix.startswith("@")) --LeadingSpaces; Whitespaces.replaceWhitespaceInToken( Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix, Prefix, InPPDirective, 1, IndentLevel, LeadingSpaces); } static StringRef getLineCommentIndentPrefix(StringRef Comment) { static const char *const KnownPrefixes[] = {"///", "//", "//!"}; StringRef LongestPrefix; for (StringRef KnownPrefix : KnownPrefixes) { if (Comment.startswith(KnownPrefix)) { size_t PrefixLength = KnownPrefix.size(); while (PrefixLength < Comment.size() && Comment[PrefixLength] == ' ') ++PrefixLength; if (PrefixLength > LongestPrefix.size()) LongestPrefix = Comment.substr(0, PrefixLength); } } return LongestPrefix; } BreakableLineComment::BreakableLineComment( const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style) : BreakableSingleLineToken(Token, IndentLevel, StartColumn, getLineCommentIndentPrefix(Token.TokenText), "", InPPDirective, Encoding, Style) { OriginalPrefix = Prefix; if (Token.TokenText.size() > Prefix.size() && isAlphanumeric(Token.TokenText[Prefix.size()])) { if (Prefix == "//") Prefix = "// "; else if (Prefix == "///") Prefix = "/// "; else if (Prefix == "//!") Prefix = "//! "; } } BreakableToken::Split BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit) const { return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(), ColumnLimit, Style.TabWidth, Encoding); } void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) { Whitespaces.replaceWhitespaceInToken( Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second, Postfix, Prefix, InPPDirective, /*Newlines=*/1, IndentLevel, StartColumn); } void BreakableLineComment::replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) { Whitespaces.replaceWhitespaceInToken( Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second, "", "", /*InPPDirective=*/false, /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1); } void BreakableLineComment::replaceWhitespaceBefore( unsigned LineIndex, WhitespaceManager &Whitespaces) { if (OriginalPrefix != Prefix) { Whitespaces.replaceWhitespaceInToken(Tok, OriginalPrefix.size(), 0, "", "", /*InPPDirective=*/false, /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1); } } BreakableBlockComment::BreakableBlockComment( const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn, unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style) : BreakableToken(Token, IndentLevel, InPPDirective, Encoding, Style) { StringRef TokenText(Token.TokenText); assert(TokenText.startswith("/*") && TokenText.endswith("*/")); TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n"); int IndentDelta = StartColumn - OriginalStartColumn; LeadingWhitespace.resize(Lines.size()); StartOfLineColumn.resize(Lines.size()); StartOfLineColumn[0] = StartColumn + 2; for (size_t i = 1; i < Lines.size(); ++i) adjustWhitespace(i, IndentDelta); Decoration = "* "; if (Lines.size() == 1 && !FirstInLine) { // Comments for which FirstInLine is false can start on arbitrary column, // and available horizontal space can be too small to align consecutive // lines with the first one. // FIXME: We could, probably, align them to current indentation level, but // now we just wrap them without stars. Decoration = ""; } for (size_t i = 1, e = Lines.size(); i < e && !Decoration.empty(); ++i) { // If the last line is empty, the closing "*/" will have a star. if (i + 1 == e && Lines[i].empty()) break; if (!Lines[i].empty() && i + 1 != e && Decoration.startswith(Lines[i])) continue; while (!Lines[i].startswith(Decoration)) Decoration = Decoration.substr(0, Decoration.size() - 1); } LastLineNeedsDecoration = true; IndentAtLineBreak = StartOfLineColumn[0] + 1; for (size_t i = 1; i < Lines.size(); ++i) { if (Lines[i].empty()) { if (i + 1 == Lines.size()) { // Empty last line means that we already have a star as a part of the // trailing */. We also need to preserve whitespace, so that */ is // correctly indented. LastLineNeedsDecoration = false; } else if (Decoration.empty()) { // For all other lines, set the start column to 0 if they're empty, so // we do not insert trailing whitespace anywhere. StartOfLineColumn[i] = 0; } continue; } // The first line already excludes the star. // For all other lines, adjust the line to exclude the star and // (optionally) the first whitespace. unsigned DecorationSize = Decoration.startswith(Lines[i]) ? Lines[i].size() : Decoration.size(); StartOfLineColumn[i] += DecorationSize; Lines[i] = Lines[i].substr(DecorationSize); LeadingWhitespace[i] += DecorationSize; if (!Decoration.startswith(Lines[i])) IndentAtLineBreak = std::min<int>(IndentAtLineBreak, std::max(0, StartOfLineColumn[i])); } IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size()); DEBUG({ llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n"; for (size_t i = 0; i < Lines.size(); ++i) { llvm::dbgs() << i << " |" << Lines[i] << "| " << LeadingWhitespace[i] << "\n"; } }); } void BreakableBlockComment::adjustWhitespace(unsigned LineIndex, int IndentDelta) { // When in a preprocessor directive, the trailing backslash in a block comment // is not needed, but can serve a purpose of uniformity with necessary escaped // newlines outside the comment. In this case we remove it here before // trimming the trailing whitespace. The backslash will be re-added later when // inserting a line break. size_t EndOfPreviousLine = Lines[LineIndex - 1].size(); if (InPPDirective && Lines[LineIndex - 1].endswith("\\")) --EndOfPreviousLine; // Calculate the end of the non-whitespace text in the previous line. EndOfPreviousLine = Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine); if (EndOfPreviousLine == StringRef::npos) EndOfPreviousLine = 0; else ++EndOfPreviousLine; // Calculate the start of the non-whitespace text in the current line. size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks); if (StartOfLine == StringRef::npos) StartOfLine = Lines[LineIndex].rtrim("\r\n").size(); StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine); // Adjust Lines to only contain relevant text. Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine); Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine); // Adjust LeadingWhitespace to account all whitespace between the lines // to the current line. LeadingWhitespace[LineIndex] = Lines[LineIndex].begin() - Lines[LineIndex - 1].end(); // Adjust the start column uniformly across all lines. StartOfLineColumn[LineIndex] = encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) + IndentDelta; } unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); } unsigned BreakableBlockComment::getLineLengthAfterSplit( unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const { unsigned ContentStartColumn = getContentStartColumn(LineIndex, Offset); return ContentStartColumn + encoding::columnWidthWithTabs(Lines[LineIndex].substr(Offset, Length), ContentStartColumn, Style.TabWidth, Encoding) + // The last line gets a "*/" postfix. (LineIndex + 1 == Lines.size() ? 2 : 0); } BreakableToken::Split BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit) const { return getCommentSplit(Lines[LineIndex].substr(TailOffset), getContentStartColumn(LineIndex, TailOffset), ColumnLimit, Style.TabWidth, Encoding); } void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) { StringRef Text = Lines[LineIndex].substr(TailOffset); StringRef Prefix = Decoration; if (LineIndex + 1 == Lines.size() && Text.size() == Split.first + Split.second) { // For the last line we need to break before "*/", but not to add "* ". Prefix = ""; } unsigned BreakOffsetInToken = Text.data() - Tok.TokenText.data() + Split.first; unsigned CharsToRemove = Split.second; assert(IndentAtLineBreak >= Decoration.size()); Whitespaces.replaceWhitespaceInToken( Tok, BreakOffsetInToken, CharsToRemove, "", Prefix, InPPDirective, 1, IndentLevel, IndentAtLineBreak - Decoration.size()); } void BreakableBlockComment::replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) { StringRef Text = Lines[LineIndex].substr(TailOffset); unsigned BreakOffsetInToken = Text.data() - Tok.TokenText.data() + Split.first; unsigned CharsToRemove = Split.second; Whitespaces.replaceWhitespaceInToken( Tok, BreakOffsetInToken, CharsToRemove, "", "", /*InPPDirective=*/false, /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1); } void BreakableBlockComment::replaceWhitespaceBefore( unsigned LineIndex, WhitespaceManager &Whitespaces) { if (LineIndex == 0) return; StringRef Prefix = Decoration; if (Lines[LineIndex].empty()) { if (LineIndex + 1 == Lines.size()) { if (!LastLineNeedsDecoration) { // If the last line was empty, we don't need a prefix, as the */ will // line up with the decoration (if it exists). Prefix = ""; } } else if (!Decoration.empty()) { // For other empty lines, if we do have a decoration, adapt it to not // contain a trailing whitespace. Prefix = Prefix.substr(0, 1); } } else { if (StartOfLineColumn[LineIndex] == 1) { // This line starts immediately after the decorating *. Prefix = Prefix.substr(0, 1); } } unsigned WhitespaceOffsetInToken = Lines[LineIndex].data() - Tok.TokenText.data() - LeadingWhitespace[LineIndex]; Whitespaces.replaceWhitespaceInToken( Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix, InPPDirective, 1, IndentLevel, StartOfLineColumn[LineIndex] - Prefix.size()); } unsigned BreakableBlockComment::getContentStartColumn(unsigned LineIndex, unsigned TailOffset) const { // If we break, we always break at the predefined indent. if (TailOffset != 0) return IndentAtLineBreak; return std::max(0, StartOfLineColumn[LineIndex]); } } // namespace format } // namespace clang