380 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			380 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			C++
		
	
	
	
//===--- Format.cpp -----------------------------------------*- C++-*------===//
 | 
						|
//
 | 
						|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 | 
						|
// See https://llvm.org/LICENSE.txt for license information.
 | 
						|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 | 
						|
//
 | 
						|
//===----------------------------------------------------------------------===//
 | 
						|
#include "Format.h"
 | 
						|
#include "support/Logger.h"
 | 
						|
#include "clang/Basic/FileManager.h"
 | 
						|
#include "clang/Basic/SourceManager.h"
 | 
						|
#include "clang/Format/Format.h"
 | 
						|
#include "clang/Lex/Lexer.h"
 | 
						|
#include "clang/Tooling/Core/Replacement.h"
 | 
						|
#include "llvm/Support/Unicode.h"
 | 
						|
 | 
						|
namespace clang {
 | 
						|
namespace clangd {
 | 
						|
namespace {
 | 
						|
 | 
						|
/// Append closing brackets )]} to \p Code to make it well-formed.
 | 
						|
/// Clang-format conservatively refuses to format files with unmatched brackets
 | 
						|
/// as it isn't sure where the errors are and so can't correct.
 | 
						|
/// When editing, it's reasonable to assume code before the cursor is complete.
 | 
						|
void closeBrackets(std::string &Code, const format::FormatStyle &Style) {
 | 
						|
  SourceManagerForFile FileSM("dummy.cpp", Code);
 | 
						|
  auto &SM = FileSM.get();
 | 
						|
  FileID FID = SM.getMainFileID();
 | 
						|
  Lexer Lex(FID, SM.getBufferOrFake(FID), SM,
 | 
						|
            format::getFormattingLangOpts(Style));
 | 
						|
  Token Tok;
 | 
						|
  std::vector<char> Brackets;
 | 
						|
  while (!Lex.LexFromRawLexer(Tok)) {
 | 
						|
    switch(Tok.getKind()) {
 | 
						|
      case tok::l_paren:
 | 
						|
        Brackets.push_back(')');
 | 
						|
        break;
 | 
						|
      case tok::l_brace:
 | 
						|
        Brackets.push_back('}');
 | 
						|
        break;
 | 
						|
      case tok::l_square:
 | 
						|
        Brackets.push_back(']');
 | 
						|
        break;
 | 
						|
      case tok::r_paren:
 | 
						|
        if (!Brackets.empty() && Brackets.back() == ')')
 | 
						|
          Brackets.pop_back();
 | 
						|
        break;
 | 
						|
      case tok::r_brace:
 | 
						|
        if (!Brackets.empty() && Brackets.back() == '}')
 | 
						|
          Brackets.pop_back();
 | 
						|
        break;
 | 
						|
      case tok::r_square:
 | 
						|
        if (!Brackets.empty() && Brackets.back() == ']')
 | 
						|
          Brackets.pop_back();
 | 
						|
        break;
 | 
						|
      default:
 | 
						|
        continue;
 | 
						|
    }
 | 
						|
  }
 | 
						|
  // Attempt to end any open comments first.
 | 
						|
  Code.append("\n// */\n");
 | 
						|
  Code.append(Brackets.rbegin(), Brackets.rend());
 | 
						|
}
 | 
						|
 | 
						|
static StringRef commentMarker(llvm::StringRef Line) {
 | 
						|
  for (StringRef Marker : {"///", "//"}){
 | 
						|
    auto I = Line.rfind(Marker);
 | 
						|
    if (I != StringRef::npos)
 | 
						|
      return Line.substr(I, Marker.size());
 | 
						|
  }
 | 
						|
  return "";
 | 
						|
}
 | 
						|
 | 
						|
llvm::StringRef firstLine(llvm::StringRef Code) {
 | 
						|
  return Code.take_until([](char C) { return C == '\n'; });
 | 
						|
}
 | 
						|
 | 
						|
llvm::StringRef lastLine(llvm::StringRef Code) {
 | 
						|
  llvm::StringRef Rest = Code;
 | 
						|
  while (!Rest.empty() && Rest.back() != '\n')
 | 
						|
    Rest = Rest.drop_back();
 | 
						|
  return Code.substr(Rest.size());
 | 
						|
}
 | 
						|
 | 
						|
// Filename is needed for tooling::Replacement and some overloads of reformat().
 | 
						|
// Its value should not affect the outcome. We use the default from reformat().
 | 
						|
llvm::StringRef Filename = "<stdin>";
 | 
						|
 | 
						|
// tooling::Replacement from overlapping StringRefs: From must be part of Code.
 | 
						|
tooling::Replacement replacement(llvm::StringRef Code, llvm::StringRef From,
 | 
						|
                                 llvm::StringRef To) {
 | 
						|
  assert(From.begin() >= Code.begin() && From.end() <= Code.end());
 | 
						|
  // The filename is required but ignored.
 | 
						|
  return tooling::Replacement(Filename, From.data() - Code.data(),
 | 
						|
                              From.size(), To);
 | 
						|
}
 | 
						|
 | 
						|
// High-level representation of incremental formatting changes.
 | 
						|
// The changes are made in two steps.
 | 
						|
// 1) a (possibly-empty) set of changes synthesized by clangd (e.g. adding
 | 
						|
//    comment markers when splitting a line comment with a newline).
 | 
						|
// 2) a selective clang-format run:
 | 
						|
//    - the "source code" passed to clang format is the code up to the cursor,
 | 
						|
//      a placeholder for the cursor, and some closing brackets
 | 
						|
//    - the formatting is restricted to the cursor and (possibly) other ranges
 | 
						|
//      (e.g. the old line when inserting a newline).
 | 
						|
//    - changes before the cursor are applied, those after are discarded.
 | 
						|
struct IncrementalChanges {
 | 
						|
  // Changes that should be applied before running clang-format.
 | 
						|
  tooling::Replacements Changes;
 | 
						|
  // Ranges of the original source code that should be clang-formatted.
 | 
						|
  // The CursorProxyText will also be formatted.
 | 
						|
  std::vector<tooling::Range> FormatRanges;
 | 
						|
  // The source code that should stand in for the cursor when clang-formatting.
 | 
						|
  // e.g. after inserting a newline, a line-comment at the cursor is used to
 | 
						|
  // ensure that the newline is preserved.
 | 
						|
  std::string CursorPlaceholder;
 | 
						|
};
 | 
						|
 | 
						|
// After a newline:
 | 
						|
//  - we continue any line-comment that was split
 | 
						|
//  - we format the old line in addition to the cursor
 | 
						|
//  - we represent the cursor with a line comment to preserve the newline
 | 
						|
IncrementalChanges getIncrementalChangesAfterNewline(llvm::StringRef Code,
 | 
						|
                                                     unsigned Cursor) {
 | 
						|
  IncrementalChanges Result;
 | 
						|
  // Before newline, code looked like:
 | 
						|
  //    leading^trailing
 | 
						|
  // After newline, code looks like:
 | 
						|
  //    leading
 | 
						|
  //    indentation^trailing
 | 
						|
  // Where indentation was added by the editor.
 | 
						|
  StringRef Trailing = firstLine(Code.substr(Cursor));
 | 
						|
  StringRef Indentation = lastLine(Code.take_front(Cursor));
 | 
						|
  if (Indentation.data() == Code.data()) {
 | 
						|
    vlog("Typed a newline, but we're still on the first line!");
 | 
						|
    return Result;
 | 
						|
  }
 | 
						|
  StringRef Leading =
 | 
						|
      lastLine(Code.take_front(Indentation.data() - Code.data() - 1));
 | 
						|
  StringRef NextLine = firstLine(Code.substr(Cursor + Trailing.size() + 1));
 | 
						|
 | 
						|
  // Strip leading whitespace on trailing line.
 | 
						|
  StringRef TrailingTrim = Trailing.ltrim();
 | 
						|
  if (unsigned TrailWS = Trailing.size() - TrailingTrim.size())
 | 
						|
    cantFail(Result.Changes.add(
 | 
						|
        replacement(Code, StringRef(Trailing.begin(), TrailWS), "")));
 | 
						|
 | 
						|
  // If we split a comment, replace indentation with a comment marker.
 | 
						|
  // If the editor made the new line a comment, also respect that.
 | 
						|
  StringRef CommentMarker = commentMarker(Leading);
 | 
						|
  bool NewLineIsComment = !commentMarker(Indentation).empty();
 | 
						|
  if (!CommentMarker.empty() &&
 | 
						|
      (NewLineIsComment || !commentMarker(NextLine).empty() ||
 | 
						|
       (!TrailingTrim.empty() && !TrailingTrim.startswith("//")))) {
 | 
						|
    using llvm::sys::unicode::columnWidthUTF8;
 | 
						|
    // We indent the new comment to match the previous one.
 | 
						|
    StringRef PreComment =
 | 
						|
        Leading.take_front(CommentMarker.data() - Leading.data());
 | 
						|
    std::string IndentAndComment =
 | 
						|
        (std::string(columnWidthUTF8(PreComment), ' ') + CommentMarker + " ")
 | 
						|
            .str();
 | 
						|
    cantFail(
 | 
						|
        Result.Changes.add(replacement(Code, Indentation, IndentAndComment)));
 | 
						|
  } else {
 | 
						|
    // Remove any indentation and let clang-format re-add it.
 | 
						|
    // This prevents the cursor marker dragging e.g. an aligned comment with it.
 | 
						|
    cantFail(Result.Changes.add(replacement(Code, Indentation, "")));
 | 
						|
  }
 | 
						|
 | 
						|
  // If we put a the newline inside a {} pair, put } on its own line...
 | 
						|
  if (CommentMarker.empty() && Leading.endswith("{") &&
 | 
						|
      Trailing.startswith("}")) {
 | 
						|
    cantFail(
 | 
						|
        Result.Changes.add(replacement(Code, Trailing.take_front(1), "\n}")));
 | 
						|
    // ...and format it.
 | 
						|
    Result.FormatRanges.push_back(
 | 
						|
        tooling::Range(Trailing.data() - Code.data() + 1, 1));
 | 
						|
  }
 | 
						|
 | 
						|
  // Format the whole leading line.
 | 
						|
  Result.FormatRanges.push_back(
 | 
						|
      tooling::Range(Leading.data() - Code.data(), Leading.size()));
 | 
						|
 | 
						|
  // We use a comment to represent the cursor, to preserve the newline.
 | 
						|
  // A trailing identifier improves parsing of e.g. for without braces.
 | 
						|
  // Exception: if the previous line has a trailing comment, we can't use one
 | 
						|
  // as the cursor (they will be aligned). But in this case we don't need to.
 | 
						|
  Result.CursorPlaceholder = !CommentMarker.empty() ? "ident" : "//==\nident";
 | 
						|
 | 
						|
  return Result;
 | 
						|
}
 | 
						|
 | 
						|
IncrementalChanges getIncrementalChanges(llvm::StringRef Code, unsigned Cursor,
 | 
						|
                                         llvm::StringRef InsertedText) {
 | 
						|
  IncrementalChanges Result;
 | 
						|
  if (InsertedText == "\n")
 | 
						|
    return getIncrementalChangesAfterNewline(Code, Cursor);
 | 
						|
 | 
						|
  Result.CursorPlaceholder = " /**/";
 | 
						|
  return Result;
 | 
						|
}
 | 
						|
 | 
						|
// Returns equivalent replacements that preserve the correspondence between
 | 
						|
// OldCursor and NewCursor. If OldCursor lies in a replaced region, that
 | 
						|
// replacement will be split.
 | 
						|
std::vector<tooling::Replacement>
 | 
						|
split(const tooling::Replacements &Replacements, unsigned OldCursor,
 | 
						|
      unsigned NewCursor) {
 | 
						|
  std::vector<tooling::Replacement> Result;
 | 
						|
  int LengthChange = 0;
 | 
						|
  for (const tooling::Replacement &R : Replacements) {
 | 
						|
    if (R.getOffset() + R.getLength() <= OldCursor) { // before cursor
 | 
						|
      Result.push_back(R);
 | 
						|
      LengthChange += R.getReplacementText().size() - R.getLength();
 | 
						|
    } else if (R.getOffset() < OldCursor) { // overlaps cursor
 | 
						|
      int ReplacementSplit = NewCursor - LengthChange - R.getOffset();
 | 
						|
      assert(ReplacementSplit >= 0 &&
 | 
						|
             ReplacementSplit <= int(R.getReplacementText().size()) &&
 | 
						|
             "NewCursor incompatible with OldCursor!");
 | 
						|
      Result.push_back(tooling::Replacement(
 | 
						|
          R.getFilePath(), R.getOffset(), OldCursor - R.getOffset(),
 | 
						|
          R.getReplacementText().take_front(ReplacementSplit)));
 | 
						|
      Result.push_back(tooling::Replacement(
 | 
						|
          R.getFilePath(), OldCursor,
 | 
						|
          R.getLength() - (OldCursor - R.getOffset()),
 | 
						|
          R.getReplacementText().drop_front(ReplacementSplit)));
 | 
						|
    } else if (R.getOffset() >= OldCursor) { // after cursor
 | 
						|
      Result.push_back(R);
 | 
						|
    }
 | 
						|
  }
 | 
						|
  return Result;
 | 
						|
}
 | 
						|
 | 
						|
} // namespace
 | 
						|
 | 
						|
// We're simulating the following sequence of changes:
 | 
						|
//   - apply the pre-formatting edits (see getIncrementalChanges)
 | 
						|
//   - insert a placeholder for the cursor
 | 
						|
//   - format some of the resulting code
 | 
						|
//   - remove the cursor placeholder again
 | 
						|
// The replacements we return are produced by composing these.
 | 
						|
//
 | 
						|
// The text we actually pass to clang-format is slightly different from this,
 | 
						|
// e.g. we have to close brackets. We ensure these differences are *after*
 | 
						|
// all the regions we want to format, and discard changes in them.
 | 
						|
std::vector<tooling::Replacement>
 | 
						|
formatIncremental(llvm::StringRef OriginalCode, unsigned OriginalCursor,
 | 
						|
                  llvm::StringRef InsertedText, format::FormatStyle Style) {
 | 
						|
  IncrementalChanges Incremental =
 | 
						|
      getIncrementalChanges(OriginalCode, OriginalCursor, InsertedText);
 | 
						|
  // Never *remove* lines in response to pressing enter! This annoys users.
 | 
						|
  if (InsertedText == "\n") {
 | 
						|
    Style.MaxEmptyLinesToKeep = 1000;
 | 
						|
    Style.KeepEmptyLinesAtTheStartOfBlocks = true;
 | 
						|
  }
 | 
						|
 | 
						|
  // Compute the code we want to format:
 | 
						|
  // 1) Start with code after the pre-formatting edits.
 | 
						|
  std::string CodeToFormat = cantFail(
 | 
						|
      tooling::applyAllReplacements(OriginalCode, Incremental.Changes));
 | 
						|
  unsigned Cursor = Incremental.Changes.getShiftedCodePosition(OriginalCursor);
 | 
						|
  // 2) Truncate code after the last interesting range.
 | 
						|
  unsigned FormatLimit = Cursor;
 | 
						|
  for (tooling::Range &R : Incremental.FormatRanges)
 | 
						|
    FormatLimit = std::max(FormatLimit, R.getOffset() + R.getLength());
 | 
						|
  CodeToFormat.resize(FormatLimit);
 | 
						|
  // 3) Insert a placeholder for the cursor.
 | 
						|
  CodeToFormat.insert(Cursor, Incremental.CursorPlaceholder);
 | 
						|
  // 4) Append brackets after FormatLimit so the code is well-formed.
 | 
						|
  closeBrackets(CodeToFormat, Style);
 | 
						|
 | 
						|
  // Determine the ranges to format:
 | 
						|
  std::vector<tooling::Range> RangesToFormat = Incremental.FormatRanges;
 | 
						|
  // Ranges after the cursor need to be adjusted for the placeholder.
 | 
						|
  for (auto &R : RangesToFormat) {
 | 
						|
    if (R.getOffset() > Cursor)
 | 
						|
      R = tooling::Range(R.getOffset() + Incremental.CursorPlaceholder.size(),
 | 
						|
                         R.getLength());
 | 
						|
  }
 | 
						|
  // We also format the cursor.
 | 
						|
  RangesToFormat.push_back(
 | 
						|
      tooling::Range(Cursor, Incremental.CursorPlaceholder.size()));
 | 
						|
  // Also update FormatLimit for the placeholder, we'll use this later.
 | 
						|
  FormatLimit += Incremental.CursorPlaceholder.size();
 | 
						|
 | 
						|
  // Run clang-format, and truncate changes at FormatLimit.
 | 
						|
  tooling::Replacements FormattingChanges;
 | 
						|
  format::FormattingAttemptStatus Status;
 | 
						|
  for (const tooling::Replacement &R : format::reformat(
 | 
						|
           Style, CodeToFormat, RangesToFormat, Filename, &Status)) {
 | 
						|
    if (R.getOffset() + R.getLength() <= FormatLimit) // Before limit.
 | 
						|
      cantFail(FormattingChanges.add(R));
 | 
						|
    else if(R.getOffset() < FormatLimit) { // Overlaps limit.
 | 
						|
      if (R.getReplacementText().empty()) // Deletions are easy to handle.
 | 
						|
        cantFail(FormattingChanges.add(tooling::Replacement(Filename,
 | 
						|
            R.getOffset(), FormatLimit - R.getOffset(), "")));
 | 
						|
      else
 | 
						|
        // Hopefully won't happen in practice?
 | 
						|
        elog("Incremental clang-format edit overlapping cursor @ {0}!\n{1}",
 | 
						|
             Cursor, CodeToFormat);
 | 
						|
    }
 | 
						|
  }
 | 
						|
  if (!Status.FormatComplete)
 | 
						|
    vlog("Incremental format incomplete at line {0}", Status.Line);
 | 
						|
 | 
						|
  // Now we are ready to compose the changes relative to OriginalCode.
 | 
						|
  //   edits -> insert placeholder -> format -> remove placeholder.
 | 
						|
  // We must express insert/remove as Replacements.
 | 
						|
  tooling::Replacements InsertCursorPlaceholder(
 | 
						|
      tooling::Replacement(Filename, Cursor, 0, Incremental.CursorPlaceholder));
 | 
						|
  unsigned FormattedCursorStart =
 | 
						|
               FormattingChanges.getShiftedCodePosition(Cursor),
 | 
						|
           FormattedCursorEnd = FormattingChanges.getShiftedCodePosition(
 | 
						|
               Cursor + Incremental.CursorPlaceholder.size());
 | 
						|
  tooling::Replacements RemoveCursorPlaceholder(
 | 
						|
      tooling::Replacement(Filename, FormattedCursorStart,
 | 
						|
                           FormattedCursorEnd - FormattedCursorStart, ""));
 | 
						|
 | 
						|
  // We can't simply merge() and return: tooling::Replacements will combine
 | 
						|
  // adjacent edits left and right of the cursor. This gives the right source
 | 
						|
  // code, but loses information about where the cursor is!
 | 
						|
  // Fortunately, none of the individual passes lose information, so:
 | 
						|
  //  - we use merge() to compute the final Replacements
 | 
						|
  //  - we chain getShiftedCodePosition() to compute final cursor position
 | 
						|
  //  - we split the final Replacements at the cursor position, so that
 | 
						|
  //    each Replacement lies either before or after the cursor.
 | 
						|
  tooling::Replacements Final;
 | 
						|
  unsigned FinalCursor = OriginalCursor;
 | 
						|
#ifndef NDEBUG
 | 
						|
  std::string FinalCode = std::string(OriginalCode);
 | 
						|
  dlog("Initial code: {0}", FinalCode);
 | 
						|
#endif
 | 
						|
  for (auto Pass :
 | 
						|
       std::vector<std::pair<const char *, const tooling::Replacements *>>{
 | 
						|
           {"Pre-formatting changes", &Incremental.Changes},
 | 
						|
           {"Insert placeholder", &InsertCursorPlaceholder},
 | 
						|
           {"clang-format", &FormattingChanges},
 | 
						|
           {"Remove placeholder", &RemoveCursorPlaceholder}}) {
 | 
						|
    Final = Final.merge(*Pass.second);
 | 
						|
    FinalCursor = Pass.second->getShiftedCodePosition(FinalCursor);
 | 
						|
#ifndef NDEBUG
 | 
						|
    FinalCode =
 | 
						|
        cantFail(tooling::applyAllReplacements(FinalCode, *Pass.second));
 | 
						|
    dlog("After {0}:\n{1}^{2}", Pass.first,
 | 
						|
         StringRef(FinalCode).take_front(FinalCursor),
 | 
						|
         StringRef(FinalCode).drop_front(FinalCursor));
 | 
						|
#endif
 | 
						|
  }
 | 
						|
  return split(Final, OriginalCursor, FinalCursor);
 | 
						|
}
 | 
						|
 | 
						|
unsigned
 | 
						|
transformCursorPosition(unsigned Offset,
 | 
						|
                        const std::vector<tooling::Replacement> &Replacements) {
 | 
						|
  unsigned OriginalOffset = Offset;
 | 
						|
  for (const auto &R : Replacements) {
 | 
						|
    if (R.getOffset() + R.getLength() <= OriginalOffset) {
 | 
						|
      // Replacement is before cursor.
 | 
						|
      Offset += R.getReplacementText().size();
 | 
						|
      Offset -= R.getLength();
 | 
						|
    } else if (R.getOffset() < OriginalOffset) {
 | 
						|
      // Replacement overlaps cursor.
 | 
						|
      // Preserve position within replacement text, as far as possible.
 | 
						|
      unsigned PositionWithinReplacement = Offset - R.getOffset();
 | 
						|
      if (PositionWithinReplacement > R.getReplacementText().size()) {
 | 
						|
        Offset += R.getReplacementText().size();
 | 
						|
        Offset -= PositionWithinReplacement;
 | 
						|
      }
 | 
						|
    } else {
 | 
						|
      // Replacement after cursor.
 | 
						|
      break; // Replacements are sorted, the rest are also after the cursor.
 | 
						|
    }
 | 
						|
  }
 | 
						|
  return Offset;
 | 
						|
}
 | 
						|
 | 
						|
} // namespace clangd
 | 
						|
} // namespace clang
 |